Roopalgn committed on
Commit
a5859dc
·
1 Parent(s): c0d489c

Normalize remaining score fields into open interval

Browse files
Files changed (2) hide show
  1. inference.py +3 -2
  2. server/environment.py +3 -2
inference.py CHANGED
@@ -945,14 +945,15 @@ def run() -> None:
945
  if final_rubric_reward is not None
946
  else (task_step_rewards[-1] if task_step_rewards else 0.0)
947
  )
 
948
  all_results[task_id] = {
949
  "final_reward": final_reward,
950
  "step_count": step_num,
951
  }
952
  emit_log(
953
  "END",
954
- final_reward=round(final_reward, 4),
955
- score=round(clamp_reported_score(final_reward), 4),
956
  step_count=step_num,
957
  task_id=task_id,
958
  task_name=task["name"],
 
945
  if final_rubric_reward is not None
946
  else (task_step_rewards[-1] if task_step_rewards else 0.0)
947
  )
948
+ reported_score = clamp_reported_score(final_reward)
949
  all_results[task_id] = {
950
  "final_reward": final_reward,
951
  "step_count": step_num,
952
  }
953
  emit_log(
954
  "END",
955
+ final_reward=round(reported_score, 4),
956
+ score=round(reported_score, 4),
957
  step_count=step_num,
958
  task_id=task_id,
959
  task_name=task["name"],
server/environment.py CHANGED
@@ -754,13 +754,14 @@ class HelpdeskTicketRoutingEnvironment(
754
  )
755
  self._state.last_tool_result = tool_result
756
  investigation_reward = USEFUL_INVESTIGATION_REWARD if useful_investigation else 0.0
 
757
  self._state.last_step_reward = investigation_reward
758
  self._state.reward = investigation_reward
759
  self._state.done = False
760
  self._state.investigation_penalty_applied = self._compute_episode_penalty()
761
  progress = self._tool_progress_for_ticket(current_ticket)
762
  reward_components = self._build_reward_components(
763
- ticket_score=0.0,
764
  field_breakdown={},
765
  shaped_step_reward=investigation_reward,
766
  reward_kind="investigation",
@@ -779,7 +780,7 @@ class HelpdeskTicketRoutingEnvironment(
779
  self._build_history_entry(
780
  current_ticket,
781
  predicted=action.model_dump(exclude_none=True),
782
- score=0.0,
783
  breakdown={},
784
  queue_position=idx + 1,
785
  reward=investigation_reward,
 
754
  )
755
  self._state.last_tool_result = tool_result
756
  investigation_reward = USEFUL_INVESTIGATION_REWARD if useful_investigation else 0.0
757
+ investigation_score = clamp_open_unit_interval(0.0)
758
  self._state.last_step_reward = investigation_reward
759
  self._state.reward = investigation_reward
760
  self._state.done = False
761
  self._state.investigation_penalty_applied = self._compute_episode_penalty()
762
  progress = self._tool_progress_for_ticket(current_ticket)
763
  reward_components = self._build_reward_components(
764
+ ticket_score=investigation_score,
765
  field_breakdown={},
766
  shaped_step_reward=investigation_reward,
767
  reward_kind="investigation",
 
780
  self._build_history_entry(
781
  current_ticket,
782
  predicted=action.model_dump(exclude_none=True),
783
+ score=investigation_score,
784
  breakdown={},
785
  queue_position=idx + 1,
786
  reward=investigation_reward,