Spaces:

Roopalgn
/

AIHack-ITHelpDesk

Running

Roopalgn committed on Apr 7

Commit

a5859dc

1 Parent(s): c0d489c

Normalize remaining score fields into open interval

Files changed (2) hide show

inference.py CHANGED Viewed

@@ -945,14 +945,15 @@ def run() -> None:
             if final_rubric_reward is not None
             else (task_step_rewards[-1] if task_step_rewards else 0.0)
         )
         all_results[task_id] = {
             "final_reward": final_reward,
             "step_count": step_num,
         }
         emit_log(
             "END",
-            final_reward=round(final_reward, 4),
-            score=round(clamp_reported_score(final_reward), 4),
             step_count=step_num,
             task_id=task_id,
             task_name=task["name"],

             if final_rubric_reward is not None
             else (task_step_rewards[-1] if task_step_rewards else 0.0)
         )
+        reported_score = clamp_reported_score(final_reward)
         all_results[task_id] = {
             "final_reward": final_reward,
             "step_count": step_num,
         }
         emit_log(
             "END",
+            final_reward=round(reported_score, 4),
+            score=round(reported_score, 4),
             step_count=step_num,
             task_id=task_id,
             task_name=task["name"],

server/environment.py CHANGED Viewed

@@ -754,13 +754,14 @@ class HelpdeskTicketRoutingEnvironment(
         )
         self._state.last_tool_result = tool_result
         investigation_reward = USEFUL_INVESTIGATION_REWARD if useful_investigation else 0.0
         self._state.last_step_reward = investigation_reward
         self._state.reward = investigation_reward
         self._state.done = False
         self._state.investigation_penalty_applied = self._compute_episode_penalty()
         progress = self._tool_progress_for_ticket(current_ticket)
         reward_components = self._build_reward_components(
-            ticket_score=0.0,
             field_breakdown={},
             shaped_step_reward=investigation_reward,
             reward_kind="investigation",
@@ -779,7 +780,7 @@ class HelpdeskTicketRoutingEnvironment(
             self._build_history_entry(
                 current_ticket,
                 predicted=action.model_dump(exclude_none=True),
-                score=0.0,
                 breakdown={},
                 queue_position=idx + 1,
                 reward=investigation_reward,

         )
         self._state.last_tool_result = tool_result
         investigation_reward = USEFUL_INVESTIGATION_REWARD if useful_investigation else 0.0
+        investigation_score = clamp_open_unit_interval(0.0)
         self._state.last_step_reward = investigation_reward
         self._state.reward = investigation_reward
         self._state.done = False
         self._state.investigation_penalty_applied = self._compute_episode_penalty()
         progress = self._tool_progress_for_ticket(current_ticket)
         reward_components = self._build_reward_components(
+            ticket_score=investigation_score,
             field_breakdown={},
             shaped_step_reward=investigation_reward,
             reward_kind="investigation",
             self._build_history_entry(
                 current_ticket,
                 predicted=action.model_dump(exclude_none=True),
+                score=investigation_score,
                 breakdown={},
                 queue_position=idx + 1,
                 reward=investigation_reward,