Spaces:
Running
Running
Normalize remaining score fields into open interval
Browse files
- inference.py +3 -2
- server/environment.py +3 -2
inference.py
CHANGED
|
@@ -945,14 +945,15 @@ def run() -> None:
|
|
| 945 |
if final_rubric_reward is not None
|
| 946 |
else (task_step_rewards[-1] if task_step_rewards else 0.0)
|
| 947 |
)
|
|
|
|
| 948 |
all_results[task_id] = {
|
| 949 |
"final_reward": final_reward,
|
| 950 |
"step_count": step_num,
|
| 951 |
}
|
| 952 |
emit_log(
|
| 953 |
"END",
|
| 954 |
-
final_reward=round(
|
| 955 |
-
score=round(
|
| 956 |
step_count=step_num,
|
| 957 |
task_id=task_id,
|
| 958 |
task_name=task["name"],
|
|
|
|
| 945 |
if final_rubric_reward is not None
|
| 946 |
else (task_step_rewards[-1] if task_step_rewards else 0.0)
|
| 947 |
)
|
| 948 |
+
reported_score = clamp_reported_score(final_reward)
|
| 949 |
all_results[task_id] = {
|
| 950 |
"final_reward": final_reward,
|
| 951 |
"step_count": step_num,
|
| 952 |
}
|
| 953 |
emit_log(
|
| 954 |
"END",
|
| 955 |
+
final_reward=round(reported_score, 4),
|
| 956 |
+
score=round(reported_score, 4),
|
| 957 |
step_count=step_num,
|
| 958 |
task_id=task_id,
|
| 959 |
task_name=task["name"],
|
server/environment.py
CHANGED
|
@@ -754,13 +754,14 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 754 |
)
|
| 755 |
self._state.last_tool_result = tool_result
|
| 756 |
investigation_reward = USEFUL_INVESTIGATION_REWARD if useful_investigation else 0.0
|
|
|
|
| 757 |
self._state.last_step_reward = investigation_reward
|
| 758 |
self._state.reward = investigation_reward
|
| 759 |
self._state.done = False
|
| 760 |
self._state.investigation_penalty_applied = self._compute_episode_penalty()
|
| 761 |
progress = self._tool_progress_for_ticket(current_ticket)
|
| 762 |
reward_components = self._build_reward_components(
|
| 763 |
-
ticket_score=
|
| 764 |
field_breakdown={},
|
| 765 |
shaped_step_reward=investigation_reward,
|
| 766 |
reward_kind="investigation",
|
|
@@ -779,7 +780,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 779 |
self._build_history_entry(
|
| 780 |
current_ticket,
|
| 781 |
predicted=action.model_dump(exclude_none=True),
|
| 782 |
-
score=
|
| 783 |
breakdown={},
|
| 784 |
queue_position=idx + 1,
|
| 785 |
reward=investigation_reward,
|
|
|
|
| 754 |
)
|
| 755 |
self._state.last_tool_result = tool_result
|
| 756 |
investigation_reward = USEFUL_INVESTIGATION_REWARD if useful_investigation else 0.0
|
| 757 |
+
investigation_score = clamp_open_unit_interval(0.0)
|
| 758 |
self._state.last_step_reward = investigation_reward
|
| 759 |
self._state.reward = investigation_reward
|
| 760 |
self._state.done = False
|
| 761 |
self._state.investigation_penalty_applied = self._compute_episode_penalty()
|
| 762 |
progress = self._tool_progress_for_ticket(current_ticket)
|
| 763 |
reward_components = self._build_reward_components(
|
| 764 |
+
ticket_score=investigation_score,
|
| 765 |
field_breakdown={},
|
| 766 |
shaped_step_reward=investigation_reward,
|
| 767 |
reward_kind="investigation",
|
|
|
|
| 780 |
self._build_history_entry(
|
| 781 |
current_ticket,
|
| 782 |
predicted=action.model_dump(exclude_none=True),
|
| 783 |
+
score=investigation_score,
|
| 784 |
breakdown={},
|
| 785 |
queue_position=idx + 1,
|
| 786 |
reward=investigation_reward,
|