# BrowserForge / models.py
# cryptodarth's picture
# V1
# 42d1599
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Public OpenEnv models for the Browser RL environment."""
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field
try:
    from openenv.core.env_server.types import Action, Observation
except Exception:  # pragma: no cover - lightweight fallback for local eval/notebook paths
    # Any failure while importing openenv lands here, so the models defined in
    # this module can still be built on top of plain pydantic stand-ins.

    class Action(BaseModel):
        """Stand-in action base used when the openenv import fails."""

    class Observation(BaseModel):
        """Stand-in observation base used when the openenv import fails.

        Declares only the ``reward``, ``done`` and ``metadata`` fields.
        """

        reward: float = 0.0
        done: bool = False
        metadata: Dict[str, Any] = Field(default_factory=dict)
# Closed set of action primitives accepted by ``BrowserAction.action_type``.
# Member order is kept stable on purpose.
ActionType = Literal[
    "click", "type", "clear", "select",
    "submit", "scroll",
    "noop", "ask_oracle",
]
# Closed set of labels allowed in ``BrowserObservation.failure_reason``.
# Member order is kept stable on purpose.
FailureReason = Literal[
    "none", "success",
    "max_steps_exceeded", "invalid_action", "browsergym_action_error",
    "wrong_target", "wrong_text",
    "submission_failed", "task_failed",
    "oracle_budget_exceeded", "browser_error", "task_validation_failed",
    "repeated_action_loop", "too_many_invalid_actions",
    "client_step_cap", "low_progress_abort",
]
class BrowserElement(BaseModel):
    """One page element in the compact form shown to the policy.

    Derived from BrowserGym raw observations. Only ``id`` is required; every
    other field falls back to an empty or permissive default.
    """

    # Identity (the only required field).
    id: str = Field(
        ...,
        description="Stable BrowserGym BID or environment id",
    )

    # Descriptive strings; empty when the information is not available.
    role: str = Field(
        "",
        description="Accessibility role when available",
    )
    tag: str = Field(
        "",
        description="HTML tag name when available",
    )
    type: str = Field(
        "",
        description="Input/control type when available",
    )
    text: str = Field(
        "",
        description="Visible text after masking",
    )

    # Geometry and interaction state.
    bbox: Dict[str, float] = Field(
        default_factory=dict,
        description="Element bounding box",
    )
    visible: bool = Field(
        True,
        description="Whether the element is visible",
    )
    enabled: bool = Field(
        True,
        description="Whether the element can be interacted with",
    )

    # Free-form extras.
    attributes: Dict[str, Any] = Field(
        default_factory=dict,
        description="Allowed unmasked attributes",
    )
class ConstraintState(BaseModel):
    """Resource budgets and behaviour counters tracked per episode.

    Every field has a default, so the model can be constructed with no
    arguments.
    """

    # Budgets.
    step_budget: int = 15
    llm_budget: int = 3

    # Counters; all default to zero.
    oracle_calls: int = 0
    invalid_actions: int = 0
    repeated_actions: int = 0
    delayed_failures: int = 0

    # Difficulty / curriculum labels.
    current_difficulty: str = "easy"
    curriculum_variant_id: str = "base"
class RewardBreakdown(BaseModel):
    """Multi-objective reward components reported on every step.

    Two views of the reward are exposed side by side: low-level shaping terms
    and higher-level rubric-style channels. The low-level terms make it easy
    to debug the exact reward arithmetic, while the rubric channels give the
    clearer task-spec view intended for monitoring during training:

    - `task_completion`: task/browser success signal
    - `action_validity`: penalizes invalid or malformed actions
    - `efficiency`: step cost plus small progress credit and timeout-style
      penalties
    - `non_repetition`: penalizes repeated ineffective behavior
    - `help_independence`: penalizes oracle / helper reliance
    - `trajectory_quality`: terminal judge quality reward

    All components default to ``0.0``.
    """

    # Fine-grained terms (not among the rubric channels listed above).
    browsergym_reward_scaled: float = 0.0
    success_reward: float = 0.0
    step_penalty: float = 0.0
    progress_reward: float = 0.0
    llm_penalty: float = 0.0
    mistake_penalty: float = 0.0
    repetition_penalty: float = 0.0
    judge_quality_reward: float = 0.0
    delayed_penalty: float = 0.0

    # Rubric-style channels described in the docstring.
    task_completion: float = 0.0
    action_validity: float = 0.0
    efficiency: float = 0.0
    non_repetition: float = 0.0
    help_independence: float = 0.0
    trajectory_quality: float = 0.0

    # No arithmetic is defined in this model; presumably the combined reward
    # filled in by the environment -- confirm against the reward code.
    total: float = 0.0
class BrowserAction(Action):
    """Structured action a browser agent submits through OpenEnv.

    Every field is optional; with no arguments the action is a ``"noop"``
    with no target, no text and zero scroll deltas.
    """

    # Which primitive to run, and on what.
    action_type: ActionType = Field(
        "noop",
        description="Browser action primitive",
    )
    target_id: Optional[str] = Field(
        None,
        description="BrowserGym BID or environment element id",
    )

    # Payloads.
    text: Optional[str] = Field(
        None,
        description="Text/select payload",
    )
    scroll_dx: int = Field(
        0,
        description="Horizontal scroll delta",
    )
    scroll_dy: int = Field(
        0,
        description="Vertical scroll delta",
    )

    # Optional policy-side annotations.
    confidence: Optional[float] = Field(
        None,
        description="Policy confidence for uncertainty tracking",
    )
    reasoning: Optional[str] = Field(
        None,
        description="Optional rationale for replay and judging",
    )
class BrowserObservation(Observation):
    """Compact observation returned to a browser agent through OpenEnv.

    Extends the ``Observation`` base with episode/task identity, page state,
    action history, constraint counters, the reward breakdown and the episode
    outcome. Every field declared here has a default.
    """

    # Episode and task identity.
    episode_id: str = ""
    task_id: str = ""
    task_family: str = ""
    difficulty: str = "easy"
    instruction: str = ""

    # Current page and step position.
    url: str = ""
    step_index: int = 0
    max_steps: int = 15

    # Containers and nested models get a fresh instance per observation.
    elements: List[BrowserElement] = Field(default_factory=list)
    history: List[Dict[str, Any]] = Field(default_factory=list)
    constraints: ConstraintState = Field(default_factory=ConstraintState)
    reward_breakdown: RewardBreakdown = Field(default_factory=RewardBreakdown)

    # Outcome.
    success: bool = False
    failure_reason: FailureReason = "none"