Spaces:
Sleeping
Sleeping
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
| """Public OpenEnv models for the Browser RL environment.""" | |
| from typing import Any, Dict, List, Literal, Optional | |
| from pydantic import BaseModel, Field | |
# openenv supplies the real Action/Observation base classes. If importing them
# fails for any reason, minimal pydantic stand-ins are defined instead so this
# module can still be imported. The broad ``except`` is deliberate best-effort.
try:
    from openenv.core.env_server.types import Action, Observation
except Exception:  # pragma: no cover - lightweight fallback for local eval/notebook paths

    class Action(BaseModel):
        """Stand-in action base used only when openenv cannot be imported."""

    class Observation(BaseModel):
        """Stand-in observation base used only when openenv cannot be imported.

        Declares just three fields: ``reward``, ``done`` and ``metadata``.
        """

        reward: float = 0.0
        done: bool = False
        # A factory is required so each instance gets its own dict.
        metadata: Dict[str, Any] = Field(default_factory=dict)
# Closed vocabulary of action primitives. BrowserAction.action_type must be one
# of these strings (its default is "noop"). Member order is kept stable.
ActionType = Literal[
    "click", "type", "clear", "select",
    "submit", "scroll", "noop", "ask_oracle",
]
# Closed vocabulary of labels accepted by BrowserObservation.failure_reason,
# whose default is "none". Member order is kept stable.
FailureReason = Literal[
    "none", "success",
    "max_steps_exceeded", "invalid_action", "browsergym_action_error",
    "wrong_target", "wrong_text",
    "submission_failed", "task_failed",
    "oracle_budget_exceeded", "browser_error", "task_validation_failed",
    "repeated_action_loop", "too_many_invalid_actions",
    "client_step_cap", "low_progress_abort",
]
class BrowserElement(BaseModel):
    """One page element in the compact, policy-facing form.

    Instances are derived from BrowserGym raw observations. Only ``id`` is
    required; every other field has a default.
    """

    id: str = Field(
        ...,
        description="Stable BrowserGym BID or environment id",
    )
    role: str = Field("", description="Accessibility role when available")
    tag: str = Field("", description="HTML tag name when available")
    type: str = Field("", description="Input/control type when available")
    text: str = Field("", description="Visible text after masking")
    bbox: Dict[str, float] = Field(
        default_factory=dict,
        description="Element bounding box",
    )
    visible: bool = Field(True, description="Whether the element is visible")
    enabled: bool = Field(
        True,
        description="Whether the element can be interacted with",
    )
    attributes: Dict[str, Any] = Field(
        default_factory=dict,
        description="Allowed unmasked attributes",
    )
class ConstraintState(BaseModel):
    """Resource and behavior constraints tracked at the episode level.

    All fields are optional at construction time and fall back to the
    defaults declared below.
    """

    # Budgets.
    step_budget: int = 15
    llm_budget: int = 3

    # Integer tallies, all starting at zero.
    oracle_calls: int = 0
    invalid_actions: int = 0
    repeated_actions: int = 0
    delayed_failures: int = 0

    # Free-form string labels; no closed vocabulary is enforced here.
    current_difficulty: str = "easy"
    curriculum_variant_id: str = "base"
class RewardBreakdown(BaseModel):
    """Multi-objective reward components returned on every step.

    Both low-level shaping terms and higher-level rubric-style channels are
    exposed. The low-level terms help when debugging the exact reward
    arithmetic; the rubric channels give the clearer task-spec view meant for
    monitoring during training:

    - `task_completion`: task/browser success signal
    - `action_validity`: penalizes invalid or malformed actions
    - `efficiency`: step cost plus small progress credit and timeout-style
      penalties
    - `non_repetition`: penalizes repeated ineffective behavior
    - `help_independence`: penalizes oracle / helper reliance
    - `trajectory_quality`: terminal judge quality reward

    Every component, including `total`, defaults to 0.0.
    """

    # Low-level shaping terms.
    browsergym_reward_scaled: float = 0.0
    success_reward: float = 0.0
    step_penalty: float = 0.0
    progress_reward: float = 0.0
    llm_penalty: float = 0.0
    mistake_penalty: float = 0.0
    repetition_penalty: float = 0.0
    judge_quality_reward: float = 0.0
    delayed_penalty: float = 0.0

    # Rubric-style channels described in the class docstring.
    task_completion: float = 0.0
    action_validity: float = 0.0
    efficiency: float = 0.0
    non_repetition: float = 0.0
    help_independence: float = 0.0
    trajectory_quality: float = 0.0

    # Aggregate value; how it is derived from the terms above is not defined
    # in this model.
    total: float = 0.0
class BrowserAction(Action):
    """Structured action a browser agent submits through OpenEnv.

    Every field has a default, so an empty construction yields a ``"noop"``
    action with no target, no text and zero scroll deltas.
    """

    action_type: ActionType = Field(
        "noop",
        description="Browser action primitive",
    )
    target_id: Optional[str] = Field(
        None,
        description="BrowserGym BID or environment element id",
    )
    text: Optional[str] = Field(None, description="Text/select payload")
    scroll_dx: int = Field(0, description="Horizontal scroll delta")
    scroll_dy: int = Field(0, description="Vertical scroll delta")
    confidence: Optional[float] = Field(
        None,
        description="Policy confidence for uncertainty tracking",
    )
    reasoning: Optional[str] = Field(
        None,
        description="Optional rationale for replay and judging",
    )
class BrowserObservation(Observation):
    """Compact observation handed to a browser agent through OpenEnv.

    Every field has a default. ``elements`` and ``history`` default to new
    empty lists, while ``constraints`` and ``reward_breakdown`` default to
    freshly constructed ``ConstraintState`` / ``RewardBreakdown`` models.
    """

    # Episode and task identification.
    episode_id: str = ""
    task_id: str = ""
    task_family: str = ""
    difficulty: str = "easy"
    instruction: str = ""
    url: str = ""

    # Step position within the episode.
    step_index: int = 0
    max_steps: int = 15

    # Containers use factories so instances never share mutable defaults.
    elements: List[BrowserElement] = Field(default_factory=list)
    history: List[Dict[str, Any]] = Field(default_factory=list)
    constraints: ConstraintState = Field(default_factory=ConstraintState)
    reward_breakdown: RewardBreakdown = Field(default_factory=RewardBreakdown)

    # Outcome flags; failure_reason is restricted to the FailureReason values.
    success: bool = False
    failure_reason: FailureReason = "none"