Spaces:

creovateHQ
/

BrowserForge

Sleeping

42d1599 about 1 month ago

5.9 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	"""Public OpenEnv models for the Browser RL environment."""

	from typing import Any, Dict, List, Literal, Optional

	from pydantic import BaseModel, Field

	try:
	    from openenv.core.env_server.types import Action, Observation
	except Exception:  # pragma: no cover - lightweight fallback for local eval/notebook paths
	    # The openenv import failed: declare minimal pydantic stand-ins so the
	    # models below can still be defined without the openenv runtime.

	    class Action(BaseModel):
	        """Stand-in action base used when openenv cannot be imported."""

	    class Observation(BaseModel):
	        """Stand-in observation base used when openenv cannot be imported.

	        Declares the three fields ``reward``, ``done`` and ``metadata``,
	        each with a default so the model can be built with no arguments.
	        """

	        reward: float = 0.0
	        done: bool = False
	        metadata: Dict[str, Any] = Field(default_factory=dict)


	# Closed vocabulary of action primitives; used as the annotation of
	# ``BrowserAction.action_type``, whose default is "noop".
	ActionType = Literal[
	    "click", "type", "clear", "select",
	    "submit", "scroll", "noop", "ask_oracle",
	]


	# Closed set of labels used as the annotation of
	# ``BrowserObservation.failure_reason``; "none" is that field's default.
	FailureReason = Literal[
	    "none", "success",
	    "max_steps_exceeded", "invalid_action", "browsergym_action_error",
	    "wrong_target", "wrong_text",
	    "submission_failed", "task_failed",
	    "oracle_budget_exceeded", "browser_error", "task_validation_failed",
	    "repeated_action_loop", "too_many_invalid_actions",
	    "client_step_cap", "low_progress_abort",
	]


	class BrowserElement(BaseModel):
	    """A single page element in the compact form shown to the policy.

	    Derived from BrowserGym raw observations. ``id`` is the only required
	    field; every other field has a default, so a sparse element description
	    still validates.
	    """

	    id: str = Field(..., description="Stable BrowserGym BID or environment id")
	    role: str = Field("", description="Accessibility role when available")
	    tag: str = Field("", description="HTML tag name when available")
	    type: str = Field("", description="Input/control type when available")
	    text: str = Field("", description="Visible text after masking")
	    bbox: Dict[str, float] = Field(
	        description="Element bounding box",
	        default_factory=dict,
	    )
	    visible: bool = Field(True, description="Whether the element is visible")
	    enabled: bool = Field(
	        True,
	        description="Whether the element can be interacted with",
	    )
	    attributes: Dict[str, Any] = Field(
	        description="Allowed unmasked attributes",
	        default_factory=dict,
	    )


	class ConstraintState(BaseModel):
	    """Resource and behaviour constraints tracked at episode level.

	    All fields have defaults, so ``ConstraintState()`` is a valid initial
	    state (it is used as a ``default_factory`` elsewhere in this module).
	    """

	    # Budget fields.
	    step_budget: int = 15
	    llm_budget: int = 3

	    # Integer tallies, all defaulting to zero.
	    oracle_calls: int = 0
	    invalid_actions: int = 0
	    repeated_actions: int = 0
	    delayed_failures: int = 0

	    # Free-form string labels.
	    current_difficulty: str = "easy"
	    curriculum_variant_id: str = "base"


	class RewardBreakdown(BaseModel):
	    """Per-step breakdown of the multi-objective reward.

	    Two families of components are reported side by side. The low-level
	    shaping terms make the exact reward arithmetic easy to debug. The
	    rubric-style channels give the clearer task-spec view intended for
	    monitoring during training:

	    - `task_completion`: task/browser success signal
	    - `action_validity`: penalizes invalid or malformed actions
	    - `efficiency`: step cost plus small progress credit and timeout-style penalties
	    - `non_repetition`: penalizes repeated ineffective behavior
	    - `help_independence`: penalizes oracle / helper reliance
	    - `trajectory_quality`: terminal judge quality reward

	    Every component defaults to ``0.0``.
	    """

	    # Low-level shaping terms.
	    browsergym_reward_scaled: float = 0.0
	    success_reward: float = 0.0
	    step_penalty: float = 0.0
	    progress_reward: float = 0.0
	    llm_penalty: float = 0.0
	    mistake_penalty: float = 0.0
	    repetition_penalty: float = 0.0
	    judge_quality_reward: float = 0.0
	    delayed_penalty: float = 0.0

	    # Rubric-style channels (see the list in the docstring).
	    task_completion: float = 0.0
	    action_validity: float = 0.0
	    efficiency: float = 0.0
	    non_repetition: float = 0.0
	    help_independence: float = 0.0
	    trajectory_quality: float = 0.0

	    # Reported alongside the components; how it is combined is not shown here.
	    total: float = 0.0


	class BrowserAction(Action):
	    """Structured action a browser agent submits through OpenEnv.

	    Every field is optional: an instance built with no arguments is a
	    ``"noop"`` with no target, no text and zero scroll deltas.
	    """

	    action_type: ActionType = Field("noop", description="Browser action primitive")
	    target_id: Optional[str] = Field(
	        None,
	        description="BrowserGym BID or environment element id",
	    )
	    text: Optional[str] = Field(None, description="Text/select payload")
	    scroll_dx: int = Field(0, description="Horizontal scroll delta")
	    scroll_dy: int = Field(0, description="Vertical scroll delta")
	    confidence: Optional[float] = Field(
	        None,
	        description="Policy confidence for uncertainty tracking",
	    )
	    reasoning: Optional[str] = Field(
	        None,
	        description="Optional rationale for replay and judging",
	    )


	class BrowserObservation(Observation):
	    """Compact observation handed to a browser agent through OpenEnv.

	    Extends the ``Observation`` base with task identity, the current page
	    elements, action history, constraint state and the reward breakdown.
	    All fields declared here have defaults.
	    """

	    # Episode / task identity.
	    episode_id: str = ""
	    task_id: str = ""
	    task_family: str = ""
	    difficulty: str = "easy"
	    instruction: str = ""
	    url: str = ""

	    # Step position.
	    step_index: int = 0
	    max_steps: int = 15

	    # Structured payloads; factories give each instance its own fresh object.
	    elements: List[BrowserElement] = Field(default_factory=list)
	    history: List[Dict[str, Any]] = Field(default_factory=list)
	    constraints: ConstraintState = Field(default_factory=ConstraintState)
	    reward_breakdown: RewardBreakdown = Field(default_factory=RewardBreakdown)

	    # Outcome flags.
	    success: bool = False
	    failure_reason: FailureReason = "none"