# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Data models for the returns decision environment."""

from typing import Any, Dict, Literal, Optional

from openenv.core.env_server.types import Action, Observation
from pydantic import BaseModel, Field, model_validator

# Decision types accepted by ``EcomAction.action_type``.
ActionType = Literal[
    "APPROVE",
    "REJECT",
    "ESCALATE",
    "REQUEST_INFO",
]

# Reason codes accepted by ``EcomAction.reason_code``.
RejectReason = Literal[
    "TIME_EXPIRED",
    "POLICY_VIOLATION",
    "SUSPECTED_FRAUD",
]

# Value buckets accepted by ``EcomObservation.product_value``.
ValueBucket = Literal[
    "low",
    "medium",
    "high",
]


class EcomAction(Action):
    """Action schema for return-request handling."""

    # ``action_type`` is required; ``reason_code`` defaults to None and is
    # cross-checked against ``action_type`` by ``validate_reason_code``.
    action_type: ActionType = Field(default=..., description="Decision type")
    reason_code: Optional[RejectReason] = Field(
        description="Required when action_type is REJECT",
        default=None,
    )

    @model_validator(mode="after")
    def validate_reason_code(self) -> "EcomAction":
        """Check that ``reason_code`` is set if and only if the action is REJECT.

        Returns:
            This instance, unmodified, when the two fields are consistent.

        Raises:
            ValueError: If a REJECT action has no ``reason_code``, or if any
                other action type carries one.
        """
        is_reject = self.action_type == "REJECT"
        has_reason = self.reason_code is not None

        # Consistent combinations: REJECT with a reason, or non-REJECT without.
        if is_reject == has_reason:
            return self

        if is_reject:
            raise ValueError("reason_code is required when action_type is REJECT")
        raise ValueError("reason_code is only allowed when action_type is REJECT")


class EcomObservation(Observation):
    """Observation schema for the partially observable returns task."""

    # Every field is required except ``info``, which defaults to a new empty
    # dict for each instance (``default_factory=dict``).

    return_reason: str = Field(
        default=...,
        description="Customer-provided return reason",
    )
    product_category: str = Field(default=..., description="Product category")
    product_value: ValueBucket = Field(default=..., description="Value bucket")

    # Day counts must be non-negative.
    days_since_purchase: int = Field(
        default=...,
        description="Elapsed days",
        ge=0,
    )
    user_account_age_days: int = Field(
        default=...,
        description="Account age in days",
        ge=0,
    )

    product_condition_notes: str = Field(
        default=...,
        description="Condition summary",
    )

    # Bounded to the closed interval [0.0, 1.0].
    return_rate: float = Field(
        default=...,
        description="Historical return rate",
        ge=0.0,
        le=1.0,
    )
    # Must be at least 1.
    total_orders: int = Field(
        default=...,
        description="Total historical orders",
        ge=1,
    )

    policy_summary: str = Field(
        default=...,
        description="Natural-language policy text with rules and exceptions",
    )
    info: Dict[str, Any] = Field(
        description="Step info payload (OpenEnv-compatible info channel)",
        default_factory=dict,
    )


class EcomReward(BaseModel):
    """Typed reward breakdown used by deterministic task graders."""

    # Score components: each is required and constrained to [0.0, 1.0].
    policy_gate: float = Field(default=..., ge=0.0, le=1.0)
    financial_score: float = Field(default=..., ge=0.0, le=1.0)
    fraud_score: float = Field(default=..., ge=0.0, le=1.0)
    efficiency_score: float = Field(default=..., ge=0.0, le=1.0)
    normalized_reward: float = Field(default=..., ge=0.0, le=1.0)

    # Required flag with no default.
    policy_violation: bool

    # Optional fields; both default to None when not supplied.
    optimal_action: Optional[str] = Field(default=None)
    matched_optimal: Optional[bool] = Field(default=None)