Spaces:
Running
Running
# scikit-plots/ai · _hf_spaces_proxy/app.py v6.0.0
#
# Thin OpenAI-compatible reverse proxy for sphinx-ai-assistant.
#
# THREE-PATH ROUTING (evaluated in order)
# ───────────────────────────────────────
# Path 1 — BACKEND_URL set (explicit custom backend override)
#     Forward verbatim to BACKEND_URL.
#     HF_TOKEN injected as Bearer token when also set.
#     Read timeout: PROXY_TIMEOUT (default 600 s).
#
# Path 2 — Model namespace in HF_SPACES_MODEL_NAMESPACES
#     Model owner matches a custom namespace (default: "scikit-plots").
#     Forward to HF_SPACES_MODEL_URL (the scikit-plots/ai-model HF Space).
#     CPU inference on a 7B model takes 4-5 minutes.
#     Read timeout: PATH2_TIMEOUT (default 600 s).
#
# Path 3 — HF Serverless Inference API (default fallback)
#     Model has a registered HF Inference Provider (openai/*, Qwen/*, etc.).
#     Build {HF_BASE}/{model}/v1/chat/completions and inject HF_TOKEN.
#     Read timeout: PATH3_TIMEOUT (default 120 s).
#
# WHY PER-PATH TIMEOUTS MATTER (root cause of the "network error" in v4)
# ───────────────────────────────────────────────────────────────────────
# v4.0.0 used a single PROXY_TIMEOUT=120 s applied to ALL paths.
# The ai-model Space runs a 7B model on CPU basic hardware. Cold-start
# inference requires ~50 s tokenizer load + ~50 s model load + ~4.5 min
# generation. Every request to the ai-model Space timed out at 120 s and
# the browser reported "Sorry, something went wrong: network error".
# v5.0.0 fixes this by:
#   1. Raising DEFAULT_PROXY_TIMEOUT from 120 to 600 s.
#   2. Adding per-path timeouts so Path 3 (fast GPU) stays at 120 s
#      while Path 2 (slow CPU) gets the full 600 s.
#   3. Using httpx per-request timeouts so a single shared client
#      serves both fast and slow paths without interference.
#
# ENVIRONMENT VARIABLES (Space → Settings → Repository secrets)
# ─────────────────────────────────────────────────────────────
# HF_TOKEN                    Required for Path 3; optional for Path 2.
# HF_SPACES_MODEL_URL         Path 2 destination URL.
#                             Default: https://scikit-plots-ai-model.hf.space/v1/chat/completions
# HF_SPACES_MODEL_NAMESPACES  Comma-separated owner namespaces for Path 2.
#                             Default: scikit-plots
# BACKEND_URL                 Path 1 override (all requests go here when set).
# HF_BASE                     HF Serverless API base URL.
#                             Default: https://router.huggingface.co
# DEFAULT_MODEL               Fallback model when request body omits "model".
#                             Default: scikit-plots/Qwen2.5-Coder-32B-Instruct
# PROXY_TIMEOUT               Path 1 read timeout in seconds. Default: 600.
# PATH2_TIMEOUT               Path 2 read timeout in seconds. Default: 600.
# PATH3_TIMEOUT               Path 3 read timeout in seconds. Default: 120.
# PROXY_CONNECT_TIMEOUT       TCP handshake timeout. Default: 10.
# PROXY_WRITE_TIMEOUT         Request body upload timeout. Default: 30.
# PROXY_POOL_TIMEOUT          Connection pool acquire timeout. Default: 10.
# ALLOWED_ORIGINS             Comma-separated CORS origins. Default: *.
# MAX_BODY_BYTES              Maximum accepted body size. Default: 10485760.
#
# Authors: The scikit-plots developers
# SPDX-License-Identifier: BSD-3-Clause
| """ | |
FastAPI reverse proxy for sphinx-ai-assistant (scikit-plots/ai HF Space).

Routes browser POST requests through three ordered paths with independent
per-path read timeouts:

* **Path 1** — ``BACKEND_URL`` set: explicit custom backend.
* **Path 2** — Model namespace in ``HF_SPACES_MODEL_NAMESPACES``:
  forward to ``HF_SPACES_MODEL_URL`` (the ``scikit-plots/ai-model`` Space,
  CPU inference — read timeout 600 s by default).
* **Path 3** — Default: HF Serverless Inference API (GPU, read timeout 120 s).

Notes
-----
Developer note — per-path timeouts
    ``_resolve_upstream_url`` returns ``(url, headers, read_timeout_s)``.
    ``_forward`` builds an ``httpx.Timeout`` from *read_timeout_s* and
    the shared connect/write/pool values, then passes it **per-request**
    so the shared client never imposes a global ceiling. This means
    concurrent slow (Path 2) and fast (Path 3) requests never block each
    other.

Developer note — shared HTTP client
    A single :class:`httpx.AsyncClient` is created during lifespan and
    shared across all requests. Every proxied request passes its own
    ``httpx.Timeout`` object, which takes precedence over the client-level
    default, so the effective timeout is always decided per request.
    Streaming uses ``client.stream()`` which closes the response
    body (not the client) on context exit, so concurrent SSE requests are
    safe.

Developer note — explicit error handling
    ``_forward`` catches ``httpx.ReadTimeout``, ``httpx.ConnectTimeout``,
    and ``httpx.RequestError`` individually and returns meaningful JSON
    errors with appropriate HTTP status codes so the browser widget can
    display a useful message instead of a generic "network error".
| """ | |
| from __future__ import annotations | |
| import asyncio | |
| import json | |
| import logging | |
| import os | |
| import uuid | |
| from collections.abc import AsyncGenerator | |
| from contextlib import asynccontextmanager | |
| from typing import Any | |
| import httpx | |
| from fastapi import Depends, FastAPI, HTTPException, Request | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import JSONResponse, Response, StreamingResponse | |
| # _shared_logic.py must live alongside this file. | |
| try: | |
| from _shared_logic import ( # type: ignore[import] | |
| DEFAULT_HF_BASE, | |
| DEFAULT_HF_SPACES_MODEL_NAMESPACES, | |
| DEFAULT_HF_SPACES_MODEL_URL, | |
| DEFAULT_MAX_BODY_BYTES, | |
| DEFAULT_MODEL, | |
| DEFAULT_PATH2_READ_TIMEOUT, | |
| DEFAULT_PATH3_READ_TIMEOUT, | |
| DEFAULT_PROXY_TIMEOUT, | |
| PROXY_VERSION, | |
| _resolve_upstream_url, | |
| _safe_float, | |
| _safe_int, | |
| _token_log_fragment, | |
| _validate_env, | |
| ) | |
| except ImportError: | |
| from .._shared_logic import ( # type: ignore[import] | |
| DEFAULT_HF_BASE, | |
| DEFAULT_HF_SPACES_MODEL_NAMESPACES, | |
| DEFAULT_HF_SPACES_MODEL_URL, | |
| DEFAULT_MAX_BODY_BYTES, | |
| DEFAULT_MODEL, | |
| DEFAULT_PATH2_READ_TIMEOUT, | |
| DEFAULT_PATH3_READ_TIMEOUT, | |
| DEFAULT_PROXY_TIMEOUT, | |
| PROXY_VERSION, | |
| _resolve_upstream_url, | |
| _safe_float, | |
| _safe_int, | |
| _token_log_fragment, | |
| _validate_env, | |
| ) | |
# ─────────────────────────────────────────────────────────────────────────────
# Helpers — placed before configuration so they are available at module scope
# ─────────────────────────────────────────────────────────────────────────────
def _client_ip(request: Request) -> str:
    """Extract a best-effort client IP from the request.

    Parameters
    ----------
    request : fastapi.Request
        Incoming HTTP request.

    Returns
    -------
    str
        The first non-empty ``X-Forwarded-For`` entry when the header is
        present, otherwise ``request.client.host``; ``"unknown"`` when
        neither yields a value.

    Notes
    -----
    Developer: when the app runs behind a reverse proxy,
    ``request.client.host`` is the proxy address rather than the user's, so
    ``X-Forwarded-For`` is consulted first.

    Developer: proxies *append* to ``X-Forwarded-For``; the leftmost entry
    is whatever the original sender supplied and can be forged by the
    client unless the edge proxy overwrites the header. Treat the result
    as advisory (log field, rate-limit key), never as authentication.
    NOTE(review): confirm how the hosting proxy populates this header
    before relying on it for abuse control.

    Developer: blank entries (e.g. a header of ``" , 1.2.3.4"``) are
    skipped so an empty string is never returned as the client key.
    """
    forwarded = request.headers.get("x-forwarded-for", "")
    for hop in forwarded.split(","):
        candidate = hop.strip()
        if candidate:
            return candidate
    if request.client and request.client.host:
        return request.client.host
    return "unknown"
# ─────────────────────────────────────────────────────────────────────────────
# Logging
# ─────────────────────────────────────────────────────────────────────────────
class _StructuredFormatter(logging.Formatter):
    """Render each log record as a single-line JSON object.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to :class:`logging.Formatter`.

    Notes
    -----
    Developer: one JSON document per record keeps logs machine-parseable;
    fields can be queried directly instead of being regex-extracted from
    free text.

    The emitted keys are ``ts``, ``level``, ``logger`` and ``event`` (in
    that order). ``exc_info`` is appended only when the record carries
    exception information, so consumers never see a null ``exc_info``.
    """

    def format(self, record: logging.LogRecord) -> str:  # noqa: A003
        """Return *record* serialised as a JSON string."""
        fields: dict = dict(
            ts=self.formatTime(record, datefmt="%Y-%m-%dT%H:%M:%S"),
            level=record.levelname,
            logger=record.name,
            event=record.getMessage(),
        )
        exc = record.exc_info
        if exc:
            fields["exc_info"] = self.formatException(exc)
        # ensure_ascii=False keeps non-ASCII message text readable in the log.
        return json.dumps(fields, ensure_ascii=False)
# Root logging is configured once, at import time.
# NOTE: ``logging.StreamHandler()`` with no argument writes to ``sys.stderr``.
_handler = logging.StreamHandler()
_handler.setFormatter(_StructuredFormatter())
# Assigning the list replaces any handlers already attached to the root
# logger, so this JSON handler becomes the only root handler.
logging.root.handlers = [_handler]
logging.root.setLevel(logging.INFO)
#: Module logger; records propagate to the root handler configured above.
logger = logging.getLogger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
# Configuration — read once at module import, never at request time
# ─────────────────────────────────────────────────────────────────────────────
#: Explicit custom backend URL (Path 1). Empty string means "not set".
BACKEND_URL: str = os.environ.get("BACKEND_URL", "").strip()
#: HuggingFace API token. Required for Path 3; optional for Path 2.
HF_TOKEN: str = os.environ.get("HF_TOKEN", "").strip()
#: HF Serverless Inference API base URL (no trailing slash).
#: Surrounding whitespace is stripped and an empty/blank value falls back to
#: the default, so a misconfigured variable cannot produce a base-less URL.
HF_BASE: str = (os.environ.get("HF_BASE", "").strip() or DEFAULT_HF_BASE).rstrip("/")
#: Fallback model when request body omits ``model``.
#: Rebinds the imported default; a blank override keeps the imported value.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", DEFAULT_MODEL).strip() or DEFAULT_MODEL
#: Path 2 destination URL — the custom ai-model HF Space.
HF_SPACES_MODEL_URL: str = os.environ.get(
    "HF_SPACES_MODEL_URL", DEFAULT_HF_SPACES_MODEL_URL
).strip()
#: Parsed model owner namespaces routed to HF_SPACES_MODEL_URL (Path 2).
_raw_namespaces: str = os.environ.get(
    "HF_SPACES_MODEL_NAMESPACES",
    ",".join(DEFAULT_HF_SPACES_MODEL_NAMESPACES),
)
# Blank entries are dropped; if nothing remains the default tuple is used.
HF_SPACES_MODEL_NAMESPACES: tuple[str, ...] = (
    tuple(ns.strip() for ns in _raw_namespaces.split(",") if ns.strip())
    or DEFAULT_HF_SPACES_MODEL_NAMESPACES
)
#: Maximum accepted request body size (bytes).
MAX_BODY_BYTES: int = _safe_int(
    os.environ.get("MAX_BODY_BYTES"),
    DEFAULT_MAX_BODY_BYTES,
)
# ── Per-path read timeouts ────────────────────────────────────────────────────
# All timeout variables go through ``_safe_float`` so that a malformed value
# cannot raise ``ValueError`` at import time and fractional seconds are
# accepted uniformly.
# NOTE(review): assumes ``_safe_float(value, default)`` returns *default* for
# ``None``/unparseable input, as its use for PATH2/PATH3 implies — confirm
# in ``_shared_logic``.
#: Path 1 (BACKEND_URL) read timeout in seconds.
_proxy_timeout_secs: float = float(
    _safe_float(
        os.environ.get("PROXY_TIMEOUT"),
        DEFAULT_PROXY_TIMEOUT,
    )
)
#: Path 2 (ai-model Space, CPU inference) read timeout in seconds.
#: Default 600 s — covers 4-5 min CPU inference with 1 min headroom.
_path2_timeout_secs: float = _safe_float(
    os.environ.get("PATH2_TIMEOUT"),
    DEFAULT_PATH2_READ_TIMEOUT,
)
#: Path 3 (HF Serverless API, GPU) read timeout in seconds.
#: Default 120 s — generous margin for GPU-backed inference (30-90 s typical).
_path3_timeout_secs: float = _safe_float(
    os.environ.get("PATH3_TIMEOUT"),
    DEFAULT_PATH3_READ_TIMEOUT,
)
# ── Shared phase timeouts (apply to all paths) ────────────────────────────────
#: TCP handshake timeout in seconds.
_connect_timeout_secs: float = float(
    _safe_float(os.environ.get("PROXY_CONNECT_TIMEOUT"), 10.0)
)
#: Request body upload timeout in seconds.
_write_timeout_secs: float = float(
    _safe_float(os.environ.get("PROXY_WRITE_TIMEOUT"), 30.0)
)
#: Connection pool acquire timeout in seconds.
_pool_timeout_secs: float = float(
    _safe_float(os.environ.get("PROXY_POOL_TIMEOUT"), 10.0)
)
# ─────────────────────────────────────────────────────────────────────────────
# CORS
# ─────────────────────────────────────────────────────────────────────────────
#: Raw ``ALLOWED_ORIGINS`` value; ``*`` (the default) allows every origin.
_raw_origins: str = os.environ.get("ALLOWED_ORIGINS", "*").strip()
#: Origin list handed to the CORS middleware. A bare ``*`` is kept as the
#: wildcard; anything else is split on commas with blank entries dropped.
_allowed_origins: list[str]
if _raw_origins == "*":
    _allowed_origins = ["*"]
else:
    _allowed_origins = [
        origin for origin in map(str.strip, _raw_origins.split(",")) if origin
    ]
# Training-contribution settings and the in-memory state used by the
# contribute / share / feedback endpoints. Everything below lives in this
# process only and is lost on restart.
#: HuggingFace Dataset repo for training contributions.
#: Must be set if POST /v1/contribute is expected to succeed.
#: Empty string when the variable is unset.
TRAINING_DATASET_REPO: str = os.environ.get("TRAINING_DATASET_REPO", "").strip()
#: Current consent version string. Must match the JS constant _TRAINING_CONSENT_VERSION.
#: Increment when the consent UI text changes materially.
TRAINING_CONSENT_VERSION: str = "v1.0"
#: Maximum records per contribution POST.
MAX_CONTRIBUTION_RECORDS: int = 100
#: In-memory per-IP rate-limit store for contribution endpoint.
#: Keys: IP string. Values: (count, window_start_timestamp).
_contrib_rl: dict[str, tuple[int, float]] = {}
#: Guards reads/writes of ``_contrib_rl``.
_contrib_rl_lock = asyncio.Lock()
#: In-memory per-IP rate-limit store for share endpoint.
#: Same value layout as ``_contrib_rl`` according to the annotation.
_share_rl: dict[str, tuple[int, float]] = {}
#: Guards ``_share_rl``.
_share_rl_lock = asyncio.Lock()
#: In-memory conversation share store.
#: Keys: UUID hex string. Values: share metadata dict including ``expiresAt_ts``.
#: Ephemeral — clears on process restart. Stale entries are evicted lazily on write
#: (the eviction code is outside this section — verify there).
_share_store: dict[str, dict] = {}
#: Guards ``_share_store``.
_share_store_lock = asyncio.Lock()
#: In-memory per-IP rate-limit store for feedback endpoint.
_feedback_rl: dict[str, tuple[int, float]] = {}
#: Guards ``_feedback_rl``.
_feedback_rl_lock = asyncio.Lock()
# ─────────────────────────────────────────────────────────────────────────────
# Startup validation — fail fast with actionable messages
# ─────────────────────────────────────────────────────────────────────────────
# Runs at import time. NOTE(review): ``_validate_env`` is defined in
# ``_shared_logic``; what it checks or raises is not visible here.
_validate_env(BACKEND_URL, HF_TOKEN, HF_SPACES_MODEL_URL)
# With neither an explicit backend nor a token, only Path 2 can succeed;
# warn the operator once at startup rather than on every failing request.
if not (BACKEND_URL or HF_TOKEN):
    logger.warning(
        "HF_TOKEN is not set. Requests to standard HF Inference API models "
        "(e.g. openai/gpt-oss-20b, Qwen/*) will fail with 401 Unauthorized. "
        "Only models in namespaces %s will be served via %s.",
        list(HF_SPACES_MODEL_NAMESPACES),
        HF_SPACES_MODEL_URL if HF_SPACES_MODEL_URL else "<HF_SPACES_MODEL_URL not set>",
    )
# ─────────────────────────────────────────────────────────────────────────────
# Shared HTTP client — lifecycle managed by FastAPI lifespan
# ─────────────────────────────────────────────────────────────────────────────
#: Module-level reference to the shared httpx client.
#: ``None`` until the lifespan starts and again after shutdown. Proxied
#: requests pass their own ``httpx.Timeout``, which overrides the client's
#: default timeout.
_http_client: httpx.AsyncClient | None = None


@asynccontextmanager
async def _lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """
    Create and close the shared HTTP client on application startup / shutdown.

    Parameters
    ----------
    app : FastAPI
        The FastAPI application instance (unused; required by the lifespan
        protocol).

    Yields
    ------
    None
        Control is handed to the application while the client is open.

    Notes
    -----
    **Developer note** — The function is wrapped with
    :func:`contextlib.asynccontextmanager` because ``FastAPI(lifespan=...)``
    expects an async context manager factory; passing a bare async
    generator function relies on a deprecated compatibility shim.

    **Developer note** — The client is created with httpx's default
    timeout configuration. Every proxied request supplies its own
    :class:`httpx.Timeout`, and a per-request timeout takes precedence
    over the client default, so concurrent Path 2 requests (600 s) and
    Path 3 requests (120 s) coexist on the same client.
    """
    global _http_client  # noqa: PLW0603
    client = httpx.AsyncClient()
    _http_client = client
    logger.info(
        "Proxy v%s started. HTTP client ready (timeout=per-request).",
        PROXY_VERSION,
    )
    logger.info(
        "Routing: backend_url=%r | hf_spaces_model_url=%r | "
        "hf_spaces_namespaces=%r | hf_token=%s | default_model=%r",
        BACKEND_URL or None,
        HF_SPACES_MODEL_URL or None,
        list(HF_SPACES_MODEL_NAMESPACES),
        _token_log_fragment(HF_TOKEN),
        DEFAULT_MODEL,
    )
    logger.info(
        "Timeouts (seconds): path1=%s | path2=%s | path3=%s | "
        "connect=%s | write=%s | pool=%s",
        _proxy_timeout_secs,
        _path2_timeout_secs,
        _path3_timeout_secs,
        _connect_timeout_secs,
        _write_timeout_secs,
        _pool_timeout_secs,
    )
    try:
        yield
    finally:
        # Close the pool even if the application exits with an error, and
        # clear the global so late callers fail loudly instead of using a
        # closed client.
        await client.aclose()
        _http_client = None
        logger.info("Proxy shutdown. HTTP client closed.")
# ─────────────────────────────────────────────────────────────────────────────
# Application
# ─────────────────────────────────────────────────────────────────────────────
#: The ASGI application. ``_lifespan`` owns the shared HTTP client.
app = FastAPI(
    title="sphinx-ai-assistant proxy",
    description=(
        "Thin OpenAI-compatible reverse proxy for sphinx-ai-assistant. "
        "Routes to HF Serverless Inference API, a custom ai-model Space, "
        "or an explicit backend URL based on the model namespace."
    ),
    version=PROXY_VERSION,
    lifespan=_lifespan,
    # Passing None disables the interactive Swagger UI / ReDoc pages.
    docs_url=None,
    redoc_url=None,
)
# CORS policy for the browser widget; origins come from ALLOWED_ORIGINS.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_allowed_origins,
    allow_methods=["GET", "POST", "OPTIONS"],
    # Authorization added for write endpoints (POST /v1/feedback, POST /v1/contribute)
    # that validate a Bearer token. Without this the browser preflight rejects
    # requests containing Authorization headers before the handler runs.
    allow_headers=["Content-Type", "Authorization"],
    # No cookies or credentialed requests are accepted cross-origin.
    allow_credentials=False,
)
# ─────────────────────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────────────────────
def _resolve_url(body: bytes) -> tuple[str, dict[str, str], float]:
    """
    Resolve the upstream target for *body* using the module configuration.

    Binds the module-level settings to :func:`_resolve_upstream_url` so
    that route handlers only have to supply the request body.

    Parameters
    ----------
    body : bytes
        Raw JSON request body from the browser.

    Returns
    -------
    url : str
        Fully-qualified upstream endpoint URL.
    headers : dict[str, str]
        HTTP headers for the upstream POST request.
    read_timeout_s : float
        Per-path read timeout in seconds.

    See Also
    --------
    _shared_logic._resolve_upstream_url : Full three-path routing logic.
    """
    # Collected once per call so the keyword names mirror the shared helper.
    routing_config = {
        "backend_url": BACKEND_URL,
        "hf_token": HF_TOKEN,
        "hf_base": HF_BASE,
        "default_model": DEFAULT_MODEL,
        "hf_spaces_model_url": HF_SPACES_MODEL_URL,
        "hf_spaces_model_namespaces": HF_SPACES_MODEL_NAMESPACES,
        "proxy_timeout": _proxy_timeout_secs,
        "path2_read_timeout": _path2_timeout_secs,
        "path3_read_timeout": _path3_timeout_secs,
    }
    return _resolve_upstream_url(body, **routing_config)
def _make_timeout(read_s: float) -> httpx.Timeout:
    """
    Assemble the :class:`httpx.Timeout` used for a single upstream request.

    Parameters
    ----------
    read_s : float
        Read timeout in seconds for this specific request.

    Returns
    -------
    httpx.Timeout
        Timeout with all four phases (connect, read, write, pool) set
        explicitly.

    Notes
    -----
    **Developer note** — only the read phase is caller-controlled; the
    connect, write and pool phases come from the module-level settings and
    are the same for every path.
    """
    phases = {
        "connect": _connect_timeout_secs,
        "read": read_s,
        "write": _write_timeout_secs,
        "pool": _pool_timeout_secs,
    }
    return httpx.Timeout(**phases)
async def _validated_body(request: Request) -> bytes:
    """
    FastAPI dependency that returns the request body after a size check.

    Parameters
    ----------
    request : Request
        The incoming FastAPI request.

    Returns
    -------
    bytes
        The raw request body.

    Raises
    ------
    HTTPException
        HTTP 413 when the declared ``Content-Length`` or the actual body
        length exceeds :data:`MAX_BODY_BYTES`.
    """
    limit = MAX_BODY_BYTES

    # Cheap pre-check: reject on the declared length before reading anything.
    # A missing or unparseable header maps to -1 and therefore passes.
    declared = _safe_int(request.headers.get("content-length"), -1)
    if declared > limit:
        raise HTTPException(
            status_code=413,
            detail=(
                f"Request body too large (Content-Length: {declared:,} bytes). "
                f"Maximum allowed: {limit:,} bytes."
            ),
        )

    # The header may be absent or wrong, so the real size is checked as well.
    payload: bytes = await request.body()
    actual = len(payload)
    if actual <= limit:
        return payload
    raise HTTPException(
        status_code=413,
        detail=(
            f"Request body too large ({actual:,} bytes). "
            f"Maximum allowed: {limit:,} bytes."
        ),
    )
def _sse_error_frame(err_id: str, status: int, message: str) -> bytes:
    """Encode one complete SSE ``data:`` frame carrying a JSON error object."""
    frame = json.dumps(
        {"id": f"err-{err_id}", "error": {"status": status, "message": message}},
        separators=(",", ":"),
    )
    return f"data: {frame}\n\n".encode()


async def _forward(body: bytes) -> Response:
    """
    Forward *body* to the resolved upstream and return the response.

    Handles both non-streaming (JSON) and streaming (SSE) responses by
    detecting ``"stream": true`` in the request body. The timeout is built
    per request, so slow (Path 2) and fast (Path 3) requests share one
    HTTP client without sharing a timeout.

    Parameters
    ----------
    body : bytes
        Raw JSON request body from the browser.

    Returns
    -------
    fastapi.Response
        Non-streaming: upstream body, status code and content-type.
        Streaming: a :class:`~fastapi.responses.StreamingResponse` with
        status 200 that relays upstream bytes; upstream failures are
        reported in-band as an SSE error frame.

    Raises
    ------
    RuntimeError
        When the shared HTTP client has not been created by the lifespan.

    Notes
    -----
    **Developer note** — Error handling is explicit and specific:

    * ``httpx.ReadTimeout`` → HTTP 504 with actionable timeout message.
    * ``httpx.ConnectTimeout`` → HTTP 504 with connect-specific message.
    * ``httpx.RequestError`` → HTTP 502 Bad Gateway.

    **Developer note** — SSE error frames are serialised with
    :func:`json.dumps` (never hand-built strings) so that quotes or
    backslashes in interpolated values cannot produce invalid JSON, and
    each frame is yielded as a single ``bytes`` chunk. Every frame carries
    a UUID; for exception-driven frames the same UUID is logged so browser
    errors can be correlated with server logs.
    """
    client = _http_client
    if client is None:
        raise RuntimeError(
            "HTTP client is not initialised. "
            "FastAPI lifespan may not have started correctly."
        )
    url, headers, read_timeout_s = _resolve_url(body)
    req_timeout = _make_timeout(read_timeout_s)

    # Detect streaming intent before opening the upstream connection.
    # Anything that is not a JSON object is treated as non-streaming and
    # forwarded as-is; the upstream decides how to reject it.
    stream_requested: bool = False
    try:
        payload: Any = json.loads(body)
        stream_requested = bool(payload.get("stream", False))
    except (json.JSONDecodeError, ValueError, AttributeError, TypeError):
        pass

    if stream_requested:

        async def _sse_chunks() -> AsyncGenerator[bytes, None]:
            """Async generator proxying upstream SSE frames to the browser."""
            try:
                async with client.stream(
                    "POST", url, content=body, headers=headers, timeout=req_timeout
                ) as upstream:
                    if upstream.status_code != 200:  # noqa: PLR2004
                        # The 200 response line is already committed, so the
                        # upstream status is reported inside the stream.
                        err_body = await upstream.aread()
                        yield _sse_error_frame(
                            uuid.uuid4().hex,
                            upstream.status_code,
                            err_body.decode(errors="replace")[:500],
                        )
                    else:
                        async for chunk in upstream.aiter_bytes():
                            yield chunk
            except httpx.ReadTimeout:
                err_id = uuid.uuid4().hex
                logger.warning(
                    "ReadTimeout after %.0f s on streaming request to %s [%s]",
                    read_timeout_s,
                    url,
                    err_id,
                )
                yield _sse_error_frame(
                    err_id,
                    504,
                    f"Upstream timed out after {read_timeout_s:.0f} s. "
                    "CPU inference can take 4-5 minutes. "
                    "If using the ai-model Space, the model may still be loading.",
                )
            except httpx.ConnectTimeout:
                err_id = uuid.uuid4().hex
                logger.warning(
                    "ConnectTimeout on streaming request to %s [%s]", url, err_id
                )
                yield _sse_error_frame(
                    err_id,
                    504,
                    f"Connection timed out reaching {url}. "
                    "The HF Space may be starting up.",
                )
            except httpx.RequestError as exc:
                err_id = uuid.uuid4().hex
                logger.warning(
                    "RequestError on streaming request to %s: %s [%s]",
                    url,
                    exc,
                    err_id,
                )
                yield _sse_error_frame(
                    err_id,
                    502,
                    f"Failed to reach upstream: {type(exc).__name__}",
                )

        return StreamingResponse(
            _sse_chunks(),
            status_code=200,
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                # Ask buffering reverse proxies to pass frames through as
                # they arrive.
                "X-Accel-Buffering": "no",
            },
        )

    # Non-streaming path: await the full upstream response.
    # The two specific timeout handlers come first because both exception
    # types are also RequestError subclasses.
    try:
        upstream = await client.post(
            url, content=body, headers=headers, timeout=req_timeout
        )
    except httpx.ReadTimeout:
        logger.warning(
            "ReadTimeout after %.0f s on non-streaming request to %s",
            read_timeout_s,
            url,
        )
        return JSONResponse(
            status_code=504,
            content={
                "error": {
                    "type": "timeout_error",
                    "message": (
                        f"Upstream timed out after {read_timeout_s:.0f} s. "
                        "CPU inference on the ai-model Space can take 4-5 minutes. "
                        "The model may still be loading — retry in a few minutes."
                    ),
                }
            },
        )
    except httpx.ConnectTimeout:
        logger.warning("ConnectTimeout on non-streaming request to %s", url)
        return JSONResponse(
            status_code=504,
            content={
                "error": {
                    "type": "timeout_error",
                    "message": (
                        f"Connection timed out reaching {url}. "
                        "The HF Space may be cold-starting — retry in 30 seconds."
                    ),
                }
            },
        )
    except httpx.RequestError as exc:
        logger.warning("RequestError on non-streaming request to %s: %s", url, exc)
        return JSONResponse(
            status_code=502,
            content={
                "error": {
                    "type": "upstream_error",
                    "message": f"Failed to reach upstream: {type(exc).__name__}",
                }
            },
        )
    return Response(
        content=upstream.content,
        status_code=upstream.status_code,
        media_type=upstream.headers.get("content-type", "application/json"),
    )
# ─────────────────────────────────────────────────────────────────────────────
# Routes
# ─────────────────────────────────────────────────────────────────────────────
@app.get("/")
async def root() -> JSONResponse:
    """
    Human-readable status page and HF Space health-check handler.

    Registered as ``GET /`` so the handler is actually reachable; without
    a route registration the function is never served.

    Returns
    -------
    JSONResponse
        HTTP 200 with service status and the active routing configuration.

    Notes
    -----
    **User note** — The ``timeouts`` field shows timeouts in seconds.
    ``path1_s``/``path2_s``/``path3_s`` are the per-path read timeouts;
    ``connect_s``, ``write_s`` and ``pool_s`` are shared by all paths.
    """
    return JSONResponse(
        {
            "status": "ok",
            "service": f"sphinx-ai-assistant proxy v{PROXY_VERSION}",
            "routing": {
                "path_1_backend_url": BACKEND_URL or None,
                "path_2_model_space_url": HF_SPACES_MODEL_URL or None,
                "path_2_namespaces": list(HF_SPACES_MODEL_NAMESPACES),
                "path_3_hf_api_base": HF_BASE,
                # Only whether a token is configured is exposed, never the value.
                "path_3_hf_token_set": bool(HF_TOKEN),
            },
            "timeouts": {
                "path1_s": _proxy_timeout_secs,
                "path2_s": _path2_timeout_secs,
                "path3_s": _path3_timeout_secs,
                "connect_s": _connect_timeout_secs,
                "write_s": _write_timeout_secs,
                "pool_s": _pool_timeout_secs,
            },
            "cors_origins": _allowed_origins,
            "endpoints": {
                "chat": "POST /v1/chat/completions (primary)",
                "share": "POST /v1/share (conversation share)",
                "share_get": "GET /v1/share/{uuid} (retrieve shared snapshot)",
                "feedback": "POST /v1/feedback (rating persistence)",
                "training": "POST /v1/contribute (GDPR-gated training data)",
                "alias": "POST / (path-agnostic alias)",
                "health": "GET /health (liveness probe)",
            },
        }
    )
@app.get("/health")
async def health() -> JSONResponse:
    """
    Minimal liveness probe for container orchestrators and uptime monitors.

    Registered as ``GET /health``. It performs no upstream call and reads
    no request data.

    Returns
    -------
    JSONResponse
        Always HTTP 200 while the process is running, with the proxy
        version in the body.
    """
    return JSONResponse({"status": "ok", "version": PROXY_VERSION})
@app.post("/v1/chat/completions")
async def chat_completions(body: bytes = Depends(_validated_body)) -> Response:
    """
    Primary proxy endpoint — OpenAI-compatible ``/v1/chat/completions``.

    Registered as ``POST /v1/chat/completions``.

    Parameters
    ----------
    body : bytes
        Raw request body, pre-validated by :func:`_validated_body`.

    Returns
    -------
    fastapi.Response
        Upstream response. SSE streaming preserved when ``"stream": true``.

    Notes
    -----
    **User note** — Set ``endpoint`` in ``conf.py`` to::

        "https://scikit-plots-ai.hf.space/v1/chat/completions"

    **User note** — Model routing:

    * ``scikit-plots/Qwen2.5-Coder-7B-Instruct`` → ai-model Space (Path 2,
      CPU inference, up to 5 minutes per response).
    * ``openai/gpt-oss-20b``, ``Qwen/Qwen2.5-Coder-7B-Instruct`` →
      HF Serverless Inference API (Path 3, GPU, typically 30-90 s).

    See Also
    --------
    chat_completions_alias : ``POST /`` path-agnostic alias.
    """
    return await _forward(body)
@app.post("/")
async def chat_completions_alias(body: bytes = Depends(_validated_body)) -> Response:
    """
    Path-agnostic alias: ``POST /`` → identical to ``POST /v1/chat/completions``.

    Registered as ``POST /``; it shares the path with the ``GET /`` status
    page but not the method.

    Parameters
    ----------
    body : bytes
        Raw request body, pre-validated by :func:`_validated_body`.

    Returns
    -------
    fastapi.Response
        Identical to :func:`chat_completions`.

    Notes
    -----
    **User note** — Prefer the explicit ``/v1/chat/completions`` path.
    This alias handles ``conf.py`` configurations that set ``endpoint``
    to the bare Space URL without the path suffix.
    """
    return await _forward(body)
async def contribute(request: Request) -> JSONResponse:
    """Accept a training data contribution from the AI assistant browser widget.

    Parameters
    ----------
    request : fastapi.Request
        HTTP request.  Body must be a JSON object conforming to the
        contribution schema.

    Returns
    -------
    fastapi.responses.JSONResponse
        ``{"contributed": true, "rows": N}`` on success, where ``N`` is the
        number of records actually written to the dataset.

    Raises
    ------
    fastapi.HTTPException
        400 when the body is not valid JSON.
        413 when the body exceeds ``DEFAULT_MAX_BODY_BYTES``.
        422 when the body is not a JSON object, consent is absent/false,
        ``consentVersion`` is not current, ``schemaVersion`` is unsupported,
        ``records`` is not a list, contains no JSON objects, or exceeds the
        maximum allowed count.
        429 when the IP rate limit is exceeded.
        503 when the endpoint is not configured or the HF Dataset push fails.

    Notes
    -----
    Developer: ``consentFlag`` and ``consentVersion`` are checked before any
    other validation.  ``consentFlag`` must be the JSON boolean ``true``;
    truthy stand-ins such as the string ``"false"`` are rejected.  A missing
    or mismatched consent version is a hard rejection -- the UI must always
    send the current version string so that old cached pages cannot submit
    records that would silently bypass a consent-text update.

    Developer: The in-memory rate-limit store (``_contrib_rl``) is per-process
    only.  HF Spaces may run multiple replicas.  The limit (5 per hour) is
    intentionally loose; the primary defence is the GDPR consent gate.

    Developer: ``huggingface_hub.HfApi.create_commit`` is called synchronously
    inside an async handler.  This blocks the event loop for the duration of
    the HTTP round-trip to HF.  For the current traffic level this is
    acceptable; if throughput grows, run the call in a worker thread instead.
    """
    import time as _time

    # Body size guard
    raw = await request.body()
    if len(raw) > DEFAULT_MAX_BODY_BYTES:
        raise HTTPException(status_code=413, detail="Payload too large.")

    try:
        payload = json.loads(raw)
    except (ValueError, RecursionError) as exc:
        # ValueError covers json.JSONDecodeError *and* the UnicodeDecodeError
        # raised when the raw bytes are not valid UTF-8/16/32; RecursionError
        # covers pathologically nested documents.
        raise HTTPException(status_code=400, detail=f"Invalid JSON: {exc}") from exc
    if not isinstance(payload, dict):
        # A top-level list/string/number has no .get(); reject it explicitly
        # instead of crashing with AttributeError (HTTP 500).
        raise HTTPException(
            status_code=422, detail="Contribution body must be a JSON object."
        )

    # Consent guard -- GDPR Article 7: explicit consent required.
    # Only the boolean ``true`` counts; a truthy string is not consent.
    if payload.get("consentFlag") is not True:
        raise HTTPException(
            status_code=422,
            detail="consentFlag must be true. Contribution requires explicit user consent.",
        )
    if payload.get("consentVersion") != TRAINING_CONSENT_VERSION:
        raise HTTPException(
            status_code=422,
            detail=(
                f"consentVersion {payload.get('consentVersion')!r} is not current. "
                f"Expected {TRAINING_CONSENT_VERSION!r}. Reload the page and try again."
            ),
        )

    # Schema version guard.  The type check comes first so that an unhashable
    # value (list/dict) cannot raise TypeError in the membership test, and so
    # that ``true`` (== 1 in Python) is not mistaken for version 1.
    supported_versions: frozenset[int] = frozenset({1})
    schema_version = payload.get("schemaVersion")
    if (
        isinstance(schema_version, bool)
        or not isinstance(schema_version, (int, float))
        or schema_version not in supported_versions
    ):
        raise HTTPException(
            status_code=422,
            detail=(
                f"Unsupported schemaVersion {schema_version!r}. "
                f"Supported: {sorted(supported_versions)}"
            ),
        )

    records = payload.get("records", [])
    if not isinstance(records, list):
        raise HTTPException(status_code=422, detail="records must be a list.")
    if len(records) > MAX_CONTRIBUTION_RECORDS:
        raise HTTPException(
            status_code=422,
            detail=f"Too many records. Maximum {MAX_CONTRIBUTION_RECORDS} per request.",
        )
    # Only JSON objects can be merged with the metadata columns below.
    valid_records = [rec for rec in records if isinstance(rec, dict)]
    if not valid_records:
        # Avoid committing an empty file to the dataset repository.
        raise HTTPException(
            status_code=422,
            detail="records must contain at least one JSON object.",
        )

    # Per-IP rate limit: 5 contributions per hour
    client_ip = _client_ip(request)
    async with _contrib_rl_lock:
        now = _time.time()
        count, window_start = _contrib_rl.get(client_ip, (0, now))
        if now - window_start > 3600:
            count, window_start = 0, now
        count += 1
        _contrib_rl[client_ip] = (count, window_start)
    if count > 5:
        logger.warning(json.dumps({"event": "contribute.ratelimit", "ip": client_ip}))
        raise HTTPException(
            status_code=429,
            detail="Rate limit exceeded. Maximum 5 contributions per hour.",
            headers={"Retry-After": "3600"},
        )

    if not TRAINING_DATASET_REPO:
        raise HTTPException(
            status_code=503,
            detail="Training endpoint not configured (TRAINING_DATASET_REPO not set).",
        )
    if not HF_TOKEN:
        raise HTTPException(status_code=503, detail="HF_TOKEN not set.")

    # Push to HF Dataset
    from huggingface_hub import CommitOperationAdd, HfApi

    api = HfApi(token=HF_TOKEN)
    now_ms = int(_time.time() * 1000)
    # Metadata keys are applied after ``**rec`` so a record cannot spoof them.
    metadata = {
        "_sessionId": payload.get("sessionId", ""),
        "_page": payload.get("page", ""),
        "_model": payload.get("model"),
        "_consentVersion": payload.get("consentVersion"),
        "_ts": now_ms,
    }
    rows_jsonl = "\n".join(
        json.dumps({**rec, **metadata}, ensure_ascii=False) for rec in valid_records
    )
    filename = f"contributions/{now_ms}.jsonl"
    try:
        # ``HfApi`` exposes ``create_commit``; there is no ``commit`` method.
        api.create_commit(
            repo_id=TRAINING_DATASET_REPO,
            repo_type="dataset",
            operations=[
                CommitOperationAdd(
                    path_in_repo=filename,
                    # errors="replace": JSON may carry lone surrogates
                    # ("\ud800") that cannot be encoded as strict UTF-8.
                    path_or_fileobj=rows_jsonl.encode("utf-8", errors="replace"),
                )
            ],
            commit_message=f"Add {len(valid_records)} feedback record(s)",
        )
    except Exception as exc:
        logger.error(json.dumps({"event": "contribute.hf_fail", "error": str(exc)}))
        raise HTTPException(
            status_code=503,
            detail="Failed to store contribution. Try again later.",
        ) from exc

    logger.info(
        json.dumps({"event": "contribute.write", "rows": len(valid_records), "ip": client_ip})
    )
    return JSONResponse({"contributed": True, "rows": len(valid_records)})
async def share(request: Request) -> JSONResponse:
    """Accept a conversation snapshot for global sharing.

    Parameters
    ----------
    request : fastapi.Request
        HTTP request.  Body must be a JSON object with at minimum a
        ``content`` field.  Optional fields: ``mimeType``, ``ext``,
        ``title`` (strings) and ``ttlDays`` (number).

    Returns
    -------
    fastapi.responses.JSONResponse
        ``{"uuid": "<hex>", "url": "<share_url>", "expiresAt": "<ISO-8601>"}``
        on success.

    Raises
    ------
    fastapi.HTTPException
        400 when the body is not valid JSON.
        413 when the body exceeds :data:`~_shared_logic.DEFAULT_MAX_BODY_BYTES`.
        422 when the body is not a JSON object, ``content`` is missing or
        empty, an optional string field has the wrong type or contains
        characters that are unsafe in an HTTP header, or ``ttlDays`` is not
        numeric.
        429 when the IP rate limit (10/hour) is exceeded.

    Notes
    -----
    **User note** -- Stored shares are ephemeral: they live only for the
    lifetime of the process.  A Space restart (or scale-to-zero cold start)
    clears all shares.  For persistent storage, set ``TRAINING_DATASET_REPO``
    and use the ``/v1/contribute`` endpoint instead.

    **User note** -- The returned ``url`` points to ``GET /v1/share/{uuid}``
    on this proxy.  Distribute that link to share the conversation.

    **Developer note** -- ``ttlDays`` is clamped to ``[1, 365]``.  Expired
    entries are evicted lazily: on every write, entries whose
    ``expiresAt_ts`` has passed are removed before the new entry is stored.

    **Developer note** -- ``mimeType`` and ``ext`` are echoed back in the
    ``Content-Type`` / ``Content-Disposition`` response headers when the
    share is fetched, so they are validated here before being stored.

    **Developer note** -- Rate limit (10/hour per IP) is intentionally more
    permissive than ``/v1/contribute`` because share payloads are user-facing
    outputs, not GDPR-sensitive training data.
    """
    import re as _re
    import time as _time

    # Body size guard
    raw = await request.body()
    if len(raw) > DEFAULT_MAX_BODY_BYTES:
        raise HTTPException(status_code=413, detail="Payload too large.")

    try:
        payload = json.loads(raw)
    except (ValueError, RecursionError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError
        # (undecodable bytes); RecursionError covers extreme nesting.
        raise HTTPException(status_code=400, detail=f"Invalid JSON: {exc}") from exc
    if not isinstance(payload, dict):
        raise HTTPException(status_code=422, detail="Share body must be a JSON object.")

    # Required field: content
    content = payload.get("content", "")
    if not content or not isinstance(content, str):
        raise HTTPException(
            status_code=422,
            detail="content is required and must be a non-empty string.",
        )

    # Optional string fields; falsy values fall back to the defaults.
    mime_type = payload.get("mimeType") or "text/html;charset=utf-8"
    ext = payload.get("ext") or ".html"
    title = payload.get("title") or "Shared conversation"
    for field_name, value in (("mimeType", mime_type), ("ext", ext), ("title", title)):
        if not isinstance(value, str):
            raise HTTPException(
                status_code=422, detail=f"{field_name} must be a string."
            )
    # Both values end up in response headers: refuse control characters,
    # non-ASCII text, and (for ext) quotes that would break the quoted
    # filename parameter.
    if not _re.fullmatch(r"[\x20-\x7e]{1,255}", mime_type):
        raise HTTPException(
            status_code=422,
            detail="mimeType must be 1-255 printable ASCII characters.",
        )
    if not _re.fullmatch(r"[A-Za-z0-9._-]{1,32}", ext):
        raise HTTPException(
            status_code=422,
            detail="ext may only contain letters, digits, '.', '_' and '-' (max 32).",
        )

    try:
        ttl_days: int = max(1, min(int(payload.get("ttlDays") or 30), 365))
    except (TypeError, ValueError, OverflowError) as exc:
        # Non-numeric strings, containers, NaN and Infinity all land here.
        raise HTTPException(
            status_code=422, detail="ttlDays must be a number of days."
        ) from exc

    # Per-IP rate limit: 10 shares per hour
    client_ip = _client_ip(request)
    async with _share_rl_lock:
        now = _time.time()
        count, window_start = _share_rl.get(client_ip, (0, now))
        if now - window_start > 3600:
            count, window_start = 0, now
        count += 1
        _share_rl[client_ip] = (count, window_start)
    if count > 10:
        logger.warning(json.dumps({"event": "share.ratelimit", "ip": client_ip}))
        raise HTTPException(
            status_code=429,
            detail="Rate limit exceeded. Maximum 10 shares per hour.",
            headers={"Retry-After": "3600"},
        )

    share_id: str = uuid.uuid4().hex
    now_ts = _time.time()
    expires_ts = now_ts + ttl_days * 86400
    expires_iso = _time.strftime("%Y-%m-%dT%H:%M:%SZ", _time.gmtime(expires_ts))

    # Store in memory; evict expired entries lazily on each write.
    async with _share_store_lock:
        expired_keys = [k for k, v in _share_store.items() if v["expiresAt_ts"] < now_ts]
        for k in expired_keys:
            del _share_store[k]
        _share_store[share_id] = {
            "content": content,
            "mimeType": mime_type,
            "ext": ext,
            "title": title,
            "expiresAt_ts": expires_ts,
            "expiresAt": expires_iso,
        }

    logger.info(
        json.dumps(
            {"event": "share.create", "id": share_id, "ip": client_ip, "ttl_days": ttl_days}
        )
    )
    base_url = str(request.base_url).rstrip("/")
    share_url = f"{base_url}/v1/share/{share_id}"
    return JSONResponse({"uuid": share_id, "url": share_url, "expiresAt": expires_iso})
async def share_get(share_id: str) -> Response:
    """Retrieve a shared conversation snapshot by UUID.

    Parameters
    ----------
    share_id : str
        UUID hex string returned by ``POST /v1/share``.

    Returns
    -------
    fastapi.Response
        The stored content with its stored MIME type, a
        ``Content-Disposition: inline`` header and
        ``X-Content-Type-Options: nosniff``.

    Raises
    ------
    fastapi.HTTPException
        404 when the UUID is not found.
        410 when the share exists but has expired.

    Notes
    -----
    **User note** -- Shares are stored in process memory only.  A Space
    restart clears all shares, returning 404 for previously valid links.

    **Developer note** -- The stored ``ext`` and ``mimeType`` originate from
    the client that created the share.  They are re-validated here before
    being placed in response headers; unsafe values fall back to the
    defaults (``.html`` / ``text/html;charset=utf-8``) rather than producing
    a malformed header.
    """
    import re as _re
    import time as _time

    # Look up and (if needed) evict under a single lock acquisition.
    async with _share_store_lock:
        entry = _share_store.get(share_id)
        expired = entry is not None and entry["expiresAt_ts"] < _time.time()
        if expired:
            _share_store.pop(share_id, None)

    if entry is None:
        raise HTTPException(status_code=404, detail="Share not found or expired.")
    if expired:
        raise HTTPException(status_code=410, detail="Share has expired.")

    # Header-safe filename extension: no quotes, control or non-ASCII chars.
    ext = entry.get("ext", ".html")
    if not isinstance(ext, str) or not _re.fullmatch(r"[A-Za-z0-9._-]{1,32}", ext):
        ext = ".html"

    # Header-safe media type: printable ASCII only.
    media_type = entry.get("mimeType", "text/html;charset=utf-8")
    if not isinstance(media_type, str) or not _re.fullmatch(
        r"[\x20-\x7e]{1,255}", media_type
    ):
        media_type = "text/html;charset=utf-8"

    return Response(
        content=entry["content"],
        status_code=200,
        media_type=media_type,
        headers={
            "Content-Disposition": f'inline; filename="share{ext}"',
            # Content is user-supplied: stop browsers from second-guessing
            # the declared type.
            "X-Content-Type-Options": "nosniff",
        },
    )
async def feedback(request: Request) -> JSONResponse:
    """Accept a thumbs-up/down feedback record from the AI assistant widget.

    Parameters
    ----------
    request : fastapi.Request
        HTTP request.  Body must be a JSON object; all fields are optional.
        Typical payload fields from the JS widget:

        * ``ratingValue`` -- ``"thumbs-up"`` or ``"thumbs-down"``
        * ``ratingLabel`` -- human-readable label (e.g. ``"Helpful"``)
        * ``message`` -- optional free-text comment from the user
        * ``query`` -- the question that was asked
        * ``answer`` -- the response that was rated
        * ``model`` -- model ID that produced the answer
        * ``sessionId`` -- browser session UUID
        * ``page`` -- originating documentation page URL
        * ``ts`` -- client-side timestamp (ms since epoch)

    Returns
    -------
    fastapi.responses.JSONResponse
        ``{"ok": true}`` on success.

    Raises
    ------
    fastapi.HTTPException
        400 when the body is not valid JSON.
        413 when the body exceeds :data:`~_shared_logic.DEFAULT_MAX_BODY_BYTES`.
        422 when the body is valid JSON but not an object.
        429 when the IP rate limit (30/hour) is exceeded.

    Notes
    -----
    **User note** -- Feedback is logged to the Space log stream but is NOT
    persisted to a dataset.  To persist feedback, enable the
    ``/v1/contribute`` endpoint by setting ``TRAINING_DATASET_REPO``.

    **Developer note** -- The JS widget sends feedback with
    ``keepalive: true`` so the request survives page unload.  The handler
    therefore needs no response body beyond ``{"ok": true}``; the widget does
    not read it.

    **Developer note** -- Rate limit (30/hour per IP) is set high because
    users commonly rate multiple answers in a session.  The limit prevents
    programmatic flooding while allowing normal interactive use.  The quota
    is consumed before the object-shape check, so malformed-but-parseable
    bodies still count against it.

    **Developer note** -- Only ``ratingValue``, ``model``, ``sessionId``,
    ``page`` and ``ts`` are written to the log; free-text fields
    (``message``, ``query``, ``answer``) are not logged.
    """
    import time as _time

    # Body size guard
    raw = await request.body()
    if len(raw) > DEFAULT_MAX_BODY_BYTES:
        raise HTTPException(status_code=413, detail="Payload too large.")

    try:
        payload = json.loads(raw)
    except (ValueError, RecursionError) as exc:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # raised for undecodable bytes; RecursionError covers extreme
        # nesting.  Any of them would otherwise surface as HTTP 500.
        raise HTTPException(status_code=400, detail=f"Invalid JSON: {exc}") from exc

    # Per-IP rate limit: 30 per hour
    client_ip = _client_ip(request)
    async with _feedback_rl_lock:
        now = _time.time()
        count, window_start = _feedback_rl.get(client_ip, (0, now))
        if now - window_start > 3600:
            count, window_start = 0, now
        count += 1
        _feedback_rl[client_ip] = (count, window_start)
    if count > 30:
        logger.warning(json.dumps({"event": "feedback.ratelimit", "ip": client_ip}))
        raise HTTPException(
            status_code=429,
            detail="Rate limit exceeded. Maximum 30 feedback submissions per hour.",
            headers={"Retry-After": "3600"},
        )

    if not isinstance(payload, dict):
        raise HTTPException(status_code=422, detail="Feedback body must be a JSON object.")

    logger.info(
        json.dumps(
            {
                "event": "feedback.receive",
                "ip": client_ip,
                "ratingValue": payload.get("ratingValue"),
                "model": payload.get("model"),
                "sessionId": payload.get("sessionId"),
                "page": payload.get("page"),
                "ts": payload.get("ts"),
            }
        )
    )
    return JSONResponse({"ok": True})