Macbook committed
Commit · cc4ea58
1 parent: aae6d8c

Add FastAPI application

Files changed:
- Dockerfile +13 -0
- api/__init__.py +1 -0
- api/__pycache__/__init__.cpython-312.pyc +0 -0
- api/__pycache__/config.cpython-312.pyc +0 -0
- api/__pycache__/main.cpython-312.pyc +0 -0
- api/config.py +29 -0
- api/endpoints/__init__.py +0 -0
- api/endpoints/__pycache__/__init__.cpython-312.pyc +0 -0
- api/endpoints/v1/__init__.py +0 -0
- api/endpoints/v1/__pycache__/__init__.cpython-312.pyc +0 -0
- api/endpoints/v1/__pycache__/api.cpython-312.pyc +0 -0
- api/endpoints/v1/analytics/__init__.py +1 -0
- api/endpoints/v1/api.py +31 -0
- api/endpoints/v1/auth/__init__.py +0 -0
- api/endpoints/v1/auth/__pycache__/__init__.cpython-312.pyc +0 -0
- api/endpoints/v1/auth/__pycache__/verify.cpython-312.pyc +0 -0
- api/endpoints/v1/auth/verify.py +122 -0
- api/endpoints/v1/clients/__init__.py +0 -0
- api/endpoints/v1/processing/__init__.py +0 -0
- api/endpoints/v1/processing/__pycache__/__init__.cpython-312.pyc +0 -0
- api/endpoints/v1/processing/__pycache__/ai_feedback.cpython-312.pyc +0 -0
- api/endpoints/v1/processing/__pycache__/audio.cpython-312.pyc +0 -0
- api/endpoints/v1/processing/__pycache__/pronunciation_analysis.cpython-312.pyc +0 -0
- api/endpoints/v1/processing/__pycache__/soap.cpython-312.pyc +0 -0
- api/endpoints/v1/processing/__pycache__/therapy_asr.cpython-312.pyc +0 -0
- api/endpoints/v1/processing/__pycache__/therapy_tts.cpython-312.pyc +0 -0
- api/endpoints/v1/processing/ai_feedback.py +239 -0
- api/endpoints/v1/processing/audio.py +31 -0
- api/endpoints/v1/processing/pronunciation_analysis.py +468 -0
- api/endpoints/v1/processing/soap.py +119 -0
- api/endpoints/v1/processing/therapy_asr.py +305 -0
- api/endpoints/v1/processing/therapy_tts.py +354 -0
- api/endpoints/v1/routers/__init__.py +0 -0
- api/endpoints/v1/routers/__pycache__/__init__.cpython-312.pyc +0 -0
- api/endpoints/v1/routers/__pycache__/analytics.cpython-312.pyc +0 -0
- api/endpoints/v1/routers/__pycache__/health.cpython-312.pyc +0 -0
- api/endpoints/v1/routers/__pycache__/therapy.cpython-312.pyc +0 -0
- api/endpoints/v1/routers/__pycache__/upload.cpython-312.pyc +0 -0
- api/endpoints/v1/routers/analytics.py +364 -0
- api/endpoints/v1/routers/health.py +24 -0
- api/endpoints/v1/routers/therapy.py +639 -0
- api/endpoints/v1/routers/upload.py +82 -0
- api/endpoints/v1/therapy/__init__.py +1 -0
- api/endpoints/v1/utils.py +7 -0
- api/index.py +5 -0
- api/main.py +63 -0
- app.py +5 -0
- requirements.txt +10 -0
Dockerfile
ADDED
@@ -0,0 +1,13 @@
FROM python:3.9

RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
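
The CMD assumes an `app` object importable from app.py at the repository root. A minimal sketch of that shim, assuming api/main.py (added in this commit but not shown in this view) exposes the FastAPI instance as `app`:

# app.py — hypothetical sketch; the actual app.py in this commit (+5 lines) is not shown here
from api.main import app  # re-export so `uvicorn app:app --host 0.0.0.0 --port 7860` resolves
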
api/__init__.py
ADDED
@@ -0,0 +1 @@
__version__ = "0.1.0"
api/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (189 Bytes)
api/__pycache__/config.cpython-312.pyc
ADDED
Binary file (1.48 kB)
api/__pycache__/main.cpython-312.pyc
ADDED
Binary file (2.66 kB)
api/config.py
ADDED
@@ -0,0 +1,29 @@
import os

from dotenv import load_dotenv
from pydantic_settings import BaseSettings, SettingsConfigDict

load_dotenv()


# ROOT_PATH is an env var configured in the Vercel deployment; its production value
# equals the root path of the deployment URL. Otherwise the root path is the local host.
ROOT_PATH = os.getenv("ROOT_PATH", "/")


class Settings(BaseSettings):
    PROJECT_NAME: str = "FastAPI App"
    PROJECT_DESCRIPTION: str = "A simple FastAPI app"
    ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
    CLERK_JWKS_URL: str = os.getenv("CLERK_JWKS_URL")
    CLERK_PEM_PUBLIC_KEY: str = os.getenv("CLERK_PEM_PUBLIC_KEY")
    OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
    GITHUB_TOKEN: str = os.getenv("GITHUB_TOKEN", "")  # For GitHub Models GPT-4o

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
    openapi_url: str = "/openapi.json"
    API_VERSION: str = "/v1"
    ROOT: str = ROOT_PATH


settings = Settings()
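
Elsewhere in the codebase these values are consumed through the `settings` singleton. A minimal sketch of typical usage, assuming api/main.py (in this commit but not shown here) wires them into the FastAPI constructor:

# Hypothetical sketch — the field names come from Settings above; the wiring is an assumption
from fastapi import FastAPI
from api.config import settings

app = FastAPI(
    title=settings.PROJECT_NAME,
    description=settings.PROJECT_DESCRIPTION,
    openapi_url=settings.openapi_url,
    root_path=settings.ROOT,
)
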
api/endpoints/__init__.py
ADDED
File without changes
api/endpoints/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (173 Bytes)
api/endpoints/v1/__init__.py
ADDED
File without changes
api/endpoints/v1/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (176 Bytes)
api/endpoints/v1/__pycache__/api.cpython-312.pyc
ADDED
Binary file (988 Bytes)
api/endpoints/v1/analytics/__init__.py
ADDED
@@ -0,0 +1 @@
"""Analytics module - Progress tracking and metrics."""
api/endpoints/v1/api.py
ADDED
@@ -0,0 +1,31 @@
from fastapi import APIRouter

from api.endpoints.v1.routers import health, upload, therapy, analytics

api_router = APIRouter()


api_router.include_router(
    health.router,
    prefix="/health",
    tags=["health"],
    responses={404: {"description": "Not found"}},
)
api_router.include_router(
    upload.router,
    prefix="/upload",
    tags=["upload"],
    responses={404: {"description": "Not found"}},
)
api_router.include_router(
    therapy.router,
    prefix="/therapy",
    tags=["therapy"],
    responses={404: {"description": "Not found"}},
)
api_router.include_router(
    analytics.router,
    prefix="/analytics",
    tags=["analytics"],
    responses={404: {"description": "Not found"}},
)
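
A sketch of how this aggregated router would typically be mounted under the versioned prefix (assumed to happen in api/main.py, which is not shown in this view):

# Hypothetical sketch — settings.API_VERSION is "/v1", so routes resolve as /v1/health, /v1/upload, etc.
from fastapi import FastAPI
from api.config import settings
from api.endpoints.v1.api import api_router

app = FastAPI()
app.include_router(api_router, prefix=settings.API_VERSION)
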
api/endpoints/v1/auth/__init__.py
ADDED
File without changes
api/endpoints/v1/auth/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (181 Bytes)
api/endpoints/v1/auth/__pycache__/verify.cpython-312.pyc
ADDED
Binary file (5.49 kB)
api/endpoints/v1/auth/verify.py
ADDED
@@ -0,0 +1,122 @@
import logging
from datetime import datetime, timezone
from typing import Optional, List

import requests
from fastapi import HTTPException, Security
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jose import exceptions, jwk, jwt
from jose.utils import base64url_decode

from api.config import settings

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)

ALGORITHM = "RS256"
security = HTTPBearer()

# Define the allowed origins for the azp claim
ALLOWED_ORIGINS = [
    "http://localhost:3000",
    "com.ubumuntu.app",
]

def get_jwks(jwks_url: str):
    """Fetch the JWKS from the given URL."""
    response = requests.get(jwks_url)
    response.raise_for_status()
    return response.json()


def get_public_key(token: str, jwks_url: str):
    """Get the public key for the given token from the JWKS."""
    jwks = get_jwks(jwks_url)
    header = jwt.get_unverified_header(token)
    rsa_key = {}
    for key in jwks["keys"]:
        if key["kid"] == header["kid"]:
            rsa_key = {
                "kty": key["kty"],
                "kid": key["kid"],
                "use": key["use"],
                "n": key["n"],
                "e": key["e"],
            }
            break
    if not rsa_key:
        raise HTTPException(status_code=401, detail="Unable to find appropriate key")
    return jwk.construct(rsa_key, algorithm=ALGORITHM)


def decode_jwt(token: str, jwks_url: str, allowed_origins: List[str]) -> Optional[dict]:
    """Decode a JWT token and verify its expiration and azp claim using JWKS."""
    try:
        logging.info("Attempting to decode the JWT token.")
        public_key = get_public_key(token, jwks_url)
        message, encoded_signature = token.rsplit(".", 1)
        decoded_signature = base64url_decode(encoded_signature.encode("utf-8"))

        if not public_key.verify(message.encode("utf-8"), decoded_signature):
            logging.warning("Invalid token signature.")
            return None

        payload = jwt.decode(
            token,
            public_key.to_pem().decode("utf-8"),
            algorithms=[ALGORITHM],
            audience="authenticated",
        )

        # Validate expiration (exp) and not before (nbf) claims
        now = datetime.now(tz=timezone.utc)
        exp = payload.get("exp")
        nbf = payload.get("nbf")

        if exp and datetime.fromtimestamp(exp, tz=timezone.utc) < now:
            logging.warning("Token has expired.")
            return None

        if nbf and datetime.fromtimestamp(nbf, tz=timezone.utc) > now:
            logging.warning("Token not yet valid.")
            return None

        # Validate authorized parties by the azp claim
        azp = payload.get("azp")
        logging.debug(f"azp: {azp}")

        if azp and azp not in allowed_origins:
            logging.warning(f"Unauthorized party: {azp}")
            return None

        logging.info("JWT successfully decoded.")
        return payload

    except exceptions.ExpiredSignatureError:
        logging.error("JWT has expired.")
        return None
    except exceptions.JWTClaimsError:
        logging.error("JWT claims error.")
        return None
    except exceptions.JWTError as e:
        logging.error(f"JWT decoding error: {e}")
        return None


def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)):
    """Verify the incoming token using the `decode_jwt` function."""
    token = credentials.credentials

    credentials_exception = HTTPException(
        status_code=401,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    payload = decode_jwt(token, settings.CLERK_JWKS_URL, ALLOWED_ORIGINS)
    if not payload or "sub" not in payload:
        raise credentials_exception

    return payload["sub"]
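
verify_token is built to run as a FastAPI dependency: it raises 401 unless the bearer token decodes to a payload with a `sub` claim, and returns that claim. A minimal sketch of a protected route (the route itself is hypothetical, not part of this commit):

# Hypothetical example route showing the dependency in use
from fastapi import APIRouter, Depends
from api.endpoints.v1.auth.verify import verify_token

router = APIRouter()

@router.get("/me")
def read_me(user_id: str = Depends(verify_token)):
    # user_id is the `sub` claim from a validated Clerk-issued JWT
    return {"user_id": user_id}
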
api/endpoints/v1/clients/__init__.py
ADDED
File without changes
api/endpoints/v1/processing/__init__.py
ADDED
File without changes
api/endpoints/v1/processing/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (187 Bytes)
api/endpoints/v1/processing/__pycache__/ai_feedback.cpython-312.pyc
ADDED
Binary file (10.2 kB)
api/endpoints/v1/processing/__pycache__/audio.cpython-312.pyc
ADDED
Binary file (1.48 kB)
api/endpoints/v1/processing/__pycache__/pronunciation_analysis.cpython-312.pyc
ADDED
Binary file (17.5 kB)
api/endpoints/v1/processing/__pycache__/soap.cpython-312.pyc
ADDED
Binary file (5.08 kB)
api/endpoints/v1/processing/__pycache__/therapy_asr.cpython-312.pyc
ADDED
Binary file (12.8 kB)
api/endpoints/v1/processing/__pycache__/therapy_tts.cpython-312.pyc
ADDED
Binary file (13.5 kB)
api/endpoints/v1/processing/ai_feedback.py
ADDED
@@ -0,0 +1,239 @@
"""
AI Feedback Module - GPT-4o powered speech therapy feedback.

Uses GitHub Models API for GPT-4o access.
"""

import os
import json
import logging
from typing import Optional, List
from dataclasses import dataclass

from openai import OpenAI

logger = logging.getLogger(__name__)


@dataclass
class AIFeedbackResult:
    """AI-generated feedback for speech therapy."""
    feedback: str
    encouragement: str
    specific_tips: List[str]
    recommended_exercises: List[str]
    difficulty_adjustment: Optional[str]  # "easier", "same", "harder"


class AIFeedbackGenerator:
    """
    Generate personalized speech therapy feedback using GPT-4o.

    Uses GitHub Models API (free for GitHub users).
    """

    def __init__(self):
        self.client: Optional[OpenAI] = None
        self.model = "gpt-4o"
        self._initialize_client()

    def _initialize_client(self):
        """Initialize the OpenAI client with GitHub Models."""
        github_token = os.getenv("GITHUB_TOKEN")

        if not github_token:
            raise ValueError(
                "GITHUB_TOKEN not found. Please set it in your .env file. "
                "Get your token at: https://github.com/settings/tokens"
            )

        # Use GitHub Models (free GPT-4o access)
        self.client = OpenAI(
            base_url="https://models.inference.ai.azure.com",
            api_key=github_token,
        )
        self.model = "gpt-4o"
        logger.info("AI Feedback: Using GitHub Models (GPT-4o)")

    async def generate_feedback(
        self,
        target_text: str,
        transcription: str,
        overall_score: float,
        clarity_score: float,
        pace_score: float,
        fluency_score: float,
        errors: List[dict],
        user_context: Optional[dict] = None
    ) -> AIFeedbackResult:
        """
        Generate personalized feedback for a speech exercise attempt.

        Args:
            target_text: The text the user was supposed to say
            transcription: What the ASR heard
            overall_score: 0-100 overall score
            clarity_score: 0-100 clarity score
            pace_score: 0-100 pace score
            fluency_score: 0-100 fluency score
            errors: List of pronunciation errors detected
            user_context: Optional user profile info (speech condition, etc.)

        Returns:
            AIFeedbackResult with personalized feedback
        """
        # Build context about user if available
        user_info = ""
        if user_context:
            condition = user_context.get("speech_condition", "")
            severity = user_context.get("severity_level", "")
            if condition:
                user_info = f"\nUser has {condition}"
                if severity:
                    user_info += f" (severity: {severity}/5)"
                user_info += ". Adjust feedback accordingly."

        # Format errors for the prompt
        error_summary = ""
        if errors:
            error_items = []
            for e in errors[:5]:  # Limit to 5 errors
                error_items.append(
                    f"- '{e.get('expected', '')}' → '{e.get('actual', '')}' ({e.get('error_type', '')})"
                )
            error_summary = "\n".join(error_items)

        system_prompt = """You are a supportive, encouraging speech therapist helping users improve their speech clarity.

Your feedback should be:
- Warm and encouraging, never discouraging
- Specific and actionable
- Age-appropriate and easy to understand
- Focused on progress, not perfection

Always acknowledge effort and provide constructive guidance."""

        user_prompt = f"""Please provide feedback for this speech exercise attempt:

**Target phrase:** "{target_text}"
**User said:** "{transcription}"

**Scores:**
- Overall: {overall_score:.0f}/100
- Clarity: {clarity_score:.0f}/100
- Pace: {pace_score:.0f}/100
- Fluency: {fluency_score:.0f}/100

**Pronunciation differences:**
{error_summary if error_summary else "No major differences detected"}
{user_info}

Please respond in this JSON format:
{{
    "feedback": "2-3 sentences of overall feedback",
    "encouragement": "A short encouraging message",
    "specific_tips": ["tip 1", "tip 2", "tip 3"],
    "recommended_exercises": ["exercise 1", "exercise 2"],
    "difficulty_adjustment": "easier" or "same" or "harder"
}}"""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.7,
            max_tokens=500,
            response_format={"type": "json_object"}
        )

        # Parse the response
        result = json.loads(response.choices[0].message.content)

        return AIFeedbackResult(
            feedback=result.get("feedback", "Good effort! Keep practicing."),
            encouragement=result.get("encouragement", "You're making progress!"),
            specific_tips=result.get("specific_tips", []),
            recommended_exercises=result.get("recommended_exercises", []),
            difficulty_adjustment=result.get("difficulty_adjustment", "same")
        )

    async def generate_session_summary(
        self,
        session_stats: dict,
        attempts: List[dict]
    ) -> str:
        """Generate an AI summary of a therapy session."""
        prompt = f"""Summarize this speech therapy session for the user:

**Session Stats:**
- Duration: {session_stats.get('duration_minutes', 0)} minutes
- Exercises completed: {session_stats.get('exercise_count', 0)}
- Average score: {session_stats.get('average_score', 0):.0f}/100
- Best score: {session_stats.get('best_score', 0):.0f}/100

**Exercise Types Practiced:** {', '.join(session_stats.get('exercise_types', []))}

Please provide a brief, encouraging 2-3 sentence summary of their session."""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are a supportive speech therapist providing session summaries."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=150
        )

        return response.choices[0].message.content

    async def generate_weekly_insights(
        self,
        weekly_data: dict
    ) -> dict:
        """Generate AI-powered weekly progress insights."""
        prompt = f"""Analyze this user's weekly speech therapy progress:

**This Week:**
- Sessions: {weekly_data.get('sessions_this_week', 0)}
- Total practice time: {weekly_data.get('practice_minutes', 0)} minutes
- Average score: {weekly_data.get('avg_score', 0):.0f}/100
- Score change from last week: {weekly_data.get('score_change', 0):+.1f}%

**Strengths:** {', '.join(weekly_data.get('strengths', ['Consistent practice']))}
**Areas to improve:** {', '.join(weekly_data.get('weaknesses', ['Continue practicing']))}

Provide a JSON response with:
{{
    "summary": "2-3 sentence progress summary",
    "celebration": "Something specific to celebrate",
    "focus_area": "One specific thing to focus on next week",
    "goal": "A realistic goal for next week"
}}"""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are an encouraging speech therapist analyzing weekly progress."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=300,
            response_format={"type": "json_object"}
        )

        return json.loads(response.choices[0].message.content)


# Singleton instance
_feedback_generator: Optional[AIFeedbackGenerator] = None


def get_ai_feedback_generator() -> AIFeedbackGenerator:
    """Get or create AIFeedbackGenerator singleton."""
    global _feedback_generator
    if _feedback_generator is None:
        _feedback_generator = AIFeedbackGenerator()
    return _feedback_generator
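
generate_feedback is async, so callers await it, and the generator requires GITHUB_TOKEN at construction time. A minimal standalone sketch (the texts and scores are illustrative values, not from this commit):

# Hypothetical driver script — not part of this commit
import asyncio
from api.endpoints.v1.processing.ai_feedback import get_ai_feedback_generator

async def main():
    gen = get_ai_feedback_generator()  # raises ValueError if GITHUB_TOKEN is unset
    result = await gen.generate_feedback(
        target_text="The quick brown fox",
        transcription="the quick brown fox",
        overall_score=92.0,
        clarity_score=90.0,
        pace_score=88.0,
        fluency_score=95.0,
        errors=[],
    )
    print(result.feedback)
    print(result.difficulty_adjustment)  # one of "easier", "same", "harder"

asyncio.run(main())
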
api/endpoints/v1/processing/audio.py
ADDED
@@ -0,0 +1,31 @@
import io
import logging

from openai import OpenAI

from api.config import settings

client = OpenAI(api_key=settings.OPENAI_API_KEY)

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


def transcribe_with_whisper(
    filename: str, file_like: io.BytesIO, content_type: str
) -> str:
    """Helper function to transcribe audio using OpenAI Whisper."""
    logging.info("Transcribing with whisper")

    # Prepare the file data as a tuple
    file_data = (filename, file_like.read(), content_type)

    # Call the OpenAI API to transcribe the audio file
    transcription = client.audio.transcriptions.create(
        model="whisper-1", file=file_data
    )

    logging.debug(f"Transcription: {transcription.text}")
    return transcription.text  # return the text itself, matching the declared -> str
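
A sketch of how this helper might be called from an upload endpoint (the route is hypothetical; the actual upload.py is in this commit but not shown in this view):

# Hypothetical endpoint wiring for the Whisper helper
import io
from fastapi import APIRouter, UploadFile
from api.endpoints.v1.processing.audio import transcribe_with_whisper

router = APIRouter()

@router.post("/transcribe")
async def transcribe(file: UploadFile):
    data = await file.read()
    text = transcribe_with_whisper(file.filename, io.BytesIO(data), file.content_type)
    return {"text": text}
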
api/endpoints/v1/processing/pronunciation_analysis.py
ADDED
@@ -0,0 +1,468 @@
"""
Pronunciation Analysis Module - Speech clarity and pronunciation feedback.

Provides:
- Pronunciation scoring (PCC - Percent Consonants Correct)
- Clarity assessment
- Pace analysis
- Per-word feedback
- Improvement suggestions
"""

import io
import logging
from typing import Optional, List
from dataclasses import dataclass, field
from enum import Enum

from api.config import settings

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


class ErrorType(str, Enum):
    """Types of pronunciation errors."""
    SUBSTITUTION = "substitution"  # Wrong sound
    OMISSION = "omission"  # Missing sound
    ADDITION = "addition"  # Extra sound
    DISTORTION = "distortion"  # Unclear sound


@dataclass
class PhonemeError:
    """Individual phoneme-level error."""
    word: str
    position: int  # Position in word
    expected: str
    actual: Optional[str]
    error_type: ErrorType
    suggestion: str


@dataclass
class WordScore:
    """Per-word pronunciation score."""
    word: str
    score: float  # 0-100
    start_time: Optional[float] = None
    end_time: Optional[float] = None
    errors: List[PhonemeError] = field(default_factory=list)


@dataclass
class AIFeedback:
    """AI-generated personalized feedback."""
    feedback: str
    encouragement: str
    specific_tips: List[str]
    recommended_exercises: List[str]
    difficulty_adjustment: Optional[str] = None  # "easier", "same", "harder"


@dataclass
class PronunciationFeedback:
    """Complete pronunciation analysis result."""
    overall_score: float  # 0-100
    clarity_score: float  # 0-100
    pace_score: float  # 0-100
    fluency_score: float  # 0-100
    word_scores: List[WordScore]
    suggestions: List[str]
    phoneme_errors: List[PhonemeError]
    transcription: str
    target_text: str
    duration_seconds: Optional[float] = None
    ai_feedback: Optional[AIFeedback] = None  # GPT-4o powered feedback


class PronunciationAnalyzer:
    """
    Analyze pronunciation against target text.

    Uses ASR with forced alignment to compare user speech
    against expected pronunciation. Integrates GPT-4o for
    personalized feedback via GitHub Models API.
    """

    def __init__(self):
        self._asr = None
        self._ai_feedback = None

    def _get_ai_feedback_generator(self):
        """Get AI feedback generator instance."""
        if self._ai_feedback is None:
            from api.endpoints.v1.processing.ai_feedback import get_ai_feedback_generator
            self._ai_feedback = get_ai_feedback_generator()
        return self._ai_feedback

    def _get_asr(self):
        """Get ASR instance for transcription."""
        if self._asr is None:
            from api.endpoints.v1.processing.therapy_asr import get_therapy_asr
            self._asr = get_therapy_asr()
        return self._asr

    async def analyze(
        self,
        audio_bytes: bytes,
        target_text: str,
        user_baseline: Optional[dict] = None,
        user_context: Optional[dict] = None,
        include_ai_feedback: bool = True
    ) -> PronunciationFeedback:
        """
        Analyze pronunciation of audio against target text.

        Args:
            audio_bytes: User's recorded audio
            target_text: Expected text/phrase
            user_baseline: Optional baseline metrics for comparison
            user_context: Optional user profile (speech condition, severity)
            include_ai_feedback: Whether to generate GPT-4o feedback

        Returns:
            PronunciationFeedback with scores, suggestions, and AI feedback
        """
        logging.info(f"Analyzing pronunciation for target: {target_text}")

        # 1. Transcribe the audio
        asr = self._get_asr()
        result = asr.transcribe(audio_bytes)
        transcription = result.text.strip().lower()
        target_clean = target_text.strip().lower()

        logging.debug(f"Transcription: {transcription}")
        logging.debug(f"Target: {target_clean}")

        # 2. Compare transcription to target
        word_scores, phoneme_errors = self._compare_texts(
            transcription, target_clean
        )

        # 3. Calculate scores
        overall_score = self._calculate_overall_score(word_scores)
        clarity_score = self._calculate_clarity_score(word_scores, phoneme_errors)
        pace_score = self._calculate_pace_score(result.word_timestamps)
        fluency_score = self._calculate_fluency_score(transcription, target_clean)

        # 4. Generate rule-based suggestions
        suggestions = self._generate_suggestions(phoneme_errors, word_scores)

        # 5. Generate AI-powered feedback (GPT-4o via GitHub Models)
        ai_feedback = None
        if include_ai_feedback:
            try:
                ai_generator = self._get_ai_feedback_generator()
                # Convert phoneme errors to dict format for AI
                errors_dict = [
                    {
                        "word": e.word,
                        "expected": e.expected,
                        "actual": e.actual,
                        "error_type": e.error_type.value
                    }
                    for e in phoneme_errors
                ]

                ai_result = await ai_generator.generate_feedback(
                    target_text=target_text,
                    transcription=transcription,
                    overall_score=overall_score,
                    clarity_score=clarity_score,
                    pace_score=pace_score,
                    fluency_score=fluency_score,
                    errors=errors_dict,
                    user_context=user_context
                )

                ai_feedback = AIFeedback(
                    feedback=ai_result.feedback,
                    encouragement=ai_result.encouragement,
                    specific_tips=ai_result.specific_tips,
                    recommended_exercises=ai_result.recommended_exercises,
                    difficulty_adjustment=ai_result.difficulty_adjustment
                )
                logging.info("AI feedback generated successfully")
            except Exception as e:
                logging.warning(f"AI feedback generation failed: {e}")
                ai_feedback = None

        return PronunciationFeedback(
            overall_score=overall_score,
            clarity_score=clarity_score,
            pace_score=pace_score,
            fluency_score=fluency_score,
            word_scores=word_scores,
            suggestions=suggestions,
            phoneme_errors=phoneme_errors,
            transcription=transcription,
            target_text=target_text,
            ai_feedback=ai_feedback
        )

    def _compare_texts(
        self,
        transcription: str,
        target: str
    ) -> tuple[List[WordScore], List[PhonemeError]]:
        """Compare transcribed text to target text."""
        trans_words = transcription.split()
        target_words = target.split()

        word_scores = []
        phoneme_errors = []

        # Simple word-level comparison (can be enhanced with phoneme alignment)
        max_len = max(len(trans_words), len(target_words))

        for i in range(max_len):
            target_word = target_words[i] if i < len(target_words) else ""
            trans_word = trans_words[i] if i < len(trans_words) else ""

            if not target_word:
                # Extra word in transcription
                phoneme_errors.append(PhonemeError(
                    word=trans_word,
                    position=i,
                    expected="",
                    actual=trans_word,
                    error_type=ErrorType.ADDITION,
                    suggestion=f"Extra word '{trans_word}' detected"
                ))
                continue

            if not trans_word:
                # Missing word
                word_scores.append(WordScore(
                    word=target_word,
                    score=0.0,
                    errors=[PhonemeError(
                        word=target_word,
                        position=i,
                        expected=target_word,
                        actual=None,
                        error_type=ErrorType.OMISSION,
                        suggestion=f"Try to include the word '{target_word}'"
                    )]
                ))
                phoneme_errors.append(word_scores[-1].errors[0])
                continue

            # Compare words
            score, errors = self._compare_words(target_word, trans_word, i)
            word_scores.append(WordScore(
                word=target_word,
                score=score,
                errors=errors
            ))
            phoneme_errors.extend(errors)

        return word_scores, phoneme_errors

    def _compare_words(
        self,
        target_word: str,
        trans_word: str,
        position: int
    ) -> tuple[float, List[PhonemeError]]:
        """Compare two words and return score and errors."""
        errors = []

        # Exact match
        if target_word == trans_word:
            return 100.0, []

        # Calculate similarity (simple Levenshtein-based)
        similarity = self._word_similarity(target_word, trans_word)
        score = similarity * 100

        # Detect error type
        if len(trans_word) > len(target_word):
            error_type = ErrorType.ADDITION
            suggestion = f"'{trans_word}' has extra sounds, expected '{target_word}'"
        elif len(trans_word) < len(target_word):
            error_type = ErrorType.OMISSION
            suggestion = f"Some sounds missing in '{trans_word}', expected '{target_word}'"
        else:
            error_type = ErrorType.SUBSTITUTION
            suggestion = f"'{trans_word}' should be '{target_word}'"

        if score < 100:
            errors.append(PhonemeError(
                word=target_word,
                position=position,
                expected=target_word,
                actual=trans_word,
                error_type=error_type,
                suggestion=suggestion
            ))

        return score, errors

    def _word_similarity(self, word1: str, word2: str) -> float:
        """Calculate similarity between two words (0-1)."""
        if word1 == word2:
            return 1.0

        # Levenshtein distance normalized
        len1, len2 = len(word1), len(word2)
        if len1 == 0 or len2 == 0:
            return 0.0

        # Create distance matrix
        dp = [[0] * (len2 + 1) for _ in range(len1 + 1)]

        for i in range(len1 + 1):
            dp[i][0] = i
        for j in range(len2 + 1):
            dp[0][j] = j

        for i in range(1, len1 + 1):
            for j in range(1, len2 + 1):
                cost = 0 if word1[i-1] == word2[j-1] else 1
                dp[i][j] = min(
                    dp[i-1][j] + 1,  # deletion
                    dp[i][j-1] + 1,  # insertion
                    dp[i-1][j-1] + cost  # substitution
                )

        distance = dp[len1][len2]
        max_len = max(len1, len2)

        return 1.0 - (distance / max_len)

    def _calculate_overall_score(self, word_scores: List[WordScore]) -> float:
        """Calculate overall pronunciation score."""
        if not word_scores:
            return 0.0
        return sum(ws.score for ws in word_scores) / len(word_scores)

    def _calculate_clarity_score(
        self,
        word_scores: List[WordScore],
        errors: List[PhonemeError]
    ) -> float:
        """Calculate speech clarity score."""
        if not word_scores:
            return 0.0

        # Penalize based on error types
        error_penalties = {
            ErrorType.DISTORTION: 15,
            ErrorType.SUBSTITUTION: 10,
            ErrorType.OMISSION: 20,
            ErrorType.ADDITION: 5,
        }

        base_score = 100.0
        for error in errors:
            base_score -= error_penalties.get(error.error_type, 10)

        return max(0.0, base_score)

    def _calculate_pace_score(
        self,
        word_timestamps: Optional[List[dict]]
    ) -> float:
        """Calculate pace/timing score."""
        if not word_timestamps or len(word_timestamps) < 2:
            return 75.0  # Default score if no timestamps

        # Calculate words per minute
        total_duration = word_timestamps[-1].get("end", 0) - word_timestamps[0].get("start", 0)
        if total_duration <= 0:
            return 75.0

        wpm = (len(word_timestamps) / total_duration) * 60

        # Ideal range: 100-150 WPM for clear speech
        if 100 <= wpm <= 150:
            return 100.0
        elif 80 <= wpm < 100 or 150 < wpm <= 180:
            return 85.0
        elif 60 <= wpm < 80 or 180 < wpm <= 200:
            return 70.0
        else:
            return 50.0

    def _calculate_fluency_score(self, transcription: str, target: str) -> float:
        """Calculate fluency based on text similarity."""
        return self._word_similarity(transcription, target) * 100

    def _generate_suggestions(
        self,
        errors: List[PhonemeError],
        word_scores: List[WordScore]
    ) -> List[str]:
        """Generate actionable improvement suggestions."""
        suggestions = []

        # Group errors by type
        error_types = {}
        for error in errors:
            error_types.setdefault(error.error_type, []).append(error)

        # Generate suggestions based on error patterns
        if ErrorType.OMISSION in error_types:
            omissions = error_types[ErrorType.OMISSION]
            words = [e.word for e in omissions[:3]]
            suggestions.append(
                f"Try to pronounce all sounds in: {', '.join(words)}"
            )

        if ErrorType.SUBSTITUTION in error_types:
            subs = error_types[ErrorType.SUBSTITUTION]
            if subs:
                suggestions.append(
                    f"Focus on the correct sound in '{subs[0].word}'"
                )

        if ErrorType.ADDITION in error_types:
            suggestions.append("Speak more clearly without adding extra sounds")

        # Low scoring words
        low_scores = [ws for ws in word_scores if ws.score < 70]
        if low_scores:
            words = [ws.word for ws in low_scores[:3]]
            suggestions.append(
                f"Practice these words: {', '.join(words)}"
            )

        # General encouragement if few errors
        if len(errors) <= 2:
            suggestions.append("Good job! Keep practicing for even better clarity.")

        return suggestions[:5]  # Limit to 5 suggestions


# Singleton instance
_analyzer_instance: Optional[PronunciationAnalyzer] = None


def get_pronunciation_analyzer() -> PronunciationAnalyzer:
    """Get or create PronunciationAnalyzer singleton."""
    global _analyzer_instance
    if _analyzer_instance is None:
        _analyzer_instance = PronunciationAnalyzer()
    return _analyzer_instance


async def analyze_pronunciation(
    audio_bytes: bytes,
    target_text: str,
    user_baseline: Optional[dict] = None,
    user_context: Optional[dict] = None,
    include_ai_feedback: bool = True
) -> PronunciationFeedback:
    """Convenience function for pronunciation analysis with AI feedback."""
    analyzer = get_pronunciation_analyzer()
    return await analyzer.analyze(
        audio_bytes,
        target_text,
        user_baseline,
        user_context,
        include_ai_feedback
    )
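
Word scores come straight from the normalized Levenshtein similarity: a target word "cat" transcribed as "bat" has edit distance 1 over max length 3, so similarity 1 - 1/3 ≈ 0.67 and a word score of about 67. A minimal end-to-end sketch using the convenience function (the audio file is hypothetical; the ASR and AI engines load lazily):

# Hypothetical driver — sample.wav stands in for a user recording
import asyncio
from api.endpoints.v1.processing.pronunciation_analysis import analyze_pronunciation

async def main():
    with open("sample.wav", "rb") as f:
        audio = f.read()
    fb = await analyze_pronunciation(audio, "she sells sea shells", include_ai_feedback=False)
    print(f"overall={fb.overall_score:.0f} clarity={fb.clarity_score:.0f} pace={fb.pace_score:.0f}")
    for ws in fb.word_scores:
        print(ws.word, round(ws.score))
    for tip in fb.suggestions:
        print("-", tip)

asyncio.run(main())
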
api/endpoints/v1/processing/soap.py
ADDED
@@ -0,0 +1,119 @@
import json
import logging
import textwrap

from openai import OpenAI

from api.config import settings

client = OpenAI(api_key=settings.OPENAI_API_KEY)

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


def generate_soap(transcript: str) -> str:
    """Helper function to generate a SOAP note from a transcript using the OpenAI Chat Completions API."""
    logging.info("Generating soap note")

    # Sample transcript kept for reference; not referenced further in this function.
    sample_transcript = textwrap.dedent("""
    The patient is a 25-year-old right-handed Caucasian female who presented to the emergency department with sudden onset of headache occurring at approximately 11 a.m. on the morning of July 31, 2008. She described the headache as the worst in her life and it was also accompanied by blurry vision and scotoma. The patient also perceived some swelling in her face. Once in the Emergency Department, the patient underwent a very thorough evaluation and examination. She was given the migraine cocktail. Also was given morphine a total of 8 mg while in the Emergency Department. For full details on the history of present illness, please see the previous history and physical.

    Doctor: How're you feeling today?
    Patient: Terrible. I'm having the worst headache of my life.
    Doctor: I'm so sorry. Well you are only twenty-five, so let's hope this is the last of the worst. Let's see how we can best help you. When did it start?
    Patient: Around eleven in the morning.
    Doctor: Today?
    Patient: Um no yesterday. July thirty-first.
    Doctor: July thirty-first O eight. Got it. Did it come on suddenly?
    Patient: Yeah.
    Doctor: Are you having any symptoms with it, such as blurry vision, light sensitivity, dizziness, lightheadedness, or nausea?
    Patient: I'm having blurry vision and lightheadedness. I also can't seem to write well. It looks so messy. I am naturally right-handed but my writing looks like I am trying with my left.
    Doctor: How would you describe the lightheadedness?
    Patient: Like there are blind spots.
    Doctor: Okay. How about any vomiting?
    Patient: Um no. I feel like my face is pretty swollen though. I don't know if it's related to the headache but it started around the same time.
    Doctor: Here in the ER, we'll do a thorough exam and eval to make sure nothing serious is going on. While we're waiting for your CT results, I'm going to order a migraine cocktail and some Morphine.
    Patient: Thanks. Will the nurse be in soon?
    Doctor: Yes, she'll be right in as soon as the order is placed. It shouldn't be more than a few minutes. If it takes longer, then please ring the call bell.
    """)

    # Example format for Tiptap editor as a JSON string
    example_format = {
        "type": "doc",
        "content": [
            {
                "type": "heading",
                "attrs": {"level": 2},
                "content": [{"type": "text", "text": "Example heading"}],
            },
            {
                "type": "paragraph",
                "content": [{"type": "text", "text": "example paragraph"}],
            },
            {
                "type": "heading",
                "attrs": {"level": 3},
                "content": [{"type": "text", "text": "Features"}],
            },
            {
                "type": "orderedList",
                "attrs": {"tight": True, "start": 1},
                "content": [
                    {
                        "type": "listItem",
                        "content": [
                            {
                                "type": "paragraph",
                                "content": [
                                    {"type": "text", "text": "Example list item"}
                                ],
                            },
                        ],
                    },
                    {
                        "type": "listItem",
                        "content": [
                            {
                                "type": "paragraph",
                                "content": [
                                    {"type": "text", "text": "AI autocomplete (type "},
                                    {
                                        "type": "text",
                                        "marks": [{"type": "code"}],
                                        "text": "++",
                                    },
                                    {
                                        "type": "text",
                                        "text": " to activate, or select from slash menu)",
                                    },
                                ],
                            },
                        ],
                    },
                ],
            },
        ],
    }
    example_format_str = json.dumps(example_format)

    # Call the OpenAI Chat Completions API
    completion = client.chat.completions.create(
        model="gpt-4o",  # gpt-4o, gpt-3.5-turbo
        response_format={"type": "json_object"},
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant designed to output JSON.",
            },
            {
                "role": "user",
                "content": f"Generate a SOAP note from the following transcript and return it in JSON format for a Tiptap editor. This is the example format: {example_format_str}. The first heading can be the subjective section. The text fields can not be left blank, so try your best to fill them out. Transcript: {transcript}",
            },
        ],
    )

    logging.debug(f"SOAP: {completion.choices[0].message.content}")
    return completion.choices[0].message.content
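
generate_soap returns the model's JSON string, which the caller is expected to parse into a Tiptap document. A short sketch (the transcript is illustrative; requires OPENAI_API_KEY):

# Hypothetical usage — the returned string is expected to follow the Tiptap "doc" format shown above
import json
from api.endpoints.v1.processing.soap import generate_soap

note = generate_soap("Doctor: When did the headache start? Patient: Yesterday morning.")
doc = json.loads(note)
print(doc.get("type"))  # expected "doc" per the example format
print(json.dumps(doc, indent=2))
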
api/endpoints/v1/processing/therapy_asr.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Therapy ASR Module - Multi-engine speech recognition for therapy applications.

Supports:
- Local Whisper (general speech, privacy-focused)
- SpeechBrain (fine-tuned for atypical speech)
- OpenAI Whisper API (fallback)
"""

import io
import logging
from enum import Enum
from typing import Optional
from dataclasses import dataclass

from api.config import settings

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


class ASREngine(str, Enum):
    """Available ASR engines."""
    WHISPER_LOCAL = "whisper_local"
    SPEECHBRAIN = "speechbrain"
    WHISPER_API = "whisper_api"
    AUTO = "auto"  # Automatically select based on user profile


@dataclass
class TranscriptionResult:
    """Structured transcription result."""
    text: str
    engine_used: ASREngine
    confidence: Optional[float] = None
    word_timestamps: Optional[list] = None
    language: Optional[str] = None


class TherapyASR:
    """
    Multi-engine ASR for therapy applications.

    Supports automatic engine selection based on user speech profile,
    with fallback chain for reliability.
    """

    def __init__(self, default_engine: ASREngine = ASREngine.AUTO):
        self.default_engine = default_engine
        self._whisper_local_model = None
        self._speechbrain_model = None
        self._openai_client = None

    def _get_openai_client(self):
        """Lazy load OpenAI client."""
        if self._openai_client is None:
            from openai import OpenAI
            self._openai_client = OpenAI(api_key=settings.OPENAI_API_KEY)
        return self._openai_client

    def _get_whisper_local(self):
        """Lazy load local Whisper model."""
        if self._whisper_local_model is None:
            try:
                import torch
                from transformers import WhisperProcessor, WhisperForConditionalGeneration

                model_name = "openai/whisper-base"  # Start with base, upgrade as needed
                logging.info(f"Loading local Whisper model: {model_name}")

                self._whisper_processor = WhisperProcessor.from_pretrained(model_name)
                self._whisper_local_model = WhisperForConditionalGeneration.from_pretrained(model_name)

                # Use GPU if available
                if torch.cuda.is_available():
                    self._whisper_local_model = self._whisper_local_model.to("cuda")
                elif torch.backends.mps.is_available():
                    self._whisper_local_model = self._whisper_local_model.to("mps")

                logging.info("Local Whisper model loaded successfully")
            except ImportError as e:
                logging.warning(f"Local Whisper not available: {e}")
                raise
        return self._whisper_local_model

    def _get_speechbrain(self):
        """Lazy load SpeechBrain model for atypical speech."""
        if self._speechbrain_model is None:
            try:
                import speechbrain as sb

                # Use pre-trained model, can be swapped for fine-tuned version
                model_source = "speechbrain/asr-wav2vec2-commonvoice-en"
                logging.info(f"Loading SpeechBrain model: {model_source}")

                self._speechbrain_model = sb.pretrained.EncoderASR.from_hparams(
                    source=model_source,
                    savedir="models/speechbrain_asr"
                )
                logging.info("SpeechBrain model loaded successfully")
            except ImportError as e:
                logging.warning(f"SpeechBrain not available: {e}")
                raise
        return self._speechbrain_model

    def _select_engine(self, user_profile: Optional[dict] = None) -> ASREngine:
        """Select appropriate ASR engine based on user profile."""
        if self.default_engine != ASREngine.AUTO:
            return self.default_engine

        if user_profile:
            # Use SpeechBrain for users with speech conditions
            speech_condition = user_profile.get("speech_condition")
            if speech_condition in ["dysarthria", "apraxia", "autism", "stuttering"]:
                return ASREngine.SPEECHBRAIN

            # Use local Whisper for privacy-focused users
            if user_profile.get("privacy_mode") == "local":
                return ASREngine.WHISPER_LOCAL

        # Default to API for best accuracy
        return ASREngine.WHISPER_API

    def transcribe(
        self,
        audio_data: bytes,
        filename: str = "audio.wav",
        content_type: str = "audio/wav",
        user_profile: Optional[dict] = None,
        engine: Optional[ASREngine] = None
    ) -> TranscriptionResult:
        """
        Transcribe audio using the most appropriate engine.

        Args:
            audio_data: Raw audio bytes
            filename: Original filename
            content_type: MIME type of audio
            user_profile: Optional user profile for engine selection
            engine: Force specific engine (overrides auto-selection)

        Returns:
            TranscriptionResult with text and metadata
        """
        selected_engine = engine or self._select_engine(user_profile)
        logging.info(f"Transcribing with engine: {selected_engine.value}")

        # Try selected engine with fallback chain
        fallback_order = [selected_engine]
        if selected_engine != ASREngine.WHISPER_API:
            fallback_order.append(ASREngine.WHISPER_API)

        last_error = None
        for eng in fallback_order:
            try:
                if eng == ASREngine.WHISPER_API:
                    return self._transcribe_whisper_api(audio_data, filename, content_type)
                elif eng == ASREngine.WHISPER_LOCAL:
                    return self._transcribe_whisper_local(audio_data)
                elif eng == ASREngine.SPEECHBRAIN:
                    return self._transcribe_speechbrain(audio_data)
            except Exception as e:
                logging.warning(f"Engine {eng.value} failed: {e}")
                last_error = e
                continue

        raise RuntimeError(f"All ASR engines failed. Last error: {last_error}")

    def _transcribe_whisper_api(
        self,
        audio_data: bytes,
        filename: str,
        content_type: str
    ) -> TranscriptionResult:
        """Transcribe using OpenAI Whisper API."""
        logging.info("Transcribing with OpenAI Whisper API")

        client = self._get_openai_client()
        file_data = (filename, audio_data, content_type)

        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=file_data,
            response_format="verbose_json",
            timestamp_granularities=["word"]
        )

        # Extract word timestamps if available
        word_timestamps = None
        if hasattr(transcription, 'words'):
            word_timestamps = [
                {"word": w.word, "start": w.start, "end": w.end}
                for w in transcription.words
            ]

        return TranscriptionResult(
            text=transcription.text,
            engine_used=ASREngine.WHISPER_API,
            language=getattr(transcription, 'language', None),
            word_timestamps=word_timestamps
        )

    def _transcribe_whisper_local(self, audio_data: bytes) -> TranscriptionResult:
        """Transcribe using local Whisper model."""
        logging.info("Transcribing with local Whisper")

        import torch
        import librosa
        import numpy as np

        model = self._get_whisper_local()

        # Load audio from bytes
        audio_array, sr = librosa.load(io.BytesIO(audio_data), sr=16000)

        # Process audio
        input_features = self._whisper_processor(
            audio_array,
            sampling_rate=16000,
            return_tensors="pt"
        ).input_features

        # Move to same device as model
        device = next(model.parameters()).device
        input_features = input_features.to(device)

        # Generate transcription
        with torch.no_grad():
            predicted_ids = model.generate(input_features)

        transcription = self._whisper_processor.batch_decode(
            predicted_ids,
            skip_special_tokens=True
        )[0]

        return TranscriptionResult(
            text=transcription.strip(),
            engine_used=ASREngine.WHISPER_LOCAL
        )

    def _transcribe_speechbrain(self, audio_data: bytes) -> TranscriptionResult:
        """Transcribe using SpeechBrain (optimized for atypical speech)."""
        logging.info("Transcribing with SpeechBrain")

        import tempfile
        import os

        model = self._get_speechbrain()

        # SpeechBrain requires file path, write temp file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            f.write(audio_data)
            temp_path = f.name

        try:
            transcription = model.transcribe_file(temp_path)

            # Handle different return types
            if isinstance(transcription, list):
                text = transcription[0] if transcription else ""
            else:
                text = str(transcription)

            return TranscriptionResult(
                text=text.strip(),
                engine_used=ASREngine.SPEECHBRAIN
            )
        finally:
            os.unlink(temp_path)


# Singleton instance for reuse
_therapy_asr_instance: Optional[TherapyASR] = None


def get_therapy_asr() -> TherapyASR:
    """Get or create TherapyASR singleton."""
    global _therapy_asr_instance
    if _therapy_asr_instance is None:
        _therapy_asr_instance = TherapyASR()
    return _therapy_asr_instance


def transcribe_for_therapy(
    audio_data: bytes,
    filename: str = "audio.wav",
    content_type: str = "audio/wav",
    user_profile: Optional[dict] = None,
    engine: Optional[ASREngine] = None
) -> TranscriptionResult:
    """
    Convenience function to transcribe audio for therapy.

    This is the main entry point for therapy transcription.
    """
    asr = get_therapy_asr()
    return asr.transcribe(
        audio_data=audio_data,
        filename=filename,
        content_type=content_type,
        user_profile=user_profile,
        engine=engine
    )
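Taken together, the engine selection and fallback chain make the convenience function the only surface most callers need. A minimal caller sketch — the sample.wav path and the example user profile are illustrative, and the API fallback assumes OPENAI_API_KEY is configured in api.config.settings:

    from api.endpoints.v1.processing.therapy_asr import transcribe_for_therapy

    # Read raw audio bytes; "sample.wav" is a placeholder path
    with open("sample.wav", "rb") as f:
        audio = f.read()

    # With AUTO selection, a profile carrying a speech_condition routes
    # to SpeechBrain; on failure the chain falls back to the Whisper API.
    result = transcribe_for_therapy(
        audio_data=audio,
        filename="sample.wav",
        content_type="audio/wav",
        user_profile={"speech_condition": "dysarthria"},
    )
    print(result.text, result.engine_used.value)
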
api/endpoints/v1/processing/therapy_tts.py
ADDED
@@ -0,0 +1,354 @@
"""
Therapy TTS Module - Text-to-speech for therapy and AAC applications.

Supports:
- WhisperSpeech (fast, voice cloning)
- OpenAI TTS API (fallback)
- Edge TTS (lightweight fallback)
"""

import io
import logging
from enum import Enum
from typing import Optional
from dataclasses import dataclass

from api.config import settings

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


class TTSEngine(str, Enum):
    """Available TTS engines."""
    WHISPERSPEECH = "whisperspeech"
    OPENAI_TTS = "openai_tts"
    EDGE_TTS = "edge_tts"
    AUTO = "auto"


class TTSVoice(str, Enum):
    """Preset voice options."""
    NEUTRAL = "neutral"
    WARM = "warm"
    CLEAR = "clear"
    SLOW = "slow"  # For therapy exercises
    CUSTOM = "custom"  # Voice cloning


@dataclass
class TTSResult:
    """TTS synthesis result."""
    audio_bytes: bytes
    format: str  # wav, mp3
    sample_rate: int
    engine_used: TTSEngine
    duration_seconds: Optional[float] = None


class TherapyTTS:
    """
    TTS engine for therapy applications.

    Features:
    - Voice cloning from reference audio
    - Adjustable speed for therapy exercises
    - Multiple engine support with fallback
    """

    def __init__(self, default_engine: TTSEngine = TTSEngine.AUTO):
        self.default_engine = default_engine
        self._whisperspeech_pipe = None
        self._openai_client = None

    def _get_openai_client(self):
        """Lazy load OpenAI client."""
        if self._openai_client is None:
            from openai import OpenAI
            self._openai_client = OpenAI(api_key=settings.OPENAI_API_KEY)
        return self._openai_client

    def _get_whisperspeech(self):
        """Lazy load WhisperSpeech pipeline."""
        if self._whisperspeech_pipe is None:
            try:
                from whisperspeech.pipeline import Pipeline
                logging.info("Loading WhisperSpeech pipeline...")
                self._whisperspeech_pipe = Pipeline(
                    s2a_ref='collabora/whisperspeech:s2a-q4-tiny-en+pl.model'
                )
                logging.info("WhisperSpeech loaded successfully")
            except ImportError as e:
                logging.warning(f"WhisperSpeech not available: {e}")
                raise
        return self._whisperspeech_pipe

    def _select_engine(self, voice_reference: Optional[bytes] = None) -> TTSEngine:
        """Select TTS engine based on requirements."""
        if self.default_engine != TTSEngine.AUTO:
            return self.default_engine

        # Use WhisperSpeech for voice cloning
        if voice_reference:
            return TTSEngine.WHISPERSPEECH

        # Default to OpenAI for quality
        return TTSEngine.OPENAI_TTS

    def synthesize(
        self,
        text: str,
        voice: TTSVoice = TTSVoice.NEUTRAL,
        speed: float = 1.0,
        voice_reference: Optional[bytes] = None,
        engine: Optional[TTSEngine] = None,
        output_format: str = "wav"
    ) -> TTSResult:
        """
        Synthesize speech from text.

        Args:
            text: Text to synthesize
            voice: Voice preset to use
            speed: Speech rate (0.5 = slow, 1.0 = normal, 2.0 = fast)
            voice_reference: Audio bytes for voice cloning
            engine: Force specific engine
            output_format: Output format (wav, mp3)

        Returns:
            TTSResult with audio bytes
        """
        selected_engine = engine or self._select_engine(voice_reference)
        logging.info(f"Synthesizing with engine: {selected_engine.value}")

        # Fallback chain
        fallback_order = [selected_engine]
        if selected_engine != TTSEngine.OPENAI_TTS:
            fallback_order.append(TTSEngine.OPENAI_TTS)

        last_error = None
        for eng in fallback_order:
            try:
                if eng == TTSEngine.OPENAI_TTS:
                    return self._synthesize_openai(text, voice, speed, output_format)
                elif eng == TTSEngine.WHISPERSPEECH:
                    return self._synthesize_whisperspeech(
                        text, voice_reference, speed, output_format
                    )
                elif eng == TTSEngine.EDGE_TTS:
                    return self._synthesize_edge_tts(text, voice, speed, output_format)
            except Exception as e:
                logging.warning(f"Engine {eng.value} failed: {e}")
                last_error = e
                continue

        raise RuntimeError(f"All TTS engines failed. Last error: {last_error}")

    def _synthesize_openai(
        self,
        text: str,
        voice: TTSVoice,
        speed: float,
        output_format: str
    ) -> TTSResult:
        """Synthesize using OpenAI TTS API."""
        logging.info("Synthesizing with OpenAI TTS")

        client = self._get_openai_client()

        # Map voice presets to OpenAI voices
        voice_map = {
            TTSVoice.NEUTRAL: "alloy",
            TTSVoice.WARM: "nova",
            TTSVoice.CLEAR: "onyx",
            TTSVoice.SLOW: "alloy",  # Use speed parameter
            TTSVoice.CUSTOM: "alloy",
        }

        response = client.audio.speech.create(
            model="tts-1",
            voice=voice_map.get(voice, "alloy"),
            input=text,
            speed=speed,
            response_format="wav" if output_format == "wav" else "mp3"
        )

        audio_bytes = response.content

        return TTSResult(
            audio_bytes=audio_bytes,
            format=output_format,
            sample_rate=24000,
            engine_used=TTSEngine.OPENAI_TTS
        )

    def _synthesize_whisperspeech(
        self,
        text: str,
        voice_reference: Optional[bytes],
        speed: float,
        output_format: str
    ) -> TTSResult:
        """Synthesize using WhisperSpeech with optional voice cloning."""
        logging.info("Synthesizing with WhisperSpeech")

        import torch
        import numpy as np

        pipe = self._get_whisperspeech()

        # Generate audio
        if voice_reference:
            # Voice cloning mode
            import tempfile
            import os

            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                f.write(voice_reference)
                ref_path = f.name

            try:
                audio = pipe.generate(text, speaker=ref_path)
            finally:
                os.unlink(ref_path)
        else:
            audio = pipe.generate(text)

        # Convert to bytes
        if isinstance(audio, torch.Tensor):
            audio_np = audio.cpu().numpy()
        else:
            audio_np = np.array(audio)

        # Ensure correct shape
        if audio_np.ndim > 1:
            audio_np = audio_np.squeeze()

        # Apply speed adjustment if needed
        if speed != 1.0:
            import librosa
            audio_np = librosa.effects.time_stretch(audio_np, rate=speed)

        # Convert to wav bytes
        import soundfile as sf
        buffer = io.BytesIO()
        sf.write(buffer, audio_np, 24000, format='WAV')
        buffer.seek(0)

        return TTSResult(
            audio_bytes=buffer.read(),
            format="wav",
            sample_rate=24000,
            engine_used=TTSEngine.WHISPERSPEECH,
            duration_seconds=len(audio_np) / 24000
        )

    def _synthesize_edge_tts(
        self,
        text: str,
        voice: TTSVoice,
        speed: float,
        output_format: str
    ) -> TTSResult:
        """Synthesize using Edge TTS (lightweight fallback)."""
        logging.info("Synthesizing with Edge TTS")

        import asyncio
        import edge_tts

        # Map voice presets to Edge TTS voices
        voice_map = {
            TTSVoice.NEUTRAL: "en-US-JennyNeural",
            TTSVoice.WARM: "en-US-AriaNeural",
            TTSVoice.CLEAR: "en-US-GuyNeural",
            TTSVoice.SLOW: "en-US-JennyNeural",
            TTSVoice.CUSTOM: "en-US-JennyNeural",
        }

        async def _generate():
            communicate = edge_tts.Communicate(
                text,
                voice_map.get(voice, "en-US-JennyNeural"),
                rate=f"{int((speed - 1) * 100):+d}%"
            )
            buffer = io.BytesIO()
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    buffer.write(chunk["data"])
            return buffer.getvalue()

        audio_bytes = asyncio.run(_generate())

        return TTSResult(
            audio_bytes=audio_bytes,
            format="mp3",
            sample_rate=24000,
            engine_used=TTSEngine.EDGE_TTS
        )

    def generate_therapy_prompt(
        self,
        exercise_type: str,
        target_text: str,
        **kwargs
    ) -> TTSResult:
        """
        Generate therapy exercise audio prompt.

        Args:
            exercise_type: Type of exercise (repeat_after_me, pronunciation, etc.)
            target_text: The text to practice
            **kwargs: Additional synthesis parameters

        Returns:
            TTSResult with exercise audio
        """
        prompts = {
            "repeat_after_me": f"Please repeat after me: {target_text}",
            "pronunciation": f"Let's practice saying: {target_text}. Listen carefully.",
            "slower": f"Now try saying it more slowly: {target_text}",
            "word_by_word": f"Let's break it down. {target_text}",
            "encouragement": f"Great try! Let's practice {target_text} again.",
        }

        prompt_text = prompts.get(exercise_type, target_text)

        # Use slower speed for therapy prompts
        speed = kwargs.pop("speed", 0.9)

        return self.synthesize(
            text=prompt_text,
            speed=speed,
            voice=TTSVoice.CLEAR,
            **kwargs
        )


# Singleton instance
_therapy_tts_instance: Optional[TherapyTTS] = None


def get_therapy_tts() -> TherapyTTS:
    """Get or create TherapyTTS singleton."""
    global _therapy_tts_instance
    if _therapy_tts_instance is None:
        _therapy_tts_instance = TherapyTTS()
    return _therapy_tts_instance


def synthesize_speech(
    text: str,
    voice: TTSVoice = TTSVoice.NEUTRAL,
    speed: float = 1.0,
    voice_reference: Optional[bytes] = None
) -> TTSResult:
    """Convenience function for TTS synthesis."""
    tts = get_therapy_tts()
    return tts.synthesize(
        text=text,
        voice=voice,
        speed=speed,
        voice_reference=voice_reference
    )
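The TTS side mirrors the ASR pattern with its own convenience entry point. A minimal sketch — the output filename is illustrative, and note that result.format stays "wav" unless the fallback chain lands on Edge TTS, which returns mp3:

    from api.endpoints.v1.processing.therapy_tts import synthesize_speech, TTSVoice

    # Slightly slowed, clear voice - the same defaults generate_therapy_prompt uses
    result = synthesize_speech(
        text="Please repeat after me: hello",
        voice=TTSVoice.CLEAR,
        speed=0.9,
    )

    with open(f"prompt.{result.format}", "wb") as f:
        f.write(result.audio_bytes)
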
api/endpoints/v1/routers/__init__.py
ADDED
File without changes

api/endpoints/v1/routers/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (184 Bytes)

api/endpoints/v1/routers/__pycache__/analytics.cpython-312.pyc
ADDED
Binary file (12.5 kB)

api/endpoints/v1/routers/__pycache__/health.cpython-312.pyc
ADDED
Binary file (999 Bytes)

api/endpoints/v1/routers/__pycache__/therapy.cpython-312.pyc
ADDED
Binary file (24.3 kB)

api/endpoints/v1/routers/__pycache__/upload.cpython-312.pyc
ADDED
Binary file (3.18 kB)
api/endpoints/v1/routers/analytics.py
ADDED
@@ -0,0 +1,364 @@
"""
Analytics Router - Progress tracking and reporting endpoints.

Endpoints:
- GET /analytics/summary - User progress summary
- GET /analytics/detailed - Detailed metrics for therapist view
- GET /analytics/trends - Progress trends over time
- GET /analytics/recommendations - AI-powered recommendations
"""

import logging
from datetime import datetime, timedelta
from typing import Optional
from fastapi import APIRouter, Depends, Query
from pydantic import BaseModel, Field

from api.config import settings
from api.endpoints.v1.auth.verify import verify_token

router = APIRouter()

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


# ============================================================================
# Response Models
# ============================================================================

class ProgressSummary(BaseModel):
    """Summary of user's therapy progress."""
    total_sessions: int
    total_exercises: int
    total_practice_minutes: int
    current_streak_days: int
    average_score: float
    improvement_percent: float
    last_session_date: Optional[str]
    top_achievements: list[str]


class MetricTrend(BaseModel):
    """Single metric trend data point."""
    date: str
    value: float
    metric_type: str


class DetailedProgress(BaseModel):
    """Detailed progress for therapist view."""
    user_id: str
    period_days: int

    # Core metrics
    pcc_current: float  # Percent Consonants Correct
    pcc_baseline: float
    pcc_improvement: float

    pwc_current: float  # Percent Words Correct
    pwc_baseline: float
    pwc_improvement: float

    clarity_current: float
    clarity_baseline: float
    clarity_improvement: float

    # Session stats
    sessions_completed: int
    exercises_completed: int
    total_practice_minutes: int

    # Problem areas
    problem_phonemes: list[str]
    improving_phonemes: list[str]

    # Recommendations
    recommendations: list[str]


class ExerciseStats(BaseModel):
    """Statistics for a specific exercise type."""
    exercise_type: str
    attempts: int
    average_score: float
    best_score: float
    improvement: float
    last_attempted: Optional[str]


class Recommendation(BaseModel):
    """AI-generated recommendation."""
    category: str  # exercise, frequency, focus_area
    title: str
    description: str
    priority: int  # 1=high, 2=medium, 3=low
    action_type: Optional[str]  # specific exercise to try


# ============================================================================
# Mock Data Functions (Replace with DB queries)
# ============================================================================

def _get_mock_summary(user_id: str, days: int) -> ProgressSummary:
    """Generate mock summary data. Replace with DB queries."""
    return ProgressSummary(
        total_sessions=23,
        total_exercises=156,
        total_practice_minutes=287,
        current_streak_days=5,
        average_score=72.5,
        improvement_percent=15.3,
        last_session_date=datetime.now().strftime("%Y-%m-%d"),
        top_achievements=[
            "Completed 20+ sessions",
            "5-day practice streak",
            "Mastered 'S' sound"
        ]
    )


def _get_mock_detailed(user_id: str, days: int) -> DetailedProgress:
    """Generate mock detailed data. Replace with DB queries."""
    return DetailedProgress(
        user_id=user_id,
        period_days=days,
        pcc_current=78.5,
        pcc_baseline=65.0,
        pcc_improvement=13.5,
        pwc_current=82.0,
        pwc_baseline=70.0,
        pwc_improvement=12.0,
        clarity_current=75.0,
        clarity_baseline=62.0,
        clarity_improvement=13.0,
        sessions_completed=23,
        exercises_completed=156,
        total_practice_minutes=287,
        problem_phonemes=["th", "r", "l"],
        improving_phonemes=["s", "ch", "sh"],
        recommendations=[
            "Focus on 'th' sound with tongue placement exercises",
            "Increase practice frequency to 15 min/day",
            "Try minimal pairs: 'think/sink', 'three/free'"
        ]
    )


def _get_mock_trends(user_id: str, days: int, metric: str) -> list[MetricTrend]:
    """Generate mock trend data. Replace with DB queries."""
    trends = []
    base_date = datetime.now()
    base_value = 65.0

    for i in range(min(days, 30)):
        date = base_date - timedelta(days=days - i - 1)
        # Simulate gradual improvement
        value = base_value + (i * 0.5) + (i % 3 - 1)
        trends.append(MetricTrend(
            date=date.strftime("%Y-%m-%d"),
            value=round(min(100, max(0, value)), 1),
            metric_type=metric
        ))

    return trends


def _get_mock_exercise_stats(user_id: str) -> list[ExerciseStats]:
    """Generate mock exercise stats. Replace with DB queries."""
    return [
        ExerciseStats(
            exercise_type="repeat_after_me",
            attempts=45,
            average_score=74.5,
            best_score=92.0,
            improvement=12.3,
            last_attempted=datetime.now().strftime("%Y-%m-%d")
        ),
        ExerciseStats(
            exercise_type="minimal_pairs",
            attempts=32,
            average_score=68.0,
            best_score=85.0,
            improvement=8.5,
            last_attempted=(datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
        ),
        ExerciseStats(
            exercise_type="tongue_twisters",
            attempts=18,
            average_score=62.5,
            best_score=78.0,
            improvement=5.2,
            last_attempted=(datetime.now() - timedelta(days=2)).strftime("%Y-%m-%d")
        ),
    ]


def _generate_recommendations(user_id: str) -> list[Recommendation]:
    """Generate AI-powered recommendations. Replace with ML model."""
    return [
        Recommendation(
            category="focus_area",
            title="Focus on 'TH' Sound",
            description="Your 'th' pronunciation scores are 15% below average. Try placing your tongue between your teeth.",
            priority=1,
            action_type="minimal_pairs"
        ),
        Recommendation(
            category="frequency",
            title="Increase Practice Time",
            description="Users who practice 15+ minutes daily see 2x faster improvement. You're averaging 10 minutes.",
            priority=2,
            action_type=None
        ),
        Recommendation(
            category="exercise",
            title="Try Tongue Twisters",
            description="Tongue twisters can help with your 'S' and 'SH' sounds. Start with 'She sells seashells'.",
            priority=2,
            action_type="tongue_twisters"
        ),
    ]


# ============================================================================
# Endpoints
# ============================================================================

@router.get("/summary", response_model=ProgressSummary, tags=["analytics"])
async def get_progress_summary(
    days: int = Query(default=30, ge=1, le=365, description="Period in days"),
    user: str = Depends(verify_token),
):
    """
    Get user's progress summary.

    Returns high-level stats suitable for the user dashboard.
    """
    logging.info(f"Progress summary request for user: {user}, days: {days}")

    # TODO: Replace with actual DB query
    return _get_mock_summary(user, days)


@router.get("/detailed", response_model=DetailedProgress, tags=["analytics"])
async def get_detailed_progress(
    days: int = Query(default=30, ge=1, le=365),
    user: str = Depends(verify_token),
):
    """
    Get detailed progress metrics.

    Returns comprehensive metrics suitable for therapist review.
    Includes PCC, PWC, clarity scores, and improvement tracking.
    """
    logging.info(f"Detailed progress request for user: {user}")

    # TODO: Replace with actual DB query
    return _get_mock_detailed(user, days)


@router.get("/trends", response_model=list[MetricTrend], tags=["analytics"])
async def get_progress_trends(
    metric: str = Query(
        default="overall",
        description="Metric type: overall, clarity, pace, pcc, pwc"
    ),
    days: int = Query(default=30, ge=7, le=365),
    user: str = Depends(verify_token),
):
    """
    Get progress trends over time.

    Returns time-series data for charting progress.
    """
    logging.info(f"Trends request for user: {user}, metric: {metric}")

    valid_metrics = ["overall", "clarity", "pace", "pcc", "pwc", "fluency"]
    if metric not in valid_metrics:
        metric = "overall"

    # TODO: Replace with actual DB query
    return _get_mock_trends(user, days, metric)


@router.get("/exercises", response_model=list[ExerciseStats], tags=["analytics"])
async def get_exercise_stats(
    user: str = Depends(verify_token),
):
    """
    Get statistics for each exercise type.

    Shows which exercises the user has tried and their performance.
    """
    logging.info(f"Exercise stats request for user: {user}")

    # TODO: Replace with actual DB query
    return _get_mock_exercise_stats(user)


@router.get("/recommendations", response_model=list[Recommendation], tags=["analytics"])
async def get_recommendations(
    user: str = Depends(verify_token),
):
    """
    Get AI-powered recommendations.

    Analyzes user's progress and suggests focus areas, exercises, and practice tips.
    """
    logging.info(f"Recommendations request for user: {user}")

    # TODO: Replace with ML model
    return _generate_recommendations(user)


@router.get("/streak", tags=["analytics"])
async def get_streak_info(
    user: str = Depends(verify_token),
):
    """
    Get practice streak information.

    Returns current streak, best streak, and streak history.
    """
    logging.info(f"Streak info request for user: {user}")

    # TODO: Replace with actual DB query
    return {
        "current_streak": 5,
        "best_streak": 12,
        "streak_history": [
            {"start": "2024-11-20", "end": "2024-12-01", "days": 12},
            {"start": "2024-12-03", "end": "2024-12-05", "days": 3},
            {"start": "2024-12-01", "end": None, "days": 5},  # Current
        ],
        "next_milestone": 7,
        "days_to_milestone": 2
    }


@router.get("/leaderboard", tags=["analytics"])
async def get_leaderboard(
    period: str = Query(default="week", description="week, month, all"),
    user: str = Depends(verify_token),
):
    """
    Get anonymized leaderboard.

    Optional gamification feature showing relative standing.
    """
    logging.info(f"Leaderboard request for period: {period}")

    # TODO: Replace with actual DB query
    return {
        "user_rank": 15,
        "total_users": 127,
        "percentile": 88,
        "top_10": [
            {"rank": 1, "score": 95.2, "exercises": 234},
            {"rank": 2, "score": 93.8, "exercises": 198},
            {"rank": 3, "score": 91.5, "exercises": 212},
        ]
    }
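Since every route here hangs authentication on the verify_token dependency, the router can be exercised in isolation with FastAPI's TestClient by overriding that dependency. The /analytics prefix below is an assumption — the real mount point is configured elsewhere in the app:

    from fastapi import FastAPI
    from fastapi.testclient import TestClient

    from api.endpoints.v1.routers import analytics
    from api.endpoints.v1.auth.verify import verify_token

    app = FastAPI()
    app.include_router(analytics.router, prefix="/analytics")  # assumed prefix
    # Bypass real token verification for local testing
    app.dependency_overrides[verify_token] = lambda: "demo-user"

    client = TestClient(app)
    print(client.get("/analytics/summary", params={"days": 14}).json())
    print(client.get("/analytics/trends", params={"metric": "pcc"}).json())
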
api/endpoints/v1/routers/health.py
ADDED
@@ -0,0 +1,24 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import JSONResponse

from api.endpoints.v1.auth.verify import verify_token

router = APIRouter()


@router.get("", status_code=200)
async def check(user: str = Depends(verify_token)):
    """Secured health check endpoint."""
    if not user:
        raise HTTPException(status_code=401, detail="Unauthorized")

    return JSONResponse(
        status_code=200,
        content={
            "message": "Service is running smoothly",
            "batches_processed": 0,
            "title": "title",
            "content": "content",
            "transcript": "hi",
        },
    )
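Because the route path is the empty string, this endpoint answers at whatever prefix the router is mounted under. The include_router call is not part of this diff, so a mount such as the following is an assumption:

    from fastapi import FastAPI

    from api.endpoints.v1.routers import health

    app = FastAPI()
    # GET /health then reaches the check() handler above
    app.include_router(health.router, prefix="/health")  # assumed prefix
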
api/endpoints/v1/routers/therapy.py
ADDED
@@ -0,0 +1,639 @@
| 1 |
+
"""
|
| 2 |
+
Therapy Router - API endpoints for speech therapy features.
|
| 3 |
+
|
| 4 |
+
Endpoints:
|
| 5 |
+
- POST /therapy/transcribe - Transcribe audio with therapy-optimized ASR
|
| 6 |
+
- POST /therapy/tts - Text-to-speech synthesis
|
| 7 |
+
- POST /therapy/analyze - Pronunciation analysis
|
| 8 |
+
- POST /therapy/exercise - Generate and evaluate exercises
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import io
|
| 12 |
+
import logging
|
| 13 |
+
from typing import Optional
|
| 14 |
+
from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile, Query
|
| 15 |
+
from fastapi.responses import JSONResponse, StreamingResponse
|
| 16 |
+
from pydantic import BaseModel, Field
|
| 17 |
+
|
| 18 |
+
from api.config import settings
|
| 19 |
+
from api.endpoints.v1.auth.verify import verify_token
|
| 20 |
+
from api.endpoints.v1.processing.therapy_asr import (
|
| 21 |
+
transcribe_for_therapy,
|
| 22 |
+
ASREngine,
|
| 23 |
+
TranscriptionResult
|
| 24 |
+
)
|
| 25 |
+
from api.endpoints.v1.processing.therapy_tts import (
|
| 26 |
+
synthesize_speech,
|
| 27 |
+
get_therapy_tts,
|
| 28 |
+
TTSVoice,
|
| 29 |
+
TTSEngine
|
| 30 |
+
)
|
| 31 |
+
from api.endpoints.v1.processing.pronunciation_analysis import (
|
| 32 |
+
analyze_pronunciation,
|
| 33 |
+
PronunciationFeedback,
|
| 34 |
+
AIFeedback
|
| 35 |
+
)
|
| 36 |
+
from api.endpoints.v1.processing.ai_feedback import get_ai_feedback_generator
|
| 37 |
+
|
| 38 |
+
router = APIRouter()
|
| 39 |
+
|
| 40 |
+
if settings.ENVIRONMENT == "development":
|
| 41 |
+
logging.basicConfig(level=logging.DEBUG)
|
| 42 |
+
else:
|
| 43 |
+
logging.basicConfig(level=logging.WARNING)
|
| 44 |
+
|
| 45 |
+
# Allowed audio types
|
| 46 |
+
ALLOWED_AUDIO_TYPES = [
|
| 47 |
+
"audio/mpeg", "audio/mp4", "audio/m4a", "audio/x-m4a",
|
| 48 |
+
"audio/wav", "audio/x-wav", "audio/webm"
|
| 49 |
+
]
|
| 50 |
+
FILE_SIZE_LIMIT = 25 * 1024 * 1024 # 25 MB
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# Request/Response Models
|
| 54 |
+
class TranscribeRequest(BaseModel):
|
| 55 |
+
"""Request model for transcription."""
|
| 56 |
+
engine: Optional[ASREngine] = Field(None, description="ASR engine to use")
|
| 57 |
+
user_profile: Optional[dict] = Field(None, description="User speech profile")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class TranscriptionResponse(BaseModel):
|
| 61 |
+
"""Response model for transcription."""
|
| 62 |
+
text: str
|
| 63 |
+
engine_used: str
|
| 64 |
+
confidence: Optional[float] = None
|
| 65 |
+
word_timestamps: Optional[list] = None
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
class TTSRequest(BaseModel):
|
| 69 |
+
"""Request model for TTS."""
|
| 70 |
+
text: str = Field(..., min_length=1, max_length=5000)
|
| 71 |
+
voice: TTSVoice = Field(default=TTSVoice.NEUTRAL)
|
| 72 |
+
speed: float = Field(default=1.0, ge=0.5, le=2.0)
|
| 73 |
+
engine: Optional[TTSEngine] = None
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class PronunciationRequest(BaseModel):
|
| 77 |
+
"""Request model for pronunciation analysis."""
|
| 78 |
+
target_text: str = Field(..., min_length=1, max_length=500)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class AIFeedbackResponse(BaseModel):
|
| 82 |
+
"""AI-generated feedback from GPT-4o."""
|
| 83 |
+
feedback: str
|
| 84 |
+
encouragement: str
|
| 85 |
+
specific_tips: list[str]
|
| 86 |
+
recommended_exercises: list[str]
|
| 87 |
+
difficulty_adjustment: Optional[str] = None
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class PronunciationResponse(BaseModel):
|
| 91 |
+
"""Response model for pronunciation analysis."""
|
| 92 |
+
overall_score: float
|
| 93 |
+
clarity_score: float
|
| 94 |
+
pace_score: float
|
| 95 |
+
fluency_score: float
|
| 96 |
+
transcription: str
|
| 97 |
+
target_text: str
|
| 98 |
+
suggestions: list[str]
|
| 99 |
+
word_scores: list[dict]
|
| 100 |
+
ai_feedback: Optional[AIFeedbackResponse] = None # GPT-4o powered feedback
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class ExerciseRequest(BaseModel):
|
| 104 |
+
"""Request model for exercise generation."""
|
| 105 |
+
exercise_type: str = Field(..., description="Type: repeat_after_me, minimal_pairs, etc.")
|
| 106 |
+
difficulty: int = Field(default=1, ge=1, le=5)
|
| 107 |
+
focus_phonemes: Optional[list[str]] = None
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class ExerciseResponse(BaseModel):
|
| 111 |
+
"""Response model for exercise."""
|
| 112 |
+
exercise_id: str
|
| 113 |
+
exercise_type: str
|
| 114 |
+
target_text: str
|
| 115 |
+
instructions: str
|
| 116 |
+
audio_prompt_available: bool
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# Endpoints
|
| 120 |
+
|
| 121 |
+
@router.post("/transcribe", response_model=TranscriptionResponse, tags=["therapy"])
|
| 122 |
+
async def transcribe_therapy_audio(
|
| 123 |
+
background_tasks: BackgroundTasks,
|
| 124 |
+
file: UploadFile = File(...),
|
| 125 |
+
engine: Optional[ASREngine] = Query(None, description="ASR engine"),
|
| 126 |
+
user: str = Depends(verify_token),
|
| 127 |
+
):
|
| 128 |
+
"""
|
| 129 |
+
Transcribe audio using therapy-optimized ASR.
|
| 130 |
+
|
| 131 |
+
Supports multiple engines:
|
| 132 |
+
- whisper_api: OpenAI Whisper API (best accuracy)
|
| 133 |
+
- whisper_local: Local Whisper (privacy-focused)
|
| 134 |
+
- speechbrain: SpeechBrain (optimized for atypical speech)
|
| 135 |
+
- auto: Automatic selection based on user profile
|
| 136 |
+
"""
|
| 137 |
+
logging.info(f"Therapy transcription request from user: {user}")
|
| 138 |
+
|
| 139 |
+
# Validate file
|
| 140 |
+
if file.content_type not in ALLOWED_AUDIO_TYPES:
|
| 141 |
+
raise HTTPException(status_code=400, detail="Invalid audio file type")
|
| 142 |
+
|
| 143 |
+
contents = await file.read()
|
| 144 |
+
if len(contents) > FILE_SIZE_LIMIT:
|
| 145 |
+
raise HTTPException(status_code=400, detail="File size exceeds 25 MB limit")
|
| 146 |
+
|
| 147 |
+
try:
|
| 148 |
+
result = transcribe_for_therapy(
|
| 149 |
+
audio_data=contents,
|
| 150 |
+
filename=file.filename or "audio.wav",
|
| 151 |
+
content_type=file.content_type,
|
| 152 |
+
engine=engine
|
| 153 |
+
)
|
| 154 |
+
|
| 155 |
+
return TranscriptionResponse(
|
| 156 |
+
text=result.text,
|
| 157 |
+
engine_used=result.engine_used.value,
|
| 158 |
+
confidence=result.confidence,
|
| 159 |
+
word_timestamps=result.word_timestamps
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
except Exception as e:
|
| 163 |
+
logging.error(f"Transcription failed: {e}")
|
| 164 |
+
raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
@router.post("/tts", tags=["therapy"])
|
| 168 |
+
async def text_to_speech(
|
| 169 |
+
request: TTSRequest,
|
| 170 |
+
user: str = Depends(verify_token),
|
| 171 |
+
):
|
| 172 |
+
"""
|
| 173 |
+
Convert text to speech.
|
| 174 |
+
|
| 175 |
+
Returns audio stream (WAV format).
|
| 176 |
+
"""
|
| 177 |
+
logging.info(f"TTS request from user: {user}")
|
| 178 |
+
|
| 179 |
+
try:
|
| 180 |
+
result = synthesize_speech(
|
| 181 |
+
text=request.text,
|
| 182 |
+
voice=request.voice,
|
| 183 |
+
speed=request.speed
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
return StreamingResponse(
|
| 187 |
+
io.BytesIO(result.audio_bytes),
|
| 188 |
+
media_type=f"audio/{result.format}",
|
| 189 |
+
headers={
|
| 190 |
+
"Content-Disposition": f"attachment; filename=speech.{result.format}",
|
| 191 |
+
"X-Engine-Used": result.engine_used.value
|
| 192 |
+
}
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
except Exception as e:
|
| 196 |
+
logging.error(f"TTS failed: {e}")
|
| 197 |
+
raise HTTPException(status_code=500, detail=f"TTS failed: {str(e)}")
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
@router.post("/tts/prompt", tags=["therapy"])
|
| 201 |
+
async def generate_therapy_prompt(
|
| 202 |
+
exercise_type: str = Query(..., description="Type: repeat_after_me, pronunciation, slower"),
|
| 203 |
+
target_text: str = Query(..., description="Text to practice"),
|
| 204 |
+
user: str = Depends(verify_token),
|
| 205 |
+
):
|
| 206 |
+
"""
|
| 207 |
+
Generate audio prompt for therapy exercise.
|
| 208 |
+
|
| 209 |
+
Pre-built prompts like "Please repeat after me: [text]"
|
| 210 |
+
"""
|
| 211 |
+
logging.info(f"Therapy prompt request: {exercise_type}")
|
| 212 |
+
|
| 213 |
+
try:
|
| 214 |
+
tts = get_therapy_tts()
|
| 215 |
+
result = tts.generate_therapy_prompt(exercise_type, target_text)
|
| 216 |
+
|
| 217 |
+
return StreamingResponse(
|
| 218 |
+
io.BytesIO(result.audio_bytes),
|
| 219 |
+
media_type=f"audio/{result.format}",
|
| 220 |
+
headers={
|
| 221 |
+
"Content-Disposition": f"attachment; filename=prompt.{result.format}"
|
| 222 |
+
}
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
except Exception as e:
|
| 226 |
+
logging.error(f"Prompt generation failed: {e}")
|
| 227 |
+
raise HTTPException(status_code=500, detail=f"Prompt generation failed: {str(e)}")
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
@router.post("/analyze", response_model=PronunciationResponse, tags=["therapy"])
|
| 231 |
+
async def analyze_pronunciation_endpoint(
|
| 232 |
+
background_tasks: BackgroundTasks,
|
| 233 |
+
file: UploadFile = File(...),
|
| 234 |
+
target_text: str = Query(..., description="Expected text/phrase"),
|
| 235 |
+
include_ai_feedback: bool = Query(True, description="Include GPT-4o AI feedback"),
|
| 236 |
+
user: str = Depends(verify_token),
|
| 237 |
+
):
|
| 238 |
+
"""
|
| 239 |
+
Analyze pronunciation against target text.
|
| 240 |
+
|
| 241 |
+
Returns scores for:
|
| 242 |
+
- Overall pronunciation
|
| 243 |
+
- Clarity
|
| 244 |
+
- Pace
|
| 245 |
+
- Fluency
|
| 246 |
+
Plus per-word feedback, improvement suggestions, and AI-powered personalized feedback.
|
| 247 |
+
|
| 248 |
+
AI feedback uses GPT-4o via GitHub Models API for free testing.
|
| 249 |
+
"""
|
| 250 |
+
logging.info(f"Pronunciation analysis for user: {user}")
|
| 251 |
+
|
| 252 |
+
# Validate file
|
| 253 |
+
if file.content_type not in ALLOWED_AUDIO_TYPES:
|
| 254 |
+
raise HTTPException(status_code=400, detail="Invalid audio file type")
|
| 255 |
+
|
| 256 |
+
contents = await file.read()
|
| 257 |
+
if len(contents) > FILE_SIZE_LIMIT:
|
| 258 |
+
raise HTTPException(status_code=400, detail="File size exceeds 25 MB limit")
|
| 259 |
+
|
| 260 |
+
try:
|
| 261 |
+
# Now async with AI feedback integration
|
| 262 |
+
feedback = await analyze_pronunciation(
|
| 263 |
+
audio_bytes=contents,
|
| 264 |
+
target_text=target_text,
|
| 265 |
+
include_ai_feedback=include_ai_feedback
|
| 266 |
+
)
|
| 267 |
+
|
| 268 |
+
# Build AI feedback response if available
|
| 269 |
+
ai_feedback_response = None
|
| 270 |
+
if feedback.ai_feedback:
|
| 271 |
+
ai_feedback_response = AIFeedbackResponse(
|
| 272 |
+
feedback=feedback.ai_feedback.feedback,
|
| 273 |
+
encouragement=feedback.ai_feedback.encouragement,
|
| 274 |
+
specific_tips=feedback.ai_feedback.specific_tips,
|
| 275 |
+
recommended_exercises=feedback.ai_feedback.recommended_exercises,
|
| 276 |
+
difficulty_adjustment=feedback.ai_feedback.difficulty_adjustment
|
| 277 |
+
)
|
| 278 |
+
|
| 279 |
+
return PronunciationResponse(
|
| 280 |
+
overall_score=feedback.overall_score,
|
| 281 |
+
clarity_score=feedback.clarity_score,
|
| 282 |
+
pace_score=feedback.pace_score,
|
| 283 |
+
fluency_score=feedback.fluency_score,
|
| 284 |
+
transcription=feedback.transcription,
|
| 285 |
+
target_text=feedback.target_text,
|
| 286 |
+
suggestions=feedback.suggestions,
|
| 287 |
+
word_scores=[
|
| 288 |
+
{
|
| 289 |
+
"word": ws.word,
|
| 290 |
+
"score": ws.score,
|
| 291 |
+
"errors": [
|
| 292 |
+
{
|
| 293 |
+
"type": e.error_type.value,
|
| 294 |
+
"expected": e.expected,
|
| 295 |
+
"actual": e.actual,
|
| 296 |
+
"suggestion": e.suggestion
|
| 297 |
+
}
|
| 298 |
+
for e in ws.errors
|
| 299 |
+
]
|
| 300 |
+
}
|
| 301 |
+
for ws in feedback.word_scores
|
| 302 |
+
],
|
| 303 |
+
ai_feedback=ai_feedback_response
|
| 304 |
+
)
|
| 305 |
+
|
| 306 |
+
except Exception as e:
|
| 307 |
+
logging.error(f"Pronunciation analysis failed: {e}")
|
| 308 |
+
raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")

# ============================================================================
# Demo/Test Endpoints (no auth required)
# ============================================================================

@router.get("/demo/exercises", tags=["therapy-demo"])
async def demo_list_exercises():
    """[DEMO] List exercises without auth - returns actual practice exercises."""
    return {
        "exercises": [
            {
                "id": "ex-001",
                "title": "Simple Greetings",
                "category": "repeat_after_me",
                "difficulty": "easy",
                "target_text": "Hello, how are you today?",
                "instructions": "Listen carefully, then repeat the greeting clearly and naturally.",
            },
            {
                "id": "ex-002",
                "title": "S Sound Practice",
                "category": "minimal_pairs",
                "difficulty": "medium",
                "target_text": "She sells seashells by the seashore",
                "instructions": "Focus on the 'S' and 'SH' sounds. Speak slowly at first.",
            },
            {
                "id": "ex-003",
                "title": "R Sound Challenge",
                "category": "tongue_twisters",
                "difficulty": "hard",
                "target_text": "Red lorry, yellow lorry",
                "instructions": "Practice the 'R' and 'L' sounds. Start slow, then speed up.",
            },
            {
                "id": "ex-004",
                "title": "Daily Introduction",
                "category": "repeat_after_me",
                "difficulty": "easy",
                "target_text": "My name is... and I am learning to speak clearly.",
                "instructions": "Replace '...' with your name. Speak with confidence!",
            },
            {
                "id": "ex-005",
                "title": "TH Sound Practice",
                "category": "minimal_pairs",
                "difficulty": "medium",
                "target_text": "Think through these three things thoroughly",
                "instructions": "Place your tongue between your teeth for the 'TH' sound.",
            },
            {
                "id": "ex-006",
                "title": "Peter Piper",
                "category": "tongue_twisters",
                "difficulty": "hard",
                "target_text": "Peter Piper picked a peck of pickled peppers",
                "instructions": "Focus on the 'P' sounds. Keep your lips together firmly.",
            },
            {
                "id": "ex-007",
                "title": "Counting Practice",
                "category": "repeat_after_me",
                "difficulty": "easy",
                "target_text": "One, two, three, four, five",
                "instructions": "Count clearly and pause briefly between each number.",
            },
            {
                "id": "ex-008",
                "title": "W vs V Sounds",
                "category": "minimal_pairs",
                "difficulty": "medium",
                "target_text": "Very well, we will wait",
                "instructions": "Notice the difference: 'V' uses teeth on lip, 'W' uses rounded lips.",
            },
        ]
    }

@router.post("/demo/feedback", tags=["therapy-demo"])
|
| 389 |
+
async def demo_ai_feedback(
|
| 390 |
+
target_text: str = Query(..., description="Text to practice"),
|
| 391 |
+
transcription: str = Query(..., description="What user said"),
|
| 392 |
+
score: float = Query(75.0, description="Overall score 0-100"),
|
| 393 |
+
):
|
| 394 |
+
"""[DEMO] Get AI feedback without auth - for testing GPT-4o integration."""
|
| 395 |
+
generator = get_ai_feedback_generator()
|
| 396 |
+
|
| 397 |
+
feedback = await generator.generate_feedback(
|
| 398 |
+
target_text=target_text,
|
| 399 |
+
transcription=transcription,
|
| 400 |
+
overall_score=score,
|
| 401 |
+
clarity_score=score - 5,
|
| 402 |
+
pace_score=score + 5,
|
| 403 |
+
fluency_score=score,
|
| 404 |
+
errors=[{
|
| 405 |
+
"word": target_text.split()[0] if target_text else "word",
|
| 406 |
+
"expected": target_text.split()[0] if target_text else "word",
|
| 407 |
+
"actual": transcription.split()[0] if transcription else "word",
|
| 408 |
+
"error_type": "substitution"
|
| 409 |
+
}] if target_text != transcription else [],
|
| 410 |
+
user_context=None
|
| 411 |
+
)
|
| 412 |
+
|
| 413 |
+
return {
|
| 414 |
+
"target_text": target_text,
|
| 415 |
+
"transcription": transcription,
|
| 416 |
+
"scores": {
|
| 417 |
+
"overall": score,
|
| 418 |
+
"clarity": score - 5,
|
| 419 |
+
"pace": score + 5,
|
| 420 |
+
"fluency": score
|
| 421 |
+
},
|
| 422 |
+
"ai_feedback": {
|
| 423 |
+
"feedback": feedback.feedback,
|
| 424 |
+
"encouragement": feedback.encouragement,
|
| 425 |
+
"specific_tips": feedback.specific_tips,
|
| 426 |
+
"recommended_exercises": feedback.recommended_exercises,
|
| 427 |
+
"difficulty_adjustment": feedback.difficulty_adjustment
|
| 428 |
+
}
|
| 429 |
+
}
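
Since the demo endpoints take query parameters and require no auth, a one-line call is enough to exercise the GPT-4o integration (host and port are placeholders):

    curl -X POST "http://localhost:7860/v1/therapy/demo/feedback?target_text=hello%20there&transcription=hello%20dare&score=70"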

@router.get("/demo/session-summary", tags=["therapy-demo"])
async def demo_session_summary():
    """[DEMO] Get AI session summary without auth."""
    generator = get_ai_feedback_generator()

    summary = await generator.generate_session_summary(
        session_stats={
            "duration_minutes": 12,
            "exercise_count": 6,
            "average_score": 78,
            "best_score": 92,
            "exercise_types": ["repeat_after_me", "minimal_pairs"],
        },
        attempts=[],
    )

    return {"summary": summary}


@router.get("/demo/weekly-insights", tags=["therapy-demo"])
async def demo_weekly_insights():
    """[DEMO] Get AI weekly insights without auth."""
    generator = get_ai_feedback_generator()

    insights = await generator.generate_weekly_insights(
        weekly_data={
            "sessions_this_week": 5,
            "practice_minutes": 40,
            "avg_score": 76,
            "score_change": 6.5,
            "strengths": ["Consistent daily practice", "Good pace control"],
            "weaknesses": ["S sounds", "Word endings"],
        }
    )

    return insights

# ============================================================================
# Authenticated Endpoints
# ============================================================================

@router.get("/exercises", tags=["therapy"])
async def list_exercise_types(
    user: str = Depends(verify_token),
):
    """List available therapy exercise types."""
    return {
        "exercises": [
            {
                "type": "repeat_after_me",
                "name": "Repeat After Me",
                "description": "Listen and repeat the target phrase",
            },
            {
                "type": "minimal_pairs",
                "name": "Minimal Pairs",
                "description": "Practice similar-sounding words (e.g., ship/chip)",
            },
            {
                "type": "tongue_twisters",
                "name": "Tongue Twisters",
                "description": "Practice challenging phrases for fluency",
            },
            {
                "type": "word_chains",
                "name": "Word Chains",
                "description": "Build vocabulary with connected words",
            },
            {
                "type": "sentence_building",
                "name": "Sentence Building",
                "description": "Progress from words to full sentences",
            },
        ]
    }

@router.post("/exercise/evaluate", response_model=PronunciationResponse, tags=["therapy"])
|
| 511 |
+
async def evaluate_exercise(
|
| 512 |
+
background_tasks: BackgroundTasks,
|
| 513 |
+
file: UploadFile = File(...),
|
| 514 |
+
exercise_type: str = Query(...),
|
| 515 |
+
target_text: str = Query(...),
|
| 516 |
+
include_ai_feedback: bool = Query(True),
|
| 517 |
+
user: str = Depends(verify_token),
|
| 518 |
+
):
|
| 519 |
+
"""
|
| 520 |
+
Evaluate user's exercise attempt.
|
| 521 |
+
|
| 522 |
+
Same as /analyze but tracks exercise context.
|
| 523 |
+
Includes GPT-4o AI feedback for personalized improvement tips.
|
| 524 |
+
"""
|
| 525 |
+
# Reuse pronunciation analysis
|
| 526 |
+
return await analyze_pronunciation_endpoint(
|
| 527 |
+
background_tasks=background_tasks,
|
| 528 |
+
file=file,
|
| 529 |
+
target_text=target_text,
|
| 530 |
+
include_ai_feedback=include_ai_feedback,
|
| 531 |
+
user=user
|
| 532 |
+
)


# Session Summary Models
class SessionSummaryRequest(BaseModel):
    """Request for session summary."""
    duration_minutes: int = Field(..., ge=1)
    exercise_count: int = Field(..., ge=1)
    average_score: float = Field(..., ge=0, le=100)
    best_score: float = Field(..., ge=0, le=100)
    exercise_types: list[str] = Field(default_factory=list)


class SessionSummaryResponse(BaseModel):
    """AI-generated session summary."""
    summary: str


class WeeklyInsightsRequest(BaseModel):
    """Request for weekly insights."""
    sessions_this_week: int = Field(..., ge=0)
    practice_minutes: int = Field(..., ge=0)
    avg_score: float = Field(..., ge=0, le=100)
    score_change: float = Field(default=0)  # Percentage change from last week
    strengths: list[str] = Field(default_factory=list)
    weaknesses: list[str] = Field(default_factory=list)


class WeeklyInsightsResponse(BaseModel):
    """AI-generated weekly insights."""
    summary: str
    celebration: str
    focus_area: str
    goal: str

@router.post("/session/summary", response_model=SessionSummaryResponse, tags=["therapy"])
|
| 569 |
+
async def get_session_summary(
|
| 570 |
+
request: SessionSummaryRequest,
|
| 571 |
+
user: str = Depends(verify_token),
|
| 572 |
+
):
|
| 573 |
+
"""
|
| 574 |
+
Generate AI-powered session summary.
|
| 575 |
+
|
| 576 |
+
Uses GPT-4o via GitHub Models to create personalized,
|
| 577 |
+
encouraging session summaries.
|
| 578 |
+
"""
|
| 579 |
+
logging.info(f"Session summary request from user: {user}")
|
| 580 |
+
|
| 581 |
+
try:
|
| 582 |
+
ai_generator = get_ai_feedback_generator()
|
| 583 |
+
summary = await ai_generator.generate_session_summary(
|
| 584 |
+
session_stats={
|
| 585 |
+
"duration_minutes": request.duration_minutes,
|
| 586 |
+
"exercise_count": request.exercise_count,
|
| 587 |
+
"average_score": request.average_score,
|
| 588 |
+
"best_score": request.best_score,
|
| 589 |
+
"exercise_types": request.exercise_types
|
| 590 |
+
},
|
| 591 |
+
attempts=[] # Can be extended to include attempt history
|
| 592 |
+
)
|
| 593 |
+
|
| 594 |
+
return SessionSummaryResponse(summary=summary)
|
| 595 |
+
|
| 596 |
+
except Exception as e:
|
| 597 |
+
logging.error(f"Session summary generation failed: {e}")
|
| 598 |
+
raise HTTPException(status_code=500, detail=f"Summary generation failed: {str(e)}")
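
Unlike /analyze, this endpoint takes a JSON body validated by SessionSummaryRequest. A sketch of a call, with the same placeholder host and token as above:

    import httpx

    r = httpx.post(
        "http://localhost:7860/v1/therapy/session/summary",
        json={
            "duration_minutes": 12,
            "exercise_count": 6,
            "average_score": 78,
            "best_score": 92,
            "exercise_types": ["repeat_after_me", "minimal_pairs"],
        },
        headers={"Authorization": "Bearer <token>"},
    )
    print(r.json()["summary"])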


@router.post("/insights/weekly", response_model=WeeklyInsightsResponse, tags=["therapy"])
async def get_weekly_insights(
    request: WeeklyInsightsRequest,
    user: str = Depends(verify_token),
):
    """
    Generate AI-powered weekly progress insights.

    Uses GPT-4o to analyze weekly practice data and provide:
    - Progress summary
    - Celebration of achievements
    - Focus area for next week
    - Realistic goal setting
    """
    logging.info(f"Weekly insights request from user: {user}")

    try:
        ai_generator = get_ai_feedback_generator()
        insights = await ai_generator.generate_weekly_insights(
            weekly_data={
                "sessions_this_week": request.sessions_this_week,
                "practice_minutes": request.practice_minutes,
                "avg_score": request.avg_score,
                "score_change": request.score_change,
                "strengths": request.strengths,
                "weaknesses": request.weaknesses,
            }
        )

        return WeeklyInsightsResponse(
            summary=insights.get("summary", ""),
            celebration=insights.get("celebration", ""),
            focus_area=insights.get("focus_area", ""),
            goal=insights.get("goal", ""),
        )

    except Exception as e:
        logging.error(f"Weekly insights generation failed: {e}")
        raise HTTPException(status_code=500, detail=f"Insights generation failed: {str(e)}")
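
A matching sketch for the weekly endpoint (same placeholder host and token; the payload values are illustrative):

    import httpx

    r = httpx.post(
        "http://localhost:7860/v1/therapy/insights/weekly",
        json={
            "sessions_this_week": 5,
            "practice_minutes": 40,
            "avg_score": 76,
            "score_change": 6.5,
            "strengths": ["Consistent daily practice"],
            "weaknesses": ["S sounds", "Word endings"],
        },
        headers={"Authorization": "Bearer <token>"},
    )
    print(r.json()["focus_area"])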
api/endpoints/v1/routers/upload.py
ADDED
@@ -0,0 +1,82 @@
import io
import logging

from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse

from api.config import settings
from api.endpoints.v1.auth.verify import verify_token
from api.endpoints.v1.processing.audio import transcribe_with_whisper
from api.endpoints.v1.processing.soap import generate_soap

router = APIRouter()

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)

# OpenAI Whisper supports the following file types
ALLOWED_FILE_TYPES = [
    "audio/mpeg",
    "audio/mp4",
    "audio/m4a",
    "audio/x-m4a",
    "audio/wav",
    "audio/x-wav",
    "audio/webm",
    "video/mp4",
    "video/mpeg",
]
# OpenAI Whisper file uploads are currently limited to 25 MB
FILE_SIZE_LIMIT = 25 * 1024 * 1024  # 25 MB in bytes


@router.post("", status_code=200)
async def transcribe_audio(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    user: str = Depends(verify_token),
):
    """Endpoint to upload and process audio files with OpenAI Whisper."""

    logging.info(f"Transcribing audio file: {file.filename}")
    logging.debug(f"Audio file mime type: {file.content_type}")

    # Check file type
    if file.content_type not in ALLOWED_FILE_TYPES:
        raise HTTPException(status_code=400, detail="Invalid file type")

    # Check file size
    contents = await file.read()
    logging.debug(f"Audio file size: {len(contents)} bytes")
    if len(contents) > FILE_SIZE_LIMIT:
        raise HTTPException(status_code=400, detail="File size exceeds 25 MB limit")

    try:
        # Use BytesIO to handle the file in-memory
        file_like = io.BytesIO(contents)

        # BytesIO starts at position 0; the explicit seek just makes that clear
        file_like.seek(0)

        # Add a background task to close the buffer after processing
        background_tasks.add_task(file_like.close)

        # Pass the file-like object to the transcription function
        transcription = transcribe_with_whisper(
            file.filename, file_like, file.content_type
        )

        # Generate a SOAP note from the transcription
        soap_note = generate_soap(transcription.text)

        return JSONResponse(
            content={
                "message": f"File processed successfully by user {user}",
                "content": soap_note,
                "transcription": transcription.text,
            }
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
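
An upload sketch for this router — host, port, route prefix, and token are placeholder assumptions (the actual mount point depends on how api_router includes this router):

    import httpx

    with open("visit.m4a", "rb") as f:
        r = httpx.post(
            "http://localhost:7860/v1/upload",
            files={"file": ("visit.m4a", f, "audio/m4a")},
            headers={"Authorization": "Bearer <token>"},
            timeout=120.0,
        )
    print(r.json()["transcription"])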
api/endpoints/v1/therapy/__init__.py
ADDED
@@ -0,0 +1 @@
"""Therapy module - Speech therapy exercises and analysis."""
api/endpoints/v1/utils.py
ADDED
@@ -0,0 +1,10 @@
from datetime import datetime


# Function to parse an RFC 3339 datetime string and convert it to epoch time
def parse_rfc3339(time_str):
    # fromisoformat() yields a timezone-aware datetime once "Z" is mapped to
    # "+00:00", so .timestamp() already respects the embedded offset;
    # forcing tzinfo to UTC with replace() would silently shift non-UTC inputs.
    dt = datetime.fromisoformat(time_str.replace("Z", "+00:00"))
    return dt.timestamp()
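
Usage sketch (inputs are illustrative; both strings name the same instant, so they map to the same epoch value):

    parse_rfc3339("2024-01-01T00:00:00Z")       # -> 1704067200.0
    parse_rfc3339("2024-01-01T05:00:00+05:00")  # -> 1704067200.0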
api/index.py
ADDED
@@ -0,0 +1,5 @@
# Vercel serverless entry point
from api.main import app

# Handler for Vercel
handler = app
api/main.py
ADDED
@@ -0,0 +1,64 @@
import logging

from fastapi import APIRouter, FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.routing import APIRoute

from api.config import settings
from api.endpoints.v1.api import api_router

info_router = APIRouter()


@info_router.get("/", status_code=200, include_in_schema=False)
async def info():
    return [{"Status": "API Running"}]


@info_router.get("/health", status_code=200, tags=["health"])
async def health_check():
    """Health check endpoint for Railway deployment"""
    return {"status": "healthy", "service": "ubumuntu-api"}


def custom_generate_unique_id(route: APIRoute):
    """Generates a custom ID when using the TypeScript Generator Client

    Args:
        route (APIRoute): The route to be customised

    Returns:
        str: tag-route_name, e.g. items-CreateItem
    """
    return f"{route.tags[0]}-{route.name}"


def get_application():
    _app = FastAPI(
        title=settings.PROJECT_NAME,
        description=settings.PROJECT_DESCRIPTION,
        generate_unique_id_function=custom_generate_unique_id,
        root_path=settings.ROOT,
        root_path_in_servers=True,
        openapi_url=settings.openapi_url,
    )

    # Allow all origins for the demo; in production, restrict to specific
    # domains (browsers reject wildcard origins on credentialed requests).
    logger = logging.getLogger("uvicorn")
    logger.info("Enabling CORS for all origins (demo mode)")
    _app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    _app.include_router(api_router, prefix=settings.API_VERSION)
    _app.include_router(info_router, tags=[""])

    return _app


app = get_application()
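
For local development, the application can be served directly with uvicorn (the module path and port below are conventional assumptions, not values mandated by this commit):

    uvicorn api.main:app --reload --port 7860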
app.py
ADDED
@@ -0,0 +1,5 @@
# HuggingFace Spaces entry point
from api.main import app

# Re-export for uvicorn
__all__ = ["app"]
requirements.txt
ADDED
@@ -0,0 +1,10 @@
fastapi
uvicorn[standard]
pydantic
pydantic-settings
python-dotenv
python-jose[cryptography]
python-multipart
openai
httpx
requests