Macbook committed on
Commit cc4ea58 · 1 Parent(s): aae6d8c

Add FastAPI application

Files changed (48)
  1. Dockerfile +13 -0
  2. api/__init__.py +1 -0
  3. api/__pycache__/__init__.cpython-312.pyc +0 -0
  4. api/__pycache__/config.cpython-312.pyc +0 -0
  5. api/__pycache__/main.cpython-312.pyc +0 -0
  6. api/config.py +29 -0
  7. api/endpoints/__init__.py +0 -0
  8. api/endpoints/__pycache__/__init__.cpython-312.pyc +0 -0
  9. api/endpoints/v1/__init__.py +0 -0
  10. api/endpoints/v1/__pycache__/__init__.cpython-312.pyc +0 -0
  11. api/endpoints/v1/__pycache__/api.cpython-312.pyc +0 -0
  12. api/endpoints/v1/analytics/__init__.py +1 -0
  13. api/endpoints/v1/api.py +31 -0
  14. api/endpoints/v1/auth/__init__.py +0 -0
  15. api/endpoints/v1/auth/__pycache__/__init__.cpython-312.pyc +0 -0
  16. api/endpoints/v1/auth/__pycache__/verify.cpython-312.pyc +0 -0
  17. api/endpoints/v1/auth/verify.py +122 -0
  18. api/endpoints/v1/clients/__init__.py +0 -0
  19. api/endpoints/v1/processing/__init__.py +0 -0
  20. api/endpoints/v1/processing/__pycache__/__init__.cpython-312.pyc +0 -0
  21. api/endpoints/v1/processing/__pycache__/ai_feedback.cpython-312.pyc +0 -0
  22. api/endpoints/v1/processing/__pycache__/audio.cpython-312.pyc +0 -0
  23. api/endpoints/v1/processing/__pycache__/pronunciation_analysis.cpython-312.pyc +0 -0
  24. api/endpoints/v1/processing/__pycache__/soap.cpython-312.pyc +0 -0
  25. api/endpoints/v1/processing/__pycache__/therapy_asr.cpython-312.pyc +0 -0
  26. api/endpoints/v1/processing/__pycache__/therapy_tts.cpython-312.pyc +0 -0
  27. api/endpoints/v1/processing/ai_feedback.py +239 -0
  28. api/endpoints/v1/processing/audio.py +31 -0
  29. api/endpoints/v1/processing/pronunciation_analysis.py +468 -0
  30. api/endpoints/v1/processing/soap.py +119 -0
  31. api/endpoints/v1/processing/therapy_asr.py +305 -0
  32. api/endpoints/v1/processing/therapy_tts.py +354 -0
  33. api/endpoints/v1/routers/__init__.py +0 -0
  34. api/endpoints/v1/routers/__pycache__/__init__.cpython-312.pyc +0 -0
  35. api/endpoints/v1/routers/__pycache__/analytics.cpython-312.pyc +0 -0
  36. api/endpoints/v1/routers/__pycache__/health.cpython-312.pyc +0 -0
  37. api/endpoints/v1/routers/__pycache__/therapy.cpython-312.pyc +0 -0
  38. api/endpoints/v1/routers/__pycache__/upload.cpython-312.pyc +0 -0
  39. api/endpoints/v1/routers/analytics.py +364 -0
  40. api/endpoints/v1/routers/health.py +24 -0
  41. api/endpoints/v1/routers/therapy.py +639 -0
  42. api/endpoints/v1/routers/upload.py +82 -0
  43. api/endpoints/v1/therapy/__init__.py +1 -0
  44. api/endpoints/v1/utils.py +7 -0
  45. api/index.py +5 -0
  46. api/main.py +63 -0
  47. app.py +5 -0
  48. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
FROM python:3.9

RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
api/__init__.py ADDED
@@ -0,0 +1 @@
__version__ = "0.1.0"
api/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (189 Bytes).
api/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.48 kB).
api/__pycache__/main.cpython-312.pyc ADDED
Binary file (2.66 kB).
api/config.py ADDED
@@ -0,0 +1,29 @@
import os

from dotenv import load_dotenv
from pydantic_settings import BaseSettings, SettingsConfigDict

load_dotenv()


# Otherwise, the root path will be the local host. ROOT_PATH is an env var configured in Vercel deployment.
# The value for production is equal to the root path of the deployment URL in Vercel.
ROOT_PATH = os.getenv("ROOT_PATH", "/")


class Settings(BaseSettings):
    PROJECT_NAME: str = "FastAPI App"
    PROJECT_DESCRIPTION: str = "A simple FastAPI app"
    ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
    CLERK_JWKS_URL: str = os.getenv("CLERK_JWKS_URL")
    CLERK_PEM_PUBLIC_KEY: str = os.getenv("CLERK_PEM_PUBLIC_KEY")
    OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
    GITHUB_TOKEN: str = os.getenv("GITHUB_TOKEN", "")  # For GitHub Models GPT-4o

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
    openapi_url: str = "/openapi.json"
    API_VERSION: str = "/v1"
    ROOT: str = ROOT_PATH


settings = Settings()
api/endpoints/__init__.py ADDED
File without changes
api/endpoints/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (173 Bytes).
api/endpoints/v1/__init__.py ADDED
File without changes
api/endpoints/v1/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (176 Bytes).
api/endpoints/v1/__pycache__/api.cpython-312.pyc ADDED
Binary file (988 Bytes).
api/endpoints/v1/analytics/__init__.py ADDED
@@ -0,0 +1 @@
"""Analytics module - Progress tracking and metrics."""
api/endpoints/v1/api.py ADDED
@@ -0,0 +1,31 @@
from fastapi import APIRouter

from api.endpoints.v1.routers import health, upload, therapy, analytics

api_router = APIRouter()


api_router.include_router(
    health.router,
    prefix="/health",
    tags=["health"],
    responses={404: {"description": "Not found"}},
)
api_router.include_router(
    upload.router,
    prefix="/upload",
    tags=["upload"],
    responses={404: {"description": "Not found"}},
)
api_router.include_router(
    therapy.router,
    prefix="/therapy",
    tags=["therapy"],
    responses={404: {"description": "Not found"}},
)
api_router.include_router(
    analytics.router,
    prefix="/analytics",
    tags=["analytics"],
    responses={404: {"description": "Not found"}},
)
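For context, a minimal sketch of how this aggregated router would typically be mounted on the application. The commit does add api/main.py (+63 lines), but that file is not reproduced in this excerpt, so the exact wiring below is an assumption based on the settings defined in api/config.py:

# Hypothetical wiring sketch -- the actual api/main.py in this commit may differ.
from fastapi import FastAPI

from api.config import settings
from api.endpoints.v1.api import api_router

app = FastAPI(
    title=settings.PROJECT_NAME,
    description=settings.PROJECT_DESCRIPTION,
    openapi_url=settings.openapi_url,
    root_path=settings.ROOT,
)
# Mount all v1 endpoints under the configured API version prefix ("/v1").
app.include_router(api_router, prefix=settings.API_VERSION)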
api/endpoints/v1/auth/__init__.py ADDED
File without changes
api/endpoints/v1/auth/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (181 Bytes).
api/endpoints/v1/auth/__pycache__/verify.cpython-312.pyc ADDED
Binary file (5.49 kB).
api/endpoints/v1/auth/verify.py ADDED
@@ -0,0 +1,122 @@
import logging
from datetime import datetime, timezone
from typing import Optional, List

import requests
from fastapi import HTTPException, Security
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jose import exceptions, jwk, jwt
from jose.utils import base64url_decode

from api.config import settings

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)

ALGORITHM = "RS256"
security = HTTPBearer()

# Define the allowed origins for the azp claim
ALLOWED_ORIGINS = [
    "http://localhost:3000",
    "com.ubumuntu.app",
]

def get_jwks(jwks_url: str):
    """Fetch the JWKS from the given URL."""
    response = requests.get(jwks_url)
    response.raise_for_status()
    return response.json()


def get_public_key(token: str, jwks_url: str):
    """Get the public key for the given token from the JWKS."""
    jwks = get_jwks(jwks_url)
    header = jwt.get_unverified_header(token)
    rsa_key = {}
    for key in jwks["keys"]:
        if key["kid"] == header["kid"]:
            rsa_key = {
                "kty": key["kty"],
                "kid": key["kid"],
                "use": key["use"],
                "n": key["n"],
                "e": key["e"],
            }
            break
    if not rsa_key:
        raise HTTPException(status_code=401, detail="Unable to find appropriate key")
    return jwk.construct(rsa_key, algorithm=ALGORITHM)


def decode_jwt(token: str, jwks_url: str, allowed_origins: List[str]) -> Optional[dict]:
    """Decode a JWT token and verify its expiration and azp claim using JWKS."""
    try:
        logging.info("Attempting to decode the JWT token.")
        public_key = get_public_key(token, jwks_url)
        message, encoded_signature = token.rsplit(".", 1)
        decoded_signature = base64url_decode(encoded_signature.encode("utf-8"))

        if not public_key.verify(message.encode("utf-8"), decoded_signature):
            logging.warning("Invalid token signature.")
            return None

        payload = jwt.decode(
            token,
            public_key.to_pem().decode("utf-8"),
            algorithms=[ALGORITHM],
            audience="authenticated",
        )

        # Validate expiration (exp) and not before (nbf) claims
        now = datetime.now(tz=timezone.utc)
        exp = payload.get("exp")
        nbf = payload.get("nbf")

        if exp and datetime.fromtimestamp(exp, tz=timezone.utc) < now:
            logging.warning("Token has expired.")
            return None

        if nbf and datetime.fromtimestamp(nbf, tz=timezone.utc) > now:
            logging.warning("Token not yet valid.")
            return None

        # Validate authorized parties by the azp claim
        azp = payload.get("azp")
        logging.debug(f"azp: {azp}")

        if azp and azp not in allowed_origins:
            logging.warning(f"Unauthorized party: {azp}")
            return None

        logging.info("JWT successfully decoded.")
        return payload

    except exceptions.ExpiredSignatureError:
        logging.error("JWT has expired.")
        return None
    except exceptions.JWTClaimsError:
        logging.error("JWT claims error.")
        return None
    except exceptions.JWTError as e:
        logging.error(f"JWT decoding error: {e}")
        return None


def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)):
    """Verify the incoming token using the `decode_jwt` function."""
    token = credentials.credentials

    credentials_exception = HTTPException(
        status_code=401,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    payload = decode_jwt(token, settings.CLERK_JWKS_URL, ALLOWED_ORIGINS)
    if not payload or "sub" not in payload:
        raise credentials_exception

    return payload["sub"]
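A minimal usage sketch (not part of this commit): wiring verify_token into a route as a standard FastAPI dependency. The route path and handler below are hypothetical; only verify_token and its return value (the token's sub claim) come from the file above.

# Hypothetical example route -- not part of this commit.
from fastapi import APIRouter, Depends

from api.endpoints.v1.auth.verify import verify_token

router = APIRouter()


@router.get("/me")
def read_current_user(user_id: str = Depends(verify_token)):
    # verify_token raises 401 on invalid tokens and returns the `sub` claim otherwise.
    return {"user_id": user_id}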
api/endpoints/v1/clients/__init__.py ADDED
File without changes
api/endpoints/v1/processing/__init__.py ADDED
File without changes
api/endpoints/v1/processing/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (187 Bytes).
api/endpoints/v1/processing/__pycache__/ai_feedback.cpython-312.pyc ADDED
Binary file (10.2 kB).
api/endpoints/v1/processing/__pycache__/audio.cpython-312.pyc ADDED
Binary file (1.48 kB).
api/endpoints/v1/processing/__pycache__/pronunciation_analysis.cpython-312.pyc ADDED
Binary file (17.5 kB).
api/endpoints/v1/processing/__pycache__/soap.cpython-312.pyc ADDED
Binary file (5.08 kB).
api/endpoints/v1/processing/__pycache__/therapy_asr.cpython-312.pyc ADDED
Binary file (12.8 kB).
api/endpoints/v1/processing/__pycache__/therapy_tts.cpython-312.pyc ADDED
Binary file (13.5 kB).
api/endpoints/v1/processing/ai_feedback.py ADDED
@@ -0,0 +1,239 @@
"""
AI Feedback Module - GPT-4o powered speech therapy feedback.

Uses GitHub Models API for GPT-4o access.
"""

import os
import json
import logging
from typing import Optional, List
from dataclasses import dataclass

from openai import OpenAI

logger = logging.getLogger(__name__)


@dataclass
class AIFeedbackResult:
    """AI-generated feedback for speech therapy."""
    feedback: str
    encouragement: str
    specific_tips: List[str]
    recommended_exercises: List[str]
    difficulty_adjustment: Optional[str]  # "easier", "same", "harder"


class AIFeedbackGenerator:
    """
    Generate personalized speech therapy feedback using GPT-4o.

    Uses GitHub Models API (free for GitHub users).
    """

    def __init__(self):
        self.client: Optional[OpenAI] = None
        self.model = "gpt-4o"
        self._initialize_client()

    def _initialize_client(self):
        """Initialize the OpenAI client with GitHub Models."""
        github_token = os.getenv("GITHUB_TOKEN")

        if not github_token:
            raise ValueError(
                "GITHUB_TOKEN not found. Please set it in your .env file. "
                "Get your token at: https://github.com/settings/tokens"
            )

        # Use GitHub Models (free GPT-4o access)
        self.client = OpenAI(
            base_url="https://models.inference.ai.azure.com",
            api_key=github_token,
        )
        self.model = "gpt-4o"
        logger.info("AI Feedback: Using GitHub Models (GPT-4o)")

    async def generate_feedback(
        self,
        target_text: str,
        transcription: str,
        overall_score: float,
        clarity_score: float,
        pace_score: float,
        fluency_score: float,
        errors: List[dict],
        user_context: Optional[dict] = None
    ) -> AIFeedbackResult:
        """
        Generate personalized feedback for a speech exercise attempt.

        Args:
            target_text: The text the user was supposed to say
            transcription: What the ASR heard
            overall_score: 0-100 overall score
            clarity_score: 0-100 clarity score
            pace_score: 0-100 pace score
            fluency_score: 0-100 fluency score
            errors: List of pronunciation errors detected
            user_context: Optional user profile info (speech condition, etc.)

        Returns:
            AIFeedbackResult with personalized feedback
        """
        # Build context about user if available
        user_info = ""
        if user_context:
            condition = user_context.get("speech_condition", "")
            severity = user_context.get("severity_level", "")
            if condition:
                user_info = f"\nUser has {condition}"
                if severity:
                    user_info += f" (severity: {severity}/5)"
                user_info += ". Adjust feedback accordingly."

        # Format errors for the prompt
        error_summary = ""
        if errors:
            error_items = []
            for e in errors[:5]:  # Limit to 5 errors
                error_items.append(
                    f"- '{e.get('expected', '')}' → '{e.get('actual', '')}' ({e.get('error_type', '')})"
                )
            error_summary = "\n".join(error_items)

        system_prompt = """You are a supportive, encouraging speech therapist helping users improve their speech clarity.

Your feedback should be:
- Warm and encouraging, never discouraging
- Specific and actionable
- Age-appropriate and easy to understand
- Focused on progress, not perfection

Always acknowledge effort and provide constructive guidance."""

        user_prompt = f"""Please provide feedback for this speech exercise attempt:

**Target phrase:** "{target_text}"
**User said:** "{transcription}"

**Scores:**
- Overall: {overall_score:.0f}/100
- Clarity: {clarity_score:.0f}/100
- Pace: {pace_score:.0f}/100
- Fluency: {fluency_score:.0f}/100

**Pronunciation differences:**
{error_summary if error_summary else "No major differences detected"}
{user_info}

Please respond in this JSON format:
{{
    "feedback": "2-3 sentences of overall feedback",
    "encouragement": "A short encouraging message",
    "specific_tips": ["tip 1", "tip 2", "tip 3"],
    "recommended_exercises": ["exercise 1", "exercise 2"],
    "difficulty_adjustment": "easier" or "same" or "harder"
}}"""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.7,
            max_tokens=500,
            response_format={"type": "json_object"}
        )

        # Parse the response
        result = json.loads(response.choices[0].message.content)

        return AIFeedbackResult(
            feedback=result.get("feedback", "Good effort! Keep practicing."),
            encouragement=result.get("encouragement", "You're making progress!"),
            specific_tips=result.get("specific_tips", []),
            recommended_exercises=result.get("recommended_exercises", []),
            difficulty_adjustment=result.get("difficulty_adjustment", "same")
        )

    async def generate_session_summary(
        self,
        session_stats: dict,
        attempts: List[dict]
    ) -> str:
        """Generate an AI summary of a therapy session."""
        prompt = f"""Summarize this speech therapy session for the user:

**Session Stats:**
- Duration: {session_stats.get('duration_minutes', 0)} minutes
- Exercises completed: {session_stats.get('exercise_count', 0)}
- Average score: {session_stats.get('average_score', 0):.0f}/100
- Best score: {session_stats.get('best_score', 0):.0f}/100

**Exercise Types Practiced:** {', '.join(session_stats.get('exercise_types', []))}

Please provide a brief, encouraging 2-3 sentence summary of their session."""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are a supportive speech therapist providing session summaries."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=150
        )

        return response.choices[0].message.content

    async def generate_weekly_insights(
        self,
        weekly_data: dict
    ) -> dict:
        """Generate AI-powered weekly progress insights."""
        prompt = f"""Analyze this user's weekly speech therapy progress:

**This Week:**
- Sessions: {weekly_data.get('sessions_this_week', 0)}
- Total practice time: {weekly_data.get('practice_minutes', 0)} minutes
- Average score: {weekly_data.get('avg_score', 0):.0f}/100
- Score change from last week: {weekly_data.get('score_change', 0):+.1f}%

**Strengths:** {', '.join(weekly_data.get('strengths', ['Consistent practice']))}
**Areas to improve:** {', '.join(weekly_data.get('weaknesses', ['Continue practicing']))}

Provide a JSON response with:
{{
    "summary": "2-3 sentence progress summary",
    "celebration": "Something specific to celebrate",
    "focus_area": "One specific thing to focus on next week",
    "goal": "A realistic goal for next week"
}}"""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are an encouraging speech therapist analyzing weekly progress."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=300,
            response_format={"type": "json_object"}
        )

        return json.loads(response.choices[0].message.content)


# Singleton instance
_feedback_generator: Optional[AIFeedbackGenerator] = None


def get_ai_feedback_generator() -> AIFeedbackGenerator:
    """Get or create AIFeedbackGenerator singleton."""
    global _feedback_generator
    if _feedback_generator is None:
        _feedback_generator = AIFeedbackGenerator()
    return _feedback_generator
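A minimal calling sketch for the module above, assuming GITHUB_TOKEN is set. All scores and error values are illustrative; generate_feedback is declared async (even though the underlying client call is synchronous), so it is awaited here:

# Hypothetical caller -- illustrative values only.
import asyncio

from api.endpoints.v1.processing.ai_feedback import get_ai_feedback_generator


async def main():
    generator = get_ai_feedback_generator()
    result = await generator.generate_feedback(
        target_text="The red ball bounces high",
        transcription="the wed ball bounces high",
        overall_score=82.0,
        clarity_score=78.0,
        pace_score=90.0,
        fluency_score=85.0,
        errors=[{"word": "red", "expected": "red", "actual": "wed", "error_type": "substitution"}],
    )
    print(result.feedback)
    print(result.specific_tips)


asyncio.run(main())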
api/endpoints/v1/processing/audio.py ADDED
@@ -0,0 +1,31 @@
import io
import logging

from openai import OpenAI

from api.config import settings

client = OpenAI(api_key=settings.OPENAI_API_KEY)

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


def transcribe_with_whisper(
    filename: str, file_like: io.BytesIO, content_type: str
) -> str:
    """Helper function to transcribe audio using OpenAI Whisper."""
    logging.info("Transcribing with whisper")

    # Prepare the file data as a tuple
    file_data = (filename, file_like.read(), content_type)

    # Call the OpenAI API to transcribe the audio file
    transcription = client.audio.transcriptions.create(
        model="whisper-1", file=file_data
    )

    logging.debug(f"Transcription: {transcription.text}")
    return transcription
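A minimal usage sketch, assuming a local WAV file and a configured OPENAI_API_KEY. Note that despite the str annotation, the helper returns the full transcription object, so callers read its .text attribute:

# Hypothetical caller -- the file path is illustrative.
import io

from api.endpoints.v1.processing.audio import transcribe_with_whisper

with open("sample.wav", "rb") as f:
    audio = io.BytesIO(f.read())

result = transcribe_with_whisper("sample.wav", audio, "audio/wav")
print(result.text)  # transcription object exposes the text via .text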
api/endpoints/v1/processing/pronunciation_analysis.py ADDED
@@ -0,0 +1,468 @@
"""
Pronunciation Analysis Module - Speech clarity and pronunciation feedback.

Provides:
- Pronunciation scoring (PCC - Percent Consonants Correct)
- Clarity assessment
- Pace analysis
- Per-word feedback
- Improvement suggestions
"""

import io
import logging
from typing import Optional, List
from dataclasses import dataclass, field
from enum import Enum

from api.config import settings

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


class ErrorType(str, Enum):
    """Types of pronunciation errors."""
    SUBSTITUTION = "substitution"  # Wrong sound
    OMISSION = "omission"  # Missing sound
    ADDITION = "addition"  # Extra sound
    DISTORTION = "distortion"  # Unclear sound


@dataclass
class PhonemeError:
    """Individual phoneme-level error."""
    word: str
    position: int  # Position in word
    expected: str
    actual: Optional[str]
    error_type: ErrorType
    suggestion: str


@dataclass
class WordScore:
    """Per-word pronunciation score."""
    word: str
    score: float  # 0-100
    start_time: Optional[float] = None
    end_time: Optional[float] = None
    errors: List[PhonemeError] = field(default_factory=list)


@dataclass
class AIFeedback:
    """AI-generated personalized feedback."""
    feedback: str
    encouragement: str
    specific_tips: List[str]
    recommended_exercises: List[str]
    difficulty_adjustment: Optional[str] = None  # "easier", "same", "harder"


@dataclass
class PronunciationFeedback:
    """Complete pronunciation analysis result."""
    overall_score: float  # 0-100
    clarity_score: float  # 0-100
    pace_score: float  # 0-100
    fluency_score: float  # 0-100
    word_scores: List[WordScore]
    suggestions: List[str]
    phoneme_errors: List[PhonemeError]
    transcription: str
    target_text: str
    duration_seconds: Optional[float] = None
    ai_feedback: Optional[AIFeedback] = None  # GPT-4o powered feedback


class PronunciationAnalyzer:
    """
    Analyze pronunciation against target text.

    Uses ASR with forced alignment to compare user speech
    against expected pronunciation. Integrates GPT-4o for
    personalized feedback via GitHub Models API.
    """

    def __init__(self):
        self._asr = None
        self._ai_feedback = None

    def _get_ai_feedback_generator(self):
        """Get AI feedback generator instance."""
        if self._ai_feedback is None:
            from api.endpoints.v1.processing.ai_feedback import get_ai_feedback_generator
            self._ai_feedback = get_ai_feedback_generator()
        return self._ai_feedback

    def _get_asr(self):
        """Get ASR instance for transcription."""
        if self._asr is None:
            from api.endpoints.v1.processing.therapy_asr import get_therapy_asr
            self._asr = get_therapy_asr()
        return self._asr

    async def analyze(
        self,
        audio_bytes: bytes,
        target_text: str,
        user_baseline: Optional[dict] = None,
        user_context: Optional[dict] = None,
        include_ai_feedback: bool = True
    ) -> PronunciationFeedback:
        """
        Analyze pronunciation of audio against target text.

        Args:
            audio_bytes: User's recorded audio
            target_text: Expected text/phrase
            user_baseline: Optional baseline metrics for comparison
            user_context: Optional user profile (speech condition, severity)
            include_ai_feedback: Whether to generate GPT-4o feedback

        Returns:
            PronunciationFeedback with scores, suggestions, and AI feedback
        """
        logging.info(f"Analyzing pronunciation for target: {target_text}")

        # 1. Transcribe the audio
        asr = self._get_asr()
        result = asr.transcribe(audio_bytes)
        transcription = result.text.strip().lower()
        target_clean = target_text.strip().lower()

        logging.debug(f"Transcription: {transcription}")
        logging.debug(f"Target: {target_clean}")

        # 2. Compare transcription to target
        word_scores, phoneme_errors = self._compare_texts(
            transcription, target_clean
        )

        # 3. Calculate scores
        overall_score = self._calculate_overall_score(word_scores)
        clarity_score = self._calculate_clarity_score(word_scores, phoneme_errors)
        pace_score = self._calculate_pace_score(result.word_timestamps)
        fluency_score = self._calculate_fluency_score(transcription, target_clean)

        # 4. Generate rule-based suggestions
        suggestions = self._generate_suggestions(phoneme_errors, word_scores)

        # 5. Generate AI-powered feedback (GPT-4o via GitHub Models)
        ai_feedback = None
        if include_ai_feedback:
            try:
                ai_generator = self._get_ai_feedback_generator()
                # Convert phoneme errors to dict format for AI
                errors_dict = [
                    {
                        "word": e.word,
                        "expected": e.expected,
                        "actual": e.actual,
                        "error_type": e.error_type.value
                    }
                    for e in phoneme_errors
                ]

                ai_result = await ai_generator.generate_feedback(
                    target_text=target_text,
                    transcription=transcription,
                    overall_score=overall_score,
                    clarity_score=clarity_score,
                    pace_score=pace_score,
                    fluency_score=fluency_score,
                    errors=errors_dict,
                    user_context=user_context
                )

                ai_feedback = AIFeedback(
                    feedback=ai_result.feedback,
                    encouragement=ai_result.encouragement,
                    specific_tips=ai_result.specific_tips,
                    recommended_exercises=ai_result.recommended_exercises,
                    difficulty_adjustment=ai_result.difficulty_adjustment
                )
                logging.info("AI feedback generated successfully")
            except Exception as e:
                logging.warning(f"AI feedback generation failed: {e}")
                ai_feedback = None

        return PronunciationFeedback(
            overall_score=overall_score,
            clarity_score=clarity_score,
            pace_score=pace_score,
            fluency_score=fluency_score,
            word_scores=word_scores,
            suggestions=suggestions,
            phoneme_errors=phoneme_errors,
            transcription=transcription,
            target_text=target_text,
            ai_feedback=ai_feedback
        )

    def _compare_texts(
        self,
        transcription: str,
        target: str
    ) -> tuple[List[WordScore], List[PhonemeError]]:
        """Compare transcribed text to target text."""
        trans_words = transcription.split()
        target_words = target.split()

        word_scores = []
        phoneme_errors = []

        # Simple word-level comparison (can be enhanced with phoneme alignment)
        max_len = max(len(trans_words), len(target_words))

        for i in range(max_len):
            target_word = target_words[i] if i < len(target_words) else ""
            trans_word = trans_words[i] if i < len(trans_words) else ""

            if not target_word:
                # Extra word in transcription
                phoneme_errors.append(PhonemeError(
                    word=trans_word,
                    position=i,
                    expected="",
                    actual=trans_word,
                    error_type=ErrorType.ADDITION,
                    suggestion=f"Extra word '{trans_word}' detected"
                ))
                continue

            if not trans_word:
                # Missing word
                word_scores.append(WordScore(
                    word=target_word,
                    score=0.0,
                    errors=[PhonemeError(
                        word=target_word,
                        position=i,
                        expected=target_word,
                        actual=None,
                        error_type=ErrorType.OMISSION,
                        suggestion=f"Try to include the word '{target_word}'"
                    )]
                ))
                phoneme_errors.append(word_scores[-1].errors[0])
                continue

            # Compare words
            score, errors = self._compare_words(target_word, trans_word, i)
            word_scores.append(WordScore(
                word=target_word,
                score=score,
                errors=errors
            ))
            phoneme_errors.extend(errors)

        return word_scores, phoneme_errors

    def _compare_words(
        self,
        target_word: str,
        trans_word: str,
        position: int
    ) -> tuple[float, List[PhonemeError]]:
        """Compare two words and return score and errors."""
        errors = []

        # Exact match
        if target_word == trans_word:
            return 100.0, []

        # Calculate similarity (simple Levenshtein-based)
        similarity = self._word_similarity(target_word, trans_word)
        score = similarity * 100

        # Detect error type
        if len(trans_word) > len(target_word):
            error_type = ErrorType.ADDITION
            suggestion = f"'{trans_word}' has extra sounds, expected '{target_word}'"
        elif len(trans_word) < len(target_word):
            error_type = ErrorType.OMISSION
            suggestion = f"Some sounds missing in '{trans_word}', expected '{target_word}'"
        else:
            error_type = ErrorType.SUBSTITUTION
            suggestion = f"'{trans_word}' should be '{target_word}'"

        if score < 100:
            errors.append(PhonemeError(
                word=target_word,
                position=position,
                expected=target_word,
                actual=trans_word,
                error_type=error_type,
                suggestion=suggestion
            ))

        return score, errors

    def _word_similarity(self, word1: str, word2: str) -> float:
        """Calculate similarity between two words (0-1)."""
        if word1 == word2:
            return 1.0

        # Levenshtein distance normalized
        len1, len2 = len(word1), len(word2)
        if len1 == 0 or len2 == 0:
            return 0.0

        # Create distance matrix
        dp = [[0] * (len2 + 1) for _ in range(len1 + 1)]

        for i in range(len1 + 1):
            dp[i][0] = i
        for j in range(len2 + 1):
            dp[0][j] = j

        for i in range(1, len1 + 1):
            for j in range(1, len2 + 1):
                cost = 0 if word1[i-1] == word2[j-1] else 1
                dp[i][j] = min(
                    dp[i-1][j] + 1,  # deletion
                    dp[i][j-1] + 1,  # insertion
                    dp[i-1][j-1] + cost  # substitution
                )

        distance = dp[len1][len2]
        max_len = max(len1, len2)

        return 1.0 - (distance / max_len)

    def _calculate_overall_score(self, word_scores: List[WordScore]) -> float:
        """Calculate overall pronunciation score."""
        if not word_scores:
            return 0.0
        return sum(ws.score for ws in word_scores) / len(word_scores)

    def _calculate_clarity_score(
        self,
        word_scores: List[WordScore],
        errors: List[PhonemeError]
    ) -> float:
        """Calculate speech clarity score."""
        if not word_scores:
            return 0.0

        # Penalize based on error types
        error_penalties = {
            ErrorType.DISTORTION: 15,
            ErrorType.SUBSTITUTION: 10,
            ErrorType.OMISSION: 20,
            ErrorType.ADDITION: 5,
        }

        base_score = 100.0
        for error in errors:
            base_score -= error_penalties.get(error.error_type, 10)

        return max(0.0, base_score)

    def _calculate_pace_score(
        self,
        word_timestamps: Optional[List[dict]]
    ) -> float:
        """Calculate pace/timing score."""
        if not word_timestamps or len(word_timestamps) < 2:
            return 75.0  # Default score if no timestamps

        # Calculate words per minute
        total_duration = word_timestamps[-1].get("end", 0) - word_timestamps[0].get("start", 0)
        if total_duration <= 0:
            return 75.0

        wpm = (len(word_timestamps) / total_duration) * 60

        # Ideal range: 100-150 WPM for clear speech
        if 100 <= wpm <= 150:
            return 100.0
        elif 80 <= wpm < 100 or 150 < wpm <= 180:
            return 85.0
        elif 60 <= wpm < 80 or 180 < wpm <= 200:
            return 70.0
        else:
            return 50.0

    def _calculate_fluency_score(self, transcription: str, target: str) -> float:
        """Calculate fluency based on text similarity."""
        return self._word_similarity(transcription, target) * 100

    def _generate_suggestions(
        self,
        errors: List[PhonemeError],
        word_scores: List[WordScore]
    ) -> List[str]:
        """Generate actionable improvement suggestions."""
        suggestions = []

        # Group errors by type
        error_types = {}
        for error in errors:
            error_types.setdefault(error.error_type, []).append(error)

        # Generate suggestions based on error patterns
        if ErrorType.OMISSION in error_types:
            omissions = error_types[ErrorType.OMISSION]
            words = [e.word for e in omissions[:3]]
            suggestions.append(
                f"Try to pronounce all sounds in: {', '.join(words)}"
            )

        if ErrorType.SUBSTITUTION in error_types:
            subs = error_types[ErrorType.SUBSTITUTION]
            if subs:
                suggestions.append(
                    f"Focus on the correct sound in '{subs[0].word}'"
                )

        if ErrorType.ADDITION in error_types:
            suggestions.append("Speak more clearly without adding extra sounds")

        # Low scoring words
        low_scores = [ws for ws in word_scores if ws.score < 70]
        if low_scores:
            words = [ws.word for ws in low_scores[:3]]
            suggestions.append(
                f"Practice these words: {', '.join(words)}"
            )

        # General encouragement if few errors
        if len(errors) <= 2:
            suggestions.append("Good job! Keep practicing for even better clarity.")

        return suggestions[:5]  # Limit to 5 suggestions


# Singleton instance
_analyzer_instance: Optional[PronunciationAnalyzer] = None


def get_pronunciation_analyzer() -> PronunciationAnalyzer:
    """Get or create PronunciationAnalyzer singleton."""
    global _analyzer_instance
    if _analyzer_instance is None:
        _analyzer_instance = PronunciationAnalyzer()
    return _analyzer_instance


async def analyze_pronunciation(
    audio_bytes: bytes,
    target_text: str,
    user_baseline: Optional[dict] = None,
    user_context: Optional[dict] = None,
    include_ai_feedback: bool = True
) -> PronunciationFeedback:
    """Convenience function for pronunciation analysis with AI feedback."""
    analyzer = get_pronunciation_analyzer()
    return await analyzer.analyze(
        audio_bytes,
        target_text,
        user_baseline,
        user_context,
        include_ai_feedback
    )
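A minimal calling sketch for the module-level convenience function above, assuming raw WAV bytes on disk and the default AI-feedback path (the file name and target phrase are illustrative):

# Hypothetical caller -- illustrative inputs only.
import asyncio

from api.endpoints.v1.processing.pronunciation_analysis import analyze_pronunciation


async def main():
    with open("attempt.wav", "rb") as f:
        audio_bytes = f.read()

    feedback = await analyze_pronunciation(audio_bytes, target_text="She sells sea shells")
    print(feedback.overall_score, feedback.suggestions)
    if feedback.ai_feedback:
        print(feedback.ai_feedback.encouragement)


asyncio.run(main())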
api/endpoints/v1/processing/soap.py ADDED
@@ -0,0 +1,119 @@
import json
import logging
import textwrap

from openai import OpenAI

from api.config import settings

client = OpenAI(api_key=settings.OPENAI_API_KEY)

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


def generate_soap(transcript: str) -> str:
    """Helper function to generate soap note from transcript using OpenAI Chat Completions API."""
    logging.info("Generating soap note")

    sample_transcript = textwrap.dedent("""
        The patient is a 25-year-old right-handed Caucasian female who presented to the emergency department with sudden onset of headache occurring at approximately 11 a.m. on the morning of the July 31, 2008. She described the headache as the worst in her life and it was also accompanied by blurry vision and scotoma. The patient also perceived some swelling in her face. Once in the Emergency Department, the patient underwent a very thorough evaluation and examination. She was given the migraine cocktail. Also was given morphine a total of 8 mg while in the Emergency Department. For full details on the history of present illness, please see the previous history and physical.

        Doctor: How're you feeling today?
        Patient: Terrible. I'm having the worst headache of my life.
        Doctor: I'm so sorry. Well you are only twenty-five, so let's hope this is the last of the worst. Let's see how we can best help you. When did it start?
        Patient: Around eleven in the morning.
        Doctor: Today?
        Patient: Um no yesterday. July thirty-first.
        Doctor: July thirty-first O eight. Got it. Did it come on suddenly?
        Patient: Yeah.
        Doctor: Are you having any symptoms with it, such as blurry vision, light sensitivity, dizziness, lightheadedness, or nausea?
        Patient: I'm having blurry vision and lightheadedness. I also can't seem to write well. It looks so messy. I am naturally right-handed but my writing looks like I am trying with my left.
        Doctor: How would you describe the lightheadedness?
        Patient: Like there are blind spots.
        Doctor: Okay. How about any vomiting?
        Patient: Um no. I feel like my face is pretty swollen though. I don't know if it's related to the headache but it started around the same time.
        Doctor: Here in the ER, we'll do a thorough exam and eval to make sure nothing serious is going on. While we're waiting for your CT results, I'm going to order a migraine cocktail and some Morphine.
        Patient: Thanks. Will the nurse be in soon?
        Doctor: Yes, she'll be right in as soon as the order is placed. It shouldn't be more than a few minutes. If it takes longer, then please ring the call bell.
    """)

    # Example format for Tiptap editor as a JSON string
    example_format = {
        "type": "doc",
        "content": [
            {
                "type": "heading",
                "attrs": {"level": 2},
                "content": [{"type": "text", "text": "Example heading"}],
            },
            {
                "type": "paragraph",
                "content": [{"type": "text", "text": "example paragraph"}],
            },
            {
                "type": "heading",
                "attrs": {"level": 3},
                "content": [{"type": "text", "text": "Features"}],
            },
            {
                "type": "orderedList",
                "attrs": {"tight": True, "start": 1},
                "content": [
                    {
                        "type": "listItem",
                        "content": [
                            {
                                "type": "paragraph",
                                "content": [
                                    {"type": "text", "text": "Example list item"}
                                ],
                            },
                        ],
                    },
                    {
                        "type": "listItem",
                        "content": [
                            {
                                "type": "paragraph",
                                "content": [
                                    {"type": "text", "text": "AI autocomplete (type "},
                                    {
                                        "type": "text",
                                        "marks": [{"type": "code"}],
                                        "text": "++",
                                    },
                                    {
                                        "type": "text",
                                        "text": " to activate, or select from slash menu)",
                                    },
                                ],
                            },
                        ],
                    },
                ],
            },
        ],
    }
    example_format_str = json.dumps(example_format)

    # Call the OpenAI Chat Completions API
    completion = client.chat.completions.create(
        model="gpt-4o",  # gpt-4o, gpt-3.5-turbo
        response_format={"type": "json_object"},
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant designed to output JSON.",
            },
            {
                "role": "user",
                "content": f"Generate a SOAP note from the following transcript and return it in JSON format for a Tiptap editor. This is the example format: {example_format_str}. The first heading can be the subjective section. The text fields can not be left blank, so try your best to fill them out. Transcript: {transcript}",
            },
        ],
    )

    logging.debug(f"SOAP: {completion.choices[0].message.content}")
    return completion.choices[0].message.content
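A minimal calling sketch for generate_soap; the return value is the raw JSON string produced by the model (forced by response_format), so a caller would typically json.loads it before handing it to the Tiptap editor. The transcript text below is illustrative:

# Hypothetical caller -- transcript content is illustrative.
import json

from api.endpoints.v1.processing.soap import generate_soap

transcript = "Doctor: How are you feeling today? Patient: I have had a mild headache since this morning."
soap_json = generate_soap(transcript)
doc = json.loads(soap_json)  # Tiptap-compatible document structure
print(doc["type"], len(doc["content"]))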
api/endpoints/v1/processing/therapy_asr.py ADDED
@@ -0,0 +1,305 @@
"""
Therapy ASR Module - Multi-engine speech recognition for therapy applications.

Supports:
- Local Whisper (general speech, privacy-focused)
- SpeechBrain (fine-tuned for atypical speech)
- OpenAI Whisper API (fallback)
"""

import io
import logging
from enum import Enum
from typing import Optional
from dataclasses import dataclass

from api.config import settings

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


class ASREngine(str, Enum):
    """Available ASR engines."""
    WHISPER_LOCAL = "whisper_local"
    SPEECHBRAIN = "speechbrain"
    WHISPER_API = "whisper_api"
    AUTO = "auto"  # Automatically select based on user profile


@dataclass
class TranscriptionResult:
    """Structured transcription result."""
    text: str
    engine_used: ASREngine
    confidence: Optional[float] = None
    word_timestamps: Optional[list] = None
    language: Optional[str] = None


class TherapyASR:
    """
    Multi-engine ASR for therapy applications.

    Supports automatic engine selection based on user speech profile,
    with fallback chain for reliability.
    """

    def __init__(self, default_engine: ASREngine = ASREngine.AUTO):
        self.default_engine = default_engine
        self._whisper_local_model = None
        self._speechbrain_model = None
        self._openai_client = None

    def _get_openai_client(self):
        """Lazy load OpenAI client."""
        if self._openai_client is None:
            from openai import OpenAI
            self._openai_client = OpenAI(api_key=settings.OPENAI_API_KEY)
        return self._openai_client

    def _get_whisper_local(self):
        """Lazy load local Whisper model."""
        if self._whisper_local_model is None:
            try:
                import torch
                from transformers import WhisperProcessor, WhisperForConditionalGeneration

                model_name = "openai/whisper-base"  # Start with base, upgrade as needed
                logging.info(f"Loading local Whisper model: {model_name}")

                self._whisper_processor = WhisperProcessor.from_pretrained(model_name)
                self._whisper_local_model = WhisperForConditionalGeneration.from_pretrained(model_name)

                # Use GPU if available
                if torch.cuda.is_available():
                    self._whisper_local_model = self._whisper_local_model.to("cuda")
                elif torch.backends.mps.is_available():
                    self._whisper_local_model = self._whisper_local_model.to("mps")

                logging.info("Local Whisper model loaded successfully")
            except ImportError as e:
                logging.warning(f"Local Whisper not available: {e}")
                raise
        return self._whisper_local_model

    def _get_speechbrain(self):
        """Lazy load SpeechBrain model for atypical speech."""
        if self._speechbrain_model is None:
            try:
                import speechbrain as sb

                # Use pre-trained model, can be swapped for fine-tuned version
                model_source = "speechbrain/asr-wav2vec2-commonvoice-en"
                logging.info(f"Loading SpeechBrain model: {model_source}")

                self._speechbrain_model = sb.pretrained.EncoderASR.from_hparams(
                    source=model_source,
                    savedir="models/speechbrain_asr"
                )
                logging.info("SpeechBrain model loaded successfully")
            except ImportError as e:
                logging.warning(f"SpeechBrain not available: {e}")
                raise
        return self._speechbrain_model

    def _select_engine(self, user_profile: Optional[dict] = None) -> ASREngine:
        """Select appropriate ASR engine based on user profile."""
        if self.default_engine != ASREngine.AUTO:
            return self.default_engine

        if user_profile:
            # Use SpeechBrain for users with speech conditions
            speech_condition = user_profile.get("speech_condition")
            if speech_condition in ["dysarthria", "apraxia", "autism", "stuttering"]:
                return ASREngine.SPEECHBRAIN

            # Use local Whisper for privacy-focused users
            if user_profile.get("privacy_mode") == "local":
                return ASREngine.WHISPER_LOCAL

        # Default to API for best accuracy
        return ASREngine.WHISPER_API

    def transcribe(
        self,
        audio_data: bytes,
        filename: str = "audio.wav",
        content_type: str = "audio/wav",
        user_profile: Optional[dict] = None,
        engine: Optional[ASREngine] = None
    ) -> TranscriptionResult:
        """
        Transcribe audio using the most appropriate engine.

        Args:
            audio_data: Raw audio bytes
            filename: Original filename
            content_type: MIME type of audio
            user_profile: Optional user profile for engine selection
            engine: Force specific engine (overrides auto-selection)

        Returns:
            TranscriptionResult with text and metadata
        """
        selected_engine = engine or self._select_engine(user_profile)
        logging.info(f"Transcribing with engine: {selected_engine.value}")

        # Try selected engine with fallback chain
        fallback_order = [selected_engine]
        if selected_engine != ASREngine.WHISPER_API:
            fallback_order.append(ASREngine.WHISPER_API)

        last_error = None
        for eng in fallback_order:
            try:
                if eng == ASREngine.WHISPER_API:
                    return self._transcribe_whisper_api(audio_data, filename, content_type)
                elif eng == ASREngine.WHISPER_LOCAL:
                    return self._transcribe_whisper_local(audio_data)
                elif eng == ASREngine.SPEECHBRAIN:
                    return self._transcribe_speechbrain(audio_data)
            except Exception as e:
                logging.warning(f"Engine {eng.value} failed: {e}")
                last_error = e
                continue

        raise RuntimeError(f"All ASR engines failed. Last error: {last_error}")

    def _transcribe_whisper_api(
        self,
        audio_data: bytes,
        filename: str,
        content_type: str
    ) -> TranscriptionResult:
        """Transcribe using OpenAI Whisper API."""
        logging.info("Transcribing with OpenAI Whisper API")

        client = self._get_openai_client()
        file_data = (filename, audio_data, content_type)

        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=file_data,
            response_format="verbose_json",
            timestamp_granularities=["word"]
        )

        # Extract word timestamps if available
        word_timestamps = None
        if hasattr(transcription, 'words'):
            word_timestamps = [
                {"word": w.word, "start": w.start, "end": w.end}
                for w in transcription.words
            ]

        return TranscriptionResult(
            text=transcription.text,
            engine_used=ASREngine.WHISPER_API,
            language=getattr(transcription, 'language', None),
            word_timestamps=word_timestamps
        )

    def _transcribe_whisper_local(self, audio_data: bytes) -> TranscriptionResult:
        """Transcribe using local Whisper model."""
        logging.info("Transcribing with local Whisper")

        import torch
        import librosa
        import numpy as np

        model = self._get_whisper_local()

        # Load audio from bytes
        audio_array, sr = librosa.load(io.BytesIO(audio_data), sr=16000)

        # Process audio
        input_features = self._whisper_processor(
            audio_array,
            sampling_rate=16000,
            return_tensors="pt"
        ).input_features

        # Move to same device as model
        device = next(model.parameters()).device
        input_features = input_features.to(device)

        # Generate transcription
        with torch.no_grad():
            predicted_ids = model.generate(input_features)

        transcription = self._whisper_processor.batch_decode(
            predicted_ids,
            skip_special_tokens=True
        )[0]

        return TranscriptionResult(
            text=transcription.strip(),
            engine_used=ASREngine.WHISPER_LOCAL
        )

    def _transcribe_speechbrain(self, audio_data: bytes) -> TranscriptionResult:
        """Transcribe using SpeechBrain (optimized for atypical speech)."""
        logging.info("Transcribing with SpeechBrain")

        import tempfile
        import os

        model = self._get_speechbrain()

        # SpeechBrain requires file path, write temp file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            f.write(audio_data)
            temp_path = f.name

        try:
            transcription = model.transcribe_file(temp_path)

            # Handle different return types
            if isinstance(transcription, list):
                text = transcription[0] if transcription else ""
            else:
                text = str(transcription)

            return TranscriptionResult(
                text=text.strip(),
                engine_used=ASREngine.SPEECHBRAIN
            )
        finally:
            os.unlink(temp_path)


# Singleton instance for reuse
_therapy_asr_instance: Optional[TherapyASR] = None


def get_therapy_asr() -> TherapyASR:
    """Get or create TherapyASR singleton."""
    global _therapy_asr_instance
    if _therapy_asr_instance is None:
        _therapy_asr_instance = TherapyASR()
    return _therapy_asr_instance


def transcribe_for_therapy(
    audio_data: bytes,
    filename: str = "audio.wav",
    content_type: str = "audio/wav",
    user_profile: Optional[dict] = None,
    engine: Optional[ASREngine] = None
) -> TranscriptionResult:
    """
    Convenience function to transcribe audio for therapy.

    This is the main entry point for therapy transcription.
    """
    asr = get_therapy_asr()
    return asr.transcribe(
        audio_data=audio_data,
        filename=filename,
        content_type=content_type,
        user_profile=user_profile,
        engine=engine
    )
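A minimal calling sketch for the convenience entry point above, assuming WAV bytes on disk; per the selection logic, a profile with a listed speech condition routes to SpeechBrain when installed, and the Whisper API is the fallback otherwise. File name and profile values are illustrative:

# Hypothetical caller -- illustrative file and profile values.
from api.endpoints.v1.processing.therapy_asr import transcribe_for_therapy

with open("exercise.wav", "rb") as f:
    audio_bytes = f.read()

result = transcribe_for_therapy(
    audio_bytes,
    user_profile={"speech_condition": "dysarthria"},  # routes to SpeechBrain when available
)
print(result.engine_used.value, result.text)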
api/endpoints/v1/processing/therapy_tts.py ADDED
@@ -0,0 +1,354 @@
"""
Therapy TTS Module - Text-to-speech for therapy and AAC applications.

Supports:
- WhisperSpeech (fast, voice cloning)
- OpenAI TTS API (fallback)
- Edge TTS (lightweight fallback)
"""

import io
import logging
from enum import Enum
from typing import Optional
from dataclasses import dataclass

from api.config import settings

if settings.ENVIRONMENT == "development":
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.WARNING)


class TTSEngine(str, Enum):
    """Available TTS engines."""
    WHISPERSPEECH = "whisperspeech"
    OPENAI_TTS = "openai_tts"
    EDGE_TTS = "edge_tts"
    AUTO = "auto"


class TTSVoice(str, Enum):
    """Preset voice options."""
    NEUTRAL = "neutral"
    WARM = "warm"
    CLEAR = "clear"
    SLOW = "slow"  # For therapy exercises
    CUSTOM = "custom"  # Voice cloning


@dataclass
class TTSResult:
    """TTS synthesis result."""
    audio_bytes: bytes
    format: str  # wav, mp3
    sample_rate: int
    engine_used: TTSEngine
    duration_seconds: Optional[float] = None


class TherapyTTS:
    """
    TTS engine for therapy applications.

    Features:
    - Voice cloning from reference audio
    - Adjustable speed for therapy exercises
    - Multiple engine support with fallback
    """

    def __init__(self, default_engine: TTSEngine = TTSEngine.AUTO):
        self.default_engine = default_engine
        self._whisperspeech_pipe = None
        self._openai_client = None

    def _get_openai_client(self):
        """Lazy load OpenAI client."""
        if self._openai_client is None:
            from openai import OpenAI
            self._openai_client = OpenAI(api_key=settings.OPENAI_API_KEY)
        return self._openai_client

    def _get_whisperspeech(self):
        """Lazy load WhisperSpeech pipeline."""
        if self._whisperspeech_pipe is None:
            try:
                from whisperspeech.pipeline import Pipeline
                logging.info("Loading WhisperSpeech pipeline...")
                self._whisperspeech_pipe = Pipeline(
                    s2a_ref='collabora/whisperspeech:s2a-q4-tiny-en+pl.model'
                )
                logging.info("WhisperSpeech loaded successfully")
            except ImportError as e:
                logging.warning(f"WhisperSpeech not available: {e}")
                raise
        return self._whisperspeech_pipe

    def _select_engine(self, voice_reference: Optional[bytes] = None) -> TTSEngine:
        """Select TTS engine based on requirements."""
        if self.default_engine != TTSEngine.AUTO:
            return self.default_engine

        # Use WhisperSpeech for voice cloning
        if voice_reference:
            return TTSEngine.WHISPERSPEECH

        # Default to OpenAI for quality
        return TTSEngine.OPENAI_TTS

    def synthesize(
        self,
        text: str,
        voice: TTSVoice = TTSVoice.NEUTRAL,
        speed: float = 1.0,
        voice_reference: Optional[bytes] = None,
        engine: Optional[TTSEngine] = None,
        output_format: str = "wav"
    ) -> TTSResult:
        """
        Synthesize speech from text.

        Args:
            text: Text to synthesize
            voice: Voice preset to use
            speed: Speech rate (0.5 = slow, 1.0 = normal, 2.0 = fast)
            voice_reference: Audio bytes for voice cloning
            engine: Force specific engine
            output_format: Output format (wav, mp3)

        Returns:
            TTSResult with audio bytes
        """
        selected_engine = engine or self._select_engine(voice_reference)
        logging.info(f"Synthesizing with engine: {selected_engine.value}")

        # Fallback chain
        fallback_order = [selected_engine]
        if selected_engine != TTSEngine.OPENAI_TTS:
            fallback_order.append(TTSEngine.OPENAI_TTS)

        last_error = None
        for eng in fallback_order:
            try:
                if eng == TTSEngine.OPENAI_TTS:
                    return self._synthesize_openai(text, voice, speed, output_format)
                elif eng == TTSEngine.WHISPERSPEECH:
                    return self._synthesize_whisperspeech(
                        text, voice_reference, speed, output_format
                    )
                elif eng == TTSEngine.EDGE_TTS:
                    return self._synthesize_edge_tts(text, voice, speed, output_format)
            except Exception as e:
                logging.warning(f"Engine {eng.value} failed: {e}")
                last_error = e
                continue

        raise RuntimeError(f"All TTS engines failed. Last error: {last_error}")

    def _synthesize_openai(
        self,
        text: str,
        voice: TTSVoice,
        speed: float,
        output_format: str
    ) -> TTSResult:
        """Synthesize using OpenAI TTS API."""
        logging.info("Synthesizing with OpenAI TTS")

        client = self._get_openai_client()

        # Map voice presets to OpenAI voices
        voice_map = {
            TTSVoice.NEUTRAL: "alloy",
            TTSVoice.WARM: "nova",
            TTSVoice.CLEAR: "onyx",
            TTSVoice.SLOW: "alloy",  # Use speed parameter
            TTSVoice.CUSTOM: "alloy",
        }

        response = client.audio.speech.create(
            model="tts-1",
            voice=voice_map.get(voice, "alloy"),
            input=text,
            speed=speed,
            response_format="wav" if output_format == "wav" else "mp3"
        )

        audio_bytes = response.content

        return TTSResult(
            audio_bytes=audio_bytes,
            format=output_format,
            sample_rate=24000,
            engine_used=TTSEngine.OPENAI_TTS
        )

    def _synthesize_whisperspeech(
        self,
        text: str,
        voice_reference: Optional[bytes],
        speed: float,
        output_format: str
    ) -> TTSResult:
        """Synthesize using WhisperSpeech with optional voice cloning."""
        logging.info("Synthesizing with WhisperSpeech")

        import torch
        import numpy as np

        pipe = self._get_whisperspeech()

        # Generate audio
        if voice_reference:
            # Voice cloning mode
            import tempfile
            import os

            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                f.write(voice_reference)
                ref_path = f.name

            try:
                audio = pipe.generate(text, speaker=ref_path)
            finally:
                os.unlink(ref_path)
        else:
            audio = pipe.generate(text)

        # Convert to bytes
        if isinstance(audio, torch.Tensor):
            audio_np = audio.cpu().numpy()
        else:
            audio_np = np.array(audio)

        # Ensure correct shape
        if audio_np.ndim > 1:
            audio_np = audio_np.squeeze()

        # Apply speed adjustment if needed
        if speed != 1.0:
            import librosa
            audio_np = librosa.effects.time_stretch(audio_np, rate=speed)

        # Convert to wav bytes
        import soundfile as sf
        buffer = io.BytesIO()
        sf.write(buffer, audio_np, 24000, format='WAV')
        buffer.seek(0)

        return TTSResult(
            audio_bytes=buffer.read(),
            format="wav",
            sample_rate=24000,
            engine_used=TTSEngine.WHISPERSPEECH,
            duration_seconds=len(audio_np) / 24000
        )

    def _synthesize_edge_tts(
        self,
        text: str,
        voice: TTSVoice,
        speed: float,
        output_format: str
    ) -> TTSResult:
        """Synthesize using Edge TTS (lightweight fallback)."""
        logging.info("Synthesizing with Edge TTS")

        import asyncio
        import edge_tts

        # Map voice presets to Edge TTS voices
        voice_map = {
            TTSVoice.NEUTRAL: "en-US-JennyNeural",
            TTSVoice.WARM: "en-US-AriaNeural",
            TTSVoice.CLEAR: "en-US-GuyNeural",
            TTSVoice.SLOW: "en-US-JennyNeural",
+ TTSVoice.CUSTOM: "en-US-JennyNeural",
268
+ }
269
+
270
+ async def _generate():
271
+ communicate = edge_tts.Communicate(
272
+ text,
273
+ voice_map.get(voice, "en-US-JennyNeural"),
274
+ rate=f"{int((speed - 1) * 100):+d}%"
275
+ )
276
+ buffer = io.BytesIO()
277
+ async for chunk in communicate.stream():
278
+ if chunk["type"] == "audio":
279
+ buffer.write(chunk["data"])
280
+ return buffer.getvalue()
281
+
282
+ # asyncio.run() raises if an event loop is already running (e.g. when this
+ # sync method is called from an async FastAPI endpoint), so fall back to a
+ # worker thread in that case.
+ try:
+ asyncio.get_running_loop()
+ except RuntimeError:
+ audio_bytes = asyncio.run(_generate())
+ else:
+ from concurrent.futures import ThreadPoolExecutor
+ with ThreadPoolExecutor(max_workers=1) as pool:
+ audio_bytes = pool.submit(asyncio.run, _generate()).result()
283
+
284
+ return TTSResult(
285
+ audio_bytes=audio_bytes,
286
+ format="mp3",
287
+ sample_rate=24000,
288
+ engine_used=TTSEngine.EDGE_TTS
289
+ )
290
+
291
+ def generate_therapy_prompt(
292
+ self,
293
+ exercise_type: str,
294
+ target_text: str,
295
+ **kwargs
296
+ ) -> TTSResult:
297
+ """
298
+ Generate therapy exercise audio prompt.
299
+
300
+ Args:
301
+ exercise_type: Type of exercise (repeat_after_me, pronunciation, etc.)
302
+ target_text: The text to practice
303
+ **kwargs: Additional synthesis parameters
304
+
305
+ Returns:
306
+ TTSResult with exercise audio
307
+ """
308
+ prompts = {
309
+ "repeat_after_me": f"Please repeat after me: {target_text}",
310
+ "pronunciation": f"Let's practice saying: {target_text}. Listen carefully.",
311
+ "slower": f"Now try saying it more slowly: {target_text}",
312
+ "word_by_word": f"Let's break it down. {target_text}",
313
+ "encouragement": f"Great try! Let's practice {target_text} again.",
314
+ }
315
+
316
+ prompt_text = prompts.get(exercise_type, target_text)
317
+
318
+ # Use slower speed for therapy prompts
319
+ speed = kwargs.pop("speed", 0.9)
320
+
321
+ return self.synthesize(
322
+ text=prompt_text,
323
+ speed=speed,
324
+ voice=TTSVoice.CLEAR,
325
+ **kwargs
326
+ )
327
+
328
+
329
+ # Singleton instance
330
+ _therapy_tts_instance: Optional[TherapyTTS] = None
331
+
332
+
333
+ def get_therapy_tts() -> TherapyTTS:
334
+ """Get or create TherapyTTS singleton."""
335
+ global _therapy_tts_instance
336
+ if _therapy_tts_instance is None:
337
+ _therapy_tts_instance = TherapyTTS()
338
+ return _therapy_tts_instance
339
+
340
+
341
+ def synthesize_speech(
342
+ text: str,
343
+ voice: TTSVoice = TTSVoice.NEUTRAL,
344
+ speed: float = 1.0,
345
+ voice_reference: Optional[bytes] = None
346
+ ) -> TTSResult:
347
+ """Convenience function for TTS synthesis."""
348
+ tts = get_therapy_tts()
349
+ return tts.synthesize(
350
+ text=text,
351
+ voice=voice,
352
+ speed=speed,
353
+ voice_reference=voice_reference
354
+ )
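The module above can be exercised directly. A minimal usage sketch (illustrative only, not part of the commit; it assumes the package is importable and that settings.OPENAI_API_KEY is configured, since the AUTO selection falls through to OpenAI TTS when no voice reference is given):

# Illustrative sketch of using the TTS helpers defined above.
from api.endpoints.v1.processing.therapy_tts import (
    TTSVoice,
    get_therapy_tts,
    synthesize_speech,
)

# Plain synthesis; without a voice reference the AUTO selection uses OpenAI TTS.
result = synthesize_speech("Please repeat after me: hello", voice=TTSVoice.CLEAR, speed=0.9)
with open(f"speech.{result.format}", "wb") as f:
    f.write(result.audio_bytes)

# Pre-built exercise prompt ("repeat_after_me" wraps the target text).
prompt = get_therapy_tts().generate_therapy_prompt("repeat_after_me", "She sells seashells")
print(prompt.engine_used.value, prompt.duration_seconds)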
api/endpoints/v1/routers/__init__.py ADDED
File without changes
api/endpoints/v1/routers/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (184 Bytes). View file
 
api/endpoints/v1/routers/__pycache__/analytics.cpython-312.pyc ADDED
Binary file (12.5 kB). View file
 
api/endpoints/v1/routers/__pycache__/health.cpython-312.pyc ADDED
Binary file (999 Bytes). View file
 
api/endpoints/v1/routers/__pycache__/therapy.cpython-312.pyc ADDED
Binary file (24.3 kB). View file
 
api/endpoints/v1/routers/__pycache__/upload.cpython-312.pyc ADDED
Binary file (3.18 kB). View file
 
api/endpoints/v1/routers/analytics.py ADDED
@@ -0,0 +1,364 @@
1
+ """
2
+ Analytics Router - Progress tracking and reporting endpoints.
3
+
4
+ Endpoints:
5
+ - GET /analytics/summary - User progress summary
6
+ - GET /analytics/detailed - Detailed metrics for therapist view
7
+ - GET /analytics/trends - Progress trends over time
8
+ - GET /analytics/exercises - Per-exercise statistics
+ - GET /analytics/recommendations - AI-powered recommendations
+ - GET /analytics/streak - Practice streak information
+ - GET /analytics/leaderboard - Anonymized leaderboard
9
+ """
10
+
11
+ import logging
12
+ from datetime import datetime, timedelta
13
+ from typing import Optional
14
+ from fastapi import APIRouter, Depends, Query
15
+ from pydantic import BaseModel, Field
16
+
17
+ from api.config import settings
18
+ from api.endpoints.v1.auth.verify import verify_token
19
+
20
+ router = APIRouter()
21
+
22
+ if settings.ENVIRONMENT == "development":
23
+ logging.basicConfig(level=logging.DEBUG)
24
+ else:
25
+ logging.basicConfig(level=logging.WARNING)
26
+
27
+
28
+ # ============================================================================
29
+ # Response Models
30
+ # ============================================================================
31
+
32
+ class ProgressSummary(BaseModel):
33
+ """Summary of user's therapy progress."""
34
+ total_sessions: int
35
+ total_exercises: int
36
+ total_practice_minutes: int
37
+ current_streak_days: int
38
+ average_score: float
39
+ improvement_percent: float
40
+ last_session_date: Optional[str]
41
+ top_achievements: list[str]
42
+
43
+
44
+ class MetricTrend(BaseModel):
45
+ """Single metric trend data point."""
46
+ date: str
47
+ value: float
48
+ metric_type: str
49
+
50
+
51
+ class DetailedProgress(BaseModel):
52
+ """Detailed progress for therapist view."""
53
+ user_id: str
54
+ period_days: int
55
+
56
+ # Core metrics
57
+ pcc_current: float # Percent Consonants Correct
58
+ pcc_baseline: float
59
+ pcc_improvement: float
60
+
61
+ pwc_current: float # Percent Words Correct
62
+ pwc_baseline: float
63
+ pwc_improvement: float
64
+
65
+ clarity_current: float
66
+ clarity_baseline: float
67
+ clarity_improvement: float
68
+
69
+ # Session stats
70
+ sessions_completed: int
71
+ exercises_completed: int
72
+ total_practice_minutes: int
73
+
74
+ # Problem areas
75
+ problem_phonemes: list[str]
76
+ improving_phonemes: list[str]
77
+
78
+ # Recommendations
79
+ recommendations: list[str]
80
+
81
+
82
+ class ExerciseStats(BaseModel):
83
+ """Statistics for a specific exercise type."""
84
+ exercise_type: str
85
+ attempts: int
86
+ average_score: float
87
+ best_score: float
88
+ improvement: float
89
+ last_attempted: Optional[str]
90
+
91
+
92
+ class Recommendation(BaseModel):
93
+ """AI-generated recommendation."""
94
+ category: str # exercise, frequency, focus_area
95
+ title: str
96
+ description: str
97
+ priority: int # 1=high, 2=medium, 3=low
98
+ action_type: Optional[str] # specific exercise to try
99
+
100
+
101
+ # ============================================================================
102
+ # Mock Data Functions (Replace with DB queries)
103
+ # ============================================================================
104
+
105
+ def _get_mock_summary(user_id: str, days: int) -> ProgressSummary:
106
+ """Generate mock summary data. Replace with DB queries."""
107
+ return ProgressSummary(
108
+ total_sessions=23,
109
+ total_exercises=156,
110
+ total_practice_minutes=287,
111
+ current_streak_days=5,
112
+ average_score=72.5,
113
+ improvement_percent=15.3,
114
+ last_session_date=datetime.now().strftime("%Y-%m-%d"),
115
+ top_achievements=[
116
+ "Completed 20+ sessions",
117
+ "5-day practice streak",
118
+ "Mastered 'S' sound"
119
+ ]
120
+ )
121
+
122
+
123
+ def _get_mock_detailed(user_id: str, days: int) -> DetailedProgress:
124
+ """Generate mock detailed data. Replace with DB queries."""
125
+ return DetailedProgress(
126
+ user_id=user_id,
127
+ period_days=days,
128
+ pcc_current=78.5,
129
+ pcc_baseline=65.0,
130
+ pcc_improvement=13.5,
131
+ pwc_current=82.0,
132
+ pwc_baseline=70.0,
133
+ pwc_improvement=12.0,
134
+ clarity_current=75.0,
135
+ clarity_baseline=62.0,
136
+ clarity_improvement=13.0,
137
+ sessions_completed=23,
138
+ exercises_completed=156,
139
+ total_practice_minutes=287,
140
+ problem_phonemes=["th", "r", "l"],
141
+ improving_phonemes=["s", "ch", "sh"],
142
+ recommendations=[
143
+ "Focus on 'th' sound with tongue placement exercises",
144
+ "Increase practice frequency to 15 min/day",
145
+ "Try minimal pairs: 'think/sink', 'three/free'"
146
+ ]
147
+ )
148
+
149
+
150
+ def _get_mock_trends(user_id: str, days: int, metric: str) -> list[MetricTrend]:
151
+ """Generate mock trend data. Replace with DB queries."""
152
+ trends = []
153
+ base_date = datetime.now()
154
+ base_value = 65.0
155
+
156
+ for i in range(min(days, 30)):
157
+ # Count back over the returned window (capped at 30 points) so the most
+ # recent point lands on today
+ date = base_date - timedelta(days=min(days, 30) - i - 1)
158
+ # Simulate gradual improvement
159
+ value = base_value + (i * 0.5) + (i % 3 - 1)
160
+ trends.append(MetricTrend(
161
+ date=date.strftime("%Y-%m-%d"),
162
+ value=round(min(100, max(0, value)), 1),
163
+ metric_type=metric
164
+ ))
165
+
166
+ return trends
167
+
168
+
169
+ def _get_mock_exercise_stats(user_id: str) -> list[ExerciseStats]:
170
+ """Generate mock exercise stats. Replace with DB queries."""
171
+ return [
172
+ ExerciseStats(
173
+ exercise_type="repeat_after_me",
174
+ attempts=45,
175
+ average_score=74.5,
176
+ best_score=92.0,
177
+ improvement=12.3,
178
+ last_attempted=datetime.now().strftime("%Y-%m-%d")
179
+ ),
180
+ ExerciseStats(
181
+ exercise_type="minimal_pairs",
182
+ attempts=32,
183
+ average_score=68.0,
184
+ best_score=85.0,
185
+ improvement=8.5,
186
+ last_attempted=(datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
187
+ ),
188
+ ExerciseStats(
189
+ exercise_type="tongue_twisters",
190
+ attempts=18,
191
+ average_score=62.5,
192
+ best_score=78.0,
193
+ improvement=5.2,
194
+ last_attempted=(datetime.now() - timedelta(days=2)).strftime("%Y-%m-%d")
195
+ ),
196
+ ]
197
+
198
+
199
+ def _generate_recommendations(user_id: str) -> list[Recommendation]:
200
+ """Generate AI-powered recommendations. Replace with ML model."""
201
+ return [
202
+ Recommendation(
203
+ category="focus_area",
204
+ title="Focus on 'TH' Sound",
205
+ description="Your 'th' pronunciation scores are 15% below average. Try placing your tongue between your teeth.",
206
+ priority=1,
207
+ action_type="minimal_pairs"
208
+ ),
209
+ Recommendation(
210
+ category="frequency",
211
+ title="Increase Practice Time",
212
+ description="Users who practice 15+ minutes daily see 2x faster improvement. You're averaging 10 minutes.",
213
+ priority=2,
214
+ action_type=None
215
+ ),
216
+ Recommendation(
217
+ category="exercise",
218
+ title="Try Tongue Twisters",
219
+ description="Tongue twisters can help with your 'S' and 'SH' sounds. Start with 'She sells seashells'.",
220
+ priority=2,
221
+ action_type="tongue_twisters"
222
+ ),
223
+ ]
224
+
225
+
226
+ # ============================================================================
227
+ # Endpoints
228
+ # ============================================================================
229
+
230
+ @router.get("/summary", response_model=ProgressSummary, tags=["analytics"])
231
+ async def get_progress_summary(
232
+ days: int = Query(default=30, ge=1, le=365, description="Period in days"),
233
+ user: str = Depends(verify_token),
234
+ ):
235
+ """
236
+ Get user's progress summary.
237
+
238
+ Returns high-level stats suitable for the user dashboard.
239
+ """
240
+ logging.info(f"Progress summary request for user: {user}, days: {days}")
241
+
242
+ # TODO: Replace with actual DB query
243
+ return _get_mock_summary(user, days)
244
+
245
+
246
+ @router.get("/detailed", response_model=DetailedProgress, tags=["analytics"])
247
+ async def get_detailed_progress(
248
+ days: int = Query(default=30, ge=1, le=365),
249
+ user: str = Depends(verify_token),
250
+ ):
251
+ """
252
+ Get detailed progress metrics.
253
+
254
+ Returns comprehensive metrics suitable for therapist review.
255
+ Includes PCC, PWC, clarity scores, and improvement tracking.
256
+ """
257
+ logging.info(f"Detailed progress request for user: {user}")
258
+
259
+ # TODO: Replace with actual DB query
260
+ return _get_mock_detailed(user, days)
261
+
262
+
263
+ @router.get("/trends", response_model=list[MetricTrend], tags=["analytics"])
264
+ async def get_progress_trends(
265
+ metric: str = Query(
266
+ default="overall",
267
+ description="Metric type: overall, clarity, pace, pcc, pwc"
268
+ ),
269
+ days: int = Query(default=30, ge=7, le=365),
270
+ user: str = Depends(verify_token),
271
+ ):
272
+ """
273
+ Get progress trends over time.
274
+
275
+ Returns time-series data for charting progress.
276
+ """
277
+ logging.info(f"Trends request for user: {user}, metric: {metric}")
278
+
279
+ valid_metrics = ["overall", "clarity", "pace", "pcc", "pwc", "fluency"]
280
+ if metric not in valid_metrics:
281
+ metric = "overall"
282
+
283
+ # TODO: Replace with actual DB query
284
+ return _get_mock_trends(user, days, metric)
285
+
286
+
287
+ @router.get("/exercises", response_model=list[ExerciseStats], tags=["analytics"])
288
+ async def get_exercise_stats(
289
+ user: str = Depends(verify_token),
290
+ ):
291
+ """
292
+ Get statistics for each exercise type.
293
+
294
+ Shows which exercises the user has tried and their performance.
295
+ """
296
+ logging.info(f"Exercise stats request for user: {user}")
297
+
298
+ # TODO: Replace with actual DB query
299
+ return _get_mock_exercise_stats(user)
300
+
301
+
302
+ @router.get("/recommendations", response_model=list[Recommendation], tags=["analytics"])
303
+ async def get_recommendations(
304
+ user: str = Depends(verify_token),
305
+ ):
306
+ """
307
+ Get AI-powered recommendations.
308
+
309
+ Analyzes user's progress and suggests focus areas, exercises, and practice tips.
310
+ """
311
+ logging.info(f"Recommendations request for user: {user}")
312
+
313
+ # TODO: Replace with ML model
314
+ return _generate_recommendations(user)
315
+
316
+
317
+ @router.get("/streak", tags=["analytics"])
318
+ async def get_streak_info(
319
+ user: str = Depends(verify_token),
320
+ ):
321
+ """
322
+ Get practice streak information.
323
+
324
+ Returns current streak, best streak, and streak history.
325
+ """
326
+ logging.info(f"Streak info request for user: {user}")
327
+
328
+ # TODO: Replace with actual DB query
329
+ return {
330
+ "current_streak": 5,
331
+ "best_streak": 12,
332
+ "streak_history": [
333
+ {"start": "2024-11-20", "end": "2024-12-01", "days": 12},
334
+ {"start": "2024-12-03", "end": "2024-12-05", "days": 3},
335
+ {"start": "2024-12-01", "end": None, "days": 5}, # Current
336
+ ],
337
+ "next_milestone": 7,
338
+ "days_to_milestone": 2
339
+ }
340
+
341
+
342
+ @router.get("/leaderboard", tags=["analytics"])
343
+ async def get_leaderboard(
344
+ period: str = Query(default="week", description="week, month, all"),
345
+ user: str = Depends(verify_token),
346
+ ):
347
+ """
348
+ Get anonymized leaderboard.
349
+
350
+ Optional gamification feature showing relative standing.
351
+ """
352
+ logging.info(f"Leaderboard request for period: {period}")
353
+
354
+ # TODO: Replace with actual DB query
355
+ return {
356
+ "user_rank": 15,
357
+ "total_users": 127,
358
+ "percentile": 88,
359
+ "top_10": [
360
+ {"rank": 1, "score": 95.2, "exercises": 234},
361
+ {"rank": 2, "score": 93.8, "exercises": 198},
362
+ {"rank": 3, "score": 91.5, "exercises": 212},
363
+ ]
364
+ }
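Since every route here depends on verify_token, a client call looks roughly like the sketch below. The base URL, API prefix, and token are placeholders rather than values defined in this commit; adjust them to settings.API_VERSION and whatever verify_token accepts.

# Hypothetical client for the analytics router (placeholders marked inline).
import httpx

BASE_URL = "http://localhost:7860/api/v1"              # assumed prefix
HEADERS = {"Authorization": "Bearer <access-token>"}    # placeholder token

with httpx.Client(base_url=BASE_URL, headers=HEADERS) as client:
    summary = client.get("/analytics/summary", params={"days": 30}).json()
    trends = client.get("/analytics/trends", params={"metric": "pcc", "days": 30}).json()
    print(summary["average_score"], len(trends))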
api/endpoints/v1/routers/health.py ADDED
@@ -0,0 +1,24 @@
1
+ from fastapi import APIRouter, Depends, HTTPException
2
+ from fastapi.responses import JSONResponse
3
+
4
+ from api.endpoints.v1.auth.verify import verify_token
5
+
6
+ router = APIRouter()
7
+
8
+
9
+ @router.get("", status_code=200)
10
+ async def check(user: str = Depends(verify_token)):
11
+ """Secured health check endpoint."""
12
+ if not user:
13
+ raise HTTPException(status_code=401, detail="Unauthorized")
14
+
15
+ return JSONResponse(
16
+ status_code=200,
17
+ content={
18
+ "message": "Service is running smoothly",
19
+ "batches_processed": 0,
20
+ "title": "title",
21
+ "content": "content",
22
+ "transcript": "hi",
23
+ },
24
+ )
api/endpoints/v1/routers/therapy.py ADDED
@@ -0,0 +1,639 @@
1
+ """
2
+ Therapy Router - API endpoints for speech therapy features.
3
+
4
+ Endpoints:
5
+ - POST /therapy/transcribe - Transcribe audio with therapy-optimized ASR
6
+ - POST /therapy/tts - Text-to-speech synthesis
7
+ - POST /therapy/analyze - Pronunciation analysis
8
+ - GET /therapy/exercises - List available exercise types
+ - POST /therapy/exercise/evaluate - Evaluate an exercise attempt
+ - POST /therapy/session/summary - AI-generated session summary
+ - POST /therapy/insights/weekly - AI-generated weekly insights
9
+ """
10
+
11
+ import io
12
+ import logging
13
+ from typing import Optional
14
+ from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile, Query
15
+ from fastapi.responses import JSONResponse, StreamingResponse
16
+ from pydantic import BaseModel, Field
17
+
18
+ from api.config import settings
19
+ from api.endpoints.v1.auth.verify import verify_token
20
+ from api.endpoints.v1.processing.therapy_asr import (
21
+ transcribe_for_therapy,
22
+ ASREngine,
23
+ TranscriptionResult
24
+ )
25
+ from api.endpoints.v1.processing.therapy_tts import (
26
+ synthesize_speech,
27
+ get_therapy_tts,
28
+ TTSVoice,
29
+ TTSEngine
30
+ )
31
+ from api.endpoints.v1.processing.pronunciation_analysis import (
32
+ analyze_pronunciation,
33
+ PronunciationFeedback,
34
+ AIFeedback
35
+ )
36
+ from api.endpoints.v1.processing.ai_feedback import get_ai_feedback_generator
37
+
38
+ router = APIRouter()
39
+
40
+ if settings.ENVIRONMENT == "development":
41
+ logging.basicConfig(level=logging.DEBUG)
42
+ else:
43
+ logging.basicConfig(level=logging.WARNING)
44
+
45
+ # Allowed audio types
46
+ ALLOWED_AUDIO_TYPES = [
47
+ "audio/mpeg", "audio/mp4", "audio/m4a", "audio/x-m4a",
48
+ "audio/wav", "audio/x-wav", "audio/webm"
49
+ ]
50
+ FILE_SIZE_LIMIT = 25 * 1024 * 1024 # 25 MB
51
+
52
+
53
+ # Request/Response Models
54
+ class TranscribeRequest(BaseModel):
55
+ """Request model for transcription."""
56
+ engine: Optional[ASREngine] = Field(None, description="ASR engine to use")
57
+ user_profile: Optional[dict] = Field(None, description="User speech profile")
58
+
59
+
60
+ class TranscriptionResponse(BaseModel):
61
+ """Response model for transcription."""
62
+ text: str
63
+ engine_used: str
64
+ confidence: Optional[float] = None
65
+ word_timestamps: Optional[list] = None
66
+
67
+
68
+ class TTSRequest(BaseModel):
69
+ """Request model for TTS."""
70
+ text: str = Field(..., min_length=1, max_length=5000)
71
+ voice: TTSVoice = Field(default=TTSVoice.NEUTRAL)
72
+ speed: float = Field(default=1.0, ge=0.5, le=2.0)
73
+ engine: Optional[TTSEngine] = None
74
+
75
+
76
+ class PronunciationRequest(BaseModel):
77
+ """Request model for pronunciation analysis."""
78
+ target_text: str = Field(..., min_length=1, max_length=500)
79
+
80
+
81
+ class AIFeedbackResponse(BaseModel):
82
+ """AI-generated feedback from GPT-4o."""
83
+ feedback: str
84
+ encouragement: str
85
+ specific_tips: list[str]
86
+ recommended_exercises: list[str]
87
+ difficulty_adjustment: Optional[str] = None
88
+
89
+
90
+ class PronunciationResponse(BaseModel):
91
+ """Response model for pronunciation analysis."""
92
+ overall_score: float
93
+ clarity_score: float
94
+ pace_score: float
95
+ fluency_score: float
96
+ transcription: str
97
+ target_text: str
98
+ suggestions: list[str]
99
+ word_scores: list[dict]
100
+ ai_feedback: Optional[AIFeedbackResponse] = None # GPT-4o powered feedback
101
+
102
+
103
+ class ExerciseRequest(BaseModel):
104
+ """Request model for exercise generation."""
105
+ exercise_type: str = Field(..., description="Type: repeat_after_me, minimal_pairs, etc.")
106
+ difficulty: int = Field(default=1, ge=1, le=5)
107
+ focus_phonemes: Optional[list[str]] = None
108
+
109
+
110
+ class ExerciseResponse(BaseModel):
111
+ """Response model for exercise."""
112
+ exercise_id: str
113
+ exercise_type: str
114
+ target_text: str
115
+ instructions: str
116
+ audio_prompt_available: bool
117
+
118
+
119
+ # Endpoints
120
+
121
+ @router.post("/transcribe", response_model=TranscriptionResponse, tags=["therapy"])
122
+ async def transcribe_therapy_audio(
123
+ background_tasks: BackgroundTasks,
124
+ file: UploadFile = File(...),
125
+ engine: Optional[ASREngine] = Query(None, description="ASR engine"),
126
+ user: str = Depends(verify_token),
127
+ ):
128
+ """
129
+ Transcribe audio using therapy-optimized ASR.
130
+
131
+ Supports multiple engines:
132
+ - whisper_api: OpenAI Whisper API (best accuracy)
133
+ - whisper_local: Local Whisper (privacy-focused)
134
+ - speechbrain: SpeechBrain (optimized for atypical speech)
135
+ - auto: Automatic selection based on user profile
136
+ """
137
+ logging.info(f"Therapy transcription request from user: {user}")
138
+
139
+ # Validate file
140
+ if file.content_type not in ALLOWED_AUDIO_TYPES:
141
+ raise HTTPException(status_code=400, detail="Invalid audio file type")
142
+
143
+ contents = await file.read()
144
+ if len(contents) > FILE_SIZE_LIMIT:
145
+ raise HTTPException(status_code=400, detail="File size exceeds 25 MB limit")
146
+
147
+ try:
148
+ result = transcribe_for_therapy(
149
+ audio_data=contents,
150
+ filename=file.filename or "audio.wav",
151
+ content_type=file.content_type,
152
+ engine=engine
153
+ )
154
+
155
+ return TranscriptionResponse(
156
+ text=result.text,
157
+ engine_used=result.engine_used.value,
158
+ confidence=result.confidence,
159
+ word_timestamps=result.word_timestamps
160
+ )
161
+
162
+ except Exception as e:
163
+ logging.error(f"Transcription failed: {e}")
164
+ raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
165
+
166
+
167
+ @router.post("/tts", tags=["therapy"])
168
+ async def text_to_speech(
169
+ request: TTSRequest,
170
+ user: str = Depends(verify_token),
171
+ ):
172
+ """
173
+ Convert text to speech.
174
+
175
+ Returns audio stream (WAV format).
176
+ """
177
+ logging.info(f"TTS request from user: {user}")
178
+
179
+ try:
180
+ result = synthesize_speech(
181
+ text=request.text,
182
+ voice=request.voice,
183
+ speed=request.speed
184
+ )
185
+
186
+ return StreamingResponse(
187
+ io.BytesIO(result.audio_bytes),
188
+ media_type=f"audio/{result.format}",
189
+ headers={
190
+ "Content-Disposition": f"attachment; filename=speech.{result.format}",
191
+ "X-Engine-Used": result.engine_used.value
192
+ }
193
+ )
194
+
195
+ except Exception as e:
196
+ logging.error(f"TTS failed: {e}")
197
+ raise HTTPException(status_code=500, detail=f"TTS failed: {str(e)}")
198
+
199
+
200
+ @router.post("/tts/prompt", tags=["therapy"])
201
+ async def generate_therapy_prompt(
202
+ exercise_type: str = Query(..., description="Type: repeat_after_me, pronunciation, slower"),
203
+ target_text: str = Query(..., description="Text to practice"),
204
+ user: str = Depends(verify_token),
205
+ ):
206
+ """
207
+ Generate audio prompt for therapy exercise.
208
+
209
+ Pre-built prompts like "Please repeat after me: [text]"
210
+ """
211
+ logging.info(f"Therapy prompt request: {exercise_type}")
212
+
213
+ try:
214
+ tts = get_therapy_tts()
215
+ result = tts.generate_therapy_prompt(exercise_type, target_text)
216
+
217
+ return StreamingResponse(
218
+ io.BytesIO(result.audio_bytes),
219
+ media_type=f"audio/{result.format}",
220
+ headers={
221
+ "Content-Disposition": f"attachment; filename=prompt.{result.format}"
222
+ }
223
+ )
224
+
225
+ except Exception as e:
226
+ logging.error(f"Prompt generation failed: {e}")
227
+ raise HTTPException(status_code=500, detail=f"Prompt generation failed: {str(e)}")
228
+
229
+
230
+ @router.post("/analyze", response_model=PronunciationResponse, tags=["therapy"])
231
+ async def analyze_pronunciation_endpoint(
232
+ background_tasks: BackgroundTasks,
233
+ file: UploadFile = File(...),
234
+ target_text: str = Query(..., description="Expected text/phrase"),
235
+ include_ai_feedback: bool = Query(True, description="Include GPT-4o AI feedback"),
236
+ user: str = Depends(verify_token),
237
+ ):
238
+ """
239
+ Analyze pronunciation against target text.
240
+
241
+ Returns scores for:
242
+ - Overall pronunciation
243
+ - Clarity
244
+ - Pace
245
+ - Fluency
246
+ Plus per-word feedback, improvement suggestions, and AI-powered personalized feedback.
247
+
248
+ AI feedback uses GPT-4o via GitHub Models API for free testing.
249
+ """
250
+ logging.info(f"Pronunciation analysis for user: {user}")
251
+
252
+ # Validate file
253
+ if file.content_type not in ALLOWED_AUDIO_TYPES:
254
+ raise HTTPException(status_code=400, detail="Invalid audio file type")
255
+
256
+ contents = await file.read()
257
+ if len(contents) > FILE_SIZE_LIMIT:
258
+ raise HTTPException(status_code=400, detail="File size exceeds 25 MB limit")
259
+
260
+ try:
261
+ # Now async with AI feedback integration
262
+ feedback = await analyze_pronunciation(
263
+ audio_bytes=contents,
264
+ target_text=target_text,
265
+ include_ai_feedback=include_ai_feedback
266
+ )
267
+
268
+ # Build AI feedback response if available
269
+ ai_feedback_response = None
270
+ if feedback.ai_feedback:
271
+ ai_feedback_response = AIFeedbackResponse(
272
+ feedback=feedback.ai_feedback.feedback,
273
+ encouragement=feedback.ai_feedback.encouragement,
274
+ specific_tips=feedback.ai_feedback.specific_tips,
275
+ recommended_exercises=feedback.ai_feedback.recommended_exercises,
276
+ difficulty_adjustment=feedback.ai_feedback.difficulty_adjustment
277
+ )
278
+
279
+ return PronunciationResponse(
280
+ overall_score=feedback.overall_score,
281
+ clarity_score=feedback.clarity_score,
282
+ pace_score=feedback.pace_score,
283
+ fluency_score=feedback.fluency_score,
284
+ transcription=feedback.transcription,
285
+ target_text=feedback.target_text,
286
+ suggestions=feedback.suggestions,
287
+ word_scores=[
288
+ {
289
+ "word": ws.word,
290
+ "score": ws.score,
291
+ "errors": [
292
+ {
293
+ "type": e.error_type.value,
294
+ "expected": e.expected,
295
+ "actual": e.actual,
296
+ "suggestion": e.suggestion
297
+ }
298
+ for e in ws.errors
299
+ ]
300
+ }
301
+ for ws in feedback.word_scores
302
+ ],
303
+ ai_feedback=ai_feedback_response
304
+ )
305
+
306
+ except Exception as e:
307
+ logging.error(f"Pronunciation analysis failed: {e}")
308
+ raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
309
+
310
+
311
+ # ============================================================================
312
+ # Demo/Test Endpoints (no auth required)
313
+ # ============================================================================
314
+
315
+ @router.get("/demo/exercises", tags=["therapy-demo"])
316
+ async def demo_list_exercises():
317
+ """[DEMO] List exercises without auth - returns actual practice exercises."""
318
+ return {
319
+ "exercises": [
320
+ {
321
+ "id": "ex-001",
322
+ "title": "Simple Greetings",
323
+ "category": "repeat_after_me",
324
+ "difficulty": "easy",
325
+ "target_text": "Hello, how are you today?",
326
+ "instructions": "Listen carefully, then repeat the greeting clearly and naturally."
327
+ },
328
+ {
329
+ "id": "ex-002",
330
+ "title": "S Sound Practice",
331
+ "category": "minimal_pairs",
332
+ "difficulty": "medium",
333
+ "target_text": "She sells seashells by the seashore",
334
+ "instructions": "Focus on the 'S' and 'SH' sounds. Speak slowly at first."
335
+ },
336
+ {
337
+ "id": "ex-003",
338
+ "title": "R Sound Challenge",
339
+ "category": "tongue_twisters",
340
+ "difficulty": "hard",
341
+ "target_text": "Red lorry, yellow lorry",
342
+ "instructions": "Practice the 'R' and 'L' sounds. Start slow, then speed up."
343
+ },
344
+ {
345
+ "id": "ex-004",
346
+ "title": "Daily Introduction",
347
+ "category": "repeat_after_me",
348
+ "difficulty": "easy",
349
+ "target_text": "My name is... and I am learning to speak clearly.",
350
+ "instructions": "Replace '...' with your name. Speak with confidence!"
351
+ },
352
+ {
353
+ "id": "ex-005",
354
+ "title": "TH Sound Practice",
355
+ "category": "minimal_pairs",
356
+ "difficulty": "medium",
357
+ "target_text": "Think through these three things thoroughly",
358
+ "instructions": "Place your tongue between your teeth for the 'TH' sound."
359
+ },
360
+ {
361
+ "id": "ex-006",
362
+ "title": "Peter Piper",
363
+ "category": "tongue_twisters",
364
+ "difficulty": "hard",
365
+ "target_text": "Peter Piper picked a peck of pickled peppers",
366
+ "instructions": "Focus on the 'P' sounds. Keep your lips together firmly."
367
+ },
368
+ {
369
+ "id": "ex-007",
370
+ "title": "Counting Practice",
371
+ "category": "repeat_after_me",
372
+ "difficulty": "easy",
373
+ "target_text": "One, two, three, four, five",
374
+ "instructions": "Count clearly and pause briefly between each number."
375
+ },
376
+ {
377
+ "id": "ex-008",
378
+ "title": "W vs V Sounds",
379
+ "category": "minimal_pairs",
380
+ "difficulty": "medium",
381
+ "target_text": "Very well, we will wait",
382
+ "instructions": "Notice the difference: 'V' uses teeth on lip, 'W' uses rounded lips."
383
+ }
384
+ ]
385
+ }
386
+
387
+
388
+ @router.post("/demo/feedback", tags=["therapy-demo"])
389
+ async def demo_ai_feedback(
390
+ target_text: str = Query(..., description="Text to practice"),
391
+ transcription: str = Query(..., description="What user said"),
392
+ score: float = Query(75.0, description="Overall score 0-100"),
393
+ ):
394
+ """[DEMO] Get AI feedback without auth - for testing GPT-4o integration."""
395
+ generator = get_ai_feedback_generator()
396
+
397
+ feedback = await generator.generate_feedback(
398
+ target_text=target_text,
399
+ transcription=transcription,
400
+ overall_score=score,
401
+ clarity_score=score - 5,
402
+ pace_score=score + 5,
403
+ fluency_score=score,
404
+ errors=[{
405
+ "word": target_text.split()[0] if target_text else "word",
406
+ "expected": target_text.split()[0] if target_text else "word",
407
+ "actual": transcription.split()[0] if transcription else "word",
408
+ "error_type": "substitution"
409
+ }] if target_text != transcription else [],
410
+ user_context=None
411
+ )
412
+
413
+ return {
414
+ "target_text": target_text,
415
+ "transcription": transcription,
416
+ "scores": {
417
+ "overall": score,
418
+ "clarity": score - 5,
419
+ "pace": score + 5,
420
+ "fluency": score
421
+ },
422
+ "ai_feedback": {
423
+ "feedback": feedback.feedback,
424
+ "encouragement": feedback.encouragement,
425
+ "specific_tips": feedback.specific_tips,
426
+ "recommended_exercises": feedback.recommended_exercises,
427
+ "difficulty_adjustment": feedback.difficulty_adjustment
428
+ }
429
+ }
430
+
431
+
432
+ @router.get("/demo/session-summary", tags=["therapy-demo"])
433
+ async def demo_session_summary():
434
+ """[DEMO] Get AI session summary without auth."""
435
+ generator = get_ai_feedback_generator()
436
+
437
+ summary = await generator.generate_session_summary(
438
+ session_stats={
439
+ "duration_minutes": 12,
440
+ "exercise_count": 6,
441
+ "average_score": 78,
442
+ "best_score": 92,
443
+ "exercise_types": ["repeat_after_me", "minimal_pairs"]
444
+ },
445
+ attempts=[]
446
+ )
447
+
448
+ return {"summary": summary}
449
+
450
+
451
+ @router.get("/demo/weekly-insights", tags=["therapy-demo"])
452
+ async def demo_weekly_insights():
453
+ """[DEMO] Get AI weekly insights without auth."""
454
+ generator = get_ai_feedback_generator()
455
+
456
+ insights = await generator.generate_weekly_insights(
457
+ weekly_data={
458
+ "sessions_this_week": 5,
459
+ "practice_minutes": 40,
460
+ "avg_score": 76,
461
+ "score_change": 6.5,
462
+ "strengths": ["Consistent daily practice", "Good pace control"],
463
+ "weaknesses": ["S sounds", "Word endings"]
464
+ }
465
+ )
466
+
467
+ return insights
468
+
469
+
470
+ # ============================================================================
471
+ # Authenticated Endpoints
472
+ # ============================================================================
473
+
474
+ @router.get("/exercises", tags=["therapy"])
475
+ async def list_exercise_types(
476
+ user: str = Depends(verify_token),
477
+ ):
478
+ """List available therapy exercise types."""
479
+ return {
480
+ "exercises": [
481
+ {
482
+ "type": "repeat_after_me",
483
+ "name": "Repeat After Me",
484
+ "description": "Listen and repeat the target phrase"
485
+ },
486
+ {
487
+ "type": "minimal_pairs",
488
+ "name": "Minimal Pairs",
489
+ "description": "Practice similar-sounding words (e.g., ship/chip)"
490
+ },
491
+ {
492
+ "type": "tongue_twisters",
493
+ "name": "Tongue Twisters",
494
+ "description": "Practice challenging phrases for fluency"
495
+ },
496
+ {
497
+ "type": "word_chains",
498
+ "name": "Word Chains",
499
+ "description": "Build vocabulary with connected words"
500
+ },
501
+ {
502
+ "type": "sentence_building",
503
+ "name": "Sentence Building",
504
+ "description": "Progress from words to full sentences"
505
+ }
506
+ ]
507
+ }
508
+
509
+
510
+ @router.post("/exercise/evaluate", response_model=PronunciationResponse, tags=["therapy"])
511
+ async def evaluate_exercise(
512
+ background_tasks: BackgroundTasks,
513
+ file: UploadFile = File(...),
514
+ exercise_type: str = Query(...),
515
+ target_text: str = Query(...),
516
+ include_ai_feedback: bool = Query(True),
517
+ user: str = Depends(verify_token),
518
+ ):
519
+ """
520
+ Evaluate user's exercise attempt.
521
+
522
+ Same as /analyze but tracks exercise context.
523
+ Includes GPT-4o AI feedback for personalized improvement tips.
524
+ """
525
+ # Reuse pronunciation analysis
526
+ return await analyze_pronunciation_endpoint(
527
+ background_tasks=background_tasks,
528
+ file=file,
529
+ target_text=target_text,
530
+ include_ai_feedback=include_ai_feedback,
531
+ user=user
532
+ )
533
+
534
+
535
+ # Session Summary Models
536
+ class SessionSummaryRequest(BaseModel):
537
+ """Request for session summary."""
538
+ duration_minutes: int = Field(..., ge=1)
539
+ exercise_count: int = Field(..., ge=1)
540
+ average_score: float = Field(..., ge=0, le=100)
541
+ best_score: float = Field(..., ge=0, le=100)
542
+ exercise_types: list[str] = Field(default_factory=list)
543
+
544
+
545
+ class SessionSummaryResponse(BaseModel):
546
+ """AI-generated session summary."""
547
+ summary: str
548
+
549
+
550
+ class WeeklyInsightsRequest(BaseModel):
551
+ """Request for weekly insights."""
552
+ sessions_this_week: int = Field(..., ge=0)
553
+ practice_minutes: int = Field(..., ge=0)
554
+ avg_score: float = Field(..., ge=0, le=100)
555
+ score_change: float = Field(default=0) # Percentage change from last week
556
+ strengths: list[str] = Field(default_factory=list)
557
+ weaknesses: list[str] = Field(default_factory=list)
558
+
559
+
560
+ class WeeklyInsightsResponse(BaseModel):
561
+ """AI-generated weekly insights."""
562
+ summary: str
563
+ celebration: str
564
+ focus_area: str
565
+ goal: str
566
+
567
+
568
+ @router.post("/session/summary", response_model=SessionSummaryResponse, tags=["therapy"])
569
+ async def get_session_summary(
570
+ request: SessionSummaryRequest,
571
+ user: str = Depends(verify_token),
572
+ ):
573
+ """
574
+ Generate AI-powered session summary.
575
+
576
+ Uses GPT-4o via GitHub Models to create personalized,
577
+ encouraging session summaries.
578
+ """
579
+ logging.info(f"Session summary request from user: {user}")
580
+
581
+ try:
582
+ ai_generator = get_ai_feedback_generator()
583
+ summary = await ai_generator.generate_session_summary(
584
+ session_stats={
585
+ "duration_minutes": request.duration_minutes,
586
+ "exercise_count": request.exercise_count,
587
+ "average_score": request.average_score,
588
+ "best_score": request.best_score,
589
+ "exercise_types": request.exercise_types
590
+ },
591
+ attempts=[] # Can be extended to include attempt history
592
+ )
593
+
594
+ return SessionSummaryResponse(summary=summary)
595
+
596
+ except Exception as e:
597
+ logging.error(f"Session summary generation failed: {e}")
598
+ raise HTTPException(status_code=500, detail=f"Summary generation failed: {str(e)}")
599
+
600
+
601
+ @router.post("/insights/weekly", response_model=WeeklyInsightsResponse, tags=["therapy"])
602
+ async def get_weekly_insights(
603
+ request: WeeklyInsightsRequest,
604
+ user: str = Depends(verify_token),
605
+ ):
606
+ """
607
+ Generate AI-powered weekly progress insights.
608
+
609
+ Uses GPT-4o to analyze weekly practice data and provide:
610
+ - Progress summary
611
+ - Celebration of achievements
612
+ - Focus area for next week
613
+ - Realistic goal setting
614
+ """
615
+ logging.info(f"Weekly insights request from user: {user}")
616
+
617
+ try:
618
+ ai_generator = get_ai_feedback_generator()
619
+ insights = await ai_generator.generate_weekly_insights(
620
+ weekly_data={
621
+ "sessions_this_week": request.sessions_this_week,
622
+ "practice_minutes": request.practice_minutes,
623
+ "avg_score": request.avg_score,
624
+ "score_change": request.score_change,
625
+ "strengths": request.strengths,
626
+ "weaknesses": request.weaknesses
627
+ }
628
+ )
629
+
630
+ return WeeklyInsightsResponse(
631
+ summary=insights.get("summary", ""),
632
+ celebration=insights.get("celebration", ""),
633
+ focus_area=insights.get("focus_area", ""),
634
+ goal=insights.get("goal", "")
635
+ )
636
+
637
+ except Exception as e:
638
+ logging.error(f"Weekly insights generation failed: {e}")
639
+ raise HTTPException(status_code=500, detail=f"Insights generation failed: {str(e)}")
api/endpoints/v1/routers/upload.py ADDED
@@ -0,0 +1,82 @@
1
+ import io
2
+ import logging
3
+
4
+ from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile
5
+ from fastapi.responses import JSONResponse
6
+
7
+ from api.config import settings
8
+ from api.endpoints.v1.auth.verify import verify_token
9
+ from api.endpoints.v1.processing.audio import transcribe_with_whisper
10
+ from api.endpoints.v1.processing.soap import generate_soap
11
+
12
+ router = APIRouter()
13
+
14
+ if settings.ENVIRONMENT == "development":
15
+ logging.basicConfig(level=logging.DEBUG)
16
+ else:
17
+ logging.basicConfig(level=logging.WARNING)
18
+
19
+ # OpenAI Whisper supports the following file types
20
+ ALLOWED_FILE_TYPES = [
21
+ "audio/mpeg",
22
+ "audio/mp4",
23
+ "audio/m4a",
24
+ "audio/x-m4a",
25
+ "audio/wav",
26
+ "audio/x-wav",
27
+ "audio/webm",
28
+ "video/mp4",
29
+ "video/mpeg",
30
+ ]
31
+ # OpenAI Whisper file uploads are currently limited to 25 MB
32
+ FILE_SIZE_LIMIT = 25 * 1024 * 1024 # 25 MB in bytes
33
+
34
+
35
+ @router.post("", status_code=200)
36
+ async def transcribe_audio(
37
+ background_tasks: BackgroundTasks,
38
+ file: UploadFile = File(...),
39
+ user: str = Depends(verify_token),
40
+ ):
41
+ """Endpoint to upload and process audio files with OpenAI Whisper."""
42
+
43
+ logging.info(f"Transcribing audio file: {file.filename}")
44
+ logging.debug(f"Audio file mime type: {file.content_type}")
45
+
46
+ # Check file type
47
+ if file.content_type not in ALLOWED_FILE_TYPES:
48
+ raise HTTPException(status_code=400, detail="Invalid file type")
49
+
50
+ # Check file size
51
+ contents = await file.read()
52
+ logging.debug(f"size: {len(contents)} bytes")
53
+ if len(contents) > FILE_SIZE_LIMIT:
54
+ raise HTTPException(status_code=400, detail="File size exceeds 25 MB limit")
55
+
56
+ try:
57
+ # Use BytesIO to handle the file in-memory
58
+ file_like = io.BytesIO(contents)
59
+
60
+ # Ensure the buffer's position is at the start before it is read
61
+ file_like.seek(0)
62
+
63
+ # Add a background task to close the buffer after processing
64
+ background_tasks.add_task(file_like.close)
65
+
66
+ # Pass the file-like object to the transcription function
67
+ transcription = transcribe_with_whisper(
68
+ file.filename, file_like, file.content_type
69
+ )
70
+
71
+ # Generate a SOAP note from the transcription
72
+ soap_note = generate_soap(transcription.text)
73
+
74
+ return JSONResponse(
75
+ content={
76
+ "message": f"File processed successfully by user {user}",
77
+ "content": soap_note,
78
+ "transcription": transcription.text,
79
+ }
80
+ )
81
+ except Exception as e:
82
+ raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
api/endpoints/v1/therapy/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Therapy module - Speech therapy exercises and analysis."""
api/endpoints/v1/utils.py ADDED
@@ -0,0 +1,7 @@
1
+ from datetime import datetime, timezone
2
+
3
+
4
+ # Function to parse RFC 3339 datetime and convert to epoch time
5
+ def parse_rfc3339(time_str):
6
+ dt = datetime.fromisoformat(time_str.replace("Z", "+00:00"))
7
+ # Convert (rather than overwrite) the offset so non-UTC inputs keep the
+ # correct instant; naive inputs are assumed to be UTC.
+ if dt.tzinfo is None:
+ dt = dt.replace(tzinfo=timezone.utc)
+ return dt.astimezone(timezone.utc).timestamp()
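For example, two RFC 3339 strings that denote the same instant map to the same epoch value:

# Quick check of the conversion (both timestamps are 12:00 UTC).
from api.endpoints.v1.utils import parse_rfc3339

assert parse_rfc3339("2024-12-01T12:00:00Z") == parse_rfc3339("2024-12-01T14:00:00+02:00")
print(parse_rfc3339("2024-12-01T12:00:00Z"))  # 1733054400.0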
api/index.py ADDED
@@ -0,0 +1,5 @@
1
+ # Vercel serverless entry point
2
+ from api.main import app
3
+
4
+ # Handler for Vercel
5
+ handler = app
api/main.py ADDED
@@ -0,0 +1,63 @@
1
+ import logging
2
+
3
+ from fastapi import APIRouter, FastAPI
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from fastapi.routing import APIRoute
6
+
7
+ from api.config import settings
8
+ from api.endpoints.v1.api import api_router
9
+
10
+ info_router = APIRouter()
11
+
12
+
13
+ @info_router.get("/", status_code=200, include_in_schema=False)
14
+ async def info():
15
+ return [{"Status": "API Running"}]
16
+
17
+
18
+ @info_router.get("/health", status_code=200, tags=["health"])
19
+ async def health_check():
20
+ """Health check endpoint for Railway deployment"""
21
+ return {"status": "healthy", "service": "ubumuntu-api"}
22
+
23
+
24
+ def custom_generate_unique_id(route: APIRoute):
25
+ """Generates a custom ID when using the TypeScript Generator Client
26
+
27
+ Args:
28
+ route (APIRoute): The route to be customised
29
+
30
+ Returns:
31
+ str: tag-route_name, e.g. items-CreateItem
32
+ """
33
+ return f"{route.tags[0]}-{route.name}"
34
+
35
+
36
+ def get_application():
37
+ _app = FastAPI(
38
+ title=settings.PROJECT_NAME,
39
+ description=settings.PROJECT_DESCRIPTION,
40
+ generate_unique_id_function=custom_generate_unique_id,
41
+ root_path=settings.ROOT,
42
+ root_path_in_servers=True,
43
+ openapi_url=settings.openapi_url,
44
+ )
45
+
46
+ # Allow all origins for demo - in production, restrict to specific domains
47
+ logger = logging.getLogger("uvicorn")
48
+ logger.info("Enabling CORS for all origins (demo mode)")
49
+ _app.add_middleware(
50
+ CORSMiddleware,
51
+ allow_origins=["*"],
52
+ allow_credentials=True,
53
+ allow_methods=["*"],
54
+ allow_headers=["*"],
55
+ )
56
+
57
+ _app.include_router(api_router, prefix=settings.API_VERSION)
58
+ _app.include_router(info_router, tags=[""])
59
+
60
+ return _app
61
+
62
+
63
+ app = get_application()
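A minimal smoke-test sketch for the application factory (runnable once the package's dependencies and settings are in place):

# The two unauthenticated routes defined above should respond immediately.
from fastapi.testclient import TestClient

from api.main import app

client = TestClient(app)
assert client.get("/health").json() == {"status": "healthy", "service": "ubumuntu-api"}
assert client.get("/").status_code == 200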
app.py ADDED
@@ -0,0 +1,5 @@
1
+ # HuggingFace Spaces entry point
2
+ from api.main import app
3
+
4
+ # Re-export for uvicorn
5
+ __all__ = ["app"]
requirements.txt ADDED
@@ -0,0 +1,10 @@
1
+ fastapi
2
+ uvicorn[standard]
3
+ pydantic
4
+ pydantic-settings
5
+ python-dotenv
6
+ python-jose[cryptography]
7
+ python-multipart
8
+ openai
9
+ httpx
10
+ requests