Spaces:
Sleeping
Sleeping
Macbook
committed on
Commit
·
4ac15b2
1
Parent(s):
f9b1f70
Deploy with Groq Whisper ASR
Browse files
- .gitignore +6 -0
- Dockerfile +7 -2
- api/config.py +2 -0
- api/data/exercises.py +6 -573
- api/endpoints/v1/processing/therapy_asr.py +152 -11
- api/endpoints/v1/routers/therapy.py +48 -0
- requirements.txt +9 -0
.gitignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
models/
|
| 5 |
+
.DS_Store
|
| 6 |
+
model_checkpoints/
|
Dockerfile
CHANGED
|
@@ -1,4 +1,9 @@
|
|
| 1 |
-
FROM python:3.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
RUN useradd -m -u 1000 user
|
| 4 |
USER user
|
|
@@ -10,4 +15,4 @@ COPY --chown=user ./requirements.txt requirements.txt
|
|
| 10 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 11 |
|
| 12 |
COPY --chown=user . /app
|
| 13 |
-
CMD ["uvicorn", "
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
# Install system dependencies
|
| 4 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 5 |
+
ffmpeg \
|
| 6 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 7 |
|
| 8 |
RUN useradd -m -u 1000 user
|
| 9 |
USER user
|
|
|
|
| 15 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 16 |
|
| 17 |
COPY --chown=user . /app
|
| 18 |
+
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
api/config.py
CHANGED
|
@@ -19,6 +19,8 @@ class Settings(BaseSettings):
|
|
| 19 |
CLERK_PEM_PUBLIC_KEY: str = os.getenv("CLERK_PEM_PUBLIC_KEY", "")
|
| 20 |
OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
|
| 21 |
GITHUB_TOKEN: str = os.getenv("GITHUB_TOKEN", "") # For GitHub Models GPT-4o
|
|
|
|
|
|
|
| 22 |
|
| 23 |
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
|
| 24 |
openapi_url: str = "/openapi.json"
|
|
|
|
| 19 |
CLERK_PEM_PUBLIC_KEY: str = os.getenv("CLERK_PEM_PUBLIC_KEY", "")
|
| 20 |
OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
|
| 21 |
GITHUB_TOKEN: str = os.getenv("GITHUB_TOKEN", "") # For GitHub Models GPT-4o
|
| 22 |
+
HUGGINGFACE_TOKEN: str = os.getenv("HUGGINGFACE_TOKEN", os.getenv("HF_TOKEN", "")) # For HuggingFace API
|
| 23 |
+
GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "") # Free fast Whisper large-v3
|
| 24 |
|
| 25 |
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
|
| 26 |
openapi_url: str = "/openapi.json"
|
api/data/exercises.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
-
Categories: Fundamentals, Speech Practice
|
| 4 |
Languages: English (en), French (fr)
|
| 5 |
"""
|
| 6 |
|
|
@@ -11,8 +11,6 @@ from enum import Enum
|
|
| 11 |
|
| 12 |
class ExerciseType(str, Enum):
|
| 13 |
# Fundamentals
|
| 14 |
-
BREATHING = "breathing"
|
| 15 |
-
ARTICULATION = "articulation"
|
| 16 |
PHONEME = "phoneme"
|
| 17 |
|
| 18 |
# Speech Practice
|
|
@@ -20,16 +18,6 @@ class ExerciseType(str, Enum):
|
|
| 20 |
SENTENCE_READING = "sentence_reading"
|
| 21 |
TONGUE_TWISTER = "tongue_twister"
|
| 22 |
|
| 23 |
-
# Visual Learning
|
| 24 |
-
COLOR = "color"
|
| 25 |
-
OBJECT = "object"
|
| 26 |
-
ANIMAL = "animal"
|
| 27 |
-
ACTION = "action"
|
| 28 |
-
|
| 29 |
-
# Sound Imitation
|
| 30 |
-
ANIMAL_SOUND = "animal_sound"
|
| 31 |
-
ENVIRONMENTAL_SOUND = "environmental_sound"
|
| 32 |
-
|
| 33 |
|
| 34 |
class Difficulty(str, Enum):
|
| 35 |
EASY = "easy"
|
|
@@ -39,7 +27,7 @@ class Difficulty(str, Enum):
|
|
| 39 |
|
| 40 |
class Category(BaseModel):
|
| 41 |
id: str
|
| 42 |
-
name: Dict[str, str]
|
| 43 |
description: Dict[str, str]
|
| 44 |
icon: str
|
| 45 |
subcategories: List[str]
|
|
@@ -54,8 +42,6 @@ class Exercise(BaseModel):
|
|
| 54 |
title: Dict[str, str]
|
| 55 |
target_text: Dict[str, str]
|
| 56 |
instructions: Dict[str, str]
|
| 57 |
-
image_url: Optional[str] = None
|
| 58 |
-
audio_url: Optional[str] = None
|
| 59 |
phoneme_focus: Optional[List[str]] = None
|
| 60 |
|
| 61 |
|
|
@@ -68,11 +54,11 @@ CATEGORIES: List[Dict] = [
|
|
| 68 |
"id": "fundamentals",
|
| 69 |
"name": {"en": "Fundamentals", "fr": "Fondamentaux"},
|
| 70 |
"description": {
|
| 71 |
-
"en": "
|
| 72 |
-
"fr": "
|
| 73 |
},
|
| 74 |
"icon": "🎯",
|
| 75 |
-
"subcategories": ["
|
| 76 |
},
|
| 77 |
{
|
| 78 |
"id": "speech_practice",
|
|
@@ -83,38 +69,10 @@ CATEGORIES: List[Dict] = [
|
|
| 83 |
},
|
| 84 |
"icon": "📖",
|
| 85 |
"subcategories": ["word_repetition", "sentence_reading", "tongue_twister"]
|
| 86 |
-
},
|
| 87 |
-
{
|
| 88 |
-
"id": "visual_learning",
|
| 89 |
-
"name": {"en": "Visual Learning", "fr": "Apprentissage Visuel"},
|
| 90 |
-
"description": {
|
| 91 |
-
"en": "Learn by identifying colors, objects, animals, and actions",
|
| 92 |
-
"fr": "Apprenez en identifiant les couleurs, objets, animaux et actions"
|
| 93 |
-
},
|
| 94 |
-
"icon": "🖼️",
|
| 95 |
-
"subcategories": ["color", "object", "animal", "action"]
|
| 96 |
-
},
|
| 97 |
-
{
|
| 98 |
-
"id": "sound_imitation",
|
| 99 |
-
"name": {"en": "Sound Imitation", "fr": "Imitation de Sons"},
|
| 100 |
-
"description": {
|
| 101 |
-
"en": "Imitate animal and environmental sounds",
|
| 102 |
-
"fr": "Imitez les sons d'animaux et de l'environnement"
|
| 103 |
-
},
|
| 104 |
-
"icon": "🔊",
|
| 105 |
-
"subcategories": ["animal_sound", "environmental_sound"]
|
| 106 |
}
|
| 107 |
]
|
| 108 |
|
| 109 |
SUBCATEGORIES: Dict[str, Dict] = {
|
| 110 |
-
"breathing": {
|
| 111 |
-
"name": {"en": "Breathing Exercises", "fr": "Exercices de Respiration"},
|
| 112 |
-
"description": {"en": "Control your breath for better speech", "fr": "Contrôlez votre respiration pour mieux parler"}
|
| 113 |
-
},
|
| 114 |
-
"articulation": {
|
| 115 |
-
"name": {"en": "Articulation Drills", "fr": "Exercices d'Articulation"},
|
| 116 |
-
"description": {"en": "Improve mouth and tongue movements", "fr": "Améliorez les mouvements de la bouche et de la langue"}
|
| 117 |
-
},
|
| 118 |
"phoneme": {
|
| 119 |
"name": {"en": "Phoneme Practice", "fr": "Pratique des Phonèmes"},
|
| 120 |
"description": {"en": "Master specific sounds like R, S, TH", "fr": "Maîtrisez des sons spécifiques comme R, S, CH"}
|
|
@@ -130,30 +88,6 @@ SUBCATEGORIES: Dict[str, Dict] = {
|
|
| 130 |
"tongue_twister": {
|
| 131 |
"name": {"en": "Tongue Twisters", "fr": "Virelangues"},
|
| 132 |
"description": {"en": "Challenge yourself with tricky phrases", "fr": "Défiez-vous avec des phrases difficiles"}
|
| 133 |
-
},
|
| 134 |
-
"color": {
|
| 135 |
-
"name": {"en": "Colors", "fr": "Couleurs"},
|
| 136 |
-
"description": {"en": "Identify and say color names", "fr": "Identifiez et dites les noms des couleurs"}
|
| 137 |
-
},
|
| 138 |
-
"object": {
|
| 139 |
-
"name": {"en": "Objects", "fr": "Objets"},
|
| 140 |
-
"description": {"en": "Name everyday objects", "fr": "Nommez des objets du quotidien"}
|
| 141 |
-
},
|
| 142 |
-
"animal": {
|
| 143 |
-
"name": {"en": "Animals", "fr": "Animaux"},
|
| 144 |
-
"description": {"en": "Identify animals by sight", "fr": "Identifiez les animaux à vue"}
|
| 145 |
-
},
|
| 146 |
-
"action": {
|
| 147 |
-
"name": {"en": "Actions", "fr": "Actions"},
|
| 148 |
-
"description": {"en": "Describe what people are doing", "fr": "Décrivez ce que font les gens"}
|
| 149 |
-
},
|
| 150 |
-
"animal_sound": {
|
| 151 |
-
"name": {"en": "Animal Sounds", "fr": "Sons d'Animaux"},
|
| 152 |
-
"description": {"en": "Imitate animal sounds", "fr": "Imitez les sons des animaux"}
|
| 153 |
-
},
|
| 154 |
-
"environmental_sound": {
|
| 155 |
-
"name": {"en": "Environmental Sounds", "fr": "Sons de l'Environnement"},
|
| 156 |
-
"description": {"en": "Imitate sounds around us", "fr": "Imitez les sons autour de nous"}
|
| 157 |
}
|
| 158 |
}
|
| 159 |
|
|
@@ -163,74 +97,6 @@ SUBCATEGORIES: Dict[str, Dict] = {
|
|
| 163 |
# =============================================================================
|
| 164 |
|
| 165 |
EXERCISES: List[Dict] = [
|
| 166 |
-
# =========================================================================
|
| 167 |
-
# FUNDAMENTALS - Breathing
|
| 168 |
-
# =========================================================================
|
| 169 |
-
{
|
| 170 |
-
"id": "breath-001",
|
| 171 |
-
"type": "breathing",
|
| 172 |
-
"category": "fundamentals",
|
| 173 |
-
"subcategory": "breathing",
|
| 174 |
-
"difficulty": "easy",
|
| 175 |
-
"title": {"en": "Deep Belly Breathing", "fr": "Respiration Abdominale"},
|
| 176 |
-
"target_text": {"en": "Breathe in slowly through your nose, hold, breathe out through your mouth", "fr": "Inspirez lentement par le nez, retenez, expirez par la bouche"},
|
| 177 |
-
"instructions": {"en": "Place your hand on your belly. Breathe in for 4 seconds, hold for 2, breathe out for 4. Repeat 3 times.", "fr": "Placez votre main sur le ventre. Inspirez 4 secondes, retenez 2, expirez 4. Répétez 3 fois."},
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"id": "breath-002",
|
| 181 |
-
"type": "breathing",
|
| 182 |
-
"category": "fundamentals",
|
| 183 |
-
"subcategory": "breathing",
|
| 184 |
-
"difficulty": "easy",
|
| 185 |
-
"title": {"en": "Candle Blow", "fr": "Souffler la Bougie"},
|
| 186 |
-
"target_text": {"en": "Take a deep breath and blow out slowly like blowing a candle", "fr": "Prenez une grande inspiration et soufflez doucement comme une bougie"},
|
| 187 |
-
"instructions": {"en": "Imagine a candle in front of you. Take a deep breath and blow slowly to make the flame flicker but not go out.", "fr": "Imaginez une bougie devant vous. Inspirez et soufflez doucement pour faire vaciller la flamme sans l'éteindre."},
|
| 188 |
-
},
|
| 189 |
-
{
|
| 190 |
-
"id": "breath-003",
|
| 191 |
-
"type": "breathing",
|
| 192 |
-
"category": "fundamentals",
|
| 193 |
-
"subcategory": "breathing",
|
| 194 |
-
"difficulty": "medium",
|
| 195 |
-
"title": {"en": "Sustained Breath", "fr": "Souffle Prolongé"},
|
| 196 |
-
"target_text": {"en": "Aaaaaaaaahhhhhh", "fr": "Aaaaaaaaahhhhhh"},
|
| 197 |
-
"instructions": {"en": "Take a deep breath and say 'Ahhh' for as long as you can. Try to reach 10 seconds!", "fr": "Inspirez profondément et dites 'Ahhh' aussi longtemps que possible. Essayez d'atteindre 10 secondes!"},
|
| 198 |
-
},
|
| 199 |
-
|
| 200 |
-
# =========================================================================
|
| 201 |
-
# FUNDAMENTALS - Articulation
|
| 202 |
-
# =========================================================================
|
| 203 |
-
{
|
| 204 |
-
"id": "artic-001",
|
| 205 |
-
"type": "articulation",
|
| 206 |
-
"category": "fundamentals",
|
| 207 |
-
"subcategory": "articulation",
|
| 208 |
-
"difficulty": "easy",
|
| 209 |
-
"title": {"en": "Lip Warm-up", "fr": "Échauffement des Lèvres"},
|
| 210 |
-
"target_text": {"en": "Ma ma ma, Pa pa pa, Ba ba ba", "fr": "Ma ma ma, Pa pa pa, Ba ba ba"},
|
| 211 |
-
"instructions": {"en": "Say each syllable clearly, focusing on your lip movements. Repeat 3 times.", "fr": "Prononcez chaque syllabe clairement en vous concentrant sur vos lèvres. Répétez 3 fois."},
|
| 212 |
-
},
|
| 213 |
-
{
|
| 214 |
-
"id": "artic-002",
|
| 215 |
-
"type": "articulation",
|
| 216 |
-
"category": "fundamentals",
|
| 217 |
-
"subcategory": "articulation",
|
| 218 |
-
"difficulty": "easy",
|
| 219 |
-
"title": {"en": "Tongue Stretch", "fr": "Étirement de la Langue"},
|
| 220 |
-
"target_text": {"en": "La la la, Ta ta ta, Da da da", "fr": "La la la, Ta ta ta, Da da da"},
|
| 221 |
-
"instructions": {"en": "Touch the roof of your mouth with your tongue for each syllable. Feel the movement!", "fr": "Touchez le palais avec votre langue pour chaque syllabe. Sentez le mouvement!"},
|
| 222 |
-
},
|
| 223 |
-
{
|
| 224 |
-
"id": "artic-003",
|
| 225 |
-
"type": "articulation",
|
| 226 |
-
"category": "fundamentals",
|
| 227 |
-
"subcategory": "articulation",
|
| 228 |
-
"difficulty": "medium",
|
| 229 |
-
"title": {"en": "Jaw Exercise", "fr": "Exercice de Mâchoire"},
|
| 230 |
-
"target_text": {"en": "Wa wa wa, Ya ya ya, Oo ee oo ee", "fr": "Oua oua oua, Ya ya ya, Ou i ou i"},
|
| 231 |
-
"instructions": {"en": "Open your mouth wide for each sound. Feel your jaw moving up and down.", "fr": "Ouvrez grand la bouche pour chaque son. Sentez votre mâchoire bouger."},
|
| 232 |
-
},
|
| 233 |
-
|
| 234 |
# =========================================================================
|
| 235 |
# FUNDAMENTALS - Phoneme Practice
|
| 236 |
# =========================================================================
|
|
@@ -415,437 +281,6 @@ EXERCISES: List[Dict] = [
|
|
| 415 |
"instructions": {"en": "Focus on the 'N' and 'Y' sounds.", "fr": "Concentrez-vous sur les sons 'P' et 'N'."},
|
| 416 |
"phoneme_focus": ["N", "Y"]
|
| 417 |
},
|
| 418 |
-
|
| 419 |
-
# =========================================================================
|
| 420 |
-
# VISUAL LEARNING - Colors
|
| 421 |
-
# =========================================================================
|
| 422 |
-
{
|
| 423 |
-
"id": "color-001",
|
| 424 |
-
"type": "color",
|
| 425 |
-
"category": "visual_learning",
|
| 426 |
-
"subcategory": "color",
|
| 427 |
-
"difficulty": "easy",
|
| 428 |
-
"title": {"en": "Red", "fr": "Rouge"},
|
| 429 |
-
"target_text": {"en": "Red", "fr": "Rouge"},
|
| 430 |
-
"instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
|
| 431 |
-
"image_url": "https://images.unsplash.com/photo-1562157873-818bc0726f68?w=400&h=400&fit=crop"
|
| 432 |
-
},
|
| 433 |
-
{
|
| 434 |
-
"id": "color-002",
|
| 435 |
-
"type": "color",
|
| 436 |
-
"category": "visual_learning",
|
| 437 |
-
"subcategory": "color",
|
| 438 |
-
"difficulty": "easy",
|
| 439 |
-
"title": {"en": "Blue", "fr": "Bleu"},
|
| 440 |
-
"target_text": {"en": "Blue", "fr": "Bleu"},
|
| 441 |
-
"instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
|
| 442 |
-
"image_url": "https://images.unsplash.com/photo-1579546929518-9e396f3cc809?w=400&h=400&fit=crop"
|
| 443 |
-
},
|
| 444 |
-
{
|
| 445 |
-
"id": "color-003",
|
| 446 |
-
"type": "color",
|
| 447 |
-
"category": "visual_learning",
|
| 448 |
-
"subcategory": "color",
|
| 449 |
-
"difficulty": "easy",
|
| 450 |
-
"title": {"en": "Yellow", "fr": "Jaune"},
|
| 451 |
-
"target_text": {"en": "Yellow", "fr": "Jaune"},
|
| 452 |
-
"instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
|
| 453 |
-
"image_url": "https://images.unsplash.com/photo-1495001258031-d1b407bc1776?w=400&h=400&fit=crop"
|
| 454 |
-
},
|
| 455 |
-
{
|
| 456 |
-
"id": "color-004",
|
| 457 |
-
"type": "color",
|
| 458 |
-
"category": "visual_learning",
|
| 459 |
-
"subcategory": "color",
|
| 460 |
-
"difficulty": "easy",
|
| 461 |
-
"title": {"en": "Green", "fr": "Vert"},
|
| 462 |
-
"target_text": {"en": "Green", "fr": "Vert"},
|
| 463 |
-
"instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
|
| 464 |
-
"image_url": "https://images.unsplash.com/photo-1464820453369-31d2c0b651af?w=400&h=400&fit=crop"
|
| 465 |
-
},
|
| 466 |
-
{
|
| 467 |
-
"id": "color-005",
|
| 468 |
-
"type": "color",
|
| 469 |
-
"category": "visual_learning",
|
| 470 |
-
"subcategory": "color",
|
| 471 |
-
"difficulty": "easy",
|
| 472 |
-
"title": {"en": "Orange", "fr": "Orange"},
|
| 473 |
-
"target_text": {"en": "Orange", "fr": "Orange"},
|
| 474 |
-
"instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
|
| 475 |
-
"image_url": "https://images.unsplash.com/photo-1557800636-894a64c1696f?w=400&h=400&fit=crop"
|
| 476 |
-
},
|
| 477 |
-
{
|
| 478 |
-
"id": "color-006",
|
| 479 |
-
"type": "color",
|
| 480 |
-
"category": "visual_learning",
|
| 481 |
-
"subcategory": "color",
|
| 482 |
-
"difficulty": "easy",
|
| 483 |
-
"title": {"en": "Purple", "fr": "Violet"},
|
| 484 |
-
"target_text": {"en": "Purple", "fr": "Violet"},
|
| 485 |
-
"instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
|
| 486 |
-
"image_url": "https://images.unsplash.com/photo-1528459801416-a9e53bbf4e17?w=400&h=400&fit=crop"
|
| 487 |
-
},
|
| 488 |
-
|
| 489 |
-
# =========================================================================
|
| 490 |
-
# VISUAL LEARNING - Objects
|
| 491 |
-
# =========================================================================
|
| 492 |
-
{
|
| 493 |
-
"id": "obj-001",
|
| 494 |
-
"type": "object",
|
| 495 |
-
"category": "visual_learning",
|
| 496 |
-
"subcategory": "object",
|
| 497 |
-
"difficulty": "easy",
|
| 498 |
-
"title": {"en": "Apple", "fr": "Pomme"},
|
| 499 |
-
"target_text": {"en": "Apple", "fr": "Pomme"},
|
| 500 |
-
"instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
|
| 501 |
-
"image_url": "https://images.unsplash.com/photo-1584306670957-acf935f5033c?w=400&h=400&fit=crop"
|
| 502 |
-
},
|
| 503 |
-
{
|
| 504 |
-
"id": "obj-002",
|
| 505 |
-
"type": "object",
|
| 506 |
-
"category": "visual_learning",
|
| 507 |
-
"subcategory": "object",
|
| 508 |
-
"difficulty": "easy",
|
| 509 |
-
"title": {"en": "Book", "fr": "Livre"},
|
| 510 |
-
"target_text": {"en": "Book", "fr": "Livre"},
|
| 511 |
-
"instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
|
| 512 |
-
"image_url": "https://images.unsplash.com/photo-1544947950-fa07a98d237f?w=400&h=400&fit=crop"
|
| 513 |
-
},
|
| 514 |
-
{
|
| 515 |
-
"id": "obj-003",
|
| 516 |
-
"type": "object",
|
| 517 |
-
"category": "visual_learning",
|
| 518 |
-
"subcategory": "object",
|
| 519 |
-
"difficulty": "easy",
|
| 520 |
-
"title": {"en": "Car", "fr": "Voiture"},
|
| 521 |
-
"target_text": {"en": "Car", "fr": "Voiture"},
|
| 522 |
-
"instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
|
| 523 |
-
"image_url": "https://images.unsplash.com/photo-1502877338535-766e1452684a?w=400&h=400&fit=crop"
|
| 524 |
-
},
|
| 525 |
-
{
|
| 526 |
-
"id": "obj-004",
|
| 527 |
-
"type": "object",
|
| 528 |
-
"category": "visual_learning",
|
| 529 |
-
"subcategory": "object",
|
| 530 |
-
"difficulty": "easy",
|
| 531 |
-
"title": {"en": "Chair", "fr": "Chaise"},
|
| 532 |
-
"target_text": {"en": "Chair", "fr": "Chaise"},
|
| 533 |
-
"instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
|
| 534 |
-
"image_url": "https://images.unsplash.com/photo-1503602642458-232111445657?w=400&h=400&fit=crop"
|
| 535 |
-
},
|
| 536 |
-
{
|
| 537 |
-
"id": "obj-005",
|
| 538 |
-
"type": "object",
|
| 539 |
-
"category": "visual_learning",
|
| 540 |
-
"subcategory": "object",
|
| 541 |
-
"difficulty": "easy",
|
| 542 |
-
"title": {"en": "House", "fr": "Maison"},
|
| 543 |
-
"target_text": {"en": "House", "fr": "Maison"},
|
| 544 |
-
"instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
|
| 545 |
-
"image_url": "https://images.unsplash.com/photo-1518780664697-55e3ad937233?w=400&h=400&fit=crop"
|
| 546 |
-
},
|
| 547 |
-
{
|
| 548 |
-
"id": "obj-006",
|
| 549 |
-
"type": "object",
|
| 550 |
-
"category": "visual_learning",
|
| 551 |
-
"subcategory": "object",
|
| 552 |
-
"difficulty": "medium",
|
| 553 |
-
"title": {"en": "Telephone", "fr": "Téléphone"},
|
| 554 |
-
"target_text": {"en": "Telephone", "fr": "Téléphone"},
|
| 555 |
-
"instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
|
| 556 |
-
"image_url": "https://images.unsplash.com/photo-1511707171634-5f897ff02aa9?w=400&h=400&fit=crop"
|
| 557 |
-
},
|
| 558 |
-
{
|
| 559 |
-
"id": "obj-007",
|
| 560 |
-
"type": "object",
|
| 561 |
-
"category": "visual_learning",
|
| 562 |
-
"subcategory": "object",
|
| 563 |
-
"difficulty": "medium",
|
| 564 |
-
"title": {"en": "Umbrella", "fr": "Parapluie"},
|
| 565 |
-
"target_text": {"en": "Umbrella", "fr": "Parapluie"},
|
| 566 |
-
"instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
|
| 567 |
-
"image_url": "https://images.unsplash.com/photo-1534309466160-70b22cc6252c?w=400&h=400&fit=crop"
|
| 568 |
-
},
|
| 569 |
-
|
| 570 |
-
# =========================================================================
|
| 571 |
-
# VISUAL LEARNING - Animals
|
| 572 |
-
# =========================================================================
|
| 573 |
-
{
|
| 574 |
-
"id": "animal-001",
|
| 575 |
-
"type": "animal",
|
| 576 |
-
"category": "visual_learning",
|
| 577 |
-
"subcategory": "animal",
|
| 578 |
-
"difficulty": "easy",
|
| 579 |
-
"title": {"en": "Dog", "fr": "Chien"},
|
| 580 |
-
"target_text": {"en": "Dog", "fr": "Chien"},
|
| 581 |
-
"instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
|
| 582 |
-
"image_url": "https://images.unsplash.com/photo-1587300003388-59208cc962cb?w=400&h=400&fit=crop"
|
| 583 |
-
},
|
| 584 |
-
{
|
| 585 |
-
"id": "animal-002",
|
| 586 |
-
"type": "animal",
|
| 587 |
-
"category": "visual_learning",
|
| 588 |
-
"subcategory": "animal",
|
| 589 |
-
"difficulty": "easy",
|
| 590 |
-
"title": {"en": "Cat", "fr": "Chat"},
|
| 591 |
-
"target_text": {"en": "Cat", "fr": "Chat"},
|
| 592 |
-
"instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
|
| 593 |
-
"image_url": "https://images.unsplash.com/photo-1514888286974-6c03e2ca1dba?w=400&h=400&fit=crop"
|
| 594 |
-
},
|
| 595 |
-
{
|
| 596 |
-
"id": "animal-003",
|
| 597 |
-
"type": "animal",
|
| 598 |
-
"category": "visual_learning",
|
| 599 |
-
"subcategory": "animal",
|
| 600 |
-
"difficulty": "easy",
|
| 601 |
-
"title": {"en": "Bird", "fr": "Oiseau"},
|
| 602 |
-
"target_text": {"en": "Bird", "fr": "Oiseau"},
|
| 603 |
-
"instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
|
| 604 |
-
"image_url": "https://images.unsplash.com/photo-1522926193341-e9ffd686c60f?w=400&h=400&fit=crop"
|
| 605 |
-
},
|
| 606 |
-
{
|
| 607 |
-
"id": "animal-004",
|
| 608 |
-
"type": "animal",
|
| 609 |
-
"category": "visual_learning",
|
| 610 |
-
"subcategory": "animal",
|
| 611 |
-
"difficulty": "easy",
|
| 612 |
-
"title": {"en": "Fish", "fr": "Poisson"},
|
| 613 |
-
"target_text": {"en": "Fish", "fr": "Poisson"},
|
| 614 |
-
"instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
|
| 615 |
-
"image_url": "https://images.unsplash.com/photo-1524704654690-b56c05c78a00?w=400&h=400&fit=crop"
|
| 616 |
-
},
|
| 617 |
-
{
|
| 618 |
-
"id": "animal-005",
|
| 619 |
-
"type": "animal",
|
| 620 |
-
"category": "visual_learning",
|
| 621 |
-
"subcategory": "animal",
|
| 622 |
-
"difficulty": "medium",
|
| 623 |
-
"title": {"en": "Elephant", "fr": "Éléphant"},
|
| 624 |
-
"target_text": {"en": "Elephant", "fr": "Éléphant"},
|
| 625 |
-
"instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
|
| 626 |
-
"image_url": "https://images.unsplash.com/photo-1557050543-4d5f4e07ef46?w=400&h=400&fit=crop"
|
| 627 |
-
},
|
| 628 |
-
{
|
| 629 |
-
"id": "animal-006",
|
| 630 |
-
"type": "animal",
|
| 631 |
-
"category": "visual_learning",
|
| 632 |
-
"subcategory": "animal",
|
| 633 |
-
"difficulty": "medium",
|
| 634 |
-
"title": {"en": "Lion", "fr": "Lion"},
|
| 635 |
-
"target_text": {"en": "Lion", "fr": "Lion"},
|
| 636 |
-
"instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
|
| 637 |
-
"image_url": "https://images.unsplash.com/photo-1546182990-dffeafbe841d?w=400&h=400&fit=crop"
|
| 638 |
-
},
|
| 639 |
-
{
|
| 640 |
-
"id": "animal-007",
|
| 641 |
-
"type": "animal",
|
| 642 |
-
"category": "visual_learning",
|
| 643 |
-
"subcategory": "animal",
|
| 644 |
-
"difficulty": "medium",
|
| 645 |
-
"title": {"en": "Butterfly", "fr": "Papillon"},
|
| 646 |
-
"target_text": {"en": "Butterfly", "fr": "Papillon"},
|
| 647 |
-
"instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
|
| 648 |
-
"image_url": "https://images.unsplash.com/photo-1452570053594-1b985d6ea890?w=400&h=400&fit=crop"
|
| 649 |
-
},
|
| 650 |
-
{
|
| 651 |
-
"id": "animal-008",
|
| 652 |
-
"type": "animal",
|
| 653 |
-
"category": "visual_learning",
|
| 654 |
-
"subcategory": "animal",
|
| 655 |
-
"difficulty": "hard",
|
| 656 |
-
"title": {"en": "Hippopotamus", "fr": "Hippopotame"},
|
| 657 |
-
"target_text": {"en": "Hippopotamus", "fr": "Hippopotame"},
|
| 658 |
-
"instructions": {"en": "Look at the animal and say its name. This is a long word!", "fr": "Regardez l'animal et dites son nom. C'est un mot long!"},
|
| 659 |
-
"image_url": "https://images.unsplash.com/photo-1517840933437-c41356892b35?w=400&h=400&fit=crop"
|
| 660 |
-
},
|
| 661 |
-
|
| 662 |
-
# =========================================================================
|
| 663 |
-
# VISUAL LEARNING - Actions
|
| 664 |
-
# =========================================================================
|
| 665 |
-
{
|
| 666 |
-
"id": "action-001",
|
| 667 |
-
"type": "action",
|
| 668 |
-
"category": "visual_learning",
|
| 669 |
-
"subcategory": "action",
|
| 670 |
-
"difficulty": "easy",
|
| 671 |
-
"title": {"en": "Running", "fr": "Courir"},
|
| 672 |
-
"target_text": {"en": "Running", "fr": "Courir"},
|
| 673 |
-
"instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
|
| 674 |
-
"image_url": "https://images.unsplash.com/photo-1552674605-db6ffd4facb5?w=400&h=400&fit=crop"
|
| 675 |
-
},
|
| 676 |
-
{
|
| 677 |
-
"id": "action-002",
|
| 678 |
-
"type": "action",
|
| 679 |
-
"category": "visual_learning",
|
| 680 |
-
"subcategory": "action",
|
| 681 |
-
"difficulty": "easy",
|
| 682 |
-
"title": {"en": "Eating", "fr": "Manger"},
|
| 683 |
-
"target_text": {"en": "Eating", "fr": "Manger"},
|
| 684 |
-
"instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
|
| 685 |
-
"image_url": "https://images.unsplash.com/photo-1504674900247-0877df9cc836?w=400&h=400&fit=crop"
|
| 686 |
-
},
|
| 687 |
-
{
|
| 688 |
-
"id": "action-003",
|
| 689 |
-
"type": "action",
|
| 690 |
-
"category": "visual_learning",
|
| 691 |
-
"subcategory": "action",
|
| 692 |
-
"difficulty": "easy",
|
| 693 |
-
"title": {"en": "Sleeping", "fr": "Dormir"},
|
| 694 |
-
"target_text": {"en": "Sleeping", "fr": "Dormir"},
|
| 695 |
-
"instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
|
| 696 |
-
"image_url": "https://images.unsplash.com/photo-1541781774459-bb2af2f05b55?w=400&h=400&fit=crop"
|
| 697 |
-
},
|
| 698 |
-
{
|
| 699 |
-
"id": "action-004",
|
| 700 |
-
"type": "action",
|
| 701 |
-
"category": "visual_learning",
|
| 702 |
-
"subcategory": "action",
|
| 703 |
-
"difficulty": "easy",
|
| 704 |
-
"title": {"en": "Reading", "fr": "Lire"},
|
| 705 |
-
"target_text": {"en": "Reading", "fr": "Lire"},
|
| 706 |
-
"instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
|
| 707 |
-
"image_url": "https://images.unsplash.com/photo-1506880018603-83d5b814b5a6?w=400&h=400&fit=crop"
|
| 708 |
-
},
|
| 709 |
-
{
|
| 710 |
-
"id": "action-005",
|
| 711 |
-
"type": "action",
|
| 712 |
-
"category": "visual_learning",
|
| 713 |
-
"subcategory": "action",
|
| 714 |
-
"difficulty": "medium",
|
| 715 |
-
"title": {"en": "Swimming", "fr": "Nager"},
|
| 716 |
-
"target_text": {"en": "Swimming", "fr": "Nager"},
|
| 717 |
-
"instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
|
| 718 |
-
"image_url": "https://images.unsplash.com/photo-1530549387789-4c1017266635?w=400&h=400&fit=crop"
|
| 719 |
-
},
|
| 720 |
-
|
| 721 |
-
# =========================================================================
|
| 722 |
-
# SOUND IMITATION - Animal Sounds
|
| 723 |
-
# =========================================================================
|
| 724 |
-
{
|
| 725 |
-
"id": "asound-001",
|
| 726 |
-
"type": "animal_sound",
|
| 727 |
-
"category": "sound_imitation",
|
| 728 |
-
"subcategory": "animal_sound",
|
| 729 |
-
"difficulty": "easy",
|
| 730 |
-
"title": {"en": "Dog Sound", "fr": "Son du Chien"},
|
| 731 |
-
"target_text": {"en": "Woof woof", "fr": "Ouaf ouaf"},
|
| 732 |
-
"instructions": {"en": "Imitate the sound a dog makes!", "fr": "Imitez le son que fait un chien!"},
|
| 733 |
-
"image_url": "https://images.unsplash.com/photo-1587300003388-59208cc962cb?w=400&h=400&fit=crop"
|
| 734 |
-
},
|
| 735 |
-
{
|
| 736 |
-
"id": "asound-002",
|
| 737 |
-
"type": "animal_sound",
|
| 738 |
-
"category": "sound_imitation",
|
| 739 |
-
"subcategory": "animal_sound",
|
| 740 |
-
"difficulty": "easy",
|
| 741 |
-
"title": {"en": "Cat Sound", "fr": "Son du Chat"},
|
| 742 |
-
"target_text": {"en": "Meow meow", "fr": "Miaou miaou"},
|
| 743 |
-
"instructions": {"en": "Imitate the sound a cat makes!", "fr": "Imitez le son que fait un chat!"},
|
| 744 |
-
"image_url": "https://images.unsplash.com/photo-1514888286974-6c03e2ca1dba?w=400&h=400&fit=crop"
|
| 745 |
-
},
|
| 746 |
-
{
|
| 747 |
-
"id": "asound-003",
|
| 748 |
-
"type": "animal_sound",
|
| 749 |
-
"category": "sound_imitation",
|
| 750 |
-
"subcategory": "animal_sound",
|
| 751 |
-
"difficulty": "easy",
|
| 752 |
-
"title": {"en": "Cow Sound", "fr": "Son de la Vache"},
|
| 753 |
-
"target_text": {"en": "Moo moo", "fr": "Meuh meuh"},
|
| 754 |
-
"instructions": {"en": "Imitate the sound a cow makes!", "fr": "Imitez le son que fait une vache!"},
|
| 755 |
-
"image_url": "https://images.unsplash.com/photo-1570042225831-d98fa7577f1e?w=400&h=400&fit=crop"
|
| 756 |
-
},
|
| 757 |
-
{
|
| 758 |
-
"id": "asound-004",
|
| 759 |
-
"type": "animal_sound",
|
| 760 |
-
"category": "sound_imitation",
|
| 761 |
-
"subcategory": "animal_sound",
|
| 762 |
-
"difficulty": "easy",
|
| 763 |
-
"title": {"en": "Duck Sound", "fr": "Son du Canard"},
|
| 764 |
-
"target_text": {"en": "Quack quack", "fr": "Coin coin"},
|
| 765 |
-
"instructions": {"en": "Imitate the sound a duck makes!", "fr": "Imitez le son que fait un canard!"},
|
| 766 |
-
"image_url": "https://images.unsplash.com/photo-1459682687441-7761439a709d?w=400&h=400&fit=crop"
|
| 767 |
-
},
|
| 768 |
-
{
|
| 769 |
-
"id": "asound-005",
|
| 770 |
-
"type": "animal_sound",
|
| 771 |
-
"category": "sound_imitation",
|
| 772 |
-
"subcategory": "animal_sound",
|
| 773 |
-
"difficulty": "medium",
|
| 774 |
-
"title": {"en": "Lion Sound", "fr": "Son du Lion"},
|
| 775 |
-
"target_text": {"en": "Roar!", "fr": "Grrrr!"},
|
| 776 |
-
"instructions": {"en": "Imitate the sound a lion makes! Be loud!", "fr": "Imitez le son que fait un lion! Soyez fort!"},
|
| 777 |
-
"image_url": "https://images.unsplash.com/photo-1546182990-dffeafbe841d?w=400&h=400&fit=crop"
|
| 778 |
-
},
|
| 779 |
-
{
|
| 780 |
-
"id": "asound-006",
|
| 781 |
-
"type": "animal_sound",
|
| 782 |
-
"category": "sound_imitation",
|
| 783 |
-
"subcategory": "animal_sound",
|
| 784 |
-
"difficulty": "medium",
|
| 785 |
-
"title": {"en": "Snake Sound", "fr": "Son du Serpent"},
|
| 786 |
-
"target_text": {"en": "Sssssss", "fr": "Sssssss"},
|
| 787 |
-
"instructions": {"en": "Imitate the hissing sound a snake makes!", "fr": "Imitez le sifflement que fait un serpent!"},
|
| 788 |
-
"image_url": "https://images.unsplash.com/photo-1531386151447-fd76ad50012f?w=400&h=400&fit=crop"
|
| 789 |
-
},
|
| 790 |
-
|
| 791 |
-
# =========================================================================
|
| 792 |
-
# SOUND IMITATION - Environmental Sounds
|
| 793 |
-
# =========================================================================
|
| 794 |
-
{
|
| 795 |
-
"id": "esound-001",
|
| 796 |
-
"type": "environmental_sound",
|
| 797 |
-
"category": "sound_imitation",
|
| 798 |
-
"subcategory": "environmental_sound",
|
| 799 |
-
"difficulty": "easy",
|
| 800 |
-
"title": {"en": "Car Horn", "fr": "Klaxon"},
|
| 801 |
-
"target_text": {"en": "Beep beep", "fr": "Pouet pouet"},
|
| 802 |
-
"instructions": {"en": "Imitate the sound of a car horn!", "fr": "Imitez le son d'un klaxon!"},
|
| 803 |
-
"image_url": "https://images.unsplash.com/photo-1502877338535-766e1452684a?w=400&h=400&fit=crop"
|
| 804 |
-
},
|
| 805 |
-
{
|
| 806 |
-
"id": "esound-002",
|
| 807 |
-
"type": "environmental_sound",
|
| 808 |
-
"category": "sound_imitation",
|
| 809 |
-
"subcategory": "environmental_sound",
|
| 810 |
-
"difficulty": "easy",
|
| 811 |
-
"title": {"en": "Train Sound", "fr": "Son du Train"},
|
| 812 |
-
"target_text": {"en": "Choo choo", "fr": "Tchou tchou"},
|
| 813 |
-
"instructions": {"en": "Imitate the sound of a train!", "fr": "Imitez le son d'un train!"},
|
| 814 |
-
"image_url": "https://images.unsplash.com/photo-1474487548417-781cb71495f3?w=400&h=400&fit=crop"
|
| 815 |
-
},
|
| 816 |
-
{
|
| 817 |
-
"id": "esound-003",
|
| 818 |
-
"type": "environmental_sound",
|
| 819 |
-
"category": "sound_imitation",
|
| 820 |
-
"subcategory": "environmental_sound",
|
| 821 |
-
"difficulty": "easy",
|
| 822 |
-
"title": {"en": "Clock Sound", "fr": "Son de l'Horloge"},
|
| 823 |
-
"target_text": {"en": "Tick tock tick tock", "fr": "Tic tac tic tac"},
|
| 824 |
-
"instructions": {"en": "Imitate the sound of a clock!", "fr": "Imitez le son d'une horloge!"},
|
| 825 |
-
"image_url": "https://images.unsplash.com/photo-1563861826100-9cb868fdbe1c?w=400&h=400&fit=crop"
|
| 826 |
-
},
|
| 827 |
-
{
|
| 828 |
-
"id": "esound-004",
|
| 829 |
-
"type": "environmental_sound",
|
| 830 |
-
"category": "sound_imitation",
|
| 831 |
-
"subcategory": "environmental_sound",
|
| 832 |
-
"difficulty": "medium",
|
| 833 |
-
"title": {"en": "Rain Sound", "fr": "Son de la Pluie"},
|
| 834 |
-
"target_text": {"en": "Pitter patter pitter patter", "fr": "Plic ploc plic ploc"},
|
| 835 |
-
"instructions": {"en": "Imitate the sound of rain falling!", "fr": "Imitez le son de la pluie qui tombe!"},
|
| 836 |
-
"image_url": "https://images.unsplash.com/photo-1519692933481-e162a57d6721?w=400&h=400&fit=crop"
|
| 837 |
-
},
|
| 838 |
-
{
|
| 839 |
-
"id": "esound-005",
|
| 840 |
-
"type": "environmental_sound",
|
| 841 |
-
"category": "sound_imitation",
|
| 842 |
-
"subcategory": "environmental_sound",
|
| 843 |
-
"difficulty": "medium",
|
| 844 |
-
"title": {"en": "Wind Sound", "fr": "Son du Vent"},
|
| 845 |
-
"target_text": {"en": "Whoooosh", "fr": "Woooosh"},
|
| 846 |
-
"instructions": {"en": "Imitate the sound of strong wind!", "fr": "Imitez le son du vent fort!"},
|
| 847 |
-
"image_url": "https://images.unsplash.com/photo-1534088568595-a066f410bcda?w=400&h=400&fit=crop"
|
| 848 |
-
},
|
| 849 |
]
|
| 850 |
|
| 851 |
|
|
@@ -872,8 +307,6 @@ def get_all_exercises(language: str = "en") -> List[Dict]:
|
|
| 872 |
"title": ex["title"].get(language, ex["title"]["en"]),
|
| 873 |
"target_text": ex["target_text"].get(language, ex["target_text"]["en"]),
|
| 874 |
"instructions": ex["instructions"].get(language, ex["instructions"]["en"]),
|
| 875 |
-
"image_url": ex.get("image_url"),
|
| 876 |
-
"audio_url": ex.get("audio_url"),
|
| 877 |
"phoneme_focus": ex.get("phoneme_focus", [])
|
| 878 |
})
|
| 879 |
return exercises
|
|
|
|
| 1 |
"""
|
| 2 |
+
Speech Therapy Exercises Database
|
| 3 |
+
Categories: Fundamentals, Speech Practice
|
| 4 |
Languages: English (en), French (fr)
|
| 5 |
"""
|
| 6 |
|
|
|
|
| 11 |
|
| 12 |
class ExerciseType(str, Enum):
|
| 13 |
# Fundamentals
|
|
|
|
|
|
|
| 14 |
PHONEME = "phoneme"
|
| 15 |
|
| 16 |
# Speech Practice
|
|
|
|
| 18 |
SENTENCE_READING = "sentence_reading"
|
| 19 |
TONGUE_TWISTER = "tongue_twister"
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
class Difficulty(str, Enum):
|
| 23 |
EASY = "easy"
|
|
|
|
| 27 |
|
| 28 |
class Category(BaseModel):
|
| 29 |
id: str
|
| 30 |
+
name: Dict[str, str]
|
| 31 |
description: Dict[str, str]
|
| 32 |
icon: str
|
| 33 |
subcategories: List[str]
|
|
|
|
| 42 |
title: Dict[str, str]
|
| 43 |
target_text: Dict[str, str]
|
| 44 |
instructions: Dict[str, str]
|
|
|
|
|
|
|
| 45 |
phoneme_focus: Optional[List[str]] = None
|
| 46 |
|
| 47 |
|
|
|
|
| 54 |
"id": "fundamentals",
|
| 55 |
"name": {"en": "Fundamentals", "fr": "Fondamentaux"},
|
| 56 |
"description": {
|
| 57 |
+
"en": "Master specific sounds and phonemes",
|
| 58 |
+
"fr": "Maîtrisez des sons et phonèmes spécifiques"
|
| 59 |
},
|
| 60 |
"icon": "🎯",
|
| 61 |
+
"subcategories": ["phoneme"]
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"id": "speech_practice",
|
|
|
|
| 69 |
},
|
| 70 |
"icon": "📖",
|
| 71 |
"subcategories": ["word_repetition", "sentence_reading", "tongue_twister"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
}
|
| 73 |
]
|
| 74 |
|
| 75 |
SUBCATEGORIES: Dict[str, Dict] = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
"phoneme": {
|
| 77 |
"name": {"en": "Phoneme Practice", "fr": "Pratique des Phonèmes"},
|
| 78 |
"description": {"en": "Master specific sounds like R, S, TH", "fr": "Maîtrisez des sons spécifiques comme R, S, CH"}
|
|
|
|
| 88 |
"tongue_twister": {
|
| 89 |
"name": {"en": "Tongue Twisters", "fr": "Virelangues"},
|
| 90 |
"description": {"en": "Challenge yourself with tricky phrases", "fr": "Défiez-vous avec des phrases difficiles"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
}
|
| 92 |
}
|
| 93 |
|
|
|
|
| 97 |
# =============================================================================
|
| 98 |
|
| 99 |
EXERCISES: List[Dict] = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
# =========================================================================
|
| 101 |
# FUNDAMENTALS - Phoneme Practice
|
| 102 |
# =========================================================================
|
|
|
|
| 281 |
"instructions": {"en": "Focus on the 'N' and 'Y' sounds.", "fr": "Concentrez-vous sur les sons 'P' et 'N'."},
|
| 282 |
"phoneme_focus": ["N", "Y"]
|
| 283 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
]
|
| 285 |
|
| 286 |
|
|
|
|
| 307 |
"title": ex["title"].get(language, ex["title"]["en"]),
|
| 308 |
"target_text": ex["target_text"].get(language, ex["target_text"]["en"]),
|
| 309 |
"instructions": ex["instructions"].get(language, ex["instructions"]["en"]),
|
|
|
|
|
|
|
| 310 |
"phoneme_focus": ex.get("phoneme_focus", [])
|
| 311 |
})
|
| 312 |
return exercises
|
api/endpoints/v1/processing/therapy_asr.py
CHANGED
|
@@ -15,6 +15,41 @@ from dataclasses import dataclass
|
|
| 15 |
|
| 16 |
from api.config import settings
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
if settings.ENVIRONMENT == "development":
|
| 19 |
logging.basicConfig(level=logging.DEBUG)
|
| 20 |
else:
|
|
@@ -23,9 +58,10 @@ else:
|
|
| 23 |
|
| 24 |
class ASREngine(str, Enum):
|
| 25 |
"""Available ASR engines."""
|
| 26 |
-
|
|
|
|
| 27 |
SPEECHBRAIN = "speechbrain"
|
| 28 |
-
WHISPER_API = "whisper_api"
|
| 29 |
AUTO = "auto" # Automatically select based on user profile
|
| 30 |
|
| 31 |
|
|
@@ -67,7 +103,8 @@ class TherapyASR:
|
|
| 67 |
import torch
|
| 68 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
| 69 |
|
| 70 |
-
|
|
|
|
| 71 |
logging.info(f"Loading local Whisper model: {model_name}")
|
| 72 |
|
| 73 |
self._whisper_processor = WhisperProcessor.from_pretrained(model_name)
|
|
@@ -89,6 +126,9 @@ class TherapyASR:
|
|
| 89 |
"""Lazy load SpeechBrain model for atypical speech."""
|
| 90 |
if self._speechbrain_model is None:
|
| 91 |
try:
|
|
|
|
|
|
|
|
|
|
| 92 |
import speechbrain as sb
|
| 93 |
|
| 94 |
# Use pre-trained model, can be swapped for fine-tuned version
|
|
@@ -120,8 +160,10 @@ class TherapyASR:
|
|
| 120 |
if user_profile.get("privacy_mode") == "local":
|
| 121 |
return ASREngine.WHISPER_LOCAL
|
| 122 |
|
| 123 |
-
# Default to API
|
| 124 |
-
|
|
|
|
|
|
|
| 125 |
|
| 126 |
def transcribe(
|
| 127 |
self,
|
|
@@ -147,15 +189,15 @@ class TherapyASR:
|
|
| 147 |
selected_engine = engine or self._select_engine(user_profile)
|
| 148 |
logging.info(f"Transcribing with engine: {selected_engine.value}")
|
| 149 |
|
| 150 |
-
#
|
| 151 |
fallback_order = [selected_engine]
|
| 152 |
-
if selected_engine != ASREngine.WHISPER_API:
|
| 153 |
-
fallback_order.append(ASREngine.WHISPER_API)
|
| 154 |
|
| 155 |
last_error = None
|
| 156 |
for eng in fallback_order:
|
| 157 |
try:
|
| 158 |
-
if eng == ASREngine.
|
|
|
|
|
|
|
| 159 |
return self._transcribe_whisper_api(audio_data, filename, content_type)
|
| 160 |
elif eng == ASREngine.WHISPER_LOCAL:
|
| 161 |
return self._transcribe_whisper_local(audio_data)
|
|
@@ -202,6 +244,51 @@ class TherapyASR:
|
|
| 202 |
word_timestamps=word_timestamps
|
| 203 |
)
|
| 204 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
def _transcribe_whisper_local(self, audio_data: bytes) -> TranscriptionResult:
|
| 206 |
"""Transcribe using local Whisper model."""
|
| 207 |
logging.info("Transcribing with local Whisper")
|
|
@@ -212,8 +299,62 @@ class TherapyASR:
|
|
| 212 |
|
| 213 |
model = self._get_whisper_local()
|
| 214 |
|
| 215 |
-
#
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
# Process audio
|
| 219 |
input_features = self._whisper_processor(
|
|
|
|
| 15 |
|
| 16 |
from api.config import settings
|
| 17 |
|
| 18 |
+
|
| 19 |
+
def _patch_torchaudio_for_speechbrain():
|
| 20 |
+
"""
|
| 21 |
+
Patch torchaudio to fix compatibility with SpeechBrain 1.0.3 and torchaudio 2.2+.
|
| 22 |
+
|
| 23 |
+
The `list_audio_backends()` function was removed in torchaudio 2.2+,
|
| 24 |
+
but SpeechBrain 1.0.3 still calls it. This adds a compatibility shim.
|
| 25 |
+
"""
|
| 26 |
+
try:
|
| 27 |
+
import torchaudio
|
| 28 |
+
if not hasattr(torchaudio, 'list_audio_backends'):
|
| 29 |
+
# Add the missing function that returns available backends
|
| 30 |
+
def list_audio_backends():
|
| 31 |
+
"""Compatibility shim for removed torchaudio function."""
|
| 32 |
+
backends = []
|
| 33 |
+
try:
|
| 34 |
+
import soundfile
|
| 35 |
+
backends.append('soundfile')
|
| 36 |
+
except ImportError:
|
| 37 |
+
pass
|
| 38 |
+
try:
|
| 39 |
+
# Check if sox is available
|
| 40 |
+
import torchaudio.backend.sox_io_backend
|
| 41 |
+
backends.append('sox')
|
| 42 |
+
except (ImportError, OSError):
|
| 43 |
+
pass
|
| 44 |
+
# ffmpeg is usually available
|
| 45 |
+
backends.append('ffmpeg')
|
| 46 |
+
return backends
|
| 47 |
+
|
| 48 |
+
torchaudio.list_audio_backends = list_audio_backends
|
| 49 |
+
logging.debug("Patched torchaudio.list_audio_backends for SpeechBrain compatibility")
|
| 50 |
+
except ImportError:
|
| 51 |
+
pass # torchaudio not installed, will fail later with proper error
|
| 52 |
+
|
| 53 |
if settings.ENVIRONMENT == "development":
|
| 54 |
logging.basicConfig(level=logging.DEBUG)
|
| 55 |
else:
|
|
|
|
| 58 |
|
| 59 |
class ASREngine(str, Enum):
    """Available ASR engines.

    Members subclass `str`, so each one compares equal to its raw value
    (e.g. `ASREngine.AUTO == "auto"`).
    """
    # NOTE: despite its name, WHISPER_JAX is served by the Groq hosted Whisper
    # API (see `_transcribe_whisper_jax`); the value is kept as-is so existing
    # clients that send "whisper_jax" keep working.
    WHISPER_JAX = "whisper_jax"  # Fast, free Groq API (recommended)
    WHISPER_LOCAL = "whisper_local"  # Local whisper-large-v3 (slow on CPU)
    SPEECHBRAIN = "speechbrain"
    WHISPER_API = "whisper_api"  # OpenAI paid API
    AUTO = "auto"  # Automatically select based on user profile
|
| 66 |
|
| 67 |
|
|
|
|
| 103 |
import torch
|
| 104 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
| 105 |
|
| 106 |
+
# Use large-v3 for best accuracy (cached after first load)
|
| 107 |
+
model_name = "openai/whisper-large-v3"
|
| 108 |
logging.info(f"Loading local Whisper model: {model_name}")
|
| 109 |
|
| 110 |
self._whisper_processor = WhisperProcessor.from_pretrained(model_name)
|
|
|
|
| 126 |
"""Lazy load SpeechBrain model for atypical speech."""
|
| 127 |
if self._speechbrain_model is None:
|
| 128 |
try:
|
| 129 |
+
# Apply torchaudio compatibility patch before importing speechbrain
|
| 130 |
+
_patch_torchaudio_for_speechbrain()
|
| 131 |
+
|
| 132 |
import speechbrain as sb
|
| 133 |
|
| 134 |
# Use pre-trained model, can be swapped for fine-tuned version
|
|
|
|
| 160 |
if user_profile.get("privacy_mode") == "local":
|
| 161 |
return ASREngine.WHISPER_LOCAL
|
| 162 |
|
| 163 |
+
# Default to Groq (fast, free) if API key available, else local
|
| 164 |
+
if settings.GROQ_API_KEY:
|
| 165 |
+
return ASREngine.WHISPER_JAX
|
| 166 |
+
return ASREngine.WHISPER_LOCAL
|
| 167 |
|
| 168 |
def transcribe(
|
| 169 |
self,
|
|
|
|
| 189 |
selected_engine = engine or self._select_engine(user_profile)
|
| 190 |
logging.info(f"Transcribing with engine: {selected_engine.value}")
|
| 191 |
|
| 192 |
+
# Use selected engine only (no fallback to paid API)
|
| 193 |
fallback_order = [selected_engine]
|
|
|
|
|
|
|
| 194 |
|
| 195 |
last_error = None
|
| 196 |
for eng in fallback_order:
|
| 197 |
try:
|
| 198 |
+
if eng == ASREngine.WHISPER_JAX:
|
| 199 |
+
return self._transcribe_whisper_jax(audio_data)
|
| 200 |
+
elif eng == ASREngine.WHISPER_API:
|
| 201 |
return self._transcribe_whisper_api(audio_data, filename, content_type)
|
| 202 |
elif eng == ASREngine.WHISPER_LOCAL:
|
| 203 |
return self._transcribe_whisper_local(audio_data)
|
|
|
|
| 244 |
word_timestamps=word_timestamps
|
| 245 |
)
|
| 246 |
|
| 247 |
+
def _transcribe_whisper_jax(self, audio_data: bytes) -> TranscriptionResult:
    """Transcribe using Groq API (free, fast Whisper large-v3).

    Args:
        audio_data: Raw bytes of the recording. They are uploaded under a
            ``.webm`` filename, as before.

    Returns:
        A TranscriptionResult carrying the stripped text, ``engine_used`` set
        to ``ASREngine.WHISPER_JAX``, the ``language`` attribute of the
        response (None when absent) and word timestamps when the response
        has a non-empty ``words`` attribute.

    Raises:
        RuntimeError: If ``settings.GROQ_API_KEY`` is not configured.
    """
    logging.info("Transcribing with Groq Whisper (free, fast)")

    from groq import Groq

    if not settings.GROQ_API_KEY:
        raise RuntimeError("GROQ_API_KEY not configured. Get free key at https://console.groq.com")

    client = Groq(api_key=settings.GROQ_API_KEY)

    # The client takes a (filename, bytes) tuple, so the payload is sent
    # straight from memory. The previous temp-file round trip wrote the bytes
    # to disk only to read them all back, and leaked the server-side temp path
    # as the upload filename.
    transcription = client.audio.transcriptions.create(
        file=("audio.webm", audio_data),
        model="whisper-large-v3",
        temperature=0,
        response_format="verbose_json",
    )

    # Extract word timestamps if available.
    # NOTE(review): word-level entries may require requesting word timestamp
    # granularity from the API - confirm against the Groq documentation.
    word_timestamps = None
    words = getattr(transcription, 'words', None)
    if words:
        word_timestamps = [
            {"word": w.word, "start": w.start, "end": w.end}
            for w in words
        ]

    return TranscriptionResult(
        text=transcription.text.strip(),
        engine_used=ASREngine.WHISPER_JAX,
        language=getattr(transcription, 'language', None),
        word_timestamps=word_timestamps
    )
|
| 291 |
+
|
| 292 |
def _transcribe_whisper_local(self, audio_data: bytes) -> TranscriptionResult:
|
| 293 |
"""Transcribe using local Whisper model."""
|
| 294 |
logging.info("Transcribing with local Whisper")
|
|
|
|
| 299 |
|
| 300 |
model = self._get_whisper_local()
|
| 301 |
|
| 302 |
+
# Try to load audio - if it fails (e.g., webm format), convert with PyAV
|
| 303 |
+
try:
|
| 304 |
+
audio_array, sr = librosa.load(io.BytesIO(audio_data), sr=16000)
|
| 305 |
+
except Exception as e:
|
| 306 |
+
logging.warning(f"librosa.load failed: {e}, trying PyAV conversion")
|
| 307 |
+
# Convert using PyAV (bundled ffmpeg libraries)
|
| 308 |
+
import av
|
| 309 |
+
import tempfile
|
| 310 |
+
import os
|
| 311 |
+
|
| 312 |
+
# PyAV needs a file for some formats like webm
|
| 313 |
+
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
|
| 314 |
+
f.write(audio_data)
|
| 315 |
+
temp_path = f.name
|
| 316 |
+
|
| 317 |
+
try:
|
| 318 |
+
container = av.open(temp_path)
|
| 319 |
+
audio_frames = []
|
| 320 |
+
original_sr = 48000 # Default sample rate
|
| 321 |
+
|
| 322 |
+
# Get sample rate before decoding
|
| 323 |
+
if container.streams.audio:
|
| 324 |
+
original_sr = container.streams.audio[0].rate
|
| 325 |
+
logging.info(f"Audio sample rate: {original_sr}")
|
| 326 |
+
|
| 327 |
+
for frame in container.decode(audio=0):
|
| 328 |
+
# Convert to numpy array
|
| 329 |
+
array = frame.to_ndarray()
|
| 330 |
+
# If stereo, take mean to mono
|
| 331 |
+
if array.ndim > 1:
|
| 332 |
+
array = array.mean(axis=0)
|
| 333 |
+
audio_frames.append(array)
|
| 334 |
+
|
| 335 |
+
container.close()
|
| 336 |
+
|
| 337 |
+
if not audio_frames:
|
| 338 |
+
raise RuntimeError("No audio frames decoded from input")
|
| 339 |
+
|
| 340 |
+
# Concatenate all frames
|
| 341 |
+
audio_array = np.concatenate(audio_frames).astype(np.float32)
|
| 342 |
+
|
| 343 |
+
# Normalize to [-1, 1] range if needed
|
| 344 |
+
if audio_array.max() > 1.0 or audio_array.min() < -1.0:
|
| 345 |
+
audio_array = audio_array / 32768.0
|
| 346 |
+
|
| 347 |
+
# Resample to 16kHz if needed (Whisper expects 16kHz)
|
| 348 |
+
if original_sr != 16000:
|
| 349 |
+
# Use scipy for faster resampling (48000->16000 = 3:1 ratio)
|
| 350 |
+
from scipy import signal
|
| 351 |
+
gcd = np.gcd(16000, original_sr)
|
| 352 |
+
up = 16000 // gcd
|
| 353 |
+
down = original_sr // gcd
|
| 354 |
+
audio_array = signal.resample_poly(audio_array, up, down)
|
| 355 |
+
finally:
|
| 356 |
+
if os.path.exists(temp_path):
|
| 357 |
+
os.unlink(temp_path)
|
| 358 |
|
| 359 |
# Process audio
|
| 360 |
input_features = self._whisper_processor(
|
api/endpoints/v1/routers/therapy.py
CHANGED
|
@@ -381,6 +381,54 @@ async def demo_get_exercise(
|
|
| 381 |
return exercise
|
| 382 |
|
| 383 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
@router.post("/demo/feedback", tags=["therapy-demo"])
|
| 385 |
async def demo_ai_feedback(
|
| 386 |
target_text: str = Query(..., description="Text to practice"),
|
|
|
|
| 381 |
return exercise
|
| 382 |
|
| 383 |
|
| 384 |
+
@router.post("/demo/transcribe", tags=["therapy-demo"])
async def demo_transcribe_audio(
    file: UploadFile = File(...),
    engine: Optional[ASREngine] = Query(None, description="ASR engine"),
):
    """
    [DEMO] Transcribe audio without auth - for testing Whisper/SpeechBrain.

    Uses the same therapy-optimized ASR as authenticated endpoint.

    Args:
        file: Uploaded audio; its content type must be in ALLOWED_AUDIO_TYPES.
        engine: Optional engine override passed through to the transcriber.

    Returns:
        Dict with "text", "transcription" (alias of "text"), "engine_used",
        "confidence" and "word_timestamps".

    Raises:
        HTTPException: 400 for a disallowed content type, an oversized file
            or an empty file; 500 when transcription itself fails.
    """
    logging.info(f"Demo transcription request - file: {file.filename}, type: {file.content_type}")

    # Validate file
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(status_code=400, detail=f"Invalid audio file type: {file.content_type}. Allowed: {ALLOWED_AUDIO_TYPES}")

    contents = await file.read()
    if len(contents) > FILE_SIZE_LIMIT:
        raise HTTPException(status_code=400, detail="File size exceeds 25 MB limit")

    if len(contents) == 0:
        raise HTTPException(status_code=400, detail="Empty audio file")

    logging.info(f"Processing audio: {len(contents)} bytes")

    try:
        result = transcribe_for_therapy(
            audio_data=contents,
            filename=file.filename or "audio.webm",
            content_type=file.content_type or "audio/webm",
            engine=engine
        )

        logging.info(f"Transcription result: {result.text}")

        return {
            "text": result.text,
            "transcription": result.text,  # Alias for compatibility
            "engine_used": result.engine_used.value,
            "confidence": result.confidence,
            "word_timestamps": result.word_timestamps
        }

    except Exception as e:
        # logging.exception keeps the traceback in the server log; the
        # response body is unchanged for existing clients.
        logging.exception(f"Demo transcription failed: {e}")
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}") from e
|
| 430 |
+
|
| 431 |
+
|
| 432 |
@router.post("/demo/feedback", tags=["therapy-demo"])
|
| 433 |
async def demo_ai_feedback(
|
| 434 |
target_text: str = Query(..., description="Text to practice"),
|
requirements.txt
CHANGED
|
@@ -8,3 +8,12 @@ python-multipart
|
|
| 8 |
openai
|
| 9 |
httpx
|
| 10 |
requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
openai
|
| 9 |
httpx
|
| 10 |
requests
|
| 11 |
+
# Speech recognition - Whisper large-v3 (free, open-source from HuggingFace)
|
| 12 |
+
transformers
|
| 13 |
+
librosa
|
| 14 |
+
torch
|
| 15 |
+
soundfile
|
| 16 |
+
av # PyAV for webm/opus audio decoding
|
| 17 |
+
scipy # Fast audio resampling
|
| 18 |
+
huggingface_hub # For free Whisper API
|
| 19 |
+
groq # Groq API - FREE fast Whisper large-v3
|