Spaces:

Ntabukiraniro
/

ubumuntu-api

Sleeping

App Files Community

Macbook committed 5 days ago

Commit

4ac15b2

1 Parent(s): f9b1f70

Deploy with Groq Whisper ASR

Browse files

Files changed (7) hide show

.gitignore +6 -0
Dockerfile +7 -2
api/config.py +2 -0
api/data/exercises.py +6 -573
api/endpoints/v1/processing/therapy_asr.py +152 -11
api/endpoints/v1/routers/therapy.py +48 -0
requirements.txt +9 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+.env
+__pycache__/
+*.pyc
+models/
+.DS_Store
+model_checkpoints/

Dockerfile CHANGED Viewed

@@ -1,4 +1,9 @@
-FROM python:3.9
 RUN useradd -m -u 1000 user
 USER user
@@ -10,4 +15,4 @@ COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 COPY --chown=user . /app
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

+FROM python:3.11-slim
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
 RUN useradd -m -u 1000 user
 USER user
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 COPY --chown=user . /app
+CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]

api/config.py CHANGED Viewed

@@ -19,6 +19,8 @@ class Settings(BaseSettings):
     CLERK_PEM_PUBLIC_KEY: str = os.getenv("CLERK_PEM_PUBLIC_KEY", "")
     OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
     GITHUB_TOKEN: str = os.getenv("GITHUB_TOKEN", "")  # For GitHub Models GPT-4o
     model_config = SettingsConfigDict(env_file=".env", extra="ignore")
     openapi_url: str = "/openapi.json"

     CLERK_PEM_PUBLIC_KEY: str = os.getenv("CLERK_PEM_PUBLIC_KEY", "")
     OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
     GITHUB_TOKEN: str = os.getenv("GITHUB_TOKEN", "")  # For GitHub Models GPT-4o
+    HUGGINGFACE_TOKEN: str = os.getenv("HUGGINGFACE_TOKEN", os.getenv("HF_TOKEN", ""))  # For HuggingFace API
+    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")  # Free fast Whisper large-v3
     model_config = SettingsConfigDict(env_file=".env", extra="ignore")
     openapi_url: str = "/openapi.json"

api/data/exercises.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-Comprehensive Speech Therapy Exercises Database
-Categories: Fundamentals, Speech Practice, Visual Learning, Sound Imitation
 Languages: English (en), French (fr)
 """
@@ -11,8 +11,6 @@ from enum import Enum
 class ExerciseType(str, Enum):
     # Fundamentals
-    BREATHING = "breathing"
-    ARTICULATION = "articulation"
     PHONEME = "phoneme"
     # Speech Practice
@@ -20,16 +18,6 @@ class ExerciseType(str, Enum):
     SENTENCE_READING = "sentence_reading"
     TONGUE_TWISTER = "tongue_twister"
-    # Visual Learning
-    COLOR = "color"
-    OBJECT = "object"
-    ANIMAL = "animal"
-    ACTION = "action"
-    # Sound Imitation
-    ANIMAL_SOUND = "animal_sound"
-    ENVIRONMENTAL_SOUND = "environmental_sound"
 class Difficulty(str, Enum):
     EASY = "easy"
@@ -39,7 +27,7 @@ class Difficulty(str, Enum):
 class Category(BaseModel):
     id: str
-    name: Dict[str, str]  # {"en": "...", "fr": "..."}
     description: Dict[str, str]
     icon: str
     subcategories: List[str]
@@ -54,8 +42,6 @@ class Exercise(BaseModel):
     title: Dict[str, str]
     target_text: Dict[str, str]
     instructions: Dict[str, str]
-    image_url: Optional[str] = None
-    audio_url: Optional[str] = None
     phoneme_focus: Optional[List[str]] = None
@@ -68,11 +54,11 @@ CATEGORIES: List[Dict] = [
         "id": "fundamentals",
         "name": {"en": "Fundamentals", "fr": "Fondamentaux"},
         "description": {
-            "en": "Build your foundation with breathing and articulation exercises",
-            "fr": "Construisez votre base avec des exercices de respiration et d'articulation"
         },
         "icon": "🎯",
-        "subcategories": ["breathing", "articulation", "phoneme"]
     },
     {
         "id": "speech_practice",
@@ -83,38 +69,10 @@ CATEGORIES: List[Dict] = [
         },
         "icon": "📖",
         "subcategories": ["word_repetition", "sentence_reading", "tongue_twister"]
-    },
-    {
-        "id": "visual_learning",
-        "name": {"en": "Visual Learning", "fr": "Apprentissage Visuel"},
-        "description": {
-            "en": "Learn by identifying colors, objects, animals, and actions",
-            "fr": "Apprenez en identifiant les couleurs, objets, animaux et actions"
-        },
-        "icon": "🖼️",
-        "subcategories": ["color", "object", "animal", "action"]
-    },
-    {
-        "id": "sound_imitation",
-        "name": {"en": "Sound Imitation", "fr": "Imitation de Sons"},
-        "description": {
-            "en": "Imitate animal and environmental sounds",
-            "fr": "Imitez les sons d'animaux et de l'environnement"
-        },
-        "icon": "🔊",
-        "subcategories": ["animal_sound", "environmental_sound"]
     }
 ]
 SUBCATEGORIES: Dict[str, Dict] = {
-    "breathing": {
-        "name": {"en": "Breathing Exercises", "fr": "Exercices de Respiration"},
-        "description": {"en": "Control your breath for better speech", "fr": "Contrôlez votre respiration pour mieux parler"}
-    },
-    "articulation": {
-        "name": {"en": "Articulation Drills", "fr": "Exercices d'Articulation"},
-        "description": {"en": "Improve mouth and tongue movements", "fr": "Améliorez les mouvements de la bouche et de la langue"}
-    },
     "phoneme": {
         "name": {"en": "Phoneme Practice", "fr": "Pratique des Phonèmes"},
         "description": {"en": "Master specific sounds like R, S, TH", "fr": "Maîtrisez des sons spécifiques comme R, S, CH"}
@@ -130,30 +88,6 @@ SUBCATEGORIES: Dict[str, Dict] = {
     "tongue_twister": {
         "name": {"en": "Tongue Twisters", "fr": "Virelangues"},
         "description": {"en": "Challenge yourself with tricky phrases", "fr": "Défiez-vous avec des phrases difficiles"}
-    },
-    "color": {
-        "name": {"en": "Colors", "fr": "Couleurs"},
-        "description": {"en": "Identify and say color names", "fr": "Identifiez et dites les noms des couleurs"}
-    },
-    "object": {
-        "name": {"en": "Objects", "fr": "Objets"},
-        "description": {"en": "Name everyday objects", "fr": "Nommez des objets du quotidien"}
-    },
-    "animal": {
-        "name": {"en": "Animals", "fr": "Animaux"},
-        "description": {"en": "Identify animals by sight", "fr": "Identifiez les animaux à vue"}
-    },
-    "action": {
-        "name": {"en": "Actions", "fr": "Actions"},
-        "description": {"en": "Describe what people are doing", "fr": "Décrivez ce que font les gens"}
-    },
-    "animal_sound": {
-        "name": {"en": "Animal Sounds", "fr": "Sons d'Animaux"},
-        "description": {"en": "Imitate animal sounds", "fr": "Imitez les sons des animaux"}
-    },
-    "environmental_sound": {
-        "name": {"en": "Environmental Sounds", "fr": "Sons de l'Environnement"},
-        "description": {"en": "Imitate sounds around us", "fr": "Imitez les sons autour de nous"}
     }
 }
@@ -163,74 +97,6 @@ SUBCATEGORIES: Dict[str, Dict] = {
 # =============================================================================
 EXERCISES: List[Dict] = [
-    # =========================================================================
-    # FUNDAMENTALS - Breathing
-    # =========================================================================
-    {
-        "id": "breath-001",
-        "type": "breathing",
-        "category": "fundamentals",
-        "subcategory": "breathing",
-        "difficulty": "easy",
-        "title": {"en": "Deep Belly Breathing", "fr": "Respiration Abdominale"},
-        "target_text": {"en": "Breathe in slowly through your nose, hold, breathe out through your mouth", "fr": "Inspirez lentement par le nez, retenez, expirez par la bouche"},
-        "instructions": {"en": "Place your hand on your belly. Breathe in for 4 seconds, hold for 2, breathe out for 4. Repeat 3 times.", "fr": "Placez votre main sur le ventre. Inspirez 4 secondes, retenez 2, expirez 4. Répétez 3 fois."},
-    },
-    {
-        "id": "breath-002",
-        "type": "breathing",
-        "category": "fundamentals",
-        "subcategory": "breathing",
-        "difficulty": "easy",
-        "title": {"en": "Candle Blow", "fr": "Souffler la Bougie"},
-        "target_text": {"en": "Take a deep breath and blow out slowly like blowing a candle", "fr": "Prenez une grande inspiration et soufflez doucement comme une bougie"},
-        "instructions": {"en": "Imagine a candle in front of you. Take a deep breath and blow slowly to make the flame flicker but not go out.", "fr": "Imaginez une bougie devant vous. Inspirez et soufflez doucement pour faire vaciller la flamme sans l'éteindre."},
-    },
-    {
-        "id": "breath-003",
-        "type": "breathing",
-        "category": "fundamentals",
-        "subcategory": "breathing",
-        "difficulty": "medium",
-        "title": {"en": "Sustained Breath", "fr": "Souffle Prolongé"},
-        "target_text": {"en": "Aaaaaaaaahhhhhh", "fr": "Aaaaaaaaahhhhhh"},
-        "instructions": {"en": "Take a deep breath and say 'Ahhh' for as long as you can. Try to reach 10 seconds!", "fr": "Inspirez profondément et dites 'Ahhh' aussi longtemps que possible. Essayez d'atteindre 10 secondes!"},
-    },
-    # =========================================================================
-    # FUNDAMENTALS - Articulation
-    # =========================================================================
-    {
-        "id": "artic-001",
-        "type": "articulation",
-        "category": "fundamentals",
-        "subcategory": "articulation",
-        "difficulty": "easy",
-        "title": {"en": "Lip Warm-up", "fr": "Échauffement des Lèvres"},
-        "target_text": {"en": "Ma ma ma, Pa pa pa, Ba ba ba", "fr": "Ma ma ma, Pa pa pa, Ba ba ba"},
-        "instructions": {"en": "Say each syllable clearly, focusing on your lip movements. Repeat 3 times.", "fr": "Prononcez chaque syllabe clairement en vous concentrant sur vos lèvres. Répétez 3 fois."},
-    },
-    {
-        "id": "artic-002",
-        "type": "articulation",
-        "category": "fundamentals",
-        "subcategory": "articulation",
-        "difficulty": "easy",
-        "title": {"en": "Tongue Stretch", "fr": "Étirement de la Langue"},
-        "target_text": {"en": "La la la, Ta ta ta, Da da da", "fr": "La la la, Ta ta ta, Da da da"},
-        "instructions": {"en": "Touch the roof of your mouth with your tongue for each syllable. Feel the movement!", "fr": "Touchez le palais avec votre langue pour chaque syllabe. Sentez le mouvement!"},
-    },
-    {
-        "id": "artic-003",
-        "type": "articulation",
-        "category": "fundamentals",
-        "subcategory": "articulation",
-        "difficulty": "medium",
-        "title": {"en": "Jaw Exercise", "fr": "Exercice de Mâchoire"},
-        "target_text": {"en": "Wa wa wa, Ya ya ya, Oo ee oo ee", "fr": "Oua oua oua, Ya ya ya, Ou i ou i"},
-        "instructions": {"en": "Open your mouth wide for each sound. Feel your jaw moving up and down.", "fr": "Ouvrez grand la bouche pour chaque son. Sentez votre mâchoire bouger."},
-    },
     # =========================================================================
     # FUNDAMENTALS - Phoneme Practice
     # =========================================================================
@@ -415,437 +281,6 @@ EXERCISES: List[Dict] = [
         "instructions": {"en": "Focus on the 'N' and 'Y' sounds.", "fr": "Concentrez-vous sur les sons 'P' et 'N'."},
         "phoneme_focus": ["N", "Y"]
     },
-    # =========================================================================
-    # VISUAL LEARNING - Colors
-    # =========================================================================
-    {
-        "id": "color-001",
-        "type": "color",
-        "category": "visual_learning",
-        "subcategory": "color",
-        "difficulty": "easy",
-        "title": {"en": "Red", "fr": "Rouge"},
-        "target_text": {"en": "Red", "fr": "Rouge"},
-        "instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1562157873-818bc0726f68?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "color-002",
-        "type": "color",
-        "category": "visual_learning",
-        "subcategory": "color",
-        "difficulty": "easy",
-        "title": {"en": "Blue", "fr": "Bleu"},
-        "target_text": {"en": "Blue", "fr": "Bleu"},
-        "instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1579546929518-9e396f3cc809?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "color-003",
-        "type": "color",
-        "category": "visual_learning",
-        "subcategory": "color",
-        "difficulty": "easy",
-        "title": {"en": "Yellow", "fr": "Jaune"},
-        "target_text": {"en": "Yellow", "fr": "Jaune"},
-        "instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1495001258031-d1b407bc1776?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "color-004",
-        "type": "color",
-        "category": "visual_learning",
-        "subcategory": "color",
-        "difficulty": "easy",
-        "title": {"en": "Green", "fr": "Vert"},
-        "target_text": {"en": "Green", "fr": "Vert"},
-        "instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1464820453369-31d2c0b651af?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "color-005",
-        "type": "color",
-        "category": "visual_learning",
-        "subcategory": "color",
-        "difficulty": "easy",
-        "title": {"en": "Orange", "fr": "Orange"},
-        "target_text": {"en": "Orange", "fr": "Orange"},
-        "instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1557800636-894a64c1696f?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "color-006",
-        "type": "color",
-        "category": "visual_learning",
-        "subcategory": "color",
-        "difficulty": "easy",
-        "title": {"en": "Purple", "fr": "Violet"},
-        "target_text": {"en": "Purple", "fr": "Violet"},
-        "instructions": {"en": "Look at the color and say its name.", "fr": "Regardez la couleur et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1528459801416-a9e53bbf4e17?w=400&h=400&fit=crop"
-    },
-    # =========================================================================
-    # VISUAL LEARNING - Objects
-    # =========================================================================
-    {
-        "id": "obj-001",
-        "type": "object",
-        "category": "visual_learning",
-        "subcategory": "object",
-        "difficulty": "easy",
-        "title": {"en": "Apple", "fr": "Pomme"},
-        "target_text": {"en": "Apple", "fr": "Pomme"},
-        "instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
-        "image_url": "https://images.unsplash.com/photo-1584306670957-acf935f5033c?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "obj-002",
-        "type": "object",
-        "category": "visual_learning",
-        "subcategory": "object",
-        "difficulty": "easy",
-        "title": {"en": "Book", "fr": "Livre"},
-        "target_text": {"en": "Book", "fr": "Livre"},
-        "instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
-        "image_url": "https://images.unsplash.com/photo-1544947950-fa07a98d237f?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "obj-003",
-        "type": "object",
-        "category": "visual_learning",
-        "subcategory": "object",
-        "difficulty": "easy",
-        "title": {"en": "Car", "fr": "Voiture"},
-        "target_text": {"en": "Car", "fr": "Voiture"},
-        "instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
-        "image_url": "https://images.unsplash.com/photo-1502877338535-766e1452684a?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "obj-004",
-        "type": "object",
-        "category": "visual_learning",
-        "subcategory": "object",
-        "difficulty": "easy",
-        "title": {"en": "Chair", "fr": "Chaise"},
-        "target_text": {"en": "Chair", "fr": "Chaise"},
-        "instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
-        "image_url": "https://images.unsplash.com/photo-1503602642458-232111445657?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "obj-005",
-        "type": "object",
-        "category": "visual_learning",
-        "subcategory": "object",
-        "difficulty": "easy",
-        "title": {"en": "House", "fr": "Maison"},
-        "target_text": {"en": "House", "fr": "Maison"},
-        "instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
-        "image_url": "https://images.unsplash.com/photo-1518780664697-55e3ad937233?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "obj-006",
-        "type": "object",
-        "category": "visual_learning",
-        "subcategory": "object",
-        "difficulty": "medium",
-        "title": {"en": "Telephone", "fr": "Téléphone"},
-        "target_text": {"en": "Telephone", "fr": "Téléphone"},
-        "instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
-        "image_url": "https://images.unsplash.com/photo-1511707171634-5f897ff02aa9?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "obj-007",
-        "type": "object",
-        "category": "visual_learning",
-        "subcategory": "object",
-        "difficulty": "medium",
-        "title": {"en": "Umbrella", "fr": "Parapluie"},
-        "target_text": {"en": "Umbrella", "fr": "Parapluie"},
-        "instructions": {"en": "Look at the picture and say what you see.", "fr": "Regardez l'image et dites ce que vous voyez."},
-        "image_url": "https://images.unsplash.com/photo-1534309466160-70b22cc6252c?w=400&h=400&fit=crop"
-    },
-    # =========================================================================
-    # VISUAL LEARNING - Animals
-    # =========================================================================
-    {
-        "id": "animal-001",
-        "type": "animal",
-        "category": "visual_learning",
-        "subcategory": "animal",
-        "difficulty": "easy",
-        "title": {"en": "Dog", "fr": "Chien"},
-        "target_text": {"en": "Dog", "fr": "Chien"},
-        "instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1587300003388-59208cc962cb?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "animal-002",
-        "type": "animal",
-        "category": "visual_learning",
-        "subcategory": "animal",
-        "difficulty": "easy",
-        "title": {"en": "Cat", "fr": "Chat"},
-        "target_text": {"en": "Cat", "fr": "Chat"},
-        "instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1514888286974-6c03e2ca1dba?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "animal-003",
-        "type": "animal",
-        "category": "visual_learning",
-        "subcategory": "animal",
-        "difficulty": "easy",
-        "title": {"en": "Bird", "fr": "Oiseau"},
-        "target_text": {"en": "Bird", "fr": "Oiseau"},
-        "instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1522926193341-e9ffd686c60f?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "animal-004",
-        "type": "animal",
-        "category": "visual_learning",
-        "subcategory": "animal",
-        "difficulty": "easy",
-        "title": {"en": "Fish", "fr": "Poisson"},
-        "target_text": {"en": "Fish", "fr": "Poisson"},
-        "instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1524704654690-b56c05c78a00?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "animal-005",
-        "type": "animal",
-        "category": "visual_learning",
-        "subcategory": "animal",
-        "difficulty": "medium",
-        "title": {"en": "Elephant", "fr": "Éléphant"},
-        "target_text": {"en": "Elephant", "fr": "Éléphant"},
-        "instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1557050543-4d5f4e07ef46?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "animal-006",
-        "type": "animal",
-        "category": "visual_learning",
-        "subcategory": "animal",
-        "difficulty": "medium",
-        "title": {"en": "Lion", "fr": "Lion"},
-        "target_text": {"en": "Lion", "fr": "Lion"},
-        "instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1546182990-dffeafbe841d?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "animal-007",
-        "type": "animal",
-        "category": "visual_learning",
-        "subcategory": "animal",
-        "difficulty": "medium",
-        "title": {"en": "Butterfly", "fr": "Papillon"},
-        "target_text": {"en": "Butterfly", "fr": "Papillon"},
-        "instructions": {"en": "Look at the animal and say its name.", "fr": "Regardez l'animal et dites son nom."},
-        "image_url": "https://images.unsplash.com/photo-1452570053594-1b985d6ea890?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "animal-008",
-        "type": "animal",
-        "category": "visual_learning",
-        "subcategory": "animal",
-        "difficulty": "hard",
-        "title": {"en": "Hippopotamus", "fr": "Hippopotame"},
-        "target_text": {"en": "Hippopotamus", "fr": "Hippopotame"},
-        "instructions": {"en": "Look at the animal and say its name. This is a long word!", "fr": "Regardez l'animal et dites son nom. C'est un mot long!"},
-        "image_url": "https://images.unsplash.com/photo-1517840933437-c41356892b35?w=400&h=400&fit=crop"
-    },
-    # =========================================================================
-    # VISUAL LEARNING - Actions
-    # =========================================================================
-    {
-        "id": "action-001",
-        "type": "action",
-        "category": "visual_learning",
-        "subcategory": "action",
-        "difficulty": "easy",
-        "title": {"en": "Running", "fr": "Courir"},
-        "target_text": {"en": "Running", "fr": "Courir"},
-        "instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
-        "image_url": "https://images.unsplash.com/photo-1552674605-db6ffd4facb5?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "action-002",
-        "type": "action",
-        "category": "visual_learning",
-        "subcategory": "action",
-        "difficulty": "easy",
-        "title": {"en": "Eating", "fr": "Manger"},
-        "target_text": {"en": "Eating", "fr": "Manger"},
-        "instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
-        "image_url": "https://images.unsplash.com/photo-1504674900247-0877df9cc836?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "action-003",
-        "type": "action",
-        "category": "visual_learning",
-        "subcategory": "action",
-        "difficulty": "easy",
-        "title": {"en": "Sleeping", "fr": "Dormir"},
-        "target_text": {"en": "Sleeping", "fr": "Dormir"},
-        "instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
-        "image_url": "https://images.unsplash.com/photo-1541781774459-bb2af2f05b55?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "action-004",
-        "type": "action",
-        "category": "visual_learning",
-        "subcategory": "action",
-        "difficulty": "easy",
-        "title": {"en": "Reading", "fr": "Lire"},
-        "target_text": {"en": "Reading", "fr": "Lire"},
-        "instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
-        "image_url": "https://images.unsplash.com/photo-1506880018603-83d5b814b5a6?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "action-005",
-        "type": "action",
-        "category": "visual_learning",
-        "subcategory": "action",
-        "difficulty": "medium",
-        "title": {"en": "Swimming", "fr": "Nager"},
-        "target_text": {"en": "Swimming", "fr": "Nager"},
-        "instructions": {"en": "Look at the action and say what the person is doing.", "fr": "Regardez l'action et dites ce que fait la personne."},
-        "image_url": "https://images.unsplash.com/photo-1530549387789-4c1017266635?w=400&h=400&fit=crop"
-    },
-    # =========================================================================
-    # SOUND IMITATION - Animal Sounds
-    # =========================================================================
-    {
-        "id": "asound-001",
-        "type": "animal_sound",
-        "category": "sound_imitation",
-        "subcategory": "animal_sound",
-        "difficulty": "easy",
-        "title": {"en": "Dog Sound", "fr": "Son du Chien"},
-        "target_text": {"en": "Woof woof", "fr": "Ouaf ouaf"},
-        "instructions": {"en": "Imitate the sound a dog makes!", "fr": "Imitez le son que fait un chien!"},
-        "image_url": "https://images.unsplash.com/photo-1587300003388-59208cc962cb?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "asound-002",
-        "type": "animal_sound",
-        "category": "sound_imitation",
-        "subcategory": "animal_sound",
-        "difficulty": "easy",
-        "title": {"en": "Cat Sound", "fr": "Son du Chat"},
-        "target_text": {"en": "Meow meow", "fr": "Miaou miaou"},
-        "instructions": {"en": "Imitate the sound a cat makes!", "fr": "Imitez le son que fait un chat!"},
-        "image_url": "https://images.unsplash.com/photo-1514888286974-6c03e2ca1dba?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "asound-003",
-        "type": "animal_sound",
-        "category": "sound_imitation",
-        "subcategory": "animal_sound",
-        "difficulty": "easy",
-        "title": {"en": "Cow Sound", "fr": "Son de la Vache"},
-        "target_text": {"en": "Moo moo", "fr": "Meuh meuh"},
-        "instructions": {"en": "Imitate the sound a cow makes!", "fr": "Imitez le son que fait une vache!"},
-        "image_url": "https://images.unsplash.com/photo-1570042225831-d98fa7577f1e?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "asound-004",
-        "type": "animal_sound",
-        "category": "sound_imitation",
-        "subcategory": "animal_sound",
-        "difficulty": "easy",
-        "title": {"en": "Duck Sound", "fr": "Son du Canard"},
-        "target_text": {"en": "Quack quack", "fr": "Coin coin"},
-        "instructions": {"en": "Imitate the sound a duck makes!", "fr": "Imitez le son que fait un canard!"},
-        "image_url": "https://images.unsplash.com/photo-1459682687441-7761439a709d?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "asound-005",
-        "type": "animal_sound",
-        "category": "sound_imitation",
-        "subcategory": "animal_sound",
-        "difficulty": "medium",
-        "title": {"en": "Lion Sound", "fr": "Son du Lion"},
-        "target_text": {"en": "Roar!", "fr": "Grrrr!"},
-        "instructions": {"en": "Imitate the sound a lion makes! Be loud!", "fr": "Imitez le son que fait un lion! Soyez fort!"},
-        "image_url": "https://images.unsplash.com/photo-1546182990-dffeafbe841d?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "asound-006",
-        "type": "animal_sound",
-        "category": "sound_imitation",
-        "subcategory": "animal_sound",
-        "difficulty": "medium",
-        "title": {"en": "Snake Sound", "fr": "Son du Serpent"},
-        "target_text": {"en": "Sssssss", "fr": "Sssssss"},
-        "instructions": {"en": "Imitate the hissing sound a snake makes!", "fr": "Imitez le sifflement que fait un serpent!"},
-        "image_url": "https://images.unsplash.com/photo-1531386151447-fd76ad50012f?w=400&h=400&fit=crop"
-    },
-    # =========================================================================
-    # SOUND IMITATION - Environmental Sounds
-    # =========================================================================
-    {
-        "id": "esound-001",
-        "type": "environmental_sound",
-        "category": "sound_imitation",
-        "subcategory": "environmental_sound",
-        "difficulty": "easy",
-        "title": {"en": "Car Horn", "fr": "Klaxon"},
-        "target_text": {"en": "Beep beep", "fr": "Pouet pouet"},
-        "instructions": {"en": "Imitate the sound of a car horn!", "fr": "Imitez le son d'un klaxon!"},
-        "image_url": "https://images.unsplash.com/photo-1502877338535-766e1452684a?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "esound-002",
-        "type": "environmental_sound",
-        "category": "sound_imitation",
-        "subcategory": "environmental_sound",
-        "difficulty": "easy",
-        "title": {"en": "Train Sound", "fr": "Son du Train"},
-        "target_text": {"en": "Choo choo", "fr": "Tchou tchou"},
-        "instructions": {"en": "Imitate the sound of a train!", "fr": "Imitez le son d'un train!"},
-        "image_url": "https://images.unsplash.com/photo-1474487548417-781cb71495f3?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "esound-003",
-        "type": "environmental_sound",
-        "category": "sound_imitation",
-        "subcategory": "environmental_sound",
-        "difficulty": "easy",
-        "title": {"en": "Clock Sound", "fr": "Son de l'Horloge"},
-        "target_text": {"en": "Tick tock tick tock", "fr": "Tic tac tic tac"},
-        "instructions": {"en": "Imitate the sound of a clock!", "fr": "Imitez le son d'une horloge!"},
-        "image_url": "https://images.unsplash.com/photo-1563861826100-9cb868fdbe1c?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "esound-004",
-        "type": "environmental_sound",
-        "category": "sound_imitation",
-        "subcategory": "environmental_sound",
-        "difficulty": "medium",
-        "title": {"en": "Rain Sound", "fr": "Son de la Pluie"},
-        "target_text": {"en": "Pitter patter pitter patter", "fr": "Plic ploc plic ploc"},
-        "instructions": {"en": "Imitate the sound of rain falling!", "fr": "Imitez le son de la pluie qui tombe!"},
-        "image_url": "https://images.unsplash.com/photo-1519692933481-e162a57d6721?w=400&h=400&fit=crop"
-    },
-    {
-        "id": "esound-005",
-        "type": "environmental_sound",
-        "category": "sound_imitation",
-        "subcategory": "environmental_sound",
-        "difficulty": "medium",
-        "title": {"en": "Wind Sound", "fr": "Son du Vent"},
-        "target_text": {"en": "Whoooosh", "fr": "Woooosh"},
-        "instructions": {"en": "Imitate the sound of strong wind!", "fr": "Imitez le son du vent fort!"},
-        "image_url": "https://images.unsplash.com/photo-1534088568595-a066f410bcda?w=400&h=400&fit=crop"
-    },
 ]
@@ -872,8 +307,6 @@ def get_all_exercises(language: str = "en") -> List[Dict]:
             "title": ex["title"].get(language, ex["title"]["en"]),
             "target_text": ex["target_text"].get(language, ex["target_text"]["en"]),
             "instructions": ex["instructions"].get(language, ex["instructions"]["en"]),
-            "image_url": ex.get("image_url"),
-            "audio_url": ex.get("audio_url"),
             "phoneme_focus": ex.get("phoneme_focus", [])
         })
     return exercises

 """
+Speech Therapy Exercises Database
+Categories: Fundamentals, Speech Practice
 Languages: English (en), French (fr)
 """
 class ExerciseType(str, Enum):
     # Fundamentals
     PHONEME = "phoneme"
     # Speech Practice
     SENTENCE_READING = "sentence_reading"
     TONGUE_TWISTER = "tongue_twister"
 class Difficulty(str, Enum):
     EASY = "easy"
 class Category(BaseModel):
     id: str
+    name: Dict[str, str]
     description: Dict[str, str]
     icon: str
     subcategories: List[str]
     title: Dict[str, str]
     target_text: Dict[str, str]
     instructions: Dict[str, str]
     phoneme_focus: Optional[List[str]] = None
         "id": "fundamentals",
         "name": {"en": "Fundamentals", "fr": "Fondamentaux"},
         "description": {
+            "en": "Master specific sounds and phonemes",
+            "fr": "Maîtrisez des sons et phonèmes spécifiques"
         },
         "icon": "🎯",
+        "subcategories": ["phoneme"]
     },
     {
         "id": "speech_practice",
         },
         "icon": "📖",
         "subcategories": ["word_repetition", "sentence_reading", "tongue_twister"]
     }
 ]
 SUBCATEGORIES: Dict[str, Dict] = {
     "phoneme": {
         "name": {"en": "Phoneme Practice", "fr": "Pratique des Phonèmes"},
         "description": {"en": "Master specific sounds like R, S, TH", "fr": "Maîtrisez des sons spécifiques comme R, S, CH"}
     "tongue_twister": {
         "name": {"en": "Tongue Twisters", "fr": "Virelangues"},
         "description": {"en": "Challenge yourself with tricky phrases", "fr": "Défiez-vous avec des phrases difficiles"}
     }
 }
 # =============================================================================
 EXERCISES: List[Dict] = [
     # =========================================================================
     # FUNDAMENTALS - Phoneme Practice
     # =========================================================================
         "instructions": {"en": "Focus on the 'N' and 'Y' sounds.", "fr": "Concentrez-vous sur les sons 'P' et 'N'."},
         "phoneme_focus": ["N", "Y"]
     },
 ]
             "title": ex["title"].get(language, ex["title"]["en"]),
             "target_text": ex["target_text"].get(language, ex["target_text"]["en"]),
             "instructions": ex["instructions"].get(language, ex["instructions"]["en"]),
             "phoneme_focus": ex.get("phoneme_focus", [])
         })
     return exercises

api/endpoints/v1/processing/therapy_asr.py CHANGED Viewed

@@ -15,6 +15,41 @@ from dataclasses import dataclass
 from api.config import settings
 if settings.ENVIRONMENT == "development":
     logging.basicConfig(level=logging.DEBUG)
 else:
@@ -23,9 +58,10 @@ else:
 class ASREngine(str, Enum):
     """Available ASR engines."""
-    WHISPER_LOCAL = "whisper_local"
     SPEECHBRAIN = "speechbrain"
-    WHISPER_API = "whisper_api"
     AUTO = "auto"  # Automatically select based on user profile
@@ -67,7 +103,8 @@ class TherapyASR:
                 import torch
                 from transformers import WhisperProcessor, WhisperForConditionalGeneration
-                model_name = "openai/whisper-base"  # Start with base, upgrade as needed
                 logging.info(f"Loading local Whisper model: {model_name}")
                 self._whisper_processor = WhisperProcessor.from_pretrained(model_name)
@@ -89,6 +126,9 @@ class TherapyASR:
         """Lazy load SpeechBrain model for atypical speech."""
         if self._speechbrain_model is None:
             try:
                 import speechbrain as sb
                 # Use pre-trained model, can be swapped for fine-tuned version
@@ -120,8 +160,10 @@ class TherapyASR:
             if user_profile.get("privacy_mode") == "local":
                 return ASREngine.WHISPER_LOCAL
-        # Default to API for best accuracy
-        return ASREngine.WHISPER_API
     def transcribe(
         self,
@@ -147,15 +189,15 @@ class TherapyASR:
         selected_engine = engine or self._select_engine(user_profile)
         logging.info(f"Transcribing with engine: {selected_engine.value}")
-        # Try selected engine with fallback chain
         fallback_order = [selected_engine]
-        if selected_engine != ASREngine.WHISPER_API:
-            fallback_order.append(ASREngine.WHISPER_API)
         last_error = None
         for eng in fallback_order:
             try:
-                if eng == ASREngine.WHISPER_API:
                     return self._transcribe_whisper_api(audio_data, filename, content_type)
                 elif eng == ASREngine.WHISPER_LOCAL:
                     return self._transcribe_whisper_local(audio_data)
@@ -202,6 +244,51 @@ class TherapyASR:
             word_timestamps=word_timestamps
         )
     def _transcribe_whisper_local(self, audio_data: bytes) -> TranscriptionResult:
         """Transcribe using local Whisper model."""
         logging.info("Transcribing with local Whisper")
@@ -212,8 +299,62 @@ class TherapyASR:
         model = self._get_whisper_local()
-        # Load audio from bytes
-        audio_array, sr = librosa.load(io.BytesIO(audio_data), sr=16000)
         # Process audio
         input_features = self._whisper_processor(

 from api.config import settings
+def _patch_torchaudio_for_speechbrain():
+    """
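+    Patch torchaudio so SpeechBrain 1.0.3 works on torchaudio builds that no
+    longer expose `list_audio_backends()`. SpeechBrain 1.0.3 still calls that
+    function, so a compatibility shim is installed only when it is missing.
+    NOTE(review): the function appears to have been removed in torchaudio 2.9
+    rather than 2.2 - confirm the exact version against the torchaudio release notes.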
+    """
+    try:
+        import torchaudio
+        if not hasattr(torchaudio, 'list_audio_backends'):
+            # Add the missing function that returns available backends
+            def list_audio_backends():
+                """Compatibility shim for removed torchaudio function."""
+                backends = []
+                try:
+                    import soundfile
+                    backends.append('soundfile')
+                except ImportError:
+                    pass
+                try:
+                    # Check if sox is available
+                    import torchaudio.backend.sox_io_backend
+                    backends.append('sox')
+                except (ImportError, OSError):
+                    pass
+                # ffmpeg is reported unconditionally (assumed present; it is not probed)
+                backends.append('ffmpeg')
+                return backends
+            torchaudio.list_audio_backends = list_audio_backends
+            logging.debug("Patched torchaudio.list_audio_backends for SpeechBrain compatibility")
+    except ImportError:
+        pass  # torchaudio not installed, will fail later with proper error
 if settings.ENVIRONMENT == "development":
     logging.basicConfig(level=logging.DEBUG)
 else:
 class ASREngine(str, Enum):
     """Available ASR engines."""
+    WHISPER_JAX = "whisper_jax"  # Despite the name, this engine calls the Groq hosted Whisper API (fast, free; recommended)
+    WHISPER_LOCAL = "whisper_local"  # Local whisper-large-v3 (slow on CPU)
     SPEECHBRAIN = "speechbrain"
+    WHISPER_API = "whisper_api"  # OpenAI paid API
     AUTO = "auto"  # Automatically select based on user profile
                 import torch
                 from transformers import WhisperProcessor, WhisperForConditionalGeneration
+                # Use large-v3 for best accuracy (cached after first load)
+                model_name = "openai/whisper-large-v3"
                 logging.info(f"Loading local Whisper model: {model_name}")
                 self._whisper_processor = WhisperProcessor.from_pretrained(model_name)
         """Lazy load SpeechBrain model for atypical speech."""
         if self._speechbrain_model is None:
             try:
+                # Apply torchaudio compatibility patch before importing speechbrain
+                _patch_torchaudio_for_speechbrain()
                 import speechbrain as sb
                 # Use pre-trained model, can be swapped for fine-tuned version
             if user_profile.get("privacy_mode") == "local":
                 return ASREngine.WHISPER_LOCAL
+        # Default to Groq (fast, free) if API key available, else local
+        if settings.GROQ_API_KEY:
+            return ASREngine.WHISPER_JAX
+        return ASREngine.WHISPER_LOCAL
     def transcribe(
         self,
         selected_engine = engine or self._select_engine(user_profile)
         logging.info(f"Transcribing with engine: {selected_engine.value}")
+        # Use selected engine only (no fallback to paid API)
         fallback_order = [selected_engine]
         last_error = None
         for eng in fallback_order:
             try:
+                if eng == ASREngine.WHISPER_JAX:
+                    return self._transcribe_whisper_jax(audio_data)
+                elif eng == ASREngine.WHISPER_API:
                     return self._transcribe_whisper_api(audio_data, filename, content_type)
                 elif eng == ASREngine.WHISPER_LOCAL:
                     return self._transcribe_whisper_local(audio_data)
             word_timestamps=word_timestamps
         )
+    def _transcribe_whisper_jax(self, audio_data: bytes) -> TranscriptionResult:
+        """Transcribe using Groq API (free, fast Whisper large-v3)."""
+        logging.info("Transcribing with Groq Whisper (free, fast)")
+        from groq import Groq
+        import tempfile
+        import os
+        if not settings.GROQ_API_KEY:
+            raise RuntimeError("GROQ_API_KEY not configured. Get free key at https://console.groq.com")
+        client = Groq(api_key=settings.GROQ_API_KEY)
+        # Stage the audio in a temp .webm file; it is read back below and uploaded as a (filename, bytes) pair
+        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
+            f.write(audio_data)
+            temp_path = f.name
+        try:
+            with open(temp_path, "rb") as audio_file:
+                transcription = client.audio.transcriptions.create(
+                    file=(temp_path, audio_file.read()),
+                    model="whisper-large-v3",
+                    temperature=0,
+                    response_format="verbose_json",
+                )
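+            # Extract word timestamps if the response carries them.
+            # NOTE(review): the request sets no word-level timestamp granularity, so `words`
+            # may never be populated - confirm against the Groq transcription API docs.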
+            word_timestamps = None
+            if hasattr(transcription, 'words') and transcription.words:
+                word_timestamps = [
+                    {"word": w.word, "start": w.start, "end": w.end}
+                    for w in transcription.words
+                ]
+            return TranscriptionResult(
+                text=transcription.text.strip(),
+                engine_used=ASREngine.WHISPER_JAX,
+                language=getattr(transcription, 'language', None),
+                word_timestamps=word_timestamps
+            )
+        finally:
+            if os.path.exists(temp_path):
+                os.unlink(temp_path)
     def _transcribe_whisper_local(self, audio_data: bytes) -> TranscriptionResult:
         """Transcribe using local Whisper model."""
         logging.info("Transcribing with local Whisper")
         model = self._get_whisper_local()
+        # Try to load audio - if it fails (e.g., webm format), convert with PyAV
+        try:
+            audio_array, sr = librosa.load(io.BytesIO(audio_data), sr=16000)
+        except Exception as e:
+            logging.warning(f"librosa.load failed: {e}, trying PyAV conversion")
+            # Convert using PyAV (bundled ffmpeg libraries)
+            import av
+            import tempfile
+            import os
+            # PyAV needs a file for some formats like webm
+            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
+                f.write(audio_data)
+                temp_path = f.name
+            try:
+                container = av.open(temp_path)
+                audio_frames = []
+                original_sr = 48000  # Default sample rate
+                # Get sample rate before decoding
+                if container.streams.audio:
+                    original_sr = container.streams.audio[0].rate
+                    logging.info(f"Audio sample rate: {original_sr}")
+                for frame in container.decode(audio=0):
+                    # Convert to numpy array
+                    array = frame.to_ndarray()
+                    # Downmix by averaging over axis 0 (assumes a planar (channels, samples) layout - TODO confirm for packed sample formats)
+                    if array.ndim > 1:
+                        array = array.mean(axis=0)
+                    audio_frames.append(array)
+                container.close()
+                if not audio_frames:
+                    raise RuntimeError("No audio frames decoded from input")
+                # Concatenate all frames
+                audio_array = np.concatenate(audio_frames).astype(np.float32)
+                # If samples fall outside [-1, 1], assume 16-bit PCM and scale by 1/32768
+                if audio_array.max() > 1.0 or audio_array.min() < -1.0:
+                    audio_array = audio_array / 32768.0
+                # Resample to 16kHz if needed (Whisper expects 16kHz)
+                if original_sr != 16000:
+                    # Polyphase resampling with scipy; up/down factors are reduced by their gcd (e.g. 48000 -> 16000 gives up=1, down=3)
+                    from scipy import signal
+                    gcd = np.gcd(16000, original_sr)
+                    up = 16000 // gcd
+                    down = original_sr // gcd
+                    audio_array = signal.resample_poly(audio_array, up, down)
+            finally:
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
         # Process audio
         input_features = self._whisper_processor(

api/endpoints/v1/routers/therapy.py CHANGED Viewed

@@ -381,6 +381,54 @@ async def demo_get_exercise(
     return exercise
 @router.post("/demo/feedback", tags=["therapy-demo"])
 async def demo_ai_feedback(
     target_text: str = Query(..., description="Text to practice"),

     return exercise
+@router.post("/demo/transcribe", tags=["therapy-demo"])
+async def demo_transcribe_audio(
+    file: UploadFile = File(...),
+    engine: Optional[ASREngine] = Query(None, description="ASR engine"),
+):
+    """
+    [DEMO] Transcribe audio without auth - for testing Whisper/SpeechBrain.
+    Uses the same therapy-optimized ASR as authenticated endpoint.
+    """
+    logging.info(f"Demo transcription request - file: {file.filename}, type: {file.content_type}")
+    # Validate file
+    if file.content_type not in ALLOWED_AUDIO_TYPES:
+        raise HTTPException(status_code=400, detail=f"Invalid audio file type: {file.content_type}. Allowed: {ALLOWED_AUDIO_TYPES}")
+    contents = await file.read()
+    if len(contents) > FILE_SIZE_LIMIT:
+        raise HTTPException(status_code=400, detail="File size exceeds 25 MB limit")
+    if len(contents) == 0:
+        raise HTTPException(status_code=400, detail="Empty audio file")
+    logging.info(f"Processing audio: {len(contents)} bytes")
+    try:
+        result = transcribe_for_therapy(
+            audio_data=contents,
+            filename=file.filename or "audio.webm",
+            content_type=file.content_type or "audio/webm",
+            engine=engine
+        )
+        logging.info(f"Transcription result: {result.text}")
+        return {
+            "text": result.text,
+            "transcription": result.text,  # Alias for compatibility
+            "engine_used": result.engine_used.value,
+            "confidence": result.confidence,
+            "word_timestamps": result.word_timestamps
+        }
+    except Exception as e:
+        logging.error(f"Demo transcription failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
 @router.post("/demo/feedback", tags=["therapy-demo"])
 async def demo_ai_feedback(
     target_text: str = Query(..., description="Text to practice"),

requirements.txt CHANGED Viewed

@@ -8,3 +8,12 @@ python-multipart
 openai
 httpx
 requests

 openai
 httpx
 requests
+# Speech recognition - Whisper large-v3 (free, open-source from HuggingFace)
+transformers
+librosa
+torch
+soundfile
+av  # PyAV for webm/opus audio decoding
+scipy  # Fast audio resampling
+huggingface_hub  # For free Whisper API
+groq  # Groq API - FREE fast Whisper large-v3