# inferoxy-hub / utils.py
# nazdridoy's picture
# feat(tts): add dynamic model parameters and Fal.ai Chatterbox
# 43333ad verified
# raw
# history blame
# 8.5 kB
"""
Utility functions and constants for HF-Inferoxy AI Hub.
Contains configuration constants and helper functions.
"""
import os
# Default model and provider identifiers, grouped by task.

# Chat
DEFAULT_CHAT_MODEL = "openai/gpt-oss-20b"

# Text-to-image
DEFAULT_IMAGE_MODEL = "Qwen/Qwen-Image"
DEFAULT_IMAGE_PROVIDER = "fal-ai"

# Image-to-image
DEFAULT_IMAGE_TO_IMAGE_MODEL = "Qwen/Qwen-Image-Edit"
DEFAULT_IMAGE_TO_IMAGE_PROVIDER = "fal-ai"

# Text-to-speech
DEFAULT_TTS_MODEL = "hexgrad/Kokoro-82M"
DEFAULT_TTS_PROVIDER = "fal-ai"
# Default generation settings and system prompt for chat.
CHAT_CONFIG = {
    # Sampling / length settings
    "max_tokens": 1024,
    "temperature": 0.7,
    "top_p": 0.95,
    # System prompt text
    "system_message": "You are a helpful and friendly AI assistant. Provide clear, accurate, and helpful responses.",
}
# Default settings for image generation.
IMAGE_CONFIG = {
    # Output size
    "width": 1024,
    "height": 1024,
    # Sampler settings
    "num_inference_steps": 20,
    "guidance_scale": 7.5,
    # NOTE(review): -1 presumably means "pick a random seed" - confirm against the caller.
    "seed": -1,
    "negative_prompt": "blurry, low quality, distorted, deformed, ugly, bad anatomy",
}
# Provider identifiers listed per task.
CHAT_PROVIDERS = [
    "auto",
    "fireworks-ai",
    "cerebras",
    "groq",
    "together",
    "cohere",
]
IMAGE_PROVIDERS = [
    "hf-inference",
    "fal-ai",
    "nebius",
    "nscale",
    "replicate",
    "together",
]
# Short lists of model ids offered for quick access.
POPULAR_CHAT_MODELS = [
    "openai/gpt-oss-20b",
    "meta-llama/Llama-2-7b-chat-hf",
    "microsoft/DialoGPT-medium",
    "google/flan-t5-base",
]

POPULAR_IMAGE_MODELS = [
    "Qwen/Qwen-Image",
    "black-forest-labs/FLUX.1-dev",
    "stabilityai/stable-diffusion-xl-base-1.0",
    "runwayml/stable-diffusion-v1-5",
]
# Image generation presets; each entry is a (label, model id, provider) tuple.
IMAGE_MODEL_PRESETS = [
    (
        "Qwen (Fal.ai)",
        "Qwen/Qwen-Image",
        "fal-ai",
    ),
    (
        "Qwen (Replicate)",
        "Qwen/Qwen-Image",
        "replicate",
    ),
    (
        "FLUX.1 (Nebius)",
        "black-forest-labs/FLUX.1-dev",
        "nebius",
    ),
    (
        "SDXL (HF)",
        "stabilityai/stable-diffusion-xl-base-1.0",
        "hf-inference",
    ),
]
# Image-to-image presets; each entry is a (label, model id, provider) tuple.
IMAGE_TO_IMAGE_MODEL_PRESETS = [
    (
        "Qwen Image Edit (Fal.ai)",
        "Qwen/Qwen-Image-Edit",
        "fal-ai",
    ),
    (
        "Qwen Image Edit (Replicate)",
        "Qwen/Qwen-Image-Edit",
        "replicate",
    ),
    (
        "FLUX.1 Kontext (Nebius)",
        "black-forest-labs/FLUX.1-Kontext-dev",
        "nebius",
    ),
    (
        "SDXL (HF)",
        "stabilityai/stable-diffusion-xl-base-1.0",
        "hf-inference",
    ),
]
# Text-to-speech presets; each entry is a (label, model id, provider) tuple.
TTS_MODEL_PRESETS = [
    (
        "Kokoro (Fal.ai)",
        "hexgrad/Kokoro-82M",
        "fal-ai",
    ),
    (
        "Kokoro (Replicate)",
        "hexgrad/Kokoro-82M",
        "replicate",
    ),
    (
        "Chatterbox (Fal.ai)",
        "ResembleAI/chatterbox",
        "fal-ai",
    ),
]
# Per-model TTS settings, keyed by model id.
# NOTE(review): "extra_body_params" presumably names the extra request-body
# fields each model accepts - confirm against the code that reads this table.
TTS_MODEL_CONFIGS = {
    "hexgrad/Kokoro-82M": {
        "type": "kokoro",
        "supports_voice": True,
        "supports_speed": True,
        "extra_body_params": [
            "voice",
            "speed",
        ],
    },
    "ResembleAI/chatterbox": {
        "type": "chatterbox",
        "supports_voice": False,
        "supports_speed": False,
        "extra_body_params": [
            "audio_url",
            "exaggeration",
            "temperature",
            "cfg",
        ],
    },
}
# Voice options for Kokoro TTS (based on the reference app).
# Maps a display label to the Kokoro voice id. The labels carry emoji (a flag
# plus a 🚺/🚹 marker); they were previously stored as mojibake (UTF-8 bytes
# decoded with the wrong code page) and are restored here. Voice ids are
# unchanged.
TTS_VOICES = {
    # "af_" voice ids
    '🇺🇸 🚺 Heart ❤️': 'af_heart',
    '🇺🇸 🚺 Bella 🔥': 'af_bella',
    '🇺🇸 🚺 Nicole 🎧': 'af_nicole',
    '🇺🇸 🚺 Aoede': 'af_aoede',
    '🇺🇸 🚺 Kore': 'af_kore',
    '🇺🇸 🚺 Sarah': 'af_sarah',
    '🇺🇸 🚺 Nova': 'af_nova',
    '🇺🇸 🚺 Sky': 'af_sky',
    '🇺🇸 🚺 Alloy': 'af_alloy',
    '🇺🇸 🚺 Jessica': 'af_jessica',
    '🇺🇸 🚺 River': 'af_river',
    # "am_" voice ids
    '🇺🇸 🚹 Michael': 'am_michael',
    '🇺🇸 🚹 Fenrir': 'am_fenrir',
    '🇺🇸 🚹 Puck': 'am_puck',
    '🇺🇸 🚹 Echo': 'am_echo',
    '🇺🇸 🚹 Eric': 'am_eric',
    '🇺🇸 🚹 Liam': 'am_liam',
    '🇺🇸 🚹 Onyx': 'am_onyx',
    '🇺🇸 🚹 Santa': 'am_santa',
    '🇺🇸 🚹 Adam': 'am_adam',
    # "bf_" voice ids
    '🇬🇧 🚺 Emma': 'bf_emma',
    '🇬🇧 🚺 Isabella': 'bf_isabella',
    '🇬🇧 🚺 Alice': 'bf_alice',
    '🇬🇧 🚺 Lily': 'bf_lily',
    # "bm_" voice ids
    '🇬🇧 🚹 George': 'bm_george',
    '🇬🇧 🚹 Fable': 'bm_fable',
    '🇬🇧 🚹 Lewis': 'bm_lewis',
    '🇬🇧 🚹 Daniel': 'bm_daniel',
}
# Example prompts for image generation
IMAGE_EXAMPLE_PROMPTS = [
"A majestic dragon flying over a medieval castle, epic fantasy art, detailed, 8k",
"A serene Japanese garden with cherry blossoms, zen atmosphere, peaceful, high quality",
"A futuristic cityscape with flying cars and neon lights, cyberpunk style, cinematic",
"A cute robot cat playing with yarn, adorable, cartoon style, vibrant colors",
"A magical forest with glowing mushrooms and fairy lights, fantasy, ethereal beauty",
"Portrait of a wise old wizard with flowing robes, magical aura, fantasy character art",
"A cozy coffee shop on a rainy day, warm lighting, peaceful atmosphere, detailed",
"An astronaut floating in space with Earth in background, photorealistic, stunning"
]
# Sample edit instructions for image-to-image generation.
IMAGE_TO_IMAGE_EXAMPLE_PROMPTS = [
    "Turn the cat into a tiger with stripes and fierce expression",
    "Make the background a magical forest with glowing mushrooms",
    "Change the style to vintage comic book with bold colors",
    "Add a superhero cape and mask to the person",
    "Transform the building into a futuristic skyscraper",
    "Make the flowers bloom and add butterflies around them",
    "Change the weather to a stormy night with lightning",
    "Add a magical portal in the background with sparkles",
]
# Sample passages for text-to-speech generation.
TTS_EXAMPLE_TEXTS = [
    "Hello! Welcome to the amazing world of AI-powered text-to-speech technology.",
    "The quick brown fox jumps over the lazy dog. This pangram contains every letter of the alphabet.",
    "In a world where technology advances at lightning speed, artificial intelligence continues to reshape our future.",
    "Imagine a world where machines can understand and respond to human emotions with perfect clarity.",
    "The future belongs to those who believe in the beauty of their dreams and have the courage to pursue them.",
    "Science is not only compatible with spirituality; it is a profound source of spirituality.",
    "The only way to do great work is to love what you do. If you haven't found it yet, keep looking.",
    "Life is what happens when you're busy making other plans. Embrace every moment with gratitude.",
]
# Sample audio URLs offered for Chatterbox TTS.
TTS_EXAMPLE_AUDIO_URLS = [
    "https://github.com/nazdridoy/kokoro-tts/raw/main/previews/demo.mp3",
    "https://huggingface.co/datasets/hf-internal-testing/fixtures/resolve/main/audio/sample_audio_1.mp3",
    "https://huggingface.co/datasets/hf-internal-testing/fixtures/resolve/main/audio/sample_audio_2.mp3",
    "https://www.soundjay.com/misc/sounds/bell-ringing-05.wav",
]
def get_proxy_key():
    """Return the value of the ``PROXY_KEY`` environment variable.

    Returns:
        The variable's value as a string, or ``None`` when it is not set.
    """
    env = os.environ
    return env.get("PROXY_KEY")
def validate_proxy_key():
    """Check whether a proxy API key is configured.

    Returns:
        A ``(is_valid, message)`` tuple: ``(True, "")`` when
        ``get_proxy_key()`` yields a non-empty value, otherwise ``False``
        together with an error message. An empty key counts as missing.
    """
    if get_proxy_key():
        return True, ""
    return False, "❌ Error: PROXY_KEY not found in environment variables. Please set it in your HuggingFace Space secrets."
def get_proxy_url():
    """Return the value of the ``PROXY_URL`` environment variable.

    Returns:
        The variable's value as a string, or ``None`` when it is not set.
    """
    env = os.environ
    return env.get("PROXY_URL")
def validate_proxy_url():
    """Check whether a proxy URL is configured.

    Returns:
        A ``(is_valid, message)`` tuple: ``(True, "")`` when
        ``get_proxy_url()`` yields a non-empty value, otherwise ``False``
        together with an error message. An empty URL counts as missing.
    """
    if get_proxy_url():
        return True, ""
    return False, "❌ Error: PROXY_URL not found in environment variables. Please set it in your HuggingFace Space secrets."
def parse_model_and_provider(model_name):
    """Split a ``'model:provider'`` string into its two parts.

    Only the first colon separates the parts, so any later colon stays in the
    provider. Surrounding whitespace is stripped from both parts, so input
    such as ``"model : provider"`` yields clean identifiers.

    Args:
        model_name: A model id, optionally followed by ``:provider``.

    Returns:
        A ``(model, provider)`` tuple. ``provider`` is ``None`` when it is
        not specified, including when nothing (or only whitespace) follows
        the colon.
    """
    # partition() always returns three parts; the provider part is "" when
    # there is no colon, which collapses to None just like an empty suffix.
    model, _, provider = model_name.partition(":")
    return model.strip(), provider.strip() or None
def format_error_message(error_type, error_message):
    """Build a uniformly formatted error string.

    Args:
        error_type: Short label for the kind of error.
        error_message: Detail text placed after the label.

    Returns:
        A string of the form ``"❌ <error_type>: <error_message>"``.
    """
    prefix = f"❌ {error_type}"
    return f"{prefix}: {error_message}"
def format_success_message(operation, details=""):
    """Format success messages consistently.

    The leading marker is the check-mark emoji (U+2705); it was previously
    stored as mojibake and is restored here.

    Args:
        operation: Name of the operation that finished.
        details: Optional extra text; when non-empty it is appended after a
            colon, otherwise the message ends with an exclamation mark.

    Returns:
        ``"✅ <operation> completed successfully: <details>"`` when details
        are given, else ``"✅ <operation> completed successfully!"``.
    """
    base_message = f"✅ {operation} completed successfully"
    if details:
        return f"{base_message}: {details}"
    return f"{base_message}!"
def get_gradio_theme():
    """Return the application's default Gradio theme.

    Gradio is imported lazily inside the function.

    Returns:
        A ``gradio.themes.Soft`` instance, or ``None`` if an ``ImportError``
        is raised while importing Gradio or building the theme.
    """
    theme = None
    try:
        import gradio

        theme = gradio.themes.Soft()
    except ImportError:
        pass
    return theme