"""Gradio app that turns a text prompt into music with MusicGen.

The module loads the model once at import time, defines the generation
helpers, builds the Blocks UI and launches the server.
"""

import os
import tempfile
import time
from pathlib import Path

import gradio as gr
import numpy as np
import torch
from scipy.io.wavfile import write
from transformers import AutoProcessor, MusicgenForConditionalGeneration

# Custom theme for music maker
custom_theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="md",
).set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
)

# Model configuration
MODEL_NAME = "facebook/musicgen-small"
MODEL_CACHE_DIR = Path.home() / ".cache" / "huggingface" / "musicgen"
# NOTE: kept for backward compatibility; not referenced in the visible code
# (the token budget is derived from the requested duration instead).
MAX_NEW_TOKENS = 500
MIN_DURATION = 5  # seconds
AUDIO_DURATION = 240  # seconds, 4 minutes max
TOKENS_PER_SECOND = 50  # token budget per second of requested audio
# NOTE(review): the MusicGen documentation describes the model as trained on
# ~30 s clips; quality for much longer requests should be confirmed.


def load_model():
    """Load the MusicGen model and its processor.

    The weights are cached under ``MODEL_CACHE_DIR``. When CUDA is available
    the model is loaded in float16 and moved to the GPU; otherwise it stays
    in float32 on the CPU.

    Returns:
        A ``(model, processor)`` tuple, with the model in evaluation mode.
    """
    os.makedirs(MODEL_CACHE_DIR, exist_ok=True)

    print("Loading MusicGen model...")
    start_time = time.time()

    use_cuda = torch.cuda.is_available()

    loaded_processor = AutoProcessor.from_pretrained(
        MODEL_NAME,
        cache_dir=MODEL_CACHE_DIR,
    )

    # Device placement is done once, explicitly, below. Passing
    # device_map="auto" as well would be redundant with the .to() call and
    # requires the optional `accelerate` package.
    loaded_model = MusicgenForConditionalGeneration.from_pretrained(
        MODEL_NAME,
        cache_dir=MODEL_CACHE_DIR,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
    )
    if use_cuda:
        loaded_model = loaded_model.to("cuda")
    loaded_model.eval()  # inference only

    load_time = time.time() - start_time
    print(f"Model loaded in {load_time:.2f} seconds")
    return loaded_model, loaded_processor


# Global variables for model
model, processor = load_model()


def _to_int16_stereo(mono):
    """Peak-normalise a 1-D float signal and return it as (samples, 2) int16."""
    mono = np.asarray(mono, dtype=np.float32)
    peak = float(np.max(np.abs(mono))) if mono.size else 0.0
    if peak > 0.0:
        # Leave 10% headroom; skipped for all-zero audio to avoid 0/0 -> NaN.
        mono = mono / peak * 0.9
    pcm = (mono * 32767).astype(np.int16)
    # Duplicate the mono channel so the file is written as stereo.
    return np.stack([pcm, pcm], axis=1)


def generate_music(prompt, duration, temperature, top_k):
    """Generate music from a text prompt using the MusicGen model.

    Args:
        prompt: Text description of the music.
        duration: Requested duration in seconds.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.

    Returns:
        Path of the generated 16-bit stereo WAV file.

    Raises:
        gr.Error: If any step of generation or file writing fails.
    """
    try:
        max_new_tokens = int(duration * TOKENS_PER_SECOND)

        inputs = processor(
            text=[prompt],
            padding=True,
            return_tensors="pt",
        ).to(model.device)

        audio_values = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=float(temperature),
            # UI sliders may deliver floats; top-k sampling needs an int.
            top_k=int(top_k),
            use_cache=True,
        )

        sampling_rate = processor.feature_extractor.sampling_rate

        # Upcast before leaving torch: on CUDA the model runs in float16 and
        # the int16 scaling below would otherwise be done at half precision.
        mono = audio_values[0, 0].detach().cpu().float().numpy()
        pcm = _to_int16_stereo(mono)

        # Reserve a path, then write after the handle is closed; writing by
        # name while the handle is still open fails on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            wav_path = temp_file.name
        write(wav_path, sampling_rate, pcm)
        return wav_path

    except Exception as e:
        print(f"Error generating music: {e}")
        raise gr.Error(f"Failed to generate music: {str(e)}") from e


def music_maker_interface(prompt, duration, temperature, top_k, progress=gr.Progress()):
    """Validate the UI inputs and run music generation.

    Args:
        prompt: Text description of the music.
        duration: Requested duration in seconds.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        progress: Progress tracker. Declared as a default argument because
            that is the form Gradio documents for injecting it into event
            handlers; callers do not pass it.

    Returns:
        Path of the generated audio file.

    Raises:
        gr.Error: If the prompt is empty or the duration is out of range.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a music description")

    if duration is None or not MIN_DURATION <= duration <= AUDIO_DURATION:
        raise gr.Error(
            f"Duration must be between {MIN_DURATION} and {AUDIO_DURATION} "
            f"seconds ({AUDIO_DURATION // 60} minutes)"
        )

    # Report that work has started instead of sleeping through a fake
    # progress loop that completed before generation even began.
    progress(0, desc=f"Generating {duration} second music...")

    return generate_music(prompt, duration, temperature, top_k)


# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🎵 AI Music Maker - Extended Edition

    Create original music from text descriptions using AI! Now with support for songs up to 4 minutes long.

    [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
    """)

    with gr.Row():
        with gr.Column():
            # Input controls
            prompt = gr.Textbox(
                label="Music Description",
                placeholder="e.g., 'Happy electronic dance music with catchy beats'",
                lines=3,
            )
            duration = gr.Slider(
                minimum=MIN_DURATION,
                maximum=AUDIO_DURATION,
                value=30,
                step=5,
                label="Duration (seconds) - Up to 4 minutes!",
            )

            with gr.Accordion("Advanced Settings", open=False):
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Creativity (Temperature)",
                )
                top_k = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=50,
                    step=10,
                    label="Sampling Diversity (Top K)",
                )

            generate_btn = gr.Button("🎵 Generate Music", variant="primary", size="lg")

            # Examples
            gr.Examples(
                examples=[
                    ["Happy electronic dance music with catchy beats and uplifting melodies"],
                    ["Calm piano music for meditation and relaxation"],
                    ["Epic orchestral soundtrack with dramatic strings and powerful brass"],
                    ["Jazz improvisation with saxophone and piano"],
                    ["Rock guitar solo with heavy distortion and fast tempo"],
                ],
                inputs=[prompt],
                label="Try these examples:",
            )

        with gr.Column():
            # Output
            audio_output = gr.Audio(
                label="Generated Music",
                type="filepath",
                interactive=False,
                autoplay=True,
            )

            # Status and info
            status = gr.Markdown(
                "Enter a description and click 'Generate Music' to create your track!"
            )
            model_info = gr.Markdown(f"""
            ### Model Info
            - **Model**: MusicGen Small
            - **Cache Location**: `{MODEL_CACHE_DIR}`
            - **Device**: {'CUDA' if torch.cuda.is_available() else 'CPU'}
            - **Max Duration**: {AUDIO_DURATION}s (4 minutes)
            - **Generation Speed**: Optimized for performance
            """)

    # Event handlers
    generate_btn.click(
        fn=music_maker_interface,
        inputs=[prompt, duration, temperature, top_k],
        outputs=[audio_output],
        api_visibility="public",
    )

    # Update status when inputs change
    prompt.change(
        fn=lambda p: f"Ready to generate music from: '{p}'",
        inputs=[prompt],
        outputs=[status],
    )

# Launch the app
demo.launch(
    theme=custom_theme,
    footer_links=[
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        {"label": "MusicGen Model", "url": "https://huggingface.co/facebook/musicgen-small"},
        {"label": "Gradio", "url": "https://gradio.app"},
    ],
    show_error=True,
    share=True,
)