Spaces:

OpenKing
/

Music-gen

Sleeping

App Files Community

AiCoderv2 committed 9 days ago

Commit

b36751d

verified ·

1 Parent(s): 0a828ee

Update app.py from anycoder

Browse files

Files changed (1) hide show

app.py +28 -35

app.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 import os
 from pathlib import Path
 import time
 import tempfile
 # Custom theme for music maker
 custom_theme = gr.themes.Soft(
@@ -27,7 +29,7 @@ MODEL_CACHE_DIR = Path.home() / ".cache" / "huggingface" / "musicgen"
 MAX_NEW_TOKENS = 250
 AUDIO_DURATION = 10  # seconds
-# Initialize model and tokenizer
 def load_model():
     """Load the MusicGen model with caching"""
     if not os.path.exists(MODEL_CACHE_DIR):
@@ -36,14 +38,14 @@ def load_model():
     print("Loading MusicGen model...")
     start_time = time.time()
-    # Load tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(
         MODEL_NAME,
         cache_dir=MODEL_CACHE_DIR
     )
-    # Load model
-    model = AutoModelForSeq2SeqLM.from_pretrained(
         MODEL_NAME,
         cache_dir=MODEL_CACHE_DIR,
         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
@@ -54,10 +56,10 @@ def load_model():
     load_time = time.time() - start_time
     print(f"Model loaded in {load_time:.2f} seconds")
-    return model, tokenizer
 # Global variables for model
-model, tokenizer = load_model()
 def generate_music(prompt, duration, temperature, top_k):
     """
@@ -73,47 +75,38 @@ def generate_music(prompt, duration, temperature, top_k):
         Generated audio file path
     """
     try:
-        # Generate music
-        inputs = tokenizer(
-            [prompt],
-            padding="max_length",
-            truncation=True,
-            max_length=64,
             return_tensors="pt"
         ).to(model.device)
         # Generate audio
-        with torch.no_grad():
-            audio_values = model.generate(
-                **inputs,
-                do_sample=True,
-                max_new_tokens=MAX_NEW_TOKENS,
-                temperature=temperature,
-                top_k=top_k
-            )
         # Convert to audio file
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-            # Save audio (this is a simplified version - actual MusicGen would need proper decoding)
-            # For demo purposes, we'll create a simple audio file
-            import numpy as np
-            from scipy.io.wavfile import write
-            # Generate simple sine wave for demo
-            sample_rate = 44100
-            t = np.linspace(0, duration, int(sample_rate * duration), False)
-            frequency = 440  # A4 note
-            audio_data = np.sin(2 * np.pi * frequency * t) * 0.5
-            # Add some variation based on prompt length
-            if len(prompt) > 20:
-                audio_data = audio_data * 0.8 + np.random.normal(0, 0.1, len(audio_data))
             # Convert to 16-bit PCM format
             audio_data = (audio_data * 32767).astype(np.int16)
             # Write to file
-            write(temp_file.name, sample_rate, audio_data)
             return temp_file.name

 import gradio as gr
 import torch
+from transformers import AutoProcessor, MusicgenForConditionalGeneration
 import os
 from pathlib import Path
 import time
 import tempfile
+import numpy as np
+from scipy.io.wavfile import write
 # Custom theme for music maker
 custom_theme = gr.themes.Soft(
 MAX_NEW_TOKENS = 250
 AUDIO_DURATION = 10  # seconds
+# Initialize model and processor
 def load_model():
     """Load the MusicGen model with caching"""
     if not os.path.exists(MODEL_CACHE_DIR):
     print("Loading MusicGen model...")
     start_time = time.time()
+    # Load processor (replaces tokenizer for MusicGen)
+    processor = AutoProcessor.from_pretrained(
         MODEL_NAME,
         cache_dir=MODEL_CACHE_DIR
     )
+    # Load model - MusicGen uses MusicgenForConditionalGeneration
+    model = MusicgenForConditionalGeneration.from_pretrained(
         MODEL_NAME,
         cache_dir=MODEL_CACHE_DIR,
         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
     load_time = time.time() - start_time
     print(f"Model loaded in {load_time:.2f} seconds")
+    return model, processor
 # Global variables for model
+model, processor = load_model()
 def generate_music(prompt, duration, temperature, top_k):
     """
         Generated audio file path
     """
     try:
+        # Generate music using MusicGen
+        inputs = processor(
+            text=[prompt],
+            padding=True,
             return_tensors="pt"
         ).to(model.device)
         # Generate audio
+        audio_values = model.generate(
+            **inputs,
+            max_new_tokens=MAX_NEW_TOKENS,
+            do_sample=True,
+            temperature=temperature,
+            top_k=top_k
+        )
         # Convert to audio file
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            # Get sampling rate from model config
+            sampling_rate = model.config.audio_encoder.sample_rate
+            # Convert audio tensor to numpy array
+            audio_data = audio_values[0, 0].cpu().numpy()
+            # Normalize audio
+            audio_data = audio_data / np.max(np.abs(audio_data)) * 0.9
             # Convert to 16-bit PCM format
             audio_data = (audio_data * 32767).astype(np.int16)
             # Write to file
+            write(temp_file.name, sampling_rate, audio_data)
             return temp_file.name