Update app.py from anycoder
Browse files
app.py
CHANGED
|
@@ -91,22 +91,29 @@ def generate_music(prompt, duration, temperature, top_k):
|
|
| 91 |
top_k=top_k
|
| 92 |
)
|
| 93 |
|
| 94 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
| 96 |
-
#
|
| 97 |
-
sampling_rate
|
| 98 |
-
|
| 99 |
-
# Convert audio tensor to numpy array
|
| 100 |
-
audio_data = audio_values[0, 0].cpu().numpy()
|
| 101 |
-
|
| 102 |
-
# Normalize audio
|
| 103 |
-
audio_data = audio_data / np.max(np.abs(audio_data)) * 0.9
|
| 104 |
-
|
| 105 |
-
# Convert to 16-bit PCM format
|
| 106 |
-
audio_data = (audio_data * 32767).astype(np.int16)
|
| 107 |
-
|
| 108 |
-
# Write to file
|
| 109 |
-
write(temp_file.name, sampling_rate, audio_data)
|
| 110 |
|
| 111 |
return temp_file.name
|
| 112 |
|
|
|
|
| 91 |
top_k=top_k
|
| 92 |
)
|
| 93 |
|
| 94 |
+
# Get sampling rate from model config
|
| 95 |
+
sampling_rate = model.config.audio_encoder.sample_rate
|
| 96 |
+
|
| 97 |
+
# Convert audio tensor to numpy array
|
| 98 |
+
# MusicGen outputs audio in stereo (2 channels)
|
| 99 |
+
audio_data = audio_values[0, 0].cpu().numpy()
|
| 100 |
+
|
| 101 |
+
# Reshape to stereo format if needed
|
| 102 |
+
if len(audio_data.shape) == 1:
|
| 103 |
+
# Mono to stereo conversion
|
| 104 |
+
audio_data = np.stack([audio_data, audio_data], axis=0)
|
| 105 |
+
elif audio_data.shape[0] == 1:
|
| 106 |
+
# Single channel to stereo
|
| 107 |
+
audio_data = np.concatenate([audio_data, audio_data], axis=0)
|
| 108 |
+
|
| 109 |
+
# Normalize audio to 16-bit range
|
| 110 |
+
audio_data = audio_data / np.max(np.abs(audio_data)) * 0.9
|
| 111 |
+
audio_data = (audio_data * 32767).astype(np.int16)
|
| 112 |
+
|
| 113 |
+
# Create temporary file
|
| 114 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
| 115 |
+
# Write stereo audio
|
| 116 |
+
write(temp_file.name, sampling_rate, audio_data.T) # Transpose for stereo format
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
return temp_file.name
|
| 119 |
|