AiCoderv2 committed on
Commit
91fd009
·
verified ·
1 Parent(s): b36751d

Update app.py from anycoder

Browse files
Files changed (1) hide show
  1. app.py +22 -15
app.py CHANGED
@@ -91,22 +91,29 @@ def generate_music(prompt, duration, temperature, top_k):
91
  top_k=top_k
92
  )
93
 
94
- # Convert to audio file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
96
- # Get sampling rate from model config
97
- sampling_rate = model.config.audio_encoder.sample_rate
98
-
99
- # Convert audio tensor to numpy array
100
- audio_data = audio_values[0, 0].cpu().numpy()
101
-
102
- # Normalize audio
103
- audio_data = audio_data / np.max(np.abs(audio_data)) * 0.9
104
-
105
- # Convert to 16-bit PCM format
106
- audio_data = (audio_data * 32767).astype(np.int16)
107
-
108
- # Write to file
109
- write(temp_file.name, sampling_rate, audio_data)
110
 
111
  return temp_file.name
112
 
 
91
  top_k=top_k
92
  )
93
 
94
+ # Get sampling rate from model config
95
+ sampling_rate = model.config.audio_encoder.sample_rate
96
+
97
+ # Convert audio tensor to numpy array
98
+ # MusicGen outputs audio in stereo (2 channels)
99
+ audio_data = audio_values[0, 0].cpu().numpy()
100
+
101
+ # Reshape to stereo format if needed
102
+ if len(audio_data.shape) == 1:
103
+ # Mono to stereo conversion
104
+ audio_data = np.stack([audio_data, audio_data], axis=0)
105
+ elif audio_data.shape[0] == 1:
106
+ # Single channel to stereo
107
+ audio_data = np.concatenate([audio_data, audio_data], axis=0)
108
+
109
+ # Normalize audio to 16-bit range
110
+ audio_data = audio_data / np.max(np.abs(audio_data)) * 0.9
111
+ audio_data = (audio_data * 32767).astype(np.int16)
112
+
113
+ # Create temporary file
114
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
115
+ # Write stereo audio
116
+ write(temp_file.name, sampling_rate, audio_data.T) # Transpose for stereo format
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  return temp_file.name
119