Spaces:
Sleeping
Sleeping
Commit
·
6e56362
1
Parent(s):
1277288
loudness issues
Browse files
app.py
CHANGED
|
@@ -163,7 +163,6 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, output_dur
|
|
| 163 |
|
| 164 |
# Load original audio as AudioSegment for easier manipulation
|
| 165 |
original_audio = AudioSegment.from_wav(input_audio_path)
|
| 166 |
-
current_audio = original_audio
|
| 167 |
file_paths_for_cleanup = []
|
| 168 |
|
| 169 |
# Get the last `prompt_duration` seconds as the prompt
|
|
@@ -183,10 +182,11 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, output_dur
|
|
| 183 |
if len(output.size()) > 2:
|
| 184 |
output = output.squeeze()
|
| 185 |
|
| 186 |
-
# Save the generated audio
|
| 187 |
filename_without_extension = f'continue_extension_{random.randint(1000, 9999)}'
|
| 188 |
filename_with_extension = f'{filename_without_extension}.wav'
|
| 189 |
-
audio_write(filename_without_extension, output, model_continue.sample_rate,
|
|
|
|
| 190 |
|
| 191 |
# Handle the double .wav extension issue
|
| 192 |
correct_filename = f'{filename_without_extension}.wav.wav'
|
|
@@ -197,10 +197,25 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, output_dur
|
|
| 197 |
generated_audio_segment = AudioSegment.from_wav(filename_with_extension)
|
| 198 |
file_paths_for_cleanup.append(filename_with_extension)
|
| 199 |
|
| 200 |
-
#
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
combined_audio_filename = f"extended_audio_{random.randint(1000, 9999)}.wav"
|
| 205 |
combined_audio.export(combined_audio_filename, format="wav")
|
| 206 |
|
|
|
|
| 163 |
|
| 164 |
# Load original audio as AudioSegment for easier manipulation
|
| 165 |
original_audio = AudioSegment.from_wav(input_audio_path)
|
|
|
|
| 166 |
file_paths_for_cleanup = []
|
| 167 |
|
| 168 |
# Get the last `prompt_duration` seconds as the prompt
|
|
|
|
| 182 |
if len(output.size()) > 2:
|
| 183 |
output = output.squeeze()
|
| 184 |
|
| 185 |
+
# Save the generated audio WITHOUT aggressive loudness processing
|
| 186 |
filename_without_extension = f'continue_extension_{random.randint(1000, 9999)}'
|
| 187 |
filename_with_extension = f'{filename_without_extension}.wav'
|
| 188 |
+
audio_write(filename_without_extension, output, model_continue.sample_rate,
|
| 189 |
+
strategy="clip") # Just prevent clipping, no loudness changes
|
| 190 |
|
| 191 |
# Handle the double .wav extension issue
|
| 192 |
correct_filename = f'{filename_without_extension}.wav.wav'
|
|
|
|
| 197 |
generated_audio_segment = AudioSegment.from_wav(filename_with_extension)
|
| 198 |
file_paths_for_cleanup.append(filename_with_extension)
|
| 199 |
|
| 200 |
+
# VOLUME MATCHING: Apply consistent normalization
|
| 201 |
+
|
| 202 |
+
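# 1. Trim the last `prompt_duration` seconds from the original so they do not overlap with the generated audio
# NOTE(review): if prompt_duration is 0 the slice below is `[:-0]`, which is empty and drops the whole original - confirm callers never pass 0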
|
| 203 |
+
prompt_duration_ms = int(prompt_duration * 1000)
|
| 204 |
+
original_minus_prompt = original_audio[:-prompt_duration_ms]
|
| 205 |
+
|
| 206 |
+
# 2. Normalize both segments to same peak level
|
| 207 |
+
target_peak_dbfs = -6.0 # Target peak in dBFS; its absolute value is passed as the normalize() headroom (6 dB below full scale)
|
| 208 |
+
|
| 209 |
+
# Normalize original segment
|
| 210 |
+
original_normalized = original_minus_prompt.normalize(headroom=abs(target_peak_dbfs))
|
| 211 |
+
|
| 212 |
+
# Normalize generated segment
|
| 213 |
+
generated_normalized = generated_audio_segment.normalize(headroom=abs(target_peak_dbfs))
|
| 214 |
+
|
| 215 |
+
# 3. Concatenate the two normalized segments back to back (plain append, no crossfade)
|
| 216 |
+
combined_audio = original_normalized + generated_normalized
|
| 217 |
+
|
| 218 |
+
# Save final result
|
| 219 |
combined_audio_filename = f"extended_audio_{random.randint(1000, 9999)}.wav"
|
| 220 |
combined_audio.export(combined_audio_filename, format="wav")
|
| 221 |
|