Spaces: Sleeping
Commit 1aa3245 · 1 Parent(s): 8f10b7a
melodyflow experiment
app.py CHANGED
@@ -5,20 +5,57 @@ from audiocraft.models import MusicGen
 from audiocraft.data.audio import audio_write
 import tempfile
 import os
-import logging
 import torch
-from
-import io
 import random

 # Check if CUDA is available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

 def preprocess_audio(waveform):
     waveform_np = waveform.cpu().squeeze().numpy()
     return torch.from_numpy(waveform_np).unsqueeze(0).to(device)

-
 def generate_drum_sample():
     model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
     model.set_generation_params(duration=10)
@@ -28,10 +65,9 @@ def generate_drum_sample():
     filename_with_extension = f'{filename_without_extension}.wav'

     audio_write(filename_without_extension, wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True)
-
     return filename_with_extension

-@spaces.GPU
 def continue_drum_sample(existing_audio_path):
     if existing_audio_path is None:
         return None
@@ -57,7 +93,6 @@ def continue_drum_sample(existing_audio_path):

     if output.dim() == 3:
         output = output.squeeze(0)
-
     if output.dim() == 1:
         output = output.unsqueeze(0)

@@ -66,10 +101,9 @@ def continue_drum_sample(existing_audio_path):

     combined_file_path = f'./continued_jungle_{random.randint(1000, 9999)}.wav'
     torchaudio.save(combined_file_path, combined_audio, sr)
-
     return combined_file_path

-@spaces.GPU
 def generate_music(wav_filename, prompt_duration, musicgen_model, output_duration):
     if wav_filename is None:
         return None
@@ -104,136 +138,131 @@ def generate_music(wav_filename, prompt_duration, musicgen_model, output_duration):

     return filename_with_extension

-
-def continue_music(input_audio_path, prompt_duration, musicgen_model, output_duration):
-    if input_audio_path is None:
-        return None
-
-    song, sr = torchaudio.load(input_audio_path)
-    song = song.to(device)
-
-    model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
-    model_continue.set_generation_params(
-        use_sampling=True,
-        top_k=250,
-        top_p=0.0,
-        temperature=1.0,
-        duration=output_duration,
-        cfg_coef=3
-    )
-
-    original_audio = AudioSegment.from_mp3(input_audio_path)
-    current_audio = original_audio
-
-    file_paths_for_cleanup = []
-
-    for i in range(1):
-        num_samples = int(prompt_duration * sr)
-        if current_audio.duration_seconds * 1000 < prompt_duration * 1000:
-            raise ValueError("The prompt_duration is longer than the current audio length.")
-
-        start_time = current_audio.duration_seconds * 1000 - prompt_duration * 1000
-        prompt_audio = current_audio[start_time:]
-
-        prompt_bytes = prompt_audio.export(format="wav").read()
-        prompt_waveform, _ = torchaudio.load(io.BytesIO(prompt_bytes))
-        prompt_waveform = prompt_waveform.to(device)
-
-        prompt_waveform = preprocess_audio(prompt_waveform)
-
-        output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
-        output = output.cpu()
-
-        if len(output.size()) > 2:
-            output = output.squeeze()
-
-        filename_without_extension = f'continue_{i}'
-        filename_with_extension = f'{filename_without_extension}.wav'
-        correct_filename_extension = f'{filename_without_extension}.wav.wav'
-
-        audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
-        generated_audio_segment = AudioSegment.from_wav(correct_filename_extension)
-
-        current_audio = current_audio[:start_time] + generated_audio_segment
-
-        file_paths_for_cleanup.append(correct_filename_extension)
-
-    combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
-    current_audio.export(combined_audio_filename, format="mp3")
-
-    for file_path in file_paths_for_cleanup:
-        os.remove(file_path)
-
-    return combined_audio_filename
-
-# Define the expandable sections (keeping your existing content)
-musicgen_micro_blurb = """
-## musicgen_micro
-musicgen micro is an experimental series of models by aaron abebe. they are incredibly fast, and extra insane. this one does goated jungle drums. we're very excited about these.
-[<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> aaron's github](https://github.com/aaronabebe/)
-[<img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Hugging Face" width="20" style="vertical-align:middle"> musicgen-micro on huggingface](https://huggingface.co/pharoAIsanders420/micro-musicgen-jungle)
-"""
-
-musicgen_blurb = """
-## musicgen
-musicgen is a transformer-based music model that generates audio. It can also do something called a continuation, which was initially meant to extend musicgen outputs beyond 30 seconds. it can be used with any input audio to produce surprising results.
-[<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> audiocraft github](https://github.com/facebookresearch/audiocraft)
-visit https://thecollabagepatch.com/infinitepolo.mp3 or https://thecollabagepatch.com/audiocraft.mp3 to hear continuations in action.
-see also https://youtube.com/@thecollabagepatch
-"""

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 with gr.Blocks() as iface:
-    gr.Markdown("#
-    gr.Markdown("
-    gr.Markdown("

-    with gr.Accordion("
-        gr.Markdown(
-
-
-
-
-

     with gr.Row():
         with gr.Column():
-            generate_button = gr.Button("Generate
-
-
-
-
-
             )
-

         with gr.Column():
             prompt_duration = gr.Dropdown(
                 label="Prompt Duration (seconds)",
@@ -247,6 +276,8 @@ with gr.Blocks() as iface:
                 step=1,
                 value=20
             )
             musicgen_model = gr.Dropdown(
                 label="MusicGen Model",
                 choices=[
@@ -259,16 +290,28 @@ with gr.Blocks() as iface:
                 ],
                 value="thepatch/vanya_ai_dnb_0.1 (small)"
             )
-
-
-            continue_button = gr.Button("Continue Generating Music")
-            continue_output_audio = gr.Audio(label="Continued Music Output", type="filepath")

-    #
-
-
-
-

 if __name__ == "__main__":
     iface.launch()
 from audiocraft.data.audio import audio_write
 import tempfile
 import os
 import torch
+from gradio_client import Client, handle_file
 import random
+import time

 # Check if CUDA is available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

+# MelodyFlow variation mapping - map your semantic variations to text prompts
+VARIATION_PROMPTS = {
+    'accordion_folk': 'folk accordion melody with traditional folk instruments',
+    'banjo_bluegrass': 'bluegrass banjo with country folk instruments',
+    'piano_classical': 'classical piano with orchestral arrangement',
+    'celtic': 'celtic harp and flute with traditional irish instruments',
+    'strings_quartet': 'string quartet with violin, viola, cello arrangement',
+    'synth_retro': 'retro 80s synthesizer with vintage electronic sounds',
+    'synth_modern': 'modern synthesizer with contemporary electronic production',
+    'synth_edm': 'edm synthesizer with dance electronic beats',
+    'lofi_chill': 'lo-fi chill with relaxed jazz hip-hop elements',
+    'synth_bass': 'heavy bass synthesizer with sub-bass frequencies',
+    'rock_band': 'rock band with electric guitar, bass, and drums',
+    'cinematic_epic': 'cinematic epic orchestral with dramatic strings and brass',
+    'retro_rpg': 'retro rpg chiptune with 8-bit game music elements',
+    'chiptune': '8-bit chiptune with retro video game sounds',
+    'steel_drums': 'steel drums with caribbean tropical percussion',
+    'gamelan_fusion': 'gamelan fusion with indonesian percussion instruments',
+    'music_box': 'music box with delicate mechanical melody',
+    'trap_808': 'trap beats with heavy 808 drums and hi-hats',
+    'lo_fi_drums': 'lo-fi drums with vinyl crackle and jazz samples',
+    'boom_bap': 'boom bap hip-hop with classic drum breaks',
+    'percussion_ensemble': 'percussion ensemble with varied drum instruments',
+    'future_bass': 'future bass with melodic drops and vocal chops',
+    'synthwave_retro': 'synthwave retro with neon 80s aesthetic',
+    'melodic_techno': 'melodic techno with driving beats and emotional melodies',
+    'dubstep_wobble': 'dubstep with heavy wobble bass and electronic drops',
+    'glitch_hop': 'glitch hop with broken beats and digital artifacts',
+    'digital_disruption': 'digital disruption with glitchy electronic effects',
+    'circuit_bent': 'circuit bent with broken electronic hardware sounds',
+    'orchestral_glitch': 'orchestral glitch with classical instruments and digital errors',
+    'vapor_drums': 'vaporwave drums with slowed down nostalgic beats',
+    'industrial_textures': 'industrial textures with harsh mechanical sounds',
+    'jungle_breaks': 'jungle breaks with fast drum and bass rhythms'
+}
+
 def preprocess_audio(waveform):
     waveform_np = waveform.cpu().squeeze().numpy()
     return torch.from_numpy(waveform_np).unsqueeze(0).to(device)

+# ========== MUSICGEN FUNCTIONS (Local ZeroGPU) ==========
+
+@spaces.GPU
 def generate_drum_sample():
     model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
     model.set_generation_params(duration=10)

     filename_with_extension = f'{filename_without_extension}.wav'

     audio_write(filename_without_extension, wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True)
     return filename_with_extension

+@spaces.GPU
 def continue_drum_sample(existing_audio_path):
     if existing_audio_path is None:
         return None

     if output.dim() == 3:
         output = output.squeeze(0)
     if output.dim() == 1:
         output = output.unsqueeze(0)

     combined_file_path = f'./continued_jungle_{random.randint(1000, 9999)}.wav'
     torchaudio.save(combined_file_path, combined_audio, sr)
     return combined_file_path

+@spaces.GPU
 def generate_music(wav_filename, prompt_duration, musicgen_model, output_duration):
     if wav_filename is None:
         return None

     return filename_with_extension

+# ========== MELODYFLOW FUNCTIONS (Via Facebook Space) ==========

+def transform_with_melodyflow_api(audio_path, variation, custom_prompt="", solver="euler", steps=128):
+    """Transform audio using Facebook/MelodyFlow space API"""
+    if audio_path is None:
+        return None, "❌ No audio file provided"
+
+    try:
+        # Initialize client for Facebook MelodyFlow space
+        client = Client("facebook/MelodyFlow")
+
+        # Determine the prompt to use
+        if custom_prompt.strip():
+            prompt_text = custom_prompt.strip()
+            status_msg = f"✅ Transformed with custom prompt: '{prompt_text}'"
+        else:
+            prompt_text = VARIATION_PROMPTS.get(variation, f"transform this audio to {variation} style")
+            status_msg = f"✅ Transformed with {variation} style"
+
+        # Call the MelodyFlow API
+        result = client.predict(
+            model="facebook/melodyflow-t24-30secs",
+            text=prompt_text,
+            solver=solver,
+            steps=steps,
+            target_flowstep=0,  # Default flowstep
+            regularize=False,
+            regularization_strength=0.2,
+            duration=30,  # Max duration
+            melody=handle_file(audio_path),
+            api_name="/predict"
+        )
+
+        # Result is a tuple of 3 audio files (variations)
+        # We'll use the first variation
+        if result and len(result) > 0 and result[0]:
+            # Save the result locally
+            output_filename = f"melodyflow_{variation}_{random.randint(1000, 9999)}.wav"
+
+            # Copy the result file to our local filename
+            import shutil
+            shutil.copy2(result[0], output_filename)
+
+            return output_filename, status_msg
+        else:
+            return None, "❌ MelodyFlow API returned no results"
+
+    except Exception as e:
+        return None, f"❌ MelodyFlow API error: {str(e)}"
+
+# ========== GRADIO INTERFACE ==========
+
+# Create the interface
 with gr.Blocks() as iface:
+    gr.Markdown("# 🎰 The Mega Slot Machine")
+    gr.Markdown("**Hybrid Multi-Model Pipeline**: MicroMusicGen → MelodyFlow (via API) → MusicGen Fine-tunes")
+    gr.Markdown("*Demonstrating the workflow from our Ableton device in a web interface!*")

+    with gr.Accordion("How This Works", open=False):
+        gr.Markdown("""
+        This demo shows how multiple AI models can work together:
+
+        1. **Generate** initial audio with MicroMusicGen (super fast jungle drums)
+        2. **Transform** it using MelodyFlow (via Facebook's space API)
+        3. **Continue** with MusicGen fine-tunes (trained on specific genres)
+        4. **Repeat** the cycle to create infinite musical journeys!
+
+        The models run with different PyTorch versions, so we use the Facebook MelodyFlow space via API.
+        """)

+    # ========== STEP 1: GENERATE ==========
+    gr.Markdown("## 🎵 Step 1: Generate Initial Audio")
+
     with gr.Row():
         with gr.Column():
+            generate_button = gr.Button("Generate Jungle Drums", variant="primary", size="lg")
+            continue_drum_button = gr.Button("Continue Drums", size="sm")
+
+            main_audio = gr.Audio(
+                label="🎵 Current Audio (flows through pipeline)",
+                type="filepath",
+                interactive=True,
+                show_download_button=True
+            )
+
+    # ========== STEP 2: TRANSFORM ==========
+    gr.Markdown("## 🎛️ Step 2: Transform with MelodyFlow")
+
+    with gr.Row():
+        with gr.Column(scale=2):
+            transform_variation = gr.Dropdown(
+                label="Transform Style",
+                choices=list(VARIATION_PROMPTS.keys()),
+                value="synth_modern",
+                interactive=True
             )
+
+        with gr.Column(scale=3):
+            transform_prompt = gr.Textbox(
+                label="Custom Prompt (optional)",
+                placeholder="Leave empty to use style above, or enter custom transformation prompt",
+                lines=2
+            )
+
+    with gr.Row():
+        transform_solver = gr.Dropdown(
+            label="Solver",
+            choices=["euler", "midpoint"],
+            value="euler"
+        )
+        transform_steps = gr.Slider(
+            label="Steps",
+            minimum=64,
+            maximum=256,
+            step=32,
+            value=128
+        )
+        transform_button = gr.Button("🎛️ Transform Audio", variant="secondary", size="lg")
+
+    transform_status = gr.Textbox(label="Transform Status", value="Ready to transform", interactive=False)

+    # ========== STEP 3: CONTINUE ==========
+    gr.Markdown("## 🎼 Step 3: Continue with MusicGen")
+
+    with gr.Row():
         with gr.Column():
             prompt_duration = gr.Dropdown(
                 label="Prompt Duration (seconds)",
                 step=1,
                 value=20
             )
+
+        with gr.Column():
             musicgen_model = gr.Dropdown(
                 label="MusicGen Model",
                 choices=[
                 ],
                 value="thepatch/vanya_ai_dnb_0.1 (small)"
             )
+
+    generate_music_button = gr.Button("🎼 Continue with MusicGen", variant="primary", size="lg")

+    # ========== EVENT HANDLERS ==========
+
+    # Step 1: Generate
+    generate_button.click(generate_drum_sample, outputs=[main_audio])
+    continue_drum_button.click(continue_drum_sample, inputs=[main_audio], outputs=[main_audio])
+
+    # Step 2: Transform (using Facebook MelodyFlow API)
+    transform_button.click(
+        transform_with_melodyflow_api,
+        inputs=[main_audio, transform_variation, transform_prompt, transform_solver, transform_steps],
+        outputs=[main_audio, transform_status]
+    )
+
+    # Step 3: Continue
+    generate_music_button.click(
+        generate_music,
+        inputs=[main_audio, prompt_duration, musicgen_model, output_duration],
+        outputs=[main_audio]
+    )

 if __name__ == "__main__":
     iface.launch()
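
For quick testing, the call that the new transform_with_melodyflow_api() wraps can also be made with gradio_client directly, outside the Space UI. The following is a minimal sketch, not part of the commit: it assumes facebook/MelodyFlow keeps exposing /predict with the same parameters used in the diff above, and "drums.wav" is a hypothetical local input file.

# Standalone sketch of the MelodyFlow call made by this commit (assumptions noted above).
from gradio_client import Client, handle_file

client = Client("facebook/MelodyFlow")
result = client.predict(
    model="facebook/melodyflow-t24-30secs",
    text="jungle breaks with fast drum and bass rhythms",  # any VARIATION_PROMPTS value, or a custom prompt
    solver="euler",
    steps=128,
    target_flowstep=0,
    regularize=False,
    regularization_strength=0.2,
    duration=30,
    melody=handle_file("drums.wav"),  # hypothetical local audio file
    api_name="/predict"
)
print(result[0])  # path to the first of the returned variations, as used in the commit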