thecollabagepatch committed on
Commit
5570342
·
1 Parent(s): 09b4773

ok herewego

Browse files
Files changed (1) hide show
  1. app.py +267 -17
app.py CHANGED
@@ -3,10 +3,24 @@ import spaces
3
  import torchaudio
4
  from audiocraft.models import MusicGen
5
  from audiocraft.data.audio import audio_write
 
 
 
 
 
 
6
  import random
7
 
 
 
 
 
 
 
 
 
8
  @spaces.GPU
9
- def generate_drum_sample():
10
  model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
11
  model.set_generation_params(duration=10)
12
  wav = model.generate_unconditional(1).squeeze(0)
@@ -18,21 +32,257 @@ def generate_drum_sample():
18
 
19
  return filename_with_extension
20
 
21
- # Use gr.Interface instead of gr.Blocks - this bypasses the click handler issue
22
- demo = gr.Interface(
23
- fn=generate_drum_sample,
24
- inputs=None, # No inputs for generation function
25
- outputs=gr.Audio(label="Generated Drum Sample", type="filepath"),
26
- title="🎰 Micro Slot Machine",
27
- description="Generate jungle drums using Aaron's micro-musicgen model",
28
- article="""
29
- ## musicgen_micro
30
- musicgen micro is an experimental series of models by aaron abebe. they are incredibly fast, and extra insane.
31
- [Aaron's GitHub](https://github.com/aaronabebe/) | [Model on HuggingFace](https://huggingface.co/pharoAIsanders420/micro-musicgen-jungle)
32
- """,
33
- examples=[], # You can add examples here if needed
34
- cache_examples=False
35
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  if __name__ == "__main__":
38
- demo.launch()
 
3
  import torchaudio
4
  from audiocraft.models import MusicGen
5
  from audiocraft.data.audio import audio_write
6
+ import tempfile
7
+ import os
8
+ import logging
9
+ import torch
10
+ from pydub import AudioSegment
11
+ import io
12
  import random
13
 
14
+ # Check if CUDA is available
15
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
+
17
+ def preprocess_audio(waveform):
18
+ waveform_np = waveform.cpu().squeeze().numpy()
19
+ return torch.from_numpy(waveform_np).unsqueeze(0).to(device)
20
+
21
+ # Test with a wrapper function
22
  @spaces.GPU
23
+ def _generate_drum_sample_internal():
24
  model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
25
  model.set_generation_params(duration=10)
26
  wav = model.generate_unconditional(1).squeeze(0)
 
32
 
33
  return filename_with_extension
34
 
35
+ # Regular function wrapper (no @spaces.GPU on this one)
36
+ def generate_drum_sample():
37
+ return _generate_drum_sample_internal()
38
+
39
+
40
+
41
+ # @spaces.GPU
42
+ # def continue_drum_sample(existing_audio_path):
43
+ # if existing_audio_path is None:
44
+ # return None
45
+
46
+ # existing_audio, sr = torchaudio.load(existing_audio_path)
47
+ # existing_audio = existing_audio.to(device)
48
+
49
+ # prompt_duration = 2
50
+ # output_duration = 10
51
+
52
+ # num_samples = int(prompt_duration * sr)
53
+ # if existing_audio.shape[1] < num_samples:
54
+ # raise ValueError("The existing audio is too short for the specified prompt duration.")
55
+
56
+ # start_sample = existing_audio.shape[1] - num_samples
57
+ # prompt_waveform = existing_audio[..., start_sample:]
58
+
59
+ # model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
60
+ # model.set_generation_params(duration=output_duration)
61
+
62
+ # output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
63
+ # output = output.to(device)
64
+
65
+ # if output.dim() == 3:
66
+ # output = output.squeeze(0)
67
+
68
+ # if output.dim() == 1:
69
+ # output = output.unsqueeze(0)
70
+
71
+ # combined_audio = torch.cat((existing_audio, output), dim=1)
72
+ # combined_audio = combined_audio.cpu()
73
+
74
+ # combined_file_path = f'./continued_jungle_{random.randint(1000, 9999)}.wav'
75
+ # torchaudio.save(combined_file_path, combined_audio, sr)
76
+
77
+ # return combined_file_path
78
+
79
+ # @spaces.GPU
80
+ # def generate_music(wav_filename, prompt_duration, musicgen_model, output_duration):
81
+ # if wav_filename is None:
82
+ # return None
83
+
84
+ # song, sr = torchaudio.load(wav_filename)
85
+ # song = song.to(device)
86
+
87
+ # model_name = musicgen_model.split(" ")[0]
88
+ # model_continue = MusicGen.get_pretrained(model_name)
89
+
90
+ # model_continue.set_generation_params(
91
+ # use_sampling=True,
92
+ # top_k=250,
93
+ # top_p=0.0,
94
+ # temperature=1.0,
95
+ # duration=output_duration,
96
+ # cfg_coef=3
97
+ # )
98
+
99
+ # prompt_waveform = song[..., :int(prompt_duration * sr)]
100
+ # prompt_waveform = preprocess_audio(prompt_waveform)
101
+
102
+ # output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
103
+ # output = output.cpu()
104
+
105
+ # if len(output.size()) > 2:
106
+ # output = output.squeeze()
107
+
108
+ # filename_without_extension = f'continued_music'
109
+ # filename_with_extension = f'{filename_without_extension}.wav'
110
+ # audio_write(filename_without_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
111
+
112
+ # return filename_with_extension
113
+
114
+ # @spaces.GPU
115
+ # def continue_music(input_audio_path, prompt_duration, musicgen_model, output_duration):
116
+ # if input_audio_path is None:
117
+ # return None
118
+
119
+ # song, sr = torchaudio.load(input_audio_path)
120
+ # song = song.to(device)
121
+
122
+ # model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
123
+ # model_continue.set_generation_params(
124
+ # use_sampling=True,
125
+ # top_k=250,
126
+ # top_p=0.0,
127
+ # temperature=1.0,
128
+ # duration=output_duration,
129
+ # cfg_coef=3
130
+ # )
131
+
132
+ # original_audio = AudioSegment.from_mp3(input_audio_path)
133
+ # current_audio = original_audio
134
+
135
+ # file_paths_for_cleanup = []
136
+
137
+ # for i in range(1):
138
+ # num_samples = int(prompt_duration * sr)
139
+ # if current_audio.duration_seconds * 1000 < prompt_duration * 1000:
140
+ # raise ValueError("The prompt_duration is longer than the current audio length.")
141
+
142
+ # start_time = current_audio.duration_seconds * 1000 - prompt_duration * 1000
143
+ # prompt_audio = current_audio[start_time:]
144
+
145
+ # prompt_bytes = prompt_audio.export(format="wav").read()
146
+ # prompt_waveform, _ = torchaudio.load(io.BytesIO(prompt_bytes))
147
+ # prompt_waveform = prompt_waveform.to(device)
148
+
149
+ # prompt_waveform = preprocess_audio(prompt_waveform)
150
+
151
+ # output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
152
+ # output = output.cpu()
153
+
154
+ # if len(output.size()) > 2:
155
+ # output = output.squeeze()
156
+
157
+ # filename_without_extension = f'continue_{i}'
158
+ # filename_with_extension = f'{filename_without_extension}.wav'
159
+ # correct_filename_extension = f'{filename_without_extension}.wav.wav'
160
+
161
+ # audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
162
+ # generated_audio_segment = AudioSegment.from_wav(correct_filename_extension)
163
+
164
+ # current_audio = current_audio[:start_time] + generated_audio_segment
165
+
166
+ # file_paths_for_cleanup.append(correct_filename_extension)
167
+
168
+ # combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
169
+ # current_audio.export(combined_audio_filename, format="mp3")
170
+
171
+ # for file_path in file_paths_for_cleanup:
172
+ # os.remove(file_path)
173
+
174
+ # return combined_audio_filename
175
+
176
+ # Define the expandable sections
177
+ musicgen_micro_blurb = """
178
+ ## musicgen_micro
179
+ musicgen micro is an experimental series of models by aaron abebe. they are incredibly fast, and extra insane. this one does goated jungle drums. we're very excited about these.
180
+ [<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> aaron's github](https://github.com/aaronabebe/)
181
+ [<img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Hugging Face" width="20" style="vertical-align:middle"> musicgen-micro on huggingface](https://huggingface.co/pharoAIsanders420/micro-musicgen-jungle)
182
+ """
183
+
184
+ musicgen_blurb = """
185
+ ## musicgen
186
+ musicgen is a transformer-based music model that generates audio. It can also do something called a continuation, which was initially meant to extend musicgen outputs beyond 30 seconds. it can be used with any input audio to produce surprising results.
187
+ [<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> audiocraft github](https://github.com/facebookresearch/audiocraft)
188
+ visit https://thecollabagepatch.com/infinitepolo.mp3 or https://thecollabagepatch.com/audiocraft.mp3 to hear continuations in action.
189
+ see also https://youtube.com/@thecollabagepatch
190
+ """
191
+
192
+ finetunes_blurb = """
193
+ ## fine-tuned models
194
+ the fine-tunes hosted on the huggingface hub are provided collectively by the musicgen discord community. thanks to vanya, mj, hoenn, septicDNB and of course, lyra.
195
+ [<img src="https://cdn.iconscout.com/icon/free/png-256/discord-3691244-3073764.png" alt="Discord" width="20" style="vertical-align:middle"> musicgen discord](https://discord.gg/93kX8rGZ)
196
+ [<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
197
+ """
198
+
199
+ fine_tunes_info = """
200
+ ## thepatch/vanya_ai_dnb_0.1
201
+ thepatch/vanya_ai_dnb_0.1 was trained by vanya. [vanya's Twitter](https://twitter.com/@veryVANYA) 🔗 - it treats almost all input audio as the beginning of a buildup to a dnb drop (can do downtempo well)
202
+
203
+ ## thepatch/bleeps-medium
204
+ thepatch/bleeps-medium was trained by kevin and lyra [lyra's Twitter](https://twitter.com/@_lyraaaa_) 🔗 - it is a medium model. it's more melodic and ambient sometimes than vanya's, but there's a 50/50 chance it gets real heavy with the edm vibes. It can be amazing at turning your chords into pads, and is a good percussionist.
205
+
206
+ ## thepatch/budots_remix
207
+ thepatch/budots_remix was trained by MJ BERSABEph. budots is a dope niche genre from the philippines apparently. this one will often do fascinating, demonic, kinds of vocal chopping. warning: it tends to speed up and slow down tempo, which makes it hard to use in a daw.
208
+
209
+ ## thepatch/hoenn_lofi
210
+ thepatch/hoenn_lofi is a large fine-tune by hoenn. [hoenn's Twitter](https://twitter.com/@eschatolocation) 🔗 - this model is a large boi, and it shows. even tho it is trained to do lo-fi, its ability to run with your melodies and not ruin them is unparalleled among the fine-tunes so far.
211
+
212
+ ## thepatch/PhonkV2
213
+ thepatch/PhonkV2 was trained by MJ BERSABEph. there are multiple versions in the discord.
214
+
215
+ ## foureyednymph/musicgen-sza-sos-small
216
+ foureyednymph/musicgen-sza-sos-small was just trained by foureyednymph. We're all about to find out if it does continuations well.
217
+ """
218
+
219
+ # Create the Gradio interface
220
+ with gr.Blocks() as iface:
221
+ gr.Markdown("# the-micro-slot-machine")
222
+ gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
223
+ gr.Markdown("this is an even weirder slot machine than the other one. on the left, you get to generate some state of the art lo-fi jungle drums at incredible speed thanks to aaron's new class of model, and if you want you can have it continue its own output. Then, you can either press the generate_music button to use the first 5 seconds as a prompt, or you can re-upload the audio into the continue_music section to have a fine-tune continue from the end of the jungle drum output, however long and insane it is. think of this as a very weird relay race and you're winning.")
224
+
225
+ with gr.Accordion("more info", open=False):
226
+ gr.Markdown(musicgen_micro_blurb)
227
+ gr.Markdown(musicgen_blurb)
228
+ gr.Markdown(finetunes_blurb)
229
+
230
+ with gr.Accordion("fine-tunes info", open=False):
231
+ gr.Markdown(fine_tunes_info)
232
+
233
+ with gr.Row():
234
+ with gr.Column():
235
+ generate_button = gr.Button("Generate Drum Sample")
236
+ drum_audio = gr.Audio(
237
+ label="Generated Drum Sample",
238
+ type="filepath",
239
+ interactive=True,
240
+ show_download_button=True
241
+ )
242
+ continue_drum_sample_button = gr.Button("Continue Drum Sample")
243
+
244
+ with gr.Column():
245
+ prompt_duration = gr.Dropdown(
246
+ label="Prompt Duration (seconds)",
247
+ choices=list(range(1, 11)),
248
+ value=5
249
+ )
250
+ output_duration = gr.Slider(
251
+ label="Output Duration (seconds)",
252
+ minimum=10,
253
+ maximum=30,
254
+ step=1,
255
+ value=20
256
+ )
257
+ musicgen_model = gr.Dropdown(
258
+ label="MusicGen Model",
259
+ choices=[
260
+ "thepatch/vanya_ai_dnb_0.1 (small)",
261
+ "thepatch/budots_remix (small)",
262
+ "thepatch/PhonkV2 (small)",
263
+ "thepatch/bleeps-medium (medium)",
264
+ "thepatch/hoenn_lofi (large)",
265
+ "foureyednymph/musicgen-sza-sos-small (small)"
266
+ ],
267
+ value="thepatch/vanya_ai_dnb_0.1 (small)"
268
+ )
269
+ generate_music_button = gr.Button("Generate Music")
270
+ output_audio = gr.Audio(label="Generated Music", type="filepath")
271
+ continue_button = gr.Button("Continue Generating Music")
272
+ continue_output_audio = gr.Audio(label="Continued Music Output", type="filepath")
273
+
274
+ # Hidden component to provide dummy input
275
+ hidden_trigger = gr.Textbox(value="generate", visible=False)
276
+
277
+ # Fixed click handlers - use hidden input for generate_drum_sample
278
+ # Normal click connection
279
+ generate_button.click(
280
+ lambda: generate_drum_sample(),
281
+ outputs=[drum_audio]
282
+ )
283
+ # continue_drum_sample_button.click(continue_drum_sample, inputs=[drum_audio], outputs=[drum_audio])
284
+ # generate_music_button.click(generate_music, inputs=[drum_audio, prompt_duration, musicgen_model, output_duration], outputs=[output_audio])
285
+ # continue_button.click(continue_music, inputs=[output_audio, prompt_duration, musicgen_model, output_duration], outputs=continue_output_audio)
286
 
287
  if __name__ == "__main__":
288
+ iface.launch()