wsntxxn committed on
Commit
53eaf02
Β·
1 Parent(s): 79d437c

Adjust tabs

Browse files
Files changed (1) hide show
  1. app.py +81 -11
app.py CHANGED
@@ -2,6 +2,10 @@
2
 
3
  import os
4
  import gradio as gr
 
 
 
 
5
 
6
  import spaces
7
 
@@ -167,6 +171,37 @@ def speech_enhancement(
167
  return None, f"Error: {str(e)}"
168
 
169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  @spaces.GPU(duration=60)
171
  def audio_super_resolution(
172
  low_sr_audio,
@@ -187,9 +222,18 @@ def audio_super_resolution(
187
  num_steps=num_steps,
188
  output_path=output_path
189
  )
190
- return output_path, "Super-resolution successful!"
 
 
 
 
 
 
 
 
 
191
  except Exception as e:
192
- return None, f"Error: {str(e)}"
193
 
194
 
195
  @spaces.GPU(duration=60)
@@ -217,9 +261,24 @@ def video_to_audio(
217
  return None, f"Error: {str(e)}"
218
 
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  # Create Gradio Interface
221
  with gr.Blocks(
222
- title="UniFlow-Audio Inference Demo", theme=gr.themes.Soft()
 
 
223
  ) as demo:
224
  gr.Markdown("# πŸ”Š UniFlow-Audio Inference Demo")
225
  gr.Markdown(
@@ -228,7 +287,7 @@ with gr.Blocks(
228
 
229
  with gr.Tabs():
230
  # Tab 1: Text to Audio
231
- with gr.Tab("πŸ“’ Text to Audio (T2A)"):
232
  with gr.Row():
233
  with gr.Column():
234
  t2a_caption = gr.Textbox(
@@ -279,7 +338,7 @@ with gr.Blocks(
279
  )
280
 
281
  # Tab 2: Text to Music
282
- with gr.Tab("🎼 Text to Music (T2M)"):
283
  with gr.Row():
284
  with gr.Column():
285
  t2m_caption = gr.Textbox(
@@ -330,7 +389,7 @@ with gr.Blocks(
330
  )
331
 
332
  # Tab 3: Text to Speech
333
- with gr.Tab("πŸ—£οΈ Text to Speech (TTS)"):
334
  with gr.Row():
335
  with gr.Column():
336
  tts_transcript = gr.Textbox(
@@ -393,7 +452,7 @@ with gr.Blocks(
393
  )
394
 
395
  # Tab 4: Singing Voice Synthesis
396
- with gr.Tab("🎀 Singing Voice Synthesis (SVS)"):
397
  with gr.Row():
398
  with gr.Column():
399
  svs_singer = gr.Dropdown(
@@ -487,7 +546,7 @@ with gr.Blocks(
487
  )
488
 
489
  # Tab 5: Speech Enhancement
490
- with gr.Tab("πŸ”Š Speech Enhancement (SE)"):
491
  with gr.Row():
492
  with gr.Column():
493
  se_input = gr.Audio(label="Noisy Speech", type="filepath")
@@ -533,7 +592,7 @@ with gr.Blocks(
533
  )
534
 
535
  # Tab 6: Audio Super Resolution
536
- with gr.Tab("⬆️ Audio Super Resolution (SR)"):
537
  with gr.Row():
538
  with gr.Column():
539
  sr_input = gr.Audio(
@@ -569,10 +628,21 @@ with gr.Blocks(
569
  )
570
  sr_status = gr.Textbox(label="Status")
571
 
 
 
 
 
 
 
 
 
 
 
 
572
  sr_button.click(
573
  fn=audio_super_resolution,
574
  inputs=[sr_input, sr_model, sr_guidance, sr_steps],
575
- outputs=[sr_output, sr_status]
576
  )
577
 
578
  gr.Examples(
@@ -583,7 +653,7 @@ with gr.Blocks(
583
  )
584
 
585
  # Tab 7: Video to Audio
586
- with gr.Tab("🎬 Video to Audio (V2A)"):
587
  with gr.Row():
588
  with gr.Column():
589
  v2a_input = gr.Video(label="Input Video")
 
2
 
3
  import os
4
  import gradio as gr
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ import librosa
8
+ import librosa.display
9
 
10
  import spaces
11
 
 
171
  return None, f"Error: {str(e)}"
172
 
173
 
174
def generate_spectrogram(audio_path, title="Spectrogram"):
    """Render a linear-frequency STFT spectrogram of an audio file to a PNG.

    Args:
        audio_path: Path to the audio file to analyze.
        title: Plot title; the native sample rate is appended automatically.

    Returns:
        Path to the saved PNG image, or None if loading/plotting failed.
    """
    try:
        # Load at the file's native sample rate (sr=None) so the frequency
        # axis reflects the actual recording, not a resampled default.
        y, sr = librosa.load(audio_path, sr=None)

        fig, ax = plt.subplots(figsize=(10, 4))

        # Linear-frequency STFT magnitude in dB (displayed with y_axis='hz');
        # note this is NOT a mel spectrogram despite the original comment.
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

        img = librosa.display.specshow(
            D, y_axis='hz', x_axis='time', sr=sr, ax=ax
        )
        ax.set_title(f'{title} (Sample Rate: {sr} Hz)')
        fig.colorbar(img, ax=ax, format='%+2.0f dB')

        # Derive the image path from the audio path regardless of extension.
        # The previous str.replace('.wav', ...) left non-.wav paths unchanged,
        # which made fig.savefig() overwrite the source audio with PNG bytes.
        base, _ = os.path.splitext(audio_path)
        spec_path = base + '_spec.png'
        plt.tight_layout()
        fig.savefig(spec_path, dpi=100, bbox_inches='tight')
        plt.close(fig)

        return spec_path
    except Exception as e:
        # Best effort: spectrograms are decorative, so log and return None
        # rather than propagating the failure to the inference pipeline.
        print(f"Error generating spectrogram: {str(e)}")
        return None
203
+
204
+
205
  @spaces.GPU(duration=60)
206
  def audio_super_resolution(
207
  low_sr_audio,
 
222
  num_steps=num_steps,
223
  output_path=output_path
224
  )
225
+
226
+ # Generate spectrograms for input and output
227
+ input_spec = generate_spectrogram(
228
+ low_sr_audio, "Input Audio Spectrogram"
229
+ )
230
+ output_spec = generate_spectrogram(
231
+ output_path, "Output Audio Spectrogram"
232
+ )
233
+
234
+ return output_path, "Super-resolution successful!", input_spec, output_spec
235
  except Exception as e:
236
+ return None, f"Error: {str(e)}", None, None
237
 
238
 
239
  @spaces.GPU(duration=60)
 
261
  return None, f"Error: {str(e)}"
262
 
263
 
264
+ # Custom CSS for better tab display
265
+ custom_css = """
266
+ .tab-nav button {
267
+ font-size: 14px !important;
268
+ padding: 8px 12px !important;
269
+ min-width: fit-content !important;
270
+ }
271
+ .tab-nav {
272
+ overflow-x: auto !important;
273
+ flex-wrap: nowrap !important;
274
+ }
275
+ """
276
+
277
  # Create Gradio Interface
278
  with gr.Blocks(
279
+ title="UniFlow-Audio Inference Demo",
280
+ theme=gr.themes.Soft(),
281
+ css=custom_css
282
  ) as demo:
283
  gr.Markdown("# πŸ”Š UniFlow-Audio Inference Demo")
284
  gr.Markdown(
 
287
 
288
  with gr.Tabs():
289
  # Tab 1: Text to Audio
290
+ with gr.Tab("πŸ“’ Text to Audio"):
291
  with gr.Row():
292
  with gr.Column():
293
  t2a_caption = gr.Textbox(
 
338
  )
339
 
340
  # Tab 2: Text to Music
341
+ with gr.Tab("🎼 Text to Music"):
342
  with gr.Row():
343
  with gr.Column():
344
  t2m_caption = gr.Textbox(
 
389
  )
390
 
391
  # Tab 3: Text to Speech
392
+ with gr.Tab("πŸ—£οΈ Text to Speech"):
393
  with gr.Row():
394
  with gr.Column():
395
  tts_transcript = gr.Textbox(
 
452
  )
453
 
454
  # Tab 4: Singing Voice Synthesis
455
+ with gr.Tab("🎀 Singing Voice Synthesis"):
456
  with gr.Row():
457
  with gr.Column():
458
  svs_singer = gr.Dropdown(
 
546
  )
547
 
548
  # Tab 5: Speech Enhancement
549
+ with gr.Tab("πŸ”Š Speech Enhancement"):
550
  with gr.Row():
551
  with gr.Column():
552
  se_input = gr.Audio(label="Noisy Speech", type="filepath")
 
592
  )
593
 
594
  # Tab 6: Audio Super Resolution
595
+ with gr.Tab("⬆️ Audio SR"):
596
  with gr.Row():
597
  with gr.Column():
598
  sr_input = gr.Audio(
 
628
  )
629
  sr_status = gr.Textbox(label="Status")
630
 
631
+ # Spectrograms display
632
+ with gr.Row():
633
+ with gr.Column():
634
+ sr_input_spec = gr.Image(
635
+ label="Input Spectrogram", type="filepath"
636
+ )
637
+ with gr.Column():
638
+ sr_output_spec = gr.Image(
639
+ label="Output Spectrogram", type="filepath"
640
+ )
641
+
642
  sr_button.click(
643
  fn=audio_super_resolution,
644
  inputs=[sr_input, sr_model, sr_guidance, sr_steps],
645
+ outputs=[sr_output, sr_status, sr_input_spec, sr_output_spec]
646
  )
647
 
648
  gr.Examples(
 
653
  )
654
 
655
  # Tab 7: Video to Audio
656
+ with gr.Tab("🎬 Video to Audio"):
657
  with gr.Row():
658
  with gr.Column():
659
  v2a_input = gr.Video(label="Input Video")