prithivMLmods committed on
Commit
43c3626
·
verified ·
1 Parent(s): 9916e82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -57
app.py CHANGED
@@ -211,7 +211,7 @@ def generate_response(model_name: str, text: str, media_input, media_type: str,
211
 
212
  buffer = ""
213
  for new_text in streamer:
214
- buffer += new_text.replace("", "")
215
  yield buffer, buffer
216
 
217
  if model_name == "SmolDocling-256M-preview":
@@ -221,11 +221,12 @@ def generate_response(model_name: str, text: str, media_input, media_type: str,
221
  # For other models, the formatted output is just the cleaned buffer
222
  yield buffer, buffer.strip()
223
 
224
- def generate_image_wrapper(*args):
225
- yield from generate_response(*args, media_type="image")
 
 
 
226
 
227
- def generate_video_wrapper(*args):
228
- yield from generate_response(*args, media_type="video")
229
 
230
  # --- Examples ---
231
  image_examples = [
@@ -389,6 +390,11 @@ css = """
389
 
390
  # --- Gradio Interface ---
391
  with gr.Blocks(css=css) as demo:
 
 
 
 
 
392
  gr.Markdown("# **[Multimodal OCR2](https://huggingface.co/collections/prithivMLmods/multimodal-implementations-67c9982ea04b39f0608badb0)**")
393
 
394
  with gr.Row():
@@ -423,25 +429,8 @@ with gr.Blocks(css=css) as demo:
423
  elem_classes="doc-item"
424
  )
425
 
426
- # Examples section
427
- gr.Markdown("### Examples")
428
- with gr.Row():
429
- with gr.Column():
430
- gr.Examples(
431
- examples=image_examples,
432
- inputs=[image_query, image_upload],
433
- label="Image Examples"
434
- )
435
- with gr.Column():
436
- gr.Examples(
437
- examples=video_examples,
438
- inputs=[video_query, video_upload],
439
- label="Video Examples"
440
- )
441
-
442
- # File upload and controls
443
  with gr.Group(elem_classes="upload-controls"):
444
- # File upload area
445
  with gr.Column(elem_classes="file-upload"):
446
  file_upload = gr.File(
447
  label="Upload files (image/video)",
@@ -449,7 +438,6 @@ with gr.Blocks(css=css) as demo:
449
  elem_classes="file-upload"
450
  )
451
 
452
- # Model dropdown
453
  model_dropdown = gr.Dropdown(
454
  choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "Thyme-RL", "Typhoon-OCR-7B", "SmolDocling-256M-preview"],
455
  value="Nanonets-OCR-s",
@@ -457,8 +445,29 @@ with gr.Blocks(css=css) as demo:
457
  elem_classes="model-dropdown"
458
  )
459
 
460
- # Submit button
461
  submit_btn = gr.Button("→", size="lg", elem_classes="submit-btn")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
  # Advanced options (hidden by default)
464
  with gr.Accordion("Advanced Options", open=False):
@@ -468,13 +477,6 @@ with gr.Blocks(css=css) as demo:
468
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
469
  repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
470
 
471
- # Query input
472
- query_input = gr.Textbox(
473
- label="Enter your query",
474
- placeholder="Describe the image, extract text, convert to markdown...",
475
- elem_classes="query-input"
476
- )
477
-
478
  # Output area
479
  with gr.Group(elem_classes="output-area"):
480
  gr.Markdown("### Output")
@@ -485,13 +487,6 @@ with gr.Blocks(css=css) as demo:
485
  elem_classes="output-text"
486
  )
487
 
488
- # Initialize state variables
489
- image_query = gr.State("")
490
- video_query = gr.State("")
491
- image_upload = gr.State(None)
492
- video_upload = gr.State(None)
493
- media_type = gr.State("image")
494
-
495
  # --- Event Handlers ---
496
  def handle_file_upload(file):
497
  if file is None:
@@ -506,40 +501,34 @@ with gr.Blocks(css=css) as demo:
506
  file_upload.change(
507
  fn=handle_file_upload,
508
  inputs=[file_upload],
509
- outputs=[media_type, image_upload, video_upload]
510
  )
511
 
512
- def handle_model_selection(model_name):
513
- # This function could be used to update the UI based on model selection
514
- return f"Using {model_name}"
515
-
516
- model_dropdown.change(
517
- fn=handle_model_selection,
518
- inputs=[model_dropdown],
519
- outputs=[]
520
- )
521
-
522
- def generate_wrapper(text, img, vid, model, max_tokens, temp, top_p, top_k, rep_penalty, m_type):
523
  if m_type == "image" and img is not None:
524
- yield from generate_image_wrapper(text, img, model, max_tokens, temp, top_p, top_k, rep_penalty)
525
  elif m_type == "video" and vid is not None:
526
- yield from generate_video_wrapper(text, vid, model, max_tokens, temp, top_p, top_k, rep_penalty)
527
  else:
528
- yield "Please upload a valid file", "Please upload a valid file"
 
 
 
529
 
530
  submit_btn.click(
531
  fn=generate_wrapper,
532
  inputs=[
533
  query_input,
534
- image_upload,
535
- video_upload,
536
  model_dropdown,
537
  max_new_tokens,
538
  temperature,
539
  top_p,
540
  top_k,
541
  repetition_penalty,
542
- media_type
543
  ],
544
  outputs=[raw_output, raw_output]
545
  )
 
211
 
212
  buffer = ""
213
  for new_text in streamer:
214
+ buffer += new_text.replace("<|end_of_text|>", "")
215
  yield buffer, buffer
216
 
217
  if model_name == "SmolDocling-256M-preview":
 
221
  # For other models, the formatted output is just the cleaned buffer
222
  yield buffer, buffer.strip()
223
 
224
+ def generate_image_wrapper(text, img, model, max_tokens, temp, top_p_val, top_k_val, rep_penalty):
225
+ yield from generate_response(model, text, img, "image", max_tokens, temp, top_p_val, top_k_val, rep_penalty)
226
+
227
+ def generate_video_wrapper(text, vid, model, max_tokens, temp, top_p_val, top_k_val, rep_penalty):
228
+ yield from generate_response(model, text, vid, "video", max_tokens, temp, top_p_val, top_k_val, rep_penalty)
229
 
 
 
230
 
231
  # --- Examples ---
232
  image_examples = [
 
390
 
391
  # --- Gradio Interface ---
392
  with gr.Blocks(css=css) as demo:
393
+ # Initialize state variables that hold data
394
+ image_upload_state = gr.State(None)
395
+ video_upload_state = gr.State(None)
396
+ media_type_state = gr.State("image")
397
+
398
  gr.Markdown("# **[Multimodal OCR2](https://huggingface.co/collections/prithivMLmods/multimodal-implementations-67c9982ea04b39f0608badb0)**")
399
 
400
  with gr.Row():
 
429
  elem_classes="doc-item"
430
  )
431
 
432
+ # Define input components before they are referenced by gr.Examples
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  with gr.Group(elem_classes="upload-controls"):
 
434
  with gr.Column(elem_classes="file-upload"):
435
  file_upload = gr.File(
436
  label="Upload files (image/video)",
 
438
  elem_classes="file-upload"
439
  )
440
 
 
441
  model_dropdown = gr.Dropdown(
442
  choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "Thyme-RL", "Typhoon-OCR-7B", "SmolDocling-256M-preview"],
443
  value="Nanonets-OCR-s",
 
445
  elem_classes="model-dropdown"
446
  )
447
 
 
448
  submit_btn = gr.Button("→", size="lg", elem_classes="submit-btn")
449
+
450
+ query_input = gr.Textbox(
451
+ label="Enter your query",
452
+ placeholder="Describe the image, extract text, convert to markdown...",
453
+ elem_classes="query-input"
454
+ )
455
+
456
+ # Examples section
457
+ gr.Markdown("### Examples")
458
+ with gr.Row():
459
+ with gr.Column():
460
+ gr.Examples(
461
+ examples=image_examples,
462
+ inputs=[query_input, file_upload], # Corrected inputs
463
+ label="Image Examples"
464
+ )
465
+ with gr.Column():
466
+ gr.Examples(
467
+ examples=video_examples,
468
+ inputs=[query_input, file_upload], # Corrected inputs
469
+ label="Video Examples"
470
+ )
471
 
472
  # Advanced options (hidden by default)
473
  with gr.Accordion("Advanced Options", open=False):
 
477
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
478
  repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
479
 
 
 
 
 
 
 
 
480
  # Output area
481
  with gr.Group(elem_classes="output-area"):
482
  gr.Markdown("### Output")
 
487
  elem_classes="output-text"
488
  )
489
 
 
 
 
 
 
 
 
490
  # --- Event Handlers ---
491
  def handle_file_upload(file):
492
  if file is None:
 
501
  file_upload.change(
502
  fn=handle_file_upload,
503
  inputs=[file_upload],
504
+ outputs=[media_type_state, image_upload_state, video_upload_state] # Corrected outputs
505
  )
506
 
507
+ def generate_wrapper(text, img, vid, model, max_tokens, temp, top_p_val, top_k_val, rep_penalty, m_type):
508
+ media_input = None
 
 
 
 
 
 
 
 
 
509
  if m_type == "image" and img is not None:
510
+ media_input = img
511
  elif m_type == "video" and vid is not None:
512
+ media_input = vid
513
  else:
514
+ yield "Please upload a valid file.", "Please upload a valid file."
515
+ return
516
+
517
+ yield from generate_response(model, text, media_input, m_type, max_tokens, temp, top_p_val, top_k_val, rep_penalty)
518
 
519
  submit_btn.click(
520
  fn=generate_wrapper,
521
  inputs=[
522
  query_input,
523
+ image_upload_state, # Corrected input state
524
+ video_upload_state, # Corrected input state
525
  model_dropdown,
526
  max_new_tokens,
527
  temperature,
528
  top_p,
529
  top_k,
530
  repetition_penalty,
531
+ media_type_state # Corrected input state
532
  ],
533
  outputs=[raw_output, raw_output]
534
  )