import gradio as gr
from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering, AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
|
|
# Repo ids kept in one place: the original loaded the model from a duplicated
# hard-coded string *before* `model_path` was defined — define constants first
# and reuse them.
model_path = "microsoft/git-base-vqav2"
dataset_name = "Multimodal-Fatima/OK-VQA_train"

# GIT checkpoint fine-tuned on VQAv2; answers are generated via its causal-LM head.
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
|
def main(question_index=1):
    """Gradio callback: return the (image, question, answer) for one sample.

    Parameters
    ----------
    question_index : int, optional
        1-based slider position selecting which dataset sample to display.

    Returns
    -------
    tuple
        ``(image, question_text, answer_text)`` — matches the Interface's
        ``outputs=["image", "text", "text"]``.
    """
    # TODO(review): load the selected sample from the OK-VQA dataset
    # (`dataset_name`) and run the GIT model/tokenizer on it. The original
    # body was empty (a SyntaxError); this stub keeps the app launchable and
    # returns the output shape the Interface expects.
    return None, f"question #{question_index}", ""
|
# NOTE(review): `questions` is never defined anywhere in this file, so the
# original `len(questions)` raised a NameError at import time. Fall back to a
# single-position slider until the dataset is actually loaded.
# TODO: populate `questions` from the OK-VQA dataset (`dataset_name`).
num_questions = len(questions) if "questions" in globals() else 1

demo = gr.Interface(
    fn=main,
    inputs=[gr.Slider(1, num_questions, step=1)],
    outputs=["image", "text", "text"],
)

# share=True exposes a temporary public *.gradio.live URL in addition to localhost.
demo.launch(share=True)
|
|