Spaces: Runtime error
| import re | |
| import gradio as gr | |
| from model import ToyModel | |
"""
Model specification
"""
# Hugging Face Hub identifiers for the two backbones the demo combines.
vision_model_path = 'openai/clip-vit-base-patch32'
language_model_path = 'openai-community/gpt2'
# Global model instance shared by every Gradio callback below.
# NOTE(review): constructed at import time — presumably downloads/loads
# weights here; confirm this is intended for the Spaces cold-start path.
model = ToyModel(vision_model_path, language_model_path)
def chat(image_input, text_input):
    """Forward one (image, text) turn to the global model.

    Returns the input image unchanged together with the model's text reply,
    matching the demo's (image_output, text_output) components.
    """
    reply = model.chat(image_input, text_input)
    return image_input, reply
| """ | |
| Gradio | |
| """ | |
def gradio_taskselect(idx):
    """Map a task-shortcut index to its prompt prefix and usage hint.

    Args:
        idx: Integer index emitted by the ``gr.Dataset`` shortcut row
            (0 = No Tag, 1 = Grounding, 2 = Refer, 3 = Detection,
            4 = Identify, 5 = VQA).

    Returns:
        tuple[str, str]: ``(prompt, hint)`` — the text pre-filled into the
        input box and the markdown hint displayed for the selected task.

    Raises:
        IndexError: If ``idx`` is outside ``0..5``.
    """
    prompt_list = [
        '',
        '[grounding] describe this image in detail',
        '[refer] ',
        '[detection] ',
        '[identify] what is this ',
        '[vqa] ',
    ]
    instruct_list = [
        '**Hint:** Type in whatever you want',
        '**Hint:** Send the command to generate a grounded image description',
        '**Hint:** Type in a phrase about an object in the image and send the command',
        '**Hint:** Type in a caption or phrase, and see object locations in the image',
        # Fixed user-facing typo: "botton" -> "button".
        '**Hint:** Draw a bounding box on the uploaded image then send the command. Click the "clear" button on the '
        'top right of the image before redraw',
        '**Hint:** Send a question to get a short answer',
    ]
    return prompt_list[idx], instruct_list[idx]
# Static page copy rendered by the Gradio Blocks layout below.
title = """<h1 align="center">RS-Visual Perception Demo</h1>"""
description = 'Welcome to Our RS-Visual Perception Demo!'
# Usage instructions shown beneath the input column.
introduction = '''
For Abilities Involving Visual Grounding:
1. Grounding: CLICK **Send** to generate a grounded image description.
2. Refer: Input a referring object and CLICK **Send**.
3. Detection: Write a caption or phrase, and CLICK **Send**.
4. Identify: Draw the bounding box on the uploaded image window and CLICK **Send** to generate the bounding box. (CLICK "clear" button before re-drawing next time).
5. VQA: Input a visual question and CLICK **Send**.
6. No Tag: Input whatever you want and CLICK **Send** without any tagging
You can also simply chat in free form!
'''
# UI layout and event wiring. Component construction order determines
# on-page rendering order, so statement order here is significant.
with gr.Blocks() as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        # Left column: image upload, task shortcuts, and text entry.
        # NOTE(review): float `scale=0.5` is accepted by gradio 3.x but
        # gradio 4.x requires an integer — confirm the pinned version.
        with gr.Column(scale=0.5):
            image_input = gr.Image(type="pil", label="Input Image")
            # NOTE(review): this slider is not wired into any callback below,
            # so its value is currently unused by `chat` — confirm intent.
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.6,
                step=0.1,
                interactive=True,
                label="Temperature",
            )
            # Clickable task shortcuts; type="index" makes the click event
            # pass the selected row's integer index to `gradio_taskselect`.
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                samples=[['No Tag'], ['Grounding'], ['Refer'], ['Detection'], ['Identify'], ['VQA']],
                type="index",
                label='Task Shortcuts',
            )
            # Per-task hint text, updated by the dataset click handler.
            task_inst = gr.Markdown('**Hint:** Upload your image and chat')
            text_input = gr.Textbox(label='Input text', placeholder='Upload your image and chat', interactive=True, )
            submit_button = gr.Button("Submit", variant='primary', size='sm', scale=1)
            gr.Markdown(introduction)
        # Right column: model outputs.
        with gr.Column():
            image_output = gr.Image(type="pil", label='Output image')
            text_output = gr.Textbox(label='Output text', interactive=True)
    # Clickable example rows; clicking fills (image_input, text_input).
    # NOTE(review): the examples_v2/* files must exist in the Space repo —
    # missing assets are a plausible cause of the reported runtime error.
    with gr.Row():
        with gr.Column():
            gr.Examples(examples=[
                ["examples_v2/office.jpg", "[grounding] describe this image in detail"],
                ["examples_v2/sofa.jpg", "[detection] sofas"],
                ["examples_v2/2000x1372_wmkn_0012149409555.jpg", "[refer] the world cup"],
                ["examples_v2/KFC-20-for-20-Nuggets.jpg", "[identify] what is this {<4><50><30><65>}"],
            ], inputs=[image_input, text_input], fn=chat,
                outputs=[image_output, text_output])
        with gr.Column():
            gr.Examples(examples=[
                ["examples_v2/glip_test.jpg", "[vqa] where should I hide in this room when playing hide and seek"],
                ["examples_v2/float.png", "Please write a poem about the image"],
                ["examples_v2/thief.png", "Is the weapon fateful"],
                ["examples_v2/cockdial.png", "What might happen in this image in the next second"],
            ], inputs=[image_input, text_input], fn=chat,
                outputs=[image_output, text_output])
    # Selecting a task shortcut pre-fills the prompt and swaps the hint.
    # NOTE(review): `postprocess=False` on events was removed in gradio 4.x —
    # confirm the pinned version supports it.
    dataset.click(
        gradio_taskselect,
        inputs=[dataset],
        outputs=[text_input, task_inst],
        show_progress="hidden",
        postprocess=False,
        queue=False,
    )
    # Both Enter-in-textbox and the Submit button run one chat turn.
    text_input.submit(
        chat,
        inputs=[image_input, text_input],
        outputs=[image_output, text_output],
    )
    submit_button.click(
        chat,
        inputs=[image_input, text_input],
        outputs=[image_output, text_output],
    )
demo.launch()