import gradio as gr

from model import ToyModel
"""
Model specification
"""
vision_model_path = 'openai/clip-vit-base-patch32'
language_model_path = 'openai-community/gpt2'
model = ToyModel(vision_model_path, language_model_path)
def chat(image_input, text_input):
    # Run one chat turn; the input image is echoed back so it can be shown
    # in the output panel alongside the model's text response.
    text_output = model.chat(image_input, text_input)
    return image_input, text_output
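
# --- Optional local sanity check (not part of the original app) --------------
# A minimal, hedged sketch of exercising chat() without launching the Gradio
# UI. The TOYMODEL_SMOKE_TEST switch and the choice of example image are
# assumptions for illustration; by default this block does nothing.
import os  # used only by the optional smoke test below
if os.environ.get("TOYMODEL_SMOKE_TEST"):
    from PIL import Image
    sample_image = Image.open("examples_v2/office.jpg")  # asset reused from the demo examples below
    _, sample_reply = chat(sample_image, "[vqa] what is shown in this image?")
    print(sample_reply)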
"""
Gradio
"""
def gradio_taskselect(idx):
    # Map a clicked task shortcut to its prompt prefix and usage hint.
    prompt_list = [
        '',
        '[grounding] describe this image in detail',
        '[refer] ',
        '[detection] ',
        '[identify] what is this ',
        '[vqa] '
    ]
    instruct_list = [
        '**Hint:** Type in whatever you want',
        '**Hint:** Send the command to generate a grounded image description',
        '**Hint:** Type in a phrase about an object in the image and send the command',
        '**Hint:** Type in a caption or phrase, and see object locations in the image',
        '**Hint:** Draw a bounding box on the uploaded image, then send the command. Click the "clear" button on the '
        'top right of the image before redrawing',
        '**Hint:** Send a question to get a short answer',
    ]
    return prompt_list[idx], instruct_list[idx]
title = """<h1 align="center">RS-Visual Perception Demo</h1>"""
description = 'Welcome to our RS-Visual Perception Demo!'
introduction = '''
For Abilities Involving Visual Grounding:
1. Grounding: CLICK **Send** to generate a grounded image description.
2. Refer: Input a referring object and CLICK **Send**.
3. Detection: Write a caption or phrase, and CLICK **Send**.
4. Identify: Draw a bounding box on the uploaded image and CLICK **Send** to identify the object inside it. (CLICK the "clear" button before drawing a new box.)
5. VQA: Input a visual question and CLICK **Send**.
6. No Tag: Input whatever you want and CLICK **Send** without any tag.
You can also simply chat in free form!
'''
with gr.Blocks() as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        with gr.Column(scale=0.5):
            image_input = gr.Image(type="pil", label="Input Image")
            # Sampling temperature shown in the UI; note it is not currently
            # passed to chat(), so it does not affect generation here.
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.6,
                step=0.1,
                interactive=True,
                label="Temperature",
            )
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                samples=[['No Tag'], ['Grounding'], ['Refer'], ['Detection'], ['Identify'], ['VQA']],
                type="index",
                label='Task Shortcuts',
            )
            task_inst = gr.Markdown('**Hint:** Upload your image and chat')
            text_input = gr.Textbox(label='Input text', placeholder='Upload your image and chat', interactive=True)
            submit_button = gr.Button("Submit", variant='primary', size='sm', scale=1)
            gr.Markdown(introduction)
        with gr.Column():
            image_output = gr.Image(type="pil", label='Output image')
            text_output = gr.Textbox(label='Output text', interactive=True)
    with gr.Row():
        with gr.Column():
            gr.Examples(
                examples=[
                    ["examples_v2/office.jpg", "[grounding] describe this image in detail"],
                    ["examples_v2/sofa.jpg", "[detection] sofas"],
                    ["examples_v2/2000x1372_wmkn_0012149409555.jpg", "[refer] the world cup"],
                    ["examples_v2/KFC-20-for-20-Nuggets.jpg", "[identify] what is this {<4><50><30><65>}"],
                ],
                inputs=[image_input, text_input],
                fn=chat,
                outputs=[image_output, text_output],
            )
        with gr.Column():
            gr.Examples(
                examples=[
                    ["examples_v2/glip_test.jpg", "[vqa] where should I hide in this room when playing hide and seek"],
                    ["examples_v2/float.png", "Please write a poem about the image"],
                    ["examples_v2/thief.png", "Is the weapon fateful"],
                    ["examples_v2/cockdial.png", "What might happen in this image in the next second"],
                ],
                inputs=[image_input, text_input],
                fn=chat,
                outputs=[image_output, text_output],
            )
    # Clicking a task shortcut pre-fills the text box with the matching prompt
    # prefix and updates the hint shown above it.
    dataset.click(
        gradio_taskselect,
        inputs=[dataset],
        outputs=[text_input, task_inst],
        show_progress="hidden",
        postprocess=False,
        queue=False,
    )
    # Pressing Enter in the text box and clicking Submit both run a chat turn.
    text_input.submit(
        chat,
        inputs=[image_input, text_input],
        outputs=[image_output, text_output],
    )
    submit_button.click(
        chat,
        inputs=[image_input, text_input],
        outputs=[image_output, text_output],
    )
demo.launch()
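
# A hedged sketch of driving the running demo from a separate process with
# gradio_client. The local URL and the fn_index targeting the Submit handler
# are assumptions; the exact endpoint depends on the Gradio version and on the
# order in which the event listeners above are registered.
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   image_out, text_out = client.predict(
#       "examples_v2/office.jpg",
#       "[vqa] what is shown in this image?",
#       fn_index=2,  # assumed: the submit_button.click handler
#   )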