Visualization
- app.py           +3 -2
- requirements.txt +1 -0
- vicca_api.py     +8 -1
app.py
CHANGED

@@ -19,14 +19,14 @@ def vicca_interface(image, text_prompt, box_threshold=0.2, text_threshold=0.2, n
     )
 
     best_gen = result.get("best_generated_image_path")
-
+    VG_path = result.get("VG_annotated_image_path")
     attn = result.get("attention_overlays") or {}
     combined = attn.get("combined")
     per_term_dict = attn.get("per_term") or {}
 
     gallery_items = [(p, term) for term, p in per_term_dict.items()]
 
-    return best_gen, combined, gallery_items, result
+    return best_gen, VG_path, combined, gallery_items, result
 
 demo = gr.Interface(
     fn=vicca_interface,
@@ -39,6 +39,7 @@ demo = gr.Interface(
     ],
     outputs=[
         gr.Image(label="Best generated CXR"),
+        gr.Image(label="VG annotated image"),
         gr.Image(label="Combined attention heatmap"),
         # gr.Gallery(label="Per-term overlays").style(grid=[3], height=400),
         gr.Gallery(
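Since gr.Interface pairs the function's return tuple with the `outputs` list by position, the new `VG_path` return value and the new `gr.Image` component must sit at the same index (here, index 1). A minimal, hypothetical sketch of that positional contract, with a stand-in function rather than the real `vicca_interface`:

```python
import gradio as gr

def fn(path):
    # Stand-in for vicca_interface: value i of the returned tuple
    # feeds component i of the `outputs` list below.
    return path, path  # (best generated image, VG-annotated image)

demo = gr.Interface(
    fn=fn,
    inputs=gr.Image(type="filepath"),
    outputs=[
        gr.Image(label="Best generated CXR"),  # index 0 <- best_gen
        gr.Image(label="VG annotated image"),  # index 1 <- VG_path (new)
    ],
)

if __name__ == "__main__":
    demo.launch()
```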
requirements.txt
CHANGED

@@ -49,6 +49,7 @@ scikit-learn
 scikit-image
 tqdm
 statsmodels
+supervision
 
 # Formatting / style
 yapf
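The added dependency is the Roboflow `supervision` package, which `vicca_api.py` below uses for `sv.ColorPalette.DEFAULT`. Because the requirement is unpinned, a quick smoke test can confirm the installed release is new enough (older releases exposed `ColorPalette.default()` instead; the attribute availability is the only assumption here):

```python
# Sanity check that the unpinned "supervision" dependency is recent
# enough for the API used in vicca_api.py (ColorPalette.DEFAULT was
# added in later releases).
import supervision as sv

palette = sv.ColorPalette.DEFAULT
print(f"supervision {sv.__version__}: {len(palette.colors)} default colors")
```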
vicca_api.py
CHANGED

@@ -4,10 +4,11 @@ import time
 import cv2
 import pandas as pd
 import torch
+import supervision as sv
 
 from weights_utils import ensure_all_vicca_weights, get_weight
 from vg_token_attention import run_token_ca_visualization
-
+from VG.groundingdino.util.inference import annotate
 
 # Make sure all heavy weights are present once per container
 ensure_all_vicca_weights()
@@ -69,6 +70,11 @@ def run_vicca(
         box_threshold,
         text_threshold,
     )
+    annotate_dict = dict(color=sv.ColorPalette.DEFAULT, thickness=2, text_thickness=1)
+
+    annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases, bbox_annot=annotate_dict)
+    VG_path = os.path.join(output_path, "VG_annotations.jpg")
+    cv2.imwrite(VG_path, annotated_frame)
 
     # 5) SSIM per bbox
     image_org_cv = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
@@ -126,4 +132,5 @@ def run_vicca(
         "shift_y": sy,
         "best_generated_image_path": max_sim_gen_path,
         "attention_overlays": attn_paths,
+        "VG_annotated_image_path": VG_path,
     }
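For reference, upstream GroundingDINO's `annotate` helper takes only `(image_source, boxes, logits, phrases)` and returns a BGR array, which is why the `cv2.imwrite` call in run_vicca needs no RGB-to-BGR conversion; the extra `bbox_annot` keyword is presumably an extension in the vendored `VG.groundingdino` fork. A minimal sketch against the upstream signature, using dummy inputs and assuming a `supervision` release compatible with GroundingDINO's annotator:

```python
import cv2
import numpy as np
import torch
from groundingdino.util.inference import annotate  # upstream helper

# Dummy stand-ins: an RGB image plus one box in normalized cxcywh format.
image_source = np.zeros((480, 640, 3), dtype=np.uint8)
boxes = torch.tensor([[0.5, 0.5, 0.4, 0.3]])  # (cx, cy, w, h) in [0, 1]
logits = torch.tensor([0.72])
phrases = ["opacity"]

# Draws each box with a "<phrase> <score>" label and returns a BGR frame,
# so cv2.imwrite can save it directly.
annotated_frame = annotate(image_source=image_source, boxes=boxes,
                           logits=logits, phrases=phrases)
cv2.imwrite("VG_annotations.jpg", annotated_frame)
```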