Spaces:

uw-insight-lab
/

Probing-Vis-Literacy-of-VLMs

Paused

App Files Files Community

AustingDong committed on Mar 26

Commit

6d117d1

1 Parent(s): 7e57874

Added a custom loss (useless)

Browse files

Files changed (3) hide show

demo/visualization.py +11 -0
evaluate/evaluate.py +7 -9
evaluate/questions.py +12 -12

demo/visualization.py CHANGED Viewed

@@ -406,6 +406,16 @@ class VisualizationChartGemma(Visualization):
         super().__init__(model, register=True)
         self._modify_layers()
         self._register_hooks_activations()
     def forward_backward(self, inputs, focus, start_idx, target_token_idx, visual_method="softmax"):
         outputs_raw = self.model(**inputs, output_hidden_states=True)
@@ -421,6 +431,7 @@ class VisualizationChartGemma(Visualization):
             print("logits shape:", outputs_raw.logits.shape)
             if target_token_idx == -1:
                 loss = outputs_raw.logits.max(dim=-1).values.sum()
             else:
                 loss = outputs_raw.logits.max(dim=-1).values[0, start_idx + target_token_idx]
             loss.backward()

         super().__init__(model, register=True)
         self._modify_layers()
         self._register_hooks_activations()
+    # def custom_loss(self, start_idx, input_ids, logits):
+    #     Q = logits.shape[1]
+    #     loss = 0
+    #     q = 0
+    #     while start_idx + q < Q - 1:
+    #         loss += F.cross_entropy(logits[0, start_idx + q], input_ids[0, start_idx + q + 1])
+    #         q += 1
+    #     return loss
     def forward_backward(self, inputs, focus, start_idx, target_token_idx, visual_method="softmax"):
         outputs_raw = self.model(**inputs, output_hidden_states=True)
             print("logits shape:", outputs_raw.logits.shape)
             if target_token_idx == -1:
                 loss = outputs_raw.logits.max(dim=-1).values.sum()
+                # loss = self.custom_loss(start_idx, inputs['input_ids'], outputs_raw.logits)
             else:
                 loss = outputs_raw.logits.max(dim=-1).values[0, start_idx + target_token_idx]
             loss.backward()

evaluate/evaluate.py CHANGED Viewed

@@ -7,9 +7,9 @@ from openai import OpenAI
 from demo.model_utils import *
 from evaluate.questions import questions
-def set_seed(model_seed = 42):
     torch.manual_seed(model_seed)
-    np.random.seed(model_seed)
     torch.cuda.manual_seed(model_seed) if torch.cuda.is_available() else None
 def clean():
@@ -52,7 +52,7 @@ def evaluate(model_type, num_eval = 10):
             client = OpenAI(api_key=os.environ["GEMINI_HCI_API_KEY"],
                             base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
-        for question in questions:
             chart_type = question[0]
             q = question[1]
             img_path = question[2]
@@ -104,8 +104,8 @@ def evaluate(model_type, num_eval = 10):
             else:
                 prepare_inputs = model_utils.prepare_inputs(q, image)
-                temperature = 0.9
-                top_p = 0.1
                 if model_type.split('-')[0] == "Janus":
                     inputs_embeds = model_utils.generate_inputs_embeddings(prepare_inputs)
@@ -120,7 +120,7 @@ def evaluate(model_type, num_eval = 10):
             FILES_ROOT = f"{RESULTS_ROOT}/{model_type}/{eval_idx}"
             os.makedirs(FILES_ROOT, exist_ok=True)
-            with open(f"{FILES_ROOT}/{chart_type}.txt", "w") as f:
                 f.write(answer)
                 f.close()
@@ -129,8 +129,6 @@ def evaluate(model_type, num_eval = 10):
 if __name__ == '__main__':
     # models = ["ChartGemma", "Janus-Pro-1B", "Janus-Pro-7B", "LLaVA-1.5-7B", "GPT-4o", "Gemini-2.0-flash"]
-    # models = ["ChartGemma", "Janus-Pro-1B"]
-    # models = ["Janus-Pro-7B", "LLaVA-1.5-7B"]
-    models = ["GPT-4o", "Gemini-2.0-flash"]
     for model_type in models:
         evaluate(model_type=model_type, num_eval=10)

 from demo.model_utils import *
 from evaluate.questions import questions
+def set_seed(model_seed = 70):
     torch.manual_seed(model_seed)
+    # np.random.seed(model_seed)
     torch.cuda.manual_seed(model_seed) if torch.cuda.is_available() else None
 def clean():
             client = OpenAI(api_key=os.environ["GEMINI_HCI_API_KEY"],
                             base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
+        for question_idx, question in enumerate(questions):
             chart_type = question[0]
             q = question[1]
             img_path = question[2]
             else:
                 prepare_inputs = model_utils.prepare_inputs(q, image)
+                temperature = 0.1
+                top_p = 0.95
                 if model_type.split('-')[0] == "Janus":
                     inputs_embeds = model_utils.generate_inputs_embeddings(prepare_inputs)
             FILES_ROOT = f"{RESULTS_ROOT}/{model_type}/{eval_idx}"
             os.makedirs(FILES_ROOT, exist_ok=True)
+            with open(f"{FILES_ROOT}/Q{question_idx + 1}-{chart_type}.txt", "w") as f:
                 f.write(answer)
                 f.close()
 if __name__ == '__main__':
     # models = ["ChartGemma", "Janus-Pro-1B", "Janus-Pro-7B", "LLaVA-1.5-7B", "GPT-4o", "Gemini-2.0-flash"]
+    models = ["Janus-Pro-7B"]
     for model_type in models:
         evaluate(model_type=model_type, num_eval=10)

evaluate/questions.py CHANGED Viewed

@@ -2,72 +2,72 @@ questions=[
     [
         "LineChart",
         "What was the price of a barrel of oil in February 2020?",
-        "images/LineChart.png"
     ],
     [
         "BarChart",
         "What is the average internet speed in Japan?",
-        "images/BarChart.png"
     ],
     [
         "StackedBar",
         "What is the cost of peanuts in Seoul?",
-        "images/StackedBar.png"
     ],
     [
         "100%StackedBar",
         "Which country has the lowest proportion of Gold medals?",
-        "images/Stacked100.png"
     ],
     [
         "PieChart",
         "What is the approximate global smartphone market share of Samsung?",
-        "images/PieChart.png"
     ],
     [
         "Histogram",
         "What distance have customers traveled in the taxi the most?",
-        "images/Histogram.png"
     ],
     [
         "Scatterplot",
         "True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
-        "images/Scatterplot.png"
     ],
     [
         "AreaChart",
         "What was the average price of pount of coffee beans in October 2019?",
-        "images/AreaChart.png"
     ],
     [
         "StackedArea",
         "What was the ratio of girls named 'Isla' to girls named 'Amelia' in 2012 in the UK?",
-        "images/StackedArea.png"
     ],
     [
         "BubbleChart",
         "Which city's metro system has the largest number of stations?",
-        "images/BubbleChart.png"
     ],
     [
         "Choropleth",
         "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
-        "images/Choropleth_New.png"
     ],
     [
         "TreeMap",
         "True/False: eBay is nested in the Software category.",
-        "images/TreeMap.png"
     ]
 ]

     [
         "LineChart",
         "What was the price of a barrel of oil in February 2020?",
+        "images/mini-VLAT/LineChart.png"
     ],
     [
         "BarChart",
         "What is the average internet speed in Japan?",
+        "images/mini-VLAT/BarChart.png"
     ],
     [
         "StackedBar",
         "What is the cost of peanuts in Seoul?",
+        "images/mini-VLAT/StackedBar.png"
     ],
     [
         "100%StackedBar",
         "Which country has the lowest proportion of Gold medals?",
+        "images/mini-VLAT/Stacked100.png"
     ],
     [
         "PieChart",
         "What is the approximate global smartphone market share of Samsung?",
+        "images/mini-VLAT/PieChart.png"
     ],
     [
         "Histogram",
         "What distance have customers traveled in the taxi the most?",
+        "images/mini-VLAT/Histogram.png"
     ],
     [
         "Scatterplot",
         "True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
+        "images/mini-VLAT/Scatterplot.png"
     ],
     [
         "AreaChart",
         "What was the average price of pount of coffee beans in October 2019?",
+        "images/mini-VLAT/AreaChart.png"
     ],
     [
         "StackedArea",
         "What was the ratio of girls named 'Isla' to girls named 'Amelia' in 2012 in the UK?",
+        "images/mini-VLAT/StackedArea.png"
     ],
     [
         "BubbleChart",
         "Which city's metro system has the largest number of stations?",
+        "images/mini-VLAT/BubbleChart.png"
     ],
     [
         "Choropleth",
         "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
+        "images/mini-VLAT/Choropleth_New.png"
     ],
     [
         "TreeMap",
         "True/False: eBay is nested in the Software category.",
+        "images/mini-VLAT/TreeMap.png"
     ]
 ]