Spaces:

alexandrainst
/

radial-plot-generator

Running

App Files Files Community

saattrupdan committed on Mar 24

Commit

c97530c

1 Parent(s): 6bdb37f

feat: Use actual ranks on scale

Browse files

Files changed (1) hide show

app.py +16 -8

app.py CHANGED Viewed

@@ -126,7 +126,7 @@ paper](https://aclanthology.org/2023.nodalida-1.20):
 UPDATE_FREQUENCY_MINUTES = 5
 MIN_COLOUR_DISTANCE_BETWEEN_MODELS = 200
 DEFAULT_LANGUAGES = ["Danish"]
-DEFAULT_MODELS = ["gpt-4-0613", "mistralai/Mistral-7B-v0.1"]
 class Task(BaseModel):
@@ -633,6 +633,7 @@ def produce_radial_plot(
     for task in tasks:
         for language in languages:
             df = results_dfs_filtered[language][task].dropna()
             model_ids_sorted: list[str] = (
                 df.map(np.mean).sort_values(ascending=False).index.tolist()
             )
@@ -649,14 +650,15 @@ def produce_radial_plot(
                         a=best_scores, b=scores, alternative="greater"
                     ).pvalue < 0.05
                     if worse_than_previous_models:
-                        rank += 1
                         best_scores = scores
                 ranks.append(rank)
             ranks = np.asarray(ranks)
-            scores = 1 - (ranks / ranks.max())
-            for model_id, score in zip(model_ids_sorted, scores):
-                all_rank_scores[task][language][model_id] = score
     logger.info("Successfully computed rank scores.")
     # Add all the evaluation results for each model
@@ -671,7 +673,7 @@ def produce_radial_plot(
                 if model_id not in results_dfs_filtered[language].index:
                     continue
-                rank_score = 100 * all_rank_scores[task][language][model_id]
                 rank_scores.append(rank_score)
                 score_arr = np.array(results_dfs_filtered[language].loc[model_id][task])
@@ -699,7 +701,9 @@ def produce_radial_plot(
     # Sort the models (and their results) such that the model that beats the most
     # other models comes first. This will result in the "smaller areas" being on top
     # of the "larger areas", which is more aesthetically pleasing.
-    sorted_idxs = num_models_beaten.sum(axis=1).argsort()[::-1]
     model_ids = np.asarray(model_ids)[sorted_idxs].tolist()
     results = result_matrix[sorted_idxs].tolist()
@@ -734,7 +738,11 @@ def produce_radial_plot(
     # Builds the radial plot from the results
     fig.update_layout(
-        polar=dict(radialaxis=dict(visible=show_scale, range=[0, 100])),
         showlegend=True,
         title=title,
         width=plot_width,

 UPDATE_FREQUENCY_MINUTES = 5
 MIN_COLOUR_DISTANCE_BETWEEN_MODELS = 200
 DEFAULT_LANGUAGES = ["Danish"]
+DEFAULT_MODELS = ["gpt-4-0613", "google/gemma-3-12b-it"]
 class Task(BaseModel):
     for task in tasks:
         for language in languages:
             df = results_dfs_filtered[language][task].dropna()
+            stddev = df.map(np.mean).std()
             model_ids_sorted: list[str] = (
                 df.map(np.mean).sort_values(ascending=False).index.tolist()
             )
                         a=best_scores, b=scores, alternative="greater"
                     ).pvalue < 0.05
                     if worse_than_previous_models:
+                        difference = np.mean(best_scores) - np.mean(scores)
+                        normalised_difference = difference / stddev
+                        rank += normalised_difference
                         best_scores = scores
                 ranks.append(rank)
             ranks = np.asarray(ranks)
+            for model_id, rank in zip(model_ids_sorted, ranks):
+                all_rank_scores[task][language][model_id] = rank
     logger.info("Successfully computed rank scores.")
     # Add all the evaluation results for each model
                 if model_id not in results_dfs_filtered[language].index:
                     continue
+                rank_score = all_rank_scores[task][language][model_id]
                 rank_scores.append(rank_score)
                 score_arr = np.array(results_dfs_filtered[language].loc[model_id][task])
     # Sort the models (and their results) such that the model that beats the most
     # other models comes first. This will result in the "smaller areas" being on top
     # of the "larger areas", which is more aesthetically pleasing.
+    sorted_idxs = num_models_beaten.sum(axis=1).argsort()
+    if not use_rank_score:
+        sorted_idxs = sorted_idxs[::-1]
     model_ids = np.asarray(model_ids)[sorted_idxs].tolist()
     results = result_matrix[sorted_idxs].tolist()
     # Builds the radial plot from the results
     fig.update_layout(
+        polar=dict(
+            radialaxis=dict(
+                visible=show_scale, range=[5, 1] if use_rank_score else [0, 100]
+            ),
+        ),
         showlegend=True,
         title=title,
         width=plot_width,