Spaces:
Sleeping
Sleeping
Feat: add task synonyms
Browse files
app.py
CHANGED
|
@@ -1,5 +1,19 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
# LLM performance data with scores
|
| 4 |
performance_data = {
|
| 5 |
"Undergraduate level knowledge": [("Claude 3 Opus", 86.8), ("GPT-4", 86.4), ("Gemini 1.0 Ultra", 83.7)],
|
|
@@ -15,9 +29,18 @@ performance_data = {
|
|
| 15 |
}
|
| 16 |
|
| 17 |
def recommend_llm(task):
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
return "No data available"
|
|
|
|
|
|
|
| 21 |
recommendations_sorted = sorted(recommendations, key=lambda x: x[1], reverse=True)
|
| 22 |
result = f"For {task}, the recommended LLMs are:\n"
|
| 23 |
for i, (model, score) in enumerate(recommendations_sorted):
|
|
@@ -27,11 +50,11 @@ def recommend_llm(task):
|
|
| 27 |
# Gradio interface
|
| 28 |
interface = gr.Interface(
|
| 29 |
fn=recommend_llm,
|
| 30 |
-
inputs=gr.
|
| 31 |
outputs=gr.Textbox(label="LLM Recommendations"),
|
| 32 |
title="LLM Recommendation App",
|
| 33 |
-
description="
|
| 34 |
)
|
| 35 |
|
| 36 |
# Launch the app
|
| 37 |
-
interface.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
|
| 3 |
+
# Synonyms for each task category
|
| 4 |
+
task_synonyms = {
|
| 5 |
+
"Undergraduate level knowledge": ["undergraduate level knowledge", "MMLU"],
|
| 6 |
+
"Graduate level reasoning": ["graduate level reasoning", "GPQA", "Diamond"],
|
| 7 |
+
"Grade school math": ["grade school math", "GSM8K"],
|
| 8 |
+
"Math problem-solving": ["math problem-solving", "MATH"],
|
| 9 |
+
"Multilingual math": ["multilingual math", "MGSM"],
|
| 10 |
+
"Code": ["code", "coding", "programming", "HumanEval"],
|
| 11 |
+
"Reasoning over text": ["reasoning over text", "DROP", "F1 score"],
|
| 12 |
+
"Mixed evaluations": ["mixed evaluations", "BIG-Bench-Hard"],
|
| 13 |
+
"Knowledge Q&A": ["knowledge Q&A", "ARC-Challenge"],
|
| 14 |
+
"Common Knowledge": ["common knowledge", "HellaSwag"],
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
# LLM performance data with scores
|
| 18 |
performance_data = {
|
| 19 |
"Undergraduate level knowledge": [("Claude 3 Opus", 86.8), ("GPT-4", 86.4), ("Gemini 1.0 Ultra", 83.7)],
|
|
|
|
| 29 |
}
|
| 30 |
|
| 31 |
def recommend_llm(task):
|
| 32 |
+
# Normalize the input task to match against synonyms
|
| 33 |
+
task_lower = task.lower()
|
| 34 |
+
main_category = None
|
| 35 |
+
for key, synonyms in task_synonyms.items():
|
| 36 |
+
if task_lower in map(str.lower, synonyms):
|
| 37 |
+
main_category = key
|
| 38 |
+
break
|
| 39 |
+
|
| 40 |
+
if not main_category:
|
| 41 |
return "No data available"
|
| 42 |
+
|
| 43 |
+
recommendations = performance_data.get(main_category, [])
|
| 44 |
recommendations_sorted = sorted(recommendations, key=lambda x: x[1], reverse=True)
|
| 45 |
result = f"For {task}, the recommended LLMs are:\n"
|
| 46 |
for i, (model, score) in enumerate(recommendations_sorted):
|
|
|
|
| 50 |
# Gradio interface
|
| 51 |
interface = gr.Interface(
|
| 52 |
fn=recommend_llm,
|
| 53 |
+
inputs=gr.Textbox(label="Enter Task"),
|
| 54 |
outputs=gr.Textbox(label="LLM Recommendations"),
|
| 55 |
title="LLM Recommendation App",
|
| 56 |
+
description="Enter a task to get recommendations for the best LLMs based on performance data. For example, you can enter 'coding', 'undergraduate level knowledge', etc."
|
| 57 |
)
|
| 58 |
|
| 59 |
# Launch the app
|
| 60 |
+
interface.launch()
|