Spaces:
Runtime error
Runtime error
Commit
·
fb39607
1
Parent(s):
c77c9f7
- __pycache__/main.cpython-310.pyc +0 -0
- app.py +12 -0
- main.py +2 -6
__pycache__/main.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
|
|
|
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import streamlit as st
|
|
| 2 |
from main import benchmark_model_multithreaded, benchmark_model_sequential
|
| 3 |
from prompts import questions as predefined_questions
|
| 4 |
import requests
|
|
|
|
| 5 |
|
| 6 |
# Set the title in the browser tab
|
| 7 |
st.set_page_config(page_title="Aidan Bench - Generator")
|
|
@@ -130,11 +131,22 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
|
|
| 130 |
results_table.append({
|
| 131 |
"Question": result["question"],
|
| 132 |
"Answer": answer,
|
|
|
|
|
|
|
| 133 |
"Coherence Score": result["coherence_score"],
|
| 134 |
"Novelty Score": result["novelty_score"]
|
| 135 |
})
|
| 136 |
st.table(results_table)
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
if stop_button:
|
| 139 |
st.warning("Partial results displayed due to interruption.")
|
| 140 |
else:
|
|
|
|
| 2 |
from main import benchmark_model_multithreaded, benchmark_model_sequential
|
| 3 |
from prompts import questions as predefined_questions
|
| 4 |
import requests
|
| 5 |
+
import pandas as pd
|
| 6 |
|
| 7 |
# Set the title in the browser tab
|
| 8 |
st.set_page_config(page_title="Aidan Bench - Generator")
|
|
|
|
| 131 |
results_table.append({
|
| 132 |
"Question": result["question"],
|
| 133 |
"Answer": answer,
|
| 134 |
+
"Contestant Model": model_name,
|
| 135 |
+
"Judge Model": 'openai/gpt-4o-mini',
|
| 136 |
"Coherence Score": result["coherence_score"],
|
| 137 |
"Novelty Score": result["novelty_score"]
|
| 138 |
})
|
| 139 |
st.table(results_table)
|
| 140 |
|
| 141 |
+
df = pd.DataFrame(results_table) # Create a Pandas DataFrame from the results
|
| 142 |
+
csv = df.to_csv(index=False).encode('utf-8') # Convert DataFrame to CSV
|
| 143 |
+
st.download_button(
|
| 144 |
+
label="Export Results as CSV",
|
| 145 |
+
data=csv,
|
| 146 |
+
file_name="benchmark_results.csv",
|
| 147 |
+
mime='text/csv'
|
| 148 |
+
)
|
| 149 |
+
|
| 150 |
if stop_button:
|
| 151 |
st.warning("Partial results displayed due to interruption.")
|
| 152 |
else:
|
main.py
CHANGED
|
@@ -51,16 +51,12 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
|
|
| 51 |
if coherence_score is None:
|
| 52 |
break
|
| 53 |
|
| 54 |
-
if coherence_score <=
|
| 55 |
-
# st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>",
|
| 56 |
-
# unsafe_allow_html=True)
|
| 57 |
break
|
| 58 |
|
| 59 |
novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
|
| 60 |
|
| 61 |
-
if novelty_score < 0.
|
| 62 |
-
# st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>",
|
| 63 |
-
# unsafe_allow_html=True)
|
| 64 |
break
|
| 65 |
|
| 66 |
# Append results to the queue instead of using st.write
|
|
|
|
| 51 |
if coherence_score is None:
|
| 52 |
break
|
| 53 |
|
| 54 |
+
if coherence_score <= 6:
|
|
|
|
|
|
|
| 55 |
break
|
| 56 |
|
| 57 |
novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
|
| 58 |
|
| 59 |
+
if novelty_score < 0.3:
|
|
|
|
|
|
|
| 60 |
break
|
| 61 |
|
| 62 |
# Append results to the queue instead of using st.write
|