Spaces:

Presidentlin
/

Aidan-Bench

Runtime error

Presidentlin committed on Aug 13, 2024

Commit

fb39607

1 Parent(s): c77c9f7

x

Files changed (3) hide show

__pycache__/main.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import streamlit as st
 from main import benchmark_model_multithreaded, benchmark_model_sequential
 from prompts import questions as predefined_questions
 import requests
 # Set the title in the browser tab
 st.set_page_config(page_title="Aidan Bench - Generator")
@@ -130,11 +131,22 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
                     results_table.append({
                         "Question": result["question"],
                         "Answer": answer,
                         "Coherence Score": result["coherence_score"],
                         "Novelty Score": result["novelty_score"]
                     })
             st.table(results_table)
             if stop_button:
                 st.warning("Partial results displayed due to interruption.")
             else:

 from main import benchmark_model_multithreaded, benchmark_model_sequential
 from prompts import questions as predefined_questions
 import requests
+import pandas as pd
 # Set the title in the browser tab
 st.set_page_config(page_title="Aidan Bench - Generator")
                     results_table.append({
                         "Question": result["question"],
                         "Answer": answer,
+                        "Contestant Model": model_name,
+                        "Judge Model": 'openai/gpt-4o-mini',
                         "Coherence Score": result["coherence_score"],
                         "Novelty Score": result["novelty_score"]
                     })
             st.table(results_table)
+            df = pd.DataFrame(results_table)  # Create a Pandas DataFrame from the results
+            csv = df.to_csv(index=False).encode('utf-8')  # Convert DataFrame to CSV
+            st.download_button(
+                label="Export Results as CSV",
+                data=csv,
+                file_name="benchmark_results.csv",
+                mime='text/csv'
+                )
             if stop_button:
                 st.warning("Partial results displayed due to interruption.")
             else:

main.py CHANGED Viewed

@@ -51,16 +51,12 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
             if coherence_score is None:
                 break
-            if coherence_score <= 3:
-                # st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>",
-                #          unsafe_allow_html=True)
                 break
             novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
-            if novelty_score < 0.1:
-                # st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>",
-                #          unsafe_allow_html=True)
                 break
             # Append results to the queue instead of using st.write

             if coherence_score is None:
                 break
+            if coherence_score <= 6:
                 break
             novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
+            if novelty_score < 0.3:
                 break
             # Append results to the queue instead of using st.write