Spaces:

GIZ
/

audit_assistant

Running on T4

ppsingh committed on Aug 6, 2024

Commit

3829a5f

verified ·

1 Parent(s): 5be75f1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ from dotenv import load_dotenv
 load_dotenv()
 HF_token = os.environ["HF_TOKEN"]
 # process all files and get the vectorstores collections
-#vectorstores = process_pdf()
 # -------------------------------------------------------------
 # Functions
@@ -83,18 +83,33 @@ async def chat(query,history,sources,reports,subtype,year):
     #print(f"audience:{audience}")
     print(f"sources:{sources}")
     print(f"reports:{reports}")
-    print(f"reports:{subtype}")
-    print(f"reports:{year}")
     docs_html = ""
     output_query = ""
     if len(reports) == 0:
-        print(sources)
-        #vectorstore = vectorstores[sources]
     else:
-        print(reports)
-        #vectorstore = vectorstores["allreports"]
     yield history,docs_html

 load_dotenv()
 HF_token = os.environ["HF_TOKEN"]
 # process all files and get the vectorstores collections
+process_pdf()
 # -------------------------------------------------------------
 # Functions
     #print(f"audience:{audience}")
     print(f"sources:{sources}")
     print(f"reports:{reports}")
+    print(f"subtype:{subtype}")
+    print(f"year:{year}")
     docs_html = ""
     output_query = ""
+    ##------------------------decide which collection to fetch------------------------------
     if len(reports) == 0:
+        vectorstore = vectorstores[sources]
     else:
+        vectorstore = vectorstores["allreports"]
+    ##------------------------------get context----------------------------------------------------
+    context_retrieved_lst = []
+    question_lst= [query]
+    for question in question_lst:
+        retriever = vectorstore.as_retriever(
+          search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6, "k": 3})
+        context_retrieved = retriever.invoke(question)
+        def format_docs(docs):
+            return "\n\n".join(doc.page_content for doc in docs)
+        context_retrieved_formatted = format_docs(context_retrieved)
+        context_retrieved_lst.append(context_retrieved_formatted)
+    print(context_retrieved_lst)
     yield history,docs_html