Spaces:

Vasanth
/

Dummy_Researcher

Runtime error

App Files Files Community

Vasanth committed on Mar 10, 2024

Commit

4d8deb8

1 Parent(s): 445f5b9

Researcher Done

Browse files

Files changed (5) hide show

.env +3 -0
app.py +37 -0
config.py +18 -0
requirements.txt +122 -0
researcher.py +93 -0

.env ADDED Viewed

	@@ -0,0 +1,3 @@

+GROQ_API_KEY = "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"
+SERPER_API_KEY = "a89c1bc89b03a84f903ebe84e0c389fc16d2a072"
+SERPER_API_KEY = "a89c1bc89b03a84f903ebe84e0c389fc16d2a072"

app.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import streamlit as st
+from streamlit_chat import message
+from researcher import Researcher
+from dotenv import find_dotenv, load_dotenv
+# Load variables from the nearest .env file into the process environment.
+load_dotenv(find_dotenv())
+# Use the full browser width for the page layout.
+st.set_page_config(layout="wide")
+# NOTE(review): `clicked` is forced to True on every script run, so the
+# `if st.session_state.clicked:` branch further down always executes.
+st.session_state.clicked=True
+@st.cache_resource(show_spinner=True)
+def create_researcher():
+    researcher = Researcher()
+    return researcher
+# Module-level Researcher handle used by the "Send" button handler below.
+research_apprentice = create_researcher()
+def display_conversation(history):
+    for i in range(len(history["apprentice"])):
+        message(history["user"][i], is_user=True, key=str(i) + "_user")
+        message(history["apprentice"][i], key=str(i))
+if st.session_state.clicked:
+    st.title("InfoGenie - Your 24/7 AI Research Apprentice 🧑‍💻")
+    st.subheader("An AI apprentice who can serve you 24/7 by researching on a given question in realtime over Internet and provide you answers accurately within a blink of an eye.")
+    if "apprentice" not in st.session_state:
+        st.session_state["apprentice"] = ["Hello. How can I help you?"]
+    if "user" not in st.session_state:
+        st.session_state["user"] = ["Hey InfoGenie!"]
+    with st.expander("Command InfoGenie"):
+        research_query_input = st.text_input("Resarch Query")
+        if st.button("Send"):
+            robowiz_output = research_apprentice.research(research_query_input)
+            st.session_state["user"].append(research_query_input)
+            st.session_state["apprentice"].append(robowiz_output)
+            if st.session_state["apprentice"]:
+                display_conversation(st.session_state)

config.py ADDED Viewed

	@@ -0,0 +1,18 @@

+PROMPT_TEMPLATE = """
+You are a great researcher. With the information provided understand in deep and try to answer the question.
+If you cant answer the question based on the information either say you cant find an answer or unable to find an answer.
+So try to understand in depth about the context and answer only based on the information provided. Dont generate irrelevant answers.
+Context: {context}
+Question: {question}
+Do provide only helpful answers
+Answer:
+"""
+INPUT_VARIABLES = ["context", "question"]
+SEPARATORS = "\n"
+CHUNK_SIZE = 10000
+CHUNK_OVERLAP = 1000
+EMBEDDER = "BAAI/bge-base-en-v1.5"
+CHAIN_TYPE = "stuff"
+SEARCH_KWARGS = {'k': 3}

requirements.txt ADDED Viewed

	@@ -0,0 +1,122 @@

+aiohttp==3.9.3
+aiosignal==1.3.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.3.0
+attrs==23.2.0
+backoff==2.2.1
+beautifulsoup4==4.12.3
+blinker==1.7.0
+cachetools==5.3.3
+certifi==2024.2.2
+chardet==5.2.0
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+contourpy==1.2.0
+cycler==0.12.1
+dataclasses-json==0.6.4
+distro==1.9.0
+emoji==2.10.1
+faiss-cpu==1.8.0
+filelock==3.9.0
+filetype==1.2.0
+fonttools==4.49.0
+frozenlist==1.4.1
+fsspec==2024.2.0
+gitdb==4.0.11
+GitPython==3.1.42
+greenlet==3.0.3
+groq==0.4.2
+h11==0.14.0
+httpcore==1.0.4
+httpx==0.27.0
+huggingface-hub==0.21.3
+idna==3.6
+importlib-metadata==7.0.1
+Jinja2==3.1.2
+joblib==1.3.2
+jsonpatch==1.33
+jsonpath-python==1.0.6
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+langchain==0.1.10
+langchain-community==0.0.25
+langchain-core==0.1.28
+langchain-groq==0.0.1
+langchain-text-splitters==0.0.1
+langdetect==1.0.9
+langsmith==0.1.14
+lxml==5.1.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+marshmallow==3.21.0
+matplotlib==3.8.3
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.5
+mypy-extensions==1.0.0
+networkx==3.2.1
+nltk==3.8.1
+numpy==1.26.4
+orjson==3.9.15
+packaging==23.2
+pandas==2.2.1
+pillow==10.2.0
+protobuf==4.25.3
+pyarrow==15.0.0
+pydantic==2.6.3
+pydantic_core==2.16.3
+pydeck==0.8.1b0
+Pygments==2.17.2
+pyparsing==3.1.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-iso639==2024.2.7
+pytz==2024.1
+PyYAML==6.0.1
+rapidfuzz==3.6.1
+referencing==0.33.0
+regex==2023.12.25
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.0
+safetensors==0.4.2
+scikit-learn==1.4.1.post1
+scipy==1.12.0
+seaborn==0.13.2
+sentence-transformers==2.5.1
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+soupsieve==2.5
+SQLAlchemy==2.0.27
+streamlit==1.31.1
+streamlit-chat==0.1.1
+sympy==1.12
+tabulate==0.9.0
+tenacity==8.2.3
+threadpoolctl==3.3.0
+tokenizers==0.15.2
+toml==0.10.2
+toolz==0.12.1
+torch==2.2.1
+torchaudio==2.2.1
+torchvision==0.17.1
+tornado==6.4
+tqdm==4.66.2
+transformers==4.38.2
+typing-inspect==0.9.0
+typing_extensions==4.8.0
+tzdata==2024.1
+tzlocal==5.2
+unstructured==0.11.8
+unstructured-client==0.21.0
+urllib3==2.2.1
+validators==0.22.0
+watchdog==4.0.0
+wrapt==1.16.0
+yarl==1.9.4
+zipp==3.17.0

researcher.py ADDED Viewed

	@@ -0,0 +1,93 @@

+from config import *
+import os
+from dotenv import load_dotenv, find_dotenv
+import json
+import requests
+from langchain_groq import ChatGroq
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+from langchain.document_loaders.url import UnstructuredURLLoader
+from langchain.vectorstores.faiss import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+import os
+# Load variables from the nearest .env file so the os.getenv() calls in
+# Researcher.__init__ can see the API keys.
+load_dotenv(find_dotenv())
+from langchain.globals import set_debug
+# NOTE(review): turns on LangChain's global debug flag as an import side
+# effect - presumably verbose chain logging; confirm this is wanted outside
+# local development.
+set_debug(True)
+class Researcher:
+    def __init__(self):
+        self.serper_api_key = os.getenv("SERPER_API_KEY")
+        self.groq_api_key = os.getenv("GROQ_API_KEY")
+        self.prompt_template = PromptTemplate(
+            template=PROMPT_TEMPLATE,
+            input_variables=INPUT_VARIABLES
+        )
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            separators=SEPARATORS,
+            chunk_size=CHUNK_SIZE,
+            chunk_overlap=CHUNK_OVERLAP
+        )
+        self.llm = ChatGroq(temperature=0.5, model_name="mixtral-8x7b-32768", groq_api_key=self.groq_api_key)
+        self.hfembeddings = HuggingFaceEmbeddings(
+                            model_name=EMBEDDER,
+                            model_kwargs={'device': 'cpu'}
+                        )
+    def search_articles(self, query):
+        url = "https://google.serper.dev/search"
+        data = json.dumps({"q":query})
+        headers = {
+            'X-API-KEY': self.serper_api_key,
+            'Content-Type': 'application/json'
+        }
+        response = requests.request("POST", url, headers=headers, data=data)
+        return response.json()
+    def research_answerer(self):
+        research_qa_chain = RetrievalQA.from_chain_type(
+                llm=self.llm,
+                chain_type=CHAIN_TYPE,
+                retriever= self.db.as_retriever(search_kwargs=SEARCH_KWARGS),
+                return_source_documents=True,
+                verbose=True,
+                chain_type_kwargs={"prompt": self.prompt_template}
+            )
+        return research_qa_chain
+    def get_urls(self, articles):
+        urls = []
+        try:
+            urls.append(articles["answerBox"]["link"])
+        except:
+            pass
+        for i in range(0, min(3, len(articles["organic"]))):
+            urls.append(articles["organic"][i]["link"])
+        return urls
+    def get_content_from_urls(self, urls):
+        loader = UnstructuredURLLoader(urls=urls)
+        research_content = loader.load()
+        return research_content
+    def research_given_query(self, research_objective, research_content):
+        docs = self.text_splitter.split_documents(research_content)
+        self.db = FAISS.from_documents(documents=docs, embedding=self.hfembeddings)
+        bot = self.research_answerer()
+        research_out =bot({"query": research_objective})
+        return research_out["result"]
+    def research(self, query):
+        search_articles = self.search_articles(query)
+        urls = self.get_urls(search_articles)
+        research_content = self.get_content_from_urls(urls)
+        answer = self.research_given_query(query, research_content)
+        return answer