# NOTE(review): removed paste/export residue ("Spaces:" / "Runtime error" status
# lines) that was not part of the original Python source.
# Project configuration: PROMPT_TEMPLATE, INPUT_VARIABLES, SEPARATORS,
# CHUNK_SIZE, CHUNK_OVERLAP, EMBEDDER, CHAIN_TYPE, SEARCH_KWARGS.
# Kept first (and as a wildcard) to preserve the original name-resolution order.
from config import *

import json
import os  # duplicate second `import os` removed

import requests
from dotenv import load_dotenv, find_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders.url import UnstructuredURLLoader
from langchain.globals import set_debug
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq

# Load SERPER_API_KEY / GROQ_API_KEY (read later via os.getenv) from a .env file.
load_dotenv(find_dotenv())

# Turn on LangChain's global debug tracing for chain executions.
set_debug(True)
class Researcher:
    """Answer a research query by searching the web (Serper), loading the top
    article pages, indexing them in FAISS, and querying a Groq-hosted LLM
    through a RetrievalQA chain.
    """

    def __init__(self):
        # API keys are expected in the environment (loaded from .env at import time).
        self.serper_api_key = os.getenv("SERPER_API_KEY")
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        # Prompt and splitter settings come from config (imported with *).
        self.prompt_template = PromptTemplate(
            template=PROMPT_TEMPLATE,
            input_variables=INPUT_VARIABLES
        )
        self.text_splitter = RecursiveCharacterTextSplitter(
            separators=SEPARATORS,
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP
        )
        self.llm = ChatGroq(
            temperature=0.5,
            model_name="mixtral-8x7b-32768",
            groq_api_key=self.groq_api_key
        )
        # CPU-only embeddings so the pipeline runs without a GPU.
        self.hfembeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDER,
            model_kwargs={'device': 'cpu'}
        )

    def search_articles(self, query):
        """POST `query` to the Serper Google-search API.

        Returns the parsed JSON response (dict).
        Raises requests.HTTPError on a non-2xx status and
        requests.Timeout if the API does not answer within 30 s.
        """
        url = "https://google.serper.dev/search"
        payload = json.dumps({"q": query})
        headers = {
            'X-API-KEY': self.serper_api_key,
            'Content-Type': 'application/json'
        }
        # Fix: use requests.post directly, add a timeout so a hung API call
        # cannot block the pipeline forever, and surface HTTP errors instead
        # of silently json-parsing an error body.
        response = requests.post(url, headers=headers, data=payload, timeout=30)
        response.raise_for_status()
        return response.json()

    def research_answerer(self):
        """Build a RetrievalQA chain over the FAISS index.

        NOTE: self.db must already exist — it is created in
        research_given_query before this is called.
        """
        research_qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type=CHAIN_TYPE,
            retriever=self.db.as_retriever(search_kwargs=SEARCH_KWARGS),
            return_source_documents=True,
            verbose=True,
            chain_type_kwargs={"prompt": self.prompt_template}
        )
        return research_qa_chain

    def get_urls(self, articles):
        """Pick up to one answer-box link plus the top three organic links
        from a Serper search response dict.
        """
        urls = []
        # The answer box is optional in Serper responses; include its link
        # when present. Fix: was a bare `except:` — narrowed to KeyError so
        # real errors are no longer swallowed.
        try:
            urls.append(articles["answerBox"]["link"])
        except KeyError:
            pass
        # Fix: "organic" can also be absent (e.g. no results); the original
        # raised KeyError here. Default to an empty list and take at most 3.
        for result in articles.get("organic", [])[:3]:
            urls.append(result["link"])
        return urls

    def get_content_from_urls(self, urls):
        """Download and parse the page content for each URL into documents."""
        loader = UnstructuredURLLoader(urls=urls)
        return loader.load()

    def research_given_query(self, research_objective, research_content):
        """Chunk the loaded content, index it in FAISS, and answer the query.

        Side effect: (re)binds self.db to the freshly built FAISS index.
        Returns the chain's "result" string.
        """
        docs = self.text_splitter.split_documents(research_content)
        self.db = FAISS.from_documents(documents=docs, embedding=self.hfembeddings)
        bot = self.research_answerer()
        research_out = bot({"query": research_objective})
        return research_out["result"]

    def research(self, query):
        """End-to-end pipeline: search -> select URLs -> load pages -> answer."""
        search_results = self.search_articles(query)
        urls = self.get_urls(search_results)
        research_content = self.get_content_from_urls(urls)
        return self.research_given_query(query, research_content)