import os
import pickle as pkl
import time

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
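
# Streamlit app: ingest up to three article URLs, split and embed their text into a
# Chroma vector store, and answer questions about them with a
# RetrievalQAWithSourcesChain backed by an OpenAI-compatible chat model.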


load_dotenv("ping.env")
api_key = os.getenv("OPENAI_API_KEY")
api_base = os.getenv("OPENAI_API_BASE")

# Chat model served through an OpenAI-compatible endpoint configured in ping.env.
llm = ChatOpenAI(model_name="google/gemma-3n-e2b-it:free", temperature=0,
                 openai_api_key=api_key, openai_api_base=api_base)
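
# Load the pre-built embedding model pickled in embedmo.pkl.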
try:
    with open("embedmo.pkl", "rb") as f:
        m1 = pkl.load(f)
    # Quick sanity check
    if not isinstance(m1, SentenceTransformerEmbeddings):
        raise ValueError("Loaded object is not a SentenceTransformerEmbeddings instance.")
except Exception as e:
    st.error(f"Failed to load embedding model: {str(e)}")
    st.stop()
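
# Note: embedmo.pkl is assumed to hold a SentenceTransformerEmbeddings object created
# offline; a minimal sketch of how such a pickle could be produced (the model name
# below is an assumption, not taken from this repo):
#
#     from langchain_community.embeddings import SentenceTransformerEmbeddings
#     import pickle
#
#     emb = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
#     with open("embedmo.pkl", "wb") as f:
#         pickle.dump(emb, f)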

st.title("URL ANALYSER 🔗")
st.sidebar.title("Enter your URLs 🔗")
mp = st.empty()  # placeholder reused for status messages
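# Sidebar inputs (below): up to three URLs to analyse.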


url1 = st.sidebar.text_input("URL 1 🔗")
url2 = st.sidebar.text_input("URL 2 🔗")
url3 = st.sidebar.text_input("URL 3 🔗")


pressed = st.button("Process URLs")
if pressed:
    # Keep only the URLs the user actually filled in.
    urls = [u for u in (url1, url2, url3) if u.strip()]
    if not urls:
        st.warning("Please enter at least one URL.")
        st.stop()
    for u in urls:
        st.write(u)
    mp.text("Loading URL content... ☑️☑️☑️")
    sic = UnstructuredURLLoader(urls=urls)
    docs = sic.load()
    st.write(f"Documents loaded: {len(docs)}")
    mp.text("Splitting text into chunks... ☑️☑️☑️")
    tot = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        encoding_name="cl100k_base", chunk_size=512, chunk_overlap=16
    )
    doccs = tot.split_documents(docs)
    st.write(f"Chunks created: {len(doccs)}")
    mp.text("Building the vector store... ☑️☑️☑️")
    vv = Chroma.from_documents(doccs, m1)
    r2 = vv.as_retriever(search_type="similarity", search_kwargs={"k": 4})
    mp.text("Setting up the retrieval chain... ☑️☑️☑️")
    ra1 = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, retriever=r2, chain_type="map_reduce")
    st.session_state.ra1 = ra1
    mp.text("Vector store & retriever ready ✅✅✅")
    time.sleep(3)
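
# Question answering: route the user's query through the retrieval chain kept in session state.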
query = mp.text_input("Your question?")
if query:
    if "ra1" not in st.session_state:
        st.warning("Please submit your URLs first.")
    else:
        with st.spinner("Wait for it..."):
            result = st.session_state.ra1({"question": query}, return_only_outputs=True)
        st.header("Answer")
        st.subheader(result["answer"])
        if st.button("Source"):
            sources = result.get("sources", "")
            st.subheader("Sources")
            for line in sources.split("\n"):
                st.write(line)
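
# To launch the app: `streamlit run <path-to-this-file>` with OPENAI_API_KEY and
# OPENAI_API_BASE defined in ping.env.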