| import gradio as gr |
| from PyPDF2 import PdfReader |
| from transformers import AutoTokenizer, AutoModel |
| import torch |
| import faiss |
| import numpy as np |
| from groq import Groq |
| import os |
|
|
| |
# Hugging Face model id loaded by LegalBERTEmbedder for chunk and query embeddings.
LEGAL_BERT_MODEL = "nlpaueb/legal-bert-base-uncased"


# PDFs to index, as (file path, human-readable label) pairs. The label is
# prefixed to every chunk taken from that file.
DOCS = [
    ("bns_full.pdf", "Bharatiya Nyaya Sanhita 2023"),
    ("bns_ipc_mapping.pdf", "BNS-IPC Comparative Mapping"),
]


# Chunking parameters, measured in characters of extracted text.
MAX_CHUNK_SIZE = 1000
OVERLAP = 200
# Number of chunks retrieved from the FAISS index for each query.
TOP_K = 5
# Model name passed to the Groq chat-completions call.
LLAMA_MODEL = 'llama-3.3-70b-versatile'


# The API key is read from the environment; os.getenv returns None when the
# variable is not set.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
groq_client = Groq(api_key=GROQ_API_KEY)
|
|
| |
class LegalBERTEmbedder:
    """Embed text with a Hugging Face encoder, one vector per input text.

    The tokenizer and model are loaded once in the constructor, moved to CUDA
    when it is available (CPU otherwise) and switched to eval mode.
    """

    def __init__(self, model_name=LEGAL_BERT_MODEL):
        """Load the tokenizer and model identified by ``model_name``."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()

    def embed(self, texts):
        """Return a 2-D NumPy array with one embedding row per text.

        Each text is tokenized separately (truncated to 512 tokens) and
        represented by the hidden state at position 0 of the last layer.

        Args:
            texts: An iterable of strings. A single string is treated as one
                text rather than being iterated character by character.

        Raises:
            ValueError: If ``texts`` is empty.
        """
        # A bare string is itself iterable; without this guard every
        # character would be embedded as its own "text".
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)
        if not texts:
            # np.vstack would fail on an empty list with an unrelated message.
            raise ValueError("embed() requires at least one text to embed")

        rows = []
        with torch.no_grad():
            for text in texts:
                inputs = self.tokenizer(
                    text,
                    return_tensors="pt",
                    truncation=True,
                    max_length=512,
                ).to(self.device)
                outputs = self.model(**inputs)
                first_token_state = outputs.last_hidden_state[:, 0, :]
                rows.append(first_token_state.cpu().numpy().flatten())
        return np.vstack(rows)
|
|
| |
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, each followed by a newline.

    Pages for which the reader returns no text (``None`` or an empty string)
    are skipped.

    Args:
        pdf_path: Path of the PDF file, passed to ``PdfReader``.

    Returns:
        The concatenated page texts, or an empty string when no page yields
        any text.
    """
    reader = PdfReader(pdf_path)
    page_texts = (page.extract_text() for page in reader.pages)
    # A single join avoids rebuilding a growing string once per page.
    return "".join(f"{text}\n" for text in page_texts if text)
|
|
def chunk_text(text, max_chunk_size=MAX_CHUNK_SIZE, overlap=OVERLAP):
    """Split text into consecutive, overlapping character windows.

    Each chunk holds at most ``max_chunk_size`` characters and starts
    ``max_chunk_size - overlap`` characters after the previous one.

    Args:
        text: The string to split.
        max_chunk_size: Maximum number of characters per chunk; must be > 0.
        overlap: Number of characters shared by neighbouring chunks; must
            satisfy ``0 <= overlap < max_chunk_size``.

    Returns:
        A list of chunks in document order; empty when ``text`` is empty.

    Raises:
        ValueError: If the size/overlap combination is invalid.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")
    if not 0 <= overlap < max_chunk_size:
        # A step of zero or less would never advance `start`.
        raise ValueError("overlap must satisfy 0 <= overlap < max_chunk_size")

    step = max_chunk_size - overlap
    length = len(text)
    chunks = []
    start = 0
    while start < length:
        end = min(start + max_chunk_size, length)
        chunks.append(text[start:end])
        if end == length:
            # The text is fully covered; any further window would only
            # repeat the tail of the chunk just added.
            break
        start += step
    return chunks
|
|
| |
def build_faiss_index(embeddings):
    """Build an inner-product FAISS index over L2-normalised embeddings.

    Because every stored vector is normalised to unit length, inner-product
    scores against a normalised query are cosine similarities.

    Args:
        embeddings: 2-D array-like with one embedding per row.

    Returns:
        A ``faiss.IndexFlatIP`` containing the normalised vectors.
    """
    # normalize_L2 works in place; use a private float32, C-contiguous copy
    # so the caller's array is left untouched.
    vectors = np.array(embeddings, dtype=np.float32, order="C")
    faiss.normalize_L2(vectors)
    index = faiss.IndexFlatIP(vectors.shape[1])
    index.add(vectors)
    return index
|
|
def query_faiss(index, query_embed, k=TOP_K):
    """Search the index for the ``k`` vectors most similar to the query.

    Args:
        index: FAISS index to search.
        query_embed: Query embedding(s); a single 1-D vector or a 2-D array
            with one query per row.
        k: Number of neighbours requested per query.

    Returns:
        The ``(distances, indices)`` pair returned by ``index.search``.
        NOTE(review): FAISS is documented to pad missing neighbours with
        index -1 when the index holds fewer than ``k`` vectors; callers
        should skip negative indices.
    """
    # normalize_L2 works in place; normalise a float32 copy so the caller's
    # array is not modified. ndmin=2 lets a single 1-D vector be passed.
    query = np.array(query_embed, dtype=np.float32, order="C", ndmin=2)
    faiss.normalize_L2(query)
    distances, indices = index.search(query, k)
    return distances, indices
|
|
| |
# Startup pipeline: extract every configured PDF, chunk it, embed the chunks
# and build the similarity index used by answer_query.
print("Loading and processing multiple legal documents...")


embedder = LegalBERTEmbedder()
all_chunks = []  # label-prefixed chunk texts, in the same order as the index rows
metadata = []  # (label, raw chunk) pairs, parallel to all_chunks


print("Extracting and chunking text from all PDFs...")
for pdf_path, act_label in DOCS:
    try:
        raw_text = extract_text_from_pdf(pdf_path)
        print(f"Extracted {len(raw_text)} characters from {act_label}")

        chunks = chunk_text(raw_text)
        print(f"Created {len(chunks)} chunks from {act_label}")

        # Prefix each chunk with its source label so the label is part of
        # both the embedded text and the context later shown to the LLM.
        labeled_chunks = [f"[{act_label}] {chunk}" for chunk in chunks]
        all_chunks.extend(labeled_chunks)
        metadata.extend([(act_label, chunk) for chunk in chunks])

    except Exception as e:
        # Best effort: one unreadable document must not prevent the
        # remaining documents from being indexed.
        print(f"Error processing {pdf_path}: {str(e)}")
        continue


print(f"Total chunks created: {len(all_chunks)}")

if not all_chunks:
    # Without this check the failure would surface later, inside the
    # embedding step, with a message unrelated to the real cause.
    raise RuntimeError(
        "No text chunks were produced from the configured documents: "
        + ", ".join(path for path, _ in DOCS)
    )


print("Embedding all text chunks with Legal-BERT...")
chunk_embeddings = embedder.embed(all_chunks)
print("Embeddings created successfully")


print("Building FAISS index...")
faiss_index = build_faiss_index(chunk_embeddings)
print("FAISS index built successfully")
|
|
| |
# System message sent with every chat-completion request. It fixes the
# section headings the model is asked to use in its answer.
SYSTEM_PROMPT = """You are a senior Indian legal expert specializing in the Bharatiya Nyaya Sanhita 2023 (BNS) and its correspondence with the Indian Penal Code 1860 (IPC).
When answering any question, you MUST use this exact format:
CONTEXT/SITUATION:
[Provide detailed explanation of the legal context and situation]
BNS SECTIONS:
[List the specific BNS sections and subsections that apply, with proper citations]
IPC SECTIONS (if applicable):
[List the corresponding IPC sections based on mappings, with proper citations]
SUMMARY:
[Provide a clear one-sentence summary highlighting the applicable BNS and IPC sections in **bold** format]
Always cite specific sections when available and ensure your response covers relevant BNS provisions and mapped IPC equivalents."""
|
|
def build_user_prompt(context, question):
    """Compose the user message from retrieved extracts and the question.

    Args:
        context: Text of the retrieved extracts, inserted verbatim.
        question: The user's question, inserted verbatim.

    Returns:
        The prompt string sent as the ``user`` message.
    """
    prompt = f"""Based on the following relevant extracts from BNS and IPC legislation:
{context}
Question: {question}
Please provide a comprehensive legal answer following the exact format specified in the system instructions."""
    return prompt
|
|
| |
def answer_query(user_query):
    """Answer a legal question using retrieved extracts and the Groq LLM.

    The question is embedded, the TOP_K most similar chunks are looked up in
    the FAISS index, and those chunks are sent with the question to the chat
    model.

    Args:
        user_query: The question typed by the user; may be empty or None.

    Returns:
        The model's answer, or a human-readable message when the query is
        blank, nothing could be retrieved, or an error occurred.
    """
    question = (user_query or "").strip()
    if not question:
        # Avoid an embedding pass and an API call for an empty submission.
        return "Please enter a legal question."

    try:
        query_embed = embedder.embed([question])

        _, indices = query_faiss(faiss_index, query_embed, k=TOP_K)
        # Negative indices mark missing neighbours; used as list indices
        # they would silently select chunks from the end of all_chunks.
        retrieved_chunks = [all_chunks[int(i)] for i in indices[0] if i >= 0]
        if not retrieved_chunks:
            return "No relevant legal text could be retrieved for this question."

        context = "\n\n".join(retrieved_chunks)

        chat_completion = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": SYSTEM_PROMPT
                },
                {
                    "role": "user",
                    "content": build_user_prompt(context, question)
                }
            ],
            model=LLAMA_MODEL,
            temperature=0.1,
            max_tokens=1024
        )

        # Guard against a missing message body before calling .strip().
        answer = (chat_completion.choices[0].message.content or "").strip()
        return answer or "The model returned an empty response. Please try again."

    except Exception as e:
        # UI boundary: report the failure in the output pane instead of
        # letting the exception propagate to the caller.
        return f"Error processing query: {str(e)}\n\nPlease check your Groq API key and internet connection."
|
|
| |
# Text shown in the answer pane before a query is submitted and after "Clear".
_ANSWER_PLACEHOLDER = "*Submit your question to get a structured legal analysis...*"

# Gradio UI: a query box, submit/clear buttons, a Markdown answer pane and a
# list of example questions.
# NOTE(review): the emoji in this block were mis-encoded in the source and
# have been replaced with best-guess glyphs; confirm against the intended design.
with gr.Blocks(title="IPC & BNS Legal Assistant") as demo:
    # Built from explicit lines so the rendered Markdown does not depend on
    # how this source file is indented.
    gr.Markdown(
        "# 🏛️ IPC & BNS Legal Assistant\n"
        "\n"
        "**Comprehensive Legal Q&A System covering:**\n"
        "- Bharatiya Nyaya Sanhita 2023 (BNS)\n"
        "- Corresponding Indian Penal Code 1860 (IPC) sections\n"
        "\n"
        "Ask any question about Indian criminal legislation and get structured legal answers with proper citations.\n"
    )

    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(
                label="💼 Enter your legal query",
                placeholder="e.g., What are the penalties for murder under BNS? What is the IPC equivalent for theft?",
                lines=4,
                max_lines=8
            )

            with gr.Row():
                submit_btn = gr.Button("🔍 Get Legal Answer", variant="primary", scale=2)
                clear_btn = gr.Button("🗑️ Clear", scale=1)

    with gr.Row():
        answer_output = gr.Markdown(
            label="📋 Legal Analysis",
            value=_ANSWER_PLACEHOLDER
        )

    # Both the button and pressing Enter in the textbox run the same handler.
    submit_btn.click(answer_query, inputs=query_input, outputs=answer_output)
    query_input.submit(answer_query, inputs=query_input, outputs=answer_output)
    # Clear empties the query box and restores the placeholder text.
    clear_btn.click(lambda: ("", _ANSWER_PLACEHOLDER),
                    outputs=[query_input, answer_output])

    gr.Examples(
        examples=[
            ["What are the penalties for murder under BNS?"],
            ["What is the IPC equivalent for BNS Section 103?"],
            ["What constitutes theft according to BNS legislation?"],
            ["How are punishments defined for assault in BNS?"],
            ["What are the legal provisions for robbery under IPC and BNS?"]
        ],
        inputs=query_input,
        outputs=answer_output,
        fn=answer_query,
        cache_examples=False
    )
|
|
| |
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {
        "share": False,
        "debug": True,
        "show_error": True,
    }
    demo.launch(**launch_options)