""" ProVerBs Legal AI - Complete AI Integration Features: 7 AI Modes + Rotating Logos + DeepSeek-OCR + ERNIE-4.5-VL + GDPVAL """ import gradio as gr from huggingface_hub import InferenceClient import json import os from datetime import datetime from typing import Dict, List, Optional import base64 from pathlib import Path # AI Model Integration try: from transformers import pipeline, AutoModel from datasets import load_dataset AI_MODELS_AVAILABLE = True except ImportError: AI_MODELS_AVAILABLE = False print("â ī¸ AI models not available. Install transformers and datasets.") class EnhancedAILegalChatbot: """ Enhanced AI Legal Chatbot with multiple AI models - DeepSeek-OCR for document text extraction - ERNIE-4.5-VL for vision-language understanding - OpenAI GDPVAL dataset for legal knowledge """ def __init__(self): self.specialized_modes = { "navigation": "Application Navigation Guide", "general": "General Legal Assistant", "document_validation": "Document Validator with OCR & Vision AI", "legal_research": "Legal Research Assistant with GDPVAL", "etymology": "Legal Etymology Lookup", "case_management": "Case Management Helper", "regulatory_updates": "Regulatory Update Monitor" } # Initialize AI models self.ocr_pipeline = None self.vision_pipeline = None self.gdpval_dataset = None self.minimax_pipeline = None if AI_MODELS_AVAILABLE: self._initialize_models() def _initialize_models(self): """Initialize all AI models""" # DeepSeek-OCR for text extraction try: print("đĻ Loading DeepSeek-OCR...") self.ocr_pipeline = pipeline( "image-text-to-text", model="deepseek-ai/DeepSeek-OCR", trust_remote_code=True ) print("â DeepSeek-OCR loaded!") except Exception as e: print(f"â ī¸ DeepSeek-OCR not loaded: {e}") # ERNIE-4.5-VL for vision-language understanding try: print("đĻ Loading ERNIE-4.5-VL (this may take a while)...") self.vision_pipeline = pipeline( "image-text-to-text", model="baidu/ERNIE-4.5-VL-28B-A3B-Thinking", trust_remote_code=True ) print("â ERNIE-4.5-VL loaded!") except Exception as e: print(f"â ī¸ ERNIE-4.5-VL not loaded: {e}") # OpenAI GDPVAL dataset for legal knowledge try: print("đĻ Loading OpenAI GDPVAL dataset...") self.gdpval_dataset = load_dataset("openai/gdpval", split="train") print(f"â GDPVAL loaded! {len(self.gdpval_dataset)} entries") except Exception as e: print(f"â ī¸ GDPVAL not loaded: {e}") # MiniMax-M2 for advanced text generation try: print("đĻ Loading MiniMax-M2...") self.minimax_pipeline = pipeline( "text-generation", model="MiniMaxAI/MiniMax-M2", trust_remote_code=True ) print("â MiniMax-M2 loaded!") except Exception as e: print(f"â ī¸ MiniMax-M2 not loaded: {e}") def process_with_ocr(self, image_path: str) -> str: """Extract text using DeepSeek-OCR""" if not self.ocr_pipeline: return "â OCR not available" try: result = self.ocr_pipeline(image_path) return result[0]['generated_text'] if result else "" except Exception as e: return f"â OCR error: {str(e)}" def process_with_vision(self, image_url: str, question: str) -> str: """Analyze image using ERNIE-4.5-VL""" if not self.vision_pipeline: return "â Vision AI not available" try: messages = [ { "role": "user", "content": [ {"type": "image", "url": image_url}, {"type": "text", "text": question} ] } ] result = self.vision_pipeline(text=messages) return result except Exception as e: return f"â Vision AI error: {str(e)}" def search_gdpval(self, query: str, limit: int = 5) -> str: """Search OpenAI GDPVAL dataset for legal knowledge""" if not self.gdpval_dataset: return "â GDPVAL dataset not available" try: # Simple keyword search (can be enhanced with embeddings) results = [] query_lower = query.lower() for i, entry in enumerate(self.gdpval_dataset): if i >= 100: # Limit search for performance break # Check if query keywords appear in entry entry_text = str(entry).lower() if any(word in entry_text for word in query_lower.split()): results.append(entry) if len(results) >= limit: break if results: formatted = "### đ GDPVAL Knowledge Base Results:\n\n" for i, result in enumerate(results, 1): formatted += f"**Result {i}:**\n{result}\n\n" return formatted else: return "No relevant results found in GDPVAL dataset." except Exception as e: return f"â GDPVAL search error: {str(e)}" def analyze_legal_document(self, image_path: str) -> str: """ Complete document analysis using multiple AI models: 1. DeepSeek-OCR for text extraction 2. ERNIE-4.5-VL for visual understanding 3. Legal analysis with AI """ analysis = "## đ Complete Legal Document Analysis\n\n" # Step 1: OCR Text Extraction analysis += "### Step 1: Text Extraction (DeepSeek-OCR)\n" extracted_text = self.process_with_ocr(image_path) analysis += f"```\n{extracted_text[:500]}...\n```\n\n" # Step 2: Vision Analysis analysis += "### Step 2: Visual Document Understanding (ERNIE-4.5-VL)\n" vision_result = self.process_with_vision( image_path, "Analyze this legal document. Identify document type, key sections, and any notable elements." ) analysis += f"{vision_result}\n\n" # Step 3: Legal Term Detection analysis += "### Step 3: Legal Analysis\n" legal_terms = self._detect_legal_terms(extracted_text) analysis += f"**Legal Terms Found:** {legal_terms}\n\n" # Step 4: Document Quality Check analysis += "### Step 4: Document Quality\n" quality = self._assess_document_quality(extracted_text) analysis += f"{quality}\n\n" return analysis def _detect_legal_terms(self, text: str) -> str: """Detect legal terminology in text""" legal_terms = [ 'contract', 'agreement', 'party', 'clause', 'provision', 'whereas', 'hereby', 'herein', 'pursuant', 'consideration', 'liability', 'indemnify', 'warranty', 'breach', 'terminate', 'arbitration', 'jurisdiction', 'statute', 'regulation' ] found = [term for term in legal_terms if term.lower() in text.lower()] return ', '.join(found) if found else "None detected" def _assess_document_quality(self, text: str) -> str: """Assess document completeness and quality""" issues = [] if len(text) < 100: issues.append("â ī¸ Document seems very short") if not any(term in text.lower() for term in ['party', 'parties', 'agreement']): issues.append("â ī¸ Missing party identification") if not any(term in text.lower() for term in ['date', 'dated', 'effective']): issues.append("â ī¸ No effective date found") if issues: return "**Issues Found:**\n" + "\n".join(issues) else: return "â Document appears complete" def generate_with_minimax(self, messages: list) -> str: """Generate response using MiniMax-M2""" if not self.minimax_pipeline: return "â MiniMax-M2 not available" try: result = self.minimax_pipeline(messages) return result[0]['generated_text'] if result else "No response" except Exception as e: return f"â MiniMax error: {str(e)}" def get_mode_system_prompt(self, mode: str) -> str: """Get specialized system prompt based on mode""" prompts = { "navigation": """You are a ProVerBs Application Navigation Guide. **Enhanced Features:** - Document Analysis with OCR (DeepSeek-OCR) - Vision AI Analysis (ERNIE-4.5-VL) - Legal Knowledge Base (OpenAI GDPVAL) - 7 Specialized AI Modes Guide users to the right features.""", "general": """You are a General Legal Assistant for ProVerBs Legal AI Platform. **Available Tools:** - GDPVAL legal knowledge dataset - Vision AI for document understanding - OCR for text extraction Provide accurate legal information while noting you cannot provide legal advice.""", "document_validation": """You are an Advanced Document Validator. **AI-Powered Capabilities:** - **DeepSeek-OCR**: Extract text from scanned documents - **ERNIE-4.5-VL**: Understand document structure and layout visually - **Legal Analysis**: Validate completeness and legal terms **Process:** 1. Extract text with OCR 2. Analyze visually with ERNIE 3. Check legal validity 4. Provide detailed feedback""", "legal_research": """You are a Legal Research Assistant with GDPVAL access. **Enhanced Research Tools:** - OpenAI GDPVAL dataset for legal knowledge - Case law and precedent search - Statute and regulation analysis Provide comprehensive research with citations.""", "etymology": """You are a Legal Etymology Expert. Explain origins of legal terms.""", "case_management": """You are a Case Management Helper with AI document processing.""", "regulatory_updates": """You are a Regulatory Update Monitor.""" } return prompts.get(mode, prompts["general"]) def respond_with_mode( message, history: list, mode: str, model_choice: str, max_tokens: int, temperature: float, top_p: float, ): """Generate AI response based on selected mode""" chatbot = EnhancedAILegalChatbot() system_message = chatbot.get_mode_system_prompt(mode) # Enhanced responses for specific modes if mode == "document_validation" and "analyze" in message.lower(): yield """ ## đ Advanced Document Validator **Multi-AI Analysis Available:** 1. **DeepSeek-OCR**: Extract text from scanned documents 2. **ERNIE-4.5-VL**: Understand document structure visually 3. **Legal AI**: Validate and analyze content **Upload a document to get:** - â Text extraction - â Visual structure analysis - â Legal term detection - â Completeness check - â Quality assessment """ return if mode == "legal_research" and "search" in message.lower(): # Search GDPVAL dataset gdpval_results = chatbot.search_gdpval(message) yield gdpval_results return # Choose AI model based on selection if model_choice == "MiniMax-M2" and chatbot.minimax_pipeline: # Use MiniMax-M2 try: messages_minimax = [ {"role": "system", "content": system_message}, {"role": "user", "content": message} ] response = chatbot.generate_with_minimax(messages_minimax) yield response except Exception as e: yield f"MiniMax Error: {str(e)}" else: # Use HF Inference API (Llama) as default try: client = InferenceClient(model="meta-llama/Llama-3.3-70B-Instruct") messages = [{"role": "system", "content": system_message}] for user_msg, assistant_msg in history: if user_msg: messages.append({"role": "user", "content": user_msg}) if assistant_msg: messages.append({"role": "assistant", "content": assistant_msg}) messages.append({"role": "user", "content": message}) response = "" for message_chunk in client.chat_completion( messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p, ): if message_chunk.choices and message_chunk.choices[0].delta.content: token = message_chunk.choices[0].delta.content response += token yield response except Exception as e: # Provide demo response for local preview demo_response = f""" ## đ¯ Preview Mode Response **Your Question:** {message} **Mode:** {mode} **Note:** This is a demo response for local preview. The AI chat will work fully once deployed to Hugging Face Spaces with authentication. ### What This Mode Does: {system_message[:200]}... ### Features Available: - â UI and navigation fully functional - â All tabs working - â Watermark backgrounds active - â Logo rotation active - â ī¸ AI responses require HF deployment **To test AI chat:** Deploy to Hugging Face Spaces where authentication is automatic. --- **For now:** Test the visual design, tab navigation, dropdowns, and timing features! """ yield demo_response # Custom CSS with watermark background logos custom_css = """ .gradio-container { max-width: 1200px !important; position: relative; } /* Watermark background container */ .watermark-container { position: fixed; top: 0; left: 0; width: 100%; height: 100%; z-index: 0; pointer-events: none; overflow: hidden; } /* Watermark logo styling */ .watermark-logo { position: absolute; width: 300px; height: 300px; opacity: 0.08; object-fit: contain; transition: opacity 2s ease-in-out; } .watermark-logo.active { opacity: 0.08; } .watermark-logo.hidden { opacity: 0; } /* Position watermarks at different locations */ .watermark-1 { top: 10%; left: 10%; transform: rotate(-15deg); } .watermark-2 { top: 15%; right: 15%; transform: rotate(20deg); } .watermark-3 { bottom: 20%; left: 15%; transform: rotate(10deg); } .watermark-4 { bottom: 15%; right: 10%; transform: rotate(-20deg); } .watermark-5 { top: 50%; left: 50%; transform: translate(-50%, -50%) rotate(-5deg); } /* Ensure content is above watermarks */ .gradio-container > * { position: relative; z-index: 1; } .header-section { text-align: center; padding: 40px 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 12px; margin-bottom: 30px; position: relative; z-index: 2; } .logo-container { margin-bottom: 20px; display: flex; justify-content: center; align-items: center; } .rotating-logo { width: 150px; height: 150px; border-radius: 50%; object-fit: cover; border: 4px solid rgba(255, 255, 255, 0.8); box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3); } .header-section h1 { font-size: 3rem; margin-bottom: 10px; font-weight: 700; text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2); } /* Make sure tabs and content are above watermarks */ .tabs, .tabitem { position: relative; z-index: 1; } """ # JavaScript for rotating logos in header and random watermarks rotating_logo_js = """ """ # Create the main application demo = gr.Blocks(title="ProVerBs Legal AI Platform") with demo: # Watermark background logos gr.HTML(f"""
Lawful vs. Legal: Dual Analysis "Adappt'plication"
Multi-AI Legal System | 5 AI Models Working Together đ
âī¸ ProVerBs Legal AI Platform | Version 2.1.0 - 5-Model AI Edition
Powered by: DeepSeek-OCR âĸ ERNIE-4.5-VL âĸ GDPVAL âĸ Llama 3.3 âĸ MiniMax-M2
Š 2024 ProVerBs Legal AI