File size: 12,614 Bytes
e05f7c6
 
2a1b2fa
7beadc0
2a1b2fa
7beadc0
2a1b2fa
7beadc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a1b2fa
eed2a86
 
 
 
7beadc0
2a1b2fa
eed2a86
 
 
 
7beadc0
 
 
 
 
 
 
 
eed2a86
 
 
 
 
7beadc0
 
 
 
 
 
 
 
 
 
 
eed2a86
 
 
 
 
7beadc0
 
 
 
 
 
 
 
 
 
 
eed2a86
 
 
 
 
7beadc0
 
 
 
 
 
 
 
 
 
 
eed2a86
2a1b2fa
 
7beadc0
 
 
eed2a86
 
7beadc0
 
 
eed2a86
 
 
 
 
7beadc0
 
 
2a1b2fa
 
7beadc0
 
 
eed2a86
 
7beadc0
 
 
eed2a86
 
 
 
 
7beadc0
 
 
2a1b2fa
 
7beadc0
 
 
eed2a86
 
7beadc0
 
 
eed2a86
 
 
 
 
7beadc0
 
 
 
 
 
 
2a1b2fa
 
 
 
 
 
 
 
 
 
 
 
7beadc0
2a1b2fa
 
7beadc0
2a1b2fa
 
 
 
7beadc0
2a1b2fa
 
 
 
 
7beadc0
 
 
 
 
2a1b2fa
 
 
 
7beadc0
2a1b2fa
 
7beadc0
2a1b2fa
 
 
 
7beadc0
2a1b2fa
 
 
 
 
7beadc0
 
 
 
 
2a1b2fa
 
 
 
7beadc0
2a1b2fa
 
7beadc0
2a1b2fa
 
 
 
7beadc0
2a1b2fa
 
 
 
 
7beadc0
 
 
 
 
2a1b2fa
 
 
 
 
7beadc0
 
2a1b2fa
 
7beadc0
 
 
 
2a1b2fa
7beadc0
 
2a1b2fa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import os
import sys
import time
from pathlib import Path

# Make this file's own directory importable. It is appended twice on purpose
# to mirror the historical behavior: once in its os.path spelling and once in
# its pathlib spelling, in that order.
sys.path.extend([os.path.dirname(__file__), str(Path(__file__).parent)])

# ============================================================================
# RAG SYSTEM INITIALIZATION WITH PROPER ERROR HANDLING
# ============================================================================

def _build_faiss_index():
    """Build the FAISS index, in-process when possible, else via a subprocess.

    First tries ``scripts.initialize_rag.initialize_rag()``. If that raises
    ``ImportError`` the script ``scripts/initialize_rag.py`` is executed with
    the current interpreter from ``/app`` instead.

    Returns:
        True if either attempt completed successfully, False otherwise
        (a warning is printed in that case).
    """
    try:
        # Try to import and run initialization
        from scripts.initialize_rag import initialize_rag
        initialize_rag()
        print("✅ FAISS index created successfully")
        return True
    except ImportError as e:
        print(f"⚠️ Import error: {e}")
        print("   Running initialization script directly...")
    except Exception as e:
        print(f"⚠️ Initialization error: {e}")
        return False

    # Fallback: run as subprocess. This deliberately lives outside the
    # ``except ImportError`` handler: an exception raised inside a handler is
    # not seen by sibling ``except`` clauses, so a launch failure (for example
    # a missing /app directory) would otherwise escape as an exception.
    import subprocess
    try:
        result = subprocess.run(
            [sys.executable, "scripts/initialize_rag.py"],
            capture_output=True,
            text=True,
            cwd="/app",
        )
    except OSError as e:
        print(f"⚠️ Failed to create FAISS index: {e}")
        return False

    if result.returncode == 0:
        print("✅ FAISS index created via subprocess")
        return True
    print(f"⚠️ Failed to create FAISS index: {result.stderr}")
    return False


def _create_embedding_cache(cache_path):
    """Create an empty embedding-cache SQLite database at ``cache_path``.

    Best effort: any failure is printed and swallowed, never raised.
    """
    import sqlite3
    from contextlib import closing

    try:
        # closing() releases the connection even when a statement fails;
        # sqlite3's own context manager only ends the transaction and leaves
        # the connection open.
        with closing(sqlite3.connect(cache_path)) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS embedding_cache (
                    text_hash TEXT PRIMARY KEY,
                    embedding BLOB NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    access_count INTEGER DEFAULT 0
                )
            """)
            conn.execute("CREATE INDEX IF NOT EXISTS idx_created_at ON embedding_cache(created_at)")
            conn.commit()
        print("✅ Embedding cache created")
    except Exception as e:
        print(f"⚠️ Could not create embedding cache: {e}")


def initialize_rag_system(data_dir="/app/data"):
    """Make sure the FAISS index and the embedding cache exist on disk.

    Creates ``data_dir`` if needed, builds ``faiss_index.bin`` when it is
    missing, and creates an empty ``embedding_cache.db`` when that is missing.
    Progress and warnings are reported with ``print``.

    Args:
        data_dir: Directory holding both files. Defaults to ``/app/data``.
            NOTE(review): the index builder is invoked without arguments, so
            where it writes the index is decided by that script, not by this
            parameter -- confirm before pointing this at another directory.

    Returns:
        False if the FAISS index was missing and could not be built; True
        otherwise. A failure to create the embedding cache is only warned
        about and does not affect the result.

    Raises:
        OSError: If ``data_dir`` cannot be created.
    """
    print("🔧 Initializing FAISS index and cache...")

    # Create data directory if it doesn't exist
    data_dir = Path(data_dir)
    data_dir.mkdir(parents=True, exist_ok=True)

    faiss_path = data_dir / "faiss_index.bin"
    cache_path = data_dir / "embedding_cache.db"

    if faiss_path.exists():
        print(f"✅ FAISS index found at {faiss_path}")
    else:
        print(f"⚠ WARNING: FAISS index not found at {faiss_path}")
        print("   Creating new FAISS index...")
        if not _build_faiss_index():
            return False

    if cache_path.exists():
        print(f"✅ Embedding cache found at {cache_path}")
    else:
        print(f"⚠ WARNING: Embedding cache not found at {cache_path}")
        print("   It will be created automatically on first use.")
        _create_embedding_cache(cache_path)

    print("✅ Configuration validated successfully")
    return True

# Kick off the storage check at import time. Nothing here is fatal: both a
# False result and an unexpected exception only produce a warning, and the
# module goes on to build the UI.
try:
    init_success = initialize_rag_system()
except Exception as exc:
    print(f"⚠️ Unexpected initialization error: {exc}")
    print("   Continuing with limited functionality...")
else:
    if not init_success:
        print("⚠️ RAG system initialization had issues, but continuing anyway...")

# ============================================================================
# GRADIO APP IMPORTS AND SETUP
# ============================================================================

import gradio as gr

# Module-level slots for the lazily created singletons. Each one stays None
# until the matching get_*() loader below manages to create its object.
_embedding_model = None      # filled by get_embedding_model()
_no_compromise_rag = None    # filled by get_no_compromise()
_optimized_rag = None        # filled by get_optimized()
_naive_rag = None            # filled by get_naive()

def get_embedding_model():
    """Return the cached ``all-MiniLM-L6-v2`` SentenceTransformer.

    The model is loaded on the first call and kept in ``_embedding_model``.
    If importing ``sentence_transformers`` or constructing the model fails, a
    warning is printed and None is returned; nothing is cached, so the next
    call tries again.
    """
    global _embedding_model
    if _embedding_model is not None:
        return _embedding_model

    try:
        from sentence_transformers import SentenceTransformer
        print("Loading embedding model...")
        _embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        print("✅ Embedding model loaded successfully")
    except Exception as load_err:
        print(f"⚠️ Error loading embedding model: {load_err}")
        _embedding_model = None
    return _embedding_model

def get_naive():
    """Return the shared ``NaiveRAG`` instance, creating it on first use.

    Returns None (after printing a warning) when ``app.rag_naive`` cannot be
    imported or the constructor raises. A failure is not cached, so a later
    call attempts the initialization again.
    """
    global _naive_rag
    if _naive_rag is not None:
        return _naive_rag

    try:
        from app.rag_naive import NaiveRAG
        print("Initializing Naive RAG...")
        _naive_rag = NaiveRAG()
        print("✅ Naive RAG initialized")
    except ImportError as import_err:
        print(f"⚠️ Could not import NaiveRAG: {import_err}")
        return None
    except Exception as init_err:
        print(f"⚠️ Error initializing Naive RAG: {init_err}")
        return None
    return _naive_rag

def get_optimized():
    """Return the shared ``OptimizedRAG`` instance, creating it on first use.

    Returns None (after printing a warning) when ``app.rag_optimized`` cannot
    be imported or the constructor raises. A failure is not cached, so a later
    call attempts the initialization again.
    """
    global _optimized_rag
    if _optimized_rag is not None:
        return _optimized_rag

    try:
        from app.rag_optimized import OptimizedRAG
        print("Initializing Optimized RAG...")
        _optimized_rag = OptimizedRAG()
        print("✅ Optimized RAG initialized")
    except ImportError as import_err:
        print(f"⚠️ Could not import OptimizedRAG: {import_err}")
        return None
    except Exception as init_err:
        print(f"⚠️ Error initializing Optimized RAG: {init_err}")
        return None
    return _optimized_rag

def get_no_compromise():
    """Return the shared ``NoCompromiseRAG`` instance, creating it on first use.

    Returns None (after printing a warning) when ``app.no_compromise_rag``
    cannot be imported or the constructor raises. A failure is not cached, so
    a later call attempts the initialization again.
    """
    global _no_compromise_rag
    if _no_compromise_rag is not None:
        return _no_compromise_rag

    try:
        from app.no_compromise_rag import NoCompromiseRAG
        print("Initializing No-Compromise RAG...")
        _no_compromise_rag = NoCompromiseRAG()
        print("✅ No-Compromise RAG initialized")
    except ImportError as import_err:
        print(f"⚠️ Could not import NoCompromiseRAG: {import_err}")
        return None
    except Exception as init_err:
        print(f"⚠️ Error initializing No-Compromise RAG: {init_err}")
        return None
    return _no_compromise_rag

def query_naive(question):
    """Answer ``question`` with the Naive RAG pipeline and time the call.

    Returns:
        A 4-tuple ``(answer, latency, chunks_used, cache_hit)`` for the UI:
        the answer as returned by ``rag.query``, the wall-clock latency
        formatted as ``"<ms> ms"``, the chunk count as a string, and
        ``"Yes"``/``"No"``. For blank input, an unavailable pipeline, or any
        exception, the first element is a message and the other three are
        ``"0 ms"``, ``"0"``, ``"No"``.
    """
    no_metrics = ("0 ms", "0", "No")

    is_blank = not question or question.strip() == ""
    if is_blank:
        return ("Please enter a question.", *no_metrics)

    try:
        engine = get_naive()
        if engine is None:
            return ("RAG system not available. Check logs.", *no_metrics)

        # Only the query itself is timed, not the lazy pipeline creation above.
        started = time.perf_counter()
        answer, chunks_used, cache_hit = engine.query(question)
        elapsed_ms = (time.perf_counter() - started) * 1000

        hit_label = "Yes" if cache_hit else "No"
        return answer, f"{elapsed_ms:.1f} ms", str(chunks_used), hit_label
    except Exception as exc:
        message = f"Error in Naive RAG: {str(exc)}"
        print(message)
        return (message, *no_metrics)

def query_optimized(question):
    """Answer ``question`` with the Optimized RAG pipeline and time the call.

    Returns:
        A 4-tuple ``(answer, latency, chunks_used, cache_hit)`` for the UI:
        the answer as returned by ``rag.query``, the wall-clock latency
        formatted as ``"<ms> ms"``, the chunk count as a string, and
        ``"Yes"``/``"No"``. For blank input, an unavailable pipeline, or any
        exception, the first element is a message and the other three are
        ``"0 ms"``, ``"0"``, ``"No"``.
    """
    no_metrics = ("0 ms", "0", "No")

    is_blank = not question or question.strip() == ""
    if is_blank:
        return ("Please enter a question.", *no_metrics)

    try:
        engine = get_optimized()
        if engine is None:
            return ("RAG system not available. Check logs.", *no_metrics)

        # Only the query itself is timed, not the lazy pipeline creation above.
        started = time.perf_counter()
        answer, chunks_used, cache_hit = engine.query(question)
        elapsed_ms = (time.perf_counter() - started) * 1000

        hit_label = "Yes" if cache_hit else "No"
        return answer, f"{elapsed_ms:.1f} ms", str(chunks_used), hit_label
    except Exception as exc:
        message = f"Error in Optimized RAG: {str(exc)}"
        print(message)
        return (message, *no_metrics)

def query_no_compromise(question):
    """Answer ``question`` with the No-Compromise RAG pipeline and time the call.

    Returns:
        A 4-tuple ``(answer, latency, chunks_used, cache_hit)`` for the UI:
        the answer as returned by ``rag.query``, the wall-clock latency
        formatted as ``"<ms> ms"``, the chunk count as a string, and
        ``"Yes"``/``"No"``. For blank input, an unavailable pipeline, or any
        exception, the first element is a message and the other three are
        ``"0 ms"``, ``"0"``, ``"No"``.
    """
    no_metrics = ("0 ms", "0", "No")

    is_blank = not question or question.strip() == ""
    if is_blank:
        return ("Please enter a question.", *no_metrics)

    try:
        engine = get_no_compromise()
        if engine is None:
            return ("RAG system not available. Check logs.", *no_metrics)

        # Only the query itself is timed, not the lazy pipeline creation above.
        started = time.perf_counter()
        answer, chunks_used, cache_hit = engine.query(question)
        elapsed_ms = (time.perf_counter() - started) * 1000

        hit_label = "Yes" if cache_hit else "No"
        return answer, f"{elapsed_ms:.1f} ms", str(chunks_used), hit_label
    except Exception as exc:
        message = f"Error in No-Compromise RAG: {str(exc)}"
        print(message)
        return (message, *no_metrics)

# ============================================================================
# BUILD THE GRADIO INTERFACE
# ============================================================================

def _build_rag_tab(tab_title, handler):
    """Lay out one RAG tab and wire both of its triggers to ``handler``.

    Must be called while a ``gr.Tabs()`` context is open. The tab holds a
    question box with an "Ask" button, a read-only answer box, and a row of
    three read-only metric boxes (latency, chunks used, cache hit).

    Args:
        tab_title: Label shown on the tab.
        handler: Callable that takes the question text and returns the four
            values shown in the answer, latency, chunks and cache boxes, in
            that order.

    Returns:
        ``(question, submit, answer, latency, chunks, cache)`` -- the
        components created for the tab, in creation order.
    """
    with gr.TabItem(tab_title):
        with gr.Row():
            question = gr.Textbox(label="Your Question", lines=2, placeholder="e.g., What is RAG?")
            submit = gr.Button("Ask", variant="primary")
        with gr.Row():
            answer = gr.Textbox(label="Answer", lines=4, interactive=False)
        with gr.Row():
            latency = gr.Textbox(label="Latency", interactive=False)
            chunks = gr.Textbox(label="Chunks Used", interactive=False)
            cache = gr.Textbox(label="Cache Hit", interactive=False)

        # The button's click event and the textbox's submit event run the
        # same query and fill the same four boxes.
        submit.click(
            handler,
            inputs=question,
            outputs=[answer, latency, chunks, cache]
        )
        question.submit(
            handler,
            inputs=question,
            outputs=[answer, latency, chunks, cache]
        )
    return question, submit, answer, latency, chunks, cache


# Top-level UI: a header, one tab per RAG variant (all built by the same
# helper so the three layouts cannot drift apart), and a footer.
with gr.Blocks(title="RAG Latency Optimization", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # ⚡ RAG Latency Optimization
    ### Compare Naive, Optimized, and No‑Compromise RAG on CPU‑only hardware
    **Proven 2.7× speedup (247ms → 92ms)** – now interactive!
    """)

    with gr.Tabs():
        # ----- Naive RAG tab -----
        (question_naive, submit_naive, answer_naive,
         latency_naive, chunks_naive, cache_naive) = _build_rag_tab(
            "🐢 Naive RAG (Baseline)", query_naive
        )

        # ----- Optimized RAG tab -----
        (question_opt, submit_opt, answer_opt,
         latency_opt, chunks_opt, cache_opt) = _build_rag_tab(
            "⚡ Optimized RAG (Production)", query_optimized
        )

        # ----- No-Compromise RAG tab -----
        (question_nc, submit_nc, answer_nc,
         latency_nc, chunks_nc, cache_nc) = _build_rag_tab(
            "🚀 No‑Compromise RAG (Max Speed)", query_no_compromise
        )

    gr.Markdown("""
    ---
    **Architecture**: CPU‑only | **Embeddings**: `all-MiniLM-L6-v2` | **Vector Store**: FAISS  
    **Caching**: SQLite (Optimized) + LRU memory | **Generation**: Simulated (real LLM can be plugged in)
    
    💡 **Tip**: Press Enter to submit your question quickly!
    """)

# ============================================================================
# LAUNCH THE APP
# ============================================================================

if __name__ == "__main__":
    # Host and port are bound once so the printed URL and the actual launch
    # arguments always agree. 0.0.0.0 listens on all network interfaces.
    host, port = "0.0.0.0", 7860
    print("🚀 Starting RAG Latency Optimization App...")
    print(f"📍 Server will run on http://{host}:{port}")
    demo.launch(server_name=host, server_port=port)