Spaces:
Sleeping
Sleeping
| import os | |
| from pathlib import Path | |
| from typing import List | |
| from fastapi import FastAPI, HTTPException | |
| from app.config import settings | |
| from app.models import ( | |
| ChatRequest, | |
| ChatResponse, | |
| CourseInput, | |
| CourseProcessError, | |
| CourseSummary, | |
| SyllabusProcessResponse, | |
| ) | |
| from app.services.gemini_service import GeminiService | |
| from app.services.intent_service import classify_intent | |
| from app.services.pdf_service import chunk_text, fetch_pdf_text | |
| from app.services.student_service import fetch_student_info | |
| from app.vector_store import LocalVectorStore | |
# ASGI application object for the chatbot service.
app = FastAPI(version="0.1.0", title="GitConnect Chatbot Service")

# Diagnostic log file: "context.txt" in the directory one level above the
# directory that contains this module.
_CONTEXT_LOG_PATH = Path(__file__).resolve().parents[1] / "context.txt"
def _write_context_log(content: str) -> None:
    """Overwrite the context log file with *content*, on a best-effort basis.

    The file at ``_CONTEXT_LOG_PATH`` is replaced on every call (UTF-8), so it
    only ever holds the most recent entry.

    Args:
        content: Full text to store in the log file.

    A failed write is reported on stdout and otherwise ignored: this log is a
    diagnostic aid, and a read-only or full filesystem must not fail the
    request whose reply has already been computed.
    """
    try:
        _CONTEXT_LOG_PATH.write_text(content, encoding="utf-8")
    except (OSError, UnicodeError) as exc:
        # UnicodeError covers text that cannot be encoded as UTF-8
        # (e.g. lone surrogates arriving in a user query).
        print(f"Context log write skipped due to error: {exc}")
def warmup_embedding_model() -> None:
    """Preload the configured embedding model, tolerating any failure.

    Calls ``GeminiService.preload_embedding_model`` with
    ``settings.embedding_model_name``. Any exception is printed and
    swallowed rather than propagated.

    NOTE(review): presumably meant to run at application startup, but no
    startup-hook registration is visible in this view -- confirm.
    """
    try:
        model_name = settings.embedding_model_name
        GeminiService.preload_embedding_model(model_name)
    except Exception as err:
        # Warmup is an optimisation only: startup must carry on without it.
        message = f"Embedding warmup skipped due to error: {err}"
        print(message)
def health() -> dict:
    """Return the fixed liveness payload ``{"status": "ok"}``.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm how it is registered with ``app``.
    """
    return dict(status="ok")
def process_syllabus(courses: List[CourseInput]) -> SyllabusProcessResponse:
    """Index each course's syllabus PDF and produce an AI summary per course.

    For every course the syllabus text is fetched from ``syllabus_url``,
    split into chunks, each chunk is embedded, the chunks and embeddings are
    upserted into the vector store under the course's semester and course
    code, and finally a summary is generated. A failure at any step for one
    course is recorded in ``failed`` and processing moves on to the next.

    Args:
        courses: Courses to process. An empty list short-circuits to an
            all-zero response without building any service object.

    Returns:
        The successful summaries, the per-course errors, and the counts of
        received, processed and failed courses.

    Raises:
        HTTPException: 500 when constructing ``GeminiService`` raises
            ``ValueError``.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm how it is registered with ``app``.
    """

    def _response(received, done, errors) -> SyllabusProcessResponse:
        # Single place where the response counters are derived.
        return SyllabusProcessResponse(
            results=done,
            failed=errors,
            total_received=received,
            total_processed=len(done),
            total_failed=len(errors),
        )

    if not courses:
        return _response(0, [], [])

    try:
        gemini = GeminiService(
            settings.gemini_api_key,
            settings.gemini_model,
            settings.embedding_model_name,
        )
    except ValueError as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    vector_store = LocalVectorStore(
        settings.vector_data_dir,
        rag_index_db_url=settings.rag_index_db_url,
        neon_max_retries=settings.neon_max_retries,
        neon_retry_backoff_sec=settings.neon_retry_backoff_sec,
        neon_connect_timeout_sec=settings.neon_connect_timeout_sec,
    )

    def _ingest(course: CourseInput) -> CourseSummary:
        # Fetch -> chunk -> embed -> store -> summarise, for one course.
        text = fetch_pdf_text(
            str(course.syllabus_url),
            timeout=settings.pdf_timeout_sec,
            max_retries=settings.pdf_max_retries,
            backoff_sec=settings.pdf_retry_backoff_sec,
        )
        if not text:
            raise RuntimeError("No text extracted from PDF.")

        pieces = chunk_text(
            text,
            chunk_size=settings.rag_chunk_size,
            overlap=settings.rag_chunk_overlap,
        )
        if not pieces:
            raise RuntimeError("Unable to create text chunks from syllabus content.")

        vectors = [
            gemini.embed_text(piece, task_type="retrieval_document")
            for piece in pieces
        ]
        vector_store.upsert_documents(
            course.semester, course.course_code, pieces, vectors
        )
        summary = gemini.summarize_multilingual(course.name, text)
        return CourseSummary(course_code=course.course_code, ai_summary=summary)

    results: List[CourseSummary] = []
    failed: List[CourseProcessError] = []
    for course in courses:
        try:
            results.append(_ingest(course))
        except Exception as exc:
            # One bad course must not abort the batch; report it instead.
            failed.append(
                CourseProcessError(course_code=course.course_code, error=str(exc))
            )

    return _response(len(courses), results, failed)
def chat(req: ChatRequest) -> ChatResponse:
    """Answer one chat query, routing on the classified intent.

    Flow:
      1. ``classify_intent`` decides the intent and whether the query is in
         scope. Out-of-scope queries get a fixed refusal and never reach the
         LLM, so they are answered before the Gemini client is built.
      2. In-scope queries get an intent-specific prompt: student performance
         data for ``attendance``/``result``, retrieved syllabus chunks for
         ``syllabus``, and a general education prompt otherwise.
      3. The prompt is sent to ``gemini.generate_markdown`` and the request
         details plus the full prompt are written to the context log.

    Args:
        req: Chat request; this function reads ``query``, ``history``,
            ``student_id``, ``semester`` and ``lang_code`` from it.

    Returns:
        A ``ChatResponse`` whose ``reply_markdown`` is either the fixed
        refusal or the LLM output.

    Raises:
        HTTPException: 500 when ``GeminiService`` construction raises
            ``ValueError``, or when context gathering / generation fails.
            An ``HTTPException`` raised inside that step is passed through
            unchanged.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm how it is registered with ``app``.
    """
    intent, in_scope = classify_intent(req.query)
    if not in_scope:
        reply = (
            "I can help only with education-related queries such as syllabus, attendance, "
            "results, study planning, and course guidance."
        )
        _write_context_log(
            "Intent: out_of_scope\n"
            f"Query: {req.query}\n"
            "LLM Called: no\n"
            f"Response: {reply}\n"
        )
        return ChatResponse(reply_markdown=reply)

    try:
        gemini = GeminiService(
            settings.gemini_api_key,
            settings.gemini_model,
            settings.embedding_model_name,
        )
    except ValueError as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    history_text = "\n".join(f"{msg.role}: {msg.content}" for msg in req.history)
    uses_student_api = intent in {"attendance", "result"}
    chunks_passed = 0

    try:
        if uses_student_api:
            prompt = _build_student_prompt(req, intent, history_text)
        elif intent == "syllabus":
            prompt, chunks_passed = _build_syllabus_prompt(req, gemini, history_text)
        else:
            prompt = _build_general_prompt(req, history_text)
        reply = gemini.generate_markdown(prompt)
    except HTTPException:
        # Keep the status/detail of an HTTP error raised by a helper instead
        # of flattening it into a generic 500.
        raise
    except Exception as exc:
        # The message says "LLM" but this also covers student-data fetch and
        # vector search failures; the text is kept for client compatibility.
        raise HTTPException(status_code=500, detail=f"LLM response failed: {exc}") from exc

    endpoint_intent = intent if uses_student_api else "not_called"
    endpoint_semester = req.semester if uses_student_api else "not_called"
    # NOTE(review): this log stores the student ID and the full prompt
    # (which embeds fetched student performance data) in plain text.
    _write_context_log(
        f"Intent: {intent}\n"
        f"Query: {req.query}\n"
        f"Student ID: {req.student_id}\n"
        f"Semester: {req.semester}\n"
        f"Language: {req.lang_code}\n"
        f"Student endpoint intent param: {endpoint_intent}\n"
        f"Student endpoint semester param: {endpoint_semester}\n"
        f"Syllabus top_k configured: {settings.rag_syllabus_top_k}\n"
        f"RAG chunk size configured: {settings.rag_chunk_size}\n"
        f"RAG chunk overlap configured: {settings.rag_chunk_overlap}\n"
        f"Syllabus chunks passed: {chunks_passed}\n"
        "LLM Called: yes\n"
        "\n--- Prompt Passed To LLM ---\n"
        f"{prompt}\n"
    )
    return ChatResponse(reply_markdown=reply)


def _build_student_prompt(req: ChatRequest, intent: str, history_text: str) -> str:
    """Fetch the student's performance data and build the attendance/result prompt."""
    student_info = fetch_student_info(
        settings.student_performance_url_template,
        req.student_id,
        semester=req.semester,
        intent=intent,
    )
    return f"""
You are a college assistant. Respond in language code: {req.lang_code}.
Return markdown only.
Intent: {intent}
User query: {req.query}
Recent chat history:
{history_text}
Student performance context (authoritative):
{student_info}
Rules:
- Answer only from the provided student performance context.
- If asked for something unavailable in the context, clearly say it is unavailable.
- Be concise and practical.
"""


def _build_syllabus_prompt(req: ChatRequest, gemini, history_text: str):
    """Retrieve syllabus chunks for the query; return ``(prompt, chunks_used)``."""
    vector_store = LocalVectorStore(
        settings.vector_data_dir,
        rag_index_db_url=settings.rag_index_db_url,
        neon_max_retries=settings.neon_max_retries,
        neon_retry_backoff_sec=settings.neon_retry_backoff_sec,
        neon_connect_timeout_sec=settings.neon_connect_timeout_sec,
    )
    query_embedding = gemini.embed_text(req.query, task_type="retrieval_query")
    hits = vector_store.search(
        req.semester,
        query_embedding,
        top_k=settings.rag_syllabus_top_k,
    )
    # Enforce the cap locally as well, in case the store returns extra hits.
    hits = hits[: settings.rag_syllabus_top_k]
    syllabus_context = "\n\n---\n\n".join(
        f"[{h.get('course_code', '')}] {h.get('chunk', '')}" for h in hits
    )
    prompt = f"""
You are a college assistant. Respond in language code: {req.lang_code}.
Return markdown only.
Intent: syllabus
User query: {req.query}
Recent chat history:
{history_text}
Syllabus context (authoritative):
{syllabus_context}
Rules:
- Answer only from the provided syllabus context.
- For unit/module queries, list units clearly with headings/bullets.
- If exact detail is unavailable, state what is missing.
"""
    return prompt, len(hits)


def _build_general_prompt(req: ChatRequest, history_text: str) -> str:
    """Build the prompt for in-scope queries that need no external context."""
    return f"""
You are a helpful college assistant. Respond in language code: {req.lang_code}.
Return markdown only.
Intent: other (education-related)
User query: {req.query}
Recent chat history:
{history_text}
Rules:
- Keep the response casual, helpful, and education-focused.
- Do not answer non-education requests.
- If needed, ask a brief clarifying question.
"""