"""AI Health Assistant: Gradio app that analyzes biomarkers with an LLM.

The script logs in to Hugging Face, loads a causal language model, and serves
a Gradio UI. For each request it validates the numeric inputs, asks the model
for a structured Markdown report, and renders that report into a PDF with
ReportLab.
"""

import math
import os
import re
import threading
import traceback
from datetime import datetime
from xml.sax.saxutils import escape

import gradio as gr
import torch
from huggingface_hub import login
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.platypus import (
    PageBreak,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
    Table,
    TableStyle,
)
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

print("=" * 50)
print("🚀 Starting AI Health Assistant")
print(f"Torch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print("=" * 50)

# ------------------------------------------------------
# STEP 1: Authentication for HF Spaces
# ------------------------------------------------------
# Check both possible token names (order matters!)
hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")

if not hf_token:
    raise ValueError(
        "❌ No token found!\n"
        "Please add your Hugging Face token in Space Settings → Repository secrets.\n"
        "Name it either: HUGGINGFACE_HUB_TOKEN or HF_TOKEN\n"
        "Create a token at: https://huggingface.co/settings/tokens"
    )

print(f"✅ HF Token found (length: {len(hf_token)})")
print("🔐 Logging in to Hugging Face...")
login(token=hf_token)
print("✅ Login successful!")

# ------------------------------------------------------
# STEP 2: Load model and tokenizer
# ------------------------------------------------------
# Option 1: MedGemma (requires access request)
# model_id = "google/medgemma-27b-text-it"

# Option 2: Regular Gemma (no access needed, works immediately)
model_id = "google/gemma-2-9b-it"  # Smaller, faster, no access required

# Option 3: Mistral (medical fine-tuned alternative)
# model_id = "mistralai/Mistral-7B-Instruct-v0.3"

print("🔄 Loading tokenizer...")
try:
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    print("✅ Tokenizer loaded successfully!")
except Exception as e:
    print(f"❌ Error loading tokenizer: {e}")
    raise

print("🔄 Loading model... (this may take several minutes)")
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=hf_token,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        low_cpu_mem_usage=True,
    )
    print("✅ Model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    raise


# ------------------------------------------------------
# STEP 3: Input validation helpers
# ------------------------------------------------------
def validate_numeric(value, name, min_val=0, max_val=None):
    """Validate that *value* is a finite number within optional bounds.

    Args:
        value: Raw input; anything accepted by ``float()``.
        name: Human-readable field name used in error messages.
        min_val: Inclusive lower bound, or ``None`` for no lower bound.
        max_val: Inclusive upper bound, or ``None`` for no upper bound.

    Returns:
        ``(True, number)`` on success, otherwise ``(False, error_message)``.
    """
    try:
        num = float(value)
    except (ValueError, TypeError):
        return False, f"{name} must be a valid number"

    # NaN and infinities compare False against every bound, so reject them
    # explicitly instead of letting them slip through the range checks.
    if not math.isfinite(num):
        return False, f"{name} must be a valid number"
    if min_val is not None and num < min_val:
        return False, f"{name} must be >= {min_val}"
    # "is not None" (rather than truthiness) keeps a bound of 0 effective.
    if max_val is not None and num > max_val:
        return False, f"{name} must be <= {max_val}"
    return True, num


# ------------------------------------------------------
# STEP 4: PDF Generation Function
# ------------------------------------------------------
# A Markdown delimiter cell such as "---", ":---" or ":---:".
_SEPARATOR_CELL = re.compile(r"^:?-+:?$")
# Markdown bold spans, converted to ReportLab <b> markup.
_BOLD_MD = re.compile(r"\*\*(.+?)\*\*")


def _split_table_row(line):
    """Split one Markdown table row into stripped cell strings."""
    row = line.strip()
    if row.startswith('|'):
        row = row[1:]
    if row.endswith('|'):
        row = row[:-1]
    return [cell.strip() for cell in row.split('|')]


def _is_separator_row(cells):
    """Return True when every cell looks like a Markdown header delimiter."""
    return bool(cells) and all(_SEPARATOR_CELL.match(cell) for cell in cells)


def _to_paragraph_markup(text):
    """Escape *text* for ReportLab Paragraph markup and map **bold** to <b>."""
    # Paragraph parses its input as XML-like markup, so raw "<" or "&" from
    # the model output must be escaped before any tags are introduced.
    return _BOLD_MD.sub(r"<b>\1</b>", escape(text))


def parse_markdown_table(md_text):
    """Extract Markdown tables from *md_text*.

    A table starts at a line beginning with ``|`` and continues over the
    following non-blank lines that contain ``|``. The header delimiter row
    (``|---|---|``) is dropped when present.

    Returns:
        A list of tables; each table is a list of rows and each row a list of
        cell strings. Rows are padded with empty strings to a common width.
        Tables with fewer than two rows are ignored.
    """
    tables = []
    lines = md_text.split('\n')
    i = 0
    while i < len(lines):
        if not lines[i].strip().startswith('|'):
            i += 1
            continue

        rows = [_split_table_row(lines[i])]
        i += 1

        # Skip the delimiter row only if it really is one; a table emitted
        # without a delimiter must not lose its first data row.
        if (i < len(lines) and '|' in lines[i]
                and _is_separator_row(_split_table_row(lines[i]))):
            i += 1

        while i < len(lines) and lines[i].strip() and '|' in lines[i]:
            rows.append(_split_table_row(lines[i]))
            i += 1

        rows = [row for row in rows if any(row)]
        if len(rows) > 1:
            # Pad ragged rows so every row has the same number of columns.
            width = max(len(row) for row in rows)
            tables.append([row + [''] * (width - len(row)) for row in rows])
    return tables


def create_pdf_report(patient_data, biomarkers, ai_response, filename="health_report.pdf"):
    """Generate a PDF report and return the path it was written to.

    Args:
        patient_data: Dict with keys ``age``, ``gender``, ``height`` (cm) and
            ``weight`` (kg).
        biomarkers: Dict with keys ``albumin``, ``creatinine``, ``glucose``,
            ``crp``, ``mcv``, ``rdw``, ``alp``, ``wbc``, ``lymphocytes``,
            ``hb`` and ``pv``.
        ai_response: Markdown text produced by the model; split into sections
            on ``###``.
        filename: Output path for the PDF.

    Returns:
        ``filename``, after the document has been built.
    """
    doc = SimpleDocTemplate(
        filename,
        pagesize=letter,
        topMargin=0.5*inch,
        bottomMargin=0.5*inch,
        leftMargin=0.75*inch,
        rightMargin=0.75*inch,
    )
    story = []
    styles = getSampleStyleSheet()

    # Custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        textColor=colors.HexColor('#1a5490'),
        spaceAfter=30,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold',
    )
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=14,
        textColor=colors.HexColor('#2c5aa0'),
        spaceAfter=12,
        spaceBefore=12,
        fontName='Helvetica-Bold',
    )
    subheading_style = ParagraphStyle(
        'CustomSubHeading',
        parent=styles['Heading3'],
        fontSize=12,
        textColor=colors.HexColor('#444444'),
        spaceAfter=10,
        spaceBefore=10,
        fontName='Helvetica-Bold',
    )
    normal_style = ParagraphStyle(
        'CustomNormal',
        parent=styles['Normal'],
        fontSize=10,
        spaceAfter=8,
        leading=14,
    )
    # Table cells are rendered as Paragraphs so long text wraps inside the
    # column instead of running off the page.
    header_cell_style = ParagraphStyle(
        'TableHeaderCell',
        parent=styles['Normal'],
        fontName='Helvetica-Bold',
        fontSize=9,
        leading=11,
        textColor=colors.whitesmoke,
    )
    body_cell_style = ParagraphStyle(
        'TableBodyCell',
        parent=styles['Normal'],
        fontName='Helvetica',
        fontSize=8,
        leading=10,
    )

    # Title
    story.append(Paragraph("AI Health Assessment Report", title_style))
    story.append(Paragraph(
        f"Generated on: {datetime.now().strftime('%B %d, %Y at %I:%M %p')}",
        ParagraphStyle('Date', parent=styles['Normal'], fontSize=9,
                       textColor=colors.grey, alignment=TA_CENTER),
    ))
    story.append(Spacer(1, 0.3*inch))

    # Patient Information Section
    story.append(Paragraph("Patient Information", heading_style))

    # Height of 0 passes input validation, so guard the BMI division.
    height_m = patient_data['height'] / 100
    if height_m > 0:
        bmi_text = f"{patient_data['weight'] / (height_m ** 2):.1f}"
    else:
        bmi_text = "N/A"

    patient_table_data = [
        ['Age', f"{patient_data['age']} years", 'Gender', patient_data['gender']],
        ['Height', f"{patient_data['height']} cm", 'Weight', f"{patient_data['weight']} kg"],
        ['BMI', bmi_text, 'Report ID', f"RPT-{datetime.now().strftime('%Y%m%d%H%M')}"],
    ]
    patient_table = Table(patient_table_data,
                          colWidths=[1.2*inch, 1.8*inch, 1.2*inch, 1.8*inch])
    patient_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, -1), colors.HexColor('#f0f4f8')),
        ('TEXTCOLOR', (0, 0), (-1, -1), colors.black),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTNAME', (2, 0), (2, -1), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
        ('TOPPADDING', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
    ]))
    story.append(patient_table)
    story.append(Spacer(1, 0.2*inch))

    # Biomarkers Input Section
    story.append(Paragraph("Laboratory Biomarkers - Input Values", heading_style))
    biomarker_table_data = [
        ['Biomarker', 'Value', 'Unit', 'Biomarker', 'Value', 'Unit'],
    ]
    biomarker_list = [
        ('Albumin', biomarkers['albumin'], 'g/dL'),
        ('Creatinine', biomarkers['creatinine'], 'mg/dL'),
        ('Glucose', biomarkers['glucose'], 'mg/dL'),
        ('CRP', biomarkers['crp'], 'mg/L'),
        ('MCV', biomarkers['mcv'], 'fL'),
        ('RDW', biomarkers['rdw'], '%'),
        ('ALP', biomarkers['alp'], 'U/L'),
        ('WBC', biomarkers['wbc'], 'x10^3/uL'),
        ('Lymphocytes', biomarkers['lymphocytes'], '%'),
        ('Hemoglobin', biomarkers['hb'], 'g/dL'),
        ('Plasma (PV)', biomarkers['pv'], 'mL'),
    ]

    # Lay the biomarkers out two per row; an odd final entry gets blank cells.
    for i in range(0, len(biomarker_list), 2):
        row = list(biomarker_list[i])
        if i + 1 < len(biomarker_list):
            row.extend(biomarker_list[i + 1])
        else:
            row.extend(['', '', ''])
        biomarker_table_data.append(row)

    biomarker_table = Table(
        biomarker_table_data,
        colWidths=[1.4*inch, 0.9*inch, 0.7*inch, 1.4*inch, 0.9*inch, 0.7*inch],
    )
    biomarker_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2c5aa0')),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 11),
        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 1), (-1, -1), 9),
        ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#f8f9fa')]),
        ('TOPPADDING', (0, 0), (-1, -1), 6),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
    ]))
    story.append(biomarker_table)
    story.append(Spacer(1, 0.3*inch))

    # AI Analysis Section
    story.append(PageBreak())
    story.append(Paragraph("AI-Generated Health Analysis", heading_style))
    story.append(Spacer(1, 0.1*inch))

    ai_table_style = TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2c5aa0')),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 9),
        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 1), (-1, -1), 8),
        ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#f8f9fa')]),
        ('TOPPADDING', (0, 0), (-1, -1), 6),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
    ])

    for section in ai_response.split('###'):
        if not section.strip():
            continue

        lines = section.strip().split('\n')
        # Splitting "####" on "###" leaves a stray "#" in front of the title.
        section_title = lines[0].strip().lstrip('#').strip()
        section_content = '\n'.join(lines[1:]).strip()
        content_lines = section_content.split('\n')

        if section_title:
            story.append(Paragraph(_to_paragraph_markup(section_title), subheading_style))

        tables = parse_markdown_table(section_content) if '|' in section_content else []
        for table_data in tables:
            column_count = len(table_data[0])
            wrapped_rows = [
                [
                    Paragraph(
                        _to_paragraph_markup(cell),
                        header_cell_style if row_index == 0 else body_cell_style,
                    )
                    for cell in row
                ]
                for row_index, row in enumerate(table_data)
            ]
            pdf_table = Table(
                wrapped_rows,
                colWidths=[doc.width / column_count] * column_count,
                repeatRows=1,
            )
            pdf_table.setStyle(ai_table_style)
            story.append(pdf_table)
            story.append(Spacer(1, 0.15*inch))

        if tables:
            # Table rows were rendered above; keep only the prose lines.
            text_lines = [line for line in content_lines if '|' not in line]
        else:
            text_lines = content_lines

        for para in text_lines:
            if para.strip():
                story.append(Paragraph(_to_paragraph_markup(para.strip()), normal_style))

        story.append(Spacer(1, 0.1*inch))

    # Footer/Disclaimer
    story.append(Spacer(1, 0.2*inch))
    disclaimer_style = ParagraphStyle(
        'Disclaimer',
        parent=styles['Normal'],
        fontSize=8,
        textColor=colors.HexColor('#666666'),
        alignment=TA_CENTER,
        borderWidth=1,
        borderColor=colors.HexColor('#cccccc'),
        borderPadding=10,
        backColor=colors.HexColor('#fffef0'),
    )
    story.append(Paragraph(
        "IMPORTANT DISCLAIMER: This report is generated by an AI system for informational purposes only. "
        "It is NOT a medical diagnosis and should NOT replace professional medical advice. "
        "Always consult with qualified healthcare professionals for medical decisions and treatment.",
        disclaimer_style,
    ))

    doc.build(story)
    return filename


# ------------------------------------------------------
# STEP 5: Define the model interaction function
# ------------------------------------------------------
def _build_model_inputs(system_message, user_message):
    """Tokenize the conversation with the chat template, on the model device."""
    template_kwargs = dict(
        add_generation_prompt=True,  # open the assistant turn for generation
        return_tensors="pt",
        return_dict=True,  # also yields the attention mask
    )
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]
    try:
        encodings = tokenizer.apply_chat_template(messages, **template_kwargs)
    except Exception:
        # Some chat templates (Gemma among them) reject the "system" role.
        # Fold the instructions into the user turn instead; if this also
        # fails, the error propagates to the caller.
        merged = [{"role": "user", "content": f"{system_message}\n\n{user_message}"}]
        encodings = tokenizer.apply_chat_template(merged, **template_kwargs)
    return encodings.to(model.device)


def _generate_text(encodings):
    """Run generation in a worker thread and return the decoded response."""
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=encodings["input_ids"],
        attention_mask=encodings["attention_mask"],
        max_new_tokens=2000,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
        streamer=streamer,
        pad_token_id=tokenizer.pad_token_id,
    )
    errors = []

    def _worker():
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:
            errors.append(exc)
            # Without an end signal the consumer below would block forever.
            streamer.end()

    thread = threading.Thread(target=_worker, daemon=True)
    thread.start()
    response = "".join(streamer)
    thread.join()

    if errors:
        raise errors[0]
    return response


def respond(albumin, creatinine, glucose, crp, mcv, rdw, alp, wbc, lymphocytes, hb, pv,
            age, gender, height, weight):
    """Validate the inputs, generate the AI report and render it to PDF.

    Returns:
        ``(markdown_report, pdf_path)`` on success, or
        ``(error_message, None)`` when validation or generation fails.
    """
    try:
        # (raw value, display name, inclusive upper bound); lower bound is 0.
        numeric_specs = [
            (albumin, "Albumin", 10),
            (creatinine, "Creatinine", 20),
            (glucose, "Glucose", 1000),
            (crp, "CRP", 500),
            (mcv, "MCV", 200),
            (rdw, "RDW", 50),
            (alp, "ALP", 1000),
            (wbc, "WBC", 100),
            (lymphocytes, "Lymphocytes", 100),
            (hb, "Hemoglobin", 25),
            (pv, "Plasma", 10000),
            (age, "Age", 150),
            (height, "Height", 300),
            (weight, "Weight", 500),
        ]
        validations = [
            validate_numeric(raw, label, 0, upper)
            for raw, label, upper in numeric_specs
        ]

        for is_valid, result in validations:
            if not is_valid:
                return f"❌ Validation Error: {result}", None

        (albumin, creatinine, glucose, crp, mcv, rdw, alp, wbc,
         lymphocytes, hb, pv, age, height, weight) = [
            result for _, result in validations
        ]

        system_message = (
            "You are an AI Health Assistant that analyzes laboratory biomarkers "
            "and generates structured, patient-friendly health summaries.\n\n"
            "Your task is to evaluate the provided biomarkers and generate an AI-driven medical report "
            "with insights, observations, and clear explanations.\n"
            "You must strictly follow this structured format:\n\n"
            "### Tabular Mapping\n"
            "- Always include a Markdown table with exactly five columns:\n"
            "| Biomarker | Value | Status (Low/Normal/High) | AI-Inferred Insight | Reference Range |\n"
            "- Include **all available biomarkers** below:\n"
            "Albumin, Creatinine, Glucose, CRP, MCV, RDW, ALP, WBC, Lymphocytes, Hemoglobin, Plasma (PV)\n"
            "- The first row after the header must begin directly with 'Albumin'.\n"
            "- Each biomarker must appear exactly once as a separate row.\n\n"
            "### Executive Summary\n"
            "- List Top 3 Health Priorities.\n"
            "- Highlight Key Strengths or normal biomarkers.\n\n"
            "### System-Specific Analysis\n"
            "- Summarize findings grouped by organ systems (Liver, Kidney, Immune, Blood, etc.).\n"
            "- Status: 'Optimal' | 'Monitor' | 'Needs Attention'.\n"
            "- Provide 2-3 sentences of explanation in plain, supportive language.\n\n"
            "### Personalized Action Plan\n"
            "- Provide categorized recommendations (Nutrition, Lifestyle, Testing, Medical Consultation).\n"
            "- Never recommend medication or treatment.\n\n"
            "### Interaction Alerts\n"
            "- Highlight potential relationships between markers (e.g., high CRP + low Albumin).\n\n"
            "### Constraints\n"
            "- Never give a diagnosis or prescribe medicine.\n"
            "- Never use data not present in the input.\n"
            "- Always recommend consulting a healthcare professional.\n"
            "- Always include normal reference ranges for each biomarker.\n"
            "- Use simple, clear, patient-friendly language.\n"
            "- Provide additional explanation instead of just writing direct points.\n"
            "- Be concise and avoid repetition."
        )

        user_message = (
            f"Patient Info:\n"
            f"- Age: {age} years\n"
            f"- Gender: {gender}\n"
            f"- Height: {height} cm\n"
            f"- Weight: {weight} kg\n\n"
            f"Biomarkers:\n"
            f"- Albumin: {albumin} g/dL\n"
            f"- Creatinine: {creatinine} mg/dL\n"
            f"- Glucose: {glucose} mg/dL\n"
            f"- CRP: {crp} mg/L\n"
            f"- MCV: {mcv} fL\n"
            f"- RDW: {rdw} %\n"
            f"- ALP: {alp} U/L\n"
            f"- WBC: {wbc} x10^3/μL\n"
            f"- Lymphocytes: {lymphocytes} %\n"
            f"- Hemoglobin: {hb} g/dL\n"
            f"- Plasma (PV): {pv} mL"
        )

        encodings = _build_model_inputs(system_message, user_message)
        response = _generate_text(encodings)

        patient_data = {
            'age': age,
            'gender': gender,
            'height': height,
            'weight': weight,
        }
        biomarkers = {
            'albumin': albumin,
            'creatinine': creatinine,
            'glucose': glucose,
            'crp': crp,
            'mcv': mcv,
            'rdw': rdw,
            'alp': alp,
            'wbc': wbc,
            'lymphocytes': lymphocytes,
            'hb': hb,
            'pv': pv,
        }

        pdf_filename = f"health_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
        pdf_path = create_pdf_report(patient_data, biomarkers, response, pdf_filename)
        return response, pdf_path

    except Exception as e:
        # Keep the full traceback in the server log only; the UI gets a short
        # message so internal paths and stack frames are not shown to users.
        print(f"❌ Error generating report: {str(e)}\n\n{traceback.format_exc()}")
        return f"❌ Error generating report: {str(e)}", None


# ------------------------------------------------------
# STEP 6: Gradio UI
# ------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), css="""
    .output-markdown table { border-collapse: collapse; width: 100%; margin: 20px 0; }
    .output-markdown th { background-color: #2c5aa0; color: white; padding: 12px; text-align: left; }
    .output-markdown td { padding: 10px; border: 1px solid #ddd; }
    .output-markdown tr:nth-child(even) { background-color: #f8f9fa; }
    .output-markdown h3 { color: #2c5aa0; margin-top: 20px; }
""") as demo:
    gr.Markdown("# 🧪 AI Health Assistant with PDF Export")
    gr.Markdown("*Analyze biomarkers with AI-powered insights and download a professional PDF report.*")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📊 Biomarkers")
            albumin = gr.Number(label="Albumin (g/dL)", value=4.5, minimum=0, maximum=10)
            creatinine = gr.Number(label="Creatinine (mg/dL)", value=1.5, minimum=0, maximum=20)
            glucose = gr.Number(label="Glucose (mg/dL, fasting)", value=160, minimum=0, maximum=1000)
            crp = gr.Number(label="CRP (mg/L)", value=2.5, minimum=0, maximum=500)
            mcv = gr.Number(label="MCV (fL)", value=90, minimum=0, maximum=200)
            rdw = gr.Number(label="RDW (%)", value=13, minimum=0, maximum=50)
            alp = gr.Number(label="ALP (U/L)", value=70, minimum=0, maximum=1000)
            wbc = gr.Number(label="WBC (10^3/μL)", value=7.5, minimum=0, maximum=100)
            lymphocytes = gr.Number(label="Lymphocytes (%)", value=30, minimum=0, maximum=100)
            hb = gr.Number(label="Hemoglobin (g/dL)", value=14.5, minimum=0, maximum=25)
            pv = gr.Number(label="Plasma (PV) (mL)", value=3000, minimum=0, maximum=10000)

        with gr.Column():
            gr.Markdown("### 👤 Patient Information")
            age = gr.Number(label="Age (years)", value=30, minimum=0, maximum=150)
            gender = gr.Dropdown(choices=["Male", "Female"], label="Gender", value="Male")
            height = gr.Number(label="Height (cm)", value=170, minimum=0, maximum=300)
            weight = gr.Number(label="Weight (kg)", value=70, minimum=0, maximum=500)

    btn = gr.Button("🔬 Generate Health Report & PDF", variant="primary", size="lg")

    with gr.Row():
        # elem_classes attaches the class the custom CSS above selects on.
        output = gr.Markdown(label="AI Health Report", elem_classes=["output-markdown"])

    with gr.Row():
        pdf_output = gr.File(label="📄 Download PDF Report")

    gr.Markdown("---")
    gr.Markdown("⚠️ **Disclaimer:** This tool provides informational insights only and is not a substitute for professional medical advice.")

    btn.click(
        respond,
        inputs=[albumin, creatinine, glucose, crp, mcv, rdw, alp, wbc, lymphocytes, hb, pv,
                age, gender, height, weight],
        outputs=[output, pdf_output],
    )

# ------------------------------------------------------
# STEP 7: Launch for Hugging Face Spaces
# ------------------------------------------------------
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)