#!/usr/bin/env python3
"""
Demo: Full Stack DeepCritical Agent (Phases 1-8).
This script demonstrates the COMPLETE REAL drug repurposing research pipeline:
- Phase 2: REAL Search (PubMed + ClinicalTrials + bioRxiv)
- Phase 6: REAL Embeddings (sentence-transformers + ChromaDB)
- Phase 7: REAL Hypothesis (LLM mechanistic reasoning)
- Phase 3: REAL Judge (LLM evidence assessment)
- Phase 8: REAL Report (LLM structured scientific report)
NO MOCKS. NO FAKE DATA. REAL SCIENCE.
Usage:
uv run python examples/full_stack_demo/run_full.py "metformin Alzheimer's"
uv run python examples/full_stack_demo/run_full.py "sildenafil heart failure" -i 3
Requires: OPENAI_API_KEY or ANTHROPIC_API_KEY
"""
import argparse
import asyncio
import os
import sys
from typing import Any

from src.utils.models import Evidence

def print_header(title: str) -> None:
    """Print a formatted section header."""
    print(f"\n{'='*70}")
    print(f" {title}")
    print(f"{'='*70}\n")


def print_step(step: int, name: str) -> None:
    """Print a step indicator."""
    print(f"\n[Step {step}] {name}")
    print("-" * 50)

_MAX_DISPLAY_LEN = 600


def _print_truncated(text: str) -> None:
    """Print text, truncating if too long."""
    if len(text) > _MAX_DISPLAY_LEN:
        print(text[:_MAX_DISPLAY_LEN] + "\n... [truncated for display]")
    else:
        print(text)

async def _run_search_iteration(
    query: str,
    iteration: int,
    evidence_store: dict[str, Any],
    all_evidence: list[Evidence],
    search_handler: Any,
    embedding_service: Any,
) -> list[Evidence]:
    """Run a single search iteration with deduplication."""
    search_queries = [query]
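    # Augment the original query with search suggestions from the two most recent hypotheses, if any.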
    if evidence_store.get("hypotheses"):
        for h in evidence_store["hypotheses"][-2:]:
            search_queries.extend(h.search_suggestions[:1])
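    # Run up to two queries through the SearchHandler (all three sources), then drop embedding-level duplicates.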
    for q in search_queries[:2]:
        result = await search_handler.execute(q, max_results_per_tool=5)
        print(f" '{q}' -> {result.total_found} results")
        new_unique = await embedding_service.deduplicate(result.evidence)
        print(f" After dedup: {len(new_unique)} unique")
        all_evidence.extend(new_unique)

    evidence_store["current"] = all_evidence
    evidence_store["iteration_count"] = iteration
    return all_evidence

async def _handle_judge_step(
    judge_handler: Any, query: str, all_evidence: list[Evidence], evidence_store: dict[str, Any]
) -> tuple[bool, str]:
    """Handle the judge assessment step. Returns (should_stop, next_query)."""
    print("\n[Judge] Assessing evidence quality (REAL LLM)...")
    assessment = await judge_handler.assess(query, all_evidence)
    print(f" Mechanism Score: {assessment.details.mechanism_score}/10")
    print(f" Clinical Score: {assessment.details.clinical_evidence_score}/10")
    print(f" Confidence: {assessment.confidence:.0%}")
    print(f" Recommendation: {assessment.recommendation.upper()}")
    if assessment.recommendation == "synthesize":
        print("\n[Judge] Evidence sufficient! Proceeding to report generation...")
        evidence_store["last_assessment"] = assessment.details.model_dump()
        return True, query
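    # Not ready yet: keep searching, preferring the judge's suggested follow-up queries.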
    next_queries = assessment.next_search_queries[:2] if assessment.next_search_queries else []
    if next_queries:
        print(f"\n[Judge] Need more evidence. Next queries: {next_queries}")
        return False, next_queries[0]

    print("\n[Judge] Need more evidence but no suggested queries. Continuing with original query.")
    return False, query

async def run_full_demo(query: str, max_iterations: int) -> None:
    """Run the REAL full stack pipeline."""
    print_header("DeepCritical Full Stack Demo (REAL)")
    print(f"Query: {query}")
    print(f"Max iterations: {max_iterations}")
    print("Mode: REAL (All live API calls - no mocks)\n")

    # Import real components
    from src.agent_factory.judges import JudgeHandler
    from src.agents.hypothesis_agent import HypothesisAgent
    from src.agents.report_agent import ReportAgent
    from src.services.embeddings import EmbeddingService
    from src.tools.biorxiv import BioRxivTool
    from src.tools.clinicaltrials import ClinicalTrialsTool
    from src.tools.pubmed import PubMedTool
    from src.tools.search_handler import SearchHandler

    # Initialize REAL services
    print("[Init] Loading embedding model...")
    embedding_service = EmbeddingService()
    search_handler = SearchHandler(
        tools=[PubMedTool(), ClinicalTrialsTool(), BioRxivTool()], timeout=30.0
    )
    judge_handler = JudgeHandler()

    # Shared evidence store
    evidence_store: dict[str, Any] = {"current": [], "hypotheses": [], "iteration_count": 0}
    all_evidence: list[Evidence] = []
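    # Search -> hypothesize (first iteration only) -> judge loop; exits early once the judge says "synthesize".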
    for iteration in range(1, max_iterations + 1):
        print_step(iteration, f"ITERATION {iteration}/{max_iterations}")

        # Step 1: REAL Search
        print("\n[Search] Querying PubMed + ClinicalTrials + bioRxiv (REAL API calls)...")
        all_evidence = await _run_search_iteration(
            query, iteration, evidence_store, all_evidence, search_handler, embedding_service
        )
        if not all_evidence:
            print("\nNo evidence found. Try a different query.")
            return

        # Step 2: REAL Hypothesis generation (first iteration only)
        if iteration == 1:
            print("\n[Hypothesis] Generating mechanistic hypotheses (REAL LLM)...")
            hypothesis_agent = HypothesisAgent(evidence_store, embedding_service)
            hyp_response = await hypothesis_agent.run(query)
            _print_truncated(hyp_response.messages[0].text)

        # Step 3: REAL Judge
        should_stop, query = await _handle_judge_step(
            judge_handler, query, all_evidence, evidence_store
        )
        if should_stop:
            break
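    # Report generation runs after the loop, whether it stopped early ("synthesize") or exhausted max_iterations.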
    # Step 4: REAL Report generation
    print_step(iteration + 1, "REPORT GENERATION (REAL LLM)")
    report_agent = ReportAgent(evidence_store, embedding_service)
    report_response = await report_agent.run(query)

    print("\n" + "=" * 70)
    print(" FINAL RESEARCH REPORT")
    print("=" * 70)
    print(report_response.messages[0].text)

async def main() -> None:
    """Entry point."""
    parser = argparse.ArgumentParser(
        description="DeepCritical Full Stack Demo - REAL, No Mocks",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
This demo runs the COMPLETE pipeline with REAL API calls:
  1. REAL search: Actual PubMed + ClinicalTrials + bioRxiv queries
  2. REAL embeddings: Actual sentence-transformers model
  3. REAL hypothesis: Actual LLM generating mechanistic chains
  4. REAL judge: Actual LLM assessing evidence quality
  5. REAL report: Actual LLM generating structured report

Examples:
  uv run python examples/full_stack_demo/run_full.py "metformin Alzheimer's"
  uv run python examples/full_stack_demo/run_full.py "sildenafil heart failure" -i 3
  uv run python examples/full_stack_demo/run_full.py "aspirin cancer prevention"
""",
    )
    parser.add_argument(
        "query",
        help="Research query (e.g., 'metformin Alzheimer's disease')",
    )
    parser.add_argument(
        "-i",
        "--iterations",
        type=int,
        default=2,
        help="Max search iterations (default: 2)",
    )
    args = parser.parse_args()

    if args.iterations < 1:
        print("Error: iterations must be at least 1")
        sys.exit(1)
    # Fail fast: require API key
    if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
        print("=" * 70)
        print("ERROR: This demo requires a real LLM.")
        print()
        print("Set one of the following in your .env file:")
        print(" OPENAI_API_KEY=sk-...")
        print(" ANTHROPIC_API_KEY=sk-ant-...")
        print()
        print("This is a REAL demo. No mocks. No fake data.")
        print("=" * 70)
        sys.exit(1)

    await run_full_demo(args.query, args.iterations)

    print("\n" + "=" * 70)
    print(" DeepCritical Full Stack Demo Complete!")
    print(" ")
    print(" Everything you just saw was REAL:")
    print(" - Real PubMed + ClinicalTrials + bioRxiv searches")
    print(" - Real embedding computations")
    print(" - Real LLM reasoning")
    print(" - Real scientific report")
    print("=" * 70 + "\n")

if __name__ == "__main__":
    asyncio.run(main())