Joseph Pollack committed on
Commit 66c7f79 · unverified · 2 Parent(s): 687a1f1 e6c2142

Merge branch 'feature/iterative-deep-research-workflows' of https://github.com/Josephrp/DeepCritical-HFSpace into feature/iterative-deep-research-workflows

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .env.example +12 -8
  2. .gitignore +5 -0
  3. docs/brainstorming/00_ROADMAP_SUMMARY.md +194 -0
  4. docs/brainstorming/01_PUBMED_IMPROVEMENTS.md +125 -0
  5. docs/brainstorming/02_CLINICALTRIALS_IMPROVEMENTS.md +193 -0
  6. docs/brainstorming/03_EUROPEPMC_IMPROVEMENTS.md +211 -0
  7. docs/brainstorming/04_OPENALEX_INTEGRATION.md +303 -0
  8. docs/brainstorming/implementation/15_PHASE_OPENALEX.md +603 -0
  9. docs/brainstorming/implementation/16_PHASE_PUBMED_FULLTEXT.md +586 -0
  10. docs/brainstorming/implementation/17_PHASE_RATE_LIMITING.md +540 -0
  11. docs/brainstorming/implementation/README.md +143 -0
  12. docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md +189 -0
  13. docs/brainstorming/magentic-pydantic/01_ARCHITECTURE_SPEC.md +289 -0
  14. docs/brainstorming/magentic-pydantic/02_IMPLEMENTATION_PHASES.md +112 -0
  15. docs/brainstorming/magentic-pydantic/03_IMMEDIATE_ACTIONS.md +112 -0
  16. docs/brainstorming/magentic-pydantic/04_FOLLOWUP_REVIEW_REQUEST.md +158 -0
  17. docs/brainstorming/magentic-pydantic/REVIEW_PROMPT_FOR_SENIOR_AGENT.md +113 -0
  18. docs/bugs/FIX_PLAN_MAGENTIC_MODE.md +227 -0
  19. docs/bugs/P0_ACTIONABLE_FIXES.md +0 -281
  20. docs/bugs/P0_CRITICAL_BUGS.md +0 -298
  21. docs/bugs/P0_MAGENTIC_AND_SEARCH_AUDIT.md +0 -249
  22. docs/bugs/P0_MAGENTIC_MODE_BROKEN.md +116 -0
  23. docs/bugs/P1_GRADIO_SETTINGS_CLEANUP.md +81 -0
  24. docs/bugs/PHASE_00_IMPLEMENTATION_ORDER.md +0 -156
  25. docs/bugs/PHASE_01_REPLACE_BIORXIV.md +0 -371
  26. docs/bugs/PHASE_02_PUBMED_QUERY_PREPROCESSING.md +0 -355
  27. docs/bugs/PHASE_03_CLINICALTRIALS_FILTERING.md +0 -386
  28. examples/rate_limiting_demo.py +82 -0
  29. pyproject.toml +3 -1
  30. requirements.txt +7 -0
  31. src/agent_factory/judges.py +9 -1
  32. src/agents/code_executor_agent.py +69 -0
  33. src/agents/judge_agent_llm.py +45 -0
  34. src/agents/magentic_agents.py +1 -1
  35. src/agents/retrieval_agent.py +82 -0
  36. src/app.py +60 -69
  37. src/middleware/sub_iteration.py +135 -0
  38. src/orchestrator_factory.py +40 -15
  39. src/orchestrator_hierarchical.py +95 -0
  40. src/orchestrator_magentic.py +35 -4
  41. src/state/__init__.py +9 -0
  42. src/tools/__init__.py +2 -0
  43. src/tools/pubmed.py +5 -9
  44. src/tools/rate_limiter.py +121 -0
  45. src/tools/web_search.py +53 -0
  46. src/utils/config.py +14 -2
  47. src/utils/models.py +4 -0
  48. tests/integration/test_dual_mode_e2e.py +82 -0
  49. tests/integration/test_modal.py +11 -2
  50. tests/unit/agent_factory/test_judges_factory.py +64 -0
.env.example CHANGED
@@ -7,9 +7,17 @@ LLM_PROVIDER=openai
 OPENAI_API_KEY=sk-your-key-here
 ANTHROPIC_API_KEY=sk-ant-your-key-here
 
-# Model names (optional - sensible defaults)
-OPENAI_MODEL=gpt-5.1
-ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
+# Model names (optional - sensible defaults set in config.py)
+# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
+# OPENAI_MODEL=gpt-5.1
+
+# ============== EMBEDDINGS ==============
+
+# OpenAI Embedding Model (used if LLM_PROVIDER is openai and performing RAG/Embeddings)
+OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+
+# Local Embedding Model (used for local/offline embeddings)
+LOCAL_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 
 # ============== HUGGINGFACE (FREE TIER) ==============
 
@@ -20,7 +28,7 @@ ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
 # WITH HF_TOKEN: Uses Llama 3.1 8B Instruct (requires accepting license)
 #
 # For HuggingFace Spaces deployment:
-# Set this as a "Secret" in Space Settings Variables and secrets
+# Set this as a "Secret" in Space Settings -> Variables and secrets
 # Users/judges don't need their own token - the Space secret is used
 #
 HF_TOKEN=hf_your-token-here
@@ -36,9 +44,5 @@ LOG_LEVEL=INFO
 # PubMed (optional - higher rate limits)
 NCBI_API_KEY=your-ncbi-key-here
 
-# Modal Sandbox (optional - for secure code execution)
-MODAL_TOKEN_ID=ak-your-modal-token-id-here
-MODAL_TOKEN_SECRET=your-modal-token-secret-here
-
 # Vector Database (optional - for LlamaIndex RAG)
 CHROMA_DB_PATH=./chroma_db
.gitignore CHANGED
@@ -69,4 +69,9 @@ logs/
 .mypy_cache/
 .coverage
 htmlcov/
+
+# Database files
+chroma_db/
+*.sqlite3
+
 # Trigger rebuild Wed Nov 26 17:51:41 EST 2025
docs/brainstorming/00_ROADMAP_SUMMARY.md ADDED
@@ -0,0 +1,194 @@
+ # DeepCritical Data Sources: Roadmap Summary
+
+ **Created**: 2024-11-27
+ **Purpose**: Future maintainability and hackathon continuation
+
+ ---
+
+ ## Current State
+
+ ### Working Tools
+
+ | Tool | Status | Data Quality |
+ |------|--------|--------------|
+ | PubMed | ✅ Works | Good (abstracts only) |
+ | ClinicalTrials.gov | ✅ Works | Good (filtered for interventional) |
+ | Europe PMC | ✅ Works | Good (includes preprints) |
+
+ ### Removed Tools
+
+ | Tool | Status | Reason |
+ |------|--------|--------|
+ | bioRxiv | ❌ Removed | No search API - only date/DOI lookup |
+
+ ---
+
+ ## Priority Improvements
+
+ ### P0: Critical (Do First)
+
+ 1. **Add Rate Limiting to PubMed**
+    - NCBI will block us without it
+    - Use `limits` library (see reference repo)
+    - 3/sec without key, 10/sec with key
+
+ ### P1: High Value, Medium Effort
+
+ 2. **Add OpenAlex as 4th Source**
+    - Citation network (huge for drug repurposing)
+    - Concept tagging (semantic discovery)
+    - Already implemented in reference repo
+    - Free, no API key
+
+ 3. **PubMed Full-Text via BioC**
+    - Get full paper text for PMC papers
+    - Already in reference repo
+
+ ### P2: Nice to Have
+
+ 4. **ClinicalTrials.gov Results**
+    - Get efficacy data from completed trials
+    - Requires more complex API calls
+
+ 5. **Europe PMC Annotations**
+    - Text-mined entities (genes, drugs, diseases)
+    - Automatic entity extraction
+
+ ---
+
+ ## Effort Estimates
+
+ | Improvement | Effort | Impact | Priority |
+ |-------------|--------|--------|----------|
+ | PubMed rate limiting | 1 hour | Stability | P0 |
+ | OpenAlex basic search | 2 hours | High | P1 |
+ | OpenAlex citations | 2 hours | Very High | P1 |
+ | PubMed full-text | 3 hours | Medium | P1 |
+ | CT.gov results | 4 hours | Medium | P2 |
+ | Europe PMC annotations | 3 hours | Medium | P2 |
+
+ ---
+
+ ## Architecture Decision
+
+ ### Option A: Keep Current + Add OpenAlex
+
+ ```
+                  User Query
+                      │
+      ┌───────────────┼───────────────┐
+      ↓               ↓               ↓
+   PubMed      ClinicalTrials    Europe PMC
+ (abstracts)   (trials only)    (preprints)
+      ↓               ↓               ↓
+      └───────────────┼───────────────┘
+                      ↓
+                  OpenAlex  ← NEW
+           (citations, concepts)
+                      ↓
+                Orchestrator
+                      ↓
+                   Report
+ ```
+
+ **Pros**: Low risk, additive
+ **Cons**: More complexity, some overlap
+
+ ### Option B: OpenAlex as Primary
+
+ ```
+                  User Query
+                      │
+      ┌───────────────┼───────────────┐
+      ↓               ↓               ↓
+  OpenAlex     ClinicalTrials    Europe PMC
+  (primary     (trials only)    (full-text
+   search)                       fallback)
+      ↓               ↓               ↓
+      └───────────────┼───────────────┘
+                      ↓
+                Orchestrator
+                      ↓
+                   Report
+ ```
+
+ **Pros**: Simpler, citation network built-in
+ **Cons**: Lose some PubMed-specific features
+
+ ### Recommendation: Option A
+
+ Keep current architecture working, add OpenAlex incrementally.
+
+ ---
+
+ ## Quick Wins (Can Do Today)
+
+ 1. **Add `limits` to `pyproject.toml`**
+    ```toml
+    dependencies = [
+        "limits>=3.0",
+    ]
+    ```
+
+ 2. **Copy OpenAlex tool from reference repo**
+    - File: `reference_repos/DeepCritical/DeepResearch/src/tools/openalex_tools.py`
+    - Adapt to our `SearchTool` base class
+
+ 3. **Enable NCBI API Key**
+    - Add to `.env`: `NCBI_API_KEY=your_key`
+    - 10x rate limit improvement
+
+ ---
+
+ ## External Resources Worth Exploring
+
+ ### Python Libraries
+
+ | Library | For | Notes |
+ |---------|-----|-------|
+ | `limits` | Rate limiting | Used by reference repo |
+ | `pyalex` | OpenAlex wrapper | [GitHub](https://github.com/J535D165/pyalex) |
+ | `metapub` | PubMed | Full-featured |
+ | `sentence-transformers` | Semantic search | For embeddings |
+
+ ### APIs Not Yet Used
+
+ | API | Provides | Effort |
+ |-----|----------|--------|
+ | RxNorm | Drug name normalization | Low |
+ | DrugBank | Drug targets/mechanisms | Medium (license) |
+ | UniProt | Protein data | Medium |
+ | ChEMBL | Bioactivity data | Medium |
+
+ ### RAG Tools (Future)
+
+ | Tool | Purpose |
+ |------|---------|
+ | [PaperQA](https://github.com/Future-House/paper-qa) | RAG for scientific papers |
+ | [txtai](https://github.com/neuml/txtai) | Embeddings + search |
+ | [PubMedBERT](https://huggingface.co/NeuML/pubmedbert-base-embeddings) | Biomedical embeddings |
+
+ ---
+
+ ## Files in This Directory
+
+ | File | Contents |
+ |------|----------|
+ | `00_ROADMAP_SUMMARY.md` | This file |
+ | `01_PUBMED_IMPROVEMENTS.md` | PubMed enhancement details |
+ | `02_CLINICALTRIALS_IMPROVEMENTS.md` | ClinicalTrials.gov details |
+ | `03_EUROPEPMC_IMPROVEMENTS.md` | Europe PMC details |
+ | `04_OPENALEX_INTEGRATION.md` | OpenAlex integration plan |
+
+ ---
+
+ ## For Future Maintainers
+
+ If you're picking this up after the hackathon:
+
+ 1. **Start with OpenAlex** - biggest bang for buck
+ 2. **Add rate limiting** - prevents API blocks
+ 3. **Don't bother with bioRxiv** - use Europe PMC instead
+ 4. **Reference repo is gold** - `reference_repos/DeepCritical/` has working implementations
+
+ Good luck! 🚀
docs/brainstorming/01_PUBMED_IMPROVEMENTS.md ADDED
@@ -0,0 +1,125 @@
+ # PubMed Tool: Current State & Future Improvements
+
+ **Status**: Currently Implemented
+ **Priority**: High (Core Data Source)
+
+ ---
+
+ ## Current Implementation
+
+ ### What We Have (`src/tools/pubmed.py`)
+
+ - Basic E-utilities search via `esearch.fcgi` and `efetch.fcgi`
+ - Query preprocessing (strips question words, expands synonyms)
+ - Returns: title, abstract, authors, journal, PMID
+ - Rate limiting: None implemented (relying on NCBI defaults)
+
+ ### Current Limitations
+
+ 1. **No Full-Text Access**: Only retrieves abstracts, not full paper text
+ 2. **No Rate Limiting**: Risk of being blocked by NCBI
+ 3. **No BioC Format**: Missing structured full-text extraction
+ 4. **No Figure Retrieval**: No supplementary materials access
+ 5. **No PMC Integration**: Missing open-access full-text via PMC
+
+ ---
+
+ ## Reference Implementation (DeepCritical Reference Repo)
+
+ The reference repo at `reference_repos/DeepCritical/DeepResearch/src/tools/bioinformatics_tools.py` has a more sophisticated implementation:
+
+ ### Features We're Missing
+
+ ```python
+ # Rate limiting (lines 47-50)
+ from limits import parse
+ from limits.storage import MemoryStorage
+ from limits.strategies import MovingWindowRateLimiter
+
+ storage = MemoryStorage()
+ limiter = MovingWindowRateLimiter(storage)
+ rate_limit = parse("3/second")  # NCBI allows 3/sec without API key, 10/sec with
+
+ # Full-text via BioC format (lines 108-120)
+ def _get_fulltext(pmid: int) -> dict[str, Any] | None:
+     pmid_url = f"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/{pmid}/unicode"
+     # Returns structured JSON with full text for open-access papers
+
+ # Figure retrieval via Europe PMC (lines 123-149)
+ def _get_figures(pmcid: str) -> dict[str, str]:
+     suppl_url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/supplementaryFiles"
+     # Returns base64-encoded images from supplementary materials
+ ```
+
+ ---
+
+ ## Recommended Improvements
+
+ ### Phase 1: Rate Limiting (Critical)
+
+ ```python
+ # Add to src/tools/pubmed.py
+ from limits import parse
+ from limits.storage import MemoryStorage
+ from limits.strategies import MovingWindowRateLimiter
+
+ storage = MemoryStorage()
+ limiter = MovingWindowRateLimiter(storage)
+
+ # With NCBI_API_KEY: 10/sec, without: 3/sec
+ def get_rate_limit():
+     if settings.ncbi_api_key:
+         return parse("10/second")
+     return parse("3/second")
+ ```
+
+ **Dependencies**: `pip install limits`
+
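+ A minimal usage sketch, assuming the `limits` objects above (the `"pubmed"` bucket name is arbitrary): each request would first acquire a slot.
+
+ ```python
+ import asyncio
+
+ async def wait_for_slot() -> None:
+     """Block until the moving-window limiter grants a request slot."""
+     rate_limit = get_rate_limit()
+     # hit() returns False while the current window is exhausted
+     while not limiter.hit(rate_limit, "pubmed"):
+         await asyncio.sleep(0.1)
+
+ # Call `await wait_for_slot()` before every esearch/efetch request.
+ ```
+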
+ ### Phase 2: Full-Text Retrieval
+
+ ```python
+ async def get_fulltext(pmid: str) -> str | None:
+     """Get full text for open-access papers via BioC API."""
+     url = f"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/{pmid}/unicode"
+     # Only works for PMC papers (open access)
+ ```
+
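+ The BioC JSON can then be flattened to plain text, roughly as below (a sketch; the collection → documents → passages nesting is assumed from the BioC format):
+
+ ```python
+ def bioc_to_text(bioc: dict) -> str:
+     """Concatenate passage text from a BioC collection."""
+     parts: list[str] = []
+     for doc in bioc.get("documents", []):
+         for passage in doc.get("passages", []):
+             if passage.get("text"):
+                 parts.append(passage["text"])
+     return "\n\n".join(parts)
+ ```
+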
+ ### Phase 3: PMC ID Resolution
+
+ ```python
+ async def get_pmc_id(pmid: str) -> str | None:
+     """Convert PMID to PMCID for full-text access."""
+     url = f"https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?ids={pmid}&format=json"
+     async with httpx.AsyncClient() as client:
+         data = (await client.get(url)).json()
+     # Response shape: {"records": [{"pmid": ..., "pmcid": "PMC..."}]}
+     records = data.get("records", [])
+     return records[0].get("pmcid") if records else None
+ ```
+
+ ---
+
+ ## Python Libraries to Consider
+
+ | Library | Purpose | Notes |
+ |---------|---------|-------|
+ | [Biopython](https://biopython.org/) | `Bio.Entrez` module | Official, well-maintained |
+ | [PyMed](https://pypi.org/project/pymed/) | PubMed wrapper | Simpler API, less control |
+ | [metapub](https://pypi.org/project/metapub/) | Full-featured | Tested on 1/3 of PubMed |
+ | [limits](https://pypi.org/project/limits/) | Rate limiting | Used by reference repo |
+
+ ---
+
+ ## API Endpoints Reference
+
+ | Endpoint | Purpose | Rate Limit |
+ |----------|---------|------------|
+ | `esearch.fcgi` | Search for PMIDs | 3/sec (10 with key) |
+ | `efetch.fcgi` | Fetch metadata | 3/sec (10 with key) |
+ | `esummary.fcgi` | Quick metadata | 3/sec (10 with key) |
+ | `pmcoa.cgi/BioC_json` | Full text (PMC only) | Unknown |
+ | `idconv/v1.0` | PMID ↔ PMCID | Unknown |
+
+ ---
+
+ ## Sources
+
+ - [PubMed E-utilities Documentation](https://www.ncbi.nlm.nih.gov/books/NBK25501/)
+ - [NCBI BioC API](https://www.ncbi.nlm.nih.gov/research/bionlp/APIs/)
+ - [Searching PubMed with Python](https://marcobonzanini.com/2015/01/12/searching-pubmed-with-python/)
+ - [PyMed on PyPI](https://pypi.org/project/pymed/)
docs/brainstorming/02_CLINICALTRIALS_IMPROVEMENTS.md ADDED
@@ -0,0 +1,193 @@
+ # ClinicalTrials.gov Tool: Current State & Future Improvements
+
+ **Status**: Currently Implemented
+ **Priority**: High (Core Data Source for Drug Repurposing)
+
+ ---
+
+ ## Current Implementation
+
+ ### What We Have (`src/tools/clinicaltrials.py`)
+
+ - V2 API search via `clinicaltrials.gov/api/v2/studies`
+ - Filters: `INTERVENTIONAL` study type, `RECRUITING` status
+ - Returns: NCT ID, title, conditions, interventions, phase, status
+ - Query preprocessing via shared `query_utils.py`
+
+ ### Current Strengths
+
+ 1. **Good Filtering**: Already filtering for interventional + recruiting
+ 2. **V2 API**: Using the modern API (v1 deprecated)
+ 3. **Phase Info**: Extracting trial phases for drug development context
+
+ ### Current Limitations
+
+ 1. **No Outcome Data**: Missing primary/secondary outcomes
+ 2. **No Eligibility Criteria**: Missing inclusion/exclusion details
+ 3. **No Sponsor Info**: Missing who's running the trial
+ 4. **No Result Data**: For completed trials, no efficacy data
+ 5. **Limited Drug Mapping**: No integration with drug databases
+
+ ---
+
+ ## API Capabilities We're Not Using
+
+ ### Fields We Could Request
+
+ ```python
+ # Current fields
+ fields = ["NCTId", "BriefTitle", "Condition", "InterventionName", "Phase", "OverallStatus"]
+
+ # Additional valuable fields
+ additional_fields = [
+     "PrimaryOutcomeMeasure",    # What are they measuring?
+     "SecondaryOutcomeMeasure",  # Secondary endpoints
+     "EligibilityCriteria",      # Who can participate?
+     "LeadSponsorName",          # Who's funding?
+     "ResultsFirstPostDate",     # Has results?
+     "StudyFirstPostDate",       # When started?
+     "CompletionDate",           # When finished?
+     "EnrollmentCount",          # Sample size
+     "InterventionDescription",  # Drug details
+     "ArmGroupLabel",            # Treatment arms
+     "InterventionOtherName",    # Drug aliases
+ ]
+ ```
+
+ ### Filter Enhancements
+
+ ```python
+ # Current
+ aggFilters = "studyType:INTERVENTIONAL,status:RECRUITING"
+
+ # Could add
+ "status:RECRUITING,ACTIVE_NOT_RECRUITING,COMPLETED"  # Include completed for results
+ "phase:PHASE2,PHASE3"                                # Only later-stage trials
+ "resultsFirstPostDateRange:2020-01-01_"              # Trials with posted results
+ ```
+
+ ---
+
+ ## Recommended Improvements
+
+ ### Phase 1: Richer Metadata
+
+ ```python
+ EXTENDED_FIELDS = [
+     "NCTId",
+     "BriefTitle",
+     "OfficialTitle",
+     "Condition",
+     "InterventionName",
+     "InterventionDescription",
+     "InterventionOtherName",  # Drug synonyms!
+     "Phase",
+     "OverallStatus",
+     "PrimaryOutcomeMeasure",
+     "EnrollmentCount",
+     "LeadSponsorName",
+     "StudyFirstPostDate",
+ ]
+ ```
+
+ ### Phase 2: Results Retrieval
+
+ For completed trials, we can get actual efficacy data:
+
+ ```python
+ async def get_trial_results(nct_id: str) -> dict | None:
+     """Fetch results for completed trials."""
+     url = f"https://clinicaltrials.gov/api/v2/studies/{nct_id}"
+     params = {
+         "fields": "ResultsSection",
+     }
+     # Returns outcome measures and statistics
+ ```
+
+ ### Phase 3: Drug Name Normalization
+
+ Map intervention names to standard identifiers:
+
+ ```python
+ # Problem: "Metformin", "Metformin HCl", "Glucophage" are the same drug
+ # Solution: Use RxNorm or DrugBank for normalization
+
+ async def normalize_drug_name(intervention: str) -> str | None:
+     """Normalize drug name via RxNorm API; returns the standardized RxCUI."""
+     url = f"https://rxnav.nlm.nih.gov/REST/rxcui.json?name={intervention}"
+     async with httpx.AsyncClient() as client:
+         data = (await client.get(url)).json()
+     ids = data.get("idGroup", {}).get("rxnormId", [])
+     return ids[0] if ids else None
+ ```
+
+ ---
+
+ ## Integration Opportunities
+
+ ### With PubMed
+
+ Cross-reference trials with publications:
+ ```python
+ # ClinicalTrials.gov provides PMID links
+ # Can correlate trial results with published papers
+ ```
+
+ ### With DrugBank/ChEMBL
+
+ Map interventions to:
+ - Mechanism of action
+ - Known targets
+ - Adverse effects
+ - Drug-drug interactions
+
+ ---
+
+ ## Python Libraries to Consider
+
+ | Library | Purpose | Notes |
+ |---------|---------|-------|
+ | [pytrials](https://pypi.org/project/pytrials/) | CT.gov wrapper | V2 API support unclear |
+ | [clinicaltrials](https://github.com/ebmdatalab/clinicaltrials-act-tracker) | Data tracking | More for analysis |
+ | [drugbank-downloader](https://pypi.org/project/drugbank-downloader/) | Drug mapping | Requires license |
+
+ ---
+
+ ## API Quirks & Gotchas
+
+ 1. **Rate Limiting**: Undocumented, be conservative
+ 2. **Pagination**: Max 1000 results per request
+ 3. **Field Names**: Case-sensitive, camelCase
+ 4. **Empty Results**: Some fields may be null even if requested
+ 5. **Status Changes**: Trials change status frequently
+
+ ---
+
+ ## Example Enhanced Query
+
+ The function below builds the request parameters; a sketch of executing and paging them follows.
+
+ ```python
+ async def search_drug_repurposing_trials(
+     drug_name: str,
+     condition: str,
+     include_completed: bool = True,
+ ) -> list[Evidence]:
+     """Search for trials repurposing a drug for a new condition."""
+
+     statuses = ["RECRUITING", "ACTIVE_NOT_RECRUITING"]
+     if include_completed:
+         statuses.append("COMPLETED")
+
+     params = {
+         "query.intr": drug_name,
+         "query.cond": condition,
+         "filter.overallStatus": ",".join(statuses),
+         "filter.studyType": "INTERVENTIONAL",
+         "fields": ",".join(EXTENDED_FIELDS),
+         "pageSize": 50,
+     }
+ ```
+
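+ A hedged sketch of executing that query, including the pagination quirk noted above (the v2 API returns `studies` plus an optional `nextPageToken`; `Evidence` mapping omitted):
+
+ ```python
+ import httpx
+
+ async def fetch_all_studies(params: dict, max_pages: int = 5) -> list[dict]:
+     """Follow nextPageToken until exhausted or max_pages is reached."""
+     studies: list[dict] = []
+     async with httpx.AsyncClient(timeout=30.0) as client:
+         for _ in range(max_pages):
+             resp = await client.get("https://clinicaltrials.gov/api/v2/studies", params=params)
+             resp.raise_for_status()
+             data = resp.json()
+             studies.extend(data.get("studies", []))
+             token = data.get("nextPageToken")
+             if not token:
+                 break
+             params = {**params, "pageToken": token}
+     return studies
+ ```
+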
+ ---
+
+ ## Sources
+
+ - [ClinicalTrials.gov API Documentation](https://clinicaltrials.gov/data-api/api)
+ - [CT.gov Field Definitions](https://clinicaltrials.gov/data-api/about-api/study-data-structure)
+ - [RxNorm API](https://lhncbc.nlm.nih.gov/RxNav/APIs/api-RxNorm.findRxcuiByString.html)
docs/brainstorming/03_EUROPEPMC_IMPROVEMENTS.md ADDED
@@ -0,0 +1,211 @@
+ # Europe PMC Tool: Current State & Future Improvements
+
+ **Status**: Currently Implemented (Replaced bioRxiv)
+ **Priority**: High (Preprint + Open Access Source)
+
+ ---
+
+ ## Why Europe PMC Over bioRxiv?
+
+ ### bioRxiv API Limitations (Why We Abandoned It)
+
+ 1. **No Search API**: Only returns papers by date range or DOI
+ 2. **No Query Capability**: Cannot search for "metformin cancer"
+ 3. **Workaround Required**: Would need to download ALL preprints and build local search
+ 4. **Known Issue**: [Gradio Issue #8861](https://github.com/gradio-app/gradio/issues/8861) documents the limitation
+
+ ### Europe PMC Advantages
+
+ 1. **Full Search API**: Boolean queries, filters, facets
+ 2. **Aggregates bioRxiv**: Includes bioRxiv, medRxiv content anyway
+ 3. **Includes PubMed**: Also has MEDLINE content
+ 4. **34 Preprint Servers**: Not just bioRxiv
+ 5. **Open Access Focus**: Full-text when available
+
+ ---
+
+ ## Current Implementation
+
+ ### What We Have (`src/tools/europepmc.py`)
+
+ - REST API search via `europepmc.org/webservices/rest/search`
+ - Preprint flagging via `firstPublicationDate` heuristics
+ - Returns: title, abstract, authors, DOI, source
+ - Marks preprints for transparency
+
+ ### Current Limitations
+
+ 1. **No Full-Text Retrieval**: Only metadata/abstracts
+ 2. **No Citation Network**: Missing references/citations
+ 3. **No Supplementary Files**: Not fetching figures/data
+ 4. **Basic Preprint Detection**: Heuristic, not explicit flag
+
+ ---
+
+ ## Europe PMC API Capabilities
+
+ ### Endpoints We Could Use
+
+ | Endpoint | Purpose | Currently Using |
+ |----------|---------|-----------------|
+ | `/search` | Query papers | Yes |
+ | `/fulltext/{ID}` | Full text (XML/JSON) | No |
+ | `/{PMCID}/supplementaryFiles` | Figures, data | No |
+ | `/citations/{ID}` | Who cited this | No |
+ | `/references/{ID}` | What this cites | No |
+ | `/annotations` | Text-mined entities | No |
+
+ ### Rich Query Syntax
+
+ ```python
+ # Current simple query
+ query = "metformin cancer"
+
+ # Could use advanced syntax
+ query = "(TITLE:metformin OR ABSTRACT:metformin) AND (cancer OR oncology)"
+ query += " AND (SRC:PPR)"                                # Only preprints
+ query += " AND (FIRST_PDATE:[2023-01-01 TO 2024-12-31])"  # Date range
+ query += " AND (OPEN_ACCESS:y)"                          # Only open access
+ ```
+
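+ A sketch of sending such a query to the search endpoint used above (parameter names per the REST docs; mapping to our result model omitted):
+
+ ```python
+ import httpx
+
+ async def europepmc_search(query: str, page_size: int = 25) -> list[dict]:
+     """Run a raw Europe PMC query and return the result records."""
+     async with httpx.AsyncClient(timeout=30.0) as client:
+         resp = await client.get(
+             "https://www.ebi.ac.uk/europepmc/webservices/rest/search",
+             params={"query": query, "format": "json", "pageSize": page_size},
+         )
+         resp.raise_for_status()
+         return resp.json().get("resultList", {}).get("result", [])
+ ```
+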
+ ### Source Filters
+
+ ```python
+ # Filter by source
+ "SRC:MED"  # MEDLINE
+ "SRC:PMC"  # PubMed Central
+ "SRC:PPR"  # Preprints (bioRxiv, medRxiv, etc.)
+ "SRC:AGR"  # Agricola
+ "SRC:CBA"  # Chinese Biological Abstracts
+ ```
+
+ ---
+
+ ## Recommended Improvements
+
+ ### Phase 1: Rich Metadata
+
+ ```python
+ # Add to search results
+ additional_fields = [
+     "citedByCount",        # Impact indicator
+     "source",              # Explicit source (MED, PMC, PPR)
+     "isOpenAccess",        # Boolean flag
+     "fullTextUrlList",     # URLs for full text
+     "authorAffiliations",  # Institution info
+     "grantsList",          # Funding info
+ ]
+ ```
+
+ ### Phase 2: Full-Text Retrieval
+
+ ```python
+ async def get_fulltext(pmcid: str) -> str | None:
+     """Get full text for open access papers."""
+     # XML format
+     url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/fullTextXML"
+     # Or JSON
+     url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/fullTextJSON"
+ ```
+
+ ### Phase 3: Citation Network
+
+ ```python
+ async def get_citations(pmcid: str) -> list[str]:
+     """Get papers that cite this one."""
+     url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/citations"
+
+ async def get_references(pmcid: str) -> list[str]:
+     """Get papers this one cites."""
+     url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/references"
+ ```
+
+ ### Phase 4: Text-Mined Annotations
+
+ Europe PMC extracts entities automatically:
+
+ ```python
+ async def get_annotations(pmcid: str) -> dict:
+     """Get text-mined entities (genes, diseases, drugs)."""
+     url = "https://www.ebi.ac.uk/europepmc/annotations_api/annotationsByArticleIds"
+     params = {
+         "articleIds": f"PMC:{pmcid}",
+         "type": "Gene_Proteins,Diseases,Chemicals",
+         "format": "JSON",
+     }
+     # Returns structured entity mentions with positions
+ ```
+
+ ---
+
+ ## Supplementary File Retrieval
+
+ From reference repo (`bioinformatics_tools.py` lines 123-149):
+
+ ```python
+ def get_figures(pmcid: str) -> dict[str, str]:
+     """Download figures and supplementary files."""
+     suppl_url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/supplementaryFiles?includeInlineImage=true"
+     # Returns a ZIP of images, re-encoded as base64
+ ```
+
+ ---
+
+ ## Preprint-Specific Features
+
+ ### Identify Preprint Servers
+
+ ```python
+ PREPRINT_SOURCES = {
+     "PPR": "General preprints",
+     "bioRxiv": "Biology preprints",
+     "medRxiv": "Medical preprints",
+     "chemRxiv": "Chemistry preprints",
+     "Research Square": "Multi-disciplinary",
+     "Preprints.org": "MDPI preprints",
+ }
+
+ # Check if published version exists
+ async def check_published_version(preprint_doi: str) -> str | None:
+     """Check if preprint has been peer-reviewed and published."""
+     # Europe PMC links preprints to final versions
+ ```
+
+ ---
+
+ ## Rate Limiting
+
+ Europe PMC is more generous than NCBI:
+
+ ```python
+ # No documented hard limit, but be respectful
+ # Recommend: 10-20 requests/second max
+ # Use email in User-Agent for polite pool
+ headers = {
+     "User-Agent": "DeepCritical/1.0 (mailto:your@email.com)"
+ }
+ ```
+
+ ---
+
+ ## vs. The Lens & OpenAlex
+
+ | Feature | Europe PMC | The Lens | OpenAlex |
+ |---------|------------|----------|----------|
+ | Biomedical Focus | Yes | Partial | Partial |
+ | Preprints | Yes (34 servers) | Yes | Yes |
+ | Full Text | PMC papers | Links | No |
+ | Citations | Yes | Yes | Yes |
+ | Annotations | Yes (text-mined) | No | No |
+ | Rate Limits | Generous | Moderate | Very generous |
+ | API Key | Optional | Required | Optional |
+
+ ---
+
+ ## Sources
+
+ - [Europe PMC REST API](https://europepmc.org/RestfulWebService)
+ - [Europe PMC Annotations API](https://europepmc.org/AnnotationsApi)
+ - [Europe PMC Articles API](https://europepmc.org/ArticlesApi)
+ - [rOpenSci medrxivr](https://docs.ropensci.org/medrxivr/)
+ - [bioRxiv TDM Resources](https://www.biorxiv.org/tdm)
docs/brainstorming/04_OPENALEX_INTEGRATION.md ADDED
@@ -0,0 +1,303 @@
+ # OpenAlex Integration: The Missing Piece?
+
+ **Status**: NOT Implemented (Candidate for Addition)
+ **Priority**: HIGH - Could Replace Multiple Tools
+ **Reference**: Already implemented in `reference_repos/DeepCritical`
+
+ ---
+
+ ## What is OpenAlex?
+
+ OpenAlex is a **fully open** index of the global research system:
+
+ - **209M+ works** (papers, books, datasets)
+ - **2B+ author records** (disambiguated)
+ - **124K+ venues** (journals, repositories)
+ - **109K+ institutions**
+ - **65K+ concepts** (hierarchical, linked to Wikidata)
+
+ **Free. Open. No API key required.**
+
+ ---
+
+ ## Why OpenAlex for DeepCritical?
+
+ ### Current Architecture
+
+ ```
+                  User Query
+                      ↓
+ ┌──────────────────────────────────────┐
+ │  PubMed   ClinicalTrials  Europe PMC │  ← 3 separate APIs
+ └──────────────────────────────────────┘
+                      ↓
+   Orchestrator (deduplicate, judge, synthesize)
+ ```
+
+ ### With OpenAlex
+
+ ```
+                  User Query
+                      ↓
+ ┌──────────────────────────────────────┐
+ │               OpenAlex               │  ← Single API
+ │    (includes PubMed + preprints +    │
+ │    citations + concepts + authors)   │
+ └──────────────────────────────────────┘
+                      ↓
+   Orchestrator (enrich with CT.gov for trials)
+ ```
+
+ **OpenAlex already aggregates**:
+ - PubMed/MEDLINE
+ - Crossref
+ - ORCID
+ - Unpaywall (open access links)
+ - Microsoft Academic Graph (legacy)
+ - Preprint servers
+
+ ---
+
+ ## Reference Implementation
+
+ From `reference_repos/DeepCritical/DeepResearch/src/tools/openalex_tools.py`:
+
+ ```python
+ class OpenAlexFetchTool(ToolRunner):
+     def __init__(self):
+         super().__init__(
+             ToolSpec(
+                 name="openalex_fetch",
+                 description="Fetch OpenAlex work or author",
+                 inputs={"entity": "TEXT", "identifier": "TEXT"},
+                 outputs={"result": "JSON"},
+             )
+         )
+
+     def run(self, params: dict[str, Any]) -> ExecutionResult:
+         entity = params["entity"]  # "works", "authors", "venues"
+         identifier = params["identifier"]
+         base = "https://api.openalex.org"
+         url = f"{base}/{entity}/{identifier}"
+         resp = requests.get(url, timeout=30)
+         return ExecutionResult(success=True, data={"result": resp.json()})
+ ```
+
+ ---
+
+ ## OpenAlex API Features
+
+ ### Search Works (Papers)
+
+ ```python
+ # Search for metformin + cancer papers
+ url = "https://api.openalex.org/works"
+ params = {
+     "search": "metformin cancer drug repurposing",
+     "filter": "publication_year:>2020,type:article",
+     "sort": "cited_by_count:desc",
+     "per_page": 50,
+ }
+ ```
+
+ ### Rich Filtering
+
+ ```python
+ # Filter examples
+ "publication_year:2023"
+ "type:article"                           # vs preprint, book, etc.
+ "is_oa:true"                             # Open access only
+ "concepts.id:C71924100"                  # Papers about "Medicine"
+ "authorships.institutions.id:I27837315"  # From Harvard
+ "cited_by_count:>100"                    # Highly cited
+ "has_fulltext:true"                      # Full text available
+ ```
+
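+ Filters combine as a comma-separated AND list in a single `filter` parameter, e.g. (a small sketch):
+
+ ```python
+ params = {
+     "search": "drug repurposing",
+     # comma = AND: recent, open-access, highly cited articles only
+     "filter": "publication_year:>2020,is_oa:true,cited_by_count:>100",
+ }
+ ```
+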
+ ### What You Get Back
+
+ ```json
+ {
+   "id": "W2741809807",
+   "title": "Metformin: A candidate drug for...",
+   "publication_year": 2023,
+   "type": "article",
+   "cited_by_count": 45,
+   "is_oa": true,
+   "primary_location": {
+     "source": {"display_name": "Nature Medicine"},
+     "pdf_url": "https://...",
+     "landing_page_url": "https://..."
+   },
+   "concepts": [
+     {"id": "C71924100", "display_name": "Medicine", "score": 0.95},
+     {"id": "C54355233", "display_name": "Pharmacology", "score": 0.88}
+   ],
+   "authorships": [
+     {
+       "author": {"id": "A123", "display_name": "John Smith"},
+       "institutions": [{"display_name": "Harvard Medical School"}]
+     }
+   ],
+   "referenced_works": ["W123", "W456"],
+   "related_works": ["W789", "W012"]
+ }
+ ```
+
+ (`referenced_works` lists what this paper cites; `related_works` lists similar papers.)
+
+ ---
+
+ ## Key Advantages Over Current Tools
+
+ ### 1. Citation Network (We Don't Have This!)
+
+ ```python
+ # Get papers that cite a work
+ url = f"https://api.openalex.org/works?filter=cites:{work_id}"
+
+ # Get papers cited by a work
+ # Already in `referenced_works` field
+ ```
+
+ ### 2. Concept Tagging (We Don't Have This!)
+
+ OpenAlex auto-tags papers with hierarchical concepts:
+ - "Medicine" → "Pharmacology" → "Drug Repurposing"
+ - Can search by concept, not just keywords
+
+ ### 3. Author Disambiguation (We Don't Have This!)
+
+ ```python
+ # Find all works by an author
+ url = f"https://api.openalex.org/works?filter=authorships.author.id:{author_id}"
+ ```
+
+ ### 4. Institution Tracking
+
+ ```python
+ # Find drug repurposing papers from top institutions
+ url = "https://api.openalex.org/works"
+ params = {
+     "search": "drug repurposing",
+     "filter": "authorships.institutions.id:I27837315",  # Harvard
+ }
+ ```
+
+ ### 5. Related Works
+
+ Each paper comes with `related_works` - semantically similar papers discovered by OpenAlex's ML; see the sketch below.
+
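+ A sketch of expanding those IDs into full records (the `related_works` values may be bare IDs or full OpenAlex URLs, so both are handled):
+
+ ```python
+ import httpx
+
+ def fetch_related(work: dict, limit: int = 5) -> list[dict]:
+     """Fetch full records for a work's related_works entries."""
+     related = []
+     for ref in work.get("related_works", [])[:limit]:
+         work_id = ref.rsplit("/", 1)[-1]  # "W789" or "https://openalex.org/W789"
+         resp = httpx.get(f"https://api.openalex.org/works/{work_id}", timeout=30)
+         resp.raise_for_status()
+         related.append(resp.json())
+     return related
+ ```
+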
+ ---
+
+ ## Proposed Implementation
+
+ ### New Tool: `src/tools/openalex.py`
+
+ ```python
+ """OpenAlex search tool for comprehensive scholarly data."""
+
+ import httpx
+ from src.tools.base import SearchTool
+ from src.utils.models import Evidence
+
+ class OpenAlexTool(SearchTool):
+     """Search OpenAlex for scholarly works with rich metadata."""
+
+     name = "openalex"
+
+     async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
+         async with httpx.AsyncClient() as client:
+             resp = await client.get(
+                 "https://api.openalex.org/works",
+                 params={
+                     "search": query,
+                     "filter": "type:article,is_oa:true",
+                     "sort": "cited_by_count:desc",
+                     "per_page": max_results,
+                     "mailto": "deepcritical@example.com",  # Polite pool
+                 },
+             )
+             data = resp.json()
+
+         return [
+             Evidence(
+                 source="openalex",
+                 title=work["title"],
+                 # NOTE: live responses expose "abstract_inverted_index", not
+                 # "abstract" - reconstruction is handled in the Phase 15 plan
+                 abstract=work.get("abstract", ""),
+                 url=work["primary_location"]["landing_page_url"],
+                 metadata={
+                     "cited_by_count": work["cited_by_count"],
+                     "concepts": [c["display_name"] for c in work["concepts"][:5]],
+                     "is_open_access": work["is_oa"],
+                     "pdf_url": work["primary_location"].get("pdf_url"),
+                 },
+             )
+             for work in data["results"]
+         ]
+ ```
+
+ ---
+
+ ## Rate Limits
+
+ OpenAlex is **extremely generous**:
+
+ - No hard rate limit documented
+ - Recommended: <100,000 requests/day
+ - **Polite pool**: Add `mailto=your@email.com` param for faster responses
+ - No API key required (optional for priority support)
+
+ ---
+
+ ## Should We Add OpenAlex?
+
+ ### Arguments FOR
+
+ 1. **Already in reference repo** - proven pattern
+ 2. **Richer data** - citations, concepts, authors
+ 3. **Single source** - reduces API complexity
+ 4. **Free & open** - no keys, no limits
+ 5. **Institution adoption** - Leiden, Sorbonne switched to it
+
+ ### Arguments AGAINST
+
+ 1. **Adds complexity** - another data source
+ 2. **Overlap** - duplicates some PubMed data
+ 3. **Not biomedical-focused** - covers all disciplines
+ 4. **No full text** - still need PMC/Europe PMC for that
+
+ ### Recommendation
+
+ **Add OpenAlex as a 4th source**, don't replace existing tools.
+
+ Use it for:
+ - Citation network analysis
+ - Concept-based discovery
+ - High-impact paper finding
+ - Author/institution tracking
+
+ Keep PubMed, ClinicalTrials, Europe PMC for:
+ - Authoritative biomedical search
+ - Clinical trial data
+ - Full-text access
+ - Preprint tracking
+
+ ---
+
+ ## Implementation Priority
+
+ | Task | Effort | Value |
+ |------|--------|-------|
+ | Basic search | Low | High |
+ | Citation network | Medium | Very High |
+ | Concept filtering | Low | High |
+ | Related works | Low | High |
+ | Author tracking | Medium | Medium |
+
+ ---
+
+ ## Sources
+
+ - [OpenAlex Documentation](https://docs.openalex.org)
+ - [OpenAlex API Overview](https://docs.openalex.org/api)
+ - [OpenAlex Wikipedia](https://en.wikipedia.org/wiki/OpenAlex)
+ - [Leiden University Announcement](https://www.leidenranking.com/information/openalex)
+ - [OpenAlex: A fully-open index (Paper)](https://arxiv.org/abs/2205.01833)
docs/brainstorming/implementation/15_PHASE_OPENALEX.md ADDED
@@ -0,0 +1,603 @@
1
+ # Phase 15: OpenAlex Integration
2
+
3
+ **Priority**: HIGH - Biggest bang for buck
4
+ **Effort**: ~2-3 hours
5
+ **Dependencies**: None (existing codebase patterns sufficient)
6
+
7
+ ---
8
+
9
+ ## Prerequisites (COMPLETED)
10
+
11
+ The following model changes have been implemented to support this integration:
12
+
13
+ 1. **`SourceName` Literal Updated** (`src/utils/models.py:9`)
14
+ ```python
15
+ SourceName = Literal["pubmed", "clinicaltrials", "europepmc", "preprint", "openalex"]
16
+ ```
17
+ - Without this, `source="openalex"` would fail Pydantic validation
18
+
19
+ 2. **`Evidence.metadata` Field Added** (`src/utils/models.py:39-42`)
20
+ ```python
21
+ metadata: dict[str, Any] = Field(
22
+ default_factory=dict,
23
+ description="Additional metadata (e.g., cited_by_count, concepts, is_open_access)",
24
+ )
25
+ ```
26
+ - Required for storing `cited_by_count`, `concepts`, etc.
27
+ - Model is still frozen - metadata must be passed at construction time
28
+
29
+ 3. **`__init__.py` Exports Updated** (`src/tools/__init__.py`)
30
+ - All tools are now exported: `ClinicalTrialsTool`, `EuropePMCTool`, `PubMedTool`
31
+ - OpenAlexTool should be added here after implementation
32
+
33
+ ---
34
+
35
+ ## Overview
36
+
37
+ Add OpenAlex as a 4th data source for comprehensive scholarly data including:
38
+ - Citation networks (who cites whom)
39
+ - Concept tagging (hierarchical topic classification)
40
+ - Author disambiguation
41
+ - 209M+ works indexed
42
+
43
+ **Why OpenAlex?**
44
+ - Free, no API key required
45
+ - Already implemented in reference repo
46
+ - Provides citation data we don't have
47
+ - Aggregates PubMed + preprints + more
48
+
49
+ ---
50
+
51
+ ## TDD Implementation Plan
52
+
53
+ ### Step 1: Write the Tests First
54
+
55
+ **File**: `tests/unit/tools/test_openalex.py`
56
+
57
+ ```python
58
+ """Tests for OpenAlex search tool."""
59
+
60
+ import pytest
61
+ import respx
62
+ from httpx import Response
63
+
64
+ from src.tools.openalex import OpenAlexTool
65
+ from src.utils.models import Evidence
66
+
67
+
68
+ class TestOpenAlexTool:
69
+ """Test suite for OpenAlex search functionality."""
70
+
71
+ @pytest.fixture
72
+ def tool(self) -> OpenAlexTool:
73
+ return OpenAlexTool()
74
+
75
+ def test_name_property(self, tool: OpenAlexTool) -> None:
76
+ """Tool should identify itself as 'openalex'."""
77
+ assert tool.name == "openalex"
78
+
79
+ @respx.mock
80
+ @pytest.mark.asyncio
81
+ async def test_search_returns_evidence(self, tool: OpenAlexTool) -> None:
82
+ """Search should return list of Evidence objects."""
83
+ mock_response = {
84
+ "results": [
85
+ {
86
+ "id": "W2741809807",
87
+ "title": "Metformin and cancer: A systematic review",
88
+ "publication_year": 2023,
89
+ "cited_by_count": 45,
90
+ "type": "article",
91
+ "is_oa": True,
92
+ "primary_location": {
93
+ "source": {"display_name": "Nature Medicine"},
94
+ "landing_page_url": "https://doi.org/10.1038/example",
95
+ "pdf_url": None,
96
+ },
97
+ "abstract_inverted_index": {
98
+ "Metformin": [0],
99
+ "shows": [1],
100
+ "anticancer": [2],
101
+ "effects": [3],
102
+ },
103
+ "concepts": [
104
+ {"display_name": "Medicine", "score": 0.95},
105
+ {"display_name": "Oncology", "score": 0.88},
106
+ ],
107
+ "authorships": [
108
+ {
109
+ "author": {"display_name": "John Smith"},
110
+ "institutions": [{"display_name": "Harvard"}],
111
+ }
112
+ ],
113
+ }
114
+ ]
115
+ }
116
+
117
+ respx.get("https://api.openalex.org/works").mock(
118
+ return_value=Response(200, json=mock_response)
119
+ )
120
+
121
+ results = await tool.search("metformin cancer", max_results=10)
122
+
123
+ assert len(results) == 1
124
+ assert isinstance(results[0], Evidence)
125
+ assert "Metformin and cancer" in results[0].citation.title
126
+ assert results[0].citation.source == "openalex"
127
+
128
+ @respx.mock
129
+ @pytest.mark.asyncio
130
+ async def test_search_empty_results(self, tool: OpenAlexTool) -> None:
131
+ """Search with no results should return empty list."""
132
+ respx.get("https://api.openalex.org/works").mock(
133
+ return_value=Response(200, json={"results": []})
134
+ )
135
+
136
+ results = await tool.search("xyznonexistentquery123")
137
+ assert results == []
138
+
139
+ @respx.mock
140
+ @pytest.mark.asyncio
141
+ async def test_search_handles_missing_abstract(self, tool: OpenAlexTool) -> None:
142
+ """Tool should handle papers without abstracts."""
143
+ mock_response = {
144
+ "results": [
145
+ {
146
+ "id": "W123",
147
+ "title": "Paper without abstract",
148
+ "publication_year": 2023,
149
+ "cited_by_count": 10,
150
+ "type": "article",
151
+ "is_oa": False,
152
+ "primary_location": {
153
+ "source": {"display_name": "Journal"},
154
+ "landing_page_url": "https://example.com",
155
+ },
156
+ "abstract_inverted_index": None,
157
+ "concepts": [],
158
+ "authorships": [],
159
+ }
160
+ ]
161
+ }
162
+
163
+ respx.get("https://api.openalex.org/works").mock(
164
+ return_value=Response(200, json=mock_response)
165
+ )
166
+
167
+ results = await tool.search("test query")
168
+ assert len(results) == 1
169
+ assert results[0].content == "" # No abstract
170
+
171
+ @respx.mock
172
+ @pytest.mark.asyncio
173
+ async def test_search_extracts_citation_count(self, tool: OpenAlexTool) -> None:
174
+ """Citation count should be in metadata."""
175
+ mock_response = {
176
+ "results": [
177
+ {
178
+ "id": "W456",
179
+ "title": "Highly cited paper",
180
+ "publication_year": 2020,
181
+ "cited_by_count": 500,
182
+ "type": "article",
183
+ "is_oa": True,
184
+ "primary_location": {
185
+ "source": {"display_name": "Science"},
186
+ "landing_page_url": "https://example.com",
187
+ },
188
+ "abstract_inverted_index": {"Test": [0]},
189
+ "concepts": [],
190
+ "authorships": [],
191
+ }
192
+ ]
193
+ }
194
+
195
+ respx.get("https://api.openalex.org/works").mock(
196
+ return_value=Response(200, json=mock_response)
197
+ )
198
+
199
+ results = await tool.search("highly cited")
200
+ assert results[0].metadata["cited_by_count"] == 500
201
+
202
+ @respx.mock
203
+ @pytest.mark.asyncio
204
+ async def test_search_extracts_concepts(self, tool: OpenAlexTool) -> None:
205
+ """Concepts should be extracted for semantic discovery."""
206
+ mock_response = {
207
+ "results": [
208
+ {
209
+ "id": "W789",
210
+ "title": "Drug repurposing study",
211
+ "publication_year": 2023,
212
+ "cited_by_count": 25,
213
+ "type": "article",
214
+ "is_oa": True,
215
+ "primary_location": {
216
+ "source": {"display_name": "PLOS ONE"},
217
+ "landing_page_url": "https://example.com",
218
+ },
219
+ "abstract_inverted_index": {"Drug": [0], "repurposing": [1]},
220
+ "concepts": [
221
+ {"display_name": "Pharmacology", "score": 0.92},
222
+ {"display_name": "Drug Discovery", "score": 0.85},
223
+ {"display_name": "Medicine", "score": 0.80},
224
+ ],
225
+ "authorships": [],
226
+ }
227
+ ]
228
+ }
229
+
230
+ respx.get("https://api.openalex.org/works").mock(
231
+ return_value=Response(200, json=mock_response)
232
+ )
233
+
234
+ results = await tool.search("drug repurposing")
235
+ assert "Pharmacology" in results[0].metadata["concepts"]
236
+ assert "Drug Discovery" in results[0].metadata["concepts"]
237
+
238
+ @respx.mock
239
+ @pytest.mark.asyncio
240
+ async def test_search_api_error_raises_search_error(
241
+ self, tool: OpenAlexTool
242
+ ) -> None:
243
+ """API errors should raise SearchError."""
244
+ from src.utils.exceptions import SearchError
245
+
246
+ respx.get("https://api.openalex.org/works").mock(
247
+ return_value=Response(500, text="Internal Server Error")
248
+ )
249
+
250
+ with pytest.raises(SearchError):
251
+ await tool.search("test query")
252
+
253
+ def test_reconstruct_abstract(self, tool: OpenAlexTool) -> None:
254
+ """Test abstract reconstruction from inverted index."""
255
+ inverted_index = {
256
+ "Metformin": [0, 5],
257
+ "is": [1],
258
+ "a": [2],
259
+ "diabetes": [3],
260
+ "drug": [4],
261
+ "effective": [6],
262
+ }
263
+ abstract = tool._reconstruct_abstract(inverted_index)
264
+ assert abstract == "Metformin is a diabetes drug Metformin effective"
265
+ ```
266
+
267
+ ---
268
+
269
+ ### Step 2: Create the Implementation
270
+
271
+ **File**: `src/tools/openalex.py`
272
+
273
+ ```python
274
+ """OpenAlex search tool for comprehensive scholarly data."""
275
+
276
+ from typing import Any
277
+
278
+ import httpx
279
+ from tenacity import retry, stop_after_attempt, wait_exponential
280
+
281
+ from src.utils.exceptions import SearchError
282
+ from src.utils.models import Citation, Evidence
283
+
284
+
285
+ class OpenAlexTool:
286
+ """
287
+ Search OpenAlex for scholarly works with rich metadata.
288
+
289
+ OpenAlex provides:
290
+ - 209M+ scholarly works
291
+ - Citation counts and networks
292
+ - Concept tagging (hierarchical)
293
+ - Author disambiguation
294
+ - Open access links
295
+
296
+ API Docs: https://docs.openalex.org/
297
+ """
298
+
299
+ BASE_URL = "https://api.openalex.org/works"
300
+
301
+ def __init__(self, email: str | None = None) -> None:
302
+ """
303
+ Initialize OpenAlex tool.
304
+
305
+ Args:
306
+ email: Optional email for polite pool (faster responses)
307
+ """
308
+ self.email = email or "deepcritical@example.com"
309
+
310
+ @property
311
+ def name(self) -> str:
312
+ return "openalex"
313
+
314
+ @retry(
315
+ stop=stop_after_attempt(3),
316
+ wait=wait_exponential(multiplier=1, min=1, max=10),
317
+ reraise=True,
318
+ )
319
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
320
+ """
321
+ Search OpenAlex for scholarly works.
322
+
323
+ Args:
324
+ query: Search terms
325
+ max_results: Maximum results to return (max 200 per request)
326
+
327
+ Returns:
328
+ List of Evidence objects with citation metadata
329
+
330
+ Raises:
331
+ SearchError: If API request fails
332
+ """
333
+ params = {
334
+ "search": query,
335
+ "filter": "type:article", # Only peer-reviewed articles
336
+ "sort": "cited_by_count:desc", # Most cited first
337
+ "per_page": min(max_results, 200),
338
+ "mailto": self.email, # Polite pool for faster responses
339
+ }
340
+
341
+ async with httpx.AsyncClient(timeout=30.0) as client:
342
+ try:
343
+ response = await client.get(self.BASE_URL, params=params)
344
+ response.raise_for_status()
345
+
346
+ data = response.json()
347
+ results = data.get("results", [])
348
+
349
+ return [self._to_evidence(work) for work in results[:max_results]]
350
+
351
+ except httpx.HTTPStatusError as e:
352
+ raise SearchError(f"OpenAlex API error: {e}") from e
353
+ except httpx.RequestError as e:
354
+ raise SearchError(f"OpenAlex connection failed: {e}") from e
355
+
356
+ def _to_evidence(self, work: dict[str, Any]) -> Evidence:
357
+ """Convert OpenAlex work to Evidence object."""
358
+ title = work.get("title", "Untitled")
359
+ pub_year = work.get("publication_year", "Unknown")
360
+ cited_by = work.get("cited_by_count", 0)
361
+ is_oa = work.get("is_oa", False)
362
+
363
+ # Reconstruct abstract from inverted index
364
+ abstract_index = work.get("abstract_inverted_index")
365
+ abstract = self._reconstruct_abstract(abstract_index) if abstract_index else ""
366
+
367
+ # Extract concepts (top 5)
368
+ concepts = [
369
+ c.get("display_name", "")
370
+ for c in work.get("concepts", [])[:5]
371
+ if c.get("display_name")
372
+ ]
373
+
374
+ # Extract authors (top 5)
375
+ authorships = work.get("authorships", [])
376
+ authors = [
377
+ a.get("author", {}).get("display_name", "")
378
+ for a in authorships[:5]
379
+ if a.get("author", {}).get("display_name")
380
+ ]
381
+
382
+ # Get URL
383
+ primary_loc = work.get("primary_location") or {}
384
+ url = primary_loc.get("landing_page_url", "")
385
+ if not url:
386
+ # Fallback to OpenAlex page
387
+ work_id = work.get("id", "").replace("https://openalex.org/", "")
388
+ url = f"https://openalex.org/{work_id}"
389
+
390
+ return Evidence(
391
+ content=abstract[:2000],
392
+ citation=Citation(
393
+ source="openalex",
394
+ title=title[:500],
395
+ url=url,
396
+ date=str(pub_year),
397
+ authors=authors,
398
+ ),
399
+ relevance=min(0.9, 0.5 + (cited_by / 1000)), # Boost by citations
400
+ metadata={
401
+ "cited_by_count": cited_by,
402
+ "is_open_access": is_oa,
403
+ "concepts": concepts,
404
+ "pdf_url": primary_loc.get("pdf_url"),
405
+ },
406
+ )
407
+
408
+ def _reconstruct_abstract(
409
+ self, inverted_index: dict[str, list[int]]
410
+ ) -> str:
411
+ """
412
+ Reconstruct abstract from OpenAlex inverted index format.
413
+
414
+ OpenAlex stores abstracts as {"word": [position1, position2, ...]}.
415
+ This rebuilds the original text.
416
+ """
417
+ if not inverted_index:
418
+ return ""
419
+
420
+ # Build position -> word mapping
421
+ position_word: dict[int, str] = {}
422
+ for word, positions in inverted_index.items():
423
+ for pos in positions:
424
+ position_word[pos] = word
425
+
426
+ # Reconstruct in order
427
+ if not position_word:
428
+ return ""
429
+
430
+ max_pos = max(position_word.keys())
431
+ words = [position_word.get(i, "") for i in range(max_pos + 1)]
432
+ return " ".join(w for w in words if w)
433
+ ```
434
+
435
+ ---
436
+
437
+ ### Step 3: Register in Search Handler
438
+
439
+ **File**: `src/tools/search_handler.py` (add to imports and tool list)
440
+
441
+ ```python
442
+ # Add import
443
+ from src.tools.openalex import OpenAlexTool
444
+
445
+ # Add to _create_tools method
446
+ def _create_tools(self) -> list[SearchTool]:
447
+ return [
448
+ PubMedTool(),
449
+ ClinicalTrialsTool(),
450
+ EuropePMCTool(),
451
+ OpenAlexTool(), # NEW
452
+ ]
453
+ ```
454
+
455
+ ---
456
+
457
+ ### Step 4: Update `__init__.py`
458
+
459
+ **File**: `src/tools/__init__.py`
460
+
461
+ ```python
462
+ from src.tools.openalex import OpenAlexTool
463
+
464
+ __all__ = [
465
+ "PubMedTool",
466
+ "ClinicalTrialsTool",
467
+ "EuropePMCTool",
468
+ "OpenAlexTool", # NEW
469
+ # ...
470
+ ]
471
+ ```
472
+
473
+ ---
474
+
475
+ ## Demo Script
476
+
477
+ **File**: `examples/openalex_demo.py`
478
+
479
+ ```python
480
+ #!/usr/bin/env python3
481
+ """Demo script to verify OpenAlex integration."""
482
+
483
+ import asyncio
484
+ from src.tools.openalex import OpenAlexTool
485
+
486
+
487
+ async def main():
488
+ """Run OpenAlex search demo."""
489
+ tool = OpenAlexTool()
490
+
491
+ print("=" * 60)
492
+ print("OpenAlex Integration Demo")
493
+ print("=" * 60)
494
+
495
+ # Test 1: Basic drug repurposing search
496
+ print("\n[Test 1] Searching for 'metformin cancer drug repurposing'...")
497
+ results = await tool.search("metformin cancer drug repurposing", max_results=5)
498
+
499
+ for i, evidence in enumerate(results, 1):
500
+ print(f"\n--- Result {i} ---")
501
+ print(f"Title: {evidence.citation.title}")
502
+ print(f"Year: {evidence.citation.date}")
503
+ print(f"Citations: {evidence.metadata.get('cited_by_count', 'N/A')}")
504
+ print(f"Concepts: {', '.join(evidence.metadata.get('concepts', []))}")
505
+ print(f"Open Access: {evidence.metadata.get('is_open_access', False)}")
506
+ print(f"URL: {evidence.citation.url}")
507
+ if evidence.content:
508
+ print(f"Abstract: {evidence.content[:200]}...")
509
+
510
+ # Test 2: High-impact papers
511
+ print("\n" + "=" * 60)
512
+ print("[Test 2] Finding highly-cited papers on 'long COVID treatment'...")
513
+ results = await tool.search("long COVID treatment", max_results=3)
514
+
515
+ for evidence in results:
516
+ print(f"\n- {evidence.citation.title}")
517
+ print(f" Citations: {evidence.metadata.get('cited_by_count', 0)}")
518
+
519
+ print("\n" + "=" * 60)
520
+ print("Demo complete!")
521
+
522
+
523
+ if __name__ == "__main__":
524
+ asyncio.run(main())
525
+ ```
526
+
527
+ ---
528
+
529
+ ## Verification Checklist
530
+
531
+ ### Unit Tests
532
+ ```bash
533
+ # Run just OpenAlex tests
534
+ uv run pytest tests/unit/tools/test_openalex.py -v
535
+
536
+ # Expected: All tests pass
537
+ ```
538
+
539
+ ### Integration Test (Manual)
540
+ ```bash
541
+ # Run demo script with real API
542
+ uv run python examples/openalex_demo.py
543
+
544
+ # Expected: Real results from OpenAlex API
545
+ ```
546
+
547
+ ### Full Test Suite
548
+ ```bash
549
+ # Ensure nothing broke
550
+ make check
551
+
552
+ # Expected: All 110+ tests pass, mypy clean
553
+ ```
554
+
555
+ ---
556
+
557
+ ## Success Criteria
558
+
559
+ 1. **Unit tests pass**: All mocked tests in `test_openalex.py` pass
560
+ 2. **Integration works**: Demo script returns real results
561
+ 3. **No regressions**: `make check` passes completely
562
+ 4. **SearchHandler integration**: OpenAlex appears in search results alongside other sources
563
+ 5. **Citation metadata**: Results include `cited_by_count`, `concepts`, `is_open_access`
564
+
565
+ ---
566
+
567
+ ## Future Enhancements (P2)
568
+
569
+ Once basic integration works:
570
+
571
+ 1. **Citation Network Queries**
572
+ ```python
573
+ # Get papers citing a specific work
574
+ async def get_citing_works(self, work_id: str) -> list[Evidence]:
575
+ params = {"filter": f"cites:{work_id}"}
576
+ ...
577
+ ```
578
+
579
+ 2. **Concept-Based Search**
580
+ ```python
581
+ # Search by OpenAlex concept ID
582
+ async def search_by_concept(self, concept_id: str) -> list[Evidence]:
583
+ params = {"filter": f"concepts.id:{concept_id}"}
584
+ ...
585
+ ```
586
+
587
+ 3. **Author Tracking**
588
+ ```python
589
+ # Find all works by an author
590
+ async def search_by_author(self, author_id: str) -> list[Evidence]:
591
+ params = {"filter": f"authorships.author.id:{author_id}"}
592
+ ...
593
+ ```
594
+
595
+ ---
596
+
597
+ ## Notes
598
+
599
+ - OpenAlex is **very generous** with rate limits (no documented hard limit)
600
+ - Adding `mailto` parameter gives priority access (polite pool)
601
+ - Abstract is stored as inverted index - must reconstruct
602
+ - Citation count is a good proxy for paper quality/impact
603
+ - Consider caching responses for repeated queries
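+
+ A minimal sketch of the `mailto` note above, assuming the tool queries the `/works` endpoint with httpx (the email address is a placeholder):
+
+ ```python
+ params = {
+     "search": query,
+     "per-page": max_results,
+     "mailto": "contact@example.org",  # placeholder: opts into OpenAlex's polite pool
+ }
+ # async with httpx.AsyncClient(timeout=30.0) as client:
+ #     response = await client.get("https://api.openalex.org/works", params=params)
+ ```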
docs/brainstorming/implementation/16_PHASE_PUBMED_FULLTEXT.md ADDED
@@ -0,0 +1,586 @@
1
+ # Phase 16: PubMed Full-Text Retrieval
2
+
3
+ **Priority**: MEDIUM - Enhances evidence quality
4
+ **Effort**: ~3 hours
5
+ **Dependencies**: None (existing PubMed tool sufficient)
6
+
7
+ ---
8
+
9
+ ## Prerequisites (COMPLETED)
10
+
11
+ The `Evidence.metadata` field has been added to `src/utils/models.py` to support:
12
+ ```python
13
+ metadata={"has_fulltext": True}
14
+ ```
15
+
16
+ ---
17
+
18
+ ## Architecture Decision: Constructor Parameter vs Method Parameter
19
+
20
+ **IMPORTANT**: The original spec proposed `include_fulltext` as a method parameter:
21
+ ```python
22
+ # WRONG - SearchHandler won't pass this parameter
23
+ async def search(self, query: str, max_results: int = 10, include_fulltext: bool = False):
24
+ ```
25
+
26
+ **Problem**: `SearchHandler` calls `tool.search(query, max_results)` uniformly across all tools.
27
+ It has no mechanism to pass tool-specific parameters like `include_fulltext`.
28
+
29
+ **Solution**: Use constructor parameter instead:
30
+ ```python
31
+ # CORRECT - Configured at instantiation time
32
+ class PubMedTool:
33
+ def __init__(self, api_key: str | None = None, include_fulltext: bool = False):
34
+ self.include_fulltext = include_fulltext
35
+ ...
36
+ ```
37
+
38
+ This way, you can create a full-text-enabled PubMed tool:
39
+ ```python
40
+ # In orchestrator or wherever tools are created
41
+ tools = [
42
+ PubMedTool(include_fulltext=True), # Full-text enabled
43
+ ClinicalTrialsTool(),
44
+ EuropePMCTool(),
45
+ ]
46
+ ```
47
+
48
+ ---
49
+
50
+ ## Overview
51
+
52
+ Add full-text retrieval for PubMed papers via the BioC API, enabling:
53
+ - Complete paper text for open-access PMC papers
54
+ - Structured sections (intro, methods, results, discussion)
55
+ - Better evidence for LLM synthesis
56
+
57
+ **Why Full-Text?**
58
+ - Abstracts only give ~200-300 words
59
+ - Full text provides detailed methods, results, figures
60
+ - Reference repo already has this implemented
61
+ - Makes LLM judgments more accurate
62
+
63
+ ---
64
+
65
+ ## TDD Implementation Plan
66
+
67
+ ### Step 1: Write the Tests First
68
+
69
+ **File**: `tests/unit/tools/test_pubmed_fulltext.py`
70
+
71
+ ```python
72
+ """Tests for PubMed full-text retrieval."""
73
+
74
+ import pytest
75
+ import respx
76
+ from httpx import Response
77
+
78
+ from src.tools.pubmed import PubMedTool
79
+
80
+
81
+ class TestPubMedFullText:
82
+ """Test suite for PubMed full-text functionality."""
83
+
84
+ @pytest.fixture
85
+ def tool(self) -> PubMedTool:
86
+ return PubMedTool()
87
+
88
+ @respx.mock
89
+ @pytest.mark.asyncio
90
+ async def test_get_pmc_id_success(self, tool: PubMedTool) -> None:
91
+ """Should convert PMID to PMCID for full-text access."""
92
+ mock_response = {
93
+ "records": [
94
+ {
95
+ "pmid": "12345678",
96
+ "pmcid": "PMC1234567",
97
+ }
98
+ ]
99
+ }
100
+
101
+ respx.get("https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/").mock(
102
+ return_value=Response(200, json=mock_response)
103
+ )
104
+
105
+ pmcid = await tool.get_pmc_id("12345678")
106
+ assert pmcid == "PMC1234567"
107
+
108
+ @respx.mock
109
+ @pytest.mark.asyncio
110
+ async def test_get_pmc_id_not_in_pmc(self, tool: PubMedTool) -> None:
111
+ """Should return None if paper not in PMC."""
112
+ mock_response = {
113
+ "records": [
114
+ {
115
+ "pmid": "12345678",
116
+ # No pmcid means not in PMC
117
+ }
118
+ ]
119
+ }
120
+
121
+ respx.get("https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/").mock(
122
+ return_value=Response(200, json=mock_response)
123
+ )
124
+
125
+ pmcid = await tool.get_pmc_id("12345678")
126
+ assert pmcid is None
127
+
128
+ @respx.mock
129
+ @pytest.mark.asyncio
130
+ async def test_get_fulltext_success(self, tool: PubMedTool) -> None:
131
+ """Should retrieve full text for PMC papers."""
132
+ # Mock BioC API response
133
+ mock_bioc = {
134
+ "documents": [
135
+ {
136
+ "passages": [
137
+ {
138
+ "infons": {"section_type": "INTRO"},
139
+ "text": "Introduction text here.",
140
+ },
141
+ {
142
+ "infons": {"section_type": "METHODS"},
143
+ "text": "Methods description here.",
144
+ },
145
+ {
146
+ "infons": {"section_type": "RESULTS"},
147
+ "text": "Results summary here.",
148
+ },
149
+ {
150
+ "infons": {"section_type": "DISCUSS"},
151
+ "text": "Discussion and conclusions.",
152
+ },
153
+ ]
154
+ }
155
+ ]
156
+ }
157
+
158
+ respx.get(
159
+ "https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/12345678/unicode"
160
+ ).mock(return_value=Response(200, json=mock_bioc))
161
+
162
+ fulltext = await tool.get_fulltext("12345678")
163
+
164
+ assert fulltext is not None
165
+ assert "Introduction text here" in fulltext
166
+ assert "Methods description here" in fulltext
167
+ assert "Results summary here" in fulltext
168
+
169
+ @respx.mock
170
+ @pytest.mark.asyncio
171
+ async def test_get_fulltext_not_available(self, tool: PubMedTool) -> None:
172
+ """Should return None if full text not available."""
173
+ respx.get(
174
+ "https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/99999999/unicode"
175
+ ).mock(return_value=Response(404))
176
+
177
+ fulltext = await tool.get_fulltext("99999999")
178
+ assert fulltext is None
179
+
180
+ @respx.mock
181
+ @pytest.mark.asyncio
182
+ async def test_get_fulltext_structured(self, tool: PubMedTool) -> None:
183
+ """Should return structured sections dict."""
184
+ mock_bioc = {
185
+ "documents": [
186
+ {
187
+ "passages": [
188
+ {"infons": {"section_type": "INTRO"}, "text": "Intro..."},
189
+ {"infons": {"section_type": "METHODS"}, "text": "Methods..."},
190
+ {"infons": {"section_type": "RESULTS"}, "text": "Results..."},
191
+ {"infons": {"section_type": "DISCUSS"}, "text": "Discussion..."},
192
+ ]
193
+ }
194
+ ]
195
+ }
196
+
197
+ respx.get(
198
+ "https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/12345678/unicode"
199
+ ).mock(return_value=Response(200, json=mock_bioc))
200
+
201
+ sections = await tool.get_fulltext_structured("12345678")
202
+
203
+ assert sections is not None
204
+ assert "introduction" in sections
205
+ assert "methods" in sections
206
+ assert "results" in sections
207
+ assert "discussion" in sections
208
+
209
+ @respx.mock
210
+ @pytest.mark.asyncio
211
+ async def test_search_with_fulltext_enabled(self) -> None:
212
+ """Search should include full text when tool is configured for it."""
213
+ # Create tool WITH full-text enabled via constructor
214
+ tool = PubMedTool(include_fulltext=True)
215
+
216
+ # Mock esearch
217
+ respx.get("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi").mock(
218
+ return_value=Response(
219
+ 200, json={"esearchresult": {"idlist": ["12345678"]}}
220
+ )
221
+ )
222
+
223
+ # Mock efetch (abstract)
224
+ mock_xml = """
225
+ <PubmedArticleSet>
226
+ <PubmedArticle>
227
+ <MedlineCitation>
228
+ <PMID>12345678</PMID>
229
+ <Article>
230
+ <ArticleTitle>Test Paper</ArticleTitle>
231
+ <Abstract><AbstractText>Short abstract.</AbstractText></Abstract>
232
+ <AuthorList><Author><LastName>Smith</LastName></Author></AuthorList>
233
+ </Article>
234
+ </MedlineCitation>
235
+ </PubmedArticle>
236
+ </PubmedArticleSet>
237
+ """
238
+ respx.get("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi").mock(
239
+ return_value=Response(200, text=mock_xml)
240
+ )
241
+
242
+ # Mock ID converter
243
+ respx.get("https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/").mock(
244
+ return_value=Response(
245
+ 200, json={"records": [{"pmid": "12345678", "pmcid": "PMC1234567"}]}
246
+ )
247
+ )
248
+
249
+ # Mock BioC full text
250
+ mock_bioc = {
251
+ "documents": [
252
+ {
253
+ "passages": [
254
+ {"infons": {"section_type": "INTRO"}, "text": "Full intro..."},
255
+ ]
256
+ }
257
+ ]
258
+ }
259
+ respx.get(
260
+ "https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/12345678/unicode"
261
+ ).mock(return_value=Response(200, json=mock_bioc))
262
+
263
+ # NOTE: No include_fulltext param - it's set via constructor
264
+ results = await tool.search("test", max_results=1)
265
+
266
+ assert len(results) == 1
267
+ # Full text should be appended or replace abstract
268
+ assert "Full intro" in results[0].content or "Short abstract" in results[0].content
269
+ ```
270
+
271
+ ---
272
+
273
+ ### Step 2: Implement Full-Text Methods
274
+
275
+ **File**: `src/tools/pubmed.py` (additions to existing class)
276
+
277
+ ```python
278
+ # Add these methods to PubMedTool class
279
+
280
+ async def get_pmc_id(self, pmid: str) -> str | None:
281
+ """
282
+ Convert PMID to PMCID for full-text access.
283
+
284
+ Args:
285
+ pmid: PubMed ID
286
+
287
+ Returns:
288
+ PMCID if paper is in PMC, None otherwise
289
+ """
290
+ url = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/"
291
+ params = {"ids": pmid, "format": "json"}
292
+
293
+ async with httpx.AsyncClient(timeout=30.0) as client:
294
+ try:
295
+ response = await client.get(url, params=params)
296
+ response.raise_for_status()
297
+ data = response.json()
298
+
299
+ records = data.get("records", [])
300
+ if records and records[0].get("pmcid"):
301
+ return records[0]["pmcid"]
302
+ return None
303
+
304
+ except httpx.HTTPError:
305
+ return None
306
+
307
+
308
+ async def get_fulltext(self, pmid: str) -> str | None:
309
+ """
310
+ Get full text for a PubMed paper via BioC API.
311
+
312
+ Only works for open-access papers in PubMed Central.
313
+
314
+ Args:
315
+ pmid: PubMed ID
316
+
317
+ Returns:
318
+ Full text as string, or None if not available
319
+ """
320
+ url = f"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/{pmid}/unicode"
321
+
322
+ async with httpx.AsyncClient(timeout=60.0) as client:
323
+ try:
324
+ response = await client.get(url)
325
+ if response.status_code == 404:
326
+ return None
327
+ response.raise_for_status()
328
+ data = response.json()
329
+
330
+ # Extract text from all passages
331
+ documents = data.get("documents", [])
332
+ if not documents:
333
+ return None
334
+
335
+ passages = documents[0].get("passages", [])
336
+ text_parts = [p.get("text", "") for p in passages if p.get("text")]
337
+
338
+ return "\n\n".join(text_parts) if text_parts else None
339
+
340
+ except httpx.HTTPError:
341
+ return None
342
+
343
+
344
+ async def get_fulltext_structured(self, pmid: str) -> dict[str, str] | None:
345
+ """
346
+ Get structured full text with sections.
347
+
348
+ Args:
349
+ pmid: PubMed ID
350
+
351
+ Returns:
352
+ Dict mapping section names to text, or None if not available
353
+ """
354
+ url = f"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/{pmid}/unicode"
355
+
356
+ async with httpx.AsyncClient(timeout=60.0) as client:
357
+ try:
358
+ response = await client.get(url)
359
+ if response.status_code == 404:
360
+ return None
361
+ response.raise_for_status()
362
+ data = response.json()
363
+
364
+ documents = data.get("documents", [])
365
+ if not documents:
366
+ return None
367
+
368
+ # Map section types to readable names
369
+ section_map = {
370
+ "INTRO": "introduction",
371
+ "METHODS": "methods",
372
+ "RESULTS": "results",
373
+ "DISCUSS": "discussion",
374
+ "CONCL": "conclusion",
375
+ "ABSTRACT": "abstract",
376
+ }
377
+
378
+ sections: dict[str, list[str]] = {}
379
+ for passage in documents[0].get("passages", []):
380
+ section_type = passage.get("infons", {}).get("section_type", "other")
381
+ section_name = section_map.get(section_type, "other")
382
+ text = passage.get("text", "")
383
+
384
+ if text:
385
+ if section_name not in sections:
386
+ sections[section_name] = []
387
+ sections[section_name].append(text)
388
+
389
+ # Join multiple passages per section
390
+ return {k: "\n\n".join(v) for k, v in sections.items()}
391
+
392
+ except httpx.HTTPError:
393
+ return None
394
+ ```
395
+
396
+ ---
397
+
398
+ ### Step 3: Update Constructor and Search Method
399
+
400
+ Add full-text flag to constructor and update search to use it:
401
+
402
+ ```python
403
+ class PubMedTool:
404
+ """Search tool for PubMed/NCBI."""
405
+
406
+ def __init__(
407
+ self,
408
+ api_key: str | None = None,
409
+ include_fulltext: bool = False, # NEW CONSTRUCTOR PARAM
410
+ ) -> None:
411
+ self.api_key = api_key or settings.ncbi_api_key
412
+ if self.api_key == "your-ncbi-key-here":
413
+ self.api_key = None
414
+ self._last_request_time = 0.0
415
+ self.include_fulltext = include_fulltext # Store for use in search()
416
+
417
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
418
+ """
419
+ Search PubMed and return evidence.
420
+
421
+ Note: Full-text enrichment is controlled by constructor parameter,
422
+ not method parameter, because SearchHandler doesn't pass extra args.
423
+ """
424
+ # ... existing search logic ...
425
+
426
+ evidence_list = self._parse_pubmed_xml(fetch_resp.text)
427
+
428
+ # Optionally enrich with full text (if configured at construction)
429
+ if self.include_fulltext:
430
+ evidence_list = await self._enrich_with_fulltext(evidence_list)
431
+
432
+ return evidence_list
433
+
434
+
435
+ async def _enrich_with_fulltext(
436
+ self, evidence_list: list[Evidence]
437
+ ) -> list[Evidence]:
438
+ """Attempt to add full text to evidence items."""
439
+ enriched = []
440
+
441
+ for evidence in evidence_list:
442
+ # Extract PMID from URL
443
+ url = evidence.citation.url
444
+ pmid = url.rstrip("/").split("/")[-1] if url else None
445
+
446
+ if pmid:
447
+ fulltext = await self.get_fulltext(pmid)
448
+ if fulltext:
449
+ # Replace abstract with full text (truncated)
450
+ evidence = Evidence(
451
+ content=fulltext[:8000], # Larger limit for full text
452
+ citation=evidence.citation,
453
+ relevance=evidence.relevance,
454
+ metadata={
455
+ **evidence.metadata,
456
+ "has_fulltext": True,
457
+ },
458
+ )
459
+
460
+ enriched.append(evidence)
461
+
462
+ return enriched
463
+ ```
464
+
465
+ ---
466
+
467
+ ## Demo Script
468
+
469
+ **File**: `examples/pubmed_fulltext_demo.py`
470
+
471
+ ```python
472
+ #!/usr/bin/env python3
473
+ """Demo script to verify PubMed full-text retrieval."""
474
+
475
+ import asyncio
476
+ from src.tools.pubmed import PubMedTool
477
+
478
+
479
+ async def main():
480
+ """Run PubMed full-text demo."""
481
+ tool = PubMedTool()
482
+
483
+ print("=" * 60)
484
+ print("PubMed Full-Text Demo")
485
+ print("=" * 60)
486
+
487
+ # Test 1: Convert PMID to PMCID
488
+ print("\n[Test 1] Converting PMID to PMCID...")
489
+ # Use a known open-access paper
490
+ test_pmid = "34450029" # Example: COVID-related open-access paper
491
+ pmcid = await tool.get_pmc_id(test_pmid)
492
+ print(f"PMID {test_pmid} -> PMCID: {pmcid or 'Not in PMC'}")
493
+
494
+ # Test 2: Get full text
495
+ print("\n[Test 2] Fetching full text...")
496
+ if pmcid:
497
+ fulltext = await tool.get_fulltext(test_pmid)
498
+ if fulltext:
499
+ print(f"Full text length: {len(fulltext)} characters")
500
+ print(f"Preview: {fulltext[:500]}...")
501
+ else:
502
+ print("Full text not available")
503
+
504
+ # Test 3: Get structured sections
505
+ print("\n[Test 3] Fetching structured sections...")
506
+ if pmcid:
507
+ sections = await tool.get_fulltext_structured(test_pmid)
508
+ if sections:
509
+ print("Available sections:")
510
+ for section, text in sections.items():
511
+ print(f" - {section}: {len(text)} chars")
512
+ else:
513
+ print("Structured text not available")
514
+
515
+ # Test 4: Search with full-text enrichment
+ # Per the architecture decision above, include_fulltext is a constructor
+ # parameter, not a search() argument.
+ print("\n[Test 4] Search with full-text enrichment...")
+ ft_tool = PubMedTool(include_fulltext=True)
+ results = await ft_tool.search(
+ "metformin cancer open access",
+ max_results=3,
+ )
522
+
523
+ for i, evidence in enumerate(results, 1):
524
+ has_ft = evidence.metadata.get("has_fulltext", False)
525
+ print(f"\n--- Result {i} ---")
526
+ print(f"Title: {evidence.citation.title}")
527
+ print(f"Has Full Text: {has_ft}")
528
+ print(f"Content Length: {len(evidence.content)} chars")
529
+
530
+ print("\n" + "=" * 60)
531
+ print("Demo complete!")
532
+
533
+
534
+ if __name__ == "__main__":
535
+ asyncio.run(main())
536
+ ```
537
+
538
+ ---
539
+
540
+ ## Verification Checklist
541
+
542
+ ### Unit Tests
543
+ ```bash
544
+ # Run full-text tests
545
+ uv run pytest tests/unit/tools/test_pubmed_fulltext.py -v
546
+
547
+ # Run all PubMed tests
548
+ uv run pytest tests/unit/tools/test_pubmed.py -v
549
+
550
+ # Expected: All tests pass
551
+ ```
552
+
553
+ ### Integration Test (Manual)
554
+ ```bash
555
+ # Run demo with real API
556
+ uv run python examples/pubmed_fulltext_demo.py
557
+
558
+ # Expected: Real full text from PMC papers
559
+ ```
560
+
561
+ ### Full Test Suite
562
+ ```bash
563
+ make check
564
+ # Expected: All tests pass, mypy clean
565
+ ```
566
+
567
+ ---
568
+
569
+ ## Success Criteria
570
+
571
+ 1. **ID Conversion works**: PMID -> PMCID conversion successful
572
+ 2. **Full text retrieval works**: BioC API returns paper text
573
+ 3. **Structured sections work**: Can get intro/methods/results/discussion separately
574
+ 4. **Search integration works**: `PubMedTool(include_fulltext=True)` enriches results
575
+ 5. **No regressions**: Existing tests still pass
576
+ 6. **Graceful degradation**: Non-PMC papers still return abstracts
577
+
578
+ ---
579
+
580
+ ## Notes
581
+
582
+ - Only ~30% of PubMed papers have full text in PMC
583
+ - BioC API has no documented rate limit, but be respectful
584
+ - Full text can be very long - truncate appropriately
585
+ - Consider caching full text responses (they don't change)
586
+ - Timeout should be longer for full text (60s vs 30s)
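+
+ A minimal caching sketch for the note above, assuming an in-process dict keyed by PMID (a production version might want an LRU with a size bound):
+
+ ```python
+ class FullTextCache:
+     """Tiny in-memory cache - published PMC full text is effectively immutable."""
+
+     def __init__(self) -> None:
+         self._cache: dict[str, str | None] = {}
+
+     async def get_or_fetch(self, tool: "PubMedTool", pmid: str) -> str | None:
+         if pmid not in self._cache:
+             self._cache[pmid] = await tool.get_fulltext(pmid)
+         return self._cache[pmid]
+ ```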
docs/brainstorming/implementation/17_PHASE_RATE_LIMITING.md ADDED
@@ -0,0 +1,540 @@
1
+ # Phase 17: Rate Limiting with `limits` Library
2
+
3
+ **Priority**: P0 CRITICAL - Prevents API blocks
4
+ **Effort**: ~1 hour
5
+ **Dependencies**: None
6
+
7
+ ---
8
+
9
+ ## CRITICAL: Async Safety Requirements
10
+
11
+ **WARNING**: The rate limiter MUST be async-safe. Blocking the event loop will freeze:
12
+ - The Gradio UI
13
+ - All parallel searches
14
+ - The orchestrator
15
+
16
+ **Rules**:
17
+ 1. **NEVER use `time.sleep()`** - Always use `await asyncio.sleep()`
18
+ 2. **NEVER use blocking while loops** - Use async-aware polling
19
+ 3. **The `limits` library check is synchronous** - Wrap it carefully
20
+
21
+ The implementation below uses a polling pattern that:
22
+ - Checks the limit (synchronous, fast)
23
+ - Sleeps via `await asyncio.sleep()` when the limit is exceeded (non-blocking)
24
+ - Retries the check
25
+
26
+ **Alternative**: If `limits` proves problematic, use `aiolimiter` which is pure-async.
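+
+ For reference, a sketch of that alternative, assuming `aiolimiter` is installed (`AsyncLimiter(3, 1)` allows 3 acquisitions per 1-second window):
+
+ ```python
+ from aiolimiter import AsyncLimiter
+
+ # 3 requests/second, enforced without ever blocking the event loop
+ ncbi_limiter = AsyncLimiter(3, 1)
+
+ async def fetch(url: str) -> None:
+     async with ncbi_limiter:  # suspends via asyncio, never time.sleep()
+         ...  # perform the HTTP call here
+ ```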
27
+
28
+ ---
29
+
30
+ ## Overview
31
+
32
+ Replace naive `asyncio.sleep` rate limiting with a proper rate limiter built on the `limits` library, which provides:
33
+ - Moving window rate limiting
34
+ - Per-API configurable limits
35
+ - Thread-safe storage
36
+ - Already used in reference repo
37
+
38
+ **Why This Matters**
39
+ - NCBI will block us without proper rate limiting (3/sec without key, 10/sec with)
40
+ - Current implementation only has simple sleep delay
41
+ - Need coordinated limits across all PubMed calls
42
+ - Professional-grade rate limiting prevents production issues
43
+
44
+ ---
45
+
46
+ ## Current State
47
+
48
+ ### What We Have (`src/tools/pubmed.py:20-21, 34-41`)
49
+
50
+ ```python
51
+ RATE_LIMIT_DELAY = 0.34 # ~3 requests/sec without API key
52
+
53
+ async def _rate_limit(self) -> None:
54
+ """Enforce NCBI rate limiting."""
55
+ loop = asyncio.get_running_loop()
56
+ now = loop.time()
57
+ elapsed = now - self._last_request_time
58
+ if elapsed < self.RATE_LIMIT_DELAY:
59
+ await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
60
+ self._last_request_time = loop.time()
61
+ ```
62
+
63
+ ### Problems
64
+
65
+ 1. **Not shared across instances**: Each `PubMedTool()` has its own counter
66
+ 2. **Simple delay vs moving window**: Doesn't handle bursts properly
67
+ 3. **Hardcoded rate**: Doesn't adapt to API key presence
68
+ 4. **No backoff on 429**: Just retries blindly
69
+
70
+ ---
71
+
72
+ ## TDD Implementation Plan
73
+
74
+ ### Step 1: Add Dependency
75
+
76
+ **File**: `pyproject.toml`
77
+
78
+ ```toml
79
+ dependencies = [
80
+ # ... existing deps ...
81
+ "limits>=3.0",
82
+ ]
83
+ ```
84
+
85
+ Then run:
86
+ ```bash
87
+ uv sync
88
+ ```
89
+
90
+ ---
91
+
92
+ ### Step 2: Write the Tests First
93
+
94
+ **File**: `tests/unit/tools/test_rate_limiting.py`
95
+
96
+ ```python
97
+ """Tests for rate limiting functionality."""
98
+
99
+ import asyncio
100
+ import time
101
+
102
+ import pytest
103
+
104
+ from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter, reset_pubmed_limiter
105
+
106
+
107
+ class TestRateLimiter:
108
+ """Test suite for rate limiter."""
109
+
110
+ def test_create_limiter_without_api_key(self) -> None:
111
+ """Should create 3/sec limiter without API key."""
112
+ limiter = RateLimiter(rate="3/second")
113
+ assert limiter.rate == "3/second"
114
+
115
+ def test_create_limiter_with_api_key(self) -> None:
116
+ """Should create 10/sec limiter with API key."""
117
+ limiter = RateLimiter(rate="10/second")
118
+ assert limiter.rate == "10/second"
119
+
120
+ @pytest.mark.asyncio
121
+ async def test_limiter_allows_requests_under_limit(self) -> None:
122
+ """Should allow requests under the rate limit."""
123
+ limiter = RateLimiter(rate="10/second")
124
+
125
+ # 3 requests should all succeed immediately
126
+ for _ in range(3):
127
+ allowed = await limiter.acquire()
128
+ assert allowed is True
129
+
130
+ @pytest.mark.asyncio
131
+ async def test_limiter_blocks_when_exceeded(self) -> None:
132
+ """Should wait when rate limit exceeded."""
133
+ limiter = RateLimiter(rate="2/second")
134
+
135
+ # First 2 should be instant
136
+ await limiter.acquire()
137
+ await limiter.acquire()
138
+
139
+ # Third should block briefly
140
+ start = time.monotonic()
141
+ await limiter.acquire()
142
+ elapsed = time.monotonic() - start
143
+
144
+ # With a 2/second moving window, the third call waits until the first hit ages out (up to ~1s); assert conservatively
145
+ assert elapsed >= 0.3
146
+
147
+ @pytest.mark.asyncio
148
+ async def test_limiter_resets_after_window(self) -> None:
149
+ """Rate limit should reset after time window."""
150
+ limiter = RateLimiter(rate="5/second")
151
+
152
+ # Use up the limit
153
+ for _ in range(5):
154
+ await limiter.acquire()
155
+
156
+ # Wait for window to pass
157
+ await asyncio.sleep(1.1)
158
+
159
+ # Should be allowed again
160
+ start = time.monotonic()
161
+ await limiter.acquire()
162
+ elapsed = time.monotonic() - start
163
+
164
+ assert elapsed < 0.1 # Should be nearly instant
165
+
166
+
167
+ class TestGetPubmedLimiter:
+ """Test PubMed-specific limiter factory."""
+
+ def setup_method(self) -> None:
+ """Reset the singleton so each test starts from a clean state."""
+ reset_pubmed_limiter()
+
+ def test_limiter_without_api_key(self) -> None:
+ """Should return 3/sec limiter without key."""
+ limiter = get_pubmed_limiter(api_key=None)
+ assert "3" in limiter.rate
+
+ def test_limiter_with_api_key(self) -> None:
+ """Should return 10/sec limiter with key."""
+ limiter = get_pubmed_limiter(api_key="my-api-key")
+ assert "10" in limiter.rate
+
+ def test_limiter_is_singleton(self) -> None:
+ """Same API key should return same limiter instance."""
+ limiter1 = get_pubmed_limiter(api_key="key1")
+ limiter2 = get_pubmed_limiter(api_key="key1")
+ assert limiter1 is limiter2
+
+ def test_different_keys_share_limiter(self) -> None:
+ """Different API keys still share one limiter - we rate-limit against the same NCBI API."""
+ limiter1 = get_pubmed_limiter(api_key="key1")
+ limiter2 = get_pubmed_limiter(api_key="key2")
+ assert limiter1 is limiter2 # Shared NCBI rate limit
194
+ ```
195
+
196
+ ---
197
+
198
+ ### Step 3: Create Rate Limiter Module
199
+
200
+ **File**: `src/tools/rate_limiter.py`
201
+
202
+ ```python
203
+ """Rate limiting utilities using the limits library."""
204
+
205
+ import asyncio
206
+ from typing import ClassVar
207
+
208
+ from limits import RateLimitItem, parse
209
+ from limits.storage import MemoryStorage
210
+ from limits.strategies import MovingWindowRateLimiter
211
+
212
+
213
+ class RateLimiter:
214
+ """
215
+ Async-compatible rate limiter using limits library.
216
+
217
+ Uses moving window algorithm for smooth rate limiting.
218
+ """
219
+
220
+ def __init__(self, rate: str) -> None:
221
+ """
222
+ Initialize rate limiter.
223
+
224
+ Args:
225
+ rate: Rate string like "3/second" or "10/second"
226
+ """
227
+ self.rate = rate
228
+ self._storage = MemoryStorage()
229
+ self._limiter = MovingWindowRateLimiter(self._storage)
230
+ self._rate_limit: RateLimitItem = parse(rate)
231
+ self._identity = "default" # Single identity for shared limiting
232
+
233
+ async def acquire(self, wait: bool = True) -> bool:
234
+ """
235
+ Acquire permission to make a request.
236
+
237
+ ASYNC-SAFE: Uses asyncio.sleep(), never time.sleep().
238
+ The polling pattern allows other coroutines to run while waiting.
239
+
240
+ Args:
241
+ wait: If True, wait until allowed. If False, return immediately.
242
+
243
+ Returns:
244
+ True if allowed, False if not (only when wait=False)
245
+ """
246
+ while True:
247
+ # Check if we can proceed (synchronous, fast - ~microseconds)
248
+ if self._limiter.hit(self._rate_limit, self._identity):
249
+ return True
250
+
251
+ if not wait:
252
+ return False
253
+
254
+ # CRITICAL: Use asyncio.sleep(), NOT time.sleep()
255
+ # This yields control to the event loop, allowing other
256
+ # coroutines (UI, parallel searches) to run
257
+ await asyncio.sleep(0.1)
258
+
259
+ def reset(self) -> None:
260
+ """Reset the rate limiter (for testing)."""
261
+ self._storage.reset()
262
+
263
+
264
+ # Singleton limiter for PubMed/NCBI
265
+ _pubmed_limiter: RateLimiter | None = None
266
+
267
+
268
+ def get_pubmed_limiter(api_key: str | None = None) -> RateLimiter:
269
+ """
270
+ Get the shared PubMed rate limiter.
271
+
272
+ Rate depends on whether API key is provided:
273
+ - Without key: 3 requests/second
274
+ - With key: 10 requests/second
275
+
276
+ Args:
277
+ api_key: NCBI API key (optional)
278
+
279
+ Returns:
280
+ Shared RateLimiter instance
281
+ """
282
+ global _pubmed_limiter
283
+
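+ # Note: first call wins - the singleton's rate is fixed by whether an API
+ # key was present on the first call (reset_pubmed_limiter() clears it)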
284
+ if _pubmed_limiter is None:
285
+ rate = "10/second" if api_key else "3/second"
286
+ _pubmed_limiter = RateLimiter(rate)
287
+
288
+ return _pubmed_limiter
289
+
290
+
291
+ def reset_pubmed_limiter() -> None:
292
+ """Reset the PubMed limiter (for testing)."""
293
+ global _pubmed_limiter
294
+ _pubmed_limiter = None
295
+
296
+
297
+ # Factory for other APIs
298
+ class RateLimiterFactory:
299
+ """Factory for creating/getting rate limiters for different APIs."""
300
+
301
+ _limiters: ClassVar[dict[str, RateLimiter]] = {}
302
+
303
+ @classmethod
304
+ def get(cls, api_name: str, rate: str) -> RateLimiter:
305
+ """
306
+ Get or create a rate limiter for an API.
307
+
308
+ Args:
309
+ api_name: Unique identifier for the API
310
+ rate: Rate limit string (e.g., "10/second")
311
+
312
+ Returns:
313
+ RateLimiter instance (shared for same api_name)
314
+ """
315
+ if api_name not in cls._limiters:
316
+ cls._limiters[api_name] = RateLimiter(rate)
317
+ return cls._limiters[api_name]
318
+
319
+ @classmethod
320
+ def reset_all(cls) -> None:
321
+ """Reset all limiters (for testing)."""
322
+ cls._limiters.clear()
323
+ ```
324
+
325
+ ---
326
+
327
+ ### Step 4: Update PubMed Tool
328
+
329
+ **File**: `src/tools/pubmed.py` (replace rate limiting code)
330
+
331
+ ```python
332
+ # Replace imports and rate limiting
333
+
334
+ from src.tools.rate_limiter import get_pubmed_limiter
335
+
336
+
337
+ class PubMedTool:
338
+ """Search tool for PubMed/NCBI."""
339
+
340
+ BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
341
+ HTTP_TOO_MANY_REQUESTS = 429
342
+
343
+ def __init__(self, api_key: str | None = None) -> None:
344
+ self.api_key = api_key or settings.ncbi_api_key
345
+ if self.api_key == "your-ncbi-key-here":
346
+ self.api_key = None
347
+ # Use shared rate limiter
348
+ self._limiter = get_pubmed_limiter(self.api_key)
349
+
350
+ async def _rate_limit(self) -> None:
351
+ """Enforce NCBI rate limiting using shared limiter."""
352
+ await self._limiter.acquire()
353
+
354
+ # ... rest of class unchanged ...
355
+ ```
356
+
357
+ ---
358
+
359
+ ### Step 5: Add Rate Limiters for Other APIs
360
+
361
+ **File**: `src/tools/clinicaltrials.py` (optional)
362
+
363
+ ```python
364
+ from src.tools.rate_limiter import RateLimiterFactory
365
+
366
+
367
+ class ClinicalTrialsTool:
368
+ def __init__(self) -> None:
369
+ # ClinicalTrials.gov doesn't document limits, but be conservative
370
+ self._limiter = RateLimiterFactory.get("clinicaltrials", "5/second")
371
+
372
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
373
+ await self._limiter.acquire()
374
+ # ... rest of method ...
375
+ ```
376
+
377
+ **File**: `src/tools/europepmc.py` (optional)
378
+
379
+ ```python
380
+ from src.tools.rate_limiter import RateLimiterFactory
381
+
382
+
383
+ class EuropePMCTool:
384
+ def __init__(self) -> None:
385
+ # Europe PMC is generous, but still be respectful
386
+ self._limiter = RateLimiterFactory.get("europepmc", "10/second")
387
+
388
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
389
+ await self._limiter.acquire()
390
+ # ... rest of method ...
391
+ ```
392
+
393
+ ---
394
+
395
+ ## Demo Script
396
+
397
+ **File**: `examples/rate_limiting_demo.py`
398
+
399
+ ```python
400
+ #!/usr/bin/env python3
401
+ """Demo script to verify rate limiting works correctly."""
402
+
403
+ import asyncio
404
+ import time
405
+
406
+ from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter, reset_pubmed_limiter
407
+ from src.tools.pubmed import PubMedTool
408
+
409
+
410
+ async def test_basic_limiter():
411
+ """Test basic rate limiter behavior."""
412
+ print("=" * 60)
413
+ print("Rate Limiting Demo")
414
+ print("=" * 60)
415
+
416
+ # Test 1: Basic limiter
417
+ print("\n[Test 1] Testing 3/second limiter...")
418
+ limiter = RateLimiter("3/second")
419
+
420
+ start = time.monotonic()
421
+ for i in range(6):
422
+ await limiter.acquire()
423
+ elapsed = time.monotonic() - start
424
+ print(f" Request {i+1} at {elapsed:.2f}s")
425
+
426
+ total = time.monotonic() - start
427
+ print(f" Total time for 6 requests: {total:.2f}s (expected ~2s)")
428
+
429
+
430
+ async def test_pubmed_limiter():
431
+ """Test PubMed-specific limiter."""
432
+ print("\n[Test 2] Testing PubMed limiter (shared)...")
433
+
434
+ reset_pubmed_limiter() # Clean state
435
+
436
+ # Without API key: 3/sec
437
+ limiter = get_pubmed_limiter(api_key=None)
438
+ print(f" Rate without key: {limiter.rate}")
439
+
440
+ # Multiple tools should share the same limiter
441
+ tool1 = PubMedTool()
442
+ tool2 = PubMedTool()
443
+
444
+ # Verify they share the limiter
445
+ print(f" Tools share limiter: {tool1._limiter is tool2._limiter}")
446
+
447
+
448
+ async def test_concurrent_requests():
449
+ """Test rate limiting under concurrent load."""
450
+ print("\n[Test 3] Testing concurrent request limiting...")
451
+
452
+ limiter = RateLimiter("5/second")
453
+
454
+ async def make_request(i: int):
455
+ await limiter.acquire()
456
+ return time.monotonic()
457
+
458
+ start = time.monotonic()
459
+ # Launch 10 concurrent requests
460
+ tasks = [make_request(i) for i in range(10)]
461
+ times = await asyncio.gather(*tasks)
462
+
463
+ # Calculate distribution
464
+ relative_times = [t - start for t in times]
465
+ print(f" Request times: {[f'{t:.2f}s' for t in sorted(relative_times)]}")
466
+
467
+ total = max(relative_times)
468
+ print(f" All 10 requests completed in {total:.2f}s (expected ~2s)")
469
+
470
+
471
+ async def main():
472
+ await test_basic_limiter()
473
+ await test_pubmed_limiter()
474
+ await test_concurrent_requests()
475
+
476
+ print("\n" + "=" * 60)
477
+ print("Demo complete!")
478
+
479
+
480
+ if __name__ == "__main__":
481
+ asyncio.run(main())
482
+ ```
483
+
484
+ ---
485
+
486
+ ## Verification Checklist
487
+
488
+ ### Unit Tests
489
+ ```bash
490
+ # Run rate limiting tests
491
+ uv run pytest tests/unit/tools/test_rate_limiting.py -v
492
+
493
+ # Expected: All tests pass
494
+ ```
495
+
496
+ ### Integration Test (Manual)
497
+ ```bash
498
+ # Run demo
499
+ uv run python examples/rate_limiting_demo.py
500
+
501
+ # Expected: Requests properly spaced
502
+ ```
503
+
504
+ ### Full Test Suite
505
+ ```bash
506
+ make check
507
+ # Expected: All tests pass, mypy clean
508
+ ```
509
+
510
+ ---
511
+
512
+ ## Success Criteria
513
+
514
+ 1. **`limits` library installed**: Dependency added to pyproject.toml
515
+ 2. **RateLimiter class works**: Can create and use limiters
516
+ 3. **PubMed uses new limiter**: Shared limiter across instances
517
+ 4. **Rate adapts to API key**: 3/sec without, 10/sec with
518
+ 5. **Concurrent requests handled**: Multiple async requests properly queued
519
+ 6. **No regressions**: All existing tests pass
520
+
521
+ ---
522
+
523
+ ## API Rate Limit Reference
524
+
525
+ | API | Without Key | With Key |
526
+ |-----|-------------|----------|
527
+ | PubMed/NCBI | 3/sec | 10/sec |
528
+ | ClinicalTrials.gov | Undocumented (~5/sec safe) | N/A |
529
+ | Europe PMC | ~10-20/sec (generous) | N/A |
530
+ | OpenAlex | ~100k/day (no per-sec limit) | Faster with `mailto` |
531
+
532
+ ---
533
+
534
+ ## Notes
535
+
536
+ - `limits` library uses moving window algorithm (fairer than fixed window)
537
+ - Singleton pattern ensures all PubMed calls share the limit
538
+ - The factory pattern allows easy extension to other APIs
539
+ - Consider adding 429 response detection + exponential backoff
540
+ - In production, consider Redis storage for distributed rate limiting
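+
+ A hedged sketch of the 429-backoff idea from the notes (not part of the current implementation):
+
+ ```python
+ import asyncio
+ import httpx
+
+ async def get_with_backoff(client: httpx.AsyncClient, url: str, retries: int = 3) -> httpx.Response:
+     delay = 1.0
+     for _ in range(retries):
+         response = await client.get(url)
+         if response.status_code != 429:
+             return response
+         await asyncio.sleep(delay)  # exponential backoff before retrying
+         delay *= 2
+     return response  # give up after the final retry
+ ```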
docs/brainstorming/implementation/README.md ADDED
@@ -0,0 +1,143 @@
1
+ # Implementation Plans
2
+
3
+ TDD implementation plans based on the brainstorming documents. Each phase is a self-contained vertical slice with tests, implementation, and demo scripts.
4
+
5
+ ---
6
+
7
+ ## Prerequisites (COMPLETED)
8
+
9
+ The following foundational changes have been implemented to support all three phases:
10
+
11
+ | Change | File | Status |
12
+ |--------|------|--------|
13
+ | Add `"openalex"` to `SourceName` | `src/utils/models.py:9` | ✅ Done |
14
+ | Add `metadata` field to `Evidence` | `src/utils/models.py:39-42` | ✅ Done |
15
+ | Export all tools from `__init__.py` | `src/tools/__init__.py` | ✅ Done |
16
+
17
+ All 110 tests pass after these changes.
18
+
19
+ ---
20
+
21
+ ## Priority Order
22
+
23
+ | Phase | Name | Priority | Effort | Value |
24
+ |-------|------|----------|--------|-------|
25
+ | **17** | Rate Limiting | P0 CRITICAL | 1 hour | Stability |
26
+ | **15** | OpenAlex | HIGH | 2-3 hours | Very High |
27
+ | **16** | PubMed Full-Text | MEDIUM | 3 hours | High |
28
+
29
+ **Recommended implementation order**: 17 → 15 → 16
30
+
31
+ ---
32
+
33
+ ## Phase 15: OpenAlex Integration
34
+
35
+ **File**: [15_PHASE_OPENALEX.md](./15_PHASE_OPENALEX.md)
36
+
37
+ Add OpenAlex as 4th data source for:
38
+ - Citation networks (who cites whom)
39
+ - Concept tagging (semantic discovery)
40
+ - 209M+ scholarly works
41
+ - Free, no API key required
42
+
43
+ **Quick Start**:
44
+ ```bash
45
+ # Create the tool
46
+ touch src/tools/openalex.py
47
+ touch tests/unit/tools/test_openalex.py
48
+
49
+ # Run tests first (TDD)
50
+ uv run pytest tests/unit/tools/test_openalex.py -v
51
+
52
+ # Demo
53
+ uv run python examples/openalex_demo.py
54
+ ```
55
+
56
+ ---
57
+
58
+ ## Phase 16: PubMed Full-Text
59
+
60
+ **File**: [16_PHASE_PUBMED_FULLTEXT.md](./16_PHASE_PUBMED_FULLTEXT.md)
61
+
62
+ Add full-text retrieval via BioC API for:
63
+ - Complete paper text (not just abstracts)
64
+ - Structured sections (intro, methods, results)
65
+ - Better evidence for LLM synthesis
66
+
67
+ **Quick Start**:
68
+ ```bash
69
+ # Add methods to existing pubmed.py
70
+ # Tests in test_pubmed_fulltext.py
71
+
72
+ # Run tests
73
+ uv run pytest tests/unit/tools/test_pubmed_fulltext.py -v
74
+
75
+ # Demo
76
+ uv run python examples/pubmed_fulltext_demo.py
77
+ ```
78
+
79
+ ---
80
+
81
+ ## Phase 17: Rate Limiting
82
+
83
+ **File**: [17_PHASE_RATE_LIMITING.md](./17_PHASE_RATE_LIMITING.md)
84
+
85
+ Replace naive sleep-based rate limiting with `limits` library for:
86
+ - Moving window algorithm
87
+ - Shared limits across instances
88
+ - Configurable per-API rates
89
+ - Production-grade stability
90
+
91
+ **Quick Start**:
92
+ ```bash
93
+ # Add dependency
94
+ uv add limits
95
+
96
+ # Create module
97
+ touch src/tools/rate_limiter.py
98
+ touch tests/unit/tools/test_rate_limiting.py
99
+
100
+ # Run tests
101
+ uv run pytest tests/unit/tools/test_rate_limiting.py -v
102
+
103
+ # Demo
104
+ uv run python examples/rate_limiting_demo.py
105
+ ```
106
+
107
+ ---
108
+
109
+ ## TDD Workflow
110
+
111
+ Each implementation doc follows this pattern:
112
+
113
+ 1. **Write tests first** - Define expected behavior
114
+ 2. **Run tests** - Verify they fail (red)
115
+ 3. **Implement** - Write minimal code to pass
116
+ 4. **Run tests** - Verify they pass (green)
117
+ 5. **Refactor** - Clean up if needed
118
+ 6. **Demo** - Verify end-to-end with real APIs
119
+ 7. **`make check`** - Ensure no regressions
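+
+ In shell form, one red-green cycle for a new tool looks roughly like this (paths follow the phase docs above):
+
+ ```bash
+ uv run pytest tests/unit/tools/test_openalex.py -v   # red: tests fail first
+ # ...implement src/tools/openalex.py...
+ uv run pytest tests/unit/tools/test_openalex.py -v   # green: tests pass
+ make check                                           # no regressions
+ ```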
120
+
121
+ ---
122
+
123
+ ## Related Brainstorming Docs
124
+
125
+ These implementation plans are derived from:
126
+
127
+ - [00_ROADMAP_SUMMARY.md](../00_ROADMAP_SUMMARY.md) - Priority overview
128
+ - [01_PUBMED_IMPROVEMENTS.md](../01_PUBMED_IMPROVEMENTS.md) - PubMed details
129
+ - [02_CLINICALTRIALS_IMPROVEMENTS.md](../02_CLINICALTRIALS_IMPROVEMENTS.md) - CT.gov details
130
+ - [03_EUROPEPMC_IMPROVEMENTS.md](../03_EUROPEPMC_IMPROVEMENTS.md) - Europe PMC details
131
+ - [04_OPENALEX_INTEGRATION.md](../04_OPENALEX_INTEGRATION.md) - OpenAlex integration
132
+
133
+ ---
134
+
135
+ ## Future Phases (Not Yet Documented)
136
+
137
+ Based on brainstorming, these could be added later:
138
+
139
+ - **Phase 18**: ClinicalTrials.gov Results Retrieval
140
+ - **Phase 19**: Europe PMC Annotations API
141
+ - **Phase 20**: Drug Name Normalization (RxNorm)
142
+ - **Phase 21**: Citation Network Queries (OpenAlex)
143
+ - **Phase 22**: Semantic Search with Embeddings
docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md ADDED
@@ -0,0 +1,189 @@
1
+ # Situation Analysis: Pydantic-AI + Microsoft Agent Framework Integration
2
+
3
+ **Date:** November 27, 2025
4
+ **Status:** ACTIVE DECISION REQUIRED
5
+ **Risk Level:** HIGH - DO NOT MERGE PR #41 UNTIL RESOLVED
6
+
7
+ ---
8
+
9
+ ## 1. The Problem
10
+
11
+ We almost merged a refactor that would have **deleted** multi-agent orchestration capability from the codebase, mistakenly believing pydantic-ai and Microsoft Agent Framework were mutually exclusive.
12
+
13
+ **They are not.** They are complementary:
14
+ - **pydantic-ai** (Library): Ensures LLM outputs match Pydantic schemas
15
+ - **Microsoft Agent Framework** (Framework): Orchestrates multi-agent workflows
16
+
17
+ ---
18
+
19
+ ## 2. Current Branch State
20
+
21
+ | Branch | Location | Has Agent Framework? | Has Pydantic-AI Improvements? | Status |
22
+ |--------|----------|---------------------|------------------------------|--------|
23
+ | `origin/dev` | GitHub | YES | NO | **SAFE - Source of Truth** |
24
+ | `huggingface-upstream/dev` | HF Spaces | YES | NO | **SAFE - Same as GitHub** |
25
+ | `origin/main` | GitHub | YES | NO | **SAFE** |
26
+ | `feat/pubmed-fulltext` | GitHub | NO (deleted) | YES | **DANGER - Has destructive refactor** |
27
+ | `refactor/pydantic-unification` | Local | NO (deleted) | YES | **DANGER - Redundant, delete** |
28
+ | Local `dev` | Local only | NO (deleted) | YES | **DANGER - NOT PUSHED (thankfully)** |
29
+
30
+ ### Key Files at Risk
31
+
32
+ **On `origin/dev` (PRESERVED):**
33
+ ```text
34
+ src/agents/
35
+ ├── analysis_agent.py # StatisticalAnalyzer wrapper
36
+ ├── hypothesis_agent.py # Hypothesis generation
37
+ ├── judge_agent.py # JudgeHandler wrapper
38
+ ├── magentic_agents.py # Multi-agent definitions
39
+ ├── report_agent.py # Report synthesis
40
+ ├── search_agent.py # SearchHandler wrapper
41
+ ├── state.py # Thread-safe state management
42
+ └── tools.py # @ai_function decorated tools
43
+
44
+ src/orchestrator_magentic.py # Multi-agent orchestrator
45
+ src/utils/llm_factory.py # Centralized LLM client factory
46
+ ```
47
+
48
+ **Deleted in refactor branch (would be lost if merged):**
49
+ - All of the above
50
+
51
+ ---
52
+
53
+ ## 3. Target Architecture
54
+
55
+ ```text
56
+ ┌─────────────────────────────────────────────────────────────────┐
57
+ │ Microsoft Agent Framework (Orchestration Layer) │
58
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
59
+ │ │ SearchAgent │→ │ JudgeAgent │→ │ ReportAgent │ │
60
+ │ │ (BaseAgent) │ │ (BaseAgent) │ │ (BaseAgent) │ │
61
+ │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │
62
+ │ │ │ │ │
63
+ │ ▼ ▼ ▼ │
64
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
65
+ │ │ pydantic-ai │ │ pydantic-ai │ │ pydantic-ai │ │
66
+ │ │ Agent() │ │ Agent() │ │ Agent() │ │
67
+ │ │ output_type= │ │ output_type= │ │ output_type= │ │
68
+ │ │ SearchResult │ │ JudgeAssess │ │ Report │ │
69
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
70
+ └─────────────────────────────────────────────────────────────────┘
71
+ ```
72
+
73
+ **Why this architecture:**
74
+ 1. **Agent Framework** handles: workflow coordination, state passing, middleware, observability
75
+ 2. **pydantic-ai** handles: type-safe LLM calls within each agent
76
+
77
+ ---
78
+
79
+ ## 4. CRITICAL: Naming Confusion Clarification
80
+
81
+ > **Senior Agent Review Finding:** The codebase uses "magentic" in file names (e.g., `orchestrator_magentic.py`, `magentic_agents.py`) but this is **NOT** the `magentic` PyPI package by Jacky Liang. It's Microsoft Agent Framework (`agent-framework-core`).
82
+
83
+ **The naming confusion:**
84
+ - `magentic` (PyPI package): A different library for structured LLM outputs
85
+ - "Magentic" (in our codebase): Our internal name for Microsoft Agent Framework integration
86
+ - `agent-framework-core` (PyPI package): Microsoft's actual multi-agent orchestration framework
87
+
88
+ **Recommended future action:** Rename `orchestrator_magentic.py` → `orchestrator_advanced.py` to eliminate confusion.
89
+
90
+ ---
91
+
92
+ ## 5. What the Refactor DID Get Right
93
+
94
+ The refactor branch (`feat/pubmed-fulltext`) has some valuable improvements:
95
+
96
+ 1. **`judges.py` unified `get_model()`** - Supports OpenAI, Anthropic, AND HuggingFace via pydantic-ai
97
+ 2. **HuggingFace free tier support** - `HuggingFaceModel` integration
98
+ 3. **Test fix** - Properly mocks `HuggingFaceModel` class
99
+ 4. **Removed broken magentic optional dependency** from pyproject.toml (this was correct - the old `magentic` package is different from Microsoft Agent Framework)
100
+
101
+ **What it got WRONG:**
102
+ 1. Deleted `src/agents/` entirely instead of refactoring them
103
+ 2. Deleted `src/orchestrator_magentic.py` instead of fixing it
104
+ 3. Conflated "magentic" (old package) with "Microsoft Agent Framework" (current framework)
105
+
106
+ ---
107
+
108
+ ## 6. Options for Path Forward
109
+
110
+ ### Option A: Abandon Refactor, Start Fresh
111
+ - Close PR #41
112
+ - Delete `feat/pubmed-fulltext` and `refactor/pydantic-unification` branches
113
+ - Reset local `dev` to match `origin/dev`
114
+ - Cherry-pick ONLY the good parts (judges.py improvements, HF support)
115
+ - **Pros:** Clean, safe
116
+ - **Cons:** Lose some work, need to redo carefully
117
+
118
+ ### Option B: Cherry-Pick Good Parts to origin/dev
119
+ - Do NOT merge PR #41
120
+ - Create new branch from `origin/dev`
121
+ - Cherry-pick specific commits/changes that improve pydantic-ai usage
122
+ - Keep agent framework code intact
123
+ - **Pros:** Preserves both, surgical
124
+ - **Cons:** Requires careful file-by-file review
125
+
126
+ ### Option C: Revert Deletions in Refactor Branch
127
+ - On `feat/pubmed-fulltext`, restore deleted agent files from `origin/dev`
128
+ - Keep the pydantic-ai improvements
129
+ - Merge THAT to dev
130
+ - **Pros:** Gets both
131
+ - **Cons:** Complex git operations, risk of conflicts
132
+
133
+ ---
134
+
135
+ ## 7. Recommended Action: Option B (Cherry-Pick)
136
+
137
+ **Step-by-step:**
138
+
139
+ 1. **Close PR #41** (do not merge)
140
+ 2. **Delete redundant branches:**
141
+ - `refactor/pydantic-unification` (local)
142
+ - Reset local `dev` to `origin/dev`
143
+ 3. **Create new branch from origin/dev:**
144
+ ```bash
145
+ git checkout -b feat/pydantic-ai-improvements origin/dev
146
+ ```
147
+ 4. **Cherry-pick or manually port these improvements** (see the sketch after this list):
148
+ - `src/agent_factory/judges.py` - the unified `get_model()` function
149
+ - `examples/free_tier_demo.py` - HuggingFace demo
150
+ - Test improvements
151
+ 5. **Do NOT delete any agent framework files**
152
+ 6. **Create PR for review**
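+
+ For step 4, porting at the file level avoids pulling in the destructive commits; a sketch using the paths listed above:
+
+ ```bash
+ # On feat/pydantic-ai-improvements: copy specific files from the refactor branch
+ git checkout feat/pubmed-fulltext -- src/agent_factory/judges.py
+ git checkout feat/pubmed-fulltext -- examples/free_tier_demo.py
+ git checkout feat/pubmed-fulltext -- tests/unit/agent_factory/test_judges.py
+ git diff --stat   # confirm only the intended files changed
+ ```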
153
+
154
+ ---
155
+
156
+ ## 8. Files to Cherry-Pick (Safe Improvements)
157
+
158
+ | File | What Changed | Safe to Port? |
159
+ |------|-------------|---------------|
160
+ | `src/agent_factory/judges.py` | Added `HuggingFaceModel` support in `get_model()` | YES |
161
+ | `examples/free_tier_demo.py` | New demo for HF inference | YES |
162
+ | `tests/unit/agent_factory/test_judges.py` | Fixed HF model mocking | YES |
163
+ | `pyproject.toml` | Removed old `magentic` optional dep | MAYBE (review carefully) |
164
+
165
+ ---
166
+
167
+ ## 9. Questions to Answer Before Proceeding
168
+
169
+ 1. **For the hackathon**: Do we need full multi-agent orchestration, or is single-agent sufficient?
170
+ 2. **For DeepCritical mainline**: Is the plan to use Microsoft Agent Framework for orchestration?
171
+ 3. **Timeline**: How much time do we have to get this right?
172
+
173
+ ---
174
+
175
+ ## 10. Immediate Actions (DO NOW)
176
+
177
+ - [ ] **DO NOT merge PR #41**
178
+ - [ ] Close PR #41 with comment explaining the situation
179
+ - [ ] Do not push local `dev` branch anywhere
180
+ - [ ] Confirm HuggingFace Spaces is untouched (it is - verified)
181
+
182
+ ---
183
+
184
+ ## 11. Decision Log
185
+
186
+ | Date | Decision | Rationale |
187
+ |------|----------|-----------|
188
+ | 2025-11-27 | Pause refactor merge | Discovered agent framework and pydantic-ai are complementary, not exclusive |
189
+ | TBD | ? | Awaiting decision on path forward |
docs/brainstorming/magentic-pydantic/01_ARCHITECTURE_SPEC.md ADDED
@@ -0,0 +1,289 @@
1
+ # Architecture Specification: Dual-Mode Agent System
2
+
3
+ **Date:** November 27, 2025
4
+ **Status:** SPECIFICATION
5
+ **Goal:** Graceful degradation from full multi-agent orchestration to simple single-agent mode
6
+
7
+ ---
8
+
9
+ ## 1. Core Concept: Two Operating Modes
10
+
11
+ ```text
12
+ ┌─────────────────────────────────────────────────────────────────────┐
13
+ │ USER REQUEST │
14
+ │ │ │
15
+ │ ▼ │
16
+ │ ┌─────────────────┐ │
17
+ │ │ Mode Selection │ │
18
+ │ │ (Auto-detect) │ │
19
+ │ └────────┬────────┘ │
20
+ │ │ │
21
+ │ ┌───────────────┴───────────────┐ │
22
+ │ │ │ │
23
+ │ ▼ ▼ │
24
+ │ ┌─────────────────┐ ┌─────────────────┐ │
25
+ │ │ SIMPLE MODE │ │ ADVANCED MODE │ │
26
+ │ │ (Free Tier) │ │ (Paid Tier) │ │
27
+ │ │ │ │ │ │
28
+ │ │ pydantic-ai │ │ MS Agent Fwk │ │
29
+ │ │ single-agent │ │ + pydantic-ai │ │
30
+ │ │ loop │ │ multi-agent │ │
31
+ │ └─────────────────┘ └─────────────────┘ │
32
+ │ │ │ │
33
+ │ └───────────────┬───────────────┘ │
34
+ │ ▼ │
35
+ │ ┌─────────────────┐ │
36
+ │ │ Research Report │ │
37
+ │ │ with Citations │ │
38
+ │ └─────────────────┘ │
39
+ └─────────────────────────────────────────────────────────────────────┘
40
+ ```
41
+
42
+ ---
43
+
44
+ ## 2. Mode Comparison
45
+
46
+ | Aspect | Simple Mode | Advanced Mode |
47
+ |--------|-------------|---------------|
48
+ | **Trigger** | No API key OR `LLM_PROVIDER=huggingface` | OpenAI API key present (currently OpenAI only) |
49
+ | **Framework** | pydantic-ai only | Microsoft Agent Framework + pydantic-ai |
50
+ | **Architecture** | Single orchestrator loop | Multi-agent coordination |
51
+ | **Agents** | One agent does Search→Judge→Report | SearchAgent, JudgeAgent, ReportAgent, AnalysisAgent |
52
+ | **State Management** | Simple dict | Thread-safe `MagenticState` with context vars |
53
+ | **Quality** | Good (functional) | Better (specialized agents, coordination) |
54
+ | **Cost** | Free (HuggingFace Inference) | Paid (OpenAI/Anthropic) |
55
+ | **Use Case** | Demos, hackathon, budget-constrained | Production, research quality |
56
+
57
+ ---
58
+
59
+ ## 3. Simple Mode Architecture (pydantic-ai Only)
60
+
61
+ ```text
62
+ ┌─────────────────────────────────────────────────────┐
63
+ │ Orchestrator │
64
+ │ │
65
+ │ while not sufficient and iteration < max: │
66
+ │ 1. SearchHandler.execute(query) │
67
+ │ 2. JudgeHandler.assess(evidence) ◄── pydantic-ai Agent │
68
+ │ 3. if sufficient: break │
69
+ │ 4. query = judge.next_queries │
70
+ │ │
71
+ │ return ReportGenerator.generate(evidence) │
72
+ └─────────────────────────────────────────────────────┘
73
+ ```
74
+
75
+ **Components:**
76
+ - `src/orchestrator.py` - Simple loop orchestrator
77
+ - `src/agent_factory/judges.py` - JudgeHandler with pydantic-ai
78
+ - `src/tools/search_handler.py` - Scatter-gather search
79
+ - `src/tools/pubmed.py`, `clinicaltrials.py`, `europepmc.py` - Search tools
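+
+ The loop in the diagram above, as a hedged Python sketch (names mirror the components listed; exact signatures may differ):
+
+ ```python
+ async def run(question: str, max_iterations: int = 5) -> str:
+     query = question
+     evidence: list[Evidence] = []
+     for _ in range(max_iterations):
+         evidence += await search_handler.execute(query)
+         assessment = await judge_handler.assess(question, evidence)
+         if assessment.sufficient:
+             break
+         query = assessment.next_queries[0]  # judge proposes the next query
+     return await report_generator.generate(question, evidence)
+ ```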
80
+
81
+ ---
82
+
83
+ ## 4. Advanced Mode Architecture (MS Agent Framework + pydantic-ai)
84
+
85
+ ```text
86
+ ┌─────────────────────────────────────────────────────────────────────┐
87
+ │ Microsoft Agent Framework Orchestrator │
88
+ │ │
89
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
90
+ │ │ SearchAgent │───▶│ JudgeAgent │───▶│ ReportAgent │ │
91
+ │ │ (BaseAgent) │ │ (BaseAgent) │ │ (BaseAgent) │ │
92
+ │ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
93
+ │ │ │ │ │
94
+ │ ▼ ▼ ▼ │
95
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
96
+ │ │ pydantic-ai │ │ pydantic-ai │ │ pydantic-ai │ │
97
+ │ │ Agent() │ │ Agent() │ │ Agent() │ │
98
+ │ │ output_type=│ │ output_type=│ │ output_type=│ │
99
+ │ │ SearchResult│ │ JudgeAssess │ │ Report │ │
100
+ │ └─────────────┘ └─────────────┘ └─────────────┘ │
101
+ │ │
102
+ │ Shared State: MagenticState (thread-safe via contextvars) │
103
+ │ - evidence: list[Evidence] │
104
+ │ - embedding_service: EmbeddingService │
105
+ └─────────────────────────────────────────────────────────────────────┘
106
+ ```
107
+
108
+ **Components:**
109
+ - `src/orchestrator_magentic.py` - Multi-agent orchestrator
110
+ - `src/agents/search_agent.py` - SearchAgent (BaseAgent)
111
+ - `src/agents/judge_agent.py` - JudgeAgent (BaseAgent)
112
+ - `src/agents/report_agent.py` - ReportAgent (BaseAgent)
113
+ - `src/agents/analysis_agent.py` - AnalysisAgent (BaseAgent)
114
+ - `src/agents/state.py` - Thread-safe state management
115
+ - `src/agents/tools.py` - @ai_function decorated tools
116
+
117
+ ---
118
+
119
+ ## 5. Mode Selection Logic
120
+
121
+ ```python
122
+ # src/orchestrator_factory.py (actual implementation)
123
+
124
+ def create_orchestrator(
125
+ search_handler: SearchHandlerProtocol | None = None,
126
+ judge_handler: JudgeHandlerProtocol | None = None,
127
+ config: OrchestratorConfig | None = None,
128
+ mode: Literal["simple", "magentic", "advanced"] | None = None,
129
+ ) -> Any:
130
+ """
131
+ Auto-select orchestrator based on available credentials.
132
+
133
+ Priority:
134
+ 1. If mode explicitly set, use that
135
+ 2. If OpenAI key available -> Advanced Mode (currently OpenAI only)
136
+ 3. Otherwise -> Simple Mode (HuggingFace free tier)
137
+ """
138
+ effective_mode = _determine_mode(mode)
139
+
140
+ if effective_mode == "advanced":
141
+ orchestrator_cls = _get_magentic_orchestrator_class()
142
+ return orchestrator_cls(max_rounds=config.max_iterations if config else 10)
143
+
144
+ # Simple mode requires handlers
145
+ if search_handler is None or judge_handler is None:
146
+ raise ValueError("Simple mode requires search_handler and judge_handler")
147
+
148
+ return Orchestrator(
149
+ search_handler=search_handler,
150
+ judge_handler=judge_handler,
151
+ config=config,
152
+ )
153
+ ```
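+
+ A minimal call site, for illustration (a sketch assuming the `SearchHandler` and `JudgeHandler` constructors take no required arguments, which may not hold in the actual code):
+
+ ```python
+ # Hypothetical call site: the factory picks the mode from the environment.
+ from src.agent_factory.judges import JudgeHandler
+ from src.orchestrator_factory import create_orchestrator
+ from src.tools.search_handler import SearchHandler
+
+ # With an OpenAI key set this returns the advanced (Magentic) orchestrator;
+ # without one it falls back to the simple pydantic-ai loop.
+ orchestrator = create_orchestrator(
+     search_handler=SearchHandler(),
+     judge_handler=JudgeHandler(),
+ )
+ ```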
154
+
155
+ ---
156
+
157
+ ## 6. Shared Components (Both Modes Use)
158
+
159
+ These components work in both modes:
160
+
161
+ | Component | Purpose |
162
+ |-----------|---------|
163
+ | `src/tools/pubmed.py` | PubMed search |
164
+ | `src/tools/clinicaltrials.py` | ClinicalTrials.gov search |
165
+ | `src/tools/europepmc.py` | Europe PMC search |
166
+ | `src/tools/search_handler.py` | Scatter-gather orchestration |
167
+ | `src/tools/rate_limiter.py` | Rate limiting |
168
+ | `src/utils/models.py` | Evidence, Citation, JudgeAssessment |
169
+ | `src/utils/config.py` | Settings |
170
+ | `src/services/embeddings.py` | Vector search (optional) |
171
+
172
+ ---
173
+
174
+ ## 7. pydantic-ai Integration Points
175
+
176
+ Both modes use pydantic-ai for structured LLM outputs:
177
+
178
+ ```python
179
+ # In JudgeHandler (both modes)
180
+ from pydantic_ai import Agent
181
+ from pydantic_ai.models.huggingface import HuggingFaceModel
182
+ from pydantic_ai.models.openai import OpenAIModel
183
+ from pydantic_ai.models.anthropic import AnthropicModel  # get_model() may return any of these
184
+
185
+ class JudgeHandler:
186
+ def __init__(self, model: Any = None):
187
+ self.model = model or get_model() # Auto-selects based on config
188
+ self.agent = Agent(
189
+ model=self.model,
190
+ output_type=JudgeAssessment, # Structured output!
191
+ system_prompt=SYSTEM_PROMPT,
192
+ )
193
+
194
+ async def assess(self, question: str, evidence: list[Evidence]) -> JudgeAssessment:
195
+ result = await self.agent.run(format_prompt(question, evidence))
196
+ return result.output # Guaranteed to be JudgeAssessment
197
+ ```
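+
+ Calling it is then a one-liner. A sketch (the `sufficient` / `next_queries` fields follow the Simple Mode loop in Section 3):
+
+ ```python
+ # Hypothetical call site: the assessment has already been validated.
+ assessment = await JudgeHandler().assess(question, evidence)
+ if not assessment.sufficient:
+     next_query = " ".join(assessment.next_queries)
+ ```
+
+ If the model's reply fails schema validation, pydantic-ai re-prompts up to its configured retry limit before raising, which is what makes `result.output` safe to use without manual JSON parsing.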
198
+
199
+ ---
200
+
201
+ ## 8. Microsoft Agent Framework Integration Points
202
+
203
+ Advanced mode wraps pydantic-ai agents in BaseAgent:
204
+
205
+ ```python
206
+ # In JudgeAgent (advanced mode only)
207
+ from agent_framework import BaseAgent, AgentRunResponse, ChatMessage, Role
208
+
209
+ class JudgeAgent(BaseAgent):
210
+ def __init__(self, judge_handler: JudgeHandlerProtocol):
211
+ super().__init__(
212
+ name="JudgeAgent",
213
+ description="Evaluates evidence quality",
214
+ )
215
+ self._handler = judge_handler # Uses pydantic-ai internally
216
+
217
+ async def run(self, messages, **kwargs) -> AgentRunResponse:
218
+ question = extract_question(messages)
219
+ evidence = self._evidence_store.get("current", [])
220
+
221
+ # Delegate to pydantic-ai powered handler
222
+ assessment = await self._handler.assess(question, evidence)
223
+
224
+ return AgentRunResponse(
225
+ messages=[ChatMessage(role=Role.ASSISTANT, text=format_response(assessment))],
226
+ additional_properties={"assessment": assessment.model_dump()},
227
+ )
228
+ ```
229
+
230
+ ---
231
+
232
+ ## 9. Benefits of This Architecture
233
+
234
+ 1. **Graceful Degradation**: Works without API keys (free tier)
235
+ 2. **Progressive Enhancement**: Better with API keys (orchestration)
236
+ 3. **Code Reuse**: pydantic-ai handlers shared between modes
237
+ 4. **Hackathon Ready**: Demo works without requiring paid keys
238
+ 5. **Production Ready**: Full orchestration available when needed
239
+ 6. **Future Proof**: Can add more agents to advanced mode
240
+ 7. **Testable**: Simple mode is easier to unit test
241
+
242
+ ---
243
+
244
+ ## 10. Known Risks and Mitigations
245
+
246
+ > **From Senior Agent Review**
247
+
248
+ ### 10.1 Bridge Complexity (MEDIUM)
249
+
250
+ **Risk:** In Advanced Mode, agents (Agent Framework) wrap handlers (pydantic-ai). Both are async. Context variables (`MagenticState`) must propagate correctly through the pydantic-ai call stack.
251
+
252
+ **Mitigation:**
253
+ - pydantic-ai uses standard Python `contextvars`, which naturally propagate through `await` chains (demonstrated in the sketch below)
254
+ - Test context propagation explicitly in integration tests
255
+ - If issues arise, pass state explicitly rather than via context vars
256
+
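+ The first mitigation point is plain stdlib behavior; a runnable sketch, with no project code involved:
+
+ ```python
+ # contextvars set in a caller are visible in awaited coroutines on the same task.
+ import asyncio
+ import contextvars
+
+ evidence_var: contextvars.ContextVar[list[str]] = contextvars.ContextVar("evidence")
+
+ async def handler() -> None:
+     # Sees the list set by the orchestrator, across the await boundary.
+     evidence_var.get().append("pubmed:12345")
+
+ async def orchestrator() -> None:
+     evidence_var.set([])
+     await handler()
+     assert evidence_var.get() == ["pubmed:12345"]
+
+ asyncio.run(orchestrator())
+ ```
+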
257
+ ### 10.2 Integration Drift (MEDIUM)
258
+
259
+ **Risk:** Simple Mode and Advanced Mode might diverge in behavior over time (e.g., Simple Mode uses logic A, Advanced Mode uses logic B).
260
+
261
+ **Mitigation:**
262
+ - Both modes MUST call the exact same underlying Tools (`src/tools/*`) and Handlers (`src/agent_factory/*`)
263
+ - Handlers are the single source of truth for business logic
264
+ - Agents are thin wrappers that delegate to handlers
265
+
266
+ ### 10.3 Testing Burden (LOW-MEDIUM)
267
+
268
+ **Risk:** Two distinct orchestrators (`src/orchestrator.py` and `src/orchestrator_magentic.py`) doubles integration testing surface area.
269
+
270
+ **Mitigation:**
271
+ - Unit test handlers independently (shared code)
272
+ - Integration tests for each mode separately
273
+ - End-to-end tests verify same output for same input (determinism permitting)
274
+
275
+ ### 10.4 Dependency Conflicts (LOW)
276
+
277
+ **Risk:** `agent-framework-core` might conflict with `pydantic-ai`'s dependencies (e.g., different pydantic versions).
278
+
279
+ **Status:** Both depend on `pydantic>=2.x`, so they should be compatible; verify at install time with `uv sync --all-extras`.
280
+
281
+ ---
282
+
283
+ ## 11. Naming Clarification
284
+
285
+ > See `00_SITUATION_AND_PLAN.md` Section 4 for full details.
286
+
287
+ **Important:** The codebase uses "magentic" in file names (`orchestrator_magentic.py`, `magentic_agents.py`) but this refers to our internal naming for Microsoft Agent Framework integration, **NOT** the `magentic` PyPI package.
288
+
289
+ **Future action:** Rename to `orchestrator_advanced.py` to eliminate confusion.
docs/brainstorming/magentic-pydantic/02_IMPLEMENTATION_PHASES.md ADDED
@@ -0,0 +1,112 @@
1
+ # Implementation Phases: Dual-Mode Agent System
2
+
3
+ **Date:** November 27, 2025
4
+ **Status:** IMPLEMENTATION PLAN (REVISED)
5
+ **Strategy:** TDD (Test-Driven Development), SOLID Principles
6
+ **Dependency Strategy:** PyPI (agent-framework-core)
7
+
8
+ ---
9
+
10
+ ## Phase 0: Environment Validation & Cleanup
11
+
12
+ **Goal:** Ensure clean state and dependencies are correctly installed.
13
+
14
+ ### Step 0.1: Verify PyPI Package
15
+ The `agent-framework-core` package is published on PyPI by Microsoft. Verify installation:
16
+
17
+ ```bash
18
+ uv sync --all-extras
19
+ python -c "from agent_framework import ChatAgent; print('OK')"
20
+ ```
21
+
22
+ ### Step 0.2: Branch State
23
+ We are on `feat/dual-mode-architecture`. Ensure it is up to date with `origin/dev` before starting.
24
+
25
+ **Note:** The `reference_repos/agent-framework` folder is kept for reference/documentation only.
26
+ The production dependency uses the official PyPI release.
27
+
28
+ ---
29
+
30
+ ## Phase 1: Pydantic-AI Improvements (Simple Mode)
31
+
32
+ **Goal:** Implement `HuggingFaceModel` support in `JudgeHandler` using strict TDD.
33
+
34
+ ### Step 1.1: Test First (Red)
35
+ Create `tests/unit/agent_factory/test_judges_factory.py`:
36
+ - Test `get_model()` returns `HuggingFaceModel` when `LLM_PROVIDER=huggingface`.
37
+ - Test `get_model()` respects `HF_TOKEN`.
38
+ - Test fallback to OpenAI (one of these tests is sketched below).
39
+
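+ One of these tests, sketched (the env var names are assumptions to be confirmed against `src/utils/config.py`):
+
+ ```python
+ # tests/unit/agent_factory/test_judges_factory.py (illustrative sketch)
+ import pytest
+ from pydantic_ai.models.huggingface import HuggingFaceModel
+
+ from src.agent_factory.judges import get_model
+
+ def test_get_model_returns_hf_model(monkeypatch: pytest.MonkeyPatch) -> None:
+     monkeypatch.setenv("LLM_PROVIDER", "huggingface")
+     monkeypatch.setenv("HF_TOKEN", "hf_dummy_token")
+     # NOTE: if settings are cached at import time, they must be reloaded here.
+     assert isinstance(get_model(), HuggingFaceModel)
+ ```
+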
40
+ ### Step 1.2: Implementation (Green)
41
+ Update `src/utils/config.py`:
42
+ - Add `huggingface_model` and `hf_token` fields.
43
+
44
+ Update `src/agent_factory/judges.py`:
45
+ - Implement `get_model` with the logic derived from the tests.
46
+ - Use dependency injection for the model where possible (rough shape sketched below).
47
+
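+ A possible shape for the selection logic (a sketch assuming `settings` exposes `llm_provider`, `openai_api_key`, `huggingface_model`, and `openai_model`; the Step 1.1 tests define the real contract):
+
+ ```python
+ from pydantic_ai.models.huggingface import HuggingFaceModel
+ from pydantic_ai.models.openai import OpenAIModel
+
+ from src.utils.config import settings
+
+ def get_model():
+     # Free tier: explicit opt-in, or no paid key available.
+     if settings.llm_provider == "huggingface" or not settings.openai_api_key:
+         return HuggingFaceModel(settings.huggingface_model)
+     return OpenAIModel(settings.openai_model)
+ ```
+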
48
+ ### Step 1.3: Refactor
49
+ Ensure `JudgeHandler` is loosely coupled from the specific model provider.
50
+
51
+ ---
52
+
53
+ ## Phase 2: Orchestrator Factory (The Switch)
54
+
55
+ **Goal:** Implement the factory pattern to switch between Simple and Advanced modes.
56
+
57
+ ### Step 2.1: Test First (Red)
58
+ Create `tests/unit/test_orchestrator_factory.py`:
59
+ - Test `create_orchestrator` returns `Orchestrator` (simple) when API keys are missing.
60
+ - Test `create_orchestrator` returns `MagenticOrchestrator` (advanced) when OpenAI key exists.
61
+ - Test explicit mode override (first case sketched below).
62
+
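+ The first case, sketched (the `object()` stand-ins are hypothetical; a real test would use protocol stubs, and cached settings may need resetting after the env change):
+
+ ```python
+ # tests/unit/test_orchestrator_factory.py (illustrative sketch)
+ from src.orchestrator import Orchestrator
+ from src.orchestrator_factory import create_orchestrator
+
+ def test_simple_mode_when_no_api_key(monkeypatch) -> None:
+     monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+     orch = create_orchestrator(
+         search_handler=object(),  # stand-in; replace with a protocol stub
+         judge_handler=object(),
+     )
+     assert isinstance(orch, Orchestrator)
+ ```
+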
63
+ ### Step 2.2: Implementation (Green)
64
+ Update `src/orchestrator_factory.py` to implement the selection logic.
65
+
66
+ ---
67
+
68
+ ## Phase 3: Agent Framework Integration (Advanced Mode)
69
+
70
+ **Goal:** Integrate Microsoft Agent Framework from PyPI.
71
+
72
+ ### Step 3.1: Dependency Management
73
+ The `agent-framework-core` package is installed from PyPI:
74
+ ```toml
75
+ [project.optional-dependencies]
76
+ magentic = [
77
+ "agent-framework-core>=1.0.0b251120,<2.0.0", # Microsoft Agent Framework (PyPI)
78
+ ]
79
+ ```
80
+ Install with: `uv sync --all-extras`
81
+
82
+ ### Step 3.2: Verify Imports (Test First)
83
+ Create `tests/unit/agents/test_agent_imports.py`:
84
+ - Verify `from agent_framework import ChatAgent` works.
85
+ - Verify instantiation of `ChatAgent` with a mock client (import smoke test sketched below).
86
+
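+ The import check can be a trivial smoke test; instantiation needs a mock chat client, whose exact interface should be taken from the agent-framework docs rather than guessed:
+
+ ```python
+ # tests/unit/agents/test_agent_imports.py (illustrative sketch)
+ def test_chat_agent_importable() -> None:
+     from agent_framework import ChatAgent  # noqa: F401  # PyPI: agent-framework-core
+ ```
+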
87
+ ### Step 3.3: Update Agents
88
+ Refactor `src/agents/*.py` to ensure they match the exact signature of the `ChatAgent` class shipped in `agent-framework-core`.
89
+ - **SOLID:** Ensure agents have single responsibilities.
90
+ - **DRY:** Share tool definitions between Pydantic-AI simple mode and Agent Framework advanced mode.
91
+
92
+ ---
93
+
94
+ ## Phase 4: UI & End-to-End Verification
95
+
96
+ **Goal:** Update Gradio to reflect the active mode.
97
+
98
+ ### Step 4.1: UI Updates
99
+ Update `src/app.py` to display "Simple Mode" vs "Advanced Mode".
100
+
101
+ ### Step 4.2: End-to-End Test
102
+ Run the full loop:
103
+ 1. Simple Mode (No Keys) -> Search -> Judge (HF) -> Report.
104
+ 2. Advanced Mode (OpenAI Key) -> SearchAgent -> JudgeAgent -> ReportAgent.
105
+
106
+ ---
107
+
108
+ ## Phase 5: Cleanup & Documentation
109
+
110
+ - Remove unused code.
111
+ - Update main README.md.
112
+ - Final `make check`.
docs/brainstorming/magentic-pydantic/03_IMMEDIATE_ACTIONS.md ADDED
@@ -0,0 +1,112 @@
1
+ # Immediate Actions Checklist
2
+
3
+ **Date:** November 27, 2025
4
+ **Priority:** Execute in order
5
+
6
+ ---
7
+
8
+ ## Before Starting Implementation
9
+
10
+ ### 1. Close PR #41 (CRITICAL)
11
+
12
+ ```bash
13
+ gh pr close 41 --comment "Architecture decision changed. Cherry-picking improvements to preserve both pydantic-ai and Agent Framework capabilities."
14
+ ```
15
+
16
+ ### 2. Verify HuggingFace Spaces is Safe
17
+
18
+ ```bash
19
+ # Should show agent framework files exist
20
+ git ls-tree --name-only huggingface-upstream/dev -- src/agents/
21
+ git ls-tree --name-only huggingface-upstream/dev -- src/orchestrator_magentic.py
22
+ ```
23
+
24
+ Expected output: Files should exist (they do as of this writing).
25
+
26
+ ### 3. Clean Local Environment
27
+
28
+ ```bash
29
+ # Switch to main first
30
+ git checkout main
31
+
32
+ # Delete problematic branches
33
+ git branch -D refactor/pydantic-unification 2>/dev/null || true
34
+ git branch -D feat/pubmed-fulltext 2>/dev/null || true
35
+
36
+ # Reset local dev to origin/dev
37
+ git branch -D dev 2>/dev/null || true
38
+ git checkout -b dev origin/dev
39
+
40
+ # Verify agent framework code exists
41
+ ls src/agents/
42
+ # Expected: __init__.py, analysis_agent.py, hypothesis_agent.py, judge_agent.py,
43
+ # magentic_agents.py, report_agent.py, search_agent.py, state.py, tools.py
44
+
45
+ ls src/orchestrator_magentic.py
46
+ # Expected: file exists
47
+ ```
48
+
49
+ ### 4. Create Fresh Feature Branch
50
+
51
+ ```bash
52
+ git checkout -b feat/dual-mode-architecture origin/dev
53
+ ```
54
+
55
+ ---
56
+
57
+ ## Decision Points
58
+
59
+ Before proceeding, confirm:
60
+
61
+ 1. **For hackathon**: Do we need advanced mode, or is simple mode sufficient?
62
+ - Simple mode = faster to implement, works today
63
+ - Advanced mode = better quality, more work
64
+
65
+ 2. **Timeline**: How much time do we have?
66
+ - If < 1 day: Focus on simple mode only
67
+ - If > 1 day: Implement dual-mode
68
+
69
+ 3. **Dependencies**: Is `agent-framework-core` available?
70
+ - Check: `pip index versions agent-framework-core`
71
+ - If not on PyPI, may need to install from GitHub
72
+
73
+ ---
74
+
75
+ ## Quick Start (Simple Mode Only)
76
+
77
+ If time is limited, implement only simple mode improvements:
78
+
79
+ ```bash
80
+ # On feat/dual-mode-architecture branch
81
+
82
+ # 1. Update judges.py to add HuggingFace support
83
+ # 2. Update config.py to add HF settings
84
+ # 3. Create free_tier_demo.py
85
+ # 4. Run make check
86
+ # 5. Create PR to dev
87
+ ```
88
+
89
+ This gives you free-tier capability without touching agent framework code.
90
+
91
+ ---
92
+
93
+ ## Quick Start (Full Dual-Mode)
94
+
95
+ If time permits, implement full dual-mode:
96
+
97
+ Follow phases 1-6 in `02_IMPLEMENTATION_PHASES.md`
98
+
99
+ ---
100
+
101
+ ## Emergency Rollback
102
+
103
+ If anything goes wrong:
104
+
105
+ ```bash
106
+ # Reset to safe state
107
+ git checkout main
108
+ git branch -D feat/dual-mode-architecture
109
+ git checkout -b feat/dual-mode-architecture origin/dev
110
+ ```
111
+
112
+ `origin/dev` is the safe fallback - it has the agent framework code intact.
docs/brainstorming/magentic-pydantic/04_FOLLOWUP_REVIEW_REQUEST.md ADDED
@@ -0,0 +1,158 @@
1
+ # Follow-Up Review Request: Did We Implement Your Feedback?
2
+
3
+ **Date:** November 27, 2025
4
+ **Context:** You previously reviewed our dual-mode architecture plan and provided feedback. We have updated the documentation. Please verify we correctly implemented your recommendations.
5
+
6
+ ---
7
+
8
+ ## Your Original Feedback vs Our Changes
9
+
10
+ ### 1. Naming Confusion Clarification
11
+
12
+ **Your feedback:** "You are using Microsoft Agent Framework, but you've named your integration 'Magentic'. This caused the confusion."
13
+
14
+ **Our change:** Added Section 4 in `00_SITUATION_AND_PLAN.md`:
15
+ ```markdown
16
+ ## 4. CRITICAL: Naming Confusion Clarification
17
+
18
+ > **Senior Agent Review Finding:** The codebase uses "magentic" in file names
19
+ > (e.g., `orchestrator_magentic.py`, `magentic_agents.py`) but this is **NOT**
20
+ > the `magentic` PyPI package by Jacky Liang. It's Microsoft Agent Framework.
21
+
22
+ **The naming confusion:**
23
+ - `magentic` (PyPI package): A different library for structured LLM outputs
24
+ - "Magentic" (in our codebase): Our internal name for Microsoft Agent Framework integration
25
+ - `agent-framework-core` (PyPI package): Microsoft's actual multi-agent orchestration framework
26
+
27
+ **Recommended future action:** Rename `orchestrator_magentic.py` → `orchestrator_advanced.py`
28
+ ```
29
+
30
+ **Status:** ✅ IMPLEMENTED
31
+
32
+ ---
33
+
34
+ ### 2. Bridge Complexity Warning
35
+
36
+ **Your feedback:** "You must ensure MagenticState (context vars) propagates correctly through the pydantic-ai call stack."
37
+
38
+ **Our change:** Added Section 10.1 in `01_ARCHITECTURE_SPEC.md`:
39
+ ```markdown
40
+ ### 10.1 Bridge Complexity (MEDIUM)
41
+
42
+ **Risk:** In Advanced Mode, agents (Agent Framework) wrap handlers (pydantic-ai).
43
+ Both are async. Context variables (`MagenticState`) must propagate correctly.
44
+
45
+ **Mitigation:**
46
+ - pydantic-ai uses standard Python `contextvars`, which naturally propagate through `await` chains
47
+ - Test context propagation explicitly in integration tests
48
+ - If issues arise, pass state explicitly rather than via context vars
49
+ ```
50
+
51
+ **Status:** ✅ IMPLEMENTED
52
+
53
+ ---
54
+
55
+ ### 3. Integration Drift Warning
56
+
57
+ **Your feedback:** "Simple Mode and Advanced Mode might diverge in behavior."
58
+
59
+ **Our change:** Added Section 10.2 in `01_ARCHITECTURE_SPEC.md`:
60
+ ```markdown
61
+ ### 10.2 Integration Drift (MEDIUM)
62
+
63
+ **Risk:** Simple Mode and Advanced Mode might diverge in behavior over time.
64
+
65
+ **Mitigation:**
66
+ - Both modes MUST call the exact same underlying Tools (`src/tools/*`) and Handlers (`src/agent_factory/*`)
67
+ - Handlers are the single source of truth for business logic
68
+ - Agents are thin wrappers that delegate to handlers
69
+ ```
70
+
71
+ **Status:** ✅ IMPLEMENTED
72
+
73
+ ---
74
+
75
+ ### 4. Testing Burden Warning
76
+
77
+ **Your feedback:** "You now have two distinct orchestrators to maintain. This doubles your integration testing surface area."
78
+
79
+ **Our change:** Added Section 10.3 in `01_ARCHITECTURE_SPEC.md`:
80
+ ```markdown
81
+ ### 10.3 Testing Burden (LOW-MEDIUM)
82
+
83
+ **Risk:** Two distinct orchestrators doubles integration testing surface area.
84
+
85
+ **Mitigation:**
86
+ - Unit test handlers independently (shared code)
87
+ - Integration tests for each mode separately
88
+ - End-to-end tests verify same output for same input
89
+ ```
90
+
91
+ **Status:** ✅ IMPLEMENTED
92
+
93
+ ---
94
+
95
+ ### 5. Rename Recommendation
96
+
97
+ **Your feedback:** "Rename `src/orchestrator_magentic.py` to `src/orchestrator_advanced.py`"
98
+
99
+ **Our change:** Added Step 3.4 in `02_IMPLEMENTATION_PHASES.md`:
100
+ ```markdown
101
+ ### Step 3.4: (OPTIONAL) Rename "Magentic" to "Advanced"
102
+
103
+ > **Senior Agent Recommendation:** Rename files to eliminate confusion.
104
+
105
+ git mv src/orchestrator_magentic.py src/orchestrator_advanced.py
106
+ git mv src/agents/magentic_agents.py src/agents/advanced_agents.py
107
+
108
+ **Note:** This is optional for the hackathon. Can be done in a follow-up PR.
109
+ ```
110
+
111
+ **Status:** ✅ DOCUMENTED (marked as optional for hackathon)
112
+
113
+ ---
114
+
115
+ ### 6. Standardize Wrapper Recommendation
116
+
117
+ **Your feedback:** "Create a generic `PydanticAiAgentWrapper(BaseAgent)` class instead of manually wrapping each handler."
118
+
119
+ **Our change:** NOT YET DOCUMENTED
120
+
121
+ **Status:** ⚠️ NOT IMPLEMENTED - Should we add this?
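+
+ To make the question concrete for your review, this is the rough shape we understand you to mean (entirely hypothetical, not in the codebase; `extract_text` is an assumed helper, mirroring `extract_question` in the JudgeAgent example):
+
+ ```python
+ # Hypothetical generic wrapper: not in the codebase yet.
+ from agent_framework import AgentRunResponse, BaseAgent, ChatMessage, Role
+
+ class PydanticAiAgentWrapper(BaseAgent):
+     """Wrap any pydantic-ai powered handler behind the BaseAgent interface."""
+
+     def __init__(self, name: str, description: str, handler) -> None:
+         super().__init__(name=name, description=description)
+         self._handler = handler
+
+     async def run(self, messages, **kwargs) -> AgentRunResponse:
+         result = await self._handler.run(extract_text(messages))  # extract_text: assumed helper
+         return AgentRunResponse(
+             messages=[ChatMessage(role=Role.ASSISTANT, text=str(result))],
+         )
+ ```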
122
+
123
+ ---
124
+
125
+ ## Questions for Your Review
126
+
127
+ 1. **Did we correctly implement your feedback?** Are there any misunderstandings in how we interpreted your recommendations?
128
+
129
+ 2. **Is the "Standardize Wrapper" recommendation critical?** Should we add it to the implementation phases, or is it a nice-to-have for later?
130
+
131
+ 3. **Dependency versioning:** You noted `agent-framework-core>=1.0.0b251120` might be ephemeral. Should we:
132
+ - Pin to a specific version?
133
+ - Use a version range?
134
+ - Install from GitHub source?
135
+
136
+ 4. **Anything else we missed?**
137
+
138
+ ---
139
+
140
+ ## Files to Re-Review
141
+
142
+ 1. `00_SITUATION_AND_PLAN.md` - Added Section 4 (Naming Clarification)
143
+ 2. `01_ARCHITECTURE_SPEC.md` - Added Sections 10-11 (Risks, Naming)
144
+ 3. `02_IMPLEMENTATION_PHASES.md` - Added Step 3.4 (Optional Rename)
145
+
146
+ ---
147
+
148
+ ## Current Branch State
149
+
150
+ We are now on `feat/dual-mode-architecture` branched from `origin/dev`:
151
+ - ✅ Agent framework code intact (`src/agents/`, `src/orchestrator_magentic.py`)
152
+ - ✅ Documentation committed
153
+ - ❌ PR #41 still open (need to close it)
154
+ - ❌ Cherry-pick of pydantic-ai improvements not yet done
155
+
156
+ ---
157
+
158
+ Please confirm: **GO / NO-GO** to proceed with Phase 1 (cherry-picking pydantic-ai improvements)?
docs/brainstorming/magentic-pydantic/REVIEW_PROMPT_FOR_SENIOR_AGENT.md ADDED
@@ -0,0 +1,113 @@
1
+ # Senior Agent Review Prompt
2
+
3
+ Copy and paste everything below this line to a fresh Claude/AI session:
4
+
5
+ ---
6
+
7
+ ## Context
8
+
9
+ I am a junior developer working on a HuggingFace hackathon project called DeepCritical. We made a significant architectural mistake and are now trying to course-correct. I need you to act as a **senior staff engineer** and critically review our proposed solution.
10
+
11
+ ## The Situation
12
+
13
+ We almost merged a refactor that would have **deleted** our multi-agent orchestration capability, mistakenly believing that `pydantic-ai` (a library for structured LLM outputs) and Microsoft's `agent-framework` (a framework for multi-agent orchestration) were mutually exclusive alternatives.
14
+
15
+ **They are not.** They are complementary:
16
+ - `pydantic-ai` ensures LLM responses match Pydantic schemas (type-safe outputs)
17
+ - `agent-framework` orchestrates multiple agents working together (coordination layer)
18
+
19
+ We now want to implement a **dual-mode architecture** where:
20
+ - **Simple Mode (No API key):** Uses only pydantic-ai with HuggingFace free tier
21
+ - **Advanced Mode (With API key):** Uses Microsoft Agent Framework for orchestration, with pydantic-ai inside each agent for structured outputs
22
+
23
+ ## Your Task
24
+
25
+ Please perform a **deep, critical review** of:
26
+
27
+ 1. **The architecture diagram** (image attached: `assets/magentic-pydantic.png`)
28
+ 2. **Our documentation** (4 files listed below)
29
+ 3. **The actual codebase** to verify our claims
30
+
31
+ ## Specific Questions to Answer
32
+
33
+ ### Architecture Validation
34
+ 1. Is our understanding correct that pydantic-ai and agent-framework are complementary, not competing?
35
+ 2. Does the dual-mode architecture diagram accurately represent how these should integrate?
36
+ 3. Are there any architectural flaws or anti-patterns in our proposed design?
37
+
38
+ ### Documentation Accuracy
39
+ 4. Are the branch states we documented accurate? (Check `git log`, `git ls-tree`)
40
+ 5. Is our understanding of what code exists where correct?
41
+ 6. Are the implementation phases realistic and in the correct order?
42
+ 7. Are there any missing steps or dependencies we overlooked?
43
+
44
+ ### Codebase Reality Check
45
+ 8. Does `origin/dev` actually have the agent framework code intact? Verify by checking:
46
+ - `git ls-tree origin/dev -- src/agents/`
47
+ - `git ls-tree origin/dev -- src/orchestrator_magentic.py`
48
+ 9. What does the current `src/agents/` code actually import? Does it use `agent_framework` or `agent-framework-core`?
49
+ 10. Is the `agent-framework-core` package actually available on PyPI, or do we need to install from source?
50
+
51
+ ### Implementation Feasibility
52
+ 11. Can the cherry-pick strategy we outlined actually work, or are there merge conflicts we're not seeing?
53
+ 12. Is the mode auto-detection logic sound?
54
+ 13. What are the risks we haven't identified?
55
+
56
+ ### Critical Errors Check
57
+ 14. Did we miss anything critical in our analysis?
58
+ 15. Are there any factual errors in our documentation?
59
+ 16. Would a Google/DeepMind senior engineer approve this plan, or would they flag issues?
60
+
61
+ ## Files to Review
62
+
63
+ Please read these files in order:
64
+
65
+ 1. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md`
66
+ 2. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/01_ARCHITECTURE_SPEC.md`
67
+ 3. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/02_IMPLEMENTATION_PHASES.md`
68
+ 4. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/03_IMMEDIATE_ACTIONS.md`
69
+
70
+ And the architecture diagram:
71
+ 5. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/assets/magentic-pydantic.png`
72
+
73
+ ## Reference Repositories to Consult
74
+
75
+ We have local clones of the source-of-truth repositories:
76
+
77
+ - **Original DeepCritical:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/reference_repos/DeepCritical/`
78
+ - **Microsoft Agent Framework:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/reference_repos/agent-framework/`
79
+ - **Microsoft AutoGen:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/reference_repos/autogen-microsoft/`
80
+
81
+ Please cross-reference our hackathon fork against these to verify architectural alignment.
82
+
83
+ ## Codebase to Analyze
84
+
85
+ Our hackathon fork is at:
86
+ `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/`
87
+
88
+ Key files to examine:
89
+ - `src/agents/` - Agent framework integration
90
+ - `src/agent_factory/judges.py` - pydantic-ai integration
91
+ - `src/orchestrator.py` - Simple mode orchestrator
92
+ - `src/orchestrator_magentic.py` - Advanced mode orchestrator
93
+ - `src/orchestrator_factory.py` - Mode selection
94
+ - `pyproject.toml` - Dependencies
95
+
96
+ ## Expected Output
97
+
98
+ Please provide:
99
+
100
+ 1. **Validation Summary:** Is our plan sound? (YES/NO with explanation)
101
+ 2. **Errors Found:** List any factual errors in our documentation
102
+ 3. **Missing Items:** What did we overlook?
103
+ 4. **Risk Assessment:** What could go wrong?
104
+ 5. **Recommended Changes:** Specific edits to our documentation or plan
105
+ 6. **Go/No-Go Recommendation:** Should we proceed with this plan?
106
+
107
+ ## Tone
108
+
109
+ Be brutally honest. If our plan is flawed, say so directly. We would rather know now than after implementation. Don't soften criticism - we need accuracy.
110
+
111
+ ---
112
+
113
+ END OF PROMPT
docs/bugs/FIX_PLAN_MAGENTIC_MODE.md ADDED
@@ -0,0 +1,227 @@
1
+ # Fix Plan: Magentic Mode Report Generation
2
+
3
+ **Related Bug**: `P0_MAGENTIC_MODE_BROKEN.md`
4
+ **Approach**: Test-Driven Development (TDD)
5
+ **Estimated Scope**: 4 phases, ~2-3 hours
6
+
7
+ ---
8
+
9
+ ## Problem Summary
10
+
11
+ Magentic mode runs but fails to produce readable reports due to:
12
+
13
+ 1. **Primary Bug**: `MagenticFinalResultEvent.message` returns `ChatMessage` object, not text
14
+ 2. **Secondary Bug**: Max rounds (3) reached before ReportAgent completes
15
+ 3. **Tertiary Issues**: Stale "bioRxiv" references in prompts
16
+
17
+ ---
18
+
19
+ ## Fix Order (TDD)
20
+
21
+ ### Phase 1: Write Failing Tests
22
+
23
+ **Task 1.1**: Create test for ChatMessage text extraction
24
+
25
+ ```python
26
+ # tests/unit/test_orchestrator_magentic.py
27
+
28
+ def test_process_event_extracts_text_from_chat_message():
29
+ """Final result event should extract text from ChatMessage object."""
30
+ # Arrange: Mock ChatMessage with .content attribute
31
+ # Act: Call _process_event with MagenticFinalResultEvent
32
+ # Assert: Returned AgentEvent.message is a string, not object repr
33
+ ```
34
+
35
+ **Task 1.2**: Create test for max rounds configuration
36
+
37
+ ```python
38
+ def test_orchestrator_uses_configured_max_rounds():
39
+ """MagenticOrchestrator should use max_rounds from constructor."""
40
+ # Arrange: Create orchestrator with max_rounds=10
41
+ # Act: Build workflow
42
+ # Assert: Workflow has max_round_count=10
43
+ ```
44
+
45
+ **Task 1.3**: Create test for bioRxiv reference removal
46
+
47
+ ```python
48
+ def test_task_prompt_references_europe_pmc():
49
+ """Task prompt should reference Europe PMC, not bioRxiv."""
50
+ # Arrange: Create orchestrator
51
+ # Act: Check task string in run()
52
+ # Assert: Contains "Europe PMC", not "bioRxiv"
53
+ ```
54
+
55
+ ---
56
+
57
+ ### Phase 2: Fix ChatMessage Text Extraction
58
+
59
+ **File**: `src/orchestrator_magentic.py`
60
+ **Lines**: 192-199
61
+
62
+ **Current Code**:
63
+ ```python
64
+ elif isinstance(event, MagenticFinalResultEvent):
65
+ text = event.message.text if event.message else "No result"
66
+ ```
67
+
68
+ **Fixed Code**:
69
+ ```python
70
+ elif isinstance(event, MagenticFinalResultEvent):
71
+ if event.message:
72
+ # ChatMessage may have .content or .text depending on version
73
+ if hasattr(event.message, 'content') and event.message.content:
74
+ text = str(event.message.content)
75
+ elif hasattr(event.message, 'text') and event.message.text:
76
+ text = str(event.message.text)
77
+ else:
78
+ # Fallback: convert entire message to string
79
+ text = str(event.message)
80
+ else:
81
+ text = "No result generated"
82
+ ```
83
+
84
+ **Why**: The `agent_framework.ChatMessage` object structure may vary. We need defensive extraction.
85
+
86
+ ---
87
+
88
+ ### Phase 3: Fix Max Rounds Configuration
89
+
90
+ **File**: `src/orchestrator_magentic.py`
91
+ **Lines**: 97-99
92
+
93
+ **Current Code**:
94
+ ```python
95
+ .with_standard_manager(
96
+ chat_client=manager_client,
97
+ max_round_count=self._max_rounds, # Already uses config
98
+ max_stall_count=3,
99
+ max_reset_count=2,
100
+ )
101
+ ```
102
+
103
+ **Issue**: Default `max_rounds` in `__init__` is 10, but workflow may need more for complex queries.
104
+
105
+ **Fix**: Verify the value flows through correctly. Add logging.
106
+
107
+ ```python
108
+ logger.info(
109
+ "Building Magentic workflow",
110
+ max_rounds=self._max_rounds,
111
+ max_stall=3,
112
+ max_reset=2,
113
+ )
114
+ ```
115
+
116
+ **Also check**: `src/orchestrator_factory.py` passes config correctly:
117
+ ```python
118
+ return MagenticOrchestrator(
119
+ max_rounds=config.max_iterations if config else 10,
120
+ )
121
+ ```
122
+
123
+ ---
124
+
125
+ ### Phase 4: Fix Stale bioRxiv References
126
+
127
+ **Files to update**:
128
+
129
+ | File | Line | Change |
130
+ |------|------|--------|
131
+ | `src/orchestrator_magentic.py` | 131 | "bioRxiv" → "Europe PMC" |
132
+ | `src/agents/magentic_agents.py` | 32-33 | "bioRxiv" → "Europe PMC" |
133
+ | `src/app.py` | 202-203 | "bioRxiv" → "Europe PMC" |
134
+
135
+ **Search command to verify**:
136
+ ```bash
137
+ grep -rn "bioRxiv\|biorxiv" src/
138
+ ```
139
+
140
+ ---
141
+
142
+ ## Implementation Checklist
143
+
144
+ ```
145
+ [ ] Phase 1: Write failing tests
146
+ [ ] 1.1 Test ChatMessage text extraction
147
+ [ ] 1.2 Test max rounds configuration
148
+ [ ] 1.3 Test Europe PMC references
149
+
150
+ [ ] Phase 2: Fix ChatMessage extraction
151
+ [ ] Update _process_event() in orchestrator_magentic.py
152
+ [ ] Run test 1.1 - should pass
153
+
154
+ [ ] Phase 3: Fix max rounds
155
+ [ ] Add logging to _build_workflow()
156
+ [ ] Verify factory passes config correctly
157
+ [ ] Run test 1.2 - should pass
158
+
159
+ [ ] Phase 4: Fix bioRxiv references
160
+ [ ] Update orchestrator_magentic.py task prompt
161
+ [ ] Update magentic_agents.py descriptions
162
+ [ ] Update app.py UI text
163
+ [ ] Run test 1.3 - should pass
164
+ [ ] Run grep to verify no remaining refs
165
+
166
+ [ ] Final Verification
167
+ [ ] make check passes
168
+ [ ] All tests pass (108+)
169
+ [ ] Manual test: run_magentic.py produces readable report
170
+ ```
171
+
172
+ ---
173
+
174
+ ## Test Commands
175
+
176
+ ```bash
177
+ # Run specific test file
178
+ uv run pytest tests/unit/test_orchestrator_magentic.py -v
179
+
180
+ # Run all tests
181
+ uv run pytest tests/unit/ -v
182
+
183
+ # Full check
184
+ make check
185
+
186
+ # Manual integration test
187
+ set -a && source .env && set +a
188
+ uv run python examples/orchestrator_demo/run_magentic.py "metformin alzheimer"
189
+ ```
190
+
191
+ ---
192
+
193
+ ## Success Criteria
194
+
195
+ 1. `run_magentic.py` outputs a readable research report (not `<ChatMessage object>`)
196
+ 2. Report includes: Executive Summary, Key Findings, Drug Candidates, References
197
+ 3. No "Max round count reached" error with default settings
198
+ 4. No "bioRxiv" references anywhere in codebase
199
+ 5. All 108+ tests pass
200
+ 6. `make check` passes
201
+
202
+ ---
203
+
204
+ ## Files Modified
205
+
206
+ ```
207
+ src/
208
+ ├── orchestrator_magentic.py # ChatMessage fix, logging
209
+ ├── agents/magentic_agents.py # bioRxiv → Europe PMC
210
+ └── app.py # bioRxiv → Europe PMC
211
+
212
+ tests/unit/
213
+ └── test_orchestrator_magentic.py # NEW: 3 tests
214
+ ```
215
+
216
+ ---
217
+
218
+ ## Notes for AI Agent
219
+
220
+ When implementing this fix plan:
221
+
222
+ 1. **DO NOT** create mock data or fake responses
223
+ 2. **DO** write real tests that verify actual behavior
224
+ 3. **DO** run `make check` after each phase
225
+ 4. **DO** test with real OpenAI API key via `.env`
226
+ 5. **DO** preserve existing functionality - simple mode must still work
227
+ 6. **DO NOT** over-engineer - minimal changes to fix the specific bugs
docs/bugs/P0_ACTIONABLE_FIXES.md DELETED
@@ -1,281 +0,0 @@
1
- # P0 Actionable Fixes - What to Do
2
-
3
- **Date:** November 27, 2025
4
- **Status:** ACTIONABLE
5
-
6
- ---
7
-
8
- ## Summary: What's Broken and What's Fixable
9
-
10
- | Tool | Problem | Fixable? | How |
11
- |------|---------|----------|-----|
12
- | BioRxiv | API has NO search endpoint | **NO** | Replace with Europe PMC |
13
- | PubMed | No query preprocessing | **YES** | Add query cleaner |
14
- | ClinicalTrials | No filters applied | **YES** | Add filter params |
15
- | Magentic Framework | Nothing wrong | N/A | Already working |
16
-
17
- ---
18
-
19
- ## FIX 1: Replace BioRxiv with Europe PMC (30 min)
20
-
21
- ### Why BioRxiv Can't Be Fixed
22
-
23
- The bioRxiv API only has this endpoint:
24
- ```
25
- https://api.biorxiv.org/details/{server}/{date-range}/{cursor}/json
26
- ```
27
-
28
- This returns papers **by date**, not by keyword. There is NO search endpoint.
29
-
30
- **Proof:** I queried `medrxiv/2024-01-01/2024-01-02` and got:
31
- - "Global risk of Plasmodium falciparum" (malaria)
32
- - "Multiple Endocrine Neoplasia in India"
33
- - "Acupuncture for Acute Musculoskeletal Pain"
34
-
35
- **None of these are about Long COVID** because the API doesn't search.
36
-
37
- ### Europe PMC Has Search + Preprints
38
-
39
- ```bash
40
- curl "https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=long+covid+treatment&resultType=core&pageSize=3&format=json"
41
- ```
42
-
43
- Returns 283,058 results including:
44
- - "Long COVID Treatment No Silver Bullets, Only a Few Bronze BBs" ✅
45
-
46
- ### The Fix
47
-
48
- Replace `src/tools/biorxiv.py` with `src/tools/europepmc.py`:
49
-
50
- ```python
51
- """Europe PMC preprint and paper search tool."""
52
-
53
- import httpx
54
- from src.utils.models import Citation, Evidence
55
-
56
- class EuropePMCTool:
57
- """Search Europe PMC for papers and preprints."""
58
-
59
- BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
60
-
61
- @property
62
- def name(self) -> str:
63
- return "europepmc"
64
-
65
- async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
66
- """Search Europe PMC (includes preprints from bioRxiv/medRxiv)."""
67
- params = {
68
- "query": query,
69
- "resultType": "core",
70
- "pageSize": max_results,
71
- "format": "json",
72
- }
73
-
74
- async with httpx.AsyncClient(timeout=30.0) as client:
75
- response = await client.get(self.BASE_URL, params=params)
76
- response.raise_for_status()
77
-
78
- data = response.json()
79
- results = data.get("resultList", {}).get("result", [])
80
-
81
- return [self._to_evidence(r) for r in results]
82
-
83
- def _to_evidence(self, result: dict) -> Evidence:
84
- """Convert Europe PMC result to Evidence."""
85
- title = result.get("title", "Untitled")
86
- abstract = result.get("abstractText", "No abstract")
87
- doi = result.get("doi", "")
88
- pub_year = result.get("pubYear", "Unknown")
89
- source = result.get("source", "europepmc")
90
-
91
- # Mark preprints
92
- pub_type = result.get("pubTypeList", {}).get("pubType", [])
93
- is_preprint = "Preprint" in pub_type
94
-
95
- content = f"{'[PREPRINT] ' if is_preprint else ''}{abstract[:1800]}"
96
-
97
- return Evidence(
98
- content=content,
99
- citation=Citation(
100
- source="europepmc" if not is_preprint else "preprint",
101
- title=title[:500],
102
- url=f"https://doi.org/{doi}" if doi else "",
103
- date=str(pub_year),
104
- ),
105
- relevance=0.75 if is_preprint else 0.9,
106
- )
107
- ```
108
-
109
- ---
110
-
111
- ## FIX 2: Add PubMed Query Preprocessing (1 hour)
112
-
113
- ### Current Problem
114
-
115
- User enters: `What medications show promise for Long COVID?`
116
- PubMed receives: `What medications show promise for Long COVID?`
117
-
118
- The question words pollute the search.
119
-
120
- ### The Fix
121
-
122
- Add `src/tools/query_utils.py`:
123
-
124
- ```python
125
- """Query preprocessing utilities."""
126
-
127
- import re
128
-
129
- # Question words to remove
130
- QUESTION_WORDS = {
131
- "what", "which", "how", "why", "when", "where", "who",
132
- "is", "are", "can", "could", "would", "should", "do", "does",
133
- "show", "promise", "help", "treat", "cure",
134
- }
135
-
136
- # Medical synonyms to expand
137
- SYNONYMS = {
138
- "long covid": ["long COVID", "PASC", "post-COVID syndrome", "post-acute sequelae"],
139
- "alzheimer": ["Alzheimer's disease", "AD", "Alzheimer dementia"],
140
- "cancer": ["neoplasm", "tumor", "malignancy", "carcinoma"],
141
- }
142
-
143
- def preprocess_pubmed_query(raw_query: str) -> str:
144
- """Convert natural language to cleaner PubMed query."""
145
- # Lowercase
146
- query = raw_query.lower()
147
-
148
- # Remove question marks
149
- query = query.replace("?", "")
150
-
151
- # Remove question words
152
- words = query.split()
153
- words = [w for w in words if w not in QUESTION_WORDS]
154
- query = " ".join(words)
155
-
156
- # Expand synonyms
157
- for term, expansions in SYNONYMS.items():
158
- if term in query:
159
- # Add OR clause
160
- expansion = " OR ".join([f'"{e}"' for e in expansions])
161
- query = query.replace(term, f"({expansion})")
162
-
163
- return query.strip()
164
- ```
165
-
166
- Then update `src/tools/pubmed.py`:
167
-
168
- ```python
169
- from src.tools.query_utils import preprocess_pubmed_query
170
-
171
- async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
172
- # Preprocess query
173
- clean_query = preprocess_pubmed_query(query)
174
-
175
- search_params = self._build_params(
176
- db="pubmed",
177
- term=clean_query, # Use cleaned query
178
- retmax=max_results,
179
- sort="relevance",
180
- )
181
- # ... rest unchanged
182
- ```
183
-
184
- ---
185
-
186
- ## FIX 3: Add ClinicalTrials.gov Filters (30 min)
187
-
188
- ### Current Problem
189
-
190
- Returns ALL trials including withdrawn, terminated, observational studies.
191
-
192
- ### The Fix
193
-
194
- The API supports `filter.overallStatus` and other filters. Update `src/tools/clinicaltrials.py`:
195
-
196
- ```python
197
- async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
198
- params: dict[str, str | int] = {
199
- "query.term": query,
200
- "pageSize": min(max_results, 100),
201
- "fields": "|".join(self.FIELDS),
202
- # ADD THESE FILTERS:
203
- "filter.overallStatus": "COMPLETED|RECRUITING|ACTIVE_NOT_RECRUITING",
204
- # Only interventional studies (not observational)
205
- "aggFilters": "studyType:int",
206
- }
207
- # ... rest unchanged
208
- ```
209
-
210
- **Note:** I tested the API - it supports filtering but with slightly different syntax. Check the [API docs](https://clinicaltrials.gov/data-api/api).
211
-
212
- ---
213
-
214
- ## What NOT to Change
215
-
216
- ### Microsoft Agent Framework - WORKING
217
-
218
- I verified:
219
- ```python
220
- from agent_framework import MagenticBuilder, ChatAgent
221
- from agent_framework.openai import OpenAIChatClient
222
- # All imports OK
223
-
224
- orchestrator = MagenticOrchestrator(max_rounds=2)
225
- workflow = orchestrator._build_workflow()
226
- # Workflow built successfully
227
- ```
228
-
229
- The Magentic agents are correctly wired:
230
- - SearchAgent → GPT-5.1 ✅
231
- - JudgeAgent → GPT-5.1 ✅
232
- - HypothesisAgent → GPT-5.1 ✅
233
- - ReportAgent → GPT-5.1 ✅
234
-
235
- **The framework is fine. The tools it calls are broken.**
236
-
237
- ---
238
-
239
- ## Priority Order
240
-
241
- 1. **Replace BioRxiv** → Immediate, fundamental
242
- 2. **Add PubMed preprocessing** → High impact, easy
243
- 3. **Add ClinicalTrials filters** → Medium impact, easy
244
-
245
- ---
246
-
247
- ## Test After Fixes
248
-
249
- ```bash
250
- # Test Europe PMC
251
- uv run python -c "
252
- import asyncio
253
- from src.tools.europepmc import EuropePMCTool
254
- tool = EuropePMCTool()
255
- results = asyncio.run(tool.search('long covid treatment', 3))
256
- for r in results:
257
- print(r.citation.title)
258
- "
259
-
260
- # Test PubMed with preprocessing
261
- uv run python -c "
262
- from src.tools.query_utils import preprocess_pubmed_query
263
- q = 'What medications show promise for Long COVID?'
264
- print(preprocess_pubmed_query(q))
265
- # Should output: (\"long COVID\" OR \"PASC\" OR \"post-COVID syndrome\") medications
266
- "
267
- ```
268
-
269
- ---
270
-
271
- ## After These Fixes
272
-
273
- The Magentic workflow will:
274
- 1. SearchAgent calls `search_pubmed("long COVID treatment")` → Gets RELEVANT papers
275
- 2. SearchAgent calls `search_preprints("long COVID treatment")` → Gets RELEVANT preprints via Europe PMC
276
- 3. SearchAgent calls `search_clinical_trials("long COVID")` → Gets INTERVENTIONAL trials only
277
- 4. JudgeAgent evaluates GOOD evidence
278
- 5. HypothesisAgent generates hypotheses from GOOD evidence
279
- 6. ReportAgent synthesizes GOOD report
280
-
281
- **The framework will work once we feed it good data.**
docs/bugs/P0_CRITICAL_BUGS.md DELETED
@@ -1,298 +0,0 @@
1
- # P0 CRITICAL BUGS - Why DeepCritical Produces Garbage Results
2
-
3
- **Date:** November 27, 2025
4
- **Status:** CRITICAL - App is functionally useless
5
- **Severity:** P0 (Blocker)
6
-
7
- ## TL;DR
8
-
9
- The app produces garbage because:
10
- 1. **BioRxiv search doesn't work** - returns random papers
11
- 2. **Free tier LLM is too dumb** - can't identify drugs
12
- 3. **Query construction is naive** - no optimization for PubMed/CT.gov syntax
13
- 4. **Loop terminates too early** - 5 iterations isn't enough
14
-
15
- ---
16
-
17
- ## P0-001: BioRxiv Search is Fundamentally Broken
18
-
19
- **File:** `src/tools/biorxiv.py:248-286`
20
-
21
- **The Problem:**
22
- The bioRxiv API **DOES NOT SUPPORT KEYWORD SEARCH**.
23
-
24
- The code does this:
25
- ```python
26
- # Fetch recent papers (last 90 days, first 100 papers)
27
- url = f"{self.BASE_URL}/{self.server}/{interval}/0/json"
28
- # Then filter client-side for keywords
29
- ```
30
-
31
- **What Actually Happens:**
32
- 1. Fetches the first 100 papers from medRxiv in the last 90 days (chronological order)
33
- 2. Filters those 100 random papers for query keywords
34
- 3. Returns whatever garbage matches
35
-
36
- **Result:** For "Long COVID medications", you get random papers like:
37
- - "Calf muscle structure-function adaptations"
38
- - "Work-Life Balance of Ophthalmologists During COVID"
39
-
40
- These papers contain "COVID" somewhere but have NOTHING to do with Long COVID treatments.
41
-
42
- **Root Cause:** The `/0/json` pagination only returns 100 papers. You'd need to paginate through ALL papers (thousands) to do proper keyword filtering.
43
-
44
- **Fix Options:**
45
- 1. **Remove BioRxiv entirely** - It's unusable without proper search API
46
- 2. **Use a different preprint aggregator** - Europe PMC has preprints WITH search
47
- 3. **Add pagination** - Fetch all papers (slow, expensive)
48
- 4. **Use Semantic Scholar API** - Has preprints and proper search
49
-
50
- ---
51
-
52
- ## P0-002: Free Tier LLM Cannot Perform Drug Identification
53
-
54
- **File:** `src/agent_factory/judges.py:153-211`
55
-
56
- **The Problem:**
57
- Without an API key, the app uses `HFInferenceJudgeHandler` with:
58
- - Llama 3.1 8B Instruct
59
- - Mistral 7B Instruct
60
-
61
- These are **7-8 billion parameter models**. They cannot:
62
- - Reliably parse complex biomedical abstracts
63
- - Identify drug candidates from scientific text
64
- - Generate structured JSON output consistently
65
- - Reason about mechanism of action
66
-
67
- **Evidence of Failure:**
68
- ```python
69
- # From MockJudgeHandler - the honest fallback when LLM fails
70
- drug_candidates=[
71
- "Drug identification requires AI analysis",
72
- "Enter API key above for full results",
73
- ]
74
- ```
75
-
76
- The team KNEW the free tier can't identify drugs and added this message.
77
-
78
- **Root Cause:** Drug repurposing requires understanding:
79
- - Drug mechanisms
80
- - Disease pathophysiology
81
- - Clinical trial phases
82
- - Statistical significance
83
-
84
- This requires GPT-4 / Claude Sonnet class models (100B+ parameters).
85
-
86
- **Fix Options:**
87
- 1. **Require API key** - No free tier, be honest
88
- 2. **Use larger HF models** - Llama 70B or Mixtral 8x7B (expensive on free tier)
89
- 3. **Hybrid approach** - Use free tier for search, require paid for synthesis
90
-
91
- ---
92
-
93
- ## P0-003: PubMed Query Not Optimized
94
-
95
- **File:** `src/tools/pubmed.py:54-71`
96
-
97
- **The Problem:**
98
- The query is passed directly to PubMed without optimization:
99
- ```python
100
- search_params = self._build_params(
101
- db="pubmed",
102
- term=query, # Raw user query!
103
- retmax=max_results,
104
- sort="relevance",
105
- )
106
- ```
107
-
108
- **What User Enters:** "What medications show promise for Long COVID?"
109
-
110
- **What PubMed Receives:** `What medications show promise for Long COVID?`
111
-
112
- **What PubMed Should Receive:**
113
- ```
114
- ("long covid"[Title/Abstract] OR "post-COVID"[Title/Abstract] OR "PASC"[Title/Abstract])
115
- AND (drug[Title/Abstract] OR treatment[Title/Abstract] OR medication[Title/Abstract] OR therapy[Title/Abstract])
116
- AND (clinical trial[Publication Type] OR randomized[Title/Abstract])
117
- ```
118
-
119
- **Root Cause:** No query preprocessing or medical term expansion.
120
-
121
- **Fix Options:**
122
- 1. **Add query preprocessor** - Extract medical entities, expand synonyms
123
- 2. **Use MeSH terms** - PubMed's controlled vocabulary for better recall
124
- 3. **LLM query generation** - Use LLM to generate optimized PubMed query
125
-
126
- ---
127
-
128
- ## P0-004: Loop Terminates Too Early
129
-
130
- **File:** `src/app.py:42-45` and `src/utils/models.py`
131
-
132
- **The Problem:**
133
- ```python
134
- config = OrchestratorConfig(
135
- max_iterations=5,
136
- max_results_per_tool=10,
137
- )
138
- ```
139
-
140
- 5 iterations is not enough to:
141
- 1. Search multiple variations of the query
142
- 2. Gather enough evidence for the Judge to synthesize
143
- 3. Refine queries based on initial results
144
-
145
- **Evidence:** The user's output shows "Max Iterations Reached" with only 6 sources.
146
-
147
- **Root Cause:** Conservative defaults to avoid API costs, but makes app useless.
148
-
149
- **Fix Options:**
150
- 1. **Increase default to 10-15** - More iterations = better results
151
- 2. **Dynamic termination** - Stop when confidence > threshold, not iteration count
152
- 3. **Parallel query expansion** - Run more queries per iteration
153
-
154
- ---
155
-
156
- ## P0-005: No Query Understanding Layer
157
-
158
- **Files:** `src/orchestrator.py`, `src/tools/search_handler.py`
159
-
160
- **The Problem:**
161
- There's no NLU (Natural Language Understanding) layer. The system:
162
- 1. Takes raw user query
163
- 2. Passes directly to search tools
164
- 3. No entity extraction
165
- 4. No intent classification
166
- 5. No query expansion
167
-
168
- For drug repurposing, you need to extract:
169
- - **Disease:** "Long COVID" → [Long COVID, PASC, Post-COVID syndrome, chronic COVID]
170
- - **Drug intent:** "medications" → [drugs, treatments, therapeutics, interventions]
171
- - **Evidence type:** "show promise" → [clinical trials, efficacy, RCT]
172
-
173
- **Root Cause:** No preprocessing pipeline between user input and search execution.
174
-
175
- **Fix Options:**
176
- 1. **Add entity extraction** - Use BioBERT or PubMedBERT for medical NER
177
- 2. **Add query expansion** - Use medical ontologies (UMLS, MeSH)
178
- 3. **LLM preprocessing** - Use LLM to generate search strategy before searching
179
-
180
- ---
181
-
182
- ## P0-006: ClinicalTrials.gov Results Not Filtered
183
-
184
- **File:** `src/tools/clinicaltrials.py`
185
-
186
- **The Problem:**
187
- ClinicalTrials.gov returns ALL matching trials including:
188
- - Withdrawn trials
189
- - Terminated trials
190
- - Not yet recruiting
191
- - Observational studies (not interventional)
192
-
193
- For drug repurposing, you want:
194
- - Interventional studies
195
- - Phase 2+ (has safety/efficacy data)
196
- - Completed or with results
197
-
198
- **Root Cause:** No filtering of trial metadata.
199
-
200
- ---
201
-
202
- ## Summary: Why This App Produces Garbage
203
-
204
- ```
205
- User Query: "What medications show promise for Long COVID?"
206
-
207
-
208
- ┌─────────────────────────────────────────────────────────────┐
209
- │ NO QUERY PREPROCESSING │
210
- │ - No entity extraction │
211
- │ - No synonym expansion │
212
- │ - No medical term normalization │
213
- └─────────────────────────────────────────────────────────────┘
214
-
215
-
216
- ┌─────────────────────────────────────────────────────────────┐
217
- │ BROKEN SEARCH LAYER │
218
- │ - PubMed: Raw query, no MeSH, gets 1 result │
219
- │ - BioRxiv: Returns random papers (API doesn't support search)│
220
- │ - ClinicalTrials: Returns all trials, no filtering │
221
- └─────────────────────────────────────────────────────────────┘
222
-
223
-
224
- ┌─────────────────────────────────────────────────────────────┐
225
- │ GARBAGE EVIDENCE │
226
- │ - 6 papers, most irrelevant │
227
- │ - "Calf muscle adaptations" (mentions COVID once) │
228
- │ - "Ophthalmologist work-life balance" │
229
- └─────────────────────────────────────────────────────────────┘
230
-
231
-
232
- ┌─────────────────────────────────────────────────────────────┐
233
- │ DUMB JUDGE (Free Tier) │
234
- │ - Llama 8B can't identify drugs from garbage │
235
- │ - JSON parsing fails │
236
- │ - Falls back to "Drug identification requires AI analysis" │
237
- └─────────────────────────────────────────────────────────────┘
238
-
239
-
240
- ┌─────────────────────────────────────────────────────────────┐
241
- │ LOOP HITS MAX (5 iterations) │
242
- │ - Never finds enough good evidence │
243
- │ - Never synthesizes anything useful │
244
- └─────────────────────────────────────────────────────────────┘
245
-
246
-
247
- GARBAGE OUTPUT
248
- ```
249
-
250
- ---
251
-
252
- ## What Would Make This Actually Work
253
-
254
- ### Minimum Viable Fix (1-2 days)
255
-
256
- 1. **Remove BioRxiv** - It doesn't work
257
- 2. **Require API key** - Be honest that free tier is useless
258
- 3. **Add basic query preprocessing** - Strip question words, expand COVID synonyms
259
- 4. **Increase iterations to 10**
260
-
261
- ### Proper Fix (1-2 weeks)
262
-
263
- 1. **Query Understanding Layer**
264
- - Medical NER (BioBERT/SciBERT)
265
- - Query expansion with MeSH/UMLS
266
- - Intent classification (drug discovery vs mechanism vs safety)
267
-
268
- 2. **Optimized Search**
269
- - PubMed: Proper query syntax with MeSH terms
270
- - ClinicalTrials: Filter by phase, status, intervention type
271
- - Replace BioRxiv with Europe PMC (has preprints + search)
272
-
273
- 3. **Evidence Ranking**
274
- - Score by publication type (RCT > cohort > case report)
275
- - Score by journal impact factor
276
- - Score by recency
277
- - Score by citation count
278
-
279
- 4. **Proper LLM Pipeline**
280
- - Use GPT-4 / Claude for synthesis
281
- - Structured extraction of: drug, mechanism, evidence level, effect size
282
- - Multi-step reasoning: identify → validate → rank → synthesize
283
-
284
- ---
285
-
286
- ## The Hard Truth
287
-
288
- Building a drug repurposing agent that works is HARD. The state of the art is:
289
-
290
- - **Drug2Disease (IBM)** - Uses knowledge graphs + ML
291
- - **COVID-KG (Stanford)** - Dedicated COVID knowledge graph
292
- - **Literature Mining at scale (PubMed)** - Millions of papers, not 10
293
-
294
- This hackathon project is fundamentally a **search wrapper with an LLM prompt**. That's not enough.
295
-
296
- To make it useful:
297
- 1. Either scope it down (e.g., "find clinical trials for X disease")
298
- 2. Or invest serious engineering in the NLU + search + ranking pipeline
docs/bugs/P0_MAGENTIC_AND_SEARCH_AUDIT.md DELETED
@@ -1,249 +0,0 @@
- # P0 Audit: Microsoft Agent Framework (Magentic) & Search Tools
-
- **Date:** November 27, 2025
- **Auditor:** Claude Code
- **Status:** VERIFIED
-
- ---
-
- ## TL;DR
-
- | Component | Status | Verdict |
- |-----------|--------|---------|
- | Microsoft Agent Framework | ✅ WORKING | Correctly wired, no bugs |
- | GPT-5.1 Model Config | ✅ CORRECT | Using `gpt-5.1` as configured |
- | Search Tools | ❌ BROKEN | Root cause of garbage results |
-
- **The orchestration framework is fine. The search layer is garbage.**
-
- ---
-
- ## Microsoft Agent Framework Verification
-
- ### Import Test: PASSED
- ```python
- from agent_framework import MagenticBuilder, ChatAgent
- from agent_framework.openai import OpenAIChatClient
- # All imports successful
- ```
-
- ### Agent Creation Test: PASSED
- ```python
- from src.agents.magentic_agents import create_search_agent
- search_agent = create_search_agent()
- # SearchAgent created: SearchAgent
- # Description: Searches biomedical databases (PubMed, ClinicalTrials.gov, bioRxiv)
- ```
-
- ### Workflow Build Test: PASSED
- ```python
- from src.orchestrator_magentic import MagenticOrchestrator
- orchestrator = MagenticOrchestrator(max_rounds=2)
- workflow = orchestrator._build_workflow()
- # Workflow built successfully: <class 'agent_framework._workflows._workflow.Workflow'>
- ```
-
- ### Model Configuration: CORRECT
- ```python
- settings.openai_model = "gpt-5.1"  # ✅ Using GPT-5.1, not GPT-4o
- settings.openai_api_key = True     # ✅ API key is set
- ```
-
- ---
-
- ## What Magentic Provides (Working)
-
- 1. **Multi-Agent Coordination**
-    - Manager agent orchestrates SearchAgent, JudgeAgent, HypothesisAgent, ReportAgent
-    - Uses `MagenticBuilder().with_standard_manager()` for coordination
-
- 2. **ChatAgent Pattern**
-    - Each agent has an internal LLM (GPT-5.1)
-    - Can call tools via the `@ai_function` decorator
-    - Has proper instructions for domain-specific tasks
-
- 3. **Workflow Streaming**
-    - Events: `MagenticAgentMessageEvent`, `MagenticFinalResultEvent`, etc.
-    - Real-time UI updates via `workflow.run_stream(task)`
-
- 4. **State Management**
-    - `MagenticState` persists evidence across agents
-    - `get_bibliography()` tool for ReportAgent
-
- ---
-
- ## What's Actually Broken: The Search Tools
-
- ### File: `src/agents/tools.py`
-
- The Magentic agents call these tools:
- - `search_pubmed` → uses `PubMedTool`
- - `search_clinical_trials` → uses `ClinicalTrialsTool`
- - `search_preprints` → uses `BioRxivTool`
-
- **These tools are the problem, not the framework.**
-
- ---
-
- ## Search Tool Bugs (Detailed)
-
- ### BUG 1: BioRxiv API Does Not Support Search
-
- **File:** `src/tools/biorxiv.py:248-286`
-
- ```python
- # This fetches the FIRST 100 papers from the last 90 days.
- # It does NOT search by keyword - the API doesn't support that.
- url = f"{self.BASE_URL}/{self.server}/{interval}/0/json"
-
- # Then filters client-side for keywords
- matching = self._filter_by_keywords(papers, query_terms, max_results)
- ```
-
- **Problem:**
- - Fetches 100 random chronological papers
- - Filters for ANY keyword match in title/abstract
- - "Long COVID medications" returns papers about "calf muscles" because they mention "COVID" once
-
- **Fix:** Remove BioRxiv or use Europe PMC (which has actual search)
-
- ---
-
- ### BUG 2: PubMed Query Not Optimized
-
- **File:** `src/tools/pubmed.py:54-71`
-
- ```python
- search_params = self._build_params(
-     db="pubmed",
-     term=query,  # RAW USER QUERY - no preprocessing!
-     retmax=max_results,
-     sort="relevance",
- )
- ```
-
- **Problem:**
- - User enters: "What medications show promise for Long COVID?"
- - PubMed receives: `What medications show promise for Long COVID?`
- - Should receive: `("long covid"[Title/Abstract] OR "PASC"[Title/Abstract]) AND (treatment[Title/Abstract] OR drug[Title/Abstract])`
-
- **Fix:** Add query preprocessing:
- 1. Strip question words (what, which, how, etc.)
- 2. Expand medical synonyms (Long COVID → PASC, Post-COVID)
- 3. Use MeSH terms for better recall
-
- ---
-
- ### BUG 3: ClinicalTrials.gov No Filtering
-
- **File:** `src/tools/clinicaltrials.py`
-
- Returns ALL trials, including:
- - Withdrawn trials
- - Terminated trials
- - Observational studies (not drug interventions)
- - Phase 1 (no efficacy data)
-
- **Fix:** Filter by:
- - `studyType=INTERVENTIONAL`
- - `phase=PHASE2,PHASE3,PHASE4`
- - `status=COMPLETED,ACTIVE_NOT_RECRUITING,RECRUITING`
-
- ---
-
- ## Evidence: Garbage In → Garbage Out
-
- When the Magentic SearchAgent calls these tools:
-
- ```
- SearchAgent: "Find evidence for Long COVID medications"
-   ↓
- search_pubmed("Long COVID medications")
-   → Returns 1 semi-relevant paper (raw query hits)
-
- search_preprints("Long COVID medications")
-   → Returns garbage (BioRxiv API doesn't search)
-   → "Calf muscle adaptations" (has "COVID" somewhere)
-   → "Ophthalmologist work-life balance" (mentions COVID)
-
- search_clinical_trials("Long COVID medications")
-   → Returns all trials, no filtering
-   ↓
- JudgeAgent receives garbage evidence
-   ↓
- HypothesisAgent can't generate good hypotheses from garbage
-   ↓
- ReportAgent produces garbage report
- ```
-
- **The framework is doing its job. It's orchestrating agents correctly. But the agents are being fed garbage data.**
-
- ---
-
- ## Recommended Fixes
-
- ### Priority 1: Delete or Fix BioRxiv (30 min)
-
- **Option A: Delete it**
- ```python
- # In src/agents/tools.py, remove:
- # from src.tools.biorxiv import BioRxivTool
- # _biorxiv = BioRxivTool()
- # @ai_function search_preprints(...)
- ```
-
- **Option B: Replace with Europe PMC**
- Europe PMC has preprints AND a proper search API:
- ```
- https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=long+covid+treatment&format=json
- ```
-
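- For example, a minimal async client against that endpoint might look like this (sketch only; the JSON field names follow the Europe PMC response format, and error handling is omitted):
-
- ```python
- import httpx
-
- async def search_europepmc(query: str, page_size: int = 10) -> list[dict]:
-     """Query the Europe PMC REST search endpoint and return raw result dicts."""
-     url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
-     params = {"query": query, "format": "json", "pageSize": page_size}
-     async with httpx.AsyncClient(timeout=30.0) as client:
-         response = await client.get(url, params=params)
-         response.raise_for_status()
-         return response.json().get("resultList", {}).get("result", [])
- ```
-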
- ### Priority 2: Fix PubMed Query (1 hour)
-
- Add a query preprocessor (minimal runnable sketch; synonym expansion and MeSH tagging are still TODO):
- ```python
- QUESTION_WORDS = {"what", "which", "how", "is", "are", "can", "does"}
-
- def preprocess_query(raw_query: str) -> str:
-     """Convert natural language to PubMed query syntax."""
-     # Strip question marks and question words
-     words = raw_query.replace("?", "").lower().split()
-     terms = [w for w in words if w not in QUESTION_WORDS]
-     # TODO: expand medical synonyms and add [Title/Abstract] field tags
-     return " ".join(terms)
- ```
-
- ### Priority 3: Filter ClinicalTrials (30 min)
-
- Add parameters to the API call:
- ```python
- params = {
-     "query.term": query,
-     "filter.overallStatus": "COMPLETED,RECRUITING",
-     "filter.studyType": "INTERVENTIONAL",
-     "pageSize": max_results,
- }
- ```
-
- ---
-
- ## Conclusion
-
- **Microsoft Agent Framework: NO BUGS FOUND**
- - Imports work ✅
- - Agent creation works ✅
- - Workflow building works ✅
- - Model config correct (GPT-5.1) ✅
- - Streaming events work ✅
-
- **Search Tools: CRITICALLY BROKEN**
- - BioRxiv: API doesn't support search (fundamental)
- - PubMed: no query optimization (fixable)
- - ClinicalTrials: no filtering (fixable)
-
- **Recommendation:**
- 1. Delete BioRxiv immediately (unusable)
- 2. Add PubMed query preprocessing
- 3. Add ClinicalTrials filtering
- 4. Then the Magentic multi-agent system will work as designed
docs/bugs/P0_MAGENTIC_MODE_BROKEN.md ADDED
@@ -0,0 +1,116 @@
+ # P0 Bug: Magentic Mode Returns ChatMessage Object Instead of Report Text
+
+ **Status**: OPEN
+ **Priority**: P0 (Critical)
+ **Date**: 2025-11-27
+
+ ---
+
+ ## Actual Bug Found (Not What We Thought)
+
+ **The OpenAI key works fine.** The real bug is different:
+
+ ### The Problem
+
+ When Magentic mode completes, the final report returns a `ChatMessage` object instead of the actual text:
+
+ ```
+ FINAL REPORT:
+ <agent_framework._types.ChatMessage object at 0x11db70310>
+ ```
+
+ ### Evidence
+
+ Full test output shows:
+ 1. Magentic orchestrator starts correctly
+ 2. SearchAgent finds evidence
+ 3. HypothesisAgent generates hypotheses
+ 4. JudgeAgent evaluates
+ 5. **BUT**: the final output is a `ChatMessage` object, not text
+
+ ### Root Cause
+
+ In `src/orchestrator_magentic.py` line 193:
+
+ ```python
+ elif isinstance(event, MagenticFinalResultEvent):
+     text = event.message.text if event.message else "No result"
+ ```
+
+ The `event.message` is a `ChatMessage` object, and `.text` may not extract the content correctly, or the message structure changed in the agent-framework library.
+
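+ A quick way to confirm which attribute actually carries the text is to inspect the object at the point of failure (temporary diagnostic sketch, not part of the fix):
+
+ ```python
+ # Drop this into _process_event just before the failing line:
+ msg = event.message
+ print(type(msg).__name__)                              # e.g. ChatMessage
+ print([a for a in dir(msg) if not a.startswith("_")])  # candidate fields: text, content, ...
+ ```
+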
+ ---
+
+ ## Secondary Issue: Max Rounds Reached
+
+ The orchestrator hits max rounds before producing a report:
+
+ ```
+ [ERROR] Magentic Orchestrator: Max round count reached
+ ```
+
+ This means the workflow times out before the ReportAgent synthesizes the final output.
+
+ ---
+
+ ## What Works
+
+ - OpenAI API key: **Works** (loaded from .env)
+ - SearchAgent: **Works** (finds evidence from PubMed, ClinicalTrials, Europe PMC)
+ - HypothesisAgent: **Works** (generates Drug -> Target -> Pathway chains)
+ - JudgeAgent: **Partial** (evaluates but sometimes loses context)
+
+ ---
+
+ ## Files to Fix
+
+ | File | Line | Issue |
+ |------|------|-------|
+ | `src/orchestrator_magentic.py` | 193 | `event.message.text` returns object, not string |
+ | `src/orchestrator_magentic.py` | 97-99 | `max_round_count=3` too low for full pipeline |
+
+ ---
+
+ ## Suggested Fix
+
+ ```python
+ # In _process_event, lines 192-199
+ elif isinstance(event, MagenticFinalResultEvent):
+     # Handle the ChatMessage object properly
+     if event.message:
+         if hasattr(event.message, 'content'):
+             text = event.message.content
+         elif hasattr(event.message, 'text'):
+             text = event.message.text
+         else:
+             text = str(event.message)
+     else:
+         text = "No result"
+ ```
+
+ And increase the round budget:
+
+ ```python
+ # In _build_workflow, line 97
+ max_round_count=self._max_rounds,  # Use the configured value, default 10
+ ```
+
+ ---
+
+ ## Test Command
+
+ ```bash
+ set -a && source .env && set +a && uv run python examples/orchestrator_demo/run_magentic.py "metformin alzheimer"
+ ```
+
+ ---
+
+ ## Simple Mode Works
+
+ For reference, simple mode produces full reports:
+
+ ```bash
+ uv run python examples/orchestrator_demo/run_agent.py "metformin alzheimer"
+ ```
+
+ Output includes a structured report with Drug Candidates, Key Findings, etc.
docs/bugs/P1_GRADIO_SETTINGS_CLEANUP.md ADDED
@@ -0,0 +1,81 @@
+ # P1 Bug: Gradio Settings Accordion Not Collapsing
+
+ **Priority**: P1 (UX Bug)
+ **Status**: OPEN
+ **Date**: 2025-11-27
+ **Target Component**: `src/app.py`
+
+ ---
+
+ ## 1. Problem Description
+
+ The "Settings" accordion in the Gradio UI (containing Orchestrator Mode, API Key, Provider) fails to collapse, even when configured with `open=False`. It remains permanently expanded, cluttering the interface and obscuring the chat history.
+
+ ### Symptoms
+ - Accordion arrow toggles visually, but content remains visible.
+ - Occurs in both local development (`uv run src/app.py`) and HuggingFace Spaces.
+
+ ---
+
+ ## 2. Root Cause Analysis
+
+ **Definitive Cause**: Nested `Blocks` Context Bug.
+ `gr.ChatInterface` is itself a high-level abstraction that creates a `gr.Blocks` context. Wrapping `gr.ChatInterface` inside an external `with gr.Blocks():` context causes event listener conflicts, specifically breaking the JavaScript state management for `additional_inputs_accordion`.
+
+ **Reference**: [Gradio Issue #8861](https://github.com/gradio-app/gradio/issues/8861) confirms that `additional_inputs_accordion` malfunctions when `ChatInterface` is not the top-level block.
+
+ ---
+
+ ## 3. Solution Strategy: "The Unwrap Fix"
+
+ We will remove the redundant `gr.Blocks` wrapper. This restores the native behavior of `ChatInterface`, ensuring the accordion respects `open=False`.
+
+ ### Implementation Plan
+
+ **Refactor `src/app.py` / `create_demo()`**:
+
+ 1. **Remove** the `with gr.Blocks() as demo:` context manager.
+ 2. **Instantiate** `gr.ChatInterface` directly as the `demo` object.
+ 3. **Migrate UI Elements**:
+    * **Header**: Move the H1/Title text into the `title` parameter of `ChatInterface`.
+    * **Footer**: Move the footer text ("MCP Server Active...") into the `description` parameter. `ChatInterface` supports Markdown in `description`, making it the ideal place for static info below the title but above the chat.
+
+ ### Before (Buggy)
+ ```python
+ def create_demo():
+     with gr.Blocks() as demo:  # <--- CAUSE OF BUG
+         gr.Markdown("# Title")
+         gr.ChatInterface(..., additional_inputs_accordion=gr.Accordion(open=False))
+         gr.Markdown("Footer")
+     return demo
+ ```
+
+ ### After (Correct)
+ ```python
+ def create_demo():
+     return gr.ChatInterface(  # <--- FIX: Top-level component
+         ...,
+         title="🧬 DeepCritical",
+         description="*AI-Powered Drug Repurposing Agent...*\n\n---\n**MCP Server Active**...",
+         additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False),
+     )
+ ```
+
+ ---
+
+ ## 4. Validation
+
+ 1. **Run**: `uv run python src/app.py`
+ 2. **Check**: Open `http://localhost:7860`
+ 3. **Verify**:
+    * Settings accordion starts **COLLAPSED**.
+    * Header title ("DeepCritical") is visible.
+    * Footer text ("MCP Server Active") is visible in the description area.
+    * Chat functionality works (Magentic/Simple modes).
+
+ ---
+
+ ## 5. Constraints & Notes
+
+ - **Layout**: We lose the ability to place arbitrary elements *below* the chat box (the footer will move to the top, under the title), but this is an acceptable trade-off for a working UI.
+ - **CSS**: `ChatInterface` handles its own CSS; any custom class styling from the previous footer will be standardized to the description text style.
docs/bugs/PHASE_00_IMPLEMENTATION_ORDER.md DELETED
@@ -1,156 +0,0 @@
- # Phase 00: Implementation Order & Summary
-
- **Total Effort:** 5-8 hours
- **Parallelizable:** Yes (all 3 phases are independent)
-
- ---
-
- ## Executive Summary
-
- The DeepCritical drug repurposing agent produces garbage results because the search tools are broken:
-
- | Tool | Problem | Fix |
- |------|---------|-----|
- | BioRxiv | API doesn't support search | Replace with Europe PMC |
- | PubMed | Raw queries, no preprocessing | Add query cleaner |
- | ClinicalTrials | No filtering | Add status/type filters |
-
- **The Microsoft Agent Framework (Magentic) is working correctly.** The orchestration layer is fine. The data layer is broken.
-
- ---
-
- ## Phase Specs
-
- | Phase | Title | Effort | Priority | Dependencies |
- |-------|-------|--------|----------|--------------|
- | **01** | [Replace BioRxiv with Europe PMC](./PHASE_01_REPLACE_BIORXIV.md) | 2-3 hrs | P0 | None |
- | **02** | [PubMed Query Preprocessing](./PHASE_02_PUBMED_QUERY_PREPROCESSING.md) | 2-3 hrs | P0 | None |
- | **03** | [ClinicalTrials Filtering](./PHASE_03_CLINICALTRIALS_FILTERING.md) | 1-2 hrs | P1 | None |
-
- ---
-
- ## Recommended Execution Order
-
- Since all phases are independent, they can be done in parallel by different developers.
-
- **If doing sequentially, order by impact:**
-
- 1. **Phase 01** - BioRxiv is completely broken (returns random papers)
- 2. **Phase 02** - PubMed is partially broken (returns suboptimal results)
- 3. **Phase 03** - ClinicalTrials returns too much noise
-
- ---
-
- ## TDD Workflow (Per Phase)
-
- ```
- 1. Write failing tests
- 2. Run tests (confirm they fail)
- 3. Implement fix
- 4. Run tests (confirm they pass)
- 5. Run ALL tests (confirm no regressions)
- 6. Manual verification
- 7. Commit
- ```
-
- ---
-
- ## Verification After All Phases
-
- After completing all 3 phases, run this integration test:
-
- ```bash
- # Full system test
- uv run python -c "
- import asyncio
- from src.tools.europepmc import EuropePMCTool
- from src.tools.pubmed import PubMedTool
- from src.tools.clinicaltrials import ClinicalTrialsTool
-
- async def test_all():
-     query = 'long covid treatment'
-
-     print('=== Europe PMC (Preprints) ===')
-     epmc = EuropePMCTool()
-     results = await epmc.search(query, 2)
-     for r in results:
-         print(f' - {r.citation.title[:60]}...')
-
-     print()
-     print('=== PubMed ===')
-     pm = PubMedTool()
-     results = await pm.search(query, 2)
-     for r in results:
-         print(f' - {r.citation.title[:60]}...')
-
-     print()
-     print('=== ClinicalTrials.gov ===')
-     ct = ClinicalTrialsTool()
-     results = await ct.search(query, 2)
-     for r in results:
-         print(f' - {r.citation.title[:60]}...')
-
- asyncio.run(test_all())
- "
- ```
-
- **Expected:** All results should be relevant to "long covid treatment"
-
- ---
-
- ## Test Magentic Integration
-
- After all phases are complete, test the full Magentic workflow:
-
- ```bash
- # Test Magentic mode (requires OPENAI_API_KEY)
- uv run python -c "
- import asyncio
- from src.orchestrator_magentic import MagenticOrchestrator
-
- async def test_magentic():
-     orchestrator = MagenticOrchestrator(max_rounds=3)
-
-     print('Running Magentic workflow...')
-     async for event in orchestrator.run('What drugs show promise for Long COVID?'):
-         print(f'[{event.type}] {event.message[:100]}...')
-
- asyncio.run(test_magentic())
- "
- ```
-
- ---
-
- ## Files Changed (All Phases)
-
- | File | Phase | Action |
- |------|-------|--------|
- | `src/tools/europepmc.py` | 01 | CREATE |
- | `tests/unit/tools/test_europepmc.py` | 01 | CREATE |
- | `src/agents/tools.py` | 01 | MODIFY |
- | `src/tools/search_handler.py` | 01 | MODIFY |
- | `src/tools/biorxiv.py` | 01 | DELETE |
- | `tests/unit/tools/test_biorxiv.py` | 01 | DELETE |
- | `src/tools/query_utils.py` | 02 | CREATE |
- | `tests/unit/tools/test_query_utils.py` | 02 | CREATE |
- | `src/tools/pubmed.py` | 02 | MODIFY |
- | `src/tools/clinicaltrials.py` | 03 | MODIFY |
- | `tests/unit/tools/test_clinicaltrials.py` | 03 | MODIFY |
-
- ---
-
- ## Success Criteria (Overall)
-
- - [ ] All unit tests pass
- - [ ] All integration tests pass (real APIs)
- - [ ] Query "What drugs show promise for Long COVID?" returns relevant results from all 3 sources
- - [ ] Magentic workflow produces a coherent research report
- - [ ] No regressions in existing functionality
-
- ---
-
- ## Related Documentation
-
- - [P0 Critical Bugs](./P0_CRITICAL_BUGS.md) - Root cause analysis
- - [P0 Magentic Audit](./P0_MAGENTIC_AND_SEARCH_AUDIT.md) - Framework verification
- - [P0 Actionable Fixes](./P0_ACTIONABLE_FIXES.md) - Fix summaries
docs/bugs/PHASE_01_REPLACE_BIORXIV.md DELETED
@@ -1,371 +0,0 @@
- # Phase 01: Replace BioRxiv with Europe PMC
-
- **Priority:** P0 - Critical
- **Effort:** 2-3 hours
- **Dependencies:** None
-
- ---
-
- ## Problem Statement
-
- The BioRxiv API does not support keyword search. It only returns papers by date range, resulting in completely irrelevant results for any query.
-
- ## Success Criteria
-
- - [ ] `search_preprints("long covid treatment")` returns papers actually about Long COVID
- - [ ] All existing tests pass
- - [ ] New tests cover the Europe PMC integration
-
- ---
-
- ## TDD Implementation Order
-
- ### Step 1: Write Failing Tests
-
- **File:** `tests/unit/tools/test_europepmc.py`
-
- ```python
- """Unit tests for the Europe PMC tool."""
-
- import pytest
- from unittest.mock import AsyncMock, patch
-
- from src.tools.europepmc import EuropePMCTool
- from src.utils.models import Evidence
-
-
- @pytest.mark.unit
- class TestEuropePMCTool:
-     """Tests for EuropePMCTool."""
-
-     @pytest.fixture
-     def tool(self):
-         return EuropePMCTool()
-
-     def test_tool_name(self, tool):
-         assert tool.name == "europepmc"
-
-     @pytest.mark.asyncio
-     async def test_search_returns_evidence(self, tool):
-         """Test that search returns Evidence objects."""
-         mock_response = {
-             "resultList": {
-                 "result": [
-                     {
-                         "id": "12345",
-                         "title": "Long COVID Treatment Study",
-                         "abstractText": "This study examines treatments for Long COVID.",
-                         "doi": "10.1234/test",
-                         "pubYear": "2024",
-                         "source": "MED",
-                         "pubTypeList": {"pubType": ["research-article"]},
-                     }
-                 ]
-             }
-         }
-
-         with patch("httpx.AsyncClient") as mock_client:
-             mock_instance = AsyncMock()
-             mock_client.return_value.__aenter__.return_value = mock_instance
-             mock_instance.get.return_value.json.return_value = mock_response
-             mock_instance.get.return_value.raise_for_status = lambda: None
-
-             results = await tool.search("long covid treatment", max_results=5)
-
-             assert len(results) == 1
-             assert isinstance(results[0], Evidence)
-             assert "Long COVID Treatment Study" in results[0].citation.title
-
-     @pytest.mark.asyncio
-     async def test_search_marks_preprints(self, tool):
-         """Test that preprints are marked correctly."""
-         mock_response = {
-             "resultList": {
-                 "result": [
-                     {
-                         "id": "PPR12345",
-                         "title": "Preprint Study",
-                         "abstractText": "Abstract text",
-                         "doi": "10.1234/preprint",
-                         "pubYear": "2024",
-                         "source": "PPR",
-                         "pubTypeList": {"pubType": ["Preprint"]},
-                     }
-                 ]
-             }
-         }
-
-         with patch("httpx.AsyncClient") as mock_client:
-             mock_instance = AsyncMock()
-             mock_client.return_value.__aenter__.return_value = mock_instance
-             mock_instance.get.return_value.json.return_value = mock_response
-             mock_instance.get.return_value.raise_for_status = lambda: None
-
-             results = await tool.search("test", max_results=5)
-
-             # Content marker is "[PREPRINT - Not peer-reviewed]", so match the open bracket + word
-             assert "PREPRINT" in results[0].content
-             assert results[0].citation.source == "preprint"
-
-     @pytest.mark.asyncio
-     async def test_search_empty_results(self, tool):
-         """Test handling of empty results."""
-         mock_response = {"resultList": {"result": []}}
-
-         with patch("httpx.AsyncClient") as mock_client:
-             mock_instance = AsyncMock()
-             mock_client.return_value.__aenter__.return_value = mock_instance
-             mock_instance.get.return_value.json.return_value = mock_response
-             mock_instance.get.return_value.raise_for_status = lambda: None
-
-             results = await tool.search("nonexistent query xyz", max_results=5)
-
-             assert results == []
-
-
- @pytest.mark.integration
- class TestEuropePMCIntegration:
-     """Integration tests with the real API."""
-
-     @pytest.mark.asyncio
-     async def test_real_api_call(self):
-         """Test that the actual API returns relevant results."""
-         tool = EuropePMCTool()
-         results = await tool.search("long covid treatment", max_results=3)
-
-         assert len(results) > 0
-         # At least one result should mention COVID
-         titles = " ".join([r.citation.title.lower() for r in results])
-         assert "covid" in titles or "sars" in titles
- ```
-
- ### Step 2: Implement the Europe PMC Tool
-
- **File:** `src/tools/europepmc.py`
-
- ```python
- """Europe PMC search tool - replaces BioRxiv."""
-
- from typing import Any
-
- import httpx
- from tenacity import retry, stop_after_attempt, wait_exponential
-
- from src.utils.exceptions import SearchError
- from src.utils.models import Citation, Evidence
-
-
- class EuropePMCTool:
-     """
-     Search Europe PMC for papers and preprints.
-
-     Europe PMC indexes:
-     - PubMed/MEDLINE articles
-     - PMC full-text articles
-     - Preprints from bioRxiv, medRxiv, ChemRxiv, etc.
-     - Patents and clinical guidelines
-
-     API Docs: https://europepmc.org/RestfulWebService
-     """
-
-     BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
-
-     @property
-     def name(self) -> str:
-         return "europepmc"
-
-     @retry(
-         stop=stop_after_attempt(3),
-         wait=wait_exponential(multiplier=1, min=1, max=10),
-         reraise=True,
-     )
-     async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
-         """
-         Search Europe PMC for papers matching the query.
-
-         Args:
-             query: Search keywords
-             max_results: Maximum results to return
-
-         Returns:
-             List of Evidence objects
-         """
-         params = {
-             "query": query,
-             "resultType": "core",
-             "pageSize": min(max_results, 100),
-             "format": "json",
-         }
-
-         async with httpx.AsyncClient(timeout=30.0) as client:
-             try:
-                 response = await client.get(self.BASE_URL, params=params)
-                 response.raise_for_status()
-
-                 data = response.json()
-                 results = data.get("resultList", {}).get("result", [])
-
-                 return [self._to_evidence(r) for r in results[:max_results]]
-
-             except httpx.HTTPStatusError as e:
-                 raise SearchError(f"Europe PMC API error: {e}") from e
-             except httpx.RequestError as e:
-                 raise SearchError(f"Europe PMC connection failed: {e}") from e
-
-     def _to_evidence(self, result: dict[str, Any]) -> Evidence:
-         """Convert a Europe PMC result to Evidence."""
-         title = result.get("title", "Untitled")
-         abstract = result.get("abstractText", "No abstract available.")
-         doi = result.get("doi", "")
-         pub_year = result.get("pubYear", "Unknown")
-
-         # Get authors
-         author_list = result.get("authorList", {}).get("author", [])
-         authors = [a.get("fullName", "") for a in author_list[:5] if a.get("fullName")]
-
-         # Check if preprint
-         pub_types = result.get("pubTypeList", {}).get("pubType", [])
-         is_preprint = "Preprint" in pub_types
-         source_db = result.get("source", "europepmc")
-
-         # Build content
-         preprint_marker = "[PREPRINT - Not peer-reviewed] " if is_preprint else ""
-         content = f"{preprint_marker}{abstract[:1800]}"
-
-         # Build URL
-         if doi:
-             url = f"https://doi.org/{doi}"
-         elif result.get("pmid"):
-             url = f"https://pubmed.ncbi.nlm.nih.gov/{result['pmid']}/"
-         else:
-             url = f"https://europepmc.org/article/{source_db}/{result.get('id', '')}"
-
-         return Evidence(
-             content=content[:2000],
-             citation=Citation(
-                 source="preprint" if is_preprint else "europepmc",
-                 title=title[:500],
-                 url=url,
-                 date=str(pub_year),
-                 authors=authors,
-             ),
-             relevance=0.75 if is_preprint else 0.9,
-         )
- ```
-
- ### Step 3: Update the Magentic Tools
-
- **File:** `src/agents/tools.py` - replace the biorxiv import:
-
- ```python
- # REMOVE:
- # from src.tools.biorxiv import BioRxivTool
- # _biorxiv = BioRxivTool()
-
- # ADD:
- from src.tools.europepmc import EuropePMCTool
- _europepmc = EuropePMCTool()
-
- # UPDATE the search_preprints function:
- @ai_function
- async def search_preprints(query: str, max_results: int = 10) -> str:
-     """Search Europe PMC for preprints and papers.
-
-     Use this tool to find the latest research including preprints
-     from bioRxiv, medRxiv, and peer-reviewed papers.
-
-     Args:
-         query: Search terms (e.g., "long covid treatment")
-         max_results: Maximum results to return (default 10)
-
-     Returns:
-         Formatted list of papers with abstracts and links
-     """
-     state = get_magentic_state()
-
-     results = await _europepmc.search(query, max_results)
-     if not results:
-         return f"No papers found for: {query}"
-
-     new_count = state.add_evidence(results)
-
-     output = [f"Found {len(results)} papers ({new_count} new stored):\n"]
-     for i, r in enumerate(results[:max_results], 1):
-         title = r.citation.title
-         date = r.citation.date
-         source = r.citation.source
-         content_clean = r.content[:300].replace("\n", " ")
-         url = r.citation.url
-
-         output.append(f"{i}. **{title}**")
-         output.append(f"   Source: {source} | Date: {date}")
-         output.append(f"   {content_clean}...")
-         output.append(f"   URL: {url}\n")
-
-     return "\n".join(output)
- ```
-
- ### Step 4: Update the Search Handler (Simple Mode)
-
- **File:** `src/tools/search_handler.py` - update the imports:
-
- ```python
- # REMOVE:
- # from src.tools.biorxiv import BioRxivTool
-
- # ADD:
- from src.tools.europepmc import EuropePMCTool
- ```
-
- ### Step 5: Delete the Old BioRxiv Tests
-
- ```bash
- # After all new tests pass:
- rm tests/unit/tools/test_biorxiv.py
- ```
-
- ---
-
- ## Verification
-
- ```bash
- # Run the new tests
- uv run pytest tests/unit/tools/test_europepmc.py -v
-
- # Run the integration test (real API)
- uv run pytest tests/unit/tools/test_europepmc.py::TestEuropePMCIntegration -v
-
- # Run all tests to ensure no regressions
- uv run pytest tests/unit/ -v
-
- # Manual verification
- uv run python -c "
- import asyncio
- from src.tools.europepmc import EuropePMCTool
- tool = EuropePMCTool()
- results = asyncio.run(tool.search('long covid treatment', 3))
- for r in results:
-     print(f'- {r.citation.title}')
- "
- ```
-
- ---
-
- ## Files Changed
-
- | File | Action |
- |------|--------|
- | `src/tools/europepmc.py` | CREATE |
- | `tests/unit/tools/test_europepmc.py` | CREATE |
- | `src/agents/tools.py` | MODIFY (replace biorxiv import) |
- | `src/tools/search_handler.py` | MODIFY (replace biorxiv import) |
- | `src/tools/biorxiv.py` | DELETE (after verification) |
- | `tests/unit/tools/test_biorxiv.py` | DELETE (after verification) |
-
- ---
-
- ## Rollback Plan
-
- If issues arise:
- 1. Revert `src/agents/tools.py` to use BioRxivTool
- 2. Revert `src/tools/search_handler.py`
- 3. Keep `europepmc.py` for future use
docs/bugs/PHASE_02_PUBMED_QUERY_PREPROCESSING.md DELETED
@@ -1,355 +0,0 @@
- # Phase 02: PubMed Query Preprocessing
-
- **Priority:** P0 - Critical
- **Effort:** 2-3 hours
- **Dependencies:** None (can run parallel with Phase 01)
-
- ---
-
- ## Problem Statement
-
- PubMed receives raw natural language queries like "What medications show promise for Long COVID?" which include question words that pollute search results.
-
- ## Success Criteria
-
- - [ ] Question words stripped from queries
- - [ ] Medical synonyms expanded (Long COVID → PASC, etc.)
- - [ ] Relevant results returned for natural language questions
- - [ ] All existing tests pass
- - [ ] New tests cover query preprocessing
-
- ---
-
- ## TDD Implementation Order
-
- ### Step 1: Write Failing Tests
-
- **File:** `tests/unit/tools/test_query_utils.py`
-
- ```python
- """Unit tests for query preprocessing utilities."""
-
- import pytest
-
- from src.tools.query_utils import preprocess_query, expand_synonyms, strip_question_words
-
-
- @pytest.mark.unit
- class TestQueryPreprocessing:
-     """Tests for query preprocessing."""
-
-     def test_strip_question_words(self):
-         """Test removal of question and filler words."""
-         assert strip_question_words("What drugs treat cancer") == "drugs treat cancer"
-         assert strip_question_words("Which medications help diabetes") == "medications diabetes"
-         assert strip_question_words("How can we cure alzheimer") == "cure alzheimer"
-         # "effective" is in the filler-word list, so it is stripped too
-         assert strip_question_words("Is metformin effective") == "metformin"
-
-     def test_strip_preserves_medical_terms(self):
-         """Test that medical terms are preserved."""
-         result = strip_question_words("What is the mechanism of metformin")
-         assert "metformin" in result
-         assert "mechanism" in result
-
-     def test_expand_synonyms_long_covid(self):
-         """Test Long COVID synonym expansion."""
-         result = expand_synonyms("long covid treatment")
-         assert "PASC" in result or "post-COVID" in result
-
-     def test_expand_synonyms_alzheimer(self):
-         """Test Alzheimer's synonym expansion."""
-         result = expand_synonyms("alzheimer drug")
-         assert "Alzheimer" in result
-
-     def test_expand_synonyms_preserves_unknown(self):
-         """Test that unknown terms are preserved."""
-         result = expand_synonyms("metformin diabetes")
-         assert "metformin" in result
-         assert "diabetes" in result
-
-     def test_preprocess_query_full_pipeline(self):
-         """Test the complete preprocessing pipeline."""
-         raw = "What medications show promise for Long COVID?"
-         result = preprocess_query(raw)
-
-         # Should not contain question words
-         assert "what" not in result.lower()
-         assert "show" not in result.lower()
-         assert "promise" not in result.lower()
-
-         # Should contain expanded terms
-         assert "PASC" in result or "post-COVID" in result or "long covid" in result.lower()
-         assert "medications" in result.lower() or "drug" in result.lower()
-
-     def test_preprocess_query_removes_punctuation(self):
-         """Test that question marks are removed."""
-         result = preprocess_query("Is metformin safe?")
-         assert "?" not in result
-
-     def test_preprocess_query_handles_empty(self):
-         """Test handling of empty/whitespace queries."""
-         assert preprocess_query("") == ""
-         assert preprocess_query("   ") == ""
-
-     def test_preprocess_query_already_clean(self):
-         """Test that clean queries pass through."""
-         clean = "metformin diabetes mechanism"
-         result = preprocess_query(clean)
-         assert "metformin" in result
-         assert "diabetes" in result
-         assert "mechanism" in result
- ```
-
- ### Step 2: Implement the Query Utils
-
- **File:** `src/tools/query_utils.py`
-
- ```python
- """Query preprocessing utilities for biomedical search."""
-
- import re
-
- # Question words and filler words to remove
- QUESTION_WORDS: set[str] = {
-     # Question starters
-     "what", "which", "how", "why", "when", "where", "who", "whom",
-     # Auxiliary verbs in questions
-     "is", "are", "was", "were", "do", "does", "did", "can", "could",
-     "would", "should", "will", "shall", "may", "might",
-     # Pronouns and filler words in natural questions
-     "we", "show", "promise", "help", "believe", "think", "suggest",
-     "possible", "potential", "effective", "useful", "good",
-     # Articles (remove but less aggressively)
-     "the", "a", "an",
- }
-
- # Medical synonym expansions
- SYNONYMS: dict[str, list[str]] = {
-     "long covid": [
-         "long COVID",
-         "PASC",
-         "post-acute sequelae of SARS-CoV-2",
-         "post-COVID syndrome",
-         "post-COVID-19 condition",
-     ],
-     "alzheimer": [
-         "Alzheimer's disease",
-         "Alzheimer disease",
-         "AD",
-         "Alzheimer dementia",
-     ],
-     "parkinson": [
-         "Parkinson's disease",
-         "Parkinson disease",
-         "PD",
-     ],
-     "diabetes": [
-         "diabetes mellitus",
-         "type 2 diabetes",
-         "T2DM",
-         "diabetic",
-     ],
-     "cancer": [
-         "cancer",
-         "neoplasm",
-         "tumor",
-         "malignancy",
-         "carcinoma",
-     ],
-     "heart disease": [
-         "cardiovascular disease",
-         "CVD",
-         "coronary artery disease",
-         "heart failure",
-     ],
- }
-
-
- def strip_question_words(query: str) -> str:
-     """
-     Remove question words and filler terms from a query.
-
-     Args:
-         query: Raw query string
-
-     Returns:
-         Query with question words removed
-     """
-     words = query.lower().split()
-     filtered = [w for w in words if w not in QUESTION_WORDS]
-     return " ".join(filtered)
-
-
- def expand_synonyms(query: str) -> str:
-     """
-     Expand medical terms to include synonyms.
-
-     Args:
-         query: Query string
-
-     Returns:
-         Query with synonym expansions in OR groups
-     """
-     result = query.lower()
-
-     for term, expansions in SYNONYMS.items():
-         if term in result:
-             # Create an OR group: ("term1" OR "term2" OR "term3")
-             or_group = " OR ".join([f'"{exp}"' for exp in expansions])
-             result = result.replace(term, f"({or_group})")
-
-     return result
-
-
- def preprocess_query(raw_query: str) -> str:
-     """
-     Full preprocessing pipeline for PubMed queries.
-
-     Pipeline:
-     1. Strip whitespace and punctuation
-     2. Remove question words
-     3. Expand medical synonyms
-
-     Args:
-         raw_query: Natural language query from the user
-
-     Returns:
-         Optimized query for PubMed
-     """
-     if not raw_query or not raw_query.strip():
-         return ""
-
-     # Remove question marks and extra whitespace
-     query = raw_query.replace("?", "").strip()
-     query = re.sub(r"\s+", " ", query)
-
-     # Strip question words
-     query = strip_question_words(query)
-
-     # Expand synonyms
-     query = expand_synonyms(query)
-
-     return query.strip()
- ```
-
- ### Step 3: Update the PubMed Tool
-
- **File:** `src/tools/pubmed.py` - add preprocessing:
-
- ```python
- # Add this import at the top:
- from src.tools.query_utils import preprocess_query
-
- # Update the search method:
- @retry(
-     stop=stop_after_attempt(3),
-     wait=wait_exponential(multiplier=1, min=1, max=10),
-     reraise=True,
- )
- async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
-     """
-     Search PubMed and return evidence.
-     """
-     await self._rate_limit()
-
-     # PREPROCESS QUERY
-     clean_query = preprocess_query(query)
-     if not clean_query:
-         clean_query = query  # Fall back to the original if preprocessing empties it
-
-     async with httpx.AsyncClient(timeout=30.0) as client:
-         search_params = self._build_params(
-             db="pubmed",
-             term=clean_query,  # Use the preprocessed query
-             retmax=max_results,
-             sort="relevance",
-         )
-         # ... rest unchanged
- ```
-
- ### Step 4: Update the PubMed Tests
-
- **File:** `tests/unit/tools/test_pubmed.py` - add a preprocessing test:
-
- ```python
- @pytest.mark.asyncio
- async def test_search_preprocesses_query(self, pubmed_tool, mock_httpx_client):
-     """Test that queries are preprocessed before search."""
-     # This test verifies the integration - the actual preprocessing
-     # is tested in test_query_utils.py
-     # (assumes `import httpx` at the top of the test module)
-
-     mock_httpx_client.get.return_value = httpx.Response(
-         200,
-         json={"esearchresult": {"idlist": []}},
-     )
-
-     # Natural language query
-     await pubmed_tool.search("What drugs help with Long COVID?")
-
-     # Verify the call was made (preprocessing happens internally)
-     assert mock_httpx_client.get.called
- ```
-
- ---
-
- ## Verification
-
- ```bash
- # Run the query utils tests
- uv run pytest tests/unit/tools/test_query_utils.py -v
-
- # Run the pubmed tests
- uv run pytest tests/unit/tools/test_pubmed.py -v
-
- # Run all tests
- uv run pytest tests/unit/ -v
-
- # Manual verification
- uv run python -c "
- from src.tools.query_utils import preprocess_query
-
- queries = [
-     'What medications show promise for Long COVID?',
-     'Is metformin effective for cancer treatment?',
-     'How can we treat Alzheimer with existing drugs?',
- ]
-
- for q in queries:
-     print(f'Input: {q}')
-     print(f'Output: {preprocess_query(q)}')
-     print()
- "
- ```
-
- Expected output:
- ```
- Input: What medications show promise for Long COVID?
- Output: medications for ("long COVID" OR "PASC" OR "post-acute sequelae of SARS-CoV-2" OR "post-COVID syndrome" OR "post-COVID-19 condition")
-
- Input: Is metformin effective for cancer treatment?
- Output: metformin for ("cancer" OR "neoplasm" OR "tumor" OR "malignancy" OR "carcinoma") treatment
-
- Input: How can we treat Alzheimer with existing drugs?
- Output: treat ("Alzheimer's disease" OR "Alzheimer disease" OR "AD" OR "Alzheimer dementia") with existing drugs
- ```
-
- ---
-
- ## Files Changed
-
- | File | Action |
- |------|--------|
- | `src/tools/query_utils.py` | CREATE |
- | `tests/unit/tools/test_query_utils.py` | CREATE |
- | `src/tools/pubmed.py` | MODIFY (add preprocessing) |
- | `tests/unit/tools/test_pubmed.py` | MODIFY (add integration test) |
-
- ---
-
- ## Future Enhancements (Out of Scope)
-
- - MeSH term lookup via the NCBI API
- - Drug name normalization (brand → generic)
- - Disease ontology integration (UMLS)
- - Query intent classification
docs/bugs/PHASE_03_CLINICALTRIALS_FILTERING.md DELETED
@@ -1,386 +0,0 @@
- # Phase 03: ClinicalTrials.gov Filtering
-
- **Priority:** P1 - High
- **Effort:** 1-2 hours
- **Dependencies:** None (can run parallel with Phase 01 & 02)
-
- ---
-
- ## Problem Statement
-
- ClinicalTrials.gov returns ALL matching trials including:
- - Withdrawn/Terminated trials (no useful data)
- - Observational studies (not drug interventions)
- - Phase 1 trials (safety only, no efficacy)
-
- For drug repurposing, we need interventional studies with efficacy data.
-
- ## Success Criteria
-
- - [ ] Only interventional studies returned
- - [ ] Withdrawn/terminated trials filtered out
- - [ ] Phase information included in results
- - [ ] All existing tests pass
- - [ ] New tests cover filtering
-
- ---
-
- ## TDD Implementation Order
-
- ### Step 1: Write Failing Tests
-
- **File:** `tests/unit/tools/test_clinicaltrials.py` - add filter tests:
-
- ```python
- """Unit tests for the ClinicalTrials.gov tool."""
-
- import pytest
- from unittest.mock import patch, MagicMock
-
- from src.tools.clinicaltrials import ClinicalTrialsTool
- from src.utils.models import Evidence
-
-
- @pytest.mark.unit
- class TestClinicalTrialsTool:
-     """Tests for ClinicalTrialsTool."""
-
-     @pytest.fixture
-     def tool(self):
-         return ClinicalTrialsTool()
-
-     def test_tool_name(self, tool):
-         assert tool.name == "clinicaltrials"
-
-     @pytest.mark.asyncio
-     async def test_search_uses_filters(self, tool):
-         """Test that search applies status and type filters."""
-         mock_response = MagicMock()
-         mock_response.json.return_value = {"studies": []}
-         mock_response.raise_for_status = MagicMock()
-
-         with patch("requests.get", return_value=mock_response) as mock_get:
-             await tool.search("test query", max_results=5)
-
-             # Verify the filters were applied
-             call_args = mock_get.call_args
-             params = call_args.kwargs.get("params", call_args[1].get("params", {}))
-
-             # Should filter for active/completed studies
-             assert "filter.overallStatus" in params
-             assert "COMPLETED" in params["filter.overallStatus"]
-             assert "RECRUITING" in params["filter.overallStatus"]
-
-             # Should filter for interventional studies
-             assert "filter.studyType" in params
-             assert "INTERVENTIONAL" in params["filter.studyType"]
-
-     @pytest.mark.asyncio
-     async def test_search_returns_evidence(self, tool):
-         """Test that search returns Evidence objects."""
-         mock_study = {
-             "protocolSection": {
-                 "identificationModule": {
-                     "nctId": "NCT12345678",
-                     "briefTitle": "Metformin for Long COVID Treatment",
-                 },
-                 "statusModule": {
-                     "overallStatus": "COMPLETED",
-                     "startDateStruct": {"date": "2023-01-01"},
-                 },
-                 "descriptionModule": {
-                     "briefSummary": "A study examining metformin for Long COVID symptoms.",
-                 },
-                 "designModule": {
-                     "phases": ["PHASE2", "PHASE3"],
-                 },
-                 "conditionsModule": {
-                     "conditions": ["Long COVID", "PASC"],
-                 },
-                 "armsInterventionsModule": {
-                     "interventions": [{"name": "Metformin"}],
-                 },
-             }
-         }
-
-         mock_response = MagicMock()
-         mock_response.json.return_value = {"studies": [mock_study]}
-         mock_response.raise_for_status = MagicMock()
-
-         with patch("requests.get", return_value=mock_response):
-             results = await tool.search("long covid metformin", max_results=5)
-
-         assert len(results) == 1
-         assert isinstance(results[0], Evidence)
-         assert "Metformin" in results[0].citation.title
-         assert "PHASE2" in results[0].content or "Phase" in results[0].content
-
-     @pytest.mark.asyncio
-     async def test_search_includes_phase_info(self, tool):
-         """Test that phase information is included in content."""
-         mock_study = {
-             "protocolSection": {
-                 "identificationModule": {
-                     "nctId": "NCT12345678",
-                     "briefTitle": "Test Study",
-                 },
-                 "statusModule": {
-                     "overallStatus": "RECRUITING",
-                     "startDateStruct": {"date": "2024-01-01"},
-                 },
-                 "descriptionModule": {
-                     "briefSummary": "Test summary.",
-                 },
-                 "designModule": {
-                     "phases": ["PHASE3"],
-                 },
-                 "conditionsModule": {"conditions": ["Test"]},
-                 "armsInterventionsModule": {"interventions": []},
-             }
-         }
-
-         mock_response = MagicMock()
-         mock_response.json.return_value = {"studies": [mock_study]}
-         mock_response.raise_for_status = MagicMock()
-
-         with patch("requests.get", return_value=mock_response):
-             results = await tool.search("test", max_results=5)
-
-         # Phase should be in the content
-         assert "PHASE3" in results[0].content or "Phase 3" in results[0].content
-
-     @pytest.mark.asyncio
-     async def test_search_empty_results(self, tool):
-         """Test handling of empty results."""
-         mock_response = MagicMock()
-         mock_response.json.return_value = {"studies": []}
-         mock_response.raise_for_status = MagicMock()
-
-         with patch("requests.get", return_value=mock_response):
-             results = await tool.search("nonexistent xyz 12345", max_results=5)
-             assert results == []
-
-
- @pytest.mark.integration
- class TestClinicalTrialsIntegration:
-     """Integration tests with the real API."""
-
-     @pytest.mark.asyncio
-     async def test_real_api_returns_interventional(self):
-         """Test that the real API returns interventional studies."""
-         tool = ClinicalTrialsTool()
-         results = await tool.search("long covid treatment", max_results=3)
-
-         # Should get results
-         assert len(results) > 0
-
-         # Results should mention interventions or treatments
-         all_content = " ".join([r.content.lower() for r in results])
-         has_intervention = (
-             "intervention" in all_content
-             or "treatment" in all_content
-             or "drug" in all_content
-             or "phase" in all_content
-         )
-         assert has_intervention
- ```
-
- ### Step 2: Update the ClinicalTrials Tool
-
- **File:** `src/tools/clinicaltrials.py` - add filters:
-
- ```python
- """ClinicalTrials.gov search tool using API v2."""
-
- import asyncio
- from typing import Any, ClassVar
-
- import requests
- from tenacity import retry, stop_after_attempt, wait_exponential
-
- from src.utils.exceptions import SearchError
- from src.utils.models import Citation, Evidence
-
-
- class ClinicalTrialsTool:
-     """Search tool for ClinicalTrials.gov.
-
-     Note: Uses the `requests` library instead of `httpx` because ClinicalTrials.gov's
-     WAF blocks httpx's TLS fingerprint. The `requests` library is not blocked.
-     See: https://clinicaltrials.gov/data-api/api
-     """
-
-     BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
-
-     # Fields to retrieve
-     FIELDS: ClassVar[list[str]] = [
-         "NCTId",
-         "BriefTitle",
-         "Phase",
-         "OverallStatus",
-         "Condition",
-         "InterventionName",
-         "StartDate",
-         "BriefSummary",
-     ]
-
-     # Status filter: only active/completed studies with potential data
-     STATUS_FILTER = "COMPLETED|ACTIVE_NOT_RECRUITING|RECRUITING|ENROLLING_BY_INVITATION"
-
-     # Study type filter: only interventional (drug/treatment) studies
-     STUDY_TYPE_FILTER = "INTERVENTIONAL"
-
-     @property
-     def name(self) -> str:
-         return "clinicaltrials"
-
-     @retry(
-         stop=stop_after_attempt(3),
-         wait=wait_exponential(multiplier=1, min=1, max=10),
-         reraise=True,
-     )
-     async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
-         """Search ClinicalTrials.gov for interventional studies.
-
-         Args:
-             query: Search query (e.g., "metformin alzheimer")
-             max_results: Maximum results to return (max 100)
-
-         Returns:
-             List of Evidence objects from clinical trials
-         """
-         params: dict[str, str | int] = {
-             "query.term": query,
-             "pageSize": min(max_results, 100),
-             "fields": "|".join(self.FIELDS),
-             # FILTERS - only interventional, active/completed studies
-             "filter.overallStatus": self.STATUS_FILTER,
-             "filter.studyType": self.STUDY_TYPE_FILTER,
-         }
-
-         try:
-             # Run the blocking requests.get in a separate thread for async compatibility
-             response = await asyncio.to_thread(
-                 requests.get,
-                 self.BASE_URL,
-                 params=params,
-                 headers={"User-Agent": "DeepCritical-Research-Agent/1.0"},
-                 timeout=30,
-             )
-             response.raise_for_status()
-
-             data = response.json()
-             studies = data.get("studies", [])
-             return [self._study_to_evidence(study) for study in studies[:max_results]]
-
-         except requests.HTTPError as e:
-             raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
-         except requests.RequestException as e:
-             raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e
-
-     def _study_to_evidence(self, study: dict[str, Any]) -> Evidence:
-         """Convert a clinical trial study to Evidence."""
-         # Navigate the nested structure
-         protocol = study.get("protocolSection", {})
-         id_module = protocol.get("identificationModule", {})
-         status_module = protocol.get("statusModule", {})
-         desc_module = protocol.get("descriptionModule", {})
-         design_module = protocol.get("designModule", {})
-         conditions_module = protocol.get("conditionsModule", {})
-         arms_module = protocol.get("armsInterventionsModule", {})
-
-         nct_id = id_module.get("nctId", "Unknown")
-         title = id_module.get("briefTitle", "Untitled Study")
-         status = status_module.get("overallStatus", "Unknown")
-         start_date = status_module.get("startDateStruct", {}).get("date", "Unknown")
-
-         # Get the phase (might be a list)
-         phases = design_module.get("phases", [])
-         phase = phases[0] if phases else "Not Applicable"
-
-         # Get conditions
-         conditions = conditions_module.get("conditions", [])
-         conditions_str = ", ".join(conditions[:3]) if conditions else "Unknown"
-
-         # Get interventions
-         interventions = arms_module.get("interventions", [])
-         intervention_names = [i.get("name", "") for i in interventions[:3]]
-         interventions_str = ", ".join(intervention_names) if intervention_names else "Unknown"
-
-         # Get the summary
-         summary = desc_module.get("briefSummary", "No summary available.")
-
-         # Build content with the key trial info
-         content = (
-             f"{summary[:500]}... "
-             f"Trial Phase: {phase}. "
-             f"Status: {status}. "
-             f"Conditions: {conditions_str}. "
-             f"Interventions: {interventions_str}."
-         )
-
-         return Evidence(
-             content=content[:2000],
-             citation=Citation(
-                 source="clinicaltrials",
-                 title=title[:500],
-                 url=f"https://clinicaltrials.gov/study/{nct_id}",
-                 date=start_date,
-                 authors=[],  # Trials don't have traditional authors
-             ),
-             relevance=0.85,  # Trials are highly relevant for repurposing
-         )
- ```
-
- ---
-
- ## Verification
-
- ```bash
- # Run the clinicaltrials tests
- uv run pytest tests/unit/tools/test_clinicaltrials.py -v
-
- # Run the integration test (real API)
- uv run pytest tests/unit/tools/test_clinicaltrials.py::TestClinicalTrialsIntegration -v
-
- # Run all tests
- uv run pytest tests/unit/ -v
-
- # Manual verification
- uv run python -c "
- import asyncio
- from src.tools.clinicaltrials import ClinicalTrialsTool
-
- tool = ClinicalTrialsTool()
- results = asyncio.run(tool.search('long covid treatment', 3))
-
- for r in results:
-     print(f'Title: {r.citation.title}')
-     print(f'Content: {r.content[:200]}...')
-     print()
- "
- ```
-
- ---
-
- ## Files Changed
-
- | File | Action |
- |------|--------|
- | `src/tools/clinicaltrials.py` | MODIFY (add filters) |
- | `tests/unit/tools/test_clinicaltrials.py` | MODIFY (add filter tests) |
-
- ---
-
- ## API Filter Reference
-
- ClinicalTrials.gov API v2 supports these filters:
-
- | Parameter | Values | Purpose |
- |-----------|--------|---------|
- | `filter.overallStatus` | COMPLETED, RECRUITING, etc. | Trial status |
- | `filter.studyType` | INTERVENTIONAL, OBSERVATIONAL | Study design |
- | `filter.phase` | PHASE1, PHASE2, PHASE3, PHASE4 | Trial phase |
- | `filter.geo` | Country codes | Geographic filter |
-
- See: https://clinicaltrials.gov/data-api/api
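-
- A quick manual check of these filters (illustrative sketch using the same `requests` library as the tool; the query term is just an example):
-
- ```python
- import requests
-
- # Hit the v2 endpoint directly with the status/type filters from the table above.
- resp = requests.get(
-     "https://clinicaltrials.gov/api/v2/studies",
-     params={
-         "query.term": "long covid treatment",
-         "filter.overallStatus": "COMPLETED|RECRUITING",
-         "filter.studyType": "INTERVENTIONAL",
-         "pageSize": 3,
-     },
-     timeout=30,
- )
- resp.raise_for_status()
- for study in resp.json().get("studies", []):
-     print(study["protocolSection"]["identificationModule"]["briefTitle"])
- ```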
examples/rate_limiting_demo.py ADDED
@@ -0,0 +1,82 @@
+ #!/usr/bin/env python3
+ """Demo script to verify rate limiting works correctly."""
+ 
+ import asyncio
+ import time
+ 
+ from src.tools.pubmed import PubMedTool
+ from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter, reset_pubmed_limiter
+ 
+ 
+ async def test_basic_limiter():
+     """Test basic rate limiter behavior."""
+     print("=" * 60)
+     print("Rate Limiting Demo")
+     print("=" * 60)
+ 
+     # Test 1: Basic limiter
+     print("\n[Test 1] Testing 3/second limiter...")
+     limiter = RateLimiter("3/second")
+ 
+     start = time.monotonic()
+     for i in range(6):
+         await limiter.acquire()
+         elapsed = time.monotonic() - start
+         print(f"  Request {i+1} at {elapsed:.2f}s")
+ 
+     total = time.monotonic() - start
+     print(f"  Total time for 6 requests: {total:.2f}s (expected ~2s)")
+ 
+ 
+ async def test_pubmed_limiter():
+     """Test PubMed-specific limiter."""
+     print("\n[Test 2] Testing PubMed limiter (shared)...")
+ 
+     reset_pubmed_limiter()  # Clean state
+ 
+     # Without API key: 3/sec
+     limiter = get_pubmed_limiter(api_key=None)
+     print(f"  Rate without key: {limiter.rate}")
+ 
+     # Multiple tools should share the same limiter
+     tool1 = PubMedTool()
+     tool2 = PubMedTool()
+ 
+     # Verify they share the limiter
+     print(f"  Tools share limiter: {tool1._limiter is tool2._limiter}")
+ 
+ 
+ async def test_concurrent_requests():
+     """Test rate limiting under concurrent load."""
+     print("\n[Test 3] Testing concurrent request limiting...")
+ 
+     limiter = RateLimiter("5/second")
+ 
+     async def make_request(i: int):
+         await limiter.acquire()
+         return time.monotonic()
+ 
+     start = time.monotonic()
+     # Launch 10 concurrent requests
+     tasks = [make_request(i) for i in range(10)]
+     times = await asyncio.gather(*tasks)
+ 
+     # Calculate distribution
+     relative_times = [t - start for t in times]
+     print(f"  Request times: {[f'{t:.2f}s' for t in sorted(relative_times)]}")
+ 
+     total = max(relative_times)
+     print(f"  All 10 requests completed in {total:.2f}s (expected ~2s)")
+ 
+ 
+ async def main():
+     await test_basic_limiter()
+     await test_pubmed_limiter()
+     await test_concurrent_requests()
+ 
+     print("\n" + "=" * 60)
+     print("Demo complete!")
+ 
+ 
+ if __name__ == "__main__":
+     asyncio.run(main())
pyproject.toml CHANGED
@@ -25,6 +25,8 @@ dependencies = [
    "structlog>=24.1",  # Structured logging
    "requests>=2.32.5",  # ClinicalTrials.gov (httpx blocked by WAF)
    "pydantic-graph>=1.22.0",
+     "limits>=3.0",  # Rate limiting
+     "duckduckgo-search>=5.0",  # Web search
]

[project.optional-dependencies]
@@ -44,7 +46,7 @@ dev = [
    "pre-commit>=3.7",
]
magentic = [
-     "agent-framework-core>=1.0.0b251120,<2.0.0",  # Pin to avoid breaking changes
+     "agent-framework-core>=1.0.0b251120,<2.0.0",  # Microsoft Agent Framework (PyPI)
]
embeddings = [
    "chromadb>=0.4.0",
requirements.txt CHANGED
@@ -7,6 +7,12 @@ pydantic-ai>=0.0.16
openai>=1.0.0
anthropic>=0.18.0

+ # Multi-agent orchestration (Advanced mode)
+ agent-framework-core>=1.0.0b251120
+ 
+ # Web search
+ duckduckgo-search>=5.0
+ 
# HTTP & Parsing
httpx>=0.27
beautifulsoup4>=4.12
@@ -20,6 +26,7 @@ python-dotenv>=1.0
tenacity>=8.2
structlog>=24.1
requests>=2.32.5
+ limits>=3.0  # Rate limiting (Phase 17)

# Optional: Modal for code execution
modal>=0.63.0
src/agent_factory/judges.py CHANGED
@@ -8,8 +8,10 @@ import structlog
from huggingface_hub import InferenceClient
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModel
+ from pydantic_ai.models.huggingface import HuggingFaceModel
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.anthropic import AnthropicProvider
+ from pydantic_ai.providers.huggingface import HuggingFaceProvider
from pydantic_ai.providers.openai import OpenAIProvider
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

@@ -36,6 +38,12 @@ def get_model() -> Any:
        provider = AnthropicProvider(api_key=settings.anthropic_api_key)
        return AnthropicModel(settings.anthropic_model, provider=provider)

+     if llm_provider == "huggingface":
+         # Free tier - uses HF_TOKEN from environment if available
+         model_name = settings.huggingface_model or "meta-llama/Llama-3.1-70B-Instruct"
+         hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
+         return HuggingFaceModel(model_name, provider=hf_provider)
+ 
    if llm_provider != "openai":
        logger.warning("Unknown LLM provider, defaulting to OpenAI", provider=llm_provider)

@@ -434,7 +442,7 @@ class MockJudgeHandler:
            clinical_evidence_score=clinical_score,
            clinical_reasoning=(
                f"Demo mode: {evidence_count} sources retrieved from PubMed, "
-                 "ClinicalTrials.gov, and bioRxiv. Full analysis requires LLM API key."
+                 "ClinicalTrials.gov, and Europe PMC. Full analysis requires LLM API key."
            ),
            drug_candidates=drug_candidates,
            key_findings=key_findings,
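A quick sketch of how the new branch is selected at runtime. It assumes a fresh interpreter in which `Settings` reads `LLM_PROVIDER` and `HF_TOKEN` from the environment before `src.agent_factory.judges` is first imported; the token value is a placeholder.

```python
# Hypothetical driver: select the free-tier HuggingFace judge model.
import os

os.environ["LLM_PROVIDER"] = "huggingface"
os.environ["HF_TOKEN"] = "hf_your_token_here"  # placeholder, not a real token

from src.agent_factory.judges import get_model

model = get_model()
print(type(model).__name__)  # expected: HuggingFaceModel
```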
src/agents/code_executor_agent.py ADDED
@@ -0,0 +1,69 @@
+ """Code execution agent using Modal."""
+ 
+ import asyncio
+ 
+ import structlog
+ from agent_framework import ChatAgent, ai_function
+ from agent_framework.openai import OpenAIChatClient
+ 
+ from src.tools.code_execution import get_code_executor
+ from src.utils.config import settings
+ 
+ logger = structlog.get_logger()
+ 
+ 
+ @ai_function  # type: ignore[arg-type, misc]
+ async def execute_python_code(code: str) -> str:
+     """Execute Python code in a secure sandbox.
+ 
+     Args:
+         code: The Python code to execute.
+ 
+     Returns:
+         The standard output and standard error of the execution.
+     """
+     logger.info("Code execution starting", code_length=len(code))
+     executor = get_code_executor()
+     loop = asyncio.get_running_loop()
+ 
+     # Run in executor to avoid blocking
+     try:
+         result = await loop.run_in_executor(None, lambda: executor.execute(code))
+         if result["success"]:
+             logger.info("Code execution succeeded")
+             return f"Stdout:\n{result['stdout']}"
+         else:
+             logger.warning("Code execution failed", error=result.get("error"))
+             return f"Error:\n{result['error']}\nStderr:\n{result['stderr']}"
+     except Exception as e:
+         logger.error("Code execution exception", error=str(e))
+         return f"Execution failed: {e}"
+ 
+ 
+ def create_code_executor_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent:
+     """Create a code executor agent.
+ 
+     Args:
+         chat_client: Optional custom chat client.
+ 
+     Returns:
+         ChatAgent configured for code execution.
+     """
+     client = chat_client or OpenAIChatClient(
+         model_id=settings.openai_model,
+         api_key=settings.openai_api_key,
+     )
+ 
+     return ChatAgent(
+         name="CodeExecutorAgent",
+         description="Executes Python code for data analysis, calculation, and simulation.",
+         instructions="""You are a code execution expert.
+ When asked to analyze data or perform calculations, write Python code and execute it.
+ Use libraries like pandas, numpy, scipy, matplotlib.
+ 
+ Always output the code you want to execute using the `execute_python_code` tool.
+ Check the output and interpret the results.""",
+         chat_client=client,
+         tools=[execute_python_code],
+         temperature=0.0,  # Strict code generation
+     )
src/agents/judge_agent_llm.py ADDED
@@ -0,0 +1,45 @@
+ """LLM Judge for sub-iterations."""
+ 
+ from typing import Any
+ 
+ import structlog
+ from pydantic_ai import Agent
+ 
+ from src.agent_factory.judges import get_model
+ from src.utils.models import JudgeAssessment
+ 
+ logger = structlog.get_logger()
+ 
+ 
+ class LLMSubIterationJudge:
+     """Judge that uses an LLM to assess sub-iteration results."""
+ 
+     def __init__(self) -> None:
+         self.model = get_model()
+         self.agent = Agent(
+             model=self.model,
+             output_type=JudgeAssessment,
+             system_prompt="""You are a strict judge evaluating a research task.
+ 
+ Evaluate if the result is sufficient to answer the task.
+ Provide scores and detailed reasoning.
+ If not sufficient, suggest next steps.""",
+             retries=3,
+         )
+ 
+     async def assess(self, task: str, result: Any, history: list[Any]) -> JudgeAssessment:
+         """Assess the result using LLM."""
+         logger.info("LLM judge assessing result", task=task[:100], history_len=len(history))
+ 
+         prompt = f"""Task: {task}
+ 
+ Current Result:
+ {str(result)[:4000]}
+ 
+ History of previous attempts: {len(history)}
+ 
+ Evaluate validity and sufficiency."""
+ 
+         run_result = await self.agent.run(prompt)
+         logger.info("LLM judge assessment complete", sufficient=run_result.output.sufficient)
+         return run_result.output
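A smoke-test sketch for the judge, assuming a configured LLM provider. The `sufficient` and `reasoning` fields are the ones the sub-iteration middleware below reads from `JudgeAssessment`.

```python
# Hypothetical one-shot assessment.
import asyncio

from src.agents.judge_agent_llm import LLMSubIterationJudge


async def main() -> None:
    judge = LLMSubIterationJudge()
    assessment = await judge.assess(
        task="Find evidence that metformin extends lifespan in mice.",
        result="Three PubMed abstracts report lifespan extension in C57BL/6 mice.",
        history=[],
    )
    print(assessment.sufficient, assessment.reasoning)


asyncio.run(main())
```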
src/agents/magentic_agents.py CHANGED
@@ -29,7 +29,7 @@ def create_search_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgen
    return ChatAgent(
        name="SearchAgent",
        description=(
-             "Searches biomedical databases (PubMed, ClinicalTrials.gov, bioRxiv) "
+             "Searches biomedical databases (PubMed, ClinicalTrials.gov, Europe PMC) "
            "for drug repurposing evidence"
        ),
        instructions="""You are a biomedical search specialist. When asked to find evidence:
src/agents/retrieval_agent.py ADDED
@@ -0,0 +1,82 @@
+ """Retrieval agent for web search and context management."""
+ 
+ import structlog
+ from agent_framework import ChatAgent, ai_function
+ from agent_framework.openai import OpenAIChatClient
+ 
+ from src.state import get_magentic_state
+ from src.tools.web_search import WebSearchTool
+ from src.utils.config import settings
+ 
+ logger = structlog.get_logger()
+ 
+ _web_search = WebSearchTool()
+ 
+ 
+ @ai_function  # type: ignore[arg-type, misc]
+ async def search_web(query: str, max_results: int = 10) -> str:
+     """Search the web using DuckDuckGo.
+ 
+     Args:
+         query: Search keywords.
+         max_results: Maximum results to return (default 10).
+ 
+     Returns:
+         Formatted search results.
+     """
+     logger.info("Web search starting", query=query, max_results=max_results)
+     state = get_magentic_state()
+ 
+     results = await _web_search.search(query, max_results)
+     if not results.evidence:
+         logger.info("Web search returned no results", query=query)
+         return f"No web results found for: {query}"
+ 
+     # Update state
+     # We add *all* found results to state
+     new_count = state.add_evidence(results.evidence)
+     logger.info(
+         "Web search complete",
+         query=query,
+         results_found=len(results.evidence),
+         new_evidence=new_count,
+     )
+ 
+     # Use embedding service for deduplication/indexing if available
+     if state.embedding_service:
+         # This method also adds to vector DB as a side effect for unique items
+         await state.embedding_service.deduplicate(results.evidence)
+ 
+     output = [f"Found {len(results.evidence)} web results ({new_count} new stored):\n"]
+     for i, r in enumerate(results.evidence[:max_results], 1):
+         output.append(f"{i}. **{r.citation.title}**")
+         output.append(f"   Source: {r.citation.url}")
+         output.append(f"   {r.content[:300]}...\n")
+ 
+     return "\n".join(output)
+ 
+ 
+ def create_retrieval_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent:
+     """Create a retrieval agent.
+ 
+     Args:
+         chat_client: Optional custom chat client.
+ 
+     Returns:
+         ChatAgent configured for retrieval.
+     """
+     client = chat_client or OpenAIChatClient(
+         model_id=settings.openai_model,
+         api_key=settings.openai_api_key,
+     )
+ 
+     return ChatAgent(
+         name="RetrievalAgent",
+         description="Searches the web and manages context/evidence.",
+         instructions="""You are a retrieval specialist.
+ Use `search_web` to find information on the internet.
+ Your goal is to gather relevant evidence for the research task.
+ Always summarize what you found.""",
+         chat_client=client,
+         tools=[search_web],
+     )
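A usage sketch, assuming an OpenAI key is available. The state must be initialized first, since `search_web` stores evidence into the shared magentic state; the no-argument `init_magentic_state()` call mirrors the fallback used by the hierarchical orchestrator below.

```python
# Hypothetical driver: let the retrieval agent decide when to call `search_web`.
import asyncio

from src.agents.retrieval_agent import create_retrieval_agent
from src.state import init_magentic_state


async def main() -> None:
    init_magentic_state()  # state must exist before the tool stores evidence
    agent = create_retrieval_agent()
    response = await agent.run("Find recent reviews on metformin and aging.")
    print(response.text)  # assumed accessor, as in the code-executor example


asyncio.run(main())
```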
src/app.py CHANGED
@@ -31,7 +31,7 @@ def configure_orchestrator(

    Args:
        use_mock: If True, use MockJudgeHandler (no API key needed)
-         mode: Orchestrator mode ("simple" or "magentic")
+         mode: Orchestrator mode ("simple" or "advanced")
        user_api_key: Optional user-provided API key (BYOK)
        api_provider: API provider ("openai" or "anthropic")

@@ -115,7 +115,7 @@ async def research_agent(
    Args:
        message: User's research question
        history: Chat history (Gradio format)
-         mode: Orchestrator mode ("simple" or "magentic")
+         mode: Orchestrator mode ("simple" or "advanced")
        api_key: Optional user-provided API key (BYOK - Bring Your Own Key)
        api_provider: API provider ("openai" or "anthropic")

@@ -135,10 +135,11 @@ async def research_agent(
    has_user_key = bool(user_api_key)
    has_paid_key = has_openai or has_anthropic or has_user_key

-     # Magentic mode requires OpenAI specifically
-     if mode == "magentic" and not (has_openai or (has_user_key and api_provider == "openai")):
+     # Advanced mode requires OpenAI specifically (due to agent-framework binding)
+     if mode == "advanced" and not (has_openai or (has_user_key and api_provider == "openai")):
        yield (
-             "⚠️ **Warning**: Magentic mode requires OpenAI API key. Falling back to simple mode.\n\n"
+             "⚠️ **Warning**: Advanced mode currently requires OpenAI API key. "
+             "Falling back to simple mode.\n\n"
        )
        mode = "simple"

@@ -186,78 +187,68 @@ async def research_agent(
        yield f"❌ **Error**: {e!s}"


- def create_demo() -> Any:
+ def create_demo() -> gr.ChatInterface:
    """
    Create the Gradio demo interface with MCP support.

    Returns:
        Configured Gradio Blocks interface with MCP server enabled
    """
-     with gr.Blocks(
-         title="DeepCritical - Drug Repurposing Research Agent",
-     ) as demo:
-         # 1. Minimal Header (Option A: 2 lines max)
-         gr.Markdown(
-             "# 🧬 DeepCritical\n"
-             "*AI-Powered Drug Repurposing Agent — searches PubMed, ClinicalTrials.gov & bioRxiv*"
-         )
- 
-         # 2. Main Chat Interface
-         # Config inputs will be in a collapsed accordion below the chat input
-         gr.ChatInterface(
-             fn=research_agent,
-             examples=[
-                 [
-                     "What drugs could be repurposed for Alzheimer's disease?",
-                     "simple",
-                     "",
-                     "openai",
-                 ],
-                 [
-                     "Is metformin effective for treating cancer?",
-                     "simple",
-                     "",
-                     "openai",
-                 ],
-                 [
-                     "What medications show promise for Long COVID treatment?",
-                     "simple",
-                     "",
-                     "openai",
-                 ],
-             ],
-             additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False),
-             additional_inputs=[
-                 gr.Radio(
-                     choices=["simple", "magentic"],
-                     value="simple",
-                     label="Orchestrator Mode",
-                     info="Simple: Linear | Magentic: Multi-Agent (OpenAI)",
-                 ),
-                 gr.Textbox(
-                     label="🔑 API Key (Optional - BYOK)",
-                     placeholder="sk-... or sk-ant-...",
-                     type="password",
-                     info="Enter your own API key. Never stored.",
-                 ),
-                 gr.Radio(
-                     choices=["openai", "anthropic"],
-                     value="openai",
-                     label="API Provider",
-                     info="Select the provider for your API key",
-                 ),
-             ],
-         )
- 
-         # 3. Minimal Footer (Option C: Remove MCP Tabs, keep info)
-         gr.Markdown(
-             """
-             ---
-             *Research tool only — not for medical advice.*
-             **MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`
-             """,
-             elem_classes=["footer"],
-         )
+     # 1. Unwrapped ChatInterface (Fixes Accordion Bug)
+     demo = gr.ChatInterface(
+         fn=research_agent,
+         title="🧬 DeepCritical",
+         description=(
+             "*AI-Powered Drug Repurposing Agent — searches PubMed, "
+             "ClinicalTrials.gov & Europe PMC*\n\n"
+             "---\n"
+             "*Research tool only — not for medical advice.*  \n"
+             "**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`"
+         ),
+         examples=[
+             [
+                 "What drugs could be repurposed for Alzheimer's disease?",
+                 "simple",
+                 "",
+                 "openai",
+             ],
+             [
+                 "Is metformin effective for treating cancer?",
+                 "simple",
+                 "",
+                 "openai",
+             ],
+             [
+                 "What medications show promise for Long COVID treatment?",
+                 "simple",
+                 "",
+                 "openai",
+             ],
+         ],
+         additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False),
+         additional_inputs=[
+             gr.Radio(
+                 choices=["simple", "advanced"],
+                 value="simple",
+                 label="Orchestrator Mode",
+                 info=(
+                     "Simple: Linear (Free Tier Friendly) | Advanced: Multi-Agent (Requires OpenAI)"
+                 ),
+             ),
+             gr.Textbox(
+                 label="🔑 API Key (Optional - BYOK)",
+                 placeholder="sk-... or sk-ant-...",
+                 type="password",
+                 info="Enter your own API key. Never stored.",
+             ),
+             gr.Radio(
+                 choices=["openai", "anthropic"],
+                 value="openai",
+                 label="API Provider",
+                 info="Select the provider for your API key",
+             ),
+         ],
+     )

    return demo

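For context, a sketch of how the interface would be served with the MCP endpoint the footer advertises. The `mcp_server=True` flag is Gradio's MCP launch option and is an assumption here; the actual launch call lives outside this hunk.

```python
# Hypothetical entry point for the app.
from src.app import create_demo

if __name__ == "__main__":
    demo = create_demo()
    demo.launch(mcp_server=True)  # assumed flag; serves /gradio_api/mcp/
```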
src/middleware/sub_iteration.py ADDED
@@ -0,0 +1,135 @@
+ """Middleware for orchestrating sub-iterations with research teams and judges."""
+ 
+ from typing import Any, Protocol
+ 
+ import structlog
+ 
+ from src.utils.models import AgentEvent, JudgeAssessment
+ 
+ logger = structlog.get_logger()
+ 
+ 
+ class SubIterationTeam(Protocol):
+     """Protocol for a research team that executes a sub-task."""
+ 
+     async def execute(self, task: str) -> Any:
+         """Execute the sub-task and return a result."""
+         ...
+ 
+ 
+ class SubIterationJudge(Protocol):
+     """Protocol for a judge that evaluates the sub-task result."""
+ 
+     async def assess(self, task: str, result: Any, history: list[Any]) -> JudgeAssessment:
+         """Assess the quality of the result."""
+         ...
+ 
+ 
+ class SubIterationMiddleware:
+     """
+     Middleware that manages a sub-iteration loop:
+     1. Orchestrator delegates to a Research Team.
+     2. Research Team produces a result.
+     3. Judge evaluates the result.
+     4. Loop continues until Judge approves or max iterations reached.
+     """
+ 
+     def __init__(
+         self,
+         team: SubIterationTeam,
+         judge: SubIterationJudge,
+         max_iterations: int = 3,
+     ):
+         self.team = team
+         self.judge = judge
+         self.max_iterations = max_iterations
+ 
+     async def run(
+         self,
+         task: str,
+         event_callback: Any = None,  # Optional callback for streaming events
+     ) -> tuple[Any, JudgeAssessment | None]:
+         """
+         Run the sub-iteration loop.
+ 
+         Args:
+             task: The research task or question.
+             event_callback: Async callable to report events (e.g. to UI).
+ 
+         Returns:
+             Tuple of (best_result, final_assessment).
+         """
+         history: list[Any] = []
+         best_result: Any = None
+         final_assessment: JudgeAssessment | None = None
+ 
+         for i in range(1, self.max_iterations + 1):
+             logger.info("Sub-iteration starting", iteration=i, task=task)
+ 
+             if event_callback:
+                 await event_callback(
+                     AgentEvent(
+                         type="looping",
+                         message=f"Sub-iteration {i}: Executing task...",
+                         iteration=i,
+                     )
+                 )
+ 
+             # 1. Team Execution
+             try:
+                 result = await self.team.execute(task)
+                 history.append(result)
+                 best_result = result  # Assume latest is best for now
+             except Exception as e:
+                 logger.error("Sub-iteration execution failed", error=str(e))
+                 if event_callback:
+                     await event_callback(
+                         AgentEvent(
+                             type="error",
+                             message=f"Sub-iteration execution failed: {e}",
+                             iteration=i,
+                         )
+                     )
+                 return best_result, final_assessment
+ 
+             # 2. Judge Assessment
+             try:
+                 assessment = await self.judge.assess(task, result, history)
+                 final_assessment = assessment
+             except Exception as e:
+                 logger.error("Sub-iteration judge failed", error=str(e))
+                 if event_callback:
+                     await event_callback(
+                         AgentEvent(
+                             type="error",
+                             message=f"Sub-iteration judge failed: {e}",
+                             iteration=i,
+                         )
+                     )
+                 return best_result, final_assessment
+ 
+             # 3. Decision
+             if assessment.sufficient:
+                 logger.info("Sub-iteration sufficient", iteration=i)
+                 return best_result, assessment
+ 
+             # If not sufficient, we might refine the task for the next iteration.
+             # For this implementation, we assume the team is smart enough or the task stays the same,
+             # but we could append feedback to the task.
+ 
+             feedback = assessment.reasoning
+             logger.info("Sub-iteration insufficient", feedback=feedback)
+ 
+             if event_callback:
+                 await event_callback(
+                     AgentEvent(
+                         type="looping",
+                         message=(
+                             f"Sub-iteration {i} result insufficient. Feedback: {feedback[:100]}..."
+                         ),
+                         iteration=i,
+                     )
+                 )
+ 
+         logger.warning("Sub-iteration max iterations reached", task=task)
+         return best_result, final_assessment
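Because `SubIterationTeam` is a `Protocol`, anything with a matching `execute` coroutine satisfies it structurally. A minimal sketch wiring the loop with a toy team (the judge requires a configured LLM provider):

```python
# Toy team + real judge driving the middleware.
import asyncio

from src.agents.judge_agent_llm import LLMSubIterationJudge
from src.middleware.sub_iteration import SubIterationMiddleware


class EchoTeam:
    """Stand-in team: satisfies SubIterationTeam without subclassing."""

    async def execute(self, task: str) -> str:
        return f"Draft answer for: {task}"


async def main() -> None:
    middleware = SubIterationMiddleware(EchoTeam(), LLMSubIterationJudge(), max_iterations=2)
    result, assessment = await middleware.run("Summarize metformin repurposing leads.")
    print(result)
    print(assessment.sufficient if assessment else None)


asyncio.run(main())
```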
src/orchestrator_factory.py CHANGED
@@ -9,12 +9,29 @@ from src.legacy_orchestrator import (
)
from src.utils.models import OrchestratorConfig

+ import structlog
+ 
+ logger = structlog.get_logger()
+ 
+ 
+ def _get_magentic_orchestrator_class() -> Any:
+     """Import MagenticOrchestrator lazily to avoid hard dependency."""
+     try:
+         from src.orchestrator_magentic import MagenticOrchestrator
+ 
+         return MagenticOrchestrator
+     except ImportError as e:
+         logger.error("Failed to import MagenticOrchestrator", error=str(e))
+         raise ValueError(
+             "Advanced mode requires agent-framework-core. Please install it or use mode='simple'."
+         ) from e
+ 

def create_orchestrator(
    search_handler: SearchHandlerProtocol | None = None,
    judge_handler: JudgeHandlerProtocol | None = None,
    config: OrchestratorConfig | None = None,
-     mode: Literal["simple", "magentic"] = "simple",
+     mode: Literal["simple", "magentic", "advanced"] | None = None,
) -> Any:
    """
    Create an orchestrator instance.
@@ -23,25 +40,19 @@ def create_orchestrator(
        search_handler: The search handler (required for simple mode)
        judge_handler: The judge handler (required for simple mode)
        config: Optional configuration
-         mode: "simple" for Phase 4 loop, "magentic" for ChatAgent-based multi-agent
+         mode: "simple", "magentic", "advanced" or None (auto-detect)

    Returns:
        Orchestrator instance
- 
-     Note:
-         Magentic mode does NOT use search_handler/judge_handler.
-         It creates ChatAgent instances with internal LLMs that call tools directly.
    """
-     if mode == "magentic":
-         try:
-             from src.orchestrator_magentic import MagenticOrchestrator
- 
-             return MagenticOrchestrator(
-                 max_rounds=config.max_iterations if config else 10,
-             )
-         except ImportError:
-             # Fallback to simple if agent-framework not installed
-             pass
+     effective_mode = _determine_mode(mode)
+     logger.info("Creating orchestrator", mode=effective_mode)
+ 
+     if effective_mode == "advanced":
+         orchestrator_cls = _get_magentic_orchestrator_class()
+         return orchestrator_cls(
+             max_rounds=config.max_iterations if config else 10,
+         )

    # Simple mode requires handlers
    if search_handler is None or judge_handler is None:
@@ -52,3 +63,17 @@ def create_orchestrator(
        judge_handler=judge_handler,
        config=config,
    )
+ 
+ 
+ def _determine_mode(explicit_mode: str | None) -> str:
+     """Determine which mode to use."""
+     if explicit_mode:
+         if explicit_mode in ("magentic", "advanced"):
+             return "advanced"
+         return "simple"
+ 
+     # Auto-detect: advanced if paid API key available
+     if settings.has_openai_key:
+         return "advanced"
+ 
+     return "simple"
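A sketch of the three call shapes the factory now accepts, assuming `OPENAI_API_KEY` is set so both the explicit and auto-detected paths resolve to "advanced":

```python
# Mode resolution after this change (all three return a MagenticOrchestrator
# when agent-framework-core is installed; ValueError otherwise).
from src.orchestrator_factory import create_orchestrator

advanced = create_orchestrator(mode="advanced")  # explicit advanced mode
legacy = create_orchestrator(mode="magentic")    # old spelling, normalized to "advanced"
auto = create_orchestrator()                     # mode=None -> checks settings.has_openai_key
```

Without an OpenAI key, the auto-detect path falls back to simple mode and then requires `search_handler` and `judge_handler` to be supplied.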
src/orchestrator_hierarchical.py ADDED
@@ -0,0 +1,95 @@
+ """Hierarchical orchestrator using middleware and sub-teams."""
+ 
+ import asyncio
+ from collections.abc import AsyncGenerator
+ 
+ import structlog
+ 
+ from src.agents.judge_agent_llm import LLMSubIterationJudge
+ from src.agents.magentic_agents import create_search_agent
+ from src.middleware.sub_iteration import SubIterationMiddleware, SubIterationTeam
+ from src.services.embeddings import get_embedding_service
+ from src.state import init_magentic_state
+ from src.utils.models import AgentEvent
+ 
+ logger = structlog.get_logger()
+ 
+ 
+ class ResearchTeam(SubIterationTeam):
+     """Adapts Magentic ChatAgent to SubIterationTeam protocol."""
+ 
+     def __init__(self) -> None:
+         self.agent = create_search_agent()
+ 
+     async def execute(self, task: str) -> str:
+         response = await self.agent.run(task)
+         if response.messages:
+             for msg in reversed(response.messages):
+                 if msg.role == "assistant" and msg.text:
+                     return str(msg.text)
+         return "No response from agent."
+ 
+ 
+ class HierarchicalOrchestrator:
+     """Orchestrator that uses hierarchical teams and sub-iterations."""
+ 
+     def __init__(self) -> None:
+         self.team = ResearchTeam()
+         self.judge = LLMSubIterationJudge()
+         self.middleware = SubIterationMiddleware(self.team, self.judge, max_iterations=5)
+ 
+     async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
+         logger.info("Starting hierarchical orchestrator", query=query)
+ 
+         try:
+             service = get_embedding_service()
+             init_magentic_state(service)
+         except Exception as e:
+             logger.warning(
+                 "Embedding service initialization failed, using default state",
+                 error=str(e),
+             )
+             init_magentic_state()
+ 
+         yield AgentEvent(type="started", message=f"Starting research: {query}")
+ 
+         queue: asyncio.Queue[AgentEvent | None] = asyncio.Queue()
+ 
+         async def event_callback(event: AgentEvent) -> None:
+             await queue.put(event)
+ 
+         task_future = asyncio.create_task(self.middleware.run(query, event_callback))
+ 
+         while not task_future.done():
+             get_event = asyncio.create_task(queue.get())
+             done, _ = await asyncio.wait(
+                 {task_future, get_event}, return_when=asyncio.FIRST_COMPLETED
+             )
+ 
+             if get_event in done:
+                 event = get_event.result()
+                 if event:
+                     yield event
+             else:
+                 get_event.cancel()
+ 
+         # Process remaining events
+         while not queue.empty():
+             ev = queue.get_nowait()
+             if ev:
+                 yield ev
+ 
+         try:
+             result, assessment = await task_future
+ 
+             assessment_text = assessment.reasoning if assessment else "None"
+             yield AgentEvent(
+                 type="complete",
+                 message=(
+                     f"Research complete.\n\nResult:\n{result}\n\nAssessment:\n{assessment_text}"
+                 ),
+                 data={"assessment": assessment.model_dump() if assessment else None},
+             )
+         except Exception as e:
+             logger.error("Orchestrator failed", error=str(e))
+             yield AgentEvent(type="error", message=f"Orchestrator failed: {e}")
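A driver sketch for consuming the event stream, assuming OpenAI credentials and the optional embedding dependencies are available:

```python
# Hypothetical consumer of the hierarchical orchestrator's AgentEvent stream.
import asyncio

from src.orchestrator_hierarchical import HierarchicalOrchestrator


async def main() -> None:
    orch = HierarchicalOrchestrator()
    async for event in orch.run("What drugs could be repurposed for Long COVID?"):
        print(f"[{event.type}] {event.message[:120]}")


asyncio.run(main())
```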
src/orchestrator_magentic.py CHANGED
@@ -128,7 +128,7 @@ class MagenticOrchestrator:
        task = f"""Research drug repurposing opportunities for: {query}

Workflow:
- 1. SearchAgent: Find evidence from PubMed, ClinicalTrials.gov, and bioRxiv
+ 1. SearchAgent: Find evidence from PubMed, ClinicalTrials.gov, and Europe PMC
2. HypothesisAgent: Generate mechanistic hypotheses (Drug -> Target -> Pathway -> Effect)
3. JudgeAgent: Evaluate if evidence is sufficient
4. If insufficient -> SearchAgent refines search based on gaps
@@ -158,10 +158,41 @@ The final output should be a structured research report."""
                iteration=iteration,
            )

+     def _extract_text(self, message: Any) -> str:
+         """
+         Defensively extract text from a message object.
+ 
+         Fixes bug where message.text might return the object itself or its repr.
+         """
+         if not message:
+             return ""
+ 
+         # Priority 1: .content (often the raw string or list of content)
+         if hasattr(message, "content") and message.content:
+             content = message.content
+             # If it's a list (e.g., Multi-modal), join text parts
+             if isinstance(content, list):
+                 return " ".join([str(c.text) for c in content if hasattr(c, "text")])
+             return str(content)
+ 
+         # Priority 2: .text (standard, but sometimes buggy/missing)
+         if hasattr(message, "text") and message.text:
+             # Verify it's not the object itself or a repr string
+             text = str(message.text)
+             if text.startswith("<") and "object at" in text:
+                 # Likely a repr string, ignore if possible
+                 pass
+             else:
+                 return text
+ 
+         # Fallback: If we can't find clean text, return str(message),
+         # taking care to avoid infinite recursion if str() calls .text
+         return str(message)
+ 
    def _process_event(self, event: Any, iteration: int) -> AgentEvent | None:
        """Process workflow event into AgentEvent."""
        if isinstance(event, MagenticOrchestratorMessageEvent):
-             text = event.message.text if event.message else ""
+             text = self._extract_text(event.message)
            if text:
                return AgentEvent(
                    type="judging",
@@ -171,7 +202,7 @@ The final output should be a structured research report."""

        elif isinstance(event, MagenticAgentMessageEvent):
            agent_name = event.agent_id or "unknown"
-             text = event.message.text if event.message else ""
+             text = self._extract_text(event.message)

            event_type = "judging"
            if "search" in agent_name.lower():
@@ -190,7 +221,7 @@ The final output should be a structured research report."""
            )

        elif isinstance(event, MagenticFinalResultEvent):
-             text = event.message.text if event.message else "No result"
+             text = self._extract_text(event.message) if event.message else "No result"
            return AgentEvent(
                type="complete",
                message=text,
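A toy check of the extraction order: `.content` wins over a repr-looking `.text`. The `__new__` trick skips `__init__` (and thus any API setup), which works here only because `_extract_text` never touches `self`; importing the module still requires the magentic extra.

```python
# Duck-typed message: .content is clean, .text looks like an object repr.
from types import SimpleNamespace

from src.orchestrator_magentic import MagenticOrchestrator

orch = MagenticOrchestrator.__new__(MagenticOrchestrator)  # bypass __init__
msg = SimpleNamespace(content="hello", text="<Message object at 0x0>")
assert orch._extract_text(msg) == "hello"
```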
src/state/__init__.py ADDED
@@ -0,0 +1,9 @@
+ """State package - re-exports from agents.state for compatibility."""
+ 
+ from src.agents.state import (
+     MagenticState,
+     get_magentic_state,
+     init_magentic_state,
+ )
+ 
+ __all__ = ["MagenticState", "get_magentic_state", "init_magentic_state"]
src/tools/__init__.py CHANGED
@@ -1,6 +1,8 @@
"""Search tools package."""

from src.tools.base import SearchTool
+ from src.tools.clinicaltrials import ClinicalTrialsTool
+ from src.tools.europepmc import EuropePMCTool
from src.tools.pubmed import PubMedTool
from src.tools.rag_tool import RAGTool, create_rag_tool
from src.tools.search_handler import SearchHandler
src/tools/pubmed.py CHANGED
@@ -1,6 +1,5 @@
"""PubMed search tool using NCBI E-utilities."""

- import asyncio
from typing import Any

import httpx
@@ -8,6 +7,7 @@ import xmltodict
from tenacity import retry, stop_after_attempt, wait_exponential

from src.tools.query_utils import preprocess_query
+ from src.tools.rate_limiter import get_pubmed_limiter
from src.utils.config import settings
from src.utils.exceptions import RateLimitError, SearchError
from src.utils.models import Citation, Evidence
@@ -17,7 +17,6 @@ class PubMedTool:
    """Search tool for PubMed/NCBI."""

    BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
-     RATE_LIMIT_DELAY = 0.34  # ~3 requests/sec without API key
    HTTP_TOO_MANY_REQUESTS = 429

    def __init__(self, api_key: str | None = None) -> None:
@@ -25,7 +24,9 @@ class PubMedTool:
        # Ignore placeholder values from .env.example
        if self.api_key == "your-ncbi-key-here":
            self.api_key = None
-         self._last_request_time = 0.0
+ 
+         # Use shared rate limiter
+         self._limiter = get_pubmed_limiter(self.api_key)

    @property
    def name(self) -> str:
@@ -33,12 +34,7 @@ class PubMedTool:

    async def _rate_limit(self) -> None:
        """Enforce NCBI rate limiting."""
-         loop = asyncio.get_running_loop()
-         now = loop.time()
-         elapsed = now - self._last_request_time
-         if elapsed < self.RATE_LIMIT_DELAY:
-             await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
-         self._last_request_time = loop.time()
+         await self._limiter.acquire()

    def _build_params(self, **kwargs: Any) -> dict[str, Any]:
        """Build request params with optional API key."""
src/tools/rate_limiter.py ADDED
@@ -0,0 +1,121 @@
+ """Rate limiting utilities using the limits library."""
+ 
+ import asyncio
+ from typing import ClassVar
+ 
+ from limits import RateLimitItem, parse
+ from limits.storage import MemoryStorage
+ from limits.strategies import MovingWindowRateLimiter
+ 
+ 
+ class RateLimiter:
+     """
+     Async-compatible rate limiter using limits library.
+ 
+     Uses moving window algorithm for smooth rate limiting.
+     """
+ 
+     def __init__(self, rate: str) -> None:
+         """
+         Initialize rate limiter.
+ 
+         Args:
+             rate: Rate string like "3/second" or "10/second"
+         """
+         self.rate = rate
+         self._storage = MemoryStorage()
+         self._limiter = MovingWindowRateLimiter(self._storage)
+         self._rate_limit: RateLimitItem = parse(rate)
+         self._identity = "default"  # Single identity for shared limiting
+ 
+     async def acquire(self, wait: bool = True) -> bool:
+         """
+         Acquire permission to make a request.
+ 
+         ASYNC-SAFE: Uses asyncio.sleep(), never time.sleep().
+         The polling pattern allows other coroutines to run while waiting.
+ 
+         Args:
+             wait: If True, wait until allowed. If False, return immediately.
+ 
+         Returns:
+             True if allowed, False if not (only when wait=False)
+         """
+         while True:
+             # Check if we can proceed (synchronous, fast - ~microseconds)
+             if self._limiter.hit(self._rate_limit, self._identity):
+                 return True
+ 
+             if not wait:
+                 return False
+ 
+             # CRITICAL: Use asyncio.sleep(), NOT time.sleep()
+             # This yields control to the event loop, allowing other
+             # coroutines (UI, parallel searches) to run.
+             # Using 0.01s for fine-grained responsiveness.
+             await asyncio.sleep(0.01)
+ 
+     def reset(self) -> None:
+         """Reset the rate limiter (for testing)."""
+         self._storage.reset()
+ 
+ 
+ # Singleton limiter for PubMed/NCBI
+ _pubmed_limiter: RateLimiter | None = None
+ 
+ 
+ def get_pubmed_limiter(api_key: str | None = None) -> RateLimiter:
+     """
+     Get the shared PubMed rate limiter.
+ 
+     Rate depends on whether API key is provided:
+     - Without key: 3 requests/second
+     - With key: 10 requests/second
+ 
+     Args:
+         api_key: NCBI API key (optional)
+ 
+     Returns:
+         Shared RateLimiter instance
+     """
+     global _pubmed_limiter
+ 
+     if _pubmed_limiter is None:
+         rate = "10/second" if api_key else "3/second"
+         _pubmed_limiter = RateLimiter(rate)
+ 
+     return _pubmed_limiter
+ 
+ 
+ def reset_pubmed_limiter() -> None:
+     """Reset the PubMed limiter (for testing)."""
+     global _pubmed_limiter
+     _pubmed_limiter = None
+ 
+ 
+ # Factory for other APIs
+ class RateLimiterFactory:
+     """Factory for creating/getting rate limiters for different APIs."""
+ 
+     _limiters: ClassVar[dict[str, RateLimiter]] = {}
+ 
+     @classmethod
+     def get(cls, api_name: str, rate: str) -> RateLimiter:
+         """
+         Get or create a rate limiter for an API.
+ 
+         Args:
+             api_name: Unique identifier for the API
+             rate: Rate limit string (e.g., "10/second")
+ 
+         Returns:
+             RateLimiter instance (shared for same api_name)
+         """
+         if api_name not in cls._limiters:
+             cls._limiters[api_name] = RateLimiter(rate)
+         return cls._limiters[api_name]
+ 
+     @classmethod
+     def reset_all(cls) -> None:
+         """Reset all limiters (for testing)."""
+         cls._limiters.clear()
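Usage of the factory, showing that limiters are shared per API name across tool instances (the `"europepmc"` name is illustrative):

```python
# Per-API limiter shared across callers.
import asyncio

from src.tools.rate_limiter import RateLimiterFactory


async def main() -> None:
    limiter = RateLimiterFactory.get("europepmc", "10/second")
    # Same name -> same instance, so all callers share one moving window.
    assert limiter is RateLimiterFactory.get("europepmc", "10/second")
    for _ in range(3):
        await limiter.acquire()  # waits via asyncio.sleep once the window is full


asyncio.run(main())
```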
src/tools/web_search.py ADDED
@@ -0,0 +1,53 @@
+ """Web search tool using DuckDuckGo."""
+ 
+ import asyncio
+ 
+ import structlog
+ from duckduckgo_search import DDGS
+ 
+ from src.utils.models import Citation, Evidence, SearchResult
+ 
+ logger = structlog.get_logger()
+ 
+ 
+ class WebSearchTool:
+     """Tool for searching the web using DuckDuckGo."""
+ 
+     def __init__(self) -> None:
+         self._ddgs = DDGS()
+ 
+     async def search(self, query: str, max_results: int = 10) -> SearchResult:
+         """Execute a web search."""
+         try:
+             loop = asyncio.get_running_loop()
+ 
+             def _do_search() -> list[dict[str, str]]:
+                 # text() returns an iterator, need to list() it or iterate
+                 return list(self._ddgs.text(query, max_results=max_results))
+ 
+             raw_results = await loop.run_in_executor(None, _do_search)
+ 
+             evidence = []
+             for r in raw_results:
+                 ev = Evidence(
+                     content=r.get("body", ""),
+                     citation=Citation(
+                         title=r.get("title", "No Title"),
+                         url=r.get("href", ""),
+                         source="web",
+                         date="Unknown",
+                         authors=[],
+                     ),
+                     relevance=0.0,
+                 )
+                 evidence.append(ev)
+ 
+             return SearchResult(
+                 query=query, evidence=evidence, sources_searched=["web"], total_found=len(evidence)
+             )
+ 
+         except Exception as e:
+             logger.error("Web search failed", error=str(e))
+             return SearchResult(
+                 query=query, evidence=[], sources_searched=["web"], total_found=0, errors=[str(e)]
+             )
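The tool can also be exercised directly, without the retrieval agent (network access required):

```python
# Direct WebSearchTool usage.
import asyncio

from src.tools.web_search import WebSearchTool


async def main() -> None:
    result = await WebSearchTool().search("metformin drug repurposing", max_results=3)
    for ev in result.evidence:
        print(ev.citation.title, ev.citation.url)


asyncio.run(main())
```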
src/utils/config.py CHANGED
@@ -23,13 +23,20 @@ class Settings(BaseSettings):
    # LLM Configuration
    openai_api_key: str | None = Field(default=None, description="OpenAI API key")
    anthropic_api_key: str | None = Field(default=None, description="Anthropic API key")
-     llm_provider: Literal["openai", "anthropic"] = Field(
+     llm_provider: Literal["openai", "anthropic", "huggingface"] = Field(
        default="openai", description="Which LLM provider to use"
    )
    openai_model: str = Field(default="gpt-5.1", description="OpenAI model name")
    anthropic_model: str = Field(
        default="claude-sonnet-4-5-20250929", description="Anthropic model"
    )
+     # HuggingFace (free tier)
+     huggingface_model: str | None = Field(
+         default="meta-llama/Llama-3.1-70B-Instruct", description="HuggingFace model name"
+     )
+     hf_token: str | None = Field(
+         default=None, alias="HF_TOKEN", description="HuggingFace API token"
+     )

    # Embedding Configuration
    # Note: OpenAI embeddings require OPENAI_API_KEY (Anthropic has no embeddings API)
@@ -175,10 +182,15 @@ class Settings(BaseSettings):
        """Check if Anthropic API key is available."""
        return bool(self.anthropic_api_key)

+     @property
+     def has_huggingface_key(self) -> bool:
+         """Check if HuggingFace token is available."""
+         return bool(self.hf_token)
+ 
    @property
    def has_any_llm_key(self) -> bool:
        """Check if any LLM API key is available."""
-         return self.has_openai_key or self.has_anthropic_key
+         return self.has_openai_key or self.has_anthropic_key or self.has_huggingface_key

    @property
    def has_huggingface_key(self) -> bool:
src/utils/models.py CHANGED
@@ -36,6 +36,10 @@ class Evidence(BaseModel):
    content: str = Field(min_length=1, description="The actual text content")
    citation: Citation
    relevance: float = Field(default=0.0, ge=0.0, le=1.0, description="Relevance score 0-1")
+     metadata: dict[str, Any] = Field(
+         default_factory=dict,
+         description="Additional metadata (e.g., cited_by_count, concepts, is_open_access)",
+     )

    model_config = {"frozen": True}

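A sketch of attaching the new field; the keys are illustrative, mirroring the field description, and the model stays frozen as before:

```python
# Evidence with optional per-source metadata.
from src.utils.models import Citation, Evidence

ev = Evidence(
    content="Open-access review of metformin repurposing.",
    citation=Citation(
        source="openalex",  # hypothetical source tag
        title="Example Review",
        url="https://example.org",
        date="2024",
        authors=[],
    ),
    metadata={"cited_by_count": 42, "is_open_access": True},
)
print(ev.metadata["cited_by_count"])
```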
tests/integration/test_dual_mode_e2e.py ADDED
@@ -0,0 +1,82 @@
+ """End-to-End Integration Tests for Dual-Mode Architecture."""
+ 
+ from unittest.mock import AsyncMock, MagicMock, patch
+ 
+ import pytest
+ 
+ pytestmark = [pytest.mark.integration, pytest.mark.slow]
+ 
+ from src.orchestrator_factory import create_orchestrator
+ from src.utils.models import Citation, Evidence, OrchestratorConfig
+ 
+ 
+ @pytest.fixture
+ def mock_search_handler():
+     handler = MagicMock()
+     handler.execute = AsyncMock(
+         return_value=[
+             Evidence(
+                 citation=Citation(
+                     title="Test Paper", url="http://test", date="2024", source="pubmed"
+                 ),
+                 content="Metformin increases lifespan in mice.",
+             )
+         ]
+     )
+     return handler
+ 
+ 
+ @pytest.fixture
+ def mock_judge_handler():
+     handler = MagicMock()
+     # Mock return value of assess
+     assessment = MagicMock()
+     assessment.sufficient = True
+     assessment.recommendation = "synthesize"
+     handler.assess = AsyncMock(return_value=assessment)
+     return handler
+ 
+ 
+ @pytest.mark.asyncio
+ async def test_simple_mode_e2e(mock_search_handler, mock_judge_handler):
+     """Test Simple Mode Orchestration flow."""
+     orch = create_orchestrator(
+         search_handler=mock_search_handler,
+         judge_handler=mock_judge_handler,
+         mode="simple",
+         config=OrchestratorConfig(max_iterations=1),
+     )
+ 
+     # Run
+     results = []
+     async for event in orch.run("Test query"):
+         results.append(event)
+ 
+     assert len(results) > 0
+     assert mock_search_handler.execute.called
+     assert mock_judge_handler.assess.called
+ 
+ 
+ @pytest.mark.asyncio
+ async def test_advanced_mode_explicit_instantiation():
+     """Test explicit Advanced Mode instantiation (not auto-detect).
+ 
+     This tests the explicit mode="advanced" path, verifying that
+     MagenticOrchestrator can be instantiated when explicitly requested.
+     The settings patch ensures any internal checks pass.
+     """
+     with patch("src.orchestrator_factory.settings") as mock_settings:
+         # Settings patch ensures factory checks pass (even though mode is explicit)
+         mock_settings.has_openai_key = True
+ 
+         with patch("src.agents.magentic_agents.OpenAIChatClient"):
+             # Mock agent creation to avoid real API calls during init
+             with (
+                 patch("src.orchestrator_magentic.create_search_agent"),
+                 patch("src.orchestrator_magentic.create_judge_agent"),
+                 patch("src.orchestrator_magentic.create_hypothesis_agent"),
+                 patch("src.orchestrator_magentic.create_report_agent"),
+             ):
+                 # Explicit mode="advanced" - tests the explicit path, not auto-detect
+                 orch = create_orchestrator(mode="advanced")
+                 assert orch is not None
tests/integration/test_modal.py CHANGED
@@ -1,4 +1,4 @@
- """Integration tests for Modal (requires credentials)."""
+ """Integration tests for Modal (requires credentials and modal package)."""

import pytest

@@ -7,9 +7,18 @@ from src.utils.config import settings
# Check if any LLM API key is available
_llm_available = bool(settings.openai_api_key or settings.anthropic_api_key)

+ # Check if modal package is installed
+ try:
+     import modal  # noqa: F401
+ 
+     _modal_installed = True
+ except ImportError:
+     _modal_installed = False
+ 

@pytest.mark.integration
- @pytest.mark.skipif(not settings.modal_available, reason="Modal not configured")
+ @pytest.mark.skipif(not _modal_installed, reason="Modal package not installed")
+ @pytest.mark.skipif(not settings.modal_available, reason="Modal credentials not configured")
class TestModalIntegration:
    """Integration tests requiring Modal credentials."""

tests/unit/agent_factory/test_judges_factory.py ADDED
@@ -0,0 +1,64 @@
+ """Unit tests for Judge Factory and Model Selection."""
+ 
+ from unittest.mock import patch
+ 
+ import pytest
+ 
+ pytestmark = pytest.mark.unit
+ from pydantic_ai.models.anthropic import AnthropicModel
+ 
+ # We expect this import to exist after we implement it, or we mock it if it's not there yet
+ # For TDD, we assume we will use the library class
+ from pydantic_ai.models.huggingface import HuggingFaceModel
+ from pydantic_ai.models.openai import OpenAIModel
+ 
+ from src.agent_factory.judges import get_model
+ 
+ 
+ @pytest.fixture
+ def mock_settings():
+     with patch("src.agent_factory.judges.settings", autospec=True) as mock_settings:
+         yield mock_settings
+ 
+ 
+ def test_get_model_openai(mock_settings):
+     """Test that OpenAI model is returned when provider is openai."""
+     mock_settings.llm_provider = "openai"
+     mock_settings.openai_api_key = "sk-test"
+     mock_settings.openai_model = "gpt-5.1"
+ 
+     model = get_model()
+     assert isinstance(model, OpenAIModel)
+     assert model.model_name == "gpt-5.1"
+ 
+ 
+ def test_get_model_anthropic(mock_settings):
+     """Test that Anthropic model is returned when provider is anthropic."""
+     mock_settings.llm_provider = "anthropic"
+     mock_settings.anthropic_api_key = "sk-ant-test"
+     mock_settings.anthropic_model = "claude-sonnet-4-5-20250929"
+ 
+     model = get_model()
+     assert isinstance(model, AnthropicModel)
+     assert model.model_name == "claude-sonnet-4-5-20250929"
+ 
+ 
+ def test_get_model_huggingface(mock_settings):
+     """Test that HuggingFace model is returned when provider is huggingface."""
+     mock_settings.llm_provider = "huggingface"
+     mock_settings.hf_token = "hf_test_token"
+     mock_settings.huggingface_model = "meta-llama/Llama-3.1-70B-Instruct"
+ 
+     model = get_model()
+     assert isinstance(model, HuggingFaceModel)
+     assert model.model_name == "meta-llama/Llama-3.1-70B-Instruct"
+ 
+ 
+ def test_get_model_default_fallback(mock_settings):
+     """Test fallback to OpenAI if provider is unknown."""
+     mock_settings.llm_provider = "unknown_provider"
+     mock_settings.openai_api_key = "sk-test"
+     mock_settings.openai_model = "gpt-5.1"
+ 
+     model = get_model()
+     assert isinstance(model, OpenAIModel)