Spaces:
Running
Running
Merge branch 'feature/iterative-deep-research-workflows' of https://github.com/Josephrp/DeepCritical-HFSpace into feature/iterative-deep-research-workflows
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .env.example +12 -8
- .gitignore +5 -0
- docs/brainstorming/00_ROADMAP_SUMMARY.md +194 -0
- docs/brainstorming/01_PUBMED_IMPROVEMENTS.md +125 -0
- docs/brainstorming/02_CLINICALTRIALS_IMPROVEMENTS.md +193 -0
- docs/brainstorming/03_EUROPEPMC_IMPROVEMENTS.md +211 -0
- docs/brainstorming/04_OPENALEX_INTEGRATION.md +303 -0
- docs/brainstorming/implementation/15_PHASE_OPENALEX.md +603 -0
- docs/brainstorming/implementation/16_PHASE_PUBMED_FULLTEXT.md +586 -0
- docs/brainstorming/implementation/17_PHASE_RATE_LIMITING.md +540 -0
- docs/brainstorming/implementation/README.md +143 -0
- docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md +189 -0
- docs/brainstorming/magentic-pydantic/01_ARCHITECTURE_SPEC.md +289 -0
- docs/brainstorming/magentic-pydantic/02_IMPLEMENTATION_PHASES.md +112 -0
- docs/brainstorming/magentic-pydantic/03_IMMEDIATE_ACTIONS.md +112 -0
- docs/brainstorming/magentic-pydantic/04_FOLLOWUP_REVIEW_REQUEST.md +158 -0
- docs/brainstorming/magentic-pydantic/REVIEW_PROMPT_FOR_SENIOR_AGENT.md +113 -0
- docs/bugs/FIX_PLAN_MAGENTIC_MODE.md +227 -0
- docs/bugs/P0_ACTIONABLE_FIXES.md +0 -281
- docs/bugs/P0_CRITICAL_BUGS.md +0 -298
- docs/bugs/P0_MAGENTIC_AND_SEARCH_AUDIT.md +0 -249
- docs/bugs/P0_MAGENTIC_MODE_BROKEN.md +116 -0
- docs/bugs/P1_GRADIO_SETTINGS_CLEANUP.md +81 -0
- docs/bugs/PHASE_00_IMPLEMENTATION_ORDER.md +0 -156
- docs/bugs/PHASE_01_REPLACE_BIORXIV.md +0 -371
- docs/bugs/PHASE_02_PUBMED_QUERY_PREPROCESSING.md +0 -355
- docs/bugs/PHASE_03_CLINICALTRIALS_FILTERING.md +0 -386
- examples/rate_limiting_demo.py +82 -0
- pyproject.toml +3 -1
- requirements.txt +7 -0
- src/agent_factory/judges.py +9 -1
- src/agents/code_executor_agent.py +69 -0
- src/agents/judge_agent_llm.py +45 -0
- src/agents/magentic_agents.py +1 -1
- src/agents/retrieval_agent.py +82 -0
- src/app.py +60 -69
- src/middleware/sub_iteration.py +135 -0
- src/orchestrator_factory.py +40 -15
- src/orchestrator_hierarchical.py +95 -0
- src/orchestrator_magentic.py +35 -4
- src/state/__init__.py +9 -0
- src/tools/__init__.py +2 -0
- src/tools/pubmed.py +5 -9
- src/tools/rate_limiter.py +121 -0
- src/tools/web_search.py +53 -0
- src/utils/config.py +14 -2
- src/utils/models.py +4 -0
- tests/integration/test_dual_mode_e2e.py +82 -0
- tests/integration/test_modal.py +11 -2
- tests/unit/agent_factory/test_judges_factory.py +64 -0
.env.example
CHANGED
|
@@ -7,9 +7,17 @@ LLM_PROVIDER=openai
|
|
| 7 |
OPENAI_API_KEY=sk-your-key-here
|
| 8 |
ANTHROPIC_API_KEY=sk-ant-your-key-here
|
| 9 |
|
| 10 |
-
# Model names (optional - sensible defaults)
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# ============== HUGGINGFACE (FREE TIER) ==============
|
| 15 |
|
|
@@ -20,7 +28,7 @@ ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
|
|
| 20 |
# WITH HF_TOKEN: Uses Llama 3.1 8B Instruct (requires accepting license)
|
| 21 |
#
|
| 22 |
# For HuggingFace Spaces deployment:
|
| 23 |
-
# Set this as a "Secret" in Space Settings
|
| 24 |
# Users/judges don't need their own token - the Space secret is used
|
| 25 |
#
|
| 26 |
HF_TOKEN=hf_your-token-here
|
|
@@ -36,9 +44,5 @@ LOG_LEVEL=INFO
|
|
| 36 |
# PubMed (optional - higher rate limits)
|
| 37 |
NCBI_API_KEY=your-ncbi-key-here
|
| 38 |
|
| 39 |
-
# Modal Sandbox (optional - for secure code execution)
|
| 40 |
-
MODAL_TOKEN_ID=ak-your-modal-token-id-here
|
| 41 |
-
MODAL_TOKEN_SECRET=your-modal-token-secret-here
|
| 42 |
-
|
| 43 |
# Vector Database (optional - for LlamaIndex RAG)
|
| 44 |
CHROMA_DB_PATH=./chroma_db
|
|
|
|
| 7 |
OPENAI_API_KEY=sk-your-key-here
|
| 8 |
ANTHROPIC_API_KEY=sk-ant-your-key-here
|
| 9 |
|
| 10 |
+
# Model names (optional - sensible defaults set in config.py)
|
| 11 |
+
# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
|
| 12 |
+
# OPENAI_MODEL=gpt-5.1
|
| 13 |
+
|
| 14 |
+
# ============== EMBEDDINGS ==============
|
| 15 |
+
|
| 16 |
+
# OpenAI Embedding Model (used if LLM_PROVIDER is openai and performing RAG/Embeddings)
|
| 17 |
+
OPENAI_EMBEDDING_MODEL=text-embedding-3-small
|
| 18 |
+
|
| 19 |
+
# Local Embedding Model (used for local/offline embeddings)
|
| 20 |
+
LOCAL_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
|
| 21 |
|
| 22 |
# ============== HUGGINGFACE (FREE TIER) ==============
|
| 23 |
|
|
|
|
| 28 |
# WITH HF_TOKEN: Uses Llama 3.1 8B Instruct (requires accepting license)
|
| 29 |
#
|
| 30 |
# For HuggingFace Spaces deployment:
|
| 31 |
+
# Set this as a "Secret" in Space Settings -> Variables and secrets
|
| 32 |
# Users/judges don't need their own token - the Space secret is used
|
| 33 |
#
|
| 34 |
HF_TOKEN=hf_your-token-here
|
|
|
|
| 44 |
# PubMed (optional - higher rate limits)
|
| 45 |
NCBI_API_KEY=your-ncbi-key-here
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
# Vector Database (optional - for LlamaIndex RAG)
|
| 48 |
CHROMA_DB_PATH=./chroma_db
|
.gitignore
CHANGED
|
@@ -69,4 +69,9 @@ logs/
|
|
| 69 |
.mypy_cache/
|
| 70 |
.coverage
|
| 71 |
htmlcov/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
# Trigger rebuild Wed Nov 26 17:51:41 EST 2025
|
|
|
|
| 69 |
.mypy_cache/
|
| 70 |
.coverage
|
| 71 |
htmlcov/
|
| 72 |
+
|
| 73 |
+
# Database files
|
| 74 |
+
chroma_db/
|
| 75 |
+
*.sqlite3
|
| 76 |
+
|
| 77 |
# Trigger rebuild Wed Nov 26 17:51:41 EST 2025
|
docs/brainstorming/00_ROADMAP_SUMMARY.md
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# DeepCritical Data Sources: Roadmap Summary
|
| 2 |
+
|
| 3 |
+
**Created**: 2024-11-27
|
| 4 |
+
**Purpose**: Future maintainability and hackathon continuation
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Current State
|
| 9 |
+
|
| 10 |
+
### Working Tools
|
| 11 |
+
|
| 12 |
+
| Tool | Status | Data Quality |
|
| 13 |
+
|------|--------|--------------|
|
| 14 |
+
| PubMed | ✅ Works | Good (abstracts only) |
|
| 15 |
+
| ClinicalTrials.gov | ✅ Works | Good (filtered for interventional) |
|
| 16 |
+
| Europe PMC | ✅ Works | Good (includes preprints) |
|
| 17 |
+
|
| 18 |
+
### Removed Tools
|
| 19 |
+
|
| 20 |
+
| Tool | Status | Reason |
|
| 21 |
+
|------|--------|--------|
|
| 22 |
+
| bioRxiv | ❌ Removed | No search API - only date/DOI lookup |
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## Priority Improvements
|
| 27 |
+
|
| 28 |
+
### P0: Critical (Do First)
|
| 29 |
+
|
| 30 |
+
1. **Add Rate Limiting to PubMed**
|
| 31 |
+
- NCBI will block us without it
|
| 32 |
+
- Use `limits` library (see reference repo)
|
| 33 |
+
- 3/sec without key, 10/sec with key
|
| 34 |
+
|
| 35 |
+
### P1: High Value, Medium Effort
|
| 36 |
+
|
| 37 |
+
2. **Add OpenAlex as 4th Source**
|
| 38 |
+
- Citation network (huge for drug repurposing)
|
| 39 |
+
- Concept tagging (semantic discovery)
|
| 40 |
+
- Already implemented in reference repo
|
| 41 |
+
- Free, no API key
|
| 42 |
+
|
| 43 |
+
3. **PubMed Full-Text via BioC**
|
| 44 |
+
- Get full paper text for PMC papers
|
| 45 |
+
- Already in reference repo
|
| 46 |
+
|
| 47 |
+
### P2: Nice to Have
|
| 48 |
+
|
| 49 |
+
4. **ClinicalTrials.gov Results**
|
| 50 |
+
- Get efficacy data from completed trials
|
| 51 |
+
- Requires more complex API calls
|
| 52 |
+
|
| 53 |
+
5. **Europe PMC Annotations**
|
| 54 |
+
- Text-mined entities (genes, drugs, diseases)
|
| 55 |
+
- Automatic entity extraction
|
| 56 |
+
|
| 57 |
+
---
|
| 58 |
+
|
| 59 |
+
## Effort Estimates
|
| 60 |
+
|
| 61 |
+
| Improvement | Effort | Impact | Priority |
|
| 62 |
+
|-------------|--------|--------|----------|
|
| 63 |
+
| PubMed rate limiting | 1 hour | Stability | P0 |
|
| 64 |
+
| OpenAlex basic search | 2 hours | High | P1 |
|
| 65 |
+
| OpenAlex citations | 2 hours | Very High | P1 |
|
| 66 |
+
| PubMed full-text | 3 hours | Medium | P1 |
|
| 67 |
+
| CT.gov results | 4 hours | Medium | P2 |
|
| 68 |
+
| Europe PMC annotations | 3 hours | Medium | P2 |
|
| 69 |
+
|
| 70 |
+
---
|
| 71 |
+
|
| 72 |
+
## Architecture Decision
|
| 73 |
+
|
| 74 |
+
### Option A: Keep Current + Add OpenAlex
|
| 75 |
+
|
| 76 |
+
```
|
| 77 |
+
User Query
|
| 78 |
+
↓
|
| 79 |
+
┌───────────────────┼───────────────────┐
|
| 80 |
+
↓ ↓ ↓
|
| 81 |
+
PubMed ClinicalTrials Europe PMC
|
| 82 |
+
(abstracts) (trials only) (preprints)
|
| 83 |
+
↓ ↓ ↓
|
| 84 |
+
└───────────────────┼───────────────────┘
|
| 85 |
+
↓
|
| 86 |
+
OpenAlex ← NEW
|
| 87 |
+
(citations, concepts)
|
| 88 |
+
↓
|
| 89 |
+
Orchestrator
|
| 90 |
+
↓
|
| 91 |
+
Report
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
**Pros**: Low risk, additive
|
| 95 |
+
**Cons**: More complexity, some overlap
|
| 96 |
+
|
| 97 |
+
### Option B: OpenAlex as Primary
|
| 98 |
+
|
| 99 |
+
```
|
| 100 |
+
User Query
|
| 101 |
+
↓
|
| 102 |
+
┌───────────────────┼───────────────────┐
|
| 103 |
+
↓ ↓ ↓
|
| 104 |
+
OpenAlex ClinicalTrials Europe PMC
|
| 105 |
+
(primary (trials only) (full-text
|
| 106 |
+
search) fallback)
|
| 107 |
+
↓ ↓ ↓
|
| 108 |
+
└───────────────────┼───────────────────┘
|
| 109 |
+
↓
|
| 110 |
+
Orchestrator
|
| 111 |
+
↓
|
| 112 |
+
Report
|
| 113 |
+
```
|
| 114 |
+
|
| 115 |
+
**Pros**: Simpler, citation network built-in
|
| 116 |
+
**Cons**: Lose some PubMed-specific features
|
| 117 |
+
|
| 118 |
+
### Recommendation: Option A
|
| 119 |
+
|
| 120 |
+
Keep current architecture working, add OpenAlex incrementally.
|
| 121 |
+
|
| 122 |
+
---
|
| 123 |
+
|
| 124 |
+
## Quick Wins (Can Do Today)
|
| 125 |
+
|
| 126 |
+
1. **Add `limits` to `pyproject.toml`**
|
| 127 |
+
```toml
|
| 128 |
+
dependencies = [
|
| 129 |
+
"limits>=3.0",
|
| 130 |
+
]
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
2. **Copy OpenAlex tool from reference repo**
|
| 134 |
+
- File: `reference_repos/DeepCritical/DeepResearch/src/tools/openalex_tools.py`
|
| 135 |
+
- Adapt to our `SearchTool` base class
|
| 136 |
+
|
| 137 |
+
3. **Enable NCBI API Key**
|
| 138 |
+
- Add to `.env`: `NCBI_API_KEY=your_key`
|
| 139 |
+
- 10x rate limit improvement
|
| 140 |
+
|
| 141 |
+
---
|
| 142 |
+
|
| 143 |
+
## External Resources Worth Exploring
|
| 144 |
+
|
| 145 |
+
### Python Libraries
|
| 146 |
+
|
| 147 |
+
| Library | For | Notes |
|
| 148 |
+
|---------|-----|-------|
|
| 149 |
+
| `limits` | Rate limiting | Used by reference repo |
|
| 150 |
+
| `pyalex` | OpenAlex wrapper | [GitHub](https://github.com/J535D165/pyalex) |
|
| 151 |
+
| `metapub` | PubMed | Full-featured |
|
| 152 |
+
| `sentence-transformers` | Semantic search | For embeddings |
|
| 153 |
+
|
| 154 |
+
### APIs Not Yet Used
|
| 155 |
+
|
| 156 |
+
| API | Provides | Effort |
|
| 157 |
+
|-----|----------|--------|
|
| 158 |
+
| RxNorm | Drug name normalization | Low |
|
| 159 |
+
| DrugBank | Drug targets/mechanisms | Medium (license) |
|
| 160 |
+
| UniProt | Protein data | Medium |
|
| 161 |
+
| ChEMBL | Bioactivity data | Medium |
|
| 162 |
+
|
| 163 |
+
### RAG Tools (Future)
|
| 164 |
+
|
| 165 |
+
| Tool | Purpose |
|
| 166 |
+
|------|---------|
|
| 167 |
+
| [PaperQA](https://github.com/Future-House/paper-qa) | RAG for scientific papers |
|
| 168 |
+
| [txtai](https://github.com/neuml/txtai) | Embeddings + search |
|
| 169 |
+
| [PubMedBERT](https://huggingface.co/NeuML/pubmedbert-base-embeddings) | Biomedical embeddings |
|
| 170 |
+
|
| 171 |
+
---
|
| 172 |
+
|
| 173 |
+
## Files in This Directory
|
| 174 |
+
|
| 175 |
+
| File | Contents |
|
| 176 |
+
|------|----------|
|
| 177 |
+
| `00_ROADMAP_SUMMARY.md` | This file |
|
| 178 |
+
| `01_PUBMED_IMPROVEMENTS.md` | PubMed enhancement details |
|
| 179 |
+
| `02_CLINICALTRIALS_IMPROVEMENTS.md` | ClinicalTrials.gov details |
|
| 180 |
+
| `03_EUROPEPMC_IMPROVEMENTS.md` | Europe PMC details |
|
| 181 |
+
| `04_OPENALEX_INTEGRATION.md` | OpenAlex integration plan |
|
| 182 |
+
|
| 183 |
+
---
|
| 184 |
+
|
| 185 |
+
## For Future Maintainers
|
| 186 |
+
|
| 187 |
+
If you're picking this up after the hackathon:
|
| 188 |
+
|
| 189 |
+
1. **Start with OpenAlex** - biggest bang for buck
|
| 190 |
+
2. **Add rate limiting** - prevents API blocks
|
| 191 |
+
3. **Don't bother with bioRxiv** - use Europe PMC instead
|
| 192 |
+
4. **Reference repo is gold** - `reference_repos/DeepCritical/` has working implementations
|
| 193 |
+
|
| 194 |
+
Good luck! 🚀
|
docs/brainstorming/01_PUBMED_IMPROVEMENTS.md
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PubMed Tool: Current State & Future Improvements
|
| 2 |
+
|
| 3 |
+
**Status**: Currently Implemented
|
| 4 |
+
**Priority**: High (Core Data Source)
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Current Implementation
|
| 9 |
+
|
| 10 |
+
### What We Have (`src/tools/pubmed.py`)
|
| 11 |
+
|
| 12 |
+
- Basic E-utilities search via `esearch.fcgi` and `efetch.fcgi`
|
| 13 |
+
- Query preprocessing (strips question words, expands synonyms)
|
| 14 |
+
- Returns: title, abstract, authors, journal, PMID
|
| 15 |
+
- Rate limiting: None implemented (relying on NCBI defaults)
|
| 16 |
+
|
| 17 |
+
### Current Limitations
|
| 18 |
+
|
| 19 |
+
1. **No Full-Text Access**: Only retrieves abstracts, not full paper text
|
| 20 |
+
2. **No Rate Limiting**: Risk of being blocked by NCBI
|
| 21 |
+
3. **No BioC Format**: Missing structured full-text extraction
|
| 22 |
+
4. **No Figure Retrieval**: No supplementary materials access
|
| 23 |
+
5. **No PMC Integration**: Missing open-access full-text via PMC
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
## Reference Implementation (DeepCritical Reference Repo)
|
| 28 |
+
|
| 29 |
+
The reference repo at `reference_repos/DeepCritical/DeepResearch/src/tools/bioinformatics_tools.py` has a more sophisticated implementation:
|
| 30 |
+
|
| 31 |
+
### Features We're Missing
|
| 32 |
+
|
| 33 |
+
```python
|
| 34 |
+
# Rate limiting (lines 47-50)
|
| 35 |
+
from limits import parse
|
| 36 |
+
from limits.storage import MemoryStorage
|
| 37 |
+
from limits.strategies import MovingWindowRateLimiter
|
| 38 |
+
|
| 39 |
+
storage = MemoryStorage()
|
| 40 |
+
limiter = MovingWindowRateLimiter(storage)
|
| 41 |
+
rate_limit = parse("3/second") # NCBI allows 3/sec without API key, 10/sec with
|
| 42 |
+
|
| 43 |
+
# Full-text via BioC format (lines 108-120)
|
| 44 |
+
def _get_fulltext(pmid: int) -> dict[str, Any] | None:
|
| 45 |
+
pmid_url = f"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/{pmid}/unicode"
|
| 46 |
+
# Returns structured JSON with full text for open-access papers
|
| 47 |
+
|
| 48 |
+
# Figure retrieval via Europe PMC (lines 123-149)
|
| 49 |
+
def _get_figures(pmcid: str) -> dict[str, str]:
|
| 50 |
+
suppl_url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/supplementaryFiles"
|
| 51 |
+
# Returns base64-encoded images from supplementary materials
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
|
| 56 |
+
## Recommended Improvements
|
| 57 |
+
|
| 58 |
+
### Phase 1: Rate Limiting (Critical)
|
| 59 |
+
|
| 60 |
+
```python
|
| 61 |
+
# Add to src/tools/pubmed.py
|
| 62 |
+
from limits import parse
|
| 63 |
+
from limits.storage import MemoryStorage
|
| 64 |
+
from limits.strategies import MovingWindowRateLimiter
|
| 65 |
+
|
| 66 |
+
storage = MemoryStorage()
|
| 67 |
+
limiter = MovingWindowRateLimiter(storage)
|
| 68 |
+
|
| 69 |
+
# With NCBI_API_KEY: 10/sec, without: 3/sec
|
| 70 |
+
def get_rate_limit():
|
| 71 |
+
if settings.ncbi_api_key:
|
| 72 |
+
return parse("10/second")
|
| 73 |
+
return parse("3/second")
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
**Dependencies**: `pip install limits`
|
| 77 |
+
|
| 78 |
+
### Phase 2: Full-Text Retrieval
|
| 79 |
+
|
| 80 |
+
```python
|
| 81 |
+
async def get_fulltext(pmid: str) -> str | None:
|
| 82 |
+
"""Get full text for open-access papers via BioC API."""
|
| 83 |
+
url = f"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/{pmid}/unicode"
|
| 84 |
+
# Only works for PMC papers (open access)
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
### Phase 3: PMC ID Resolution
|
| 88 |
+
|
| 89 |
+
```python
|
| 90 |
+
async def get_pmc_id(pmid: str) -> str | None:
|
| 91 |
+
"""Convert PMID to PMCID for full-text access."""
|
| 92 |
+
url = f"https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?ids={pmid}&format=json"
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
---
|
| 96 |
+
|
| 97 |
+
## Python Libraries to Consider
|
| 98 |
+
|
| 99 |
+
| Library | Purpose | Notes |
|
| 100 |
+
|---------|---------|-------|
|
| 101 |
+
| [Biopython](https://biopython.org/) | `Bio.Entrez` module | Official, well-maintained |
|
| 102 |
+
| [PyMed](https://pypi.org/project/pymed/) | PubMed wrapper | Simpler API, less control |
|
| 103 |
+
| [metapub](https://pypi.org/project/metapub/) | Full-featured | Tested on 1/3 of PubMed |
|
| 104 |
+
| [limits](https://pypi.org/project/limits/) | Rate limiting | Used by reference repo |
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
## API Endpoints Reference
|
| 109 |
+
|
| 110 |
+
| Endpoint | Purpose | Rate Limit |
|
| 111 |
+
|----------|---------|------------|
|
| 112 |
+
| `esearch.fcgi` | Search for PMIDs | 3/sec (10 with key) |
|
| 113 |
+
| `efetch.fcgi` | Fetch metadata | 3/sec (10 with key) |
|
| 114 |
+
| `esummary.fcgi` | Quick metadata | 3/sec (10 with key) |
|
| 115 |
+
| `pmcoa.cgi/BioC_json` | Full text (PMC only) | Unknown |
|
| 116 |
+
| `idconv/v1.0` | PMID ↔ PMCID | Unknown |
|
| 117 |
+
|
| 118 |
+
---
|
| 119 |
+
|
| 120 |
+
## Sources
|
| 121 |
+
|
| 122 |
+
- [PubMed E-utilities Documentation](https://www.ncbi.nlm.nih.gov/books/NBK25501/)
|
| 123 |
+
- [NCBI BioC API](https://www.ncbi.nlm.nih.gov/research/bionlp/APIs/)
|
| 124 |
+
- [Searching PubMed with Python](https://marcobonzanini.com/2015/01/12/searching-pubmed-with-python/)
|
| 125 |
+
- [PyMed on PyPI](https://pypi.org/project/pymed/)
|
docs/brainstorming/02_CLINICALTRIALS_IMPROVEMENTS.md
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ClinicalTrials.gov Tool: Current State & Future Improvements
|
| 2 |
+
|
| 3 |
+
**Status**: Currently Implemented
|
| 4 |
+
**Priority**: High (Core Data Source for Drug Repurposing)
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Current Implementation
|
| 9 |
+
|
| 10 |
+
### What We Have (`src/tools/clinicaltrials.py`)
|
| 11 |
+
|
| 12 |
+
- V2 API search via `clinicaltrials.gov/api/v2/studies`
|
| 13 |
+
- Filters: `INTERVENTIONAL` study type, `RECRUITING` status
|
| 14 |
+
- Returns: NCT ID, title, conditions, interventions, phase, status
|
| 15 |
+
- Query preprocessing via shared `query_utils.py`
|
| 16 |
+
|
| 17 |
+
### Current Strengths
|
| 18 |
+
|
| 19 |
+
1. **Good Filtering**: Already filtering for interventional + recruiting
|
| 20 |
+
2. **V2 API**: Using the modern API (v1 deprecated)
|
| 21 |
+
3. **Phase Info**: Extracting trial phases for drug development context
|
| 22 |
+
|
| 23 |
+
### Current Limitations
|
| 24 |
+
|
| 25 |
+
1. **No Outcome Data**: Missing primary/secondary outcomes
|
| 26 |
+
2. **No Eligibility Criteria**: Missing inclusion/exclusion details
|
| 27 |
+
3. **No Sponsor Info**: Missing who's running the trial
|
| 28 |
+
4. **No Result Data**: For completed trials, no efficacy data
|
| 29 |
+
5. **Limited Drug Mapping**: No integration with drug databases
|
| 30 |
+
|
| 31 |
+
---
|
| 32 |
+
|
| 33 |
+
## API Capabilities We're Not Using
|
| 34 |
+
|
| 35 |
+
### Fields We Could Request
|
| 36 |
+
|
| 37 |
+
```python
|
| 38 |
+
# Current fields
|
| 39 |
+
fields = ["NCTId", "BriefTitle", "Condition", "InterventionName", "Phase", "OverallStatus"]
|
| 40 |
+
|
| 41 |
+
# Additional valuable fields
|
| 42 |
+
additional_fields = [
|
| 43 |
+
"PrimaryOutcomeMeasure", # What are they measuring?
|
| 44 |
+
"SecondaryOutcomeMeasure", # Secondary endpoints
|
| 45 |
+
"EligibilityCriteria", # Who can participate?
|
| 46 |
+
"LeadSponsorName", # Who's funding?
|
| 47 |
+
"ResultsFirstPostDate", # Has results?
|
| 48 |
+
"StudyFirstPostDate", # When started?
|
| 49 |
+
"CompletionDate", # When finished?
|
| 50 |
+
"EnrollmentCount", # Sample size
|
| 51 |
+
"InterventionDescription", # Drug details
|
| 52 |
+
"ArmGroupLabel", # Treatment arms
|
| 53 |
+
"InterventionOtherName", # Drug aliases
|
| 54 |
+
]
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
### Filter Enhancements
|
| 58 |
+
|
| 59 |
+
```python
|
| 60 |
+
# Current
|
| 61 |
+
aggFilters = "studyType:INTERVENTIONAL,status:RECRUITING"
|
| 62 |
+
|
| 63 |
+
# Could add
|
| 64 |
+
"status:RECRUITING,ACTIVE_NOT_RECRUITING,COMPLETED" # Include completed for results
|
| 65 |
+
"phase:PHASE2,PHASE3" # Only later-stage trials
|
| 66 |
+
"resultsFirstPostDateRange:2020-01-01_" # Trials with posted results
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
---
|
| 70 |
+
|
| 71 |
+
## Recommended Improvements
|
| 72 |
+
|
| 73 |
+
### Phase 1: Richer Metadata
|
| 74 |
+
|
| 75 |
+
```python
|
| 76 |
+
EXTENDED_FIELDS = [
|
| 77 |
+
"NCTId",
|
| 78 |
+
"BriefTitle",
|
| 79 |
+
"OfficialTitle",
|
| 80 |
+
"Condition",
|
| 81 |
+
"InterventionName",
|
| 82 |
+
"InterventionDescription",
|
| 83 |
+
"InterventionOtherName", # Drug synonyms!
|
| 84 |
+
"Phase",
|
| 85 |
+
"OverallStatus",
|
| 86 |
+
"PrimaryOutcomeMeasure",
|
| 87 |
+
"EnrollmentCount",
|
| 88 |
+
"LeadSponsorName",
|
| 89 |
+
"StudyFirstPostDate",
|
| 90 |
+
]
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
### Phase 2: Results Retrieval
|
| 94 |
+
|
| 95 |
+
For completed trials, we can get actual efficacy data:
|
| 96 |
+
|
| 97 |
+
```python
|
| 98 |
+
async def get_trial_results(nct_id: str) -> dict | None:
|
| 99 |
+
"""Fetch results for completed trials."""
|
| 100 |
+
url = f"https://clinicaltrials.gov/api/v2/studies/{nct_id}"
|
| 101 |
+
params = {
|
| 102 |
+
"fields": "ResultsSection",
|
| 103 |
+
}
|
| 104 |
+
# Returns outcome measures and statistics
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
### Phase 3: Drug Name Normalization
|
| 108 |
+
|
| 109 |
+
Map intervention names to standard identifiers:
|
| 110 |
+
|
| 111 |
+
```python
|
| 112 |
+
# Problem: "Metformin", "Metformin HCl", "Glucophage" are the same drug
|
| 113 |
+
# Solution: Use RxNorm or DrugBank for normalization
|
| 114 |
+
|
| 115 |
+
async def normalize_drug_name(intervention: str) -> str:
|
| 116 |
+
"""Normalize drug name via RxNorm API."""
|
| 117 |
+
url = f"https://rxnav.nlm.nih.gov/REST/rxcui.json?name={intervention}"
|
| 118 |
+
# Returns standardized RxCUI
|
| 119 |
+
```
|
| 120 |
+
|
| 121 |
+
---
|
| 122 |
+
|
| 123 |
+
## Integration Opportunities
|
| 124 |
+
|
| 125 |
+
### With PubMed
|
| 126 |
+
|
| 127 |
+
Cross-reference trials with publications:
|
| 128 |
+
```python
|
| 129 |
+
# ClinicalTrials.gov provides PMID links
|
| 130 |
+
# Can correlate trial results with published papers
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
### With DrugBank/ChEMBL
|
| 134 |
+
|
| 135 |
+
Map interventions to:
|
| 136 |
+
- Mechanism of action
|
| 137 |
+
- Known targets
|
| 138 |
+
- Adverse effects
|
| 139 |
+
- Drug-drug interactions
|
| 140 |
+
|
| 141 |
+
---
|
| 142 |
+
|
| 143 |
+
## Python Libraries to Consider
|
| 144 |
+
|
| 145 |
+
| Library | Purpose | Notes |
|
| 146 |
+
|---------|---------|-------|
|
| 147 |
+
| [pytrials](https://pypi.org/project/pytrials/) | CT.gov wrapper | V2 API support unclear |
|
| 148 |
+
| [clinicaltrials](https://github.com/ebmdatalab/clinicaltrials-act-tracker) | Data tracking | More for analysis |
|
| 149 |
+
| [drugbank-downloader](https://pypi.org/project/drugbank-downloader/) | Drug mapping | Requires license |
|
| 150 |
+
|
| 151 |
+
---
|
| 152 |
+
|
| 153 |
+
## API Quirks & Gotchas
|
| 154 |
+
|
| 155 |
+
1. **Rate Limiting**: Undocumented, be conservative
|
| 156 |
+
2. **Pagination**: Max 1000 results per request
|
| 157 |
+
3. **Field Names**: Case-sensitive, camelCase
|
| 158 |
+
4. **Empty Results**: Some fields may be null even if requested
|
| 159 |
+
5. **Status Changes**: Trials change status frequently
|
| 160 |
+
|
| 161 |
+
---
|
| 162 |
+
|
| 163 |
+
## Example Enhanced Query
|
| 164 |
+
|
| 165 |
+
```python
|
| 166 |
+
async def search_drug_repurposing_trials(
|
| 167 |
+
drug_name: str,
|
| 168 |
+
condition: str,
|
| 169 |
+
include_completed: bool = True,
|
| 170 |
+
) -> list[Evidence]:
|
| 171 |
+
"""Search for trials repurposing a drug for a new condition."""
|
| 172 |
+
|
| 173 |
+
statuses = ["RECRUITING", "ACTIVE_NOT_RECRUITING"]
|
| 174 |
+
if include_completed:
|
| 175 |
+
statuses.append("COMPLETED")
|
| 176 |
+
|
| 177 |
+
params = {
|
| 178 |
+
"query.intr": drug_name,
|
| 179 |
+
"query.cond": condition,
|
| 180 |
+
"filter.overallStatus": ",".join(statuses),
|
| 181 |
+
"filter.studyType": "INTERVENTIONAL",
|
| 182 |
+
"fields": ",".join(EXTENDED_FIELDS),
|
| 183 |
+
"pageSize": 50,
|
| 184 |
+
}
|
| 185 |
+
```
|
| 186 |
+
|
| 187 |
+
---
|
| 188 |
+
|
| 189 |
+
## Sources
|
| 190 |
+
|
| 191 |
+
- [ClinicalTrials.gov API Documentation](https://clinicaltrials.gov/data-api/api)
|
| 192 |
+
- [CT.gov Field Definitions](https://clinicaltrials.gov/data-api/about-api/study-data-structure)
|
| 193 |
+
- [RxNorm API](https://lhncbc.nlm.nih.gov/RxNav/APIs/api-RxNorm.findRxcuiByString.html)
|
docs/brainstorming/03_EUROPEPMC_IMPROVEMENTS.md
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Europe PMC Tool: Current State & Future Improvements
|
| 2 |
+
|
| 3 |
+
**Status**: Currently Implemented (Replaced bioRxiv)
|
| 4 |
+
**Priority**: High (Preprint + Open Access Source)
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Why Europe PMC Over bioRxiv?
|
| 9 |
+
|
| 10 |
+
### bioRxiv API Limitations (Why We Abandoned It)
|
| 11 |
+
|
| 12 |
+
1. **No Search API**: Only returns papers by date range or DOI
|
| 13 |
+
2. **No Query Capability**: Cannot search for "metformin cancer"
|
| 14 |
+
3. **Workaround Required**: Would need to download ALL preprints and build local search
|
| 15 |
+
4. **Known Issue**: [Gradio Issue #8861](https://github.com/gradio-app/gradio/issues/8861) documents the limitation
|
| 16 |
+
|
| 17 |
+
### Europe PMC Advantages
|
| 18 |
+
|
| 19 |
+
1. **Full Search API**: Boolean queries, filters, facets
|
| 20 |
+
2. **Aggregates bioRxiv**: Includes bioRxiv, medRxiv content anyway
|
| 21 |
+
3. **Includes PubMed**: Also has MEDLINE content
|
| 22 |
+
4. **34 Preprint Servers**: Not just bioRxiv
|
| 23 |
+
5. **Open Access Focus**: Full-text when available
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
## Current Implementation
|
| 28 |
+
|
| 29 |
+
### What We Have (`src/tools/europepmc.py`)
|
| 30 |
+
|
| 31 |
+
- REST API search via `europepmc.org/webservices/rest/search`
|
| 32 |
+
- Preprint flagging via `firstPublicationDate` heuristics
|
| 33 |
+
- Returns: title, abstract, authors, DOI, source
|
| 34 |
+
- Marks preprints for transparency
|
| 35 |
+
|
| 36 |
+
### Current Limitations
|
| 37 |
+
|
| 38 |
+
1. **No Full-Text Retrieval**: Only metadata/abstracts
|
| 39 |
+
2. **No Citation Network**: Missing references/citations
|
| 40 |
+
3. **No Supplementary Files**: Not fetching figures/data
|
| 41 |
+
4. **Basic Preprint Detection**: Heuristic, not explicit flag
|
| 42 |
+
|
| 43 |
+
---
|
| 44 |
+
|
| 45 |
+
## Europe PMC API Capabilities
|
| 46 |
+
|
| 47 |
+
### Endpoints We Could Use
|
| 48 |
+
|
| 49 |
+
| Endpoint | Purpose | Currently Using |
|
| 50 |
+
|----------|---------|-----------------|
|
| 51 |
+
| `/search` | Query papers | Yes |
|
| 52 |
+
| `/fulltext/{ID}` | Full text (XML/JSON) | No |
|
| 53 |
+
| `/{PMCID}/supplementaryFiles` | Figures, data | No |
|
| 54 |
+
| `/citations/{ID}` | Who cited this | No |
|
| 55 |
+
| `/references/{ID}` | What this cites | No |
|
| 56 |
+
| `/annotations` | Text-mined entities | No |
|
| 57 |
+
|
| 58 |
+
### Rich Query Syntax
|
| 59 |
+
|
| 60 |
+
```python
|
| 61 |
+
# Current simple query
|
| 62 |
+
query = "metformin cancer"
|
| 63 |
+
|
| 64 |
+
# Could use advanced syntax
|
| 65 |
+
query = "(TITLE:metformin OR ABSTRACT:metformin) AND (cancer OR oncology)"
|
| 66 |
+
query += " AND (SRC:PPR)" # Only preprints
|
| 67 |
+
query += " AND (FIRST_PDATE:[2023-01-01 TO 2024-12-31])" # Date range
|
| 68 |
+
query += " AND (OPEN_ACCESS:y)" # Only open access
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
### Source Filters
|
| 72 |
+
|
| 73 |
+
```python
|
| 74 |
+
# Filter by source
|
| 75 |
+
"SRC:MED" # MEDLINE
|
| 76 |
+
"SRC:PMC" # PubMed Central
|
| 77 |
+
"SRC:PPR" # Preprints (bioRxiv, medRxiv, etc.)
|
| 78 |
+
"SRC:AGR" # Agricola
|
| 79 |
+
"SRC:CBA" # Chinese Biological Abstracts
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
---
|
| 83 |
+
|
| 84 |
+
## Recommended Improvements
|
| 85 |
+
|
| 86 |
+
### Phase 1: Rich Metadata
|
| 87 |
+
|
| 88 |
+
```python
|
| 89 |
+
# Add to search results
|
| 90 |
+
additional_fields = [
|
| 91 |
+
"citedByCount", # Impact indicator
|
| 92 |
+
"source", # Explicit source (MED, PMC, PPR)
|
| 93 |
+
"isOpenAccess", # Boolean flag
|
| 94 |
+
"fullTextUrlList", # URLs for full text
|
| 95 |
+
"authorAffiliations", # Institution info
|
| 96 |
+
"grantsList", # Funding info
|
| 97 |
+
]
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
### Phase 2: Full-Text Retrieval
|
| 101 |
+
|
| 102 |
+
```python
|
| 103 |
+
async def get_fulltext(pmcid: str) -> str | None:
|
| 104 |
+
"""Get full text for open access papers."""
|
| 105 |
+
# XML format
|
| 106 |
+
url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/fullTextXML"
|
| 107 |
+
# Or JSON (NOTE: Europe PMC's Articles API documents the XML endpoint; verify JSON availability before relying on it)
url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/fullTextJSON"
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
### Phase 3: Citation Network
|
| 112 |
+
|
| 113 |
+
```python
|
| 114 |
+
async def get_citations(pmcid: str) -> list[str]:
|
| 115 |
+
"""Get papers that cite this one."""
|
| 116 |
+
url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/citations"
|
| 117 |
+
|
| 118 |
+
async def get_references(pmcid: str) -> list[str]:
|
| 119 |
+
"""Get papers this one cites."""
|
| 120 |
+
url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/references"
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
### Phase 4: Text-Mined Annotations
|
| 124 |
+
|
| 125 |
+
Europe PMC extracts entities automatically:
|
| 126 |
+
|
| 127 |
+
```python
|
| 128 |
+
async def get_annotations(pmcid: str) -> dict:
|
| 129 |
+
"""Get text-mined entities (genes, diseases, drugs)."""
|
| 130 |
+
url = f"https://www.ebi.ac.uk/europepmc/annotations_api/annotationsByArticleIds"
|
| 131 |
+
params = {
|
| 132 |
+
"articleIds": f"PMC:{pmcid}",
|
| 133 |
+
"type": "Gene_Proteins,Diseases,Chemicals",
|
| 134 |
+
"format": "JSON",
|
| 135 |
+
}
|
| 136 |
+
# Returns structured entity mentions with positions
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
---
|
| 140 |
+
|
| 141 |
+
## Supplementary File Retrieval
|
| 142 |
+
|
| 143 |
+
From reference repo (`bioinformatics_tools.py` lines 123-149):
|
| 144 |
+
|
| 145 |
+
```python
|
| 146 |
+
def get_figures(pmcid: str) -> dict[str, str]:
|
| 147 |
+
"""Download figures and supplementary files."""
|
| 148 |
+
url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/supplementaryFiles?includeInlineImage=true"
|
| 149 |
+
# Returns a ZIP archive of figures and supplementary files, base64-encoded
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
---
|
| 153 |
+
|
| 154 |
+
## Preprint-Specific Features
|
| 155 |
+
|
| 156 |
+
### Identify Preprint Servers
|
| 157 |
+
|
| 158 |
+
```python
|
| 159 |
+
PREPRINT_SOURCES = {
|
| 160 |
+
"PPR": "General preprints",
|
| 161 |
+
"bioRxiv": "Biology preprints",
|
| 162 |
+
"medRxiv": "Medical preprints",
|
| 163 |
+
"chemRxiv": "Chemistry preprints",
|
| 164 |
+
"Research Square": "Multi-disciplinary",
|
| 165 |
+
"Preprints.org": "MDPI preprints",
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
# Check if published version exists
|
| 169 |
+
async def check_published_version(preprint_doi: str) -> str | None:
|
| 170 |
+
"""Check if preprint has been peer-reviewed and published."""
|
| 171 |
+
# Europe PMC links preprints to final versions
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
---
|
| 175 |
+
|
| 176 |
+
## Rate Limiting
|
| 177 |
+
|
| 178 |
+
Europe PMC is more generous than NCBI:
|
| 179 |
+
|
| 180 |
+
```python
|
| 181 |
+
# No documented hard limit, but be respectful
|
| 182 |
+
# Recommend: 10-20 requests/second max
|
| 183 |
+
# Include a contact email in the User-Agent header (good API citizenship; a "polite pool" is not formally documented by Europe PMC)
|
| 184 |
+
headers = {
|
| 185 |
+
"User-Agent": "DeepCritical/1.0 (mailto:your@email.com)"
|
| 186 |
+
}
|
| 187 |
+
```
|
| 188 |
+
|
| 189 |
+
---
|
| 190 |
+
|
| 191 |
+
## vs. The Lens & OpenAlex
|
| 192 |
+
|
| 193 |
+
| Feature | Europe PMC | The Lens | OpenAlex |
|
| 194 |
+
|---------|------------|----------|----------|
|
| 195 |
+
| Biomedical Focus | Yes | Partial | Partial |
|
| 196 |
+
| Preprints | Yes (34 servers) | Yes | Yes |
|
| 197 |
+
| Full Text | PMC papers | Links | No |
|
| 198 |
+
| Citations | Yes | Yes | Yes |
|
| 199 |
+
| Annotations | Yes (text-mined) | No | No |
|
| 200 |
+
| Rate Limits | Generous | Moderate | Very generous |
|
| 201 |
+
| API Key | Optional | Required | Optional |
|
| 202 |
+
|
| 203 |
+
---
|
| 204 |
+
|
| 205 |
+
## Sources
|
| 206 |
+
|
| 207 |
+
- [Europe PMC REST API](https://europepmc.org/RestfulWebService)
|
| 208 |
+
- [Europe PMC Annotations API](https://europepmc.org/AnnotationsApi)
|
| 209 |
+
- [Europe PMC Articles API](https://europepmc.org/ArticlesApi)
|
| 210 |
+
- [rOpenSci medrxivr](https://docs.ropensci.org/medrxivr/)
|
| 211 |
+
- [bioRxiv TDM Resources](https://www.biorxiv.org/tdm)
|
docs/brainstorming/04_OPENALEX_INTEGRATION.md
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OpenAlex Integration: The Missing Piece?
|
| 2 |
+
|
| 3 |
+
**Status**: NOT Implemented (Candidate for Addition)
|
| 4 |
+
**Priority**: HIGH - Rich Complement to Existing Tools (see Recommendation: add, don't replace)
|
| 5 |
+
**Reference**: Already implemented in `reference_repos/DeepCritical`
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## What is OpenAlex?
|
| 10 |
+
|
| 11 |
+
OpenAlex is a **fully open** index of the global research system:
|
| 12 |
+
|
| 13 |
+
- **209M+ works** (papers, books, datasets)
|
| 14 |
+
- **2B+ author records** (disambiguated)
|
| 15 |
+
- **124K+ venues** (journals, repositories)
|
| 16 |
+
- **109K+ institutions**
|
| 17 |
+
- **65K+ concepts** (hierarchical, linked to Wikidata)
|
| 18 |
+
|
| 19 |
+
**Free. Open. No API key required.**
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## Why OpenAlex for DeepCritical?
|
| 24 |
+
|
| 25 |
+
### Current Architecture
|
| 26 |
+
|
| 27 |
+
```
|
| 28 |
+
User Query
|
| 29 |
+
↓
|
| 30 |
+
┌──────────────────────────────────────┐
|
| 31 |
+
│ PubMed ClinicalTrials Europe PMC │ ← 3 separate APIs
|
| 32 |
+
└──────────────────────────────────────┘
|
| 33 |
+
↓
|
| 34 |
+
Orchestrator (deduplicate, judge, synthesize)
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
### With OpenAlex
|
| 38 |
+
|
| 39 |
+
```
|
| 40 |
+
User Query
|
| 41 |
+
↓
|
| 42 |
+
┌──────────────────────────────────────┐
|
| 43 |
+
│ OpenAlex │ ← Single API
|
| 44 |
+
│ (includes PubMed + preprints + │
|
| 45 |
+
│ citations + concepts + authors) │
|
| 46 |
+
└──────────────────────────────────────┘
|
| 47 |
+
↓
|
| 48 |
+
Orchestrator (enrich with CT.gov for trials)
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
**OpenAlex already aggregates**:
|
| 52 |
+
- PubMed/MEDLINE
|
| 53 |
+
- Crossref
|
| 54 |
+
- ORCID
|
| 55 |
+
- Unpaywall (open access links)
|
| 56 |
+
- Microsoft Academic Graph (legacy)
|
| 57 |
+
- Preprint servers
|
| 58 |
+
|
| 59 |
+
---
|
| 60 |
+
|
| 61 |
+
## Reference Implementation
|
| 62 |
+
|
| 63 |
+
From `reference_repos/DeepCritical/DeepResearch/src/tools/openalex_tools.py`:
|
| 64 |
+
|
| 65 |
+
```python
|
| 66 |
+
class OpenAlexFetchTool(ToolRunner):
|
| 67 |
+
def __init__(self):
|
| 68 |
+
super().__init__(
|
| 69 |
+
ToolSpec(
|
| 70 |
+
name="openalex_fetch",
|
| 71 |
+
description="Fetch OpenAlex work or author",
|
| 72 |
+
inputs={"entity": "TEXT", "identifier": "TEXT"},
|
| 73 |
+
outputs={"result": "JSON"},
|
| 74 |
+
)
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
def run(self, params: dict[str, Any]) -> ExecutionResult:
|
| 78 |
+
entity = params["entity"] # "works", "authors", "venues"
|
| 79 |
+
identifier = params["identifier"]
|
| 80 |
+
base = "https://api.openalex.org"
|
| 81 |
+
url = f"{base}/{entity}/{identifier}"
|
| 82 |
+
resp = requests.get(url, timeout=30)
|
| 83 |
+
return ExecutionResult(success=True, data={"result": resp.json()})
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
---
|
| 87 |
+
|
| 88 |
+
## OpenAlex API Features
|
| 89 |
+
|
| 90 |
+
### Search Works (Papers)
|
| 91 |
+
|
| 92 |
+
```python
|
| 93 |
+
# Search for metformin + cancer papers
|
| 94 |
+
url = "https://api.openalex.org/works"
|
| 95 |
+
params = {
|
| 96 |
+
"search": "metformin cancer drug repurposing",
|
| 97 |
+
"filter": "publication_year:>2020,type:article",
|
| 98 |
+
"sort": "cited_by_count:desc",
|
| 99 |
+
"per_page": 50,
|
| 100 |
+
}
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
### Rich Filtering
|
| 104 |
+
|
| 105 |
+
```python
|
| 106 |
+
# Filter examples
|
| 107 |
+
"publication_year:2023"
|
| 108 |
+
"type:article" # vs preprint, book, etc.
|
| 109 |
+
"is_oa:true" # Open access only
|
| 110 |
+
"concepts.id:C71924100" # Papers about "Medicine"
|
| 111 |
+
"authorships.institutions.id:I27837315" # From Harvard
|
| 112 |
+
"cited_by_count:>100" # Highly cited
|
| 113 |
+
"has_fulltext:true" # Full text available
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
### What You Get Back
|
| 117 |
+
|
| 118 |
+
```json
|
| 119 |
+
{
|
| 120 |
+
"id": "W2741809807",
|
| 121 |
+
"title": "Metformin: A candidate drug for...",
|
| 122 |
+
"publication_year": 2023,
|
| 123 |
+
"type": "article",
|
| 124 |
+
"cited_by_count": 45,
|
| 125 |
+
"is_oa": true,
|
| 126 |
+
"primary_location": {
|
| 127 |
+
"source": {"display_name": "Nature Medicine"},
|
| 128 |
+
"pdf_url": "https://...",
|
| 129 |
+
"landing_page_url": "https://..."
|
| 130 |
+
},
|
| 131 |
+
"concepts": [
|
| 132 |
+
{"id": "C71924100", "display_name": "Medicine", "score": 0.95},
|
| 133 |
+
{"id": "C54355233", "display_name": "Pharmacology", "score": 0.88}
|
| 134 |
+
],
|
| 135 |
+
"authorships": [
|
| 136 |
+
{
|
| 137 |
+
"author": {"id": "A123", "display_name": "John Smith"},
|
| 138 |
+
"institutions": [{"display_name": "Harvard Medical School"}]
|
| 139 |
+
}
|
| 140 |
+
],
|
| 141 |
+
"referenced_works": ["W123", "W456"], # Citations
|
| 142 |
+
"related_works": ["W789", "W012"] # Similar papers
|
| 143 |
+
}
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
---
|
| 147 |
+
|
| 148 |
+
## Key Advantages Over Current Tools
|
| 149 |
+
|
| 150 |
+
### 1. Citation Network (We Don't Have This!)
|
| 151 |
+
|
| 152 |
+
```python
|
| 153 |
+
# Get papers that cite a work
|
| 154 |
+
url = f"https://api.openalex.org/works?filter=cites:{work_id}"
|
| 155 |
+
|
| 156 |
+
# Get papers cited by a work
|
| 157 |
+
# Already in `referenced_works` field
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
### 2. Concept Tagging (We Don't Have This!)
|
| 161 |
+
|
| 162 |
+
OpenAlex auto-tags papers with hierarchical concepts:
|
| 163 |
+
- "Medicine" → "Pharmacology" → "Drug Repurposing"
|
| 164 |
+
- Can search by concept, not just keywords
|
| 165 |
+
|
| 166 |
+
### 3. Author Disambiguation (We Don't Have This!)
|
| 167 |
+
|
| 168 |
+
```python
|
| 169 |
+
# Find all works by an author
|
| 170 |
+
url = f"https://api.openalex.org/works?filter=authorships.author.id:{author_id}"
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
### 4. Institution Tracking
|
| 174 |
+
|
| 175 |
+
```python
|
| 176 |
+
# Find drug repurposing papers from top institutions
|
| 177 |
+
url = "https://api.openalex.org/works"
|
| 178 |
+
params = {
|
| 179 |
+
"search": "drug repurposing",
|
| 180 |
+
"filter": "authorships.institutions.id:I27837315", # Harvard
|
| 181 |
+
}
|
| 182 |
+
```
|
| 183 |
+
|
| 184 |
+
### 5. Related Works
|
| 185 |
+
|
| 186 |
+
Each paper comes with `related_works` - semantically similar papers discovered by OpenAlex's ML.
|
| 187 |
+
|
| 188 |
+
---
|
| 189 |
+
|
| 190 |
+
## Proposed Implementation
|
| 191 |
+
|
| 192 |
+
### New Tool: `src/tools/openalex.py`
|
| 193 |
+
|
| 194 |
+
```python
|
| 195 |
+
"""OpenAlex search tool for comprehensive scholarly data."""
|
| 196 |
+
|
| 197 |
+
import httpx
|
| 198 |
+
from src.tools.base import SearchTool
|
| 199 |
+
from src.utils.models import Evidence
|
| 200 |
+
|
| 201 |
+
class OpenAlexTool(SearchTool):
|
| 202 |
+
"""Search OpenAlex for scholarly works with rich metadata."""
|
| 203 |
+
|
| 204 |
+
name = "openalex"
|
| 205 |
+
|
| 206 |
+
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 207 |
+
async with httpx.AsyncClient() as client:
|
| 208 |
+
resp = await client.get(
|
| 209 |
+
"https://api.openalex.org/works",
|
| 210 |
+
params={
|
| 211 |
+
"search": query,
|
| 212 |
+
"filter": "type:article,is_oa:true",
|
| 213 |
+
"sort": "cited_by_count:desc",
|
| 214 |
+
"per_page": max_results,
|
| 215 |
+
"mailto": "deepcritical@example.com", # Polite pool
|
| 216 |
+
},
|
| 217 |
+
)
|
| 218 |
+
data = resp.json()
|
| 219 |
+
|
| 220 |
+
return [
|
| 221 |
+
Evidence(
|
| 222 |
+
source="openalex",
|
| 223 |
+
title=work["title"],
|
| 224 |
+
abstract=work.get("abstract", ""),  # NOTE: the OpenAlex Works object returns "abstract_inverted_index", not "abstract" — reconstruct the text from the index (as done in the Phase 15 implementation plan)
|
| 225 |
+
url=work["primary_location"]["landing_page_url"],
|
| 226 |
+
metadata={
|
| 227 |
+
"cited_by_count": work["cited_by_count"],
|
| 228 |
+
"concepts": [c["display_name"] for c in work["concepts"][:5]],
|
| 229 |
+
"is_open_access": work["is_oa"],
|
| 230 |
+
"pdf_url": work["primary_location"].get("pdf_url"),
|
| 231 |
+
},
|
| 232 |
+
)
|
| 233 |
+
for work in data["results"]
|
| 234 |
+
]
|
| 235 |
+
```
|
| 236 |
+
|
| 237 |
+
---
|
| 238 |
+
|
| 239 |
+
## Rate Limits
|
| 240 |
+
|
| 241 |
+
OpenAlex is **extremely generous**:
|
| 242 |
+
|
| 243 |
+
- No hard rate limit documented
|
| 244 |
+
- Recommended: <100,000 requests/day
|
| 245 |
+
- **Polite pool**: Add `mailto=your@email.com` param for faster responses
|
| 246 |
+
- No API key required (optional for priority support)
|
| 247 |
+
|
| 248 |
+
---
|
| 249 |
+
|
| 250 |
+
## Should We Add OpenAlex?
|
| 251 |
+
|
| 252 |
+
### Arguments FOR
|
| 253 |
+
|
| 254 |
+
1. **Already in reference repo** - proven pattern
|
| 255 |
+
2. **Richer data** - citations, concepts, authors
|
| 256 |
+
3. **Single source** - reduces API complexity
|
| 257 |
+
4. **Free & open** - no API keys required, generous rate limits (see Rate Limits above)
|
| 258 |
+
5. **Institution adoption** - Leiden, Sorbonne switched to it
|
| 259 |
+
|
| 260 |
+
### Arguments AGAINST
|
| 261 |
+
|
| 262 |
+
1. **Adds complexity** - another data source
|
| 263 |
+
2. **Overlap** - duplicates some PubMed data
|
| 264 |
+
3. **Not biomedical-focused** - covers all disciplines
|
| 265 |
+
4. **No full text** - still need PMC/Europe PMC for that
|
| 266 |
+
|
| 267 |
+
### Recommendation
|
| 268 |
+
|
| 269 |
+
**Add OpenAlex as a 4th source**, don't replace existing tools.
|
| 270 |
+
|
| 271 |
+
Use it for:
|
| 272 |
+
- Citation network analysis
|
| 273 |
+
- Concept-based discovery
|
| 274 |
+
- High-impact paper finding
|
| 275 |
+
- Author/institution tracking
|
| 276 |
+
|
| 277 |
+
Keep PubMed, ClinicalTrials, Europe PMC for:
|
| 278 |
+
- Authoritative biomedical search
|
| 279 |
+
- Clinical trial data
|
| 280 |
+
- Full-text access
|
| 281 |
+
- Preprint tracking
|
| 282 |
+
|
| 283 |
+
---
|
| 284 |
+
|
| 285 |
+
## Implementation Priority
|
| 286 |
+
|
| 287 |
+
| Task | Effort | Value |
|
| 288 |
+
|------|--------|-------|
|
| 289 |
+
| Basic search | Low | High |
|
| 290 |
+
| Citation network | Medium | Very High |
|
| 291 |
+
| Concept filtering | Low | High |
|
| 292 |
+
| Related works | Low | High |
|
| 293 |
+
| Author tracking | Medium | Medium |
|
| 294 |
+
|
| 295 |
+
---
|
| 296 |
+
|
| 297 |
+
## Sources
|
| 298 |
+
|
| 299 |
+
- [OpenAlex Documentation](https://docs.openalex.org)
|
| 300 |
+
- [OpenAlex API Overview](https://docs.openalex.org/api)
|
| 301 |
+
- [OpenAlex Wikipedia](https://en.wikipedia.org/wiki/OpenAlex)
|
| 302 |
+
- [Leiden University Announcement](https://www.leidenranking.com/information/openalex)
|
| 303 |
+
- [OpenAlex: A fully-open index (Paper)](https://arxiv.org/abs/2205.01833)
|
docs/brainstorming/implementation/15_PHASE_OPENALEX.md
ADDED
|
@@ -0,0 +1,603 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Phase 15: OpenAlex Integration
|
| 2 |
+
|
| 3 |
+
**Priority**: HIGH - Biggest bang for buck
|
| 4 |
+
**Effort**: ~2-3 hours
|
| 5 |
+
**Dependencies**: None (existing codebase patterns sufficient)
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Prerequisites (COMPLETED)
|
| 10 |
+
|
| 11 |
+
The following model changes have been implemented to support this integration:
|
| 12 |
+
|
| 13 |
+
1. **`SourceName` Literal Updated** (`src/utils/models.py:9`)
|
| 14 |
+
```python
|
| 15 |
+
SourceName = Literal["pubmed", "clinicaltrials", "europepmc", "preprint", "openalex"]
|
| 16 |
+
```
|
| 17 |
+
- Without this, `source="openalex"` would fail Pydantic validation
|
| 18 |
+
|
| 19 |
+
2. **`Evidence.metadata` Field Added** (`src/utils/models.py:39-42`)
|
| 20 |
+
```python
|
| 21 |
+
metadata: dict[str, Any] = Field(
|
| 22 |
+
default_factory=dict,
|
| 23 |
+
description="Additional metadata (e.g., cited_by_count, concepts, is_open_access)",
|
| 24 |
+
)
|
| 25 |
+
```
|
| 26 |
+
- Required for storing `cited_by_count`, `concepts`, etc.
|
| 27 |
+
- Model is still frozen - metadata must be passed at construction time
|
| 28 |
+
|
| 29 |
+
3. **`__init__.py` Exports Updated** (`src/tools/__init__.py`)
|
| 30 |
+
- All tools are now exported: `ClinicalTrialsTool`, `EuropePMCTool`, `PubMedTool`
|
| 31 |
+
- OpenAlexTool should be added here after implementation
|
| 32 |
+
|
| 33 |
+
---
|
| 34 |
+
|
| 35 |
+
## Overview
|
| 36 |
+
|
| 37 |
+
Add OpenAlex as a 4th data source for comprehensive scholarly data including:
|
| 38 |
+
- Citation networks (who cites whom)
|
| 39 |
+
- Concept tagging (hierarchical topic classification)
|
| 40 |
+
- Author disambiguation
|
| 41 |
+
- 209M+ works indexed
|
| 42 |
+
|
| 43 |
+
**Why OpenAlex?**
|
| 44 |
+
- Free, no API key required
|
| 45 |
+
- Already implemented in reference repo
|
| 46 |
+
- Provides citation data we don't have
|
| 47 |
+
- Aggregates PubMed + preprints + more
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
## TDD Implementation Plan
|
| 52 |
+
|
| 53 |
+
### Step 1: Write the Tests First
|
| 54 |
+
|
| 55 |
+
**File**: `tests/unit/tools/test_openalex.py`
|
| 56 |
+
|
| 57 |
+
```python
|
| 58 |
+
"""Tests for OpenAlex search tool."""
|
| 59 |
+
|
| 60 |
+
import pytest
|
| 61 |
+
import respx
|
| 62 |
+
from httpx import Response
|
| 63 |
+
|
| 64 |
+
from src.tools.openalex import OpenAlexTool
|
| 65 |
+
from src.utils.models import Evidence
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
class TestOpenAlexTool:
|
| 69 |
+
"""Test suite for OpenAlex search functionality."""
|
| 70 |
+
|
| 71 |
+
@pytest.fixture
|
| 72 |
+
def tool(self) -> OpenAlexTool:
|
| 73 |
+
return OpenAlexTool()
|
| 74 |
+
|
| 75 |
+
def test_name_property(self, tool: OpenAlexTool) -> None:
|
| 76 |
+
"""Tool should identify itself as 'openalex'."""
|
| 77 |
+
assert tool.name == "openalex"
|
| 78 |
+
|
| 79 |
+
@respx.mock
|
| 80 |
+
@pytest.mark.asyncio
|
| 81 |
+
async def test_search_returns_evidence(self, tool: OpenAlexTool) -> None:
|
| 82 |
+
"""Search should return list of Evidence objects."""
|
| 83 |
+
mock_response = {
|
| 84 |
+
"results": [
|
| 85 |
+
{
|
| 86 |
+
"id": "W2741809807",
|
| 87 |
+
"title": "Metformin and cancer: A systematic review",
|
| 88 |
+
"publication_year": 2023,
|
| 89 |
+
"cited_by_count": 45,
|
| 90 |
+
"type": "article",
|
| 91 |
+
"is_oa": True,
|
| 92 |
+
"primary_location": {
|
| 93 |
+
"source": {"display_name": "Nature Medicine"},
|
| 94 |
+
"landing_page_url": "https://doi.org/10.1038/example",
|
| 95 |
+
"pdf_url": None,
|
| 96 |
+
},
|
| 97 |
+
"abstract_inverted_index": {
|
| 98 |
+
"Metformin": [0],
|
| 99 |
+
"shows": [1],
|
| 100 |
+
"anticancer": [2],
|
| 101 |
+
"effects": [3],
|
| 102 |
+
},
|
| 103 |
+
"concepts": [
|
| 104 |
+
{"display_name": "Medicine", "score": 0.95},
|
| 105 |
+
{"display_name": "Oncology", "score": 0.88},
|
| 106 |
+
],
|
| 107 |
+
"authorships": [
|
| 108 |
+
{
|
| 109 |
+
"author": {"display_name": "John Smith"},
|
| 110 |
+
"institutions": [{"display_name": "Harvard"}],
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
}
|
| 114 |
+
]
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
respx.get("https://api.openalex.org/works").mock(
|
| 118 |
+
return_value=Response(200, json=mock_response)
|
| 119 |
+
)
|
| 120 |
+
|
| 121 |
+
results = await tool.search("metformin cancer", max_results=10)
|
| 122 |
+
|
| 123 |
+
assert len(results) == 1
|
| 124 |
+
assert isinstance(results[0], Evidence)
|
| 125 |
+
assert "Metformin and cancer" in results[0].citation.title
|
| 126 |
+
assert results[0].citation.source == "openalex"
|
| 127 |
+
|
| 128 |
+
@respx.mock
|
| 129 |
+
@pytest.mark.asyncio
|
| 130 |
+
async def test_search_empty_results(self, tool: OpenAlexTool) -> None:
|
| 131 |
+
"""Search with no results should return empty list."""
|
| 132 |
+
respx.get("https://api.openalex.org/works").mock(
|
| 133 |
+
return_value=Response(200, json={"results": []})
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
results = await tool.search("xyznonexistentquery123")
|
| 137 |
+
assert results == []
|
| 138 |
+
|
| 139 |
+
@respx.mock
|
| 140 |
+
@pytest.mark.asyncio
|
| 141 |
+
async def test_search_handles_missing_abstract(self, tool: OpenAlexTool) -> None:
|
| 142 |
+
"""Tool should handle papers without abstracts."""
|
| 143 |
+
mock_response = {
|
| 144 |
+
"results": [
|
| 145 |
+
{
|
| 146 |
+
"id": "W123",
|
| 147 |
+
"title": "Paper without abstract",
|
| 148 |
+
"publication_year": 2023,
|
| 149 |
+
"cited_by_count": 10,
|
| 150 |
+
"type": "article",
|
| 151 |
+
"is_oa": False,
|
| 152 |
+
"primary_location": {
|
| 153 |
+
"source": {"display_name": "Journal"},
|
| 154 |
+
"landing_page_url": "https://example.com",
|
| 155 |
+
},
|
| 156 |
+
"abstract_inverted_index": None,
|
| 157 |
+
"concepts": [],
|
| 158 |
+
"authorships": [],
|
| 159 |
+
}
|
| 160 |
+
]
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
respx.get("https://api.openalex.org/works").mock(
|
| 164 |
+
return_value=Response(200, json=mock_response)
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
results = await tool.search("test query")
|
| 168 |
+
assert len(results) == 1
|
| 169 |
+
assert results[0].content == "" # No abstract
|
| 170 |
+
|
| 171 |
+
@respx.mock
|
| 172 |
+
@pytest.mark.asyncio
|
| 173 |
+
async def test_search_extracts_citation_count(self, tool: OpenAlexTool) -> None:
|
| 174 |
+
"""Citation count should be in metadata."""
|
| 175 |
+
mock_response = {
|
| 176 |
+
"results": [
|
| 177 |
+
{
|
| 178 |
+
"id": "W456",
|
| 179 |
+
"title": "Highly cited paper",
|
| 180 |
+
"publication_year": 2020,
|
| 181 |
+
"cited_by_count": 500,
|
| 182 |
+
"type": "article",
|
| 183 |
+
"is_oa": True,
|
| 184 |
+
"primary_location": {
|
| 185 |
+
"source": {"display_name": "Science"},
|
| 186 |
+
"landing_page_url": "https://example.com",
|
| 187 |
+
},
|
| 188 |
+
"abstract_inverted_index": {"Test": [0]},
|
| 189 |
+
"concepts": [],
|
| 190 |
+
"authorships": [],
|
| 191 |
+
}
|
| 192 |
+
]
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
respx.get("https://api.openalex.org/works").mock(
|
| 196 |
+
return_value=Response(200, json=mock_response)
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
results = await tool.search("highly cited")
|
| 200 |
+
assert results[0].metadata["cited_by_count"] == 500
|
| 201 |
+
|
| 202 |
+
@respx.mock
|
| 203 |
+
@pytest.mark.asyncio
|
| 204 |
+
async def test_search_extracts_concepts(self, tool: OpenAlexTool) -> None:
|
| 205 |
+
"""Concepts should be extracted for semantic discovery."""
|
| 206 |
+
mock_response = {
|
| 207 |
+
"results": [
|
| 208 |
+
{
|
| 209 |
+
"id": "W789",
|
| 210 |
+
"title": "Drug repurposing study",
|
| 211 |
+
"publication_year": 2023,
|
| 212 |
+
"cited_by_count": 25,
|
| 213 |
+
"type": "article",
|
| 214 |
+
"is_oa": True,
|
| 215 |
+
"primary_location": {
|
| 216 |
+
"source": {"display_name": "PLOS ONE"},
|
| 217 |
+
"landing_page_url": "https://example.com",
|
| 218 |
+
},
|
| 219 |
+
"abstract_inverted_index": {"Drug": [0], "repurposing": [1]},
|
| 220 |
+
"concepts": [
|
| 221 |
+
{"display_name": "Pharmacology", "score": 0.92},
|
| 222 |
+
{"display_name": "Drug Discovery", "score": 0.85},
|
| 223 |
+
{"display_name": "Medicine", "score": 0.80},
|
| 224 |
+
],
|
| 225 |
+
"authorships": [],
|
| 226 |
+
}
|
| 227 |
+
]
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
respx.get("https://api.openalex.org/works").mock(
|
| 231 |
+
return_value=Response(200, json=mock_response)
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
+
results = await tool.search("drug repurposing")
|
| 235 |
+
assert "Pharmacology" in results[0].metadata["concepts"]
|
| 236 |
+
assert "Drug Discovery" in results[0].metadata["concepts"]
|
| 237 |
+
|
| 238 |
+
@respx.mock
|
| 239 |
+
@pytest.mark.asyncio
|
| 240 |
+
async def test_search_api_error_raises_search_error(
|
| 241 |
+
self, tool: OpenAlexTool
|
| 242 |
+
) -> None:
|
| 243 |
+
"""API errors should raise SearchError."""
|
| 244 |
+
from src.utils.exceptions import SearchError
|
| 245 |
+
|
| 246 |
+
respx.get("https://api.openalex.org/works").mock(
|
| 247 |
+
return_value=Response(500, text="Internal Server Error")
|
| 248 |
+
)
|
| 249 |
+
|
| 250 |
+
with pytest.raises(SearchError):
|
| 251 |
+
await tool.search("test query")
|
| 252 |
+
|
| 253 |
+
def test_reconstruct_abstract(self, tool: OpenAlexTool) -> None:
|
| 254 |
+
"""Test abstract reconstruction from inverted index."""
|
| 255 |
+
inverted_index = {
|
| 256 |
+
"Metformin": [0, 5],
|
| 257 |
+
"is": [1],
|
| 258 |
+
"a": [2],
|
| 259 |
+
"diabetes": [3],
|
| 260 |
+
"drug": [4],
|
| 261 |
+
"effective": [6],
|
| 262 |
+
}
|
| 263 |
+
abstract = tool._reconstruct_abstract(inverted_index)
|
| 264 |
+
assert abstract == "Metformin is a diabetes drug Metformin effective"
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
---
|
| 268 |
+
|
| 269 |
+
### Step 2: Create the Implementation
|
| 270 |
+
|
| 271 |
+
**File**: `src/tools/openalex.py`
|
| 272 |
+
|
| 273 |
+
```python
|
| 274 |
+
"""OpenAlex search tool for comprehensive scholarly data."""
|
| 275 |
+
|
| 276 |
+
from typing import Any
|
| 277 |
+
|
| 278 |
+
import httpx
|
| 279 |
+
from tenacity import retry, stop_after_attempt, wait_exponential
|
| 280 |
+
|
| 281 |
+
from src.utils.exceptions import SearchError
|
| 282 |
+
from src.utils.models import Citation, Evidence
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
class OpenAlexTool:
|
| 286 |
+
"""
|
| 287 |
+
Search OpenAlex for scholarly works with rich metadata.
|
| 288 |
+
|
| 289 |
+
OpenAlex provides:
|
| 290 |
+
- 209M+ scholarly works
|
| 291 |
+
- Citation counts and networks
|
| 292 |
+
- Concept tagging (hierarchical)
|
| 293 |
+
- Author disambiguation
|
| 294 |
+
- Open access links
|
| 295 |
+
|
| 296 |
+
API Docs: https://docs.openalex.org/
|
| 297 |
+
"""
|
| 298 |
+
|
| 299 |
+
BASE_URL = "https://api.openalex.org/works"
|
| 300 |
+
|
| 301 |
+
def __init__(self, email: str | None = None) -> None:
|
| 302 |
+
"""
|
| 303 |
+
Initialize OpenAlex tool.
|
| 304 |
+
|
| 305 |
+
Args:
|
| 306 |
+
email: Optional email for polite pool (faster responses)
|
| 307 |
+
"""
|
| 308 |
+
self.email = email or "deepcritical@example.com"
|
| 309 |
+
|
| 310 |
+
@property
|
| 311 |
+
def name(self) -> str:
|
| 312 |
+
return "openalex"
|
| 313 |
+
|
| 314 |
+
@retry(
|
| 315 |
+
stop=stop_after_attempt(3),
|
| 316 |
+
wait=wait_exponential(multiplier=1, min=1, max=10),
|
| 317 |
+
reraise=True,
|
| 318 |
+
)
|
| 319 |
+
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 320 |
+
"""
|
| 321 |
+
Search OpenAlex for scholarly works.
|
| 322 |
+
|
| 323 |
+
Args:
|
| 324 |
+
query: Search terms
|
| 325 |
+
max_results: Maximum results to return (max 200 per request)
|
| 326 |
+
|
| 327 |
+
Returns:
|
| 328 |
+
List of Evidence objects with citation metadata
|
| 329 |
+
|
| 330 |
+
Raises:
|
| 331 |
+
SearchError: If API request fails
|
| 332 |
+
"""
|
| 333 |
+
params = {
|
| 334 |
+
"search": query,
|
| 335 |
+
"filter": "type:article", # Only peer-reviewed articles
|
| 336 |
+
"sort": "cited_by_count:desc", # Most cited first
|
| 337 |
+
"per_page": min(max_results, 200),
|
| 338 |
+
"mailto": self.email, # Polite pool for faster responses
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
async with httpx.AsyncClient(timeout=30.0) as client:
|
| 342 |
+
try:
|
| 343 |
+
response = await client.get(self.BASE_URL, params=params)
|
| 344 |
+
response.raise_for_status()
|
| 345 |
+
|
| 346 |
+
data = response.json()
|
| 347 |
+
results = data.get("results", [])
|
| 348 |
+
|
| 349 |
+
return [self._to_evidence(work) for work in results[:max_results]]
|
| 350 |
+
|
| 351 |
+
except httpx.HTTPStatusError as e:
|
| 352 |
+
raise SearchError(f"OpenAlex API error: {e}") from e
|
| 353 |
+
except httpx.RequestError as e:
|
| 354 |
+
raise SearchError(f"OpenAlex connection failed: {e}") from e
|
| 355 |
+
|
| 356 |
+
def _to_evidence(self, work: dict[str, Any]) -> Evidence:
|
| 357 |
+
"""Convert OpenAlex work to Evidence object."""
|
| 358 |
+
title = work.get("title", "Untitled")
|
| 359 |
+
pub_year = work.get("publication_year", "Unknown")
|
| 360 |
+
cited_by = work.get("cited_by_count", 0)
|
| 361 |
+
is_oa = work.get("is_oa", False)
|
| 362 |
+
|
| 363 |
+
# Reconstruct abstract from inverted index
|
| 364 |
+
abstract_index = work.get("abstract_inverted_index")
|
| 365 |
+
abstract = self._reconstruct_abstract(abstract_index) if abstract_index else ""
|
| 366 |
+
|
| 367 |
+
# Extract concepts (top 5)
|
| 368 |
+
concepts = [
|
| 369 |
+
c.get("display_name", "")
|
| 370 |
+
for c in work.get("concepts", [])[:5]
|
| 371 |
+
if c.get("display_name")
|
| 372 |
+
]
|
| 373 |
+
|
| 374 |
+
# Extract authors (top 5)
|
| 375 |
+
authorships = work.get("authorships", [])
|
| 376 |
+
authors = [
|
| 377 |
+
a.get("author", {}).get("display_name", "")
|
| 378 |
+
for a in authorships[:5]
|
| 379 |
+
if a.get("author", {}).get("display_name")
|
| 380 |
+
]
|
| 381 |
+
|
| 382 |
+
# Get URL
|
| 383 |
+
primary_loc = work.get("primary_location") or {}
|
| 384 |
+
url = primary_loc.get("landing_page_url", "")
|
| 385 |
+
if not url:
|
| 386 |
+
# Fallback to OpenAlex page
|
| 387 |
+
work_id = work.get("id", "").replace("https://openalex.org/", "")
|
| 388 |
+
url = f"https://openalex.org/{work_id}"
|
| 389 |
+
|
| 390 |
+
return Evidence(
|
| 391 |
+
content=abstract[:2000],
|
| 392 |
+
citation=Citation(
|
| 393 |
+
source="openalex",
|
| 394 |
+
title=title[:500],
|
| 395 |
+
url=url,
|
| 396 |
+
date=str(pub_year),
|
| 397 |
+
authors=authors,
|
| 398 |
+
),
|
| 399 |
+
relevance=min(0.9, 0.5 + (cited_by / 1000)), # Boost by citations
|
| 400 |
+
metadata={
|
| 401 |
+
"cited_by_count": cited_by,
|
| 402 |
+
"is_open_access": is_oa,
|
| 403 |
+
"concepts": concepts,
|
| 404 |
+
"pdf_url": primary_loc.get("pdf_url"),
|
| 405 |
+
},
|
| 406 |
+
)
|
| 407 |
+
|
| 408 |
+
def _reconstruct_abstract(
|
| 409 |
+
self, inverted_index: dict[str, list[int]]
|
| 410 |
+
) -> str:
|
| 411 |
+
"""
|
| 412 |
+
Reconstruct abstract from OpenAlex inverted index format.
|
| 413 |
+
|
| 414 |
+
OpenAlex stores abstracts as {"word": [position1, position2, ...]}.
|
| 415 |
+
This rebuilds the original text.
|
| 416 |
+
"""
|
| 417 |
+
if not inverted_index:
|
| 418 |
+
return ""
|
| 419 |
+
|
| 420 |
+
# Build position -> word mapping
|
| 421 |
+
position_word: dict[int, str] = {}
|
| 422 |
+
for word, positions in inverted_index.items():
|
| 423 |
+
for pos in positions:
|
| 424 |
+
position_word[pos] = word
|
| 425 |
+
|
| 426 |
+
# Reconstruct in order
|
| 427 |
+
if not position_word:
|
| 428 |
+
return ""
|
| 429 |
+
|
| 430 |
+
max_pos = max(position_word.keys())
|
| 431 |
+
words = [position_word.get(i, "") for i in range(max_pos + 1)]
|
| 432 |
+
return " ".join(w for w in words if w)
|
| 433 |
+
```
|
| 434 |
+
|
| 435 |
+
---
|
| 436 |
+
|
| 437 |
+
### Step 3: Register in Search Handler
|
| 438 |
+
|
| 439 |
+
**File**: `src/tools/search_handler.py` (add to imports and tool list)
|
| 440 |
+
|
| 441 |
+
```python
|
| 442 |
+
# Add import
|
| 443 |
+
from src.tools.openalex import OpenAlexTool
|
| 444 |
+
|
| 445 |
+
# Add to _create_tools method
|
| 446 |
+
def _create_tools(self) -> list[SearchTool]:
|
| 447 |
+
return [
|
| 448 |
+
PubMedTool(),
|
| 449 |
+
ClinicalTrialsTool(),
|
| 450 |
+
EuropePMCTool(),
|
| 451 |
+
OpenAlexTool(), # NEW
|
| 452 |
+
]
|
| 453 |
+
```
|
| 454 |
+
|
| 455 |
+
---
|
| 456 |
+
|
| 457 |
+
### Step 4: Update `__init__.py`
|
| 458 |
+
|
| 459 |
+
**File**: `src/tools/__init__.py`
|
| 460 |
+
|
| 461 |
+
```python
|
| 462 |
+
from src.tools.openalex import OpenAlexTool
|
| 463 |
+
|
| 464 |
+
__all__ = [
|
| 465 |
+
"PubMedTool",
|
| 466 |
+
"ClinicalTrialsTool",
|
| 467 |
+
"EuropePMCTool",
|
| 468 |
+
"OpenAlexTool", # NEW
|
| 469 |
+
# ...
|
| 470 |
+
]
|
| 471 |
+
```
|
| 472 |
+
|
| 473 |
+
---
|
| 474 |
+
|
| 475 |
+
## Demo Script
|
| 476 |
+
|
| 477 |
+
**File**: `examples/openalex_demo.py`
|
| 478 |
+
|
| 479 |
+
```python
|
| 480 |
+
#!/usr/bin/env python3
|
| 481 |
+
"""Demo script to verify OpenAlex integration."""
|
| 482 |
+
|
| 483 |
+
import asyncio
|
| 484 |
+
from src.tools.openalex import OpenAlexTool
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
async def main():
|
| 488 |
+
"""Run OpenAlex search demo."""
|
| 489 |
+
tool = OpenAlexTool()
|
| 490 |
+
|
| 491 |
+
print("=" * 60)
|
| 492 |
+
print("OpenAlex Integration Demo")
|
| 493 |
+
print("=" * 60)
|
| 494 |
+
|
| 495 |
+
# Test 1: Basic drug repurposing search
|
| 496 |
+
print("\n[Test 1] Searching for 'metformin cancer drug repurposing'...")
|
| 497 |
+
results = await tool.search("metformin cancer drug repurposing", max_results=5)
|
| 498 |
+
|
| 499 |
+
for i, evidence in enumerate(results, 1):
|
| 500 |
+
print(f"\n--- Result {i} ---")
|
| 501 |
+
print(f"Title: {evidence.citation.title}")
|
| 502 |
+
print(f"Year: {evidence.citation.date}")
|
| 503 |
+
print(f"Citations: {evidence.metadata.get('cited_by_count', 'N/A')}")
|
| 504 |
+
print(f"Concepts: {', '.join(evidence.metadata.get('concepts', []))}")
|
| 505 |
+
print(f"Open Access: {evidence.metadata.get('is_open_access', False)}")
|
| 506 |
+
print(f"URL: {evidence.citation.url}")
|
| 507 |
+
if evidence.content:
|
| 508 |
+
print(f"Abstract: {evidence.content[:200]}...")
|
| 509 |
+
|
| 510 |
+
# Test 2: High-impact papers
|
| 511 |
+
print("\n" + "=" * 60)
|
| 512 |
+
print("[Test 2] Finding highly-cited papers on 'long COVID treatment'...")
|
| 513 |
+
results = await tool.search("long COVID treatment", max_results=3)
|
| 514 |
+
|
| 515 |
+
for evidence in results:
|
| 516 |
+
print(f"\n- {evidence.citation.title}")
|
| 517 |
+
print(f" Citations: {evidence.metadata.get('cited_by_count', 0)}")
|
| 518 |
+
|
| 519 |
+
print("\n" + "=" * 60)
|
| 520 |
+
print("Demo complete!")
|
| 521 |
+
|
| 522 |
+
|
| 523 |
+
if __name__ == "__main__":
|
| 524 |
+
asyncio.run(main())
|
| 525 |
+
```
|
| 526 |
+
|
| 527 |
+
---
|
| 528 |
+
|
| 529 |
+
## Verification Checklist
|
| 530 |
+
|
| 531 |
+
### Unit Tests
|
| 532 |
+
```bash
|
| 533 |
+
# Run just OpenAlex tests
|
| 534 |
+
uv run pytest tests/unit/tools/test_openalex.py -v
|
| 535 |
+
|
| 536 |
+
# Expected: All tests pass
|
| 537 |
+
```
|
| 538 |
+
|
| 539 |
+
### Integration Test (Manual)
|
| 540 |
+
```bash
|
| 541 |
+
# Run demo script with real API
|
| 542 |
+
uv run python examples/openalex_demo.py
|
| 543 |
+
|
| 544 |
+
# Expected: Real results from OpenAlex API
|
| 545 |
+
```
|
| 546 |
+
|
| 547 |
+
### Full Test Suite
|
| 548 |
+
```bash
|
| 549 |
+
# Ensure nothing broke
|
| 550 |
+
make check
|
| 551 |
+
|
| 552 |
+
# Expected: All 110+ tests pass, mypy clean
|
| 553 |
+
```
|
| 554 |
+
|
| 555 |
+
---
|
| 556 |
+
|
| 557 |
+
## Success Criteria
|
| 558 |
+
|
| 559 |
+
1. **Unit tests pass**: All mocked tests in `test_openalex.py` pass
|
| 560 |
+
2. **Integration works**: Demo script returns real results
|
| 561 |
+
3. **No regressions**: `make check` passes completely
|
| 562 |
+
4. **SearchHandler integration**: OpenAlex appears in search results alongside other sources
|
| 563 |
+
5. **Citation metadata**: Results include `cited_by_count`, `concepts`, `is_open_access`
|
| 564 |
+
|
| 565 |
+
---
|
| 566 |
+
|
| 567 |
+
## Future Enhancements (P2)
|
| 568 |
+
|
| 569 |
+
Once basic integration works:
|
| 570 |
+
|
| 571 |
+
1. **Citation Network Queries**
|
| 572 |
+
```python
|
| 573 |
+
# Get papers citing a specific work
|
| 574 |
+
async def get_citing_works(self, work_id: str) -> list[Evidence]:
|
| 575 |
+
params = {"filter": f"cites:{work_id}"}
|
| 576 |
+
...
|
| 577 |
+
```
|
| 578 |
+
|
| 579 |
+
2. **Concept-Based Search**
|
| 580 |
+
```python
|
| 581 |
+
# Search by OpenAlex concept ID
|
| 582 |
+
async def search_by_concept(self, concept_id: str) -> list[Evidence]:
|
| 583 |
+
params = {"filter": f"concepts.id:{concept_id}"}
|
| 584 |
+
...
|
| 585 |
+
```
|
| 586 |
+
|
| 587 |
+
3. **Author Tracking**
|
| 588 |
+
```python
|
| 589 |
+
# Find all works by an author
|
| 590 |
+
async def search_by_author(self, author_id: str) -> list[Evidence]:
|
| 591 |
+
params = {"filter": f"authorships.author.id:{author_id}"}
|
| 592 |
+
...
|
| 593 |
+
```
|
| 594 |
+
|
| 595 |
+
---
|
| 596 |
+
|
| 597 |
+
## Notes
|
| 598 |
+
|
| 599 |
+
- OpenAlex is **very generous** with rate limits (no documented hard limit)
|
| 600 |
+
- Adding `mailto` parameter gives priority access (polite pool)
|
| 601 |
+
- Abstract is stored as inverted index - must reconstruct
|
| 602 |
+
- Citation count is a good proxy for paper quality/impact
|
| 603 |
+
- Consider caching responses for repeated queries
|
docs/brainstorming/implementation/16_PHASE_PUBMED_FULLTEXT.md
ADDED
|
@@ -0,0 +1,586 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Phase 16: PubMed Full-Text Retrieval
|
| 2 |
+
|
| 3 |
+
**Priority**: MEDIUM - Enhances evidence quality
|
| 4 |
+
**Effort**: ~3 hours
|
| 5 |
+
**Dependencies**: None (existing PubMed tool sufficient)
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Prerequisites (COMPLETED)
|
| 10 |
+
|
| 11 |
+
The `Evidence.metadata` field has been added to `src/utils/models.py` to support:
|
| 12 |
+
```python
|
| 13 |
+
metadata={"has_fulltext": True}
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
## Architecture Decision: Constructor Parameter vs Method Parameter
|
| 19 |
+
|
| 20 |
+
**IMPORTANT**: The original spec proposed `include_fulltext` as a method parameter:
|
| 21 |
+
```python
|
| 22 |
+
# WRONG - SearchHandler won't pass this parameter
|
| 23 |
+
async def search(self, query: str, max_results: int = 10, include_fulltext: bool = False):
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
**Problem**: `SearchHandler` calls `tool.search(query, max_results)` uniformly across all tools.
|
| 27 |
+
It has no mechanism to pass tool-specific parameters like `include_fulltext`.
|
| 28 |
+
|
| 29 |
+
**Solution**: Use constructor parameter instead:
|
| 30 |
+
```python
|
| 31 |
+
# CORRECT - Configured at instantiation time
|
| 32 |
+
class PubMedTool:
|
| 33 |
+
def __init__(self, api_key: str | None = None, include_fulltext: bool = False):
|
| 34 |
+
self.include_fulltext = include_fulltext
|
| 35 |
+
...
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
This way, you can create a full-text-enabled PubMed tool:
|
| 39 |
+
```python
|
| 40 |
+
# In orchestrator or wherever tools are created
|
| 41 |
+
tools = [
|
| 42 |
+
PubMedTool(include_fulltext=True), # Full-text enabled
|
| 43 |
+
ClinicalTrialsTool(),
|
| 44 |
+
EuropePMCTool(),
|
| 45 |
+
]
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
---
|
| 49 |
+
|
| 50 |
+
## Overview
|
| 51 |
+
|
| 52 |
+
Add full-text retrieval for PubMed papers via the BioC API, enabling:
|
| 53 |
+
- Complete paper text for open-access PMC papers
|
| 54 |
+
- Structured sections (intro, methods, results, discussion)
|
| 55 |
+
- Better evidence for LLM synthesis
|
| 56 |
+
|
| 57 |
+
**Why Full-Text?**
|
| 58 |
+
- Abstracts only give ~200-300 words
|
| 59 |
+
- Full text provides detailed methods, results, figures
|
| 60 |
+
- Reference repo already has this implemented
|
| 61 |
+
- Makes LLM judgments more accurate
|
| 62 |
+
|
| 63 |
+
---
|
| 64 |
+
|
| 65 |
+
## TDD Implementation Plan
|
| 66 |
+
|
| 67 |
+
### Step 1: Write the Tests First
|
| 68 |
+
|
| 69 |
+
**File**: `tests/unit/tools/test_pubmed_fulltext.py`
|
| 70 |
+
|
| 71 |
+
```python
|
| 72 |
+
"""Tests for PubMed full-text retrieval."""
|
| 73 |
+
|
| 74 |
+
import pytest
|
| 75 |
+
import respx
|
| 76 |
+
from httpx import Response
|
| 77 |
+
|
| 78 |
+
from src.tools.pubmed import PubMedTool
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class TestPubMedFullText:
|
| 82 |
+
"""Test suite for PubMed full-text functionality."""
|
| 83 |
+
|
| 84 |
+
@pytest.fixture
|
| 85 |
+
def tool(self) -> PubMedTool:
|
| 86 |
+
return PubMedTool()
|
| 87 |
+
|
| 88 |
+
@respx.mock
|
| 89 |
+
@pytest.mark.asyncio
|
| 90 |
+
async def test_get_pmc_id_success(self, tool: PubMedTool) -> None:
|
| 91 |
+
"""Should convert PMID to PMCID for full-text access."""
|
| 92 |
+
mock_response = {
|
| 93 |
+
"records": [
|
| 94 |
+
{
|
| 95 |
+
"pmid": "12345678",
|
| 96 |
+
"pmcid": "PMC1234567",
|
| 97 |
+
}
|
| 98 |
+
]
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
respx.get("https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/").mock(
|
| 102 |
+
return_value=Response(200, json=mock_response)
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
pmcid = await tool.get_pmc_id("12345678")
|
| 106 |
+
assert pmcid == "PMC1234567"
|
| 107 |
+
|
| 108 |
+
@respx.mock
|
| 109 |
+
@pytest.mark.asyncio
|
| 110 |
+
async def test_get_pmc_id_not_in_pmc(self, tool: PubMedTool) -> None:
|
| 111 |
+
"""Should return None if paper not in PMC."""
|
| 112 |
+
mock_response = {
|
| 113 |
+
"records": [
|
| 114 |
+
{
|
| 115 |
+
"pmid": "12345678",
|
| 116 |
+
# No pmcid means not in PMC
|
| 117 |
+
}
|
| 118 |
+
]
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
respx.get("https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/").mock(
|
| 122 |
+
return_value=Response(200, json=mock_response)
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
pmcid = await tool.get_pmc_id("12345678")
|
| 126 |
+
assert pmcid is None
|
| 127 |
+
|
| 128 |
+
@respx.mock
|
| 129 |
+
@pytest.mark.asyncio
|
| 130 |
+
async def test_get_fulltext_success(self, tool: PubMedTool) -> None:
|
| 131 |
+
"""Should retrieve full text for PMC papers."""
|
| 132 |
+
# Mock BioC API response
|
| 133 |
+
mock_bioc = {
|
| 134 |
+
"documents": [
|
| 135 |
+
{
|
| 136 |
+
"passages": [
|
| 137 |
+
{
|
| 138 |
+
"infons": {"section_type": "INTRO"},
|
| 139 |
+
"text": "Introduction text here.",
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"infons": {"section_type": "METHODS"},
|
| 143 |
+
"text": "Methods description here.",
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"infons": {"section_type": "RESULTS"},
|
| 147 |
+
"text": "Results summary here.",
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"infons": {"section_type": "DISCUSS"},
|
| 151 |
+
"text": "Discussion and conclusions.",
|
| 152 |
+
},
|
| 153 |
+
]
|
| 154 |
+
}
|
| 155 |
+
]
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
respx.get(
|
| 159 |
+
"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/12345678/unicode"
|
| 160 |
+
).mock(return_value=Response(200, json=mock_bioc))
|
| 161 |
+
|
| 162 |
+
fulltext = await tool.get_fulltext("12345678")
|
| 163 |
+
|
| 164 |
+
assert fulltext is not None
|
| 165 |
+
assert "Introduction text here" in fulltext
|
| 166 |
+
assert "Methods description here" in fulltext
|
| 167 |
+
assert "Results summary here" in fulltext
|
| 168 |
+
|
| 169 |
+
@respx.mock
|
| 170 |
+
@pytest.mark.asyncio
|
| 171 |
+
async def test_get_fulltext_not_available(self, tool: PubMedTool) -> None:
|
| 172 |
+
"""Should return None if full text not available."""
|
| 173 |
+
respx.get(
|
| 174 |
+
"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/99999999/unicode"
|
| 175 |
+
).mock(return_value=Response(404))
|
| 176 |
+
|
| 177 |
+
fulltext = await tool.get_fulltext("99999999")
|
| 178 |
+
assert fulltext is None
|
| 179 |
+
|
| 180 |
+
@respx.mock
|
| 181 |
+
@pytest.mark.asyncio
|
| 182 |
+
async def test_get_fulltext_structured(self, tool: PubMedTool) -> None:
|
| 183 |
+
"""Should return structured sections dict."""
|
| 184 |
+
mock_bioc = {
|
| 185 |
+
"documents": [
|
| 186 |
+
{
|
| 187 |
+
"passages": [
|
| 188 |
+
{"infons": {"section_type": "INTRO"}, "text": "Intro..."},
|
| 189 |
+
{"infons": {"section_type": "METHODS"}, "text": "Methods..."},
|
| 190 |
+
{"infons": {"section_type": "RESULTS"}, "text": "Results..."},
|
| 191 |
+
{"infons": {"section_type": "DISCUSS"}, "text": "Discussion..."},
|
| 192 |
+
]
|
| 193 |
+
}
|
| 194 |
+
]
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
respx.get(
|
| 198 |
+
"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/12345678/unicode"
|
| 199 |
+
).mock(return_value=Response(200, json=mock_bioc))
|
| 200 |
+
|
| 201 |
+
sections = await tool.get_fulltext_structured("12345678")
|
| 202 |
+
|
| 203 |
+
assert sections is not None
|
| 204 |
+
assert "introduction" in sections
|
| 205 |
+
assert "methods" in sections
|
| 206 |
+
assert "results" in sections
|
| 207 |
+
assert "discussion" in sections
|
| 208 |
+
|
| 209 |
+
@respx.mock
|
| 210 |
+
@pytest.mark.asyncio
|
| 211 |
+
async def test_search_with_fulltext_enabled(self) -> None:
|
| 212 |
+
"""Search should include full text when tool is configured for it."""
|
| 213 |
+
# Create tool WITH full-text enabled via constructor
|
| 214 |
+
tool = PubMedTool(include_fulltext=True)
|
| 215 |
+
|
| 216 |
+
# Mock esearch
|
| 217 |
+
respx.get("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi").mock(
|
| 218 |
+
return_value=Response(
|
| 219 |
+
200, json={"esearchresult": {"idlist": ["12345678"]}}
|
| 220 |
+
)
|
| 221 |
+
)
|
| 222 |
+
|
| 223 |
+
# Mock efetch (abstract)
|
| 224 |
+
mock_xml = """
|
| 225 |
+
<PubmedArticleSet>
|
| 226 |
+
<PubmedArticle>
|
| 227 |
+
<MedlineCitation>
|
| 228 |
+
<PMID>12345678</PMID>
|
| 229 |
+
<Article>
|
| 230 |
+
<ArticleTitle>Test Paper</ArticleTitle>
|
| 231 |
+
<Abstract><AbstractText>Short abstract.</AbstractText></Abstract>
|
| 232 |
+
<AuthorList><Author><LastName>Smith</LastName></Author></AuthorList>
|
| 233 |
+
</Article>
|
| 234 |
+
</MedlineCitation>
|
| 235 |
+
</PubmedArticle>
|
| 236 |
+
</PubmedArticleSet>
|
| 237 |
+
"""
|
| 238 |
+
respx.get("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi").mock(
|
| 239 |
+
return_value=Response(200, text=mock_xml)
|
| 240 |
+
)
|
| 241 |
+
|
| 242 |
+
# Mock ID converter
|
| 243 |
+
respx.get("https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/").mock(
|
| 244 |
+
return_value=Response(
|
| 245 |
+
200, json={"records": [{"pmid": "12345678", "pmcid": "PMC1234567"}]}
|
| 246 |
+
)
|
| 247 |
+
)
|
| 248 |
+
|
| 249 |
+
# Mock BioC full text
|
| 250 |
+
mock_bioc = {
|
| 251 |
+
"documents": [
|
| 252 |
+
{
|
| 253 |
+
"passages": [
|
| 254 |
+
{"infons": {"section_type": "INTRO"}, "text": "Full intro..."},
|
| 255 |
+
]
|
| 256 |
+
}
|
| 257 |
+
]
|
| 258 |
+
}
|
| 259 |
+
respx.get(
|
| 260 |
+
"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/12345678/unicode"
|
| 261 |
+
).mock(return_value=Response(200, json=mock_bioc))
|
| 262 |
+
|
| 263 |
+
# NOTE: No include_fulltext param - it's set via constructor
|
| 264 |
+
results = await tool.search("test", max_results=1)
|
| 265 |
+
|
| 266 |
+
assert len(results) == 1
|
| 267 |
+
# Full text should be appended or replace abstract
|
| 268 |
+
assert "Full intro" in results[0].content or "Short abstract" in results[0].content
|
| 269 |
+
```
|
| 270 |
+
|
| 271 |
+
---
|
| 272 |
+
|
| 273 |
+
### Step 2: Implement Full-Text Methods
|
| 274 |
+
|
| 275 |
+
**File**: `src/tools/pubmed.py` (additions to existing class)
|
| 276 |
+
|
| 277 |
+
```python
|
| 278 |
+
# Add these methods to PubMedTool class
|
| 279 |
+
|
| 280 |
+
async def get_pmc_id(self, pmid: str) -> str | None:
|
| 281 |
+
"""
|
| 282 |
+
Convert PMID to PMCID for full-text access.
|
| 283 |
+
|
| 284 |
+
Args:
|
| 285 |
+
pmid: PubMed ID
|
| 286 |
+
|
| 287 |
+
Returns:
|
| 288 |
+
PMCID if paper is in PMC, None otherwise
|
| 289 |
+
"""
|
| 290 |
+
url = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/"
|
| 291 |
+
params = {"ids": pmid, "format": "json"}
|
| 292 |
+
|
| 293 |
+
async with httpx.AsyncClient(timeout=30.0) as client:
|
| 294 |
+
try:
|
| 295 |
+
response = await client.get(url, params=params)
|
| 296 |
+
response.raise_for_status()
|
| 297 |
+
data = response.json()
|
| 298 |
+
|
| 299 |
+
records = data.get("records", [])
|
| 300 |
+
if records and records[0].get("pmcid"):
|
| 301 |
+
return records[0]["pmcid"]
|
| 302 |
+
return None
|
| 303 |
+
|
| 304 |
+
except httpx.HTTPError:
|
| 305 |
+
return None
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
async def get_fulltext(self, pmid: str) -> str | None:
|
| 309 |
+
"""
|
| 310 |
+
Get full text for a PubMed paper via BioC API.
|
| 311 |
+
|
| 312 |
+
Only works for open-access papers in PubMed Central.
|
| 313 |
+
|
| 314 |
+
Args:
|
| 315 |
+
pmid: PubMed ID
|
| 316 |
+
|
| 317 |
+
Returns:
|
| 318 |
+
Full text as string, or None if not available
|
| 319 |
+
"""
|
| 320 |
+
url = f"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/{pmid}/unicode"
|
| 321 |
+
|
| 322 |
+
async with httpx.AsyncClient(timeout=60.0) as client:
|
| 323 |
+
try:
|
| 324 |
+
response = await client.get(url)
|
| 325 |
+
if response.status_code == 404:
|
| 326 |
+
return None
|
| 327 |
+
response.raise_for_status()
|
| 328 |
+
data = response.json()
|
| 329 |
+
|
| 330 |
+
# Extract text from all passages
|
| 331 |
+
documents = data.get("documents", [])
|
| 332 |
+
if not documents:
|
| 333 |
+
return None
|
| 334 |
+
|
| 335 |
+
passages = documents[0].get("passages", [])
|
| 336 |
+
text_parts = [p.get("text", "") for p in passages if p.get("text")]
|
| 337 |
+
|
| 338 |
+
return "\n\n".join(text_parts) if text_parts else None
|
| 339 |
+
|
| 340 |
+
except httpx.HTTPError:
|
| 341 |
+
return None
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
async def get_fulltext_structured(self, pmid: str) -> dict[str, str] | None:
|
| 345 |
+
"""
|
| 346 |
+
Get structured full text with sections.
|
| 347 |
+
|
| 348 |
+
Args:
|
| 349 |
+
pmid: PubMed ID
|
| 350 |
+
|
| 351 |
+
Returns:
|
| 352 |
+
Dict mapping section names to text, or None if not available
|
| 353 |
+
"""
|
| 354 |
+
url = f"https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_json/{pmid}/unicode"
|
| 355 |
+
|
| 356 |
+
async with httpx.AsyncClient(timeout=60.0) as client:
|
| 357 |
+
try:
|
| 358 |
+
response = await client.get(url)
|
| 359 |
+
if response.status_code == 404:
|
| 360 |
+
return None
|
| 361 |
+
response.raise_for_status()
|
| 362 |
+
data = response.json()
|
| 363 |
+
|
| 364 |
+
documents = data.get("documents", [])
|
| 365 |
+
if not documents:
|
| 366 |
+
return None
|
| 367 |
+
|
| 368 |
+
# Map section types to readable names
|
| 369 |
+
section_map = {
|
| 370 |
+
"INTRO": "introduction",
|
| 371 |
+
"METHODS": "methods",
|
| 372 |
+
"RESULTS": "results",
|
| 373 |
+
"DISCUSS": "discussion",
|
| 374 |
+
"CONCL": "conclusion",
|
| 375 |
+
"ABSTRACT": "abstract",
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
sections: dict[str, list[str]] = {}
|
| 379 |
+
for passage in documents[0].get("passages", []):
|
| 380 |
+
section_type = passage.get("infons", {}).get("section_type", "other")
|
| 381 |
+
section_name = section_map.get(section_type, "other")
|
| 382 |
+
text = passage.get("text", "")
|
| 383 |
+
|
| 384 |
+
if text:
|
| 385 |
+
if section_name not in sections:
|
| 386 |
+
sections[section_name] = []
|
| 387 |
+
sections[section_name].append(text)
|
| 388 |
+
|
| 389 |
+
# Join multiple passages per section
|
| 390 |
+
return {k: "\n\n".join(v) for k, v in sections.items()}
|
| 391 |
+
|
| 392 |
+
except httpx.HTTPError:
|
| 393 |
+
return None
|
| 394 |
+
```
|
| 395 |
+
|
| 396 |
+
---
|
| 397 |
+
|
| 398 |
+
### Step 3: Update Constructor and Search Method
|
| 399 |
+
|
| 400 |
+
Add full-text flag to constructor and update search to use it:
|
| 401 |
+
|
| 402 |
+
```python
|
| 403 |
+
class PubMedTool:
|
| 404 |
+
"""Search tool for PubMed/NCBI."""
|
| 405 |
+
|
| 406 |
+
def __init__(
|
| 407 |
+
self,
|
| 408 |
+
api_key: str | None = None,
|
| 409 |
+
include_fulltext: bool = False, # NEW CONSTRUCTOR PARAM
|
| 410 |
+
) -> None:
|
| 411 |
+
self.api_key = api_key or settings.ncbi_api_key
|
| 412 |
+
if self.api_key == "your-ncbi-key-here":
|
| 413 |
+
self.api_key = None
|
| 414 |
+
self._last_request_time = 0.0
|
| 415 |
+
self.include_fulltext = include_fulltext # Store for use in search()
|
| 416 |
+
|
| 417 |
+
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 418 |
+
"""
|
| 419 |
+
Search PubMed and return evidence.
|
| 420 |
+
|
| 421 |
+
Note: Full-text enrichment is controlled by constructor parameter,
|
| 422 |
+
not method parameter, because SearchHandler doesn't pass extra args.
|
| 423 |
+
"""
|
| 424 |
+
# ... existing search logic ...
|
| 425 |
+
|
| 426 |
+
evidence_list = self._parse_pubmed_xml(fetch_resp.text)
|
| 427 |
+
|
| 428 |
+
# Optionally enrich with full text (if configured at construction)
|
| 429 |
+
if self.include_fulltext:
|
| 430 |
+
evidence_list = await self._enrich_with_fulltext(evidence_list)
|
| 431 |
+
|
| 432 |
+
return evidence_list
|
| 433 |
+
|
| 434 |
+
|
| 435 |
+
async def _enrich_with_fulltext(
|
| 436 |
+
self, evidence_list: list[Evidence]
|
| 437 |
+
) -> list[Evidence]:
|
| 438 |
+
"""Attempt to add full text to evidence items."""
|
| 439 |
+
enriched = []
|
| 440 |
+
|
| 441 |
+
for evidence in evidence_list:
|
| 442 |
+
# Extract PMID from URL
|
| 443 |
+
url = evidence.citation.url
|
| 444 |
+
pmid = url.rstrip("/").split("/")[-1] if url else None
|
| 445 |
+
|
| 446 |
+
if pmid:
|
| 447 |
+
fulltext = await self.get_fulltext(pmid)
|
| 448 |
+
if fulltext:
|
| 449 |
+
# Replace abstract with full text (truncated)
|
| 450 |
+
evidence = Evidence(
|
| 451 |
+
content=fulltext[:8000], # Larger limit for full text
|
| 452 |
+
citation=evidence.citation,
|
| 453 |
+
relevance=evidence.relevance,
|
| 454 |
+
metadata={
|
| 455 |
+
**evidence.metadata,
|
| 456 |
+
"has_fulltext": True,
|
| 457 |
+
},
|
| 458 |
+
)
|
| 459 |
+
|
| 460 |
+
enriched.append(evidence)
|
| 461 |
+
|
| 462 |
+
return enriched
|
| 463 |
+
```
|
| 464 |
+
|
| 465 |
+
---
|
| 466 |
+
|
| 467 |
+
## Demo Script
|
| 468 |
+
|
| 469 |
+
**File**: `examples/pubmed_fulltext_demo.py`
|
| 470 |
+
|
| 471 |
+
```python
|
| 472 |
+
#!/usr/bin/env python3
|
| 473 |
+
"""Demo script to verify PubMed full-text retrieval."""
|
| 474 |
+
|
| 475 |
+
import asyncio
|
| 476 |
+
from src.tools.pubmed import PubMedTool
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
async def main():
|
| 480 |
+
"""Run PubMed full-text demo."""
|
| 481 |
+
tool = PubMedTool()
|
| 482 |
+
|
| 483 |
+
print("=" * 60)
|
| 484 |
+
print("PubMed Full-Text Demo")
|
| 485 |
+
print("=" * 60)
|
| 486 |
+
|
| 487 |
+
# Test 1: Convert PMID to PMCID
|
| 488 |
+
print("\n[Test 1] Converting PMID to PMCID...")
|
| 489 |
+
# Use a known open-access paper
|
| 490 |
+
test_pmid = "34450029" # Example: COVID-related open-access paper
|
| 491 |
+
pmcid = await tool.get_pmc_id(test_pmid)
|
| 492 |
+
print(f"PMID {test_pmid} -> PMCID: {pmcid or 'Not in PMC'}")
|
| 493 |
+
|
| 494 |
+
# Test 2: Get full text
|
| 495 |
+
print("\n[Test 2] Fetching full text...")
|
| 496 |
+
if pmcid:
|
| 497 |
+
fulltext = await tool.get_fulltext(test_pmid)
|
| 498 |
+
if fulltext:
|
| 499 |
+
print(f"Full text length: {len(fulltext)} characters")
|
| 500 |
+
print(f"Preview: {fulltext[:500]}...")
|
| 501 |
+
else:
|
| 502 |
+
print("Full text not available")
|
| 503 |
+
|
| 504 |
+
# Test 3: Get structured sections
|
| 505 |
+
print("\n[Test 3] Fetching structured sections...")
|
| 506 |
+
if pmcid:
|
| 507 |
+
sections = await tool.get_fulltext_structured(test_pmid)
|
| 508 |
+
if sections:
|
| 509 |
+
print("Available sections:")
|
| 510 |
+
for section, text in sections.items():
|
| 511 |
+
print(f" - {section}: {len(text)} chars")
|
| 512 |
+
else:
|
| 513 |
+
print("Structured text not available")
|
| 514 |
+
|
| 515 |
+
# Test 4: Search with full text
|
| 516 |
+
print("\n[Test 4] Search with full-text enrichment...")
|
| 517 |
+
results = await tool.search(
|
| 518 |
+
"metformin cancer open access",
|
| 519 |
+
max_results=3,
|
| 520 |
+
include_fulltext=True
|
| 521 |
+
)
|
| 522 |
+
|
| 523 |
+
for i, evidence in enumerate(results, 1):
|
| 524 |
+
has_ft = evidence.metadata.get("has_fulltext", False)
|
| 525 |
+
print(f"\n--- Result {i} ---")
|
| 526 |
+
print(f"Title: {evidence.citation.title}")
|
| 527 |
+
print(f"Has Full Text: {has_ft}")
|
| 528 |
+
print(f"Content Length: {len(evidence.content)} chars")
|
| 529 |
+
|
| 530 |
+
print("\n" + "=" * 60)
|
| 531 |
+
print("Demo complete!")
|
| 532 |
+
|
| 533 |
+
|
| 534 |
+
if __name__ == "__main__":
|
| 535 |
+
asyncio.run(main())
|
| 536 |
+
```
|
| 537 |
+
|
| 538 |
+
---
|
| 539 |
+
|
| 540 |
+
## Verification Checklist
|
| 541 |
+
|
| 542 |
+
### Unit Tests
|
| 543 |
+
```bash
|
| 544 |
+
# Run full-text tests
|
| 545 |
+
uv run pytest tests/unit/tools/test_pubmed_fulltext.py -v
|
| 546 |
+
|
| 547 |
+
# Run all PubMed tests
|
| 548 |
+
uv run pytest tests/unit/tools/test_pubmed.py -v
|
| 549 |
+
|
| 550 |
+
# Expected: All tests pass
|
| 551 |
+
```
|
| 552 |
+
|
| 553 |
+
### Integration Test (Manual)
|
| 554 |
+
```bash
|
| 555 |
+
# Run demo with real API
|
| 556 |
+
uv run python examples/pubmed_fulltext_demo.py
|
| 557 |
+
|
| 558 |
+
# Expected: Real full text from PMC papers
|
| 559 |
+
```
|
| 560 |
+
|
| 561 |
+
### Full Test Suite
|
| 562 |
+
```bash
|
| 563 |
+
make check
|
| 564 |
+
# Expected: All tests pass, mypy clean
|
| 565 |
+
```
|
| 566 |
+
|
| 567 |
+
---
|
| 568 |
+
|
| 569 |
+
## Success Criteria
|
| 570 |
+
|
| 571 |
+
1. **ID Conversion works**: PMID -> PMCID conversion successful
|
| 572 |
+
2. **Full text retrieval works**: BioC API returns paper text
|
| 573 |
+
3. **Structured sections work**: Can get intro/methods/results/discussion separately
|
| 574 |
+
4. **Search integration works**: `include_fulltext=True` enriches results
|
| 575 |
+
5. **No regressions**: Existing tests still pass
|
| 576 |
+
6. **Graceful degradation**: Non-PMC papers still return abstracts
|
| 577 |
+
|
| 578 |
+
---
|
| 579 |
+
|
| 580 |
+
## Notes
|
| 581 |
+
|
| 582 |
+
- Only ~30% of PubMed papers have full text in PMC
|
| 583 |
+
- BioC API has no documented rate limit, but be respectful
|
| 584 |
+
- Full text can be very long - truncate appropriately
|
| 585 |
+
- Consider caching full text responses (they don't change)
|
| 586 |
+
- Timeout should be longer for full text (60s vs 30s)
|
docs/brainstorming/implementation/17_PHASE_RATE_LIMITING.md
ADDED
|
@@ -0,0 +1,540 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Phase 17: Rate Limiting with `limits` Library
|
| 2 |
+
|
| 3 |
+
**Priority**: P0 CRITICAL - Prevents API blocks
|
| 4 |
+
**Effort**: ~1 hour
|
| 5 |
+
**Dependencies**: None
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## CRITICAL: Async Safety Requirements
|
| 10 |
+
|
| 11 |
+
**WARNING**: The rate limiter MUST be async-safe. Blocking the event loop will freeze:
|
| 12 |
+
- The Gradio UI
|
| 13 |
+
- All parallel searches
|
| 14 |
+
- The orchestrator
|
| 15 |
+
|
| 16 |
+
**Rules**:
|
| 17 |
+
1. **NEVER use `time.sleep()`** - Always use `await asyncio.sleep()`
|
| 18 |
+
2. **NEVER use blocking while loops** - Use async-aware polling
|
| 19 |
+
3. **The `limits` library check is synchronous** - Wrap it carefully
|
| 20 |
+
|
| 21 |
+
The implementation below uses a polling pattern that:
|
| 22 |
+
- Checks the limit (synchronous, fast)
|
| 23 |
+
- If exceeded, `await asyncio.sleep()` (non-blocking)
|
| 24 |
+
- Retry the check
|
| 25 |
+
|
| 26 |
+
**Alternative**: If `limits` proves problematic, use `aiolimiter` which is pure-async.
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## Overview
|
| 31 |
+
|
| 32 |
+
Replace the naive `asyncio.sleep` rate limiting with a proper rate limiter built on the `limits` library, which provides:
|
| 33 |
+
- Moving window rate limiting
|
| 34 |
+
- Per-API configurable limits
|
| 35 |
+
- Thread-safe storage
|
| 36 |
+
- Already used in reference repo
|
| 37 |
+
|
| 38 |
+
**Why This Matters**
|
| 39 |
+
- NCBI will block us without proper rate limiting (3/sec without key, 10/sec with)
|
| 40 |
+
- Current implementation only has simple sleep delay
|
| 41 |
+
- Need coordinated limits across all PubMed calls
|
| 42 |
+
- Professional-grade rate limiting prevents production issues
|
| 43 |
+
|
| 44 |
+
---
|
| 45 |
+
|
| 46 |
+
## Current State
|
| 47 |
+
|
| 48 |
+
### What We Have (`src/tools/pubmed.py:20-21, 34-41`)
|
| 49 |
+
|
| 50 |
+
```python
|
| 51 |
+
RATE_LIMIT_DELAY = 0.34 # ~3 requests/sec without API key
|
| 52 |
+
|
| 53 |
+
async def _rate_limit(self) -> None:
|
| 54 |
+
"""Enforce NCBI rate limiting."""
|
| 55 |
+
loop = asyncio.get_running_loop()
|
| 56 |
+
now = loop.time()
|
| 57 |
+
elapsed = now - self._last_request_time
|
| 58 |
+
if elapsed < self.RATE_LIMIT_DELAY:
|
| 59 |
+
await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
|
| 60 |
+
self._last_request_time = loop.time()
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### Problems
|
| 64 |
+
|
| 65 |
+
1. **Not shared across instances**: Each `PubMedTool()` has its own counter
|
| 66 |
+
2. **Simple delay vs moving window**: Doesn't handle bursts properly
|
| 67 |
+
3. **Hardcoded rate**: Doesn't adapt to API key presence
|
| 68 |
+
4. **No backoff on 429**: Just retries blindly
|
| 69 |
+
|
| 70 |
+
---
|
| 71 |
+
|
| 72 |
+
## TDD Implementation Plan
|
| 73 |
+
|
| 74 |
+
### Step 1: Add Dependency
|
| 75 |
+
|
| 76 |
+
**File**: `pyproject.toml`
|
| 77 |
+
|
| 78 |
+
```toml
|
| 79 |
+
dependencies = [
|
| 80 |
+
# ... existing deps ...
|
| 81 |
+
"limits>=3.0",
|
| 82 |
+
]
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
Then run:
|
| 86 |
+
```bash
|
| 87 |
+
uv sync
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
---
|
| 91 |
+
|
| 92 |
+
### Step 2: Write the Tests First
|
| 93 |
+
|
| 94 |
+
**File**: `tests/unit/tools/test_rate_limiting.py`
|
| 95 |
+
|
| 96 |
+
```python
|
| 97 |
+
"""Tests for rate limiting functionality."""
|
| 98 |
+
|
| 99 |
+
import asyncio
|
| 100 |
+
import time
|
| 101 |
+
|
| 102 |
+
import pytest
|
| 103 |
+
|
| 104 |
+
from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class TestRateLimiter:
|
| 108 |
+
"""Test suite for rate limiter."""
|
| 109 |
+
|
| 110 |
+
def test_create_limiter_without_api_key(self) -> None:
|
| 111 |
+
"""Should create 3/sec limiter without API key."""
|
| 112 |
+
limiter = RateLimiter(rate="3/second")
|
| 113 |
+
assert limiter.rate == "3/second"
|
| 114 |
+
|
| 115 |
+
def test_create_limiter_with_api_key(self) -> None:
|
| 116 |
+
"""Should create 10/sec limiter with API key."""
|
| 117 |
+
limiter = RateLimiter(rate="10/second")
|
| 118 |
+
assert limiter.rate == "10/second"
|
| 119 |
+
|
| 120 |
+
@pytest.mark.asyncio
|
| 121 |
+
async def test_limiter_allows_requests_under_limit(self) -> None:
|
| 122 |
+
"""Should allow requests under the rate limit."""
|
| 123 |
+
limiter = RateLimiter(rate="10/second")
|
| 124 |
+
|
| 125 |
+
# 3 requests should all succeed immediately
|
| 126 |
+
for _ in range(3):
|
| 127 |
+
allowed = await limiter.acquire()
|
| 128 |
+
assert allowed is True
|
| 129 |
+
|
| 130 |
+
@pytest.mark.asyncio
|
| 131 |
+
async def test_limiter_blocks_when_exceeded(self) -> None:
|
| 132 |
+
"""Should wait when rate limit exceeded."""
|
| 133 |
+
limiter = RateLimiter(rate="2/second")
|
| 134 |
+
|
| 135 |
+
# First 2 should be instant
|
| 136 |
+
await limiter.acquire()
|
| 137 |
+
await limiter.acquire()
|
| 138 |
+
|
| 139 |
+
# Third should block briefly
|
| 140 |
+
start = time.monotonic()
|
| 141 |
+
await limiter.acquire()
|
| 142 |
+
elapsed = time.monotonic() - start
|
| 143 |
+
|
| 144 |
+
# Should have waited until a slot freed in the 1-second moving window
|
| 145 |
+
assert elapsed >= 0.3
|
| 146 |
+
|
| 147 |
+
@pytest.mark.asyncio
|
| 148 |
+
async def test_limiter_resets_after_window(self) -> None:
|
| 149 |
+
"""Rate limit should reset after time window."""
|
| 150 |
+
limiter = RateLimiter(rate="5/second")
|
| 151 |
+
|
| 152 |
+
# Use up the limit
|
| 153 |
+
for _ in range(5):
|
| 154 |
+
await limiter.acquire()
|
| 155 |
+
|
| 156 |
+
# Wait for window to pass
|
| 157 |
+
await asyncio.sleep(1.1)
|
| 158 |
+
|
| 159 |
+
# Should be allowed again
|
| 160 |
+
start = time.monotonic()
|
| 161 |
+
await limiter.acquire()
|
| 162 |
+
elapsed = time.monotonic() - start
|
| 163 |
+
|
| 164 |
+
assert elapsed < 0.1 # Should be nearly instant
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
class TestGetPubmedLimiter:
|
| 168 |
+
"""Test PubMed-specific limiter factory."""
|
| 169 |
+
|
| 170 |
+
def test_limiter_without_api_key(self) -> None:
|
| 171 |
+
"""Should return 3/sec limiter without key."""
|
| 172 |
+
limiter = get_pubmed_limiter(api_key=None)
|
| 173 |
+
assert "3" in limiter.rate
|
| 174 |
+
|
| 175 |
+
def test_limiter_with_api_key(self) -> None:
|
| 176 |
+
"""Should return 10/sec limiter with key."""
|
| 177 |
+
limiter = get_pubmed_limiter(api_key="my-api-key")
|
| 178 |
+
assert "10" in limiter.rate
|
| 179 |
+
|
| 180 |
+
def test_limiter_is_singleton(self) -> None:
|
| 181 |
+
"""Same API key should return same limiter instance."""
|
| 182 |
+
limiter1 = get_pubmed_limiter(api_key="key1")
|
| 183 |
+
limiter2 = get_pubmed_limiter(api_key="key1")
|
| 184 |
+
assert limiter1 is limiter2
|
| 185 |
+
|
| 186 |
+
def test_different_keys_share_same_limiter(self) -> None:
|
| 187 |
+
"""Different API keys still share the same limiter (shared NCBI quota)."""
|
| 188 |
+
limiter1 = get_pubmed_limiter(api_key="key1")
|
| 189 |
+
limiter2 = get_pubmed_limiter(api_key="key2")
|
| 190 |
+
# Clear cache for clean test
|
| 191 |
+
# Actually, different keys SHOULD share the same limiter
|
| 192 |
+
# since we're limiting against the same API
|
| 193 |
+
assert limiter1 is limiter2 # Shared NCBI rate limit
|
| 194 |
+
```
|
| 195 |
+
|
| 196 |
+
---
|
| 197 |
+
|
| 198 |
+
### Step 3: Create Rate Limiter Module
|
| 199 |
+
|
| 200 |
+
**File**: `src/tools/rate_limiter.py`
|
| 201 |
+
|
| 202 |
+
```python
|
| 203 |
+
"""Rate limiting utilities using the limits library."""
|
| 204 |
+
|
| 205 |
+
import asyncio
|
| 206 |
+
from typing import ClassVar
|
| 207 |
+
|
| 208 |
+
from limits import RateLimitItem, parse
|
| 209 |
+
from limits.storage import MemoryStorage
|
| 210 |
+
from limits.strategies import MovingWindowRateLimiter
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
class RateLimiter:
|
| 214 |
+
"""
|
| 215 |
+
Async-compatible rate limiter using limits library.
|
| 216 |
+
|
| 217 |
+
Uses moving window algorithm for smooth rate limiting.
|
| 218 |
+
"""
|
| 219 |
+
|
| 220 |
+
def __init__(self, rate: str) -> None:
|
| 221 |
+
"""
|
| 222 |
+
Initialize rate limiter.
|
| 223 |
+
|
| 224 |
+
Args:
|
| 225 |
+
rate: Rate string like "3/second" or "10/second"
|
| 226 |
+
"""
|
| 227 |
+
self.rate = rate
|
| 228 |
+
self._storage = MemoryStorage()
|
| 229 |
+
self._limiter = MovingWindowRateLimiter(self._storage)
|
| 230 |
+
self._rate_limit: RateLimitItem = parse(rate)
|
| 231 |
+
self._identity = "default" # Single identity for shared limiting
|
| 232 |
+
|
| 233 |
+
async def acquire(self, wait: bool = True) -> bool:
|
| 234 |
+
"""
|
| 235 |
+
Acquire permission to make a request.
|
| 236 |
+
|
| 237 |
+
ASYNC-SAFE: Uses asyncio.sleep(), never time.sleep().
|
| 238 |
+
The polling pattern allows other coroutines to run while waiting.
|
| 239 |
+
|
| 240 |
+
Args:
|
| 241 |
+
wait: If True, wait until allowed. If False, return immediately.
|
| 242 |
+
|
| 243 |
+
Returns:
|
| 244 |
+
True if allowed, False if not (only when wait=False)
|
| 245 |
+
"""
|
| 246 |
+
while True:
|
| 247 |
+
# Check if we can proceed (synchronous, fast - ~microseconds)
|
| 248 |
+
if self._limiter.hit(self._rate_limit, self._identity):
|
| 249 |
+
return True
|
| 250 |
+
|
| 251 |
+
if not wait:
|
| 252 |
+
return False
|
| 253 |
+
|
| 254 |
+
# CRITICAL: Use asyncio.sleep(), NOT time.sleep()
|
| 255 |
+
# This yields control to the event loop, allowing other
|
| 256 |
+
# coroutines (UI, parallel searches) to run
|
| 257 |
+
await asyncio.sleep(0.1)
|
| 258 |
+
|
| 259 |
+
def reset(self) -> None:
|
| 260 |
+
"""Reset the rate limiter (for testing)."""
|
| 261 |
+
self._storage.reset()
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
# Singleton limiter for PubMed/NCBI
|
| 265 |
+
_pubmed_limiter: RateLimiter | None = None
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def get_pubmed_limiter(api_key: str | None = None) -> RateLimiter:
|
| 269 |
+
"""
|
| 270 |
+
Get the shared PubMed rate limiter.
|
| 271 |
+
|
| 272 |
+
Rate depends on whether API key is provided:
|
| 273 |
+
- Without key: 3 requests/second
|
| 274 |
+
- With key: 10 requests/second
|
| 275 |
+
|
| 276 |
+
Args:
|
| 277 |
+
api_key: NCBI API key (optional)
|
| 278 |
+
|
| 279 |
+
Returns:
|
| 280 |
+
Shared RateLimiter instance
|
| 281 |
+
"""
|
| 282 |
+
global _pubmed_limiter
|
| 283 |
+
|
| 284 |
+
if _pubmed_limiter is None:
|
| 285 |
+
rate = "10/second" if api_key else "3/second"
|
| 286 |
+
_pubmed_limiter = RateLimiter(rate)
|
| 287 |
+
|
| 288 |
+
return _pubmed_limiter
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
def reset_pubmed_limiter() -> None:
|
| 292 |
+
"""Reset the PubMed limiter (for testing)."""
|
| 293 |
+
global _pubmed_limiter
|
| 294 |
+
_pubmed_limiter = None
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
# Factory for other APIs
|
| 298 |
+
class RateLimiterFactory:
|
| 299 |
+
"""Factory for creating/getting rate limiters for different APIs."""
|
| 300 |
+
|
| 301 |
+
_limiters: ClassVar[dict[str, RateLimiter]] = {}
|
| 302 |
+
|
| 303 |
+
@classmethod
|
| 304 |
+
def get(cls, api_name: str, rate: str) -> RateLimiter:
|
| 305 |
+
"""
|
| 306 |
+
Get or create a rate limiter for an API.
|
| 307 |
+
|
| 308 |
+
Args:
|
| 309 |
+
api_name: Unique identifier for the API
|
| 310 |
+
rate: Rate limit string (e.g., "10/second")
|
| 311 |
+
|
| 312 |
+
Returns:
|
| 313 |
+
RateLimiter instance (shared for same api_name)
|
| 314 |
+
"""
|
| 315 |
+
if api_name not in cls._limiters:
|
| 316 |
+
cls._limiters[api_name] = RateLimiter(rate)
|
| 317 |
+
return cls._limiters[api_name]
|
| 318 |
+
|
| 319 |
+
@classmethod
|
| 320 |
+
def reset_all(cls) -> None:
|
| 321 |
+
"""Reset all limiters (for testing)."""
|
| 322 |
+
cls._limiters.clear()
|
| 323 |
+
```
|
| 324 |
+
|
| 325 |
+
---
|
| 326 |
+
|
| 327 |
+
### Step 4: Update PubMed Tool
|
| 328 |
+
|
| 329 |
+
**File**: `src/tools/pubmed.py` (replace rate limiting code)
|
| 330 |
+
|
| 331 |
+
```python
|
| 332 |
+
# Replace imports and rate limiting
|
| 333 |
+
|
| 334 |
+
from src.tools.rate_limiter import get_pubmed_limiter
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
class PubMedTool:
|
| 338 |
+
"""Search tool for PubMed/NCBI."""
|
| 339 |
+
|
| 340 |
+
BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
|
| 341 |
+
HTTP_TOO_MANY_REQUESTS = 429
|
| 342 |
+
|
| 343 |
+
def __init__(self, api_key: str | None = None) -> None:
|
| 344 |
+
self.api_key = api_key or settings.ncbi_api_key
|
| 345 |
+
if self.api_key == "your-ncbi-key-here":
|
| 346 |
+
self.api_key = None
|
| 347 |
+
# Use shared rate limiter
|
| 348 |
+
self._limiter = get_pubmed_limiter(self.api_key)
|
| 349 |
+
|
| 350 |
+
async def _rate_limit(self) -> None:
|
| 351 |
+
"""Enforce NCBI rate limiting using shared limiter."""
|
| 352 |
+
await self._limiter.acquire()
|
| 353 |
+
|
| 354 |
+
# ... rest of class unchanged ...
|
| 355 |
+
```
|
| 356 |
+
|
| 357 |
+
---
|
| 358 |
+
|
| 359 |
+
### Step 5: Add Rate Limiters for Other APIs
|
| 360 |
+
|
| 361 |
+
**File**: `src/tools/clinicaltrials.py` (optional)
|
| 362 |
+
|
| 363 |
+
```python
|
| 364 |
+
from src.tools.rate_limiter import RateLimiterFactory
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
class ClinicalTrialsTool:
|
| 368 |
+
def __init__(self) -> None:
|
| 369 |
+
# ClinicalTrials.gov doesn't document limits, but be conservative
|
| 370 |
+
self._limiter = RateLimiterFactory.get("clinicaltrials", "5/second")
|
| 371 |
+
|
| 372 |
+
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 373 |
+
await self._limiter.acquire()
|
| 374 |
+
# ... rest of method ...
|
| 375 |
+
```
|
| 376 |
+
|
| 377 |
+
**File**: `src/tools/europepmc.py` (optional)
|
| 378 |
+
|
| 379 |
+
```python
|
| 380 |
+
from src.tools.rate_limiter import RateLimiterFactory
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
class EuropePMCTool:
|
| 384 |
+
def __init__(self) -> None:
|
| 385 |
+
# Europe PMC is generous, but still be respectful
|
| 386 |
+
self._limiter = RateLimiterFactory.get("europepmc", "10/second")
|
| 387 |
+
|
| 388 |
+
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 389 |
+
await self._limiter.acquire()
|
| 390 |
+
# ... rest of method ...
|
| 391 |
+
```
|
| 392 |
+
|
| 393 |
+
---
|
| 394 |
+
|
| 395 |
+
## Demo Script
|
| 396 |
+
|
| 397 |
+
**File**: `examples/rate_limiting_demo.py`
|
| 398 |
+
|
| 399 |
+
```python
|
| 400 |
+
#!/usr/bin/env python3
|
| 401 |
+
"""Demo script to verify rate limiting works correctly."""
|
| 402 |
+
|
| 403 |
+
import asyncio
|
| 404 |
+
import time
|
| 405 |
+
|
| 406 |
+
from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter, reset_pubmed_limiter
|
| 407 |
+
from src.tools.pubmed import PubMedTool
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
async def test_basic_limiter():
|
| 411 |
+
"""Test basic rate limiter behavior."""
|
| 412 |
+
print("=" * 60)
|
| 413 |
+
print("Rate Limiting Demo")
|
| 414 |
+
print("=" * 60)
|
| 415 |
+
|
| 416 |
+
# Test 1: Basic limiter
|
| 417 |
+
print("\n[Test 1] Testing 3/second limiter...")
|
| 418 |
+
limiter = RateLimiter("3/second")
|
| 419 |
+
|
| 420 |
+
start = time.monotonic()
|
| 421 |
+
for i in range(6):
|
| 422 |
+
await limiter.acquire()
|
| 423 |
+
elapsed = time.monotonic() - start
|
| 424 |
+
print(f" Request {i+1} at {elapsed:.2f}s")
|
| 425 |
+
|
| 426 |
+
total = time.monotonic() - start
|
| 427 |
+
print(f" Total time for 6 requests: {total:.2f}s (expected ~2s)")
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
async def test_pubmed_limiter():
|
| 431 |
+
"""Test PubMed-specific limiter."""
|
| 432 |
+
print("\n[Test 2] Testing PubMed limiter (shared)...")
|
| 433 |
+
|
| 434 |
+
reset_pubmed_limiter() # Clean state
|
| 435 |
+
|
| 436 |
+
# Without API key: 3/sec
|
| 437 |
+
limiter = get_pubmed_limiter(api_key=None)
|
| 438 |
+
print(f" Rate without key: {limiter.rate}")
|
| 439 |
+
|
| 440 |
+
# Multiple tools should share the same limiter
|
| 441 |
+
tool1 = PubMedTool()
|
| 442 |
+
tool2 = PubMedTool()
|
| 443 |
+
|
| 444 |
+
# Verify they share the limiter
|
| 445 |
+
print(f" Tools share limiter: {tool1._limiter is tool2._limiter}")
|
| 446 |
+
|
| 447 |
+
|
| 448 |
+
async def test_concurrent_requests():
|
| 449 |
+
"""Test rate limiting under concurrent load."""
|
| 450 |
+
print("\n[Test 3] Testing concurrent request limiting...")
|
| 451 |
+
|
| 452 |
+
limiter = RateLimiter("5/second")
|
| 453 |
+
|
| 454 |
+
async def make_request(i: int):
|
| 455 |
+
await limiter.acquire()
|
| 456 |
+
return time.monotonic()
|
| 457 |
+
|
| 458 |
+
start = time.monotonic()
|
| 459 |
+
# Launch 10 concurrent requests
|
| 460 |
+
tasks = [make_request(i) for i in range(10)]
|
| 461 |
+
times = await asyncio.gather(*tasks)
|
| 462 |
+
|
| 463 |
+
# Calculate distribution
|
| 464 |
+
relative_times = [t - start for t in times]
|
| 465 |
+
print(f" Request times: {[f'{t:.2f}s' for t in sorted(relative_times)]}")
|
| 466 |
+
|
| 467 |
+
total = max(relative_times)
|
| 468 |
+
print(f" All 10 requests completed in {total:.2f}s (expected ~2s)")
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
async def main():
|
| 472 |
+
await test_basic_limiter()
|
| 473 |
+
await test_pubmed_limiter()
|
| 474 |
+
await test_concurrent_requests()
|
| 475 |
+
|
| 476 |
+
print("\n" + "=" * 60)
|
| 477 |
+
print("Demo complete!")
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
if __name__ == "__main__":
|
| 481 |
+
asyncio.run(main())
|
| 482 |
+
```
|
| 483 |
+
|
| 484 |
+
---
|
| 485 |
+
|
| 486 |
+
## Verification Checklist
|
| 487 |
+
|
| 488 |
+
### Unit Tests
|
| 489 |
+
```bash
|
| 490 |
+
# Run rate limiting tests
|
| 491 |
+
uv run pytest tests/unit/tools/test_rate_limiting.py -v
|
| 492 |
+
|
| 493 |
+
# Expected: All tests pass
|
| 494 |
+
```
|
| 495 |
+
|
| 496 |
+
### Integration Test (Manual)
|
| 497 |
+
```bash
|
| 498 |
+
# Run demo
|
| 499 |
+
uv run python examples/rate_limiting_demo.py
|
| 500 |
+
|
| 501 |
+
# Expected: Requests properly spaced
|
| 502 |
+
```
|
| 503 |
+
|
| 504 |
+
### Full Test Suite
|
| 505 |
+
```bash
|
| 506 |
+
make check
|
| 507 |
+
# Expected: All tests pass, mypy clean
|
| 508 |
+
```
|
| 509 |
+
|
| 510 |
+
---
|
| 511 |
+
|
| 512 |
+
## Success Criteria
|
| 513 |
+
|
| 514 |
+
1. **`limits` library installed**: Dependency added to pyproject.toml
|
| 515 |
+
2. **RateLimiter class works**: Can create and use limiters
|
| 516 |
+
3. **PubMed uses new limiter**: Shared limiter across instances
|
| 517 |
+
4. **Rate adapts to API key**: 3/sec without, 10/sec with
|
| 518 |
+
5. **Concurrent requests handled**: Multiple async requests properly queued
|
| 519 |
+
6. **No regressions**: All existing tests pass
|
| 520 |
+
|
| 521 |
+
---
|
| 522 |
+
|
| 523 |
+
## API Rate Limit Reference
|
| 524 |
+
|
| 525 |
+
| API | Without Key | With Key |
|
| 526 |
+
|-----|-------------|----------|
|
| 527 |
+
| PubMed/NCBI | 3/sec | 10/sec |
|
| 528 |
+
| ClinicalTrials.gov | Undocumented (~5/sec safe) | N/A |
|
| 529 |
+
| Europe PMC | ~10-20/sec (generous) | N/A |
|
| 530 |
+
| OpenAlex | 100k/day, 10/sec | Faster/more reliable with `mailto` (polite pool) |
|
| 531 |
+
|
| 532 |
+
---
|
| 533 |
+
|
| 534 |
+
## Notes
|
| 535 |
+
|
| 536 |
+
- `limits` library uses moving window algorithm (fairer than fixed window)
|
| 537 |
+
- Singleton pattern ensures all PubMed calls share the limit
|
| 538 |
+
- The factory pattern allows easy extension to other APIs
|
| 539 |
+
- Consider adding 429 response detection + exponential backoff
|
| 540 |
+
- In production, consider Redis storage for distributed rate limiting
|
docs/brainstorming/implementation/README.md
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Implementation Plans
|
| 2 |
+
|
| 3 |
+
TDD implementation plans based on the brainstorming documents. Each phase is a self-contained vertical slice with tests, implementation, and demo scripts.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Prerequisites (COMPLETED)
|
| 8 |
+
|
| 9 |
+
The following foundational changes have been implemented to support all three phases:
|
| 10 |
+
|
| 11 |
+
| Change | File | Status |
|
| 12 |
+
|--------|------|--------|
|
| 13 |
+
| Add `"openalex"` to `SourceName` | `src/utils/models.py:9` | ✅ Done |
|
| 14 |
+
| Add `metadata` field to `Evidence` | `src/utils/models.py:39-42` | ✅ Done |
|
| 15 |
+
| Export all tools from `__init__.py` | `src/tools/__init__.py` | ✅ Done |
|
| 16 |
+
|
| 17 |
+
All 110 tests pass after these changes.
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## Priority Order
|
| 22 |
+
|
| 23 |
+
| Phase | Name | Priority | Effort | Value |
|
| 24 |
+
|-------|------|----------|--------|-------|
|
| 25 |
+
| **17** | Rate Limiting | P0 CRITICAL | 1 hour | Stability |
|
| 26 |
+
| **15** | OpenAlex | HIGH | 2-3 hours | Very High |
|
| 27 |
+
| **16** | PubMed Full-Text | MEDIUM | 3 hours | High |
|
| 28 |
+
|
| 29 |
+
**Recommended implementation order**: 17 → 15 → 16
|
| 30 |
+
|
| 31 |
+
---
|
| 32 |
+
|
| 33 |
+
## Phase 15: OpenAlex Integration
|
| 34 |
+
|
| 35 |
+
**File**: [15_PHASE_OPENALEX.md](./15_PHASE_OPENALEX.md)
|
| 36 |
+
|
| 37 |
+
Add OpenAlex as 4th data source for:
|
| 38 |
+
- Citation networks (who cites whom)
|
| 39 |
+
- Concept tagging (semantic discovery)
|
| 40 |
+
- 209M+ scholarly works
|
| 41 |
+
- Free, no API key required
|
| 42 |
+
|
| 43 |
+
**Quick Start**:
|
| 44 |
+
```bash
|
| 45 |
+
# Create the tool
|
| 46 |
+
touch src/tools/openalex.py
|
| 47 |
+
touch tests/unit/tools/test_openalex.py
|
| 48 |
+
|
| 49 |
+
# Run tests first (TDD)
|
| 50 |
+
uv run pytest tests/unit/tools/test_openalex.py -v
|
| 51 |
+
|
| 52 |
+
# Demo
|
| 53 |
+
uv run python examples/openalex_demo.py
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
## Phase 16: PubMed Full-Text
|
| 59 |
+
|
| 60 |
+
**File**: [16_PHASE_PUBMED_FULLTEXT.md](./16_PHASE_PUBMED_FULLTEXT.md)
|
| 61 |
+
|
| 62 |
+
Add full-text retrieval via BioC API for:
|
| 63 |
+
- Complete paper text (not just abstracts)
|
| 64 |
+
- Structured sections (intro, methods, results)
|
| 65 |
+
- Better evidence for LLM synthesis
|
| 66 |
+
|
| 67 |
+
**Quick Start**:
|
| 68 |
+
```bash
|
| 69 |
+
# Add methods to existing pubmed.py
|
| 70 |
+
# Tests in test_pubmed_fulltext.py
|
| 71 |
+
|
| 72 |
+
# Run tests
|
| 73 |
+
uv run pytest tests/unit/tools/test_pubmed_fulltext.py -v
|
| 74 |
+
|
| 75 |
+
# Demo
|
| 76 |
+
uv run python examples/pubmed_fulltext_demo.py
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
---
|
| 80 |
+
|
| 81 |
+
## Phase 17: Rate Limiting
|
| 82 |
+
|
| 83 |
+
**File**: [17_PHASE_RATE_LIMITING.md](./17_PHASE_RATE_LIMITING.md)
|
| 84 |
+
|
| 85 |
+
Replace naive sleep-based rate limiting with `limits` library for:
|
| 86 |
+
- Moving window algorithm
|
| 87 |
+
- Shared limits across instances
|
| 88 |
+
- Configurable per-API rates
|
| 89 |
+
- Production-grade stability
|
| 90 |
+
|
| 91 |
+
**Quick Start**:
|
| 92 |
+
```bash
|
| 93 |
+
# Add dependency
|
| 94 |
+
uv add limits
|
| 95 |
+
|
| 96 |
+
# Create module
|
| 97 |
+
touch src/tools/rate_limiter.py
|
| 98 |
+
touch tests/unit/tools/test_rate_limiting.py
|
| 99 |
+
|
| 100 |
+
# Run tests
|
| 101 |
+
uv run pytest tests/unit/tools/test_rate_limiting.py -v
|
| 102 |
+
|
| 103 |
+
# Demo
|
| 104 |
+
uv run python examples/rate_limiting_demo.py
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
---
|
| 108 |
+
|
| 109 |
+
## TDD Workflow
|
| 110 |
+
|
| 111 |
+
Each implementation doc follows this pattern:
|
| 112 |
+
|
| 113 |
+
1. **Write tests first** - Define expected behavior
|
| 114 |
+
2. **Run tests** - Verify they fail (red)
|
| 115 |
+
3. **Implement** - Write minimal code to pass
|
| 116 |
+
4. **Run tests** - Verify they pass (green)
|
| 117 |
+
5. **Refactor** - Clean up if needed
|
| 118 |
+
6. **Demo** - Verify end-to-end with real APIs
|
| 119 |
+
7. **`make check`** - Ensure no regressions
|
| 120 |
+
|
| 121 |
+
---
|
| 122 |
+
|
| 123 |
+
## Related Brainstorming Docs
|
| 124 |
+
|
| 125 |
+
These implementation plans are derived from:
|
| 126 |
+
|
| 127 |
+
- [00_ROADMAP_SUMMARY.md](../00_ROADMAP_SUMMARY.md) - Priority overview
|
| 128 |
+
- [01_PUBMED_IMPROVEMENTS.md](../01_PUBMED_IMPROVEMENTS.md) - PubMed details
|
| 129 |
+
- [02_CLINICALTRIALS_IMPROVEMENTS.md](../02_CLINICALTRIALS_IMPROVEMENTS.md) - CT.gov details
|
| 130 |
+
- [03_EUROPEPMC_IMPROVEMENTS.md](../03_EUROPEPMC_IMPROVEMENTS.md) - Europe PMC details
|
| 131 |
+
- [04_OPENALEX_INTEGRATION.md](../04_OPENALEX_INTEGRATION.md) - OpenAlex integration
|
| 132 |
+
|
| 133 |
+
---
|
| 134 |
+
|
| 135 |
+
## Future Phases (Not Yet Documented)
|
| 136 |
+
|
| 137 |
+
Based on brainstorming, these could be added later:
|
| 138 |
+
|
| 139 |
+
- **Phase 18**: ClinicalTrials.gov Results Retrieval
|
| 140 |
+
- **Phase 19**: Europe PMC Annotations API
|
| 141 |
+
- **Phase 20**: Drug Name Normalization (RxNorm)
|
| 142 |
+
- **Phase 21**: Citation Network Queries (OpenAlex)
|
| 143 |
+
- **Phase 22**: Semantic Search with Embeddings
|
docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Situation Analysis: Pydantic-AI + Microsoft Agent Framework Integration
|
| 2 |
+
|
| 3 |
+
**Date:** November 27, 2025
|
| 4 |
+
**Status:** ACTIVE DECISION REQUIRED
|
| 5 |
+
**Risk Level:** HIGH - DO NOT MERGE PR #41 UNTIL RESOLVED
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## 1. The Problem
|
| 10 |
+
|
| 11 |
+
We almost merged a refactor that would have **deleted** multi-agent orchestration capability from the codebase, mistakenly believing pydantic-ai and Microsoft Agent Framework were mutually exclusive.
|
| 12 |
+
|
| 13 |
+
**They are not.** They are complementary:
|
| 14 |
+
- **pydantic-ai** (Library): Ensures LLM outputs match Pydantic schemas
|
| 15 |
+
- **Microsoft Agent Framework** (Framework): Orchestrates multi-agent workflows
|
| 16 |
+
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
## 2. Current Branch State
|
| 20 |
+
|
| 21 |
+
| Branch | Location | Has Agent Framework? | Has Pydantic-AI Improvements? | Status |
|
| 22 |
+
|--------|----------|---------------------|------------------------------|--------|
|
| 23 |
+
| `origin/dev` | GitHub | YES | NO | **SAFE - Source of Truth** |
|
| 24 |
+
| `huggingface-upstream/dev` | HF Spaces | YES | NO | **SAFE - Same as GitHub** |
|
| 25 |
+
| `origin/main` | GitHub | YES | NO | **SAFE** |
|
| 26 |
+
| `feat/pubmed-fulltext` | GitHub | NO (deleted) | YES | **DANGER - Has destructive refactor** |
|
| 27 |
+
| `refactor/pydantic-unification` | Local | NO (deleted) | YES | **DANGER - Redundant, delete** |
|
| 28 |
+
| Local `dev` | Local only | NO (deleted) | YES | **DANGER - NOT PUSHED (thankfully)** |
|
| 29 |
+
|
| 30 |
+
### Key Files at Risk
|
| 31 |
+
|
| 32 |
+
**On `origin/dev` (PRESERVED):**
|
| 33 |
+
```text
|
| 34 |
+
src/agents/
|
| 35 |
+
├── analysis_agent.py # StatisticalAnalyzer wrapper
|
| 36 |
+
├── hypothesis_agent.py # Hypothesis generation
|
| 37 |
+
├── judge_agent.py # JudgeHandler wrapper
|
| 38 |
+
├── magentic_agents.py # Multi-agent definitions
|
| 39 |
+
├── report_agent.py # Report synthesis
|
| 40 |
+
├── search_agent.py # SearchHandler wrapper
|
| 41 |
+
├── state.py # Thread-safe state management
|
| 42 |
+
└── tools.py # @ai_function decorated tools
|
| 43 |
+
|
| 44 |
+
src/orchestrator_magentic.py # Multi-agent orchestrator
|
| 45 |
+
src/utils/llm_factory.py # Centralized LLM client factory
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
**Deleted in refactor branch (would be lost if merged):**
|
| 49 |
+
- All of the above
|
| 50 |
+
|
| 51 |
+
---
|
| 52 |
+
|
| 53 |
+
## 3. Target Architecture
|
| 54 |
+
|
| 55 |
+
```text
|
| 56 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 57 |
+
│ Microsoft Agent Framework (Orchestration Layer) │
|
| 58 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
| 59 |
+
│ │ SearchAgent │→ │ JudgeAgent │→ │ ReportAgent │ │
|
| 60 |
+
│ │ (BaseAgent) │ │ (BaseAgent) │ │ (BaseAgent) │ │
|
| 61 |
+
│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │
|
| 62 |
+
│ │ │ │ │
|
| 63 |
+
│ ▼ ▼ ▼ │
|
| 64 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
| 65 |
+
│ │ pydantic-ai │ │ pydantic-ai │ │ pydantic-ai │ │
|
| 66 |
+
│ │ Agent() │ │ Agent() │ │ Agent() │ │
|
| 67 |
+
│ │ output_type= │ │ output_type= │ │ output_type= │ │
|
| 68 |
+
│ │ SearchResult │ │ JudgeAssess │ │ Report │ │
|
| 69 |
+
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
| 70 |
+
└─────────────────────────────────────────────────────────────────┘
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
**Why this architecture:**
|
| 74 |
+
1. **Agent Framework** handles: workflow coordination, state passing, middleware, observability
|
| 75 |
+
2. **pydantic-ai** handles: type-safe LLM calls within each agent
|
| 76 |
+
|
| 77 |
+
---
|
| 78 |
+
|
| 79 |
+
## 4. CRITICAL: Naming Confusion Clarification
|
| 80 |
+
|
| 81 |
+
> **Senior Agent Review Finding:** The codebase uses "magentic" in file names (e.g., `orchestrator_magentic.py`, `magentic_agents.py`) but this is **NOT** the `magentic` PyPI package by Jack Collins. It's Microsoft Agent Framework (`agent-framework-core`).
|
| 82 |
+
|
| 83 |
+
**The naming confusion:**
|
| 84 |
+
- `magentic` (PyPI package): A different library for structured LLM outputs
|
| 85 |
+
- "Magentic" (in our codebase): Our internal name for Microsoft Agent Framework integration
|
| 86 |
+
- `agent-framework-core` (PyPI package): Microsoft's actual multi-agent orchestration framework
|
| 87 |
+
|
| 88 |
+
**Recommended future action:** Rename `orchestrator_magentic.py` → `orchestrator_advanced.py` to eliminate confusion.
|
| 89 |
+
|
| 90 |
+
---
|
| 91 |
+
|
| 92 |
+
## 5. What the Refactor DID Get Right
|
| 93 |
+
|
| 94 |
+
The refactor branch (`feat/pubmed-fulltext`) has some valuable improvements:
|
| 95 |
+
|
| 96 |
+
1. **`judges.py` unified `get_model()`** - Supports OpenAI, Anthropic, AND HuggingFace via pydantic-ai
|
| 97 |
+
2. **HuggingFace free tier support** - `HuggingFaceModel` integration
|
| 98 |
+
3. **Test fix** - Properly mocks `HuggingFaceModel` class
|
| 99 |
+
4. **Removed broken magentic optional dependency** from pyproject.toml (this was correct - the old `magentic` package is different from Microsoft Agent Framework)
|
| 100 |
+
|
| 101 |
+
**What it got WRONG:**
|
| 102 |
+
1. Deleted `src/agents/` entirely instead of refactoring them
|
| 103 |
+
2. Deleted `src/orchestrator_magentic.py` instead of fixing it
|
| 104 |
+
3. Conflated "magentic" (old package) with "Microsoft Agent Framework" (current framework)
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
## 6. Options for Path Forward
|
| 109 |
+
|
| 110 |
+
### Option A: Abandon Refactor, Start Fresh
|
| 111 |
+
- Close PR #41
|
| 112 |
+
- Delete `feat/pubmed-fulltext` and `refactor/pydantic-unification` branches
|
| 113 |
+
- Reset local `dev` to match `origin/dev`
|
| 114 |
+
- Cherry-pick ONLY the good parts (judges.py improvements, HF support)
|
| 115 |
+
- **Pros:** Clean, safe
|
| 116 |
+
- **Cons:** Lose some work, need to redo carefully
|
| 117 |
+
|
| 118 |
+
### Option B: Cherry-Pick Good Parts to origin/dev
|
| 119 |
+
- Do NOT merge PR #41
|
| 120 |
+
- Create new branch from `origin/dev`
|
| 121 |
+
- Cherry-pick specific commits/changes that improve pydantic-ai usage
|
| 122 |
+
- Keep agent framework code intact
|
| 123 |
+
- **Pros:** Preserves both, surgical
|
| 124 |
+
- **Cons:** Requires careful file-by-file review
|
| 125 |
+
|
| 126 |
+
### Option C: Revert Deletions in Refactor Branch
|
| 127 |
+
- On `feat/pubmed-fulltext`, restore deleted agent files from `origin/dev`
|
| 128 |
+
- Keep the pydantic-ai improvements
|
| 129 |
+
- Merge THAT to dev
|
| 130 |
+
- **Pros:** Gets both
|
| 131 |
+
- **Cons:** Complex git operations, risk of conflicts
|
| 132 |
+
|
| 133 |
+
---
|
| 134 |
+
|
| 135 |
+
## 7. Recommended Action: Option B (Cherry-Pick)
|
| 136 |
+
|
| 137 |
+
**Step-by-step:**
|
| 138 |
+
|
| 139 |
+
1. **Close PR #41** (do not merge)
|
| 140 |
+
2. **Delete redundant branches:**
|
| 141 |
+
- `refactor/pydantic-unification` (local)
|
| 142 |
+
- Reset local `dev` to `origin/dev`
|
| 143 |
+
3. **Create new branch from origin/dev:**
|
| 144 |
+
```bash
|
| 145 |
+
git checkout -b feat/pydantic-ai-improvements origin/dev
|
| 146 |
+
```
|
| 147 |
+
4. **Cherry-pick or manually port these improvements:**
|
| 148 |
+
- `src/agent_factory/judges.py` - the unified `get_model()` function
|
| 149 |
+
- `examples/free_tier_demo.py` - HuggingFace demo
|
| 150 |
+
- Test improvements
|
| 151 |
+
5. **Do NOT delete any agent framework files**
|
| 152 |
+
6. **Create PR for review**
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
## 8. Files to Cherry-Pick (Safe Improvements)
|
| 157 |
+
|
| 158 |
+
| File | What Changed | Safe to Port? |
|
| 159 |
+
|------|-------------|---------------|
|
| 160 |
+
| `src/agent_factory/judges.py` | Added `HuggingFaceModel` support in `get_model()` | YES |
|
| 161 |
+
| `examples/free_tier_demo.py` | New demo for HF inference | YES |
|
| 162 |
+
| `tests/unit/agent_factory/test_judges.py` | Fixed HF model mocking | YES |
|
| 163 |
+
| `pyproject.toml` | Removed old `magentic` optional dep | MAYBE (review carefully) |
|
| 164 |
+
|
| 165 |
+
---
|
| 166 |
+
|
| 167 |
+
## 9. Questions to Answer Before Proceeding
|
| 168 |
+
|
| 169 |
+
1. **For the hackathon**: Do we need full multi-agent orchestration, or is single-agent sufficient?
|
| 170 |
+
2. **For DeepCritical mainline**: Is the plan to use Microsoft Agent Framework for orchestration?
|
| 171 |
+
3. **Timeline**: How much time do we have to get this right?
|
| 172 |
+
|
| 173 |
+
---
|
| 174 |
+
|
| 175 |
+
## 10. Immediate Actions (DO NOW)
|
| 176 |
+
|
| 177 |
+
- [ ] **DO NOT merge PR #41**
|
| 178 |
+
- [ ] Close PR #41 with comment explaining the situation
|
| 179 |
+
- [ ] Do not push local `dev` branch anywhere
|
| 180 |
+
- [ ] Confirm HuggingFace Spaces is untouched (it is - verified)
|
| 181 |
+
|
| 182 |
+
---
|
| 183 |
+
|
| 184 |
+
## 11. Decision Log
|
| 185 |
+
|
| 186 |
+
| Date | Decision | Rationale |
|
| 187 |
+
|------|----------|-----------|
|
| 188 |
+
| 2025-11-27 | Pause refactor merge | Discovered agent framework and pydantic-ai are complementary, not exclusive |
|
| 189 |
+
| TBD | ? | Awaiting decision on path forward |
|
docs/brainstorming/magentic-pydantic/01_ARCHITECTURE_SPEC.md
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Architecture Specification: Dual-Mode Agent System
|
| 2 |
+
|
| 3 |
+
**Date:** November 27, 2025
|
| 4 |
+
**Status:** SPECIFICATION
|
| 5 |
+
**Goal:** Graceful degradation from full multi-agent orchestration to simple single-agent mode
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## 1. Core Concept: Two Operating Modes
|
| 10 |
+
|
| 11 |
+
```text
|
| 12 |
+
┌─────────────────────────────────────────────────────────────────────┐
|
| 13 |
+
│ USER REQUEST │
|
| 14 |
+
│ │ │
|
| 15 |
+
│ ▼ │
|
| 16 |
+
│ ┌─────────────────┐ │
|
| 17 |
+
│ │ Mode Selection │ │
|
| 18 |
+
│ │ (Auto-detect) │ │
|
| 19 |
+
│ └────────┬────────┘ │
|
| 20 |
+
│ │ │
|
| 21 |
+
│ ┌───────────────┴───────────────┐ │
|
| 22 |
+
│ │ │ │
|
| 23 |
+
│ ▼ ▼ │
|
| 24 |
+
│ ┌─────────────────┐ ┌─────────────────┐ │
|
| 25 |
+
│ │ SIMPLE MODE │ │ ADVANCED MODE │ │
|
| 26 |
+
│ │ (Free Tier) │ │ (Paid Tier) │ │
|
| 27 |
+
│ │ │ │ │ │
|
| 28 |
+
│ │ pydantic-ai │ │ MS Agent Fwk │ │
|
| 29 |
+
│ │ single-agent │ │ + pydantic-ai │ │
|
| 30 |
+
│ │ loop │ │ multi-agent │ │
|
| 31 |
+
│ └─────────────────┘ └─────────────────┘ │
|
| 32 |
+
│ │ │ │
|
| 33 |
+
│ └───────────────┬───────────────┘ │
|
| 34 |
+
│ ▼ │
|
| 35 |
+
│ ┌─────────────────┐ │
|
| 36 |
+
│ │ Research Report │ │
|
| 37 |
+
│ │ with Citations │ │
|
| 38 |
+
│ └─────────────────┘ │
|
| 39 |
+
└─────────────────────────────────────────────────────────────────────┘
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## 2. Mode Comparison
|
| 45 |
+
|
| 46 |
+
| Aspect | Simple Mode | Advanced Mode |
|
| 47 |
+
|--------|-------------|---------------|
|
| 48 |
+
| **Trigger** | No API key OR `LLM_PROVIDER=huggingface` | OpenAI API key present (currently OpenAI only) |
|
| 49 |
+
| **Framework** | pydantic-ai only | Microsoft Agent Framework + pydantic-ai |
|
| 50 |
+
| **Architecture** | Single orchestrator loop | Multi-agent coordination |
|
| 51 |
+
| **Agents** | One agent does Search→Judge→Report | SearchAgent, JudgeAgent, ReportAgent, AnalysisAgent |
|
| 52 |
+
| **State Management** | Simple dict | Thread-safe `MagenticState` with context vars |
|
| 53 |
+
| **Quality** | Good (functional) | Better (specialized agents, coordination) |
|
| 54 |
+
| **Cost** | Free (HuggingFace Inference) | Paid (OpenAI/Anthropic) |
|
| 55 |
+
| **Use Case** | Demos, hackathon, budget-constrained | Production, research quality |
|
| 56 |
+
|
| 57 |
+
---
|
| 58 |
+
|
| 59 |
+
## 3. Simple Mode Architecture (pydantic-ai Only)
|
| 60 |
+
|
| 61 |
+
```text
|
| 62 |
+
┌─────────────────────────────────────────────────────┐
|
| 63 |
+
│ Orchestrator │
|
| 64 |
+
│ │
|
| 65 |
+
│ while not sufficient and iteration < max: │
|
| 66 |
+
│ 1. SearchHandler.execute(query) │
|
| 67 |
+
│ 2. JudgeHandler.assess(evidence) ◄── pydantic-ai Agent │
|
| 68 |
+
│ 3. if sufficient: break │
|
| 69 |
+
│ 4. query = judge.next_queries │
|
| 70 |
+
│ │
|
| 71 |
+
│ return ReportGenerator.generate(evidence) │
|
| 72 |
+
└─────────────────────────────────────────────────────┘
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
**Components:**
|
| 76 |
+
- `src/orchestrator.py` - Simple loop orchestrator
|
| 77 |
+
- `src/agent_factory/judges.py` - JudgeHandler with pydantic-ai
|
| 78 |
+
- `src/tools/search_handler.py` - Scatter-gather search
|
| 79 |
+
- `src/tools/pubmed.py`, `clinicaltrials.py`, `europepmc.py` - Search tools
|
| 80 |
+
|
| 81 |
+
---
|
| 82 |
+
|
| 83 |
+
## 4. Advanced Mode Architecture (MS Agent Framework + pydantic-ai)
|
| 84 |
+
|
| 85 |
+
```text
|
| 86 |
+
┌─────────────────────────────────────────────────────────────────────┐
|
| 87 |
+
│ Microsoft Agent Framework Orchestrator │
|
| 88 |
+
│ │
|
| 89 |
+
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
| 90 |
+
│ │ SearchAgent │───▶│ JudgeAgent │───▶│ ReportAgent │ │
|
| 91 |
+
│ │ (BaseAgent) │ │ (BaseAgent) │ │ (BaseAgent) │ │
|
| 92 |
+
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
|
| 93 |
+
│ │ │ │ │
|
| 94 |
+
│ ▼ ▼ ▼ │
|
| 95 |
+
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
| 96 |
+
│ │ pydantic-ai │ │ pydantic-ai │ │ pydantic-ai │ │
|
| 97 |
+
│ │ Agent() │ │ Agent() │ │ Agent() │ │
|
| 98 |
+
│ │ output_type=│ │ output_type=│ │ output_type=│ │
|
| 99 |
+
│ │ SearchResult│ │ JudgeAssess │ │ Report │ │
|
| 100 |
+
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
| 101 |
+
│ │
|
| 102 |
+
│ Shared State: MagenticState (thread-safe via contextvars) │
|
| 103 |
+
│ - evidence: list[Evidence] │
|
| 104 |
+
│ - embedding_service: EmbeddingService │
|
| 105 |
+
└─────────────────────────────────────────────────────────────────────┘
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
**Components:**
|
| 109 |
+
- `src/orchestrator_magentic.py` - Multi-agent orchestrator
|
| 110 |
+
- `src/agents/search_agent.py` - SearchAgent (BaseAgent)
|
| 111 |
+
- `src/agents/judge_agent.py` - JudgeAgent (BaseAgent)
|
| 112 |
+
- `src/agents/report_agent.py` - ReportAgent (BaseAgent)
|
| 113 |
+
- `src/agents/analysis_agent.py` - AnalysisAgent (BaseAgent)
|
| 114 |
+
- `src/agents/state.py` - Thread-safe state management
|
| 115 |
+
- `src/agents/tools.py` - @ai_function decorated tools
|
| 116 |
+
|
| 117 |
+
---
|
| 118 |
+
|
| 119 |
+
## 5. Mode Selection Logic
|
| 120 |
+
|
| 121 |
+
```python
|
| 122 |
+
# src/orchestrator_factory.py (actual implementation)
|
| 123 |
+
|
| 124 |
+
def create_orchestrator(
|
| 125 |
+
search_handler: SearchHandlerProtocol | None = None,
|
| 126 |
+
judge_handler: JudgeHandlerProtocol | None = None,
|
| 127 |
+
config: OrchestratorConfig | None = None,
|
| 128 |
+
mode: Literal["simple", "magentic", "advanced"] | None = None,
|
| 129 |
+
) -> Any:
|
| 130 |
+
"""
|
| 131 |
+
Auto-select orchestrator based on available credentials.
|
| 132 |
+
|
| 133 |
+
Priority:
|
| 134 |
+
1. If mode explicitly set, use that
|
| 135 |
+
2. If OpenAI key available -> Advanced Mode (currently OpenAI only)
|
| 136 |
+
3. Otherwise -> Simple Mode (HuggingFace free tier)
|
| 137 |
+
"""
|
| 138 |
+
effective_mode = _determine_mode(mode)
|
| 139 |
+
|
| 140 |
+
if effective_mode == "advanced":
|
| 141 |
+
orchestrator_cls = _get_magentic_orchestrator_class()
|
| 142 |
+
return orchestrator_cls(max_rounds=config.max_iterations if config else 10)
|
| 143 |
+
|
| 144 |
+
# Simple mode requires handlers
|
| 145 |
+
if search_handler is None or judge_handler is None:
|
| 146 |
+
raise ValueError("Simple mode requires search_handler and judge_handler")
|
| 147 |
+
|
| 148 |
+
return Orchestrator(
|
| 149 |
+
search_handler=search_handler,
|
| 150 |
+
judge_handler=judge_handler,
|
| 151 |
+
config=config,
|
| 152 |
+
)
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
---
|
| 156 |
+
|
| 157 |
+
## 6. Shared Components (Both Modes Use)
|
| 158 |
+
|
| 159 |
+
These components work in both modes:
|
| 160 |
+
|
| 161 |
+
| Component | Purpose |
|
| 162 |
+
|-----------|---------|
|
| 163 |
+
| `src/tools/pubmed.py` | PubMed search |
|
| 164 |
+
| `src/tools/clinicaltrials.py` | ClinicalTrials.gov search |
|
| 165 |
+
| `src/tools/europepmc.py` | Europe PMC search |
|
| 166 |
+
| `src/tools/search_handler.py` | Scatter-gather orchestration |
|
| 167 |
+
| `src/tools/rate_limiter.py` | Rate limiting |
|
| 168 |
+
| `src/utils/models.py` | Evidence, Citation, JudgeAssessment |
|
| 169 |
+
| `src/utils/config.py` | Settings |
|
| 170 |
+
| `src/services/embeddings.py` | Vector search (optional) |
|
| 171 |
+
|
| 172 |
+
---
|
| 173 |
+
|
| 174 |
+
## 7. pydantic-ai Integration Points
|
| 175 |
+
|
| 176 |
+
Both modes use pydantic-ai for structured LLM outputs:
|
| 177 |
+
|
| 178 |
+
```python
|
| 179 |
+
# In JudgeHandler (both modes)
|
| 180 |
+
from pydantic_ai import Agent
|
| 181 |
+
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 182 |
+
from pydantic_ai.models.openai import OpenAIModel
|
| 183 |
+
from pydantic_ai.models.anthropic import AnthropicModel
|
| 184 |
+
|
| 185 |
+
class JudgeHandler:
|
| 186 |
+
def __init__(self, model: Any = None):
|
| 187 |
+
self.model = model or get_model() # Auto-selects based on config
|
| 188 |
+
self.agent = Agent(
|
| 189 |
+
model=self.model,
|
| 190 |
+
output_type=JudgeAssessment, # Structured output!
|
| 191 |
+
system_prompt=SYSTEM_PROMPT,
|
| 192 |
+
)
|
| 193 |
+
|
| 194 |
+
async def assess(self, question: str, evidence: list[Evidence]) -> JudgeAssessment:
|
| 195 |
+
result = await self.agent.run(format_prompt(question, evidence))
|
| 196 |
+
return result.output # Guaranteed to be JudgeAssessment
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
---
|
| 200 |
+
|
| 201 |
+
## 8. Microsoft Agent Framework Integration Points
|
| 202 |
+
|
| 203 |
+
Advanced mode wraps pydantic-ai agents in BaseAgent:
|
| 204 |
+
|
| 205 |
+
```python
|
| 206 |
+
# In JudgeAgent (advanced mode only)
|
| 207 |
+
from agent_framework import BaseAgent, AgentRunResponse, ChatMessage, Role
|
| 208 |
+
|
| 209 |
+
class JudgeAgent(BaseAgent):
|
| 210 |
+
def __init__(self, judge_handler: JudgeHandlerProtocol):
|
| 211 |
+
super().__init__(
|
| 212 |
+
name="JudgeAgent",
|
| 213 |
+
description="Evaluates evidence quality",
|
| 214 |
+
)
|
| 215 |
+
self._handler = judge_handler # Uses pydantic-ai internally
|
| 216 |
+
|
| 217 |
+
async def run(self, messages, **kwargs) -> AgentRunResponse:
|
| 218 |
+
question = extract_question(messages)
|
| 219 |
+
evidence = self._evidence_store.get("current", [])
|
| 220 |
+
|
| 221 |
+
# Delegate to pydantic-ai powered handler
|
| 222 |
+
assessment = await self._handler.assess(question, evidence)
|
| 223 |
+
|
| 224 |
+
return AgentRunResponse(
|
| 225 |
+
messages=[ChatMessage(role=Role.ASSISTANT, text=format_response(assessment))],
|
| 226 |
+
additional_properties={"assessment": assessment.model_dump()},
|
| 227 |
+
)
|
| 228 |
+
```
|
| 229 |
+
|
| 230 |
+
---
|
| 231 |
+
|
| 232 |
+
## 9. Benefits of This Architecture
|
| 233 |
+
|
| 234 |
+
1. **Graceful Degradation**: Works without API keys (free tier)
|
| 235 |
+
2. **Progressive Enhancement**: Better with API keys (orchestration)
|
| 236 |
+
3. **Code Reuse**: pydantic-ai handlers shared between modes
|
| 237 |
+
4. **Hackathon Ready**: Demo works without requiring paid keys
|
| 238 |
+
5. **Production Ready**: Full orchestration available when needed
|
| 239 |
+
6. **Future Proof**: Can add more agents to advanced mode
|
| 240 |
+
7. **Testable**: Simple mode is easier to unit test
|
| 241 |
+
|
| 242 |
+
---
|
| 243 |
+
|
| 244 |
+
## 10. Known Risks and Mitigations
|
| 245 |
+
|
| 246 |
+
> **From Senior Agent Review**
|
| 247 |
+
|
| 248 |
+
### 10.1 Bridge Complexity (MEDIUM)
|
| 249 |
+
|
| 250 |
+
**Risk:** In Advanced Mode, agents (Agent Framework) wrap handlers (pydantic-ai). Both are async. Context variables (`MagenticState`) must propagate correctly through the pydantic-ai call stack.
|
| 251 |
+
|
| 252 |
+
**Mitigation:**
|
| 253 |
+
- pydantic-ai uses standard Python `contextvars`, which naturally propagate through `await` chains
|
| 254 |
+
- Test context propagation explicitly in integration tests
|
| 255 |
+
- If issues arise, pass state explicitly rather than via context vars
|
| 256 |
+
|
| 257 |
+
### 10.2 Integration Drift (MEDIUM)
|
| 258 |
+
|
| 259 |
+
**Risk:** Simple Mode and Advanced Mode might diverge in behavior over time (e.g., Simple Mode uses logic A, Advanced Mode uses logic B).
|
| 260 |
+
|
| 261 |
+
**Mitigation:**
|
| 262 |
+
- Both modes MUST call the exact same underlying Tools (`src/tools/*`) and Handlers (`src/agent_factory/*`)
|
| 263 |
+
- Handlers are the single source of truth for business logic
|
| 264 |
+
- Agents are thin wrappers that delegate to handlers
|
| 265 |
+
|
| 266 |
+
### 10.3 Testing Burden (LOW-MEDIUM)
|
| 267 |
+
|
| 268 |
+
**Risk:** Two distinct orchestrators (`src/orchestrator.py` and `src/orchestrator_magentic.py`) doubles integration testing surface area.
|
| 269 |
+
|
| 270 |
+
**Mitigation:**
|
| 271 |
+
- Unit test handlers independently (shared code)
|
| 272 |
+
- Integration tests for each mode separately
|
| 273 |
+
- End-to-end tests verify same output for same input (determinism permitting)
|
| 274 |
+
|
| 275 |
+
### 10.4 Dependency Conflicts (LOW)
|
| 276 |
+
|
| 277 |
+
**Risk:** `agent-framework-core` might conflict with `pydantic-ai`'s dependencies (e.g., different pydantic versions).
|
| 278 |
+
|
| 279 |
+
**Status:** Both use `pydantic>=2.x`. Should be compatible.
|
| 280 |
+
|
| 281 |
+
---
|
| 282 |
+
|
| 283 |
+
## 11. Naming Clarification
|
| 284 |
+
|
| 285 |
+
> See `00_SITUATION_AND_PLAN.md` Section 4 for full details.
|
| 286 |
+
|
| 287 |
+
**Important:** The codebase uses "magentic" in file names (`orchestrator_magentic.py`, `magentic_agents.py`) but this refers to our internal naming for Microsoft Agent Framework integration, **NOT** the `magentic` PyPI package.
|
| 288 |
+
|
| 289 |
+
**Future action:** Rename to `orchestrator_advanced.py` to eliminate confusion.
|
docs/brainstorming/magentic-pydantic/02_IMPLEMENTATION_PHASES.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Implementation Phases: Dual-Mode Agent System
|
| 2 |
+
|
| 3 |
+
**Date:** November 27, 2025
|
| 4 |
+
**Status:** IMPLEMENTATION PLAN (REVISED)
|
| 5 |
+
**Strategy:** TDD (Test-Driven Development), SOLID Principles
|
| 6 |
+
**Dependency Strategy:** PyPI (agent-framework-core)
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
## Phase 0: Environment Validation & Cleanup
|
| 11 |
+
|
| 12 |
+
**Goal:** Ensure clean state and dependencies are correctly installed.
|
| 13 |
+
|
| 14 |
+
### Step 0.1: Verify PyPI Package
|
| 15 |
+
The `agent-framework-core` package is published on PyPI by Microsoft. Verify installation:
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
uv sync --all-extras
|
| 19 |
+
python -c "from agent_framework import ChatAgent; print('OK')"
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
### Step 0.2: Branch State
|
| 23 |
+
We are on `feat/dual-mode-architecture`. Ensure it is up to date with `origin/dev` before starting.
|
| 24 |
+
|
| 25 |
+
**Note:** The `reference_repos/agent-framework` folder is kept for reference/documentation only.
|
| 26 |
+
The production dependency uses the official PyPI release.
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## Phase 1: Pydantic-AI Improvements (Simple Mode)
|
| 31 |
+
|
| 32 |
+
**Goal:** Implement `HuggingFaceModel` support in `JudgeHandler` using strict TDD.
|
| 33 |
+
|
| 34 |
+
### Step 1.1: Test First (Red)
|
| 35 |
+
Create `tests/unit/agent_factory/test_judges_factory.py`:
|
| 36 |
+
- Test `get_model()` returns `HuggingFaceModel` when `LLM_PROVIDER=huggingface`.
|
| 37 |
+
- Test `get_model()` respects `HF_TOKEN`.
|
| 38 |
+
- Test fallback to OpenAI.
|
| 39 |
+
|
| 40 |
+
### Step 1.2: Implementation (Green)
|
| 41 |
+
Update `src/utils/config.py`:
|
| 42 |
+
- Add `huggingface_model` and `hf_token` fields.
|
| 43 |
+
|
| 44 |
+
Update `src/agent_factory/judges.py`:
|
| 45 |
+
- Implement `get_model` with the logic derived from the tests.
|
| 46 |
+
- Use dependency injection for the model where possible.
|
| 47 |
+
|
| 48 |
+
### Step 1.3: Refactor
|
| 49 |
+
Ensure `JudgeHandler` is loosely coupled from the specific model provider.
|
| 50 |
+
|
| 51 |
+
---
|
| 52 |
+
|
| 53 |
+
## Phase 2: Orchestrator Factory (The Switch)
|
| 54 |
+
|
| 55 |
+
**Goal:** Implement the factory pattern to switch between Simple and Advanced modes.
|
| 56 |
+
|
| 57 |
+
### Step 2.1: Test First (Red)
|
| 58 |
+
Create `tests/unit/test_orchestrator_factory.py`:
|
| 59 |
+
- Test `create_orchestrator` returns `Orchestrator` (simple) when API keys are missing.
|
| 60 |
+
- Test `create_orchestrator` returns `MagenticOrchestrator` (advanced) when OpenAI key exists.
|
| 61 |
+
- Test explicit mode override.
|
| 62 |
+
|
| 63 |
+
### Step 2.2: Implementation (Green)
|
| 64 |
+
Update `src/orchestrator_factory.py` to implement the selection logic.
|
| 65 |
+
|
| 66 |
+
---
|
| 67 |
+
|
| 68 |
+
## Phase 3: Agent Framework Integration (Advanced Mode)
|
| 69 |
+
|
| 70 |
+
**Goal:** Integrate Microsoft Agent Framework from PyPI.
|
| 71 |
+
|
| 72 |
+
### Step 3.1: Dependency Management
|
| 73 |
+
The `agent-framework-core` package is installed from PyPI:
|
| 74 |
+
```toml
|
| 75 |
+
[project.optional-dependencies]
|
| 76 |
+
magentic = [
|
| 77 |
+
"agent-framework-core>=1.0.0b251120,<2.0.0", # Microsoft Agent Framework (PyPI)
|
| 78 |
+
]
|
| 79 |
+
```
|
| 80 |
+
Install with: `uv sync --all-extras`
|
| 81 |
+
|
| 82 |
+
### Step 3.2: Verify Imports (Test First)
|
| 83 |
+
Create `tests/unit/agents/test_agent_imports.py`:
|
| 84 |
+
- Verify `from agent_framework import ChatAgent` works.
|
| 85 |
+
- Verify instantiation of `ChatAgent` with a mock client.
|
| 86 |
+
|
| 87 |
+
### Step 3.3: Update Agents
|
| 88 |
+
Refactor `src/agents/*.py` to ensure they match the exact signature of the `ChatAgent` class from the PyPI `agent-framework-core` release.
|
| 89 |
+
- **SOLID:** Ensure agents have single responsibilities.
|
| 90 |
+
- **DRY:** Share tool definitions between Pydantic-AI simple mode and Agent Framework advanced mode.
|
| 91 |
+
|
| 92 |
+
---
|
| 93 |
+
|
| 94 |
+
## Phase 4: UI & End-to-End Verification
|
| 95 |
+
|
| 96 |
+
**Goal:** Update Gradio to reflect the active mode.
|
| 97 |
+
|
| 98 |
+
### Step 4.1: UI Updates
|
| 99 |
+
Update `src/app.py` to display "Simple Mode" vs "Advanced Mode".
|
| 100 |
+
|
| 101 |
+
### Step 4.2: End-to-End Test
|
| 102 |
+
Run the full loop:
|
| 103 |
+
1. Simple Mode (No Keys) -> Search -> Judge (HF) -> Report.
|
| 104 |
+
2. Advanced Mode (OpenAI Key) -> SearchAgent -> JudgeAgent -> ReportAgent.
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
## Phase 5: Cleanup & Documentation
|
| 109 |
+
|
| 110 |
+
- Remove unused code.
|
| 111 |
+
- Update main README.md.
|
| 112 |
+
- Final `make check`.
|
docs/brainstorming/magentic-pydantic/03_IMMEDIATE_ACTIONS.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Immediate Actions Checklist
|
| 2 |
+
|
| 3 |
+
**Date:** November 27, 2025
|
| 4 |
+
**Priority:** Execute in order
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Before Starting Implementation
|
| 9 |
+
|
| 10 |
+
### 1. Close PR #41 (CRITICAL)
|
| 11 |
+
|
| 12 |
+
```bash
|
| 13 |
+
gh pr close 41 --comment "Architecture decision changed. Cherry-picking improvements to preserve both pydantic-ai and Agent Framework capabilities."
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
### 2. Verify HuggingFace Spaces is Safe
|
| 17 |
+
|
| 18 |
+
```bash
|
| 19 |
+
# Should show agent framework files exist
|
| 20 |
+
git ls-tree --name-only huggingface-upstream/dev -- src/agents/
|
| 21 |
+
git ls-tree --name-only huggingface-upstream/dev -- src/orchestrator_magentic.py
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
Expected output: Files should exist (they do as of this writing).
|
| 25 |
+
|
| 26 |
+
### 3. Clean Local Environment
|
| 27 |
+
|
| 28 |
+
```bash
|
| 29 |
+
# Switch to main first
|
| 30 |
+
git checkout main
|
| 31 |
+
|
| 32 |
+
# Delete problematic branches
|
| 33 |
+
git branch -D refactor/pydantic-unification 2>/dev/null || true
|
| 34 |
+
git branch -D feat/pubmed-fulltext 2>/dev/null || true
|
| 35 |
+
|
| 36 |
+
# Reset local dev to origin/dev
|
| 37 |
+
git branch -D dev 2>/dev/null || true
|
| 38 |
+
git checkout -b dev origin/dev
|
| 39 |
+
|
| 40 |
+
# Verify agent framework code exists
|
| 41 |
+
ls src/agents/
|
| 42 |
+
# Expected: __init__.py, analysis_agent.py, hypothesis_agent.py, judge_agent.py,
|
| 43 |
+
# magentic_agents.py, report_agent.py, search_agent.py, state.py, tools.py
|
| 44 |
+
|
| 45 |
+
ls src/orchestrator_magentic.py
|
| 46 |
+
# Expected: file exists
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
### 4. Create Fresh Feature Branch
|
| 50 |
+
|
| 51 |
+
```bash
|
| 52 |
+
git checkout -b feat/dual-mode-architecture origin/dev
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
---
|
| 56 |
+
|
| 57 |
+
## Decision Points
|
| 58 |
+
|
| 59 |
+
Before proceeding, confirm:
|
| 60 |
+
|
| 61 |
+
1. **For hackathon**: Do we need advanced mode, or is simple mode sufficient?
|
| 62 |
+
- Simple mode = faster to implement, works today
|
| 63 |
+
- Advanced mode = better quality, more work
|
| 64 |
+
|
| 65 |
+
2. **Timeline**: How much time do we have?
|
| 66 |
+
- If < 1 day: Focus on simple mode only
|
| 67 |
+
- If > 1 day: Implement dual-mode
|
| 68 |
+
|
| 69 |
+
3. **Dependencies**: Is `agent-framework-core` available?
|
| 70 |
+
- Check: `pip index versions agent-framework-core`
|
| 71 |
+
- If not on PyPI, may need to install from GitHub
|
| 72 |
+
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
+
## Quick Start (Simple Mode Only)
|
| 76 |
+
|
| 77 |
+
If time is limited, implement only simple mode improvements:
|
| 78 |
+
|
| 79 |
+
```bash
|
| 80 |
+
# On feat/dual-mode-architecture branch
|
| 81 |
+
|
| 82 |
+
# 1. Update judges.py to add HuggingFace support
|
| 83 |
+
# 2. Update config.py to add HF settings
|
| 84 |
+
# 3. Create free_tier_demo.py
|
| 85 |
+
# 4. Run make check
|
| 86 |
+
# 5. Create PR to dev
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
This gives you free-tier capability without touching agent framework code.
|
| 90 |
+
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
## Quick Start (Full Dual-Mode)
|
| 94 |
+
|
| 95 |
+
If time permits, implement full dual-mode:
|
| 96 |
+
|
| 97 |
+
Follow phases 0-5 in `02_IMPLEMENTATION_PHASES.md`
|
| 98 |
+
|
| 99 |
+
---
|
| 100 |
+
|
| 101 |
+
## Emergency Rollback
|
| 102 |
+
|
| 103 |
+
If anything goes wrong:
|
| 104 |
+
|
| 105 |
+
```bash
|
| 106 |
+
# Reset to safe state
|
| 107 |
+
git checkout main
|
| 108 |
+
git branch -D feat/dual-mode-architecture
|
| 109 |
+
git checkout -b feat/dual-mode-architecture origin/dev
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
Origin/dev is the safe fallback - it has agent framework intact.
|
docs/brainstorming/magentic-pydantic/04_FOLLOWUP_REVIEW_REQUEST.md
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Follow-Up Review Request: Did We Implement Your Feedback?
|
| 2 |
+
|
| 3 |
+
**Date:** November 27, 2025
|
| 4 |
+
**Context:** You previously reviewed our dual-mode architecture plan and provided feedback. We have updated the documentation. Please verify we correctly implemented your recommendations.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Your Original Feedback vs Our Changes
|
| 9 |
+
|
| 10 |
+
### 1. Naming Confusion Clarification
|
| 11 |
+
|
| 12 |
+
**Your feedback:** "You are using Microsoft Agent Framework, but you've named your integration 'Magentic'. This caused the confusion."
|
| 13 |
+
|
| 14 |
+
**Our change:** Added Section 4 in `00_SITUATION_AND_PLAN.md`:
|
| 15 |
+
```markdown
|
| 16 |
+
## 4. CRITICAL: Naming Confusion Clarification
|
| 17 |
+
|
| 18 |
+
> **Senior Agent Review Finding:** The codebase uses "magentic" in file names
|
| 19 |
+
> (e.g., `orchestrator_magentic.py`, `magentic_agents.py`) but this is **NOT**
|
| 20 |
+
> the `magentic` PyPI package (maintained by Jack Collins / jackmpcollins). It's Microsoft Agent Framework.
|
| 21 |
+
|
| 22 |
+
**The naming confusion:**
|
| 23 |
+
- `magentic` (PyPI package): A different library for structured LLM outputs
|
| 24 |
+
- "Magentic" (in our codebase): Our internal name for Microsoft Agent Framework integration
|
| 25 |
+
- `agent-framework-core` (PyPI package): Microsoft's actual multi-agent orchestration framework
|
| 26 |
+
|
| 27 |
+
**Recommended future action:** Rename `orchestrator_magentic.py` → `orchestrator_advanced.py`
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
**Status:** ✅ IMPLEMENTED
|
| 31 |
+
|
| 32 |
+
---
|
| 33 |
+
|
| 34 |
+
### 2. Bridge Complexity Warning
|
| 35 |
+
|
| 36 |
+
**Your feedback:** "You must ensure MagenticState (context vars) propagates correctly through the pydantic-ai call stack."
|
| 37 |
+
|
| 38 |
+
**Our change:** Added Section 10.1 in `01_ARCHITECTURE_SPEC.md`:
|
| 39 |
+
```markdown
|
| 40 |
+
### 10.1 Bridge Complexity (MEDIUM)
|
| 41 |
+
|
| 42 |
+
**Risk:** In Advanced Mode, agents (Agent Framework) wrap handlers (pydantic-ai).
|
| 43 |
+
Both are async. Context variables (`MagenticState`) must propagate correctly.
|
| 44 |
+
|
| 45 |
+
**Mitigation:**
|
| 46 |
+
- pydantic-ai uses standard Python `contextvars`, which naturally propagate through `await` chains
|
| 47 |
+
- Test context propagation explicitly in integration tests
|
| 48 |
+
- If issues arise, pass state explicitly rather than via context vars
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
**Status:** ✅ IMPLEMENTED
|
| 52 |
+
|
| 53 |
+
---
|
| 54 |
+
|
| 55 |
+
### 3. Integration Drift Warning
|
| 56 |
+
|
| 57 |
+
**Your feedback:** "Simple Mode and Advanced Mode might diverge in behavior."
|
| 58 |
+
|
| 59 |
+
**Our change:** Added Section 10.2 in `01_ARCHITECTURE_SPEC.md`:
|
| 60 |
+
```markdown
|
| 61 |
+
### 10.2 Integration Drift (MEDIUM)
|
| 62 |
+
|
| 63 |
+
**Risk:** Simple Mode and Advanced Mode might diverge in behavior over time.
|
| 64 |
+
|
| 65 |
+
**Mitigation:**
|
| 66 |
+
- Both modes MUST call the exact same underlying Tools (`src/tools/*`) and Handlers (`src/agent_factory/*`)
|
| 67 |
+
- Handlers are the single source of truth for business logic
|
| 68 |
+
- Agents are thin wrappers that delegate to handlers
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
**Status:** ✅ IMPLEMENTED
|
| 72 |
+
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
+
### 4. Testing Burden Warning
|
| 76 |
+
|
| 77 |
+
**Your feedback:** "You now have two distinct orchestrators to maintain. This doubles your integration testing surface area."
|
| 78 |
+
|
| 79 |
+
**Our change:** Added Section 10.3 in `01_ARCHITECTURE_SPEC.md`:
|
| 80 |
+
```markdown
|
| 81 |
+
### 10.3 Testing Burden (LOW-MEDIUM)
|
| 82 |
+
|
| 83 |
+
**Risk:** Two distinct orchestrators doubles integration testing surface area.
|
| 84 |
+
|
| 85 |
+
**Mitigation:**
|
| 86 |
+
- Unit test handlers independently (shared code)
|
| 87 |
+
- Integration tests for each mode separately
|
| 88 |
+
- End-to-end tests verify same output for same input
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
**Status:** ✅ IMPLEMENTED
|
| 92 |
+
|
| 93 |
+
---
|
| 94 |
+
|
| 95 |
+
### 5. Rename Recommendation
|
| 96 |
+
|
| 97 |
+
**Your feedback:** "Rename `src/orchestrator_magentic.py` to `src/orchestrator_advanced.py`"
|
| 98 |
+
|
| 99 |
+
**Our change:** Added Step 3.4 in `02_IMPLEMENTATION_PHASES.md`:
|
| 100 |
+
```markdown
|
| 101 |
+
### Step 3.4: (OPTIONAL) Rename "Magentic" to "Advanced"
|
| 102 |
+
|
| 103 |
+
> **Senior Agent Recommendation:** Rename files to eliminate confusion.
|
| 104 |
+
|
| 105 |
+
git mv src/orchestrator_magentic.py src/orchestrator_advanced.py
|
| 106 |
+
git mv src/agents/magentic_agents.py src/agents/advanced_agents.py
|
| 107 |
+
|
| 108 |
+
**Note:** This is optional for the hackathon. Can be done in a follow-up PR.
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
**Status:** ✅ DOCUMENTED (marked as optional for hackathon)
|
| 112 |
+
|
| 113 |
+
---
|
| 114 |
+
|
| 115 |
+
### 6. Standardize Wrapper Recommendation
|
| 116 |
+
|
| 117 |
+
**Your feedback:** "Create a generic `PydanticAiAgentWrapper(BaseAgent)` class instead of manually wrapping each handler."
|
| 118 |
+
|
| 119 |
+
**Our change:** NOT YET DOCUMENTED
|
| 120 |
+
|
| 121 |
+
**Status:** ⚠️ NOT IMPLEMENTED - Should we add this?
|
| 122 |
+
|
| 123 |
+
---
|
| 124 |
+
|
| 125 |
+
## Questions for Your Review
|
| 126 |
+
|
| 127 |
+
1. **Did we correctly implement your feedback?** Are there any misunderstandings in how we interpreted your recommendations?
|
| 128 |
+
|
| 129 |
+
2. **Is the "Standardize Wrapper" recommendation critical?** Should we add it to the implementation phases, or is it a nice-to-have for later?
|
| 130 |
+
|
| 131 |
+
3. **Dependency versioning:** You noted `agent-framework-core>=1.0.0b251120` might be ephemeral. Should we:
|
| 132 |
+
- Pin to a specific version?
|
| 133 |
+
- Use a version range?
|
| 134 |
+
- Install from GitHub source?
|
| 135 |
+
|
| 136 |
+
4. **Anything else we missed?**
|
| 137 |
+
|
| 138 |
+
---
|
| 139 |
+
|
| 140 |
+
## Files to Re-Review
|
| 141 |
+
|
| 142 |
+
1. `00_SITUATION_AND_PLAN.md` - Added Section 4 (Naming Clarification)
|
| 143 |
+
2. `01_ARCHITECTURE_SPEC.md` - Added Sections 10-11 (Risks, Naming)
|
| 144 |
+
3. `02_IMPLEMENTATION_PHASES.md` - Added Step 3.4 (Optional Rename)
|
| 145 |
+
|
| 146 |
+
---
|
| 147 |
+
|
| 148 |
+
## Current Branch State
|
| 149 |
+
|
| 150 |
+
We are now on `feat/dual-mode-architecture` branched from `origin/dev`:
|
| 151 |
+
- ✅ Agent framework code intact (`src/agents/`, `src/orchestrator_magentic.py`)
|
| 152 |
+
- ✅ Documentation committed
|
| 153 |
+
- ❌ PR #41 still open (need to close it)
|
| 154 |
+
- ❌ Cherry-pick of pydantic-ai improvements not yet done
|
| 155 |
+
|
| 156 |
+
---
|
| 157 |
+
|
| 158 |
+
Please confirm: **GO / NO-GO** to proceed with Phase 1 (cherry-picking pydantic-ai improvements)?
|
docs/brainstorming/magentic-pydantic/REVIEW_PROMPT_FOR_SENIOR_AGENT.md
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Senior Agent Review Prompt
|
| 2 |
+
|
| 3 |
+
Copy and paste everything below this line to a fresh Claude/AI session:
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Context
|
| 8 |
+
|
| 9 |
+
I am a junior developer working on a HuggingFace hackathon project called DeepCritical. We made a significant architectural mistake and are now trying to course-correct. I need you to act as a **senior staff engineer** and critically review our proposed solution.
|
| 10 |
+
|
| 11 |
+
## The Situation
|
| 12 |
+
|
| 13 |
+
We almost merged a refactor that would have **deleted** our multi-agent orchestration capability, mistakenly believing that `pydantic-ai` (a library for structured LLM outputs) and Microsoft's `agent-framework` (a framework for multi-agent orchestration) were mutually exclusive alternatives.
|
| 14 |
+
|
| 15 |
+
**They are not.** They are complementary:
|
| 16 |
+
- `pydantic-ai` ensures LLM responses match Pydantic schemas (type-safe outputs)
|
| 17 |
+
- `agent-framework` orchestrates multiple agents working together (coordination layer)
|
| 18 |
+
|
| 19 |
+
We now want to implement a **dual-mode architecture** where:
|
| 20 |
+
- **Simple Mode (No API key):** Uses only pydantic-ai with HuggingFace free tier
|
| 21 |
+
- **Advanced Mode (With API key):** Uses Microsoft Agent Framework for orchestration, with pydantic-ai inside each agent for structured outputs
|
| 22 |
+
|
| 23 |
+
## Your Task
|
| 24 |
+
|
| 25 |
+
Please perform a **deep, critical review** of:
|
| 26 |
+
|
| 27 |
+
1. **The architecture diagram** (image attached: `assets/magentic-pydantic.png`)
|
| 28 |
+
2. **Our documentation** (4 files listed below)
|
| 29 |
+
3. **The actual codebase** to verify our claims
|
| 30 |
+
|
| 31 |
+
## Specific Questions to Answer
|
| 32 |
+
|
| 33 |
+
### Architecture Validation
|
| 34 |
+
1. Is our understanding correct that pydantic-ai and agent-framework are complementary, not competing?
|
| 35 |
+
2. Does the dual-mode architecture diagram accurately represent how these should integrate?
|
| 36 |
+
3. Are there any architectural flaws or anti-patterns in our proposed design?
|
| 37 |
+
|
| 38 |
+
### Documentation Accuracy
|
| 39 |
+
4. Are the branch states we documented accurate? (Check `git log`, `git ls-tree`)
|
| 40 |
+
5. Is our understanding of what code exists where correct?
|
| 41 |
+
6. Are the implementation phases realistic and in the correct order?
|
| 42 |
+
7. Are there any missing steps or dependencies we overlooked?
|
| 43 |
+
|
| 44 |
+
### Codebase Reality Check
|
| 45 |
+
8. Does `origin/dev` actually have the agent framework code intact? Verify by checking:
|
| 46 |
+
- `git ls-tree origin/dev -- src/agents/`
|
| 47 |
+
- `git ls-tree origin/dev -- src/orchestrator_magentic.py`
|
| 48 |
+
9. What does the current `src/agents/` code actually import? Does it use `agent_framework` or `agent-framework-core`?
|
| 49 |
+
10. Is the `agent-framework-core` package actually available on PyPI, or do we need to install from source?
|
| 50 |
+
|
| 51 |
+
### Implementation Feasibility
|
| 52 |
+
11. Can the cherry-pick strategy we outlined actually work, or are there merge conflicts we're not seeing?
|
| 53 |
+
12. Is the mode auto-detection logic sound?
|
| 54 |
+
13. What are the risks we haven't identified?
|
| 55 |
+
|
| 56 |
+
### Critical Errors Check
|
| 57 |
+
14. Did we miss anything critical in our analysis?
|
| 58 |
+
15. Are there any factual errors in our documentation?
|
| 59 |
+
16. Would a Google/DeepMind senior engineer approve this plan, or would they flag issues?
|
| 60 |
+
|
| 61 |
+
## Files to Review
|
| 62 |
+
|
| 63 |
+
Please read these files in order:
|
| 64 |
+
|
| 65 |
+
1. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md`
|
| 66 |
+
2. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/01_ARCHITECTURE_SPEC.md`
|
| 67 |
+
3. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/02_IMPLEMENTATION_PHASES.md`
|
| 68 |
+
4. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/03_IMMEDIATE_ACTIONS.md`
|
| 69 |
+
|
| 70 |
+
And the architecture diagram:
|
| 71 |
+
5. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/assets/magentic-pydantic.png`
|
| 72 |
+
|
| 73 |
+
## Reference Repositories to Consult
|
| 74 |
+
|
| 75 |
+
We have local clones of the source-of-truth repositories:
|
| 76 |
+
|
| 77 |
+
- **Original DeepCritical:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/reference_repos/DeepCritical/`
|
| 78 |
+
- **Microsoft Agent Framework:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/reference_repos/agent-framework/`
|
| 79 |
+
- **Microsoft AutoGen:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/reference_repos/autogen-microsoft/`
|
| 80 |
+
|
| 81 |
+
Please cross-reference our hackathon fork against these to verify architectural alignment.
|
| 82 |
+
|
| 83 |
+
## Codebase to Analyze
|
| 84 |
+
|
| 85 |
+
Our hackathon fork is at:
|
| 86 |
+
`/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/`
|
| 87 |
+
|
| 88 |
+
Key files to examine:
|
| 89 |
+
- `src/agents/` - Agent framework integration
|
| 90 |
+
- `src/agent_factory/judges.py` - pydantic-ai integration
|
| 91 |
+
- `src/orchestrator.py` - Simple mode orchestrator
|
| 92 |
+
- `src/orchestrator_magentic.py` - Advanced mode orchestrator
|
| 93 |
+
- `src/orchestrator_factory.py` - Mode selection
|
| 94 |
+
- `pyproject.toml` - Dependencies
|
| 95 |
+
|
| 96 |
+
## Expected Output
|
| 97 |
+
|
| 98 |
+
Please provide:
|
| 99 |
+
|
| 100 |
+
1. **Validation Summary:** Is our plan sound? (YES/NO with explanation)
|
| 101 |
+
2. **Errors Found:** List any factual errors in our documentation
|
| 102 |
+
3. **Missing Items:** What did we overlook?
|
| 103 |
+
4. **Risk Assessment:** What could go wrong?
|
| 104 |
+
5. **Recommended Changes:** Specific edits to our documentation or plan
|
| 105 |
+
6. **Go/No-Go Recommendation:** Should we proceed with this plan?
|
| 106 |
+
|
| 107 |
+
## Tone
|
| 108 |
+
|
| 109 |
+
Be brutally honest. If our plan is flawed, say so directly. We would rather know now than after implementation. Don't soften criticism - we need accuracy.
|
| 110 |
+
|
| 111 |
+
---
|
| 112 |
+
|
| 113 |
+
END OF PROMPT
|
docs/bugs/FIX_PLAN_MAGENTIC_MODE.md
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Fix Plan: Magentic Mode Report Generation
|
| 2 |
+
|
| 3 |
+
**Related Bug**: `P0_MAGENTIC_MODE_BROKEN.md`
|
| 4 |
+
**Approach**: Test-Driven Development (TDD)
|
| 5 |
+
**Estimated Scope**: 4 tasks, ~2-3 hours
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Problem Summary
|
| 10 |
+
|
| 11 |
+
Magentic mode runs but fails to produce readable reports due to:
|
| 12 |
+
|
| 13 |
+
1. **Primary Bug**: `MagenticFinalResultEvent.message` returns a `ChatMessage` object, not plain text
|
| 14 |
+
2. **Secondary Bug**: Max rounds (3) reached before ReportAgent completes
|
| 15 |
+
3. **Tertiary Issues**: Stale "bioRxiv" references in prompts
|
| 16 |
+
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
## Fix Order (TDD)
|
| 20 |
+
|
| 21 |
+
### Phase 1: Write Failing Tests
|
| 22 |
+
|
| 23 |
+
**Task 1.1**: Create test for ChatMessage text extraction
|
| 24 |
+
|
| 25 |
+
```python
|
| 26 |
+
# tests/unit/test_orchestrator_magentic.py
|
| 27 |
+
|
| 28 |
+
def test_process_event_extracts_text_from_chat_message():
|
| 29 |
+
"""Final result event should extract text from ChatMessage object."""
|
| 30 |
+
# Arrange: Mock ChatMessage with .content attribute
|
| 31 |
+
# Act: Call _process_event with MagenticFinalResultEvent
|
| 32 |
+
# Assert: Returned AgentEvent.message is a string, not object repr
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
**Task 1.2**: Create test for max rounds configuration
|
| 36 |
+
|
| 37 |
+
```python
|
| 38 |
+
def test_orchestrator_uses_configured_max_rounds():
|
| 39 |
+
"""MagenticOrchestrator should use max_rounds from constructor."""
|
| 40 |
+
# Arrange: Create orchestrator with max_rounds=10
|
| 41 |
+
# Act: Build workflow
|
| 42 |
+
# Assert: Workflow has max_round_count=10
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
**Task 1.3**: Create test for bioRxiv reference removal
|
| 46 |
+
|
| 47 |
+
```python
|
| 48 |
+
def test_task_prompt_references_europe_pmc():
|
| 49 |
+
"""Task prompt should reference Europe PMC, not bioRxiv."""
|
| 50 |
+
# Arrange: Create orchestrator
|
| 51 |
+
# Act: Check task string in run()
|
| 52 |
+
# Assert: Contains "Europe PMC", not "bioRxiv"
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
---
|
| 56 |
+
|
| 57 |
+
### Phase 2: Fix ChatMessage Text Extraction
|
| 58 |
+
|
| 59 |
+
**File**: `src/orchestrator_magentic.py`
|
| 60 |
+
**Lines**: 192-199
|
| 61 |
+
|
| 62 |
+
**Current Code**:
|
| 63 |
+
```python
|
| 64 |
+
elif isinstance(event, MagenticFinalResultEvent):
|
| 65 |
+
text = event.message.text if event.message else "No result"
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
**Fixed Code**:
|
| 69 |
+
```python
|
| 70 |
+
elif isinstance(event, MagenticFinalResultEvent):
|
| 71 |
+
if event.message:
|
| 72 |
+
# ChatMessage may have .content or .text depending on version
|
| 73 |
+
if hasattr(event.message, 'content') and event.message.content:
|
| 74 |
+
text = str(event.message.content)
|
| 75 |
+
elif hasattr(event.message, 'text') and event.message.text:
|
| 76 |
+
text = str(event.message.text)
|
| 77 |
+
else:
|
| 78 |
+
# Fallback: convert entire message to string
|
| 79 |
+
text = str(event.message)
|
| 80 |
+
else:
|
| 81 |
+
text = "No result generated"
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
**Why**: The `agent_framework.ChatMessage` object structure may vary. We need defensive extraction.
|
| 85 |
+
|
| 86 |
+
---
|
| 87 |
+
|
| 88 |
+
### Phase 3: Fix Max Rounds Configuration
|
| 89 |
+
|
| 90 |
+
**File**: `src/orchestrator_magentic.py`
|
| 91 |
+
**Lines**: 97-99
|
| 92 |
+
|
| 93 |
+
**Current Code**:
|
| 94 |
+
```python
|
| 95 |
+
.with_standard_manager(
|
| 96 |
+
chat_client=manager_client,
|
| 97 |
+
max_round_count=self._max_rounds, # Already uses config
|
| 98 |
+
max_stall_count=3,
|
| 99 |
+
max_reset_count=2,
|
| 100 |
+
)
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
**Issue**: Default `max_rounds` in `__init__` is 10, but workflow may need more for complex queries.
|
| 104 |
+
|
| 105 |
+
**Fix**: Verify the value flows through correctly. Add logging.
|
| 106 |
+
|
| 107 |
+
```python
|
| 108 |
+
logger.info(
|
| 109 |
+
"Building Magentic workflow",
|
| 110 |
+
max_rounds=self._max_rounds,
|
| 111 |
+
max_stall=3,
|
| 112 |
+
max_reset=2,
|
| 113 |
+
)
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
**Also check**: `src/orchestrator_factory.py` passes config correctly:
|
| 117 |
+
```python
|
| 118 |
+
return MagenticOrchestrator(
|
| 119 |
+
max_rounds=config.max_iterations if config else 10,
|
| 120 |
+
)
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
---
|
| 124 |
+
|
| 125 |
+
### Phase 4: Fix Stale bioRxiv References
|
| 126 |
+
|
| 127 |
+
**Files to update**:
|
| 128 |
+
|
| 129 |
+
| File | Line | Change |
|
| 130 |
+
|------|------|--------|
|
| 131 |
+
| `src/orchestrator_magentic.py` | 131 | "bioRxiv" → "Europe PMC" |
|
| 132 |
+
| `src/agents/magentic_agents.py` | 32-33 | "bioRxiv" → "Europe PMC" |
|
| 133 |
+
| `src/app.py` | 202-203 | "bioRxiv" → "Europe PMC" |
|
| 134 |
+
|
| 135 |
+
**Search command to verify**:
|
| 136 |
+
```bash
|
| 137 |
+
grep -rn "bioRxiv\|biorxiv" src/
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
---
|
| 141 |
+
|
| 142 |
+
## Implementation Checklist
|
| 143 |
+
|
| 144 |
+
```
|
| 145 |
+
[ ] Phase 1: Write failing tests
|
| 146 |
+
[ ] 1.1 Test ChatMessage text extraction
|
| 147 |
+
[ ] 1.2 Test max rounds configuration
|
| 148 |
+
[ ] 1.3 Test Europe PMC references
|
| 149 |
+
|
| 150 |
+
[ ] Phase 2: Fix ChatMessage extraction
|
| 151 |
+
[ ] Update _process_event() in orchestrator_magentic.py
|
| 152 |
+
[ ] Run test 1.1 - should pass
|
| 153 |
+
|
| 154 |
+
[ ] Phase 3: Fix max rounds
|
| 155 |
+
[ ] Add logging to _build_workflow()
|
| 156 |
+
[ ] Verify factory passes config correctly
|
| 157 |
+
[ ] Run test 1.2 - should pass
|
| 158 |
+
|
| 159 |
+
[ ] Phase 4: Fix bioRxiv references
|
| 160 |
+
[ ] Update orchestrator_magentic.py task prompt
|
| 161 |
+
[ ] Update magentic_agents.py descriptions
|
| 162 |
+
[ ] Update app.py UI text
|
| 163 |
+
[ ] Run test 1.3 - should pass
|
| 164 |
+
[ ] Run grep to verify no remaining refs
|
| 165 |
+
|
| 166 |
+
[ ] Final Verification
|
| 167 |
+
[ ] make check passes
|
| 168 |
+
[ ] All tests pass (108+)
|
| 169 |
+
[ ] Manual test: run_magentic.py produces readable report
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
---
|
| 173 |
+
|
| 174 |
+
## Test Commands
|
| 175 |
+
|
| 176 |
+
```bash
|
| 177 |
+
# Run specific test file
|
| 178 |
+
uv run pytest tests/unit/test_orchestrator_magentic.py -v
|
| 179 |
+
|
| 180 |
+
# Run all tests
|
| 181 |
+
uv run pytest tests/unit/ -v
|
| 182 |
+
|
| 183 |
+
# Full check
|
| 184 |
+
make check
|
| 185 |
+
|
| 186 |
+
# Manual integration test
|
| 187 |
+
set -a && source .env && set +a
|
| 188 |
+
uv run python examples/orchestrator_demo/run_magentic.py "metformin alzheimer"
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
---
|
| 192 |
+
|
| 193 |
+
## Success Criteria
|
| 194 |
+
|
| 195 |
+
1. `run_magentic.py` outputs a readable research report (not `<ChatMessage object>`)
|
| 196 |
+
2. Report includes: Executive Summary, Key Findings, Drug Candidates, References
|
| 197 |
+
3. No "Max round count reached" error with default settings
|
| 198 |
+
4. No "bioRxiv" references anywhere in codebase
|
| 199 |
+
5. All 108+ tests pass
|
| 200 |
+
6. `make check` passes
|
| 201 |
+
|
| 202 |
+
---
|
| 203 |
+
|
| 204 |
+
## Files Modified
|
| 205 |
+
|
| 206 |
+
```
|
| 207 |
+
src/
|
| 208 |
+
├── orchestrator_magentic.py # ChatMessage fix, logging
|
| 209 |
+
├── agents/magentic_agents.py # bioRxiv → Europe PMC
|
| 210 |
+
└── app.py # bioRxiv → Europe PMC
|
| 211 |
+
|
| 212 |
+
tests/unit/
|
| 213 |
+
└── test_orchestrator_magentic.py # NEW: 3 tests
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
---
|
| 217 |
+
|
| 218 |
+
## Notes for AI Agent
|
| 219 |
+
|
| 220 |
+
When implementing this fix plan:
|
| 221 |
+
|
| 222 |
+
1. **DO NOT** create mock data or fake responses
|
| 223 |
+
2. **DO** write real tests that verify actual behavior
|
| 224 |
+
3. **DO** run `make check` after each phase
|
| 225 |
+
4. **DO** test with real OpenAI API key via `.env`
|
| 226 |
+
5. **DO** preserve existing functionality - simple mode must still work
|
| 227 |
+
6. **DO NOT** over-engineer - minimal changes to fix the specific bugs
|
docs/bugs/P0_ACTIONABLE_FIXES.md
DELETED
|
@@ -1,281 +0,0 @@
|
|
| 1 |
-
# P0 Actionable Fixes - What to Do
|
| 2 |
-
|
| 3 |
-
**Date:** November 27, 2025
|
| 4 |
-
**Status:** ACTIONABLE
|
| 5 |
-
|
| 6 |
-
---
|
| 7 |
-
|
| 8 |
-
## Summary: What's Broken and What's Fixable
|
| 9 |
-
|
| 10 |
-
| Tool | Problem | Fixable? | How |
|
| 11 |
-
|------|---------|----------|-----|
|
| 12 |
-
| BioRxiv | API has NO search endpoint | **NO** | Replace with Europe PMC |
|
| 13 |
-
| PubMed | No query preprocessing | **YES** | Add query cleaner |
|
| 14 |
-
| ClinicalTrials | No filters applied | **YES** | Add filter params |
|
| 15 |
-
| Magentic Framework | Nothing wrong | N/A | Already working |
|
| 16 |
-
|
| 17 |
-
---
|
| 18 |
-
|
| 19 |
-
## FIX 1: Replace BioRxiv with Europe PMC (30 min)
|
| 20 |
-
|
| 21 |
-
### Why BioRxiv Can't Be Fixed
|
| 22 |
-
|
| 23 |
-
The bioRxiv API only has this endpoint:
|
| 24 |
-
```
|
| 25 |
-
https://api.biorxiv.org/details/{server}/{date-range}/{cursor}/json
|
| 26 |
-
```
|
| 27 |
-
|
| 28 |
-
This returns papers **by date**, not by keyword. There is NO search endpoint.
|
| 29 |
-
|
| 30 |
-
**Proof:** I queried `medrxiv/2024-01-01/2024-01-02` and got:
|
| 31 |
-
- "Global risk of Plasmodium falciparum" (malaria)
|
| 32 |
-
- "Multiple Endocrine Neoplasia in India"
|
| 33 |
-
- "Acupuncture for Acute Musculoskeletal Pain"
|
| 34 |
-
|
| 35 |
-
**None of these are about Long COVID** because the API doesn't search.
|
| 36 |
-
|
| 37 |
-
### Europe PMC Has Search + Preprints
|
| 38 |
-
|
| 39 |
-
```bash
|
| 40 |
-
curl "https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=long+covid+treatment&resultType=core&pageSize=3&format=json"
|
| 41 |
-
```
|
| 42 |
-
|
| 43 |
-
Returns 283,058 results including:
|
| 44 |
-
- "Long COVID Treatment No Silver Bullets, Only a Few Bronze BBs" ✅
|
| 45 |
-
|
| 46 |
-
### The Fix
|
| 47 |
-
|
| 48 |
-
Replace `src/tools/biorxiv.py` with `src/tools/europepmc.py`:
|
| 49 |
-
|
| 50 |
-
```python
|
| 51 |
-
"""Europe PMC preprint and paper search tool."""
|
| 52 |
-
|
| 53 |
-
import httpx
|
| 54 |
-
from src.utils.models import Citation, Evidence
|
| 55 |
-
|
| 56 |
-
class EuropePMCTool:
|
| 57 |
-
"""Search Europe PMC for papers and preprints."""
|
| 58 |
-
|
| 59 |
-
BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
|
| 60 |
-
|
| 61 |
-
@property
|
| 62 |
-
def name(self) -> str:
|
| 63 |
-
return "europepmc"
|
| 64 |
-
|
| 65 |
-
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 66 |
-
"""Search Europe PMC (includes preprints from bioRxiv/medRxiv)."""
|
| 67 |
-
params = {
|
| 68 |
-
"query": query,
|
| 69 |
-
"resultType": "core",
|
| 70 |
-
"pageSize": max_results,
|
| 71 |
-
"format": "json",
|
| 72 |
-
}
|
| 73 |
-
|
| 74 |
-
async with httpx.AsyncClient(timeout=30.0) as client:
|
| 75 |
-
response = await client.get(self.BASE_URL, params=params)
|
| 76 |
-
response.raise_for_status()
|
| 77 |
-
|
| 78 |
-
data = response.json()
|
| 79 |
-
results = data.get("resultList", {}).get("result", [])
|
| 80 |
-
|
| 81 |
-
return [self._to_evidence(r) for r in results]
|
| 82 |
-
|
| 83 |
-
def _to_evidence(self, result: dict) -> Evidence:
|
| 84 |
-
"""Convert Europe PMC result to Evidence."""
|
| 85 |
-
title = result.get("title", "Untitled")
|
| 86 |
-
abstract = result.get("abstractText", "No abstract")
|
| 87 |
-
doi = result.get("doi", "")
|
| 88 |
-
pub_year = result.get("pubYear", "Unknown")
|
| 89 |
-
source = result.get("source", "europepmc")
|
| 90 |
-
|
| 91 |
-
# Mark preprints
|
| 92 |
-
pub_type = result.get("pubTypeList", {}).get("pubType", [])
|
| 93 |
-
is_preprint = "Preprint" in pub_type
|
| 94 |
-
|
| 95 |
-
content = f"{'[PREPRINT] ' if is_preprint else ''}{abstract[:1800]}"
|
| 96 |
-
|
| 97 |
-
return Evidence(
|
| 98 |
-
content=content,
|
| 99 |
-
citation=Citation(
|
| 100 |
-
source="europepmc" if not is_preprint else "preprint",
|
| 101 |
-
title=title[:500],
|
| 102 |
-
url=f"https://doi.org/{doi}" if doi else "",
|
| 103 |
-
date=str(pub_year),
|
| 104 |
-
),
|
| 105 |
-
relevance=0.75 if is_preprint else 0.9,
|
| 106 |
-
)
|
| 107 |
-
```
|
| 108 |
-
|
| 109 |
-
---
|
| 110 |
-
|
| 111 |
-
## FIX 2: Add PubMed Query Preprocessing (1 hour)
|
| 112 |
-
|
| 113 |
-
### Current Problem
|
| 114 |
-
|
| 115 |
-
User enters: `What medications show promise for Long COVID?`
|
| 116 |
-
PubMed receives: `What medications show promise for Long COVID?`
|
| 117 |
-
|
| 118 |
-
The question words pollute the search.
|
| 119 |
-
|
| 120 |
-
### The Fix
|
| 121 |
-
|
| 122 |
-
Add `src/tools/query_utils.py`:
|
| 123 |
-
|
| 124 |
-
```python
|
| 125 |
-
"""Query preprocessing utilities."""
|
| 126 |
-
|
| 127 |
-
import re
|
| 128 |
-
|
| 129 |
-
# Question words to remove
|
| 130 |
-
QUESTION_WORDS = {
|
| 131 |
-
"what", "which", "how", "why", "when", "where", "who",
|
| 132 |
-
"is", "are", "can", "could", "would", "should", "do", "does",
|
| 133 |
-
"show", "promise", "help", "treat", "cure",
|
| 134 |
-
}
|
| 135 |
-
|
| 136 |
-
# Medical synonyms to expand
|
| 137 |
-
SYNONYMS = {
|
| 138 |
-
"long covid": ["long COVID", "PASC", "post-COVID syndrome", "post-acute sequelae"],
|
| 139 |
-
"alzheimer": ["Alzheimer's disease", "AD", "Alzheimer dementia"],
|
| 140 |
-
"cancer": ["neoplasm", "tumor", "malignancy", "carcinoma"],
|
| 141 |
-
}
|
| 142 |
-
|
| 143 |
-
def preprocess_pubmed_query(raw_query: str) -> str:
|
| 144 |
-
"""Convert natural language to cleaner PubMed query."""
|
| 145 |
-
# Lowercase
|
| 146 |
-
query = raw_query.lower()
|
| 147 |
-
|
| 148 |
-
# Remove question marks
|
| 149 |
-
query = query.replace("?", "")
|
| 150 |
-
|
| 151 |
-
# Remove question words
|
| 152 |
-
words = query.split()
|
| 153 |
-
words = [w for w in words if w not in QUESTION_WORDS]
|
| 154 |
-
query = " ".join(words)
|
| 155 |
-
|
| 156 |
-
# Expand synonyms
|
| 157 |
-
for term, expansions in SYNONYMS.items():
|
| 158 |
-
if term in query:
|
| 159 |
-
# Add OR clause
|
| 160 |
-
expansion = " OR ".join([f'"{e}"' for e in expansions])
|
| 161 |
-
query = query.replace(term, f"({expansion})")
|
| 162 |
-
|
| 163 |
-
return query.strip()
|
| 164 |
-
```
|
| 165 |
-
|
| 166 |
-
Then update `src/tools/pubmed.py`:
|
| 167 |
-
|
| 168 |
-
```python
|
| 169 |
-
from src.tools.query_utils import preprocess_pubmed_query
|
| 170 |
-
|
| 171 |
-
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 172 |
-
# Preprocess query
|
| 173 |
-
clean_query = preprocess_pubmed_query(query)
|
| 174 |
-
|
| 175 |
-
search_params = self._build_params(
|
| 176 |
-
db="pubmed",
|
| 177 |
-
term=clean_query, # Use cleaned query
|
| 178 |
-
retmax=max_results,
|
| 179 |
-
sort="relevance",
|
| 180 |
-
)
|
| 181 |
-
# ... rest unchanged
|
| 182 |
-
```
|
| 183 |
-
|
| 184 |
-
---
|
| 185 |
-
|
| 186 |
-
## FIX 3: Add ClinicalTrials.gov Filters (30 min)
|
| 187 |
-
|
| 188 |
-
### Current Problem
|
| 189 |
-
|
| 190 |
-
Returns ALL trials including withdrawn, terminated, observational studies.
|
| 191 |
-
|
| 192 |
-
### The Fix
|
| 193 |
-
|
| 194 |
-
The API supports `filter.overallStatus` and other filters. Update `src/tools/clinicaltrials.py`:
|
| 195 |
-
|
| 196 |
-
```python
|
| 197 |
-
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 198 |
-
params: dict[str, str | int] = {
|
| 199 |
-
"query.term": query,
|
| 200 |
-
"pageSize": min(max_results, 100),
|
| 201 |
-
"fields": "|".join(self.FIELDS),
|
| 202 |
-
# ADD THESE FILTERS:
|
| 203 |
-
"filter.overallStatus": "COMPLETED|RECRUITING|ACTIVE_NOT_RECRUITING",
|
| 204 |
-
# Only interventional studies (not observational)
|
| 205 |
-
"aggFilters": "studyType:int",
|
| 206 |
-
}
|
| 207 |
-
# ... rest unchanged
|
| 208 |
-
```
|
| 209 |
-
|
| 210 |
-
**Note:** I tested the API - it supports filtering but with slightly different syntax. Check the [API docs](https://clinicaltrials.gov/data-api/api).
|
| 211 |
-
|
| 212 |
-
---
|
| 213 |
-
|
| 214 |
-
## What NOT to Change
|
| 215 |
-
|
| 216 |
-
### Microsoft Agent Framework - WORKING
|
| 217 |
-
|
| 218 |
-
I verified:
|
| 219 |
-
```python
|
| 220 |
-
from agent_framework import MagenticBuilder, ChatAgent
|
| 221 |
-
from agent_framework.openai import OpenAIChatClient
|
| 222 |
-
# All imports OK
|
| 223 |
-
|
| 224 |
-
orchestrator = MagenticOrchestrator(max_rounds=2)
|
| 225 |
-
workflow = orchestrator._build_workflow()
|
| 226 |
-
# Workflow built successfully
|
| 227 |
-
```
|
| 228 |
-
|
| 229 |
-
The Magentic agents are correctly wired:
|
| 230 |
-
- SearchAgent → GPT-5.1 ✅
|
| 231 |
-
- JudgeAgent → GPT-5.1 ✅
|
| 232 |
-
- HypothesisAgent → GPT-5.1 ✅
|
| 233 |
-
- ReportAgent → GPT-5.1 ✅
|
| 234 |
-
|
| 235 |
-
**The framework is fine. The tools it calls are broken.**
|
| 236 |
-
|
| 237 |
-
---
|
| 238 |
-
|
| 239 |
-
## Priority Order
|
| 240 |
-
|
| 241 |
-
1. **Replace BioRxiv** → Immediate, fundamental
|
| 242 |
-
2. **Add PubMed preprocessing** → High impact, easy
|
| 243 |
-
3. **Add ClinicalTrials filters** → Medium impact, easy
|
| 244 |
-
|
| 245 |
-
---
|
| 246 |
-
|
| 247 |
-
## Test After Fixes
|
| 248 |
-
|
| 249 |
-
```bash
|
| 250 |
-
# Test Europe PMC
|
| 251 |
-
uv run python -c "
|
| 252 |
-
import asyncio
|
| 253 |
-
from src.tools.europepmc import EuropePMCTool
|
| 254 |
-
tool = EuropePMCTool()
|
| 255 |
-
results = asyncio.run(tool.search('long covid treatment', 3))
|
| 256 |
-
for r in results:
|
| 257 |
-
print(r.citation.title)
|
| 258 |
-
"
|
| 259 |
-
|
| 260 |
-
# Test PubMed with preprocessing
|
| 261 |
-
uv run python -c "
|
| 262 |
-
from src.tools.query_utils import preprocess_pubmed_query
|
| 263 |
-
q = 'What medications show promise for Long COVID?'
|
| 264 |
-
print(preprocess_pubmed_query(q))
|
| 265 |
-
# Should output: (\"long COVID\" OR \"PASC\" OR \"post-COVID syndrome\") medications
|
| 266 |
-
"
|
| 267 |
-
```
|
| 268 |
-
|
| 269 |
-
---
|
| 270 |
-
|
| 271 |
-
## After These Fixes
|
| 272 |
-
|
| 273 |
-
The Magentic workflow will:
|
| 274 |
-
1. SearchAgent calls `search_pubmed("long COVID treatment")` → Gets RELEVANT papers
|
| 275 |
-
2. SearchAgent calls `search_preprints("long COVID treatment")` → Gets RELEVANT preprints via Europe PMC
|
| 276 |
-
3. SearchAgent calls `search_clinical_trials("long COVID")` → Gets INTERVENTIONAL trials only
|
| 277 |
-
4. JudgeAgent evaluates GOOD evidence
|
| 278 |
-
5. HypothesisAgent generates hypotheses from GOOD evidence
|
| 279 |
-
6. ReportAgent synthesizes GOOD report
|
| 280 |
-
|
| 281 |
-
**The framework will work once we feed it good data.**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/bugs/P0_CRITICAL_BUGS.md
DELETED
|
@@ -1,298 +0,0 @@
|
|
| 1 |
-
# P0 CRITICAL BUGS - Why DeepCritical Produces Garbage Results
|
| 2 |
-
|
| 3 |
-
**Date:** November 27, 2025
|
| 4 |
-
**Status:** CRITICAL - App is functionally useless
|
| 5 |
-
**Severity:** P0 (Blocker)
|
| 6 |
-
|
| 7 |
-
## TL;DR
|
| 8 |
-
|
| 9 |
-
The app produces garbage because:
|
| 10 |
-
1. **BioRxiv search doesn't work** - returns random papers
|
| 11 |
-
2. **Free tier LLM is too dumb** - can't identify drugs
|
| 12 |
-
3. **Query construction is naive** - no optimization for PubMed/CT.gov syntax
|
| 13 |
-
4. **Loop terminates too early** - 5 iterations isn't enough
|
| 14 |
-
|
| 15 |
-
---
|
| 16 |
-
|
| 17 |
-
## P0-001: BioRxiv Search is Fundamentally Broken
|
| 18 |
-
|
| 19 |
-
**File:** `src/tools/biorxiv.py:248-286`
|
| 20 |
-
|
| 21 |
-
**The Problem:**
|
| 22 |
-
The bioRxiv API **DOES NOT SUPPORT KEYWORD SEARCH**.
|
| 23 |
-
|
| 24 |
-
The code does this:
|
| 25 |
-
```python
|
| 26 |
-
# Fetch recent papers (last 90 days, first 100 papers)
|
| 27 |
-
url = f"{self.BASE_URL}/{self.server}/{interval}/0/json"
|
| 28 |
-
# Then filter client-side for keywords
|
| 29 |
-
```
|
| 30 |
-
|
| 31 |
-
**What Actually Happens:**
|
| 32 |
-
1. Fetches the first 100 papers from medRxiv in the last 90 days (chronological order)
|
| 33 |
-
2. Filters those 100 random papers for query keywords
|
| 34 |
-
3. Returns whatever garbage matches
|
| 35 |
-
|
| 36 |
-
**Result:** For "Long COVID medications", you get random papers like:
|
| 37 |
-
- "Calf muscle structure-function adaptations"
|
| 38 |
-
- "Work-Life Balance of Ophthalmologists During COVID"
|
| 39 |
-
|
| 40 |
-
These papers contain "COVID" somewhere but have NOTHING to do with Long COVID treatments.
|
| 41 |
-
|
| 42 |
-
**Root Cause:** The `/0/json` pagination only returns 100 papers. You'd need to paginate through ALL papers (thousands) to do proper keyword filtering.
|
| 43 |
-
|
| 44 |
-
**Fix Options:**
|
| 45 |
-
1. **Remove BioRxiv entirely** - It's unusable without proper search API
|
| 46 |
-
2. **Use a different preprint aggregator** - Europe PMC has preprints WITH search
|
| 47 |
-
3. **Add pagination** - Fetch all papers (slow, expensive)
|
| 48 |
-
4. **Use Semantic Scholar API** - Has preprints and proper search
|
| 49 |
-
|
| 50 |
-
---
|
| 51 |
-
|
| 52 |
-
## P0-002: Free Tier LLM Cannot Perform Drug Identification
|
| 53 |
-
|
| 54 |
-
**File:** `src/agent_factory/judges.py:153-211`
|
| 55 |
-
|
| 56 |
-
**The Problem:**
|
| 57 |
-
Without an API key, the app uses `HFInferenceJudgeHandler` with:
|
| 58 |
-
- Llama 3.1 8B Instruct
|
| 59 |
-
- Mistral 7B Instruct
|
| 60 |
-
|
| 61 |
-
These are **7-8 billion parameter models**. They cannot:
|
| 62 |
-
- Reliably parse complex biomedical abstracts
|
| 63 |
-
- Identify drug candidates from scientific text
|
| 64 |
-
- Generate structured JSON output consistently
|
| 65 |
-
- Reason about mechanism of action
|
| 66 |
-
|
| 67 |
-
**Evidence of Failure:**
|
| 68 |
-
```python
|
| 69 |
-
# From MockJudgeHandler - the honest fallback when LLM fails
|
| 70 |
-
drug_candidates=[
|
| 71 |
-
"Drug identification requires AI analysis",
|
| 72 |
-
"Enter API key above for full results",
|
| 73 |
-
]
|
| 74 |
-
```
|
| 75 |
-
|
| 76 |
-
The team KNEW the free tier can't identify drugs and added this message.
|
| 77 |
-
|
| 78 |
-
**Root Cause:** Drug repurposing requires understanding:
|
| 79 |
-
- Drug mechanisms
|
| 80 |
-
- Disease pathophysiology
|
| 81 |
-
- Clinical trial phases
|
| 82 |
-
- Statistical significance
|
| 83 |
-
|
| 84 |
-
This requires GPT-4 / Claude Sonnet class models (100B+ parameters).
|
| 85 |
-
|
| 86 |
-
**Fix Options:**
|
| 87 |
-
1. **Require API key** - No free tier, be honest
|
| 88 |
-
2. **Use larger HF models** - Llama 70B or Mixtral 8x7B (expensive on free tier)
|
| 89 |
-
3. **Hybrid approach** - Use free tier for search, require paid for synthesis
|
| 90 |
-
|
| 91 |
-
---
|
| 92 |
-
|
| 93 |
-
## P0-003: PubMed Query Not Optimized
|
| 94 |
-
|
| 95 |
-
**File:** `src/tools/pubmed.py:54-71`
|
| 96 |
-
|
| 97 |
-
**The Problem:**
|
| 98 |
-
The query is passed directly to PubMed without optimization:
|
| 99 |
-
```python
|
| 100 |
-
search_params = self._build_params(
|
| 101 |
-
db="pubmed",
|
| 102 |
-
term=query, # Raw user query!
|
| 103 |
-
retmax=max_results,
|
| 104 |
-
sort="relevance",
|
| 105 |
-
)
|
| 106 |
-
```
|
| 107 |
-
|
| 108 |
-
**What User Enters:** "What medications show promise for Long COVID?"
|
| 109 |
-
|
| 110 |
-
**What PubMed Receives:** `What medications show promise for Long COVID?`
|
| 111 |
-
|
| 112 |
-
**What PubMed Should Receive:**
|
| 113 |
-
```
|
| 114 |
-
("long covid"[Title/Abstract] OR "post-COVID"[Title/Abstract] OR "PASC"[Title/Abstract])
|
| 115 |
-
AND (drug[Title/Abstract] OR treatment[Title/Abstract] OR medication[Title/Abstract] OR therapy[Title/Abstract])
|
| 116 |
-
AND (clinical trial[Publication Type] OR randomized[Title/Abstract])
|
| 117 |
-
```
|
| 118 |
-
|
| 119 |
-
**Root Cause:** No query preprocessing or medical term expansion.
|
| 120 |
-
|
| 121 |
-
**Fix Options:**
|
| 122 |
-
1. **Add query preprocessor** - Extract medical entities, expand synonyms
|
| 123 |
-
2. **Use MeSH terms** - PubMed's controlled vocabulary for better recall
|
| 124 |
-
3. **LLM query generation** - Use LLM to generate optimized PubMed query
|
| 125 |
-
|
| 126 |
-
---
|
| 127 |
-
|
| 128 |
-
## P0-004: Loop Terminates Too Early
|
| 129 |
-
|
| 130 |
-
**File:** `src/app.py:42-45` and `src/utils/models.py`
|
| 131 |
-
|
| 132 |
-
**The Problem:**
|
| 133 |
-
```python
|
| 134 |
-
config = OrchestratorConfig(
|
| 135 |
-
max_iterations=5,
|
| 136 |
-
max_results_per_tool=10,
|
| 137 |
-
)
|
| 138 |
-
```
|
| 139 |
-
|
| 140 |
-
5 iterations is not enough to:
|
| 141 |
-
1. Search multiple variations of the query
|
| 142 |
-
2. Gather enough evidence for the Judge to synthesize
|
| 143 |
-
3. Refine queries based on initial results
|
| 144 |
-
|
| 145 |
-
**Evidence:** The user's output shows "Max Iterations Reached" with only 6 sources.
|
| 146 |
-
|
| 147 |
-
**Root Cause:** Conservative defaults to avoid API costs, but makes app useless.
|
| 148 |
-
|
| 149 |
-
**Fix Options:**
|
| 150 |
-
1. **Increase default to 10-15** - More iterations = better results
|
| 151 |
-
2. **Dynamic termination** - Stop when confidence > threshold, not iteration count
|
| 152 |
-
3. **Parallel query expansion** - Run more queries per iteration
|
| 153 |
-
|
| 154 |
-
---
|
| 155 |
-
|
| 156 |
-
## P0-005: No Query Understanding Layer
|
| 157 |
-
|
| 158 |
-
**Files:** `src/orchestrator.py`, `src/tools/search_handler.py`
|
| 159 |
-
|
| 160 |
-
**The Problem:**
|
| 161 |
-
There's no NLU (Natural Language Understanding) layer. The system:
|
| 162 |
-
1. Takes raw user query
|
| 163 |
-
2. Passes directly to search tools
|
| 164 |
-
3. No entity extraction
|
| 165 |
-
4. No intent classification
|
| 166 |
-
5. No query expansion
|
| 167 |
-
|
| 168 |
-
For drug repurposing, you need to extract:
|
| 169 |
-
- **Disease:** "Long COVID" → [Long COVID, PASC, Post-COVID syndrome, chronic COVID]
|
| 170 |
-
- **Drug intent:** "medications" → [drugs, treatments, therapeutics, interventions]
|
| 171 |
-
- **Evidence type:** "show promise" → [clinical trials, efficacy, RCT]
|
| 172 |
-
|
| 173 |
-
**Root Cause:** No preprocessing pipeline between user input and search execution.
|
| 174 |
-
|
| 175 |
-
**Fix Options:**
|
| 176 |
-
1. **Add entity extraction** - Use BioBERT or PubMedBERT for medical NER
|
| 177 |
-
2. **Add query expansion** - Use medical ontologies (UMLS, MeSH)
|
| 178 |
-
3. **LLM preprocessing** - Use LLM to generate search strategy before searching
|
| 179 |
-
|
| 180 |
-
---
|
| 181 |
-
|
| 182 |
-
## P0-006: ClinicalTrials.gov Results Not Filtered
|
| 183 |
-
|
| 184 |
-
**File:** `src/tools/clinicaltrials.py`
|
| 185 |
-
|
| 186 |
-
**The Problem:**
|
| 187 |
-
ClinicalTrials.gov returns ALL matching trials including:
|
| 188 |
-
- Withdrawn trials
|
| 189 |
-
- Terminated trials
|
| 190 |
-
- Not yet recruiting
|
| 191 |
-
- Observational studies (not interventional)
|
| 192 |
-
|
| 193 |
-
For drug repurposing, you want:
|
| 194 |
-
- Interventional studies
|
| 195 |
-
- Phase 2+ (has safety/efficacy data)
|
| 196 |
-
- Completed or with results
|
| 197 |
-
|
| 198 |
-
**Root Cause:** No filtering of trial metadata.
|
| 199 |
-
|
| 200 |
-
---
|
| 201 |
-
|
| 202 |
-
## Summary: Why This App Produces Garbage
|
| 203 |
-
|
| 204 |
-
```
|
| 205 |
-
User Query: "What medications show promise for Long COVID?"
|
| 206 |
-
│
|
| 207 |
-
▼
|
| 208 |
-
┌─────────────────────────────────────────────────────────────┐
|
| 209 |
-
│ NO QUERY PREPROCESSING │
|
| 210 |
-
│ - No entity extraction │
|
| 211 |
-
│ - No synonym expansion │
|
| 212 |
-
│ - No medical term normalization │
|
| 213 |
-
└─────────────────────────────────────────────────────────────┘
|
| 214 |
-
│
|
| 215 |
-
▼
|
| 216 |
-
┌─────────────────────────────────────────────────────────────┐
|
| 217 |
-
│ BROKEN SEARCH LAYER │
|
| 218 |
-
│ - PubMed: Raw query, no MeSH, gets 1 result │
|
| 219 |
-
│ - BioRxiv: Returns random papers (API doesn't support search)│
|
| 220 |
-
│ - ClinicalTrials: Returns all trials, no filtering │
|
| 221 |
-
└─────────────────────────────────────────────────────────────┘
|
| 222 |
-
│
|
| 223 |
-
▼
|
| 224 |
-
┌─────────────────────────────────────────────────────────────┐
|
| 225 |
-
│ GARBAGE EVIDENCE │
|
| 226 |
-
│ - 6 papers, most irrelevant │
|
| 227 |
-
│ - "Calf muscle adaptations" (mentions COVID once) │
|
| 228 |
-
│ - "Ophthalmologist work-life balance" │
|
| 229 |
-
└─────────────────────────────────────────────────────────────┘
|
| 230 |
-
│
|
| 231 |
-
▼
|
| 232 |
-
┌─────────────────────────────────────────────────────────────┐
|
| 233 |
-
│ DUMB JUDGE (Free Tier) │
|
| 234 |
-
│ - Llama 8B can't identify drugs from garbage │
|
| 235 |
-
│ - JSON parsing fails │
|
| 236 |
-
│ - Falls back to "Drug identification requires AI analysis" │
|
| 237 |
-
└─────────────────────────────────────────────────────────────┘
|
| 238 |
-
│
|
| 239 |
-
▼
|
| 240 |
-
┌─────────────────────────────────────────────────────────────┐
|
| 241 |
-
│ LOOP HITS MAX (5 iterations) │
|
| 242 |
-
│ - Never finds enough good evidence │
|
| 243 |
-
│ - Never synthesizes anything useful │
|
| 244 |
-
└─────────────────────────────────────────────────────────────┘
|
| 245 |
-
│
|
| 246 |
-
▼
|
| 247 |
-
GARBAGE OUTPUT
|
| 248 |
-
```
|
| 249 |
-
|
| 250 |
-
---
|
| 251 |
-
|
| 252 |
-
## What Would Make This Actually Work
|
| 253 |
-
|
| 254 |
-
### Minimum Viable Fix (1-2 days)
|
| 255 |
-
|
| 256 |
-
1. **Remove BioRxiv** - It doesn't work
|
| 257 |
-
2. **Require API key** - Be honest that free tier is useless
|
| 258 |
-
3. **Add basic query preprocessing** - Strip question words, expand COVID synonyms
|
| 259 |
-
4. **Increase iterations to 10**
|
| 260 |
-
|
| 261 |
-
### Proper Fix (1-2 weeks)
|
| 262 |
-
|
| 263 |
-
1. **Query Understanding Layer**
|
| 264 |
-
- Medical NER (BioBERT/SciBERT)
|
| 265 |
-
- Query expansion with MeSH/UMLS
|
| 266 |
-
- Intent classification (drug discovery vs mechanism vs safety)
|
| 267 |
-
|
| 268 |
-
2. **Optimized Search**
|
| 269 |
-
- PubMed: Proper query syntax with MeSH terms
|
| 270 |
-
- ClinicalTrials: Filter by phase, status, intervention type
|
| 271 |
-
- Replace BioRxiv with Europe PMC (has preprints + search)
|
| 272 |
-
|
| 273 |
-
3. **Evidence Ranking**
|
| 274 |
-
- Score by publication type (RCT > cohort > case report)
|
| 275 |
-
- Score by journal impact factor
|
| 276 |
-
- Score by recency
|
| 277 |
-
- Score by citation count
|
| 278 |
-
|
| 279 |
-
4. **Proper LLM Pipeline**
|
| 280 |
-
- Use GPT-4 / Claude for synthesis
|
| 281 |
-
- Structured extraction of: drug, mechanism, evidence level, effect size
|
| 282 |
-
- Multi-step reasoning: identify → validate → rank → synthesize
|
| 283 |
-
|
| 284 |
-
---
|
| 285 |
-
|
| 286 |
-
## The Hard Truth
|
| 287 |
-
|
| 288 |
-
Building a drug repurposing agent that works is HARD. The state of the art is:
|
| 289 |
-
|
| 290 |
-
- **Drug2Disease (IBM)** - Uses knowledge graphs + ML
|
| 291 |
-
- **COVID-KG (Stanford)** - Dedicated COVID knowledge graph
|
| 292 |
-
- **Literature Mining at scale (PubMed)** - Millions of papers, not 10
|
| 293 |
-
|
| 294 |
-
This hackathon project is fundamentally a **search wrapper with an LLM prompt**. That's not enough.
|
| 295 |
-
|
| 296 |
-
To make it useful:
|
| 297 |
-
1. Either scope it down (e.g., "find clinical trials for X disease")
|
| 298 |
-
2. Or invest serious engineering in the NLU + search + ranking pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/bugs/P0_MAGENTIC_AND_SEARCH_AUDIT.md
DELETED
|
@@ -1,249 +0,0 @@
|
|
| 1 |
-
# P0 Audit: Microsoft Agent Framework (Magentic) & Search Tools
|
| 2 |
-
|
| 3 |
-
**Date:** November 27, 2025
|
| 4 |
-
**Auditor:** Claude Code
|
| 5 |
-
**Status:** VERIFIED
|
| 6 |
-
|
| 7 |
-
---
|
| 8 |
-
|
| 9 |
-
## TL;DR
|
| 10 |
-
|
| 11 |
-
| Component | Status | Verdict |
|
| 12 |
-
|-----------|--------|---------|
|
| 13 |
-
| Microsoft Agent Framework | ✅ WORKING | Correctly wired, no bugs |
|
| 14 |
-
| GPT-5.1 Model Config | ✅ CORRECT | Using `gpt-5.1` as configured |
|
| 15 |
-
| Search Tools | ❌ BROKEN | Root cause of garbage results |
|
| 16 |
-
|
| 17 |
-
**The orchestration framework is fine. The search layer is garbage.**
|
| 18 |
-
|
| 19 |
-
---
|
| 20 |
-
|
| 21 |
-
## Microsoft Agent Framework Verification
|
| 22 |
-
|
| 23 |
-
### Import Test: PASSED
|
| 24 |
-
```python
|
| 25 |
-
from agent_framework import MagenticBuilder, ChatAgent
|
| 26 |
-
from agent_framework.openai import OpenAIChatClient
|
| 27 |
-
# All imports successful
|
| 28 |
-
```
|
| 29 |
-
|
| 30 |
-
### Agent Creation Test: PASSED
|
| 31 |
-
```python
|
| 32 |
-
from src.agents.magentic_agents import create_search_agent
|
| 33 |
-
search_agent = create_search_agent()
|
| 34 |
-
# SearchAgent created: SearchAgent
|
| 35 |
-
# Description: Searches biomedical databases (PubMed, ClinicalTrials.gov, bioRxiv)
|
| 36 |
-
```
|
| 37 |
-
|
| 38 |
-
### Workflow Build Test: PASSED
|
| 39 |
-
```python
|
| 40 |
-
from src.orchestrator_magentic import MagenticOrchestrator
|
| 41 |
-
orchestrator = MagenticOrchestrator(max_rounds=2)
|
| 42 |
-
workflow = orchestrator._build_workflow()
|
| 43 |
-
# Workflow built successfully: <class 'agent_framework._workflows._workflow.Workflow'>
|
| 44 |
-
```
|
| 45 |
-
|
| 46 |
-
### Model Configuration: CORRECT
|
| 47 |
-
```python
|
| 48 |
-
settings.openai_model = "gpt-5.1" # ✅ Using GPT-5.1, not GPT-4o
|
| 49 |
-
settings.openai_api_key = True # ✅ API key is set
|
| 50 |
-
```
|
| 51 |
-
|
| 52 |
-
---
|
| 53 |
-
|
| 54 |
-
## What Magentic Provides (Working)
|
| 55 |
-
|
| 56 |
-
1. **Multi-Agent Coordination**
|
| 57 |
-
- Manager agent orchestrates SearchAgent, JudgeAgent, HypothesisAgent, ReportAgent
|
| 58 |
-
- Uses `MagenticBuilder().with_standard_manager()` for coordination
|
| 59 |
-
|
| 60 |
-
2. **ChatAgent Pattern**
|
| 61 |
-
- Each agent has internal LLM (GPT-5.1)
|
| 62 |
-
- Can call tools via `@ai_function` decorator
|
| 63 |
-
- Has proper instructions for domain-specific tasks
|
| 64 |
-
|
| 65 |
-
3. **Workflow Streaming**
|
| 66 |
-
- Events: `MagenticAgentMessageEvent`, `MagenticFinalResultEvent`, etc.
|
| 67 |
-
- Real-time UI updates via `workflow.run_stream(task)`
|
| 68 |
-
|
| 69 |
-
4. **State Management**
|
| 70 |
-
- `MagenticState` persists evidence across agents
|
| 71 |
-
- `get_bibliography()` tool for ReportAgent
|
| 72 |
-
|
| 73 |
-
---
|
| 74 |
-
|
| 75 |
-
## What's Actually Broken: The Search Tools
|
| 76 |
-
|
| 77 |
-
### File: `src/agents/tools.py`
|
| 78 |
-
|
| 79 |
-
The Magentic agents call these tools:
|
| 80 |
-
- `search_pubmed` → Uses `PubMedTool`
|
| 81 |
-
- `search_clinical_trials` → Uses `ClinicalTrialsTool`
|
| 82 |
-
- `search_preprints` → Uses `BioRxivTool`
|
| 83 |
-
|
| 84 |
-
**These tools are the problem, not the framework.**
|
| 85 |
-
|
| 86 |
-
---
|
| 87 |
-
|
| 88 |
-
## Search Tool Bugs (Detailed)
|
| 89 |
-
|
| 90 |
-
### BUG 1: BioRxiv API Does Not Support Search
|
| 91 |
-
|
| 92 |
-
**File:** `src/tools/biorxiv.py:248-286`
|
| 93 |
-
|
| 94 |
-
```python
|
| 95 |
-
# This fetches the FIRST 100 papers from the last 90 days
|
| 96 |
-
# It does NOT search by keyword - the API doesn't support that
|
| 97 |
-
url = f"{self.BASE_URL}/{self.server}/{interval}/0/json"
|
| 98 |
-
|
| 99 |
-
# Then filters client-side for keywords
|
| 100 |
-
matching = self._filter_by_keywords(papers, query_terms, max_results)
|
| 101 |
-
```
|
| 102 |
-
|
| 103 |
-
**Problem:**
|
| 104 |
-
- Fetches 100 random chronological papers
|
| 105 |
-
- Filters for ANY keyword match in title/abstract
|
| 106 |
-
- "Long COVID medications" returns papers about "calf muscles" because they mention "COVID" once
|
| 107 |
-
|
| 108 |
-
**Fix:** Remove BioRxiv or use Europe PMC (which has actual search)
|
| 109 |
-
|
| 110 |
-
---
|
| 111 |
-
|
| 112 |
-
### BUG 2: PubMed Query Not Optimized
|
| 113 |
-
|
| 114 |
-
**File:** `src/tools/pubmed.py:54-71`
|
| 115 |
-
|
| 116 |
-
```python
|
| 117 |
-
search_params = self._build_params(
|
| 118 |
-
db="pubmed",
|
| 119 |
-
term=query, # RAW USER QUERY - no preprocessing!
|
| 120 |
-
retmax=max_results,
|
| 121 |
-
sort="relevance",
|
| 122 |
-
)
|
| 123 |
-
```
|
| 124 |
-
|
| 125 |
-
**Problem:**
|
| 126 |
-
- User enters: "What medications show promise for Long COVID?"
|
| 127 |
-
- PubMed receives: `What medications show promise for Long COVID?`
|
| 128 |
-
- Should receive: `("long covid"[Title/Abstract] OR "PASC"[Title/Abstract]) AND (treatment[Title/Abstract] OR drug[Title/Abstract])`
|
| 129 |
-
|
| 130 |
-
**Fix:** Add query preprocessing:
|
| 131 |
-
1. Strip question words (what, which, how, etc.)
|
| 132 |
-
2. Expand medical synonyms (Long COVID → PASC, Post-COVID)
|
| 133 |
-
3. Use MeSH terms for better recall
|
| 134 |
-
|
| 135 |
-
---
|
| 136 |
-
|
| 137 |
-
### BUG 3: ClinicalTrials.gov No Filtering
|
| 138 |
-
|
| 139 |
-
**File:** `src/tools/clinicaltrials.py`
|
| 140 |
-
|
| 141 |
-
Returns ALL trials including:
|
| 142 |
-
- Withdrawn trials
|
| 143 |
-
- Terminated trials
|
| 144 |
-
- Observational studies (not drug interventions)
|
| 145 |
-
- Phase 1 (no efficacy data)
|
| 146 |
-
|
| 147 |
-
**Fix:** Filter by:
|
| 148 |
-
- `studyType=INTERVENTIONAL`
|
| 149 |
-
- `phase=PHASE2,PHASE3,PHASE4`
|
| 150 |
-
- `status=COMPLETED,ACTIVE_NOT_RECRUITING,RECRUITING`
|
| 151 |
-
|
| 152 |
-
---
|
| 153 |
-
|
| 154 |
-
## Evidence: Garbage In → Garbage Out
|
| 155 |
-
|
| 156 |
-
When the Magentic SearchAgent calls these tools:
|
| 157 |
-
|
| 158 |
-
```
|
| 159 |
-
SearchAgent: "Find evidence for Long COVID medications"
|
| 160 |
-
│
|
| 161 |
-
▼
|
| 162 |
-
search_pubmed("Long COVID medications")
|
| 163 |
-
→ Returns 1 semi-relevant paper (raw query hits)
|
| 164 |
-
|
| 165 |
-
search_preprints("Long COVID medications")
|
| 166 |
-
→ Returns garbage (BioRxiv API doesn't search)
|
| 167 |
-
→ "Calf muscle adaptations" (has "COVID" somewhere)
|
| 168 |
-
→ "Ophthalmologist work-life balance" (mentions COVID)
|
| 169 |
-
|
| 170 |
-
search_clinical_trials("Long COVID medications")
|
| 171 |
-
→ Returns all trials, no filtering
|
| 172 |
-
│
|
| 173 |
-
▼
|
| 174 |
-
JudgeAgent receives garbage evidence
|
| 175 |
-
│
|
| 176 |
-
▼
|
| 177 |
-
HypothesisAgent can't generate good hypotheses from garbage
|
| 178 |
-
│
|
| 179 |
-
▼
|
| 180 |
-
ReportAgent produces garbage report
|
| 181 |
-
```
|
| 182 |
-
|
| 183 |
-
**The framework is doing its job. It's orchestrating agents correctly. But the agents are being fed garbage data.**
|
| 184 |
-
|
| 185 |
-
---
|
| 186 |
-
|
| 187 |
-
## Recommended Fixes
|
| 188 |
-
|
| 189 |
-
### Priority 1: Delete or Fix BioRxiv (30 min)
|
| 190 |
-
|
| 191 |
-
**Option A: Delete it**
|
| 192 |
-
```python
|
| 193 |
-
# In src/agents/tools.py, remove:
|
| 194 |
-
# from src.tools.biorxiv import BioRxivTool
|
| 195 |
-
# _biorxiv = BioRxivTool()
|
| 196 |
-
# @ai_function search_preprints(...)
|
| 197 |
-
```
|
| 198 |
-
|
| 199 |
-
**Option B: Replace with Europe PMC**
|
| 200 |
-
Europe PMC has preprints AND proper search API:
|
| 201 |
-
```
|
| 202 |
-
https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=long+covid+treatment&format=json
|
| 203 |
-
```
|
| 204 |
-
|
| 205 |
-
### Priority 2: Fix PubMed Query (1 hour)
|
| 206 |
-
|
| 207 |
-
Add query preprocessor:
|
| 208 |
-
```python
|
| 209 |
-
def preprocess_query(raw_query: str) -> str:
|
| 210 |
-
"""Convert natural language to PubMed query syntax."""
|
| 211 |
-
# Strip question words
|
| 212 |
-
# Expand medical synonyms
|
| 213 |
-
# Add field tags [Title/Abstract]
|
| 214 |
-
# Return optimized query
|
| 215 |
-
```
|
| 216 |
-
|
| 217 |
-
### Priority 3: Filter ClinicalTrials (30 min)
|
| 218 |
-
|
| 219 |
-
Add parameters to API call:
|
| 220 |
-
```python
|
| 221 |
-
params = {
|
| 222 |
-
"query.term": query,
|
| 223 |
-
"filter.overallStatus": "COMPLETED,RECRUITING",
|
| 224 |
-
"filter.studyType": "INTERVENTIONAL",
|
| 225 |
-
"pageSize": max_results,
|
| 226 |
-
}
|
| 227 |
-
```
|
| 228 |
-
|
| 229 |
-
---
|
| 230 |
-
|
| 231 |
-
## Conclusion
|
| 232 |
-
|
| 233 |
-
**Microsoft Agent Framework: NO BUGS FOUND**
|
| 234 |
-
- Imports work ✅
|
| 235 |
-
- Agent creation works ✅
|
| 236 |
-
- Workflow building works ✅
|
| 237 |
-
- Model config correct (GPT-5.1) ✅
|
| 238 |
-
- Streaming events work ✅
|
| 239 |
-
|
| 240 |
-
**Search Tools: CRITICALLY BROKEN**
|
| 241 |
-
- BioRxiv: API doesn't support search (fundamental)
|
| 242 |
-
- PubMed: No query optimization (fixable)
|
| 243 |
-
- ClinicalTrials: No filtering (fixable)
|
| 244 |
-
|
| 245 |
-
**Recommendation:**
|
| 246 |
-
1. Delete BioRxiv immediately (unusable)
|
| 247 |
-
2. Add PubMed query preprocessing
|
| 248 |
-
3. Add ClinicalTrials filtering
|
| 249 |
-
4. Then the Magentic multi-agent system will work as designed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/bugs/P0_MAGENTIC_MODE_BROKEN.md
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# P0 Bug: Magentic Mode Returns ChatMessage Object Instead of Report Text
|
| 2 |
+
|
| 3 |
+
**Status**: OPEN
|
| 4 |
+
**Priority**: P0 (Critical)
|
| 5 |
+
**Date**: 2025-11-27
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Actual Bug Found (Not What We Thought)
|
| 10 |
+
|
| 11 |
+
**The OpenAI key works fine.** The real bug is different:
|
| 12 |
+
|
| 13 |
+
### The Problem
|
| 14 |
+
|
| 15 |
+
When Magentic mode completes, the final report returns a `ChatMessage` object instead of the actual text:
|
| 16 |
+
|
| 17 |
+
```
|
| 18 |
+
FINAL REPORT:
|
| 19 |
+
<agent_framework._types.ChatMessage object at 0x11db70310>
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
### Evidence
|
| 23 |
+
|
| 24 |
+
Full test output shows:
|
| 25 |
+
1. Magentic orchestrator starts correctly
|
| 26 |
+
2. SearchAgent finds evidence
|
| 27 |
+
3. HypothesisAgent generates hypotheses
|
| 28 |
+
4. JudgeAgent evaluates
|
| 29 |
+
5. **BUT**: Final output is `ChatMessage` object, not text
|
| 30 |
+
|
| 31 |
+
### Root Cause
|
| 32 |
+
|
| 33 |
+
In `src/orchestrator_magentic.py` line 193:
|
| 34 |
+
|
| 35 |
+
```python
|
| 36 |
+
elif isinstance(event, MagenticFinalResultEvent):
|
| 37 |
+
text = event.message.text if event.message else "No result"
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
The `event.message` is a `ChatMessage` object, and `.text` may not extract the content correctly, or the message structure changed in the agent-framework library.
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## Secondary Issue: Max Rounds Reached
|
| 45 |
+
|
| 46 |
+
The orchestrator hits max rounds before producing a report:
|
| 47 |
+
|
| 48 |
+
```
|
| 49 |
+
[ERROR] Magentic Orchestrator: Max round count reached
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
This means the workflow times out before the ReportAgent synthesizes the final output.
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
|
| 56 |
+
## What Works
|
| 57 |
+
|
| 58 |
+
- OpenAI API key: **Works** (loaded from .env)
|
| 59 |
+
- SearchAgent: **Works** (finds evidence from PubMed, ClinicalTrials, Europe PMC)
|
| 60 |
+
- HypothesisAgent: **Works** (generates Drug -> Target -> Pathway chains)
|
| 61 |
+
- JudgeAgent: **Partial** (evaluates but sometimes loses context)
|
| 62 |
+
|
| 63 |
+
---
|
| 64 |
+
|
| 65 |
+
## Files to Fix
|
| 66 |
+
|
| 67 |
+
| File | Line | Issue |
|
| 68 |
+
|------|------|-------|
|
| 69 |
+
| `src/orchestrator_magentic.py` | 193 | `event.message.text` returns object, not string |
|
| 70 |
+
| `src/orchestrator_magentic.py` | 97-99 | `max_round_count=3` too low for full pipeline |
|
| 71 |
+
|
| 72 |
+
---
|
| 73 |
+
|
| 74 |
+
## Suggested Fix
|
| 75 |
+
|
| 76 |
+
```python
|
| 77 |
+
# In _process_event, line 192-199
|
| 78 |
+
elif isinstance(event, MagenticFinalResultEvent):
|
| 79 |
+
# Handle ChatMessage object properly
|
| 80 |
+
if event.message:
|
| 81 |
+
if hasattr(event.message, 'content'):
|
| 82 |
+
text = event.message.content
|
| 83 |
+
elif hasattr(event.message, 'text'):
|
| 84 |
+
text = event.message.text
|
| 85 |
+
else:
|
| 86 |
+
text = str(event.message)
|
| 87 |
+
else:
|
| 88 |
+
text = "No result"
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
And increase rounds:
|
| 92 |
+
|
| 93 |
+
```python
|
| 94 |
+
# In _build_workflow, line 97
|
| 95 |
+
max_round_count=self._max_rounds, # Use configured value, default 10
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
---
|
| 99 |
+
|
| 100 |
+
## Test Command
|
| 101 |
+
|
| 102 |
+
```bash
|
| 103 |
+
set -a && source .env && set +a && uv run python examples/orchestrator_demo/run_magentic.py "metformin alzheimer"
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
## Simple Mode Works
|
| 109 |
+
|
| 110 |
+
For reference, simple mode produces full reports:
|
| 111 |
+
|
| 112 |
+
```bash
|
| 113 |
+
uv run python examples/orchestrator_demo/run_agent.py "metformin alzheimer"
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
Output includes structured report with Drug Candidates, Key Findings, etc.
|
docs/bugs/P1_GRADIO_SETTINGS_CLEANUP.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# P1 Bug: Gradio Settings Accordion Not Collapsing
|
| 2 |
+
|
| 3 |
+
**Priority**: P1 (UX Bug)
|
| 4 |
+
**Status**: OPEN
|
| 5 |
+
**Date**: 2025-11-27
|
| 6 |
+
**Target Component**: `src/app.py`
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
## 1. Problem Description
|
| 11 |
+
|
| 12 |
+
The "Settings" accordion in the Gradio UI (containing Orchestrator Mode, API Key, Provider) fails to collapse, even when configured with `open=False`. It remains permanently expanded, cluttering the interface and obscuring the chat history.
|
| 13 |
+
|
| 14 |
+
### Symptoms
|
| 15 |
+
- Accordion arrow toggles visually, but content remains visible.
|
| 16 |
+
- Occurs in both local development (`uv run python src/app.py`) and HuggingFace Spaces.
|
| 17 |
+
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
## 2. Root Cause Analysis
|
| 21 |
+
|
| 22 |
+
**Definitive Cause**: Nested `Blocks` Context Bug.
|
| 23 |
+
`gr.ChatInterface` is itself a high-level abstraction that creates a `gr.Blocks` context. Wrapping `gr.ChatInterface` inside an external `with gr.Blocks():` context causes event listener conflicts, specifically breaking the JavaScript state management for `additional_inputs_accordion`.
|
| 24 |
+
|
| 25 |
+
**Reference**: [Gradio Issue #8861](https://github.com/gradio-app/gradio/issues/8861) confirms that `additional_inputs_accordion` malfunctions when `ChatInterface` is not the top-level block.
|
| 26 |
+
|
| 27 |
+
---
|
| 28 |
+
|
| 29 |
+
## 3. Solution Strategy: "The Unwrap Fix"
|
| 30 |
+
|
| 31 |
+
We will remove the redundant `gr.Blocks` wrapper. This restores the native behavior of `ChatInterface`, ensuring the accordion respects `open=False`.
|
| 32 |
+
|
| 33 |
+
### Implementation Plan
|
| 34 |
+
|
| 35 |
+
**Refactor `src/app.py` / `create_demo()`**:
|
| 36 |
+
|
| 37 |
+
1. **Remove** the `with gr.Blocks() as demo:` context manager.
|
| 38 |
+
2. **Instantiate** `gr.ChatInterface` directly as the `demo` object.
|
| 39 |
+
3. **Migrate UI Elements**:
|
| 40 |
+
* **Header**: Move the H1/Title text into the `title` parameter of `ChatInterface`.
|
| 41 |
+
* **Footer**: Move the footer text ("MCP Server Active...") into the `description` parameter. `ChatInterface` supports Markdown in `description`, making it the ideal place for static info below the title but above the chat.
|
| 42 |
+
|
| 43 |
+
### Before (Buggy)
|
| 44 |
+
```python
|
| 45 |
+
def create_demo():
|
| 46 |
+
with gr.Blocks() as demo: # <--- CAUSE OF BUG
|
| 47 |
+
gr.Markdown("# Title")
|
| 48 |
+
gr.ChatInterface(..., additional_inputs_accordion=gr.Accordion(open=False))
|
| 49 |
+
gr.Markdown("Footer")
|
| 50 |
+
return demo
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
### After (Correct)
|
| 54 |
+
```python
|
| 55 |
+
def create_demo():
|
| 56 |
+
return gr.ChatInterface( # <--- FIX: Top-level component
|
| 57 |
+
...,
|
| 58 |
+
title="🧬 DeepCritical",
|
| 59 |
+
description="*AI-Powered Drug Repurposing Agent...*\n\n---\n**MCP Server Active**...",
|
| 60 |
+
additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False)
|
| 61 |
+
)
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
## 4. Validation
|
| 67 |
+
|
| 68 |
+
1. **Run**: `uv run python src/app.py`
|
| 69 |
+
2. **Check**: Open `http://localhost:7860`
|
| 70 |
+
3. **Verify**:
|
| 71 |
+
* Settings accordion starts **COLLAPSED**.
|
| 72 |
+
* Header title ("DeepCritical") is visible.
|
| 73 |
+
* Footer text ("MCP Server Active") is visible in the description area.
|
| 74 |
+
* Chat functionality works (Magentic/Simple modes).
|
| 75 |
+
|
| 76 |
+
---
|
| 77 |
+
|
| 78 |
+
## 5. Constraints & Notes
|
| 79 |
+
|
| 80 |
+
- **Layout**: We lose the ability to place arbitrary elements *below* the chat box (footer will move to top, under title), but this is an acceptable trade-off for a working UI.
|
| 81 |
+
- **CSS**: `ChatInterface` handles its own CSS; any custom class styling from the previous footer will be standardized to the description text style.
|
docs/bugs/PHASE_00_IMPLEMENTATION_ORDER.md
DELETED
|
@@ -1,156 +0,0 @@
|
|
| 1 |
-
# Phase 00: Implementation Order & Summary
|
| 2 |
-
|
| 3 |
-
**Total Effort:** 5-8 hours
|
| 4 |
-
**Parallelizable:** Yes (all 3 phases are independent)
|
| 5 |
-
|
| 6 |
-
---
|
| 7 |
-
|
| 8 |
-
## Executive Summary
|
| 9 |
-
|
| 10 |
-
The DeepCritical drug repurposing agent produces garbage results because the search tools are broken:
|
| 11 |
-
|
| 12 |
-
| Tool | Problem | Fix |
|
| 13 |
-
|------|---------|-----|
|
| 14 |
-
| BioRxiv | API doesn't support search | Replace with Europe PMC |
|
| 15 |
-
| PubMed | Raw queries, no preprocessing | Add query cleaner |
|
| 16 |
-
| ClinicalTrials | No filtering | Add status/type filters |
|
| 17 |
-
|
| 18 |
-
**The Microsoft Agent Framework (Magentic) is working correctly.** The orchestration layer is fine. The data layer is broken.
|
| 19 |
-
|
| 20 |
-
---
|
| 21 |
-
|
| 22 |
-
## Phase Specs
|
| 23 |
-
|
| 24 |
-
| Phase | Title | Effort | Priority | Dependencies |
|
| 25 |
-
|-------|-------|--------|----------|--------------|
|
| 26 |
-
| **01** | [Replace BioRxiv with Europe PMC](./PHASE_01_REPLACE_BIORXIV.md) | 2-3 hrs | P0 | None |
|
| 27 |
-
| **02** | [PubMed Query Preprocessing](./PHASE_02_PUBMED_QUERY_PREPROCESSING.md) | 2-3 hrs | P0 | None |
|
| 28 |
-
| **03** | [ClinicalTrials Filtering](./PHASE_03_CLINICALTRIALS_FILTERING.md) | 1-2 hrs | P1 | None |
|
| 29 |
-
|
| 30 |
-
---
|
| 31 |
-
|
| 32 |
-
## Recommended Execution Order
|
| 33 |
-
|
| 34 |
-
Since all phases are independent, they can be done in parallel by different developers.
|
| 35 |
-
|
| 36 |
-
**If doing sequentially, order by impact:**
|
| 37 |
-
|
| 38 |
-
1. **Phase 01** - BioRxiv is completely broken (returns random papers)
|
| 39 |
-
2. **Phase 02** - PubMed is partially broken (returns suboptimal results)
|
| 40 |
-
3. **Phase 03** - ClinicalTrials returns too much noise
|
| 41 |
-
|
| 42 |
-
---
|
| 43 |
-
|
| 44 |
-
## TDD Workflow (Per Phase)
|
| 45 |
-
|
| 46 |
-
```
|
| 47 |
-
1. Write failing tests
|
| 48 |
-
2. Run tests (confirm they fail)
|
| 49 |
-
3. Implement fix
|
| 50 |
-
4. Run tests (confirm they pass)
|
| 51 |
-
5. Run ALL tests (confirm no regressions)
|
| 52 |
-
6. Manual verification
|
| 53 |
-
7. Commit
|
| 54 |
-
```
|
| 55 |
-
|
| 56 |
-
---
|
| 57 |
-
|
| 58 |
-
## Verification After All Phases
|
| 59 |
-
|
| 60 |
-
After completing all 3 phases, run this integration test:
|
| 61 |
-
|
| 62 |
-
```bash
|
| 63 |
-
# Full system test
|
| 64 |
-
uv run python -c "
|
| 65 |
-
import asyncio
|
| 66 |
-
from src.tools.europepmc import EuropePMCTool
|
| 67 |
-
from src.tools.pubmed import PubMedTool
|
| 68 |
-
from src.tools.clinicaltrials import ClinicalTrialsTool
|
| 69 |
-
|
| 70 |
-
async def test_all():
|
| 71 |
-
query = 'long covid treatment'
|
| 72 |
-
|
| 73 |
-
print('=== Europe PMC (Preprints) ===')
|
| 74 |
-
epmc = EuropePMCTool()
|
| 75 |
-
results = await epmc.search(query, 2)
|
| 76 |
-
for r in results:
|
| 77 |
-
print(f' - {r.citation.title[:60]}...')
|
| 78 |
-
|
| 79 |
-
print()
|
| 80 |
-
print('=== PubMed ===')
|
| 81 |
-
pm = PubMedTool()
|
| 82 |
-
results = await pm.search(query, 2)
|
| 83 |
-
for r in results:
|
| 84 |
-
print(f' - {r.citation.title[:60]}...')
|
| 85 |
-
|
| 86 |
-
print()
|
| 87 |
-
print('=== ClinicalTrials.gov ===')
|
| 88 |
-
ct = ClinicalTrialsTool()
|
| 89 |
-
results = await ct.search(query, 2)
|
| 90 |
-
for r in results:
|
| 91 |
-
print(f' - {r.citation.title[:60]}...')
|
| 92 |
-
|
| 93 |
-
asyncio.run(test_all())
|
| 94 |
-
"
|
| 95 |
-
```
|
| 96 |
-
|
| 97 |
-
**Expected:** All results should be relevant to "long covid treatment"
|
| 98 |
-
|
| 99 |
-
---
|
| 100 |
-
|
| 101 |
-
## Test Magentic Integration
|
| 102 |
-
|
| 103 |
-
After all phases are complete, test the full Magentic workflow:
|
| 104 |
-
|
| 105 |
-
```bash
|
| 106 |
-
# Test Magentic mode (requires OPENAI_API_KEY)
|
| 107 |
-
uv run python -c "
|
| 108 |
-
import asyncio
|
| 109 |
-
from src.orchestrator_magentic import MagenticOrchestrator
|
| 110 |
-
|
| 111 |
-
async def test_magentic():
|
| 112 |
-
orchestrator = MagenticOrchestrator(max_rounds=3)
|
| 113 |
-
|
| 114 |
-
print('Running Magentic workflow...')
|
| 115 |
-
async for event in orchestrator.run('What drugs show promise for Long COVID?'):
|
| 116 |
-
print(f'[{event.type}] {event.message[:100]}...')
|
| 117 |
-
|
| 118 |
-
asyncio.run(test_magentic())
|
| 119 |
-
"
|
| 120 |
-
```
|
| 121 |
-
|
| 122 |
-
---
|
| 123 |
-
|
| 124 |
-
## Files Changed (All Phases)
|
| 125 |
-
|
| 126 |
-
| File | Phase | Action |
|
| 127 |
-
|------|-------|--------|
|
| 128 |
-
| `src/tools/europepmc.py` | 01 | CREATE |
|
| 129 |
-
| `tests/unit/tools/test_europepmc.py` | 01 | CREATE |
|
| 130 |
-
| `src/agents/tools.py` | 01 | MODIFY |
|
| 131 |
-
| `src/tools/search_handler.py` | 01 | MODIFY |
|
| 132 |
-
| `src/tools/biorxiv.py` | 01 | DELETE |
|
| 133 |
-
| `tests/unit/tools/test_biorxiv.py` | 01 | DELETE |
|
| 134 |
-
| `src/tools/query_utils.py` | 02 | CREATE |
|
| 135 |
-
| `tests/unit/tools/test_query_utils.py` | 02 | CREATE |
|
| 136 |
-
| `src/tools/pubmed.py` | 02 | MODIFY |
|
| 137 |
-
| `src/tools/clinicaltrials.py` | 03 | MODIFY |
|
| 138 |
-
| `tests/unit/tools/test_clinicaltrials.py` | 03 | MODIFY |
|
| 139 |
-
|
| 140 |
-
---
|
| 141 |
-
|
| 142 |
-
## Success Criteria (Overall)
|
| 143 |
-
|
| 144 |
-
- [ ] All unit tests pass
|
| 145 |
-
- [ ] All integration tests pass (real APIs)
|
| 146 |
-
- [ ] Query "What drugs show promise for Long COVID?" returns relevant results from all 3 sources
|
| 147 |
-
- [ ] Magentic workflow produces a coherent research report
|
| 148 |
-
- [ ] No regressions in existing functionality
|
| 149 |
-
|
| 150 |
-
---
|
| 151 |
-
|
| 152 |
-
## Related Documentation
|
| 153 |
-
|
| 154 |
-
- [P0 Critical Bugs](./P0_CRITICAL_BUGS.md) - Root cause analysis
|
| 155 |
-
- [P0 Magentic Audit](./P0_MAGENTIC_AND_SEARCH_AUDIT.md) - Framework verification
|
| 156 |
-
- [P0 Actionable Fixes](./P0_ACTIONABLE_FIXES.md) - Fix summaries
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/bugs/PHASE_01_REPLACE_BIORXIV.md
DELETED
|
@@ -1,371 +0,0 @@
|
|
| 1 |
-
# Phase 01: Replace BioRxiv with Europe PMC
|
| 2 |
-
|
| 3 |
-
**Priority:** P0 - Critical
|
| 4 |
-
**Effort:** 2-3 hours
|
| 5 |
-
**Dependencies:** None
|
| 6 |
-
|
| 7 |
-
---
|
| 8 |
-
|
| 9 |
-
## Problem Statement
|
| 10 |
-
|
| 11 |
-
The BioRxiv API does not support keyword search. It only returns papers by date range, resulting in completely irrelevant results for any query.
|
| 12 |
-
|
| 13 |
-
## Success Criteria
|
| 14 |
-
|
| 15 |
-
- [ ] `search_preprints("long covid treatment")` returns papers actually about Long COVID
|
| 16 |
-
- [ ] All existing tests pass
|
| 17 |
-
- [ ] New tests cover Europe PMC integration
|
| 18 |
-
|
| 19 |
-
---
|
| 20 |
-
|
| 21 |
-
## TDD Implementation Order
|
| 22 |
-
|
| 23 |
-
### Step 1: Write Failing Test
|
| 24 |
-
|
| 25 |
-
**File:** `tests/unit/tools/test_europepmc.py`
|
| 26 |
-
|
| 27 |
-
```python
|
| 28 |
-
"""Unit tests for Europe PMC tool."""
|
| 29 |
-
|
| 30 |
-
import pytest
|
| 31 |
-
from unittest.mock import AsyncMock, patch
|
| 32 |
-
|
| 33 |
-
from src.tools.europepmc import EuropePMCTool
|
| 34 |
-
from src.utils.models import Evidence
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
@pytest.mark.unit
|
| 38 |
-
class TestEuropePMCTool:
|
| 39 |
-
"""Tests for EuropePMCTool."""
|
| 40 |
-
|
| 41 |
-
@pytest.fixture
|
| 42 |
-
def tool(self):
|
| 43 |
-
return EuropePMCTool()
|
| 44 |
-
|
| 45 |
-
def test_tool_name(self, tool):
|
| 46 |
-
assert tool.name == "europepmc"
|
| 47 |
-
|
| 48 |
-
@pytest.mark.asyncio
|
| 49 |
-
async def test_search_returns_evidence(self, tool):
|
| 50 |
-
"""Test that search returns Evidence objects."""
|
| 51 |
-
mock_response = {
|
| 52 |
-
"resultList": {
|
| 53 |
-
"result": [
|
| 54 |
-
{
|
| 55 |
-
"id": "12345",
|
| 56 |
-
"title": "Long COVID Treatment Study",
|
| 57 |
-
"abstractText": "This study examines treatments for Long COVID.",
|
| 58 |
-
"doi": "10.1234/test",
|
| 59 |
-
"pubYear": "2024",
|
| 60 |
-
"source": "MED",
|
| 61 |
-
"pubTypeList": {"pubType": ["research-article"]},
|
| 62 |
-
}
|
| 63 |
-
]
|
| 64 |
-
}
|
| 65 |
-
}
|
| 66 |
-
|
| 67 |
-
with patch("httpx.AsyncClient") as mock_client:
|
| 68 |
-
mock_instance = AsyncMock()
|
| 69 |
-
mock_client.return_value.__aenter__.return_value = mock_instance
|
| 70 |
-
mock_instance.get.return_value.json.return_value = mock_response
|
| 71 |
-
mock_instance.get.return_value.raise_for_status = lambda: None
|
| 72 |
-
|
| 73 |
-
results = await tool.search("long covid treatment", max_results=5)
|
| 74 |
-
|
| 75 |
-
assert len(results) == 1
|
| 76 |
-
assert isinstance(results[0], Evidence)
|
| 77 |
-
assert "Long COVID Treatment Study" in results[0].citation.title
|
| 78 |
-
|
| 79 |
-
@pytest.mark.asyncio
|
| 80 |
-
async def test_search_marks_preprints(self, tool):
|
| 81 |
-
"""Test that preprints are marked correctly."""
|
| 82 |
-
mock_response = {
|
| 83 |
-
"resultList": {
|
| 84 |
-
"result": [
|
| 85 |
-
{
|
| 86 |
-
"id": "PPR12345",
|
| 87 |
-
"title": "Preprint Study",
|
| 88 |
-
"abstractText": "Abstract text",
|
| 89 |
-
"doi": "10.1234/preprint",
|
| 90 |
-
"pubYear": "2024",
|
| 91 |
-
"source": "PPR",
|
| 92 |
-
"pubTypeList": {"pubType": ["Preprint"]},
|
| 93 |
-
}
|
| 94 |
-
]
|
| 95 |
-
}
|
| 96 |
-
}
|
| 97 |
-
|
| 98 |
-
with patch("httpx.AsyncClient") as mock_client:
|
| 99 |
-
mock_instance = AsyncMock()
|
| 100 |
-
mock_client.return_value.__aenter__.return_value = mock_instance
|
| 101 |
-
mock_instance.get.return_value.json.return_value = mock_response
|
| 102 |
-
mock_instance.get.return_value.raise_for_status = lambda: None
|
| 103 |
-
|
| 104 |
-
results = await tool.search("test", max_results=5)
|
| 105 |
-
|
| 106 |
-
assert "[PREPRINT]" in results[0].content
|
| 107 |
-
assert results[0].citation.source == "preprint"
|
| 108 |
-
|
| 109 |
-
@pytest.mark.asyncio
|
| 110 |
-
async def test_search_empty_results(self, tool):
|
| 111 |
-
"""Test handling of empty results."""
|
| 112 |
-
mock_response = {"resultList": {"result": []}}
|
| 113 |
-
|
| 114 |
-
with patch("httpx.AsyncClient") as mock_client:
|
| 115 |
-
mock_instance = AsyncMock()
|
| 116 |
-
mock_client.return_value.__aenter__.return_value = mock_instance
|
| 117 |
-
mock_instance.get.return_value.json.return_value = mock_response
|
| 118 |
-
mock_instance.get.return_value.raise_for_status = lambda: None
|
| 119 |
-
|
| 120 |
-
results = await tool.search("nonexistent query xyz", max_results=5)
|
| 121 |
-
|
| 122 |
-
assert results == []
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
@pytest.mark.integration
|
| 126 |
-
class TestEuropePMCIntegration:
|
| 127 |
-
"""Integration tests with real API."""
|
| 128 |
-
|
| 129 |
-
@pytest.mark.asyncio
|
| 130 |
-
async def test_real_api_call(self):
|
| 131 |
-
"""Test actual API returns relevant results."""
|
| 132 |
-
tool = EuropePMCTool()
|
| 133 |
-
results = await tool.search("long covid treatment", max_results=3)
|
| 134 |
-
|
| 135 |
-
assert len(results) > 0
|
| 136 |
-
# At least one result should mention COVID
|
| 137 |
-
titles = " ".join([r.citation.title.lower() for r in results])
|
| 138 |
-
assert "covid" in titles or "sars" in titles
|
| 139 |
-
```
|
| 140 |
-
|
| 141 |
-
### Step 2: Implement Europe PMC Tool
|
| 142 |
-
|
| 143 |
-
**File:** `src/tools/europepmc.py`
|
| 144 |
-
|
| 145 |
-
```python
|
| 146 |
-
"""Europe PMC search tool - replaces BioRxiv."""
|
| 147 |
-
|
| 148 |
-
from typing import Any
|
| 149 |
-
|
| 150 |
-
import httpx
|
| 151 |
-
from tenacity import retry, stop_after_attempt, wait_exponential
|
| 152 |
-
|
| 153 |
-
from src.utils.exceptions import SearchError
|
| 154 |
-
from src.utils.models import Citation, Evidence
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
class EuropePMCTool:
|
| 158 |
-
"""
|
| 159 |
-
Search Europe PMC for papers and preprints.
|
| 160 |
-
|
| 161 |
-
Europe PMC indexes:
|
| 162 |
-
- PubMed/MEDLINE articles
|
| 163 |
-
- PMC full-text articles
|
| 164 |
-
- Preprints from bioRxiv, medRxiv, ChemRxiv, etc.
|
| 165 |
-
- Patents and clinical guidelines
|
| 166 |
-
|
| 167 |
-
API Docs: https://europepmc.org/RestfulWebService
|
| 168 |
-
"""
|
| 169 |
-
|
| 170 |
-
BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
|
| 171 |
-
|
| 172 |
-
@property
|
| 173 |
-
def name(self) -> str:
|
| 174 |
-
return "europepmc"
|
| 175 |
-
|
| 176 |
-
@retry(
|
| 177 |
-
stop=stop_after_attempt(3),
|
| 178 |
-
wait=wait_exponential(multiplier=1, min=1, max=10),
|
| 179 |
-
reraise=True,
|
| 180 |
-
)
|
| 181 |
-
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 182 |
-
"""
|
| 183 |
-
Search Europe PMC for papers matching query.
|
| 184 |
-
|
| 185 |
-
Args:
|
| 186 |
-
query: Search keywords
|
| 187 |
-
max_results: Maximum results to return
|
| 188 |
-
|
| 189 |
-
Returns:
|
| 190 |
-
List of Evidence objects
|
| 191 |
-
"""
|
| 192 |
-
params = {
|
| 193 |
-
"query": query,
|
| 194 |
-
"resultType": "core",
|
| 195 |
-
"pageSize": min(max_results, 100),
|
| 196 |
-
"format": "json",
|
| 197 |
-
}
|
| 198 |
-
|
| 199 |
-
async with httpx.AsyncClient(timeout=30.0) as client:
|
| 200 |
-
try:
|
| 201 |
-
response = await client.get(self.BASE_URL, params=params)
|
| 202 |
-
response.raise_for_status()
|
| 203 |
-
|
| 204 |
-
data = response.json()
|
| 205 |
-
results = data.get("resultList", {}).get("result", [])
|
| 206 |
-
|
| 207 |
-
return [self._to_evidence(r) for r in results[:max_results]]
|
| 208 |
-
|
| 209 |
-
except httpx.HTTPStatusError as e:
|
| 210 |
-
raise SearchError(f"Europe PMC API error: {e}") from e
|
| 211 |
-
except httpx.RequestError as e:
|
| 212 |
-
raise SearchError(f"Europe PMC connection failed: {e}") from e
|
| 213 |
-
|
| 214 |
-
def _to_evidence(self, result: dict[str, Any]) -> Evidence:
|
| 215 |
-
"""Convert Europe PMC result to Evidence."""
|
| 216 |
-
title = result.get("title", "Untitled")
|
| 217 |
-
abstract = result.get("abstractText", "No abstract available.")
|
| 218 |
-
doi = result.get("doi", "")
|
| 219 |
-
pub_year = result.get("pubYear", "Unknown")
|
| 220 |
-
|
| 221 |
-
# Get authors
|
| 222 |
-
author_list = result.get("authorList", {}).get("author", [])
|
| 223 |
-
authors = [a.get("fullName", "") for a in author_list[:5] if a.get("fullName")]
|
| 224 |
-
|
| 225 |
-
# Check if preprint
|
| 226 |
-
pub_types = result.get("pubTypeList", {}).get("pubType", [])
|
| 227 |
-
is_preprint = "Preprint" in pub_types
|
| 228 |
-
source_db = result.get("source", "europepmc")
|
| 229 |
-
|
| 230 |
-
# Build content
|
| 231 |
-
preprint_marker = "[PREPRINT - Not peer-reviewed] " if is_preprint else ""
|
| 232 |
-
content = f"{preprint_marker}{abstract[:1800]}"
|
| 233 |
-
|
| 234 |
-
# Build URL
|
| 235 |
-
if doi:
|
| 236 |
-
url = f"https://doi.org/{doi}"
|
| 237 |
-
elif result.get("pmid"):
|
| 238 |
-
url = f"https://pubmed.ncbi.nlm.nih.gov/{result['pmid']}/"
|
| 239 |
-
else:
|
| 240 |
-
url = f"https://europepmc.org/article/{source_db}/{result.get('id', '')}"
|
| 241 |
-
|
| 242 |
-
return Evidence(
|
| 243 |
-
content=content[:2000],
|
| 244 |
-
citation=Citation(
|
| 245 |
-
source="preprint" if is_preprint else "europepmc",
|
| 246 |
-
title=title[:500],
|
| 247 |
-
url=url,
|
| 248 |
-
date=str(pub_year),
|
| 249 |
-
authors=authors,
|
| 250 |
-
),
|
| 251 |
-
relevance=0.75 if is_preprint else 0.9,
|
| 252 |
-
)
|
| 253 |
-
```
|
| 254 |
-
|
| 255 |
-
### Step 3: Update Magentic Tools
|
| 256 |
-
|
| 257 |
-
**File:** `src/agents/tools.py` - Replace biorxiv import:
|
| 258 |
-
|
| 259 |
-
```python
|
| 260 |
-
# REMOVE:
|
| 261 |
-
# from src.tools.biorxiv import BioRxivTool
|
| 262 |
-
# _biorxiv = BioRxivTool()
|
| 263 |
-
|
| 264 |
-
# ADD:
|
| 265 |
-
from src.tools.europepmc import EuropePMCTool
|
| 266 |
-
_europepmc = EuropePMCTool()
|
| 267 |
-
|
| 268 |
-
# UPDATE search_preprints function:
|
| 269 |
-
@ai_function
|
| 270 |
-
async def search_preprints(query: str, max_results: int = 10) -> str:
|
| 271 |
-
"""Search Europe PMC for preprints and papers.
|
| 272 |
-
|
| 273 |
-
Use this tool to find the latest research including preprints
|
| 274 |
-
from bioRxiv, medRxiv, and peer-reviewed papers.
|
| 275 |
-
|
| 276 |
-
Args:
|
| 277 |
-
query: Search terms (e.g., "long covid treatment")
|
| 278 |
-
max_results: Maximum results to return (default 10)
|
| 279 |
-
|
| 280 |
-
Returns:
|
| 281 |
-
Formatted list of papers with abstracts and links
|
| 282 |
-
"""
|
| 283 |
-
state = get_magentic_state()
|
| 284 |
-
|
| 285 |
-
results = await _europepmc.search(query, max_results)
|
| 286 |
-
if not results:
|
| 287 |
-
return f"No papers found for: {query}"
|
| 288 |
-
|
| 289 |
-
new_count = state.add_evidence(results)
|
| 290 |
-
|
| 291 |
-
output = [f"Found {len(results)} papers ({new_count} new stored):\n"]
|
| 292 |
-
for i, r in enumerate(results[:max_results], 1):
|
| 293 |
-
title = r.citation.title
|
| 294 |
-
date = r.citation.date
|
| 295 |
-
source = r.citation.source
|
| 296 |
-
content_clean = r.content[:300].replace("\n", " ")
|
| 297 |
-
url = r.citation.url
|
| 298 |
-
|
| 299 |
-
output.append(f"{i}. **{title}**")
|
| 300 |
-
output.append(f" Source: {source} | Date: {date}")
|
| 301 |
-
output.append(f" {content_clean}...")
|
| 302 |
-
output.append(f" URL: {url}\n")
|
| 303 |
-
|
| 304 |
-
return "\n".join(output)
|
| 305 |
-
```
|
| 306 |
-
|
| 307 |
-
### Step 4: Update Search Handler (Simple Mode)
|
| 308 |
-
|
| 309 |
-
**File:** `src/tools/search_handler.py` - Update imports:
|
| 310 |
-
|
| 311 |
-
```python
|
| 312 |
-
# REMOVE:
|
| 313 |
-
# from src.tools.biorxiv import BioRxivTool
|
| 314 |
-
|
| 315 |
-
# ADD:
|
| 316 |
-
from src.tools.europepmc import EuropePMCTool
|
| 317 |
-
```
|
| 318 |
-
|
| 319 |
-
### Step 5: Delete Old BioRxiv Tests
|
| 320 |
-
|
| 321 |
-
```bash
|
| 322 |
-
# After all new tests pass:
|
| 323 |
-
rm tests/unit/tools/test_biorxiv.py
|
| 324 |
-
```
|
| 325 |
-
|
| 326 |
-
---
|
| 327 |
-
|
| 328 |
-
## Verification
|
| 329 |
-
|
| 330 |
-
```bash
|
| 331 |
-
# Run new tests
|
| 332 |
-
uv run pytest tests/unit/tools/test_europepmc.py -v
|
| 333 |
-
|
| 334 |
-
# Run integration test (real API)
|
| 335 |
-
uv run pytest tests/unit/tools/test_europepmc.py::TestEuropePMCIntegration -v
|
| 336 |
-
|
| 337 |
-
# Run all tests to ensure no regressions
|
| 338 |
-
uv run pytest tests/unit/ -v
|
| 339 |
-
|
| 340 |
-
# Manual verification
|
| 341 |
-
uv run python -c "
|
| 342 |
-
import asyncio
|
| 343 |
-
from src.tools.europepmc import EuropePMCTool
|
| 344 |
-
tool = EuropePMCTool()
|
| 345 |
-
results = asyncio.run(tool.search('long covid treatment', 3))
|
| 346 |
-
for r in results:
|
| 347 |
-
print(f'- {r.citation.title}')
|
| 348 |
-
"
|
| 349 |
-
```
|
| 350 |
-
|
| 351 |
-
---
|
| 352 |
-
|
| 353 |
-
## Files Changed
|
| 354 |
-
|
| 355 |
-
| File | Action |
|
| 356 |
-
|------|--------|
|
| 357 |
-
| `src/tools/europepmc.py` | CREATE |
|
| 358 |
-
| `tests/unit/tools/test_europepmc.py` | CREATE |
|
| 359 |
-
| `src/agents/tools.py` | MODIFY (replace biorxiv import) |
|
| 360 |
-
| `src/tools/search_handler.py` | MODIFY (replace biorxiv import) |
|
| 361 |
-
| `src/tools/biorxiv.py` | DELETE (after verification) |
|
| 362 |
-
| `tests/unit/tools/test_biorxiv.py` | DELETE (after verification) |
|
| 363 |
-
|
| 364 |
-
---
|
| 365 |
-
|
| 366 |
-
## Rollback Plan
|
| 367 |
-
|
| 368 |
-
If issues arise:
|
| 369 |
-
1. Revert `src/agents/tools.py` to use BioRxivTool
|
| 370 |
-
2. Revert `src/tools/search_handler.py`
|
| 371 |
-
3. Keep `europepmc.py` for future use
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/bugs/PHASE_02_PUBMED_QUERY_PREPROCESSING.md
DELETED
|
@@ -1,355 +0,0 @@
|
|
| 1 |
-
# Phase 02: PubMed Query Preprocessing
|
| 2 |
-
|
| 3 |
-
**Priority:** P0 - Critical
|
| 4 |
-
**Effort:** 2-3 hours
|
| 5 |
-
**Dependencies:** None (can run parallel with Phase 01)
|
| 6 |
-
|
| 7 |
-
---
|
| 8 |
-
|
| 9 |
-
## Problem Statement
|
| 10 |
-
|
| 11 |
-
PubMed receives raw natural language queries like "What medications show promise for Long COVID?" which include question words that pollute search results.
|
| 12 |
-
|
| 13 |
-
## Success Criteria
|
| 14 |
-
|
| 15 |
-
- [ ] Question words stripped from queries
|
| 16 |
-
- [ ] Medical synonyms expanded (Long COVID → PASC, etc.)
|
| 17 |
-
- [ ] Relevant results returned for natural language questions
|
| 18 |
-
- [ ] All existing tests pass
|
| 19 |
-
- [ ] New tests cover query preprocessing
|
| 20 |
-
|
| 21 |
-
---
|
| 22 |
-
|
| 23 |
-
## TDD Implementation Order
|
| 24 |
-
|
| 25 |
-
### Step 1: Write Failing Tests
|
| 26 |
-
|
| 27 |
-
**File:** `tests/unit/tools/test_query_utils.py`
|
| 28 |
-
|
| 29 |
-
```python
|
| 30 |
-
"""Unit tests for query preprocessing utilities."""
|
| 31 |
-
|
| 32 |
-
import pytest
|
| 33 |
-
|
| 34 |
-
from src.tools.query_utils import preprocess_query, expand_synonyms, strip_question_words
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
@pytest.mark.unit
|
| 38 |
-
class TestQueryPreprocessing:
|
| 39 |
-
"""Tests for query preprocessing."""
|
| 40 |
-
|
| 41 |
-
def test_strip_question_words(self):
|
| 42 |
-
"""Test removal of question words."""
|
| 43 |
-
assert strip_question_words("What drugs treat cancer") == "drugs treat cancer"
|
| 44 |
-
assert strip_question_words("Which medications help diabetes") == "medications diabetes"
|
| 45 |
-
assert strip_question_words("How can we cure alzheimer") == "cure alzheimer"
|
| 46 |
-
assert strip_question_words("Is metformin effective") == "metformin effective"
|
| 47 |
-
|
| 48 |
-
def test_strip_preserves_medical_terms(self):
|
| 49 |
-
"""Test that medical terms are preserved."""
|
| 50 |
-
result = strip_question_words("What is the mechanism of metformin")
|
| 51 |
-
assert "metformin" in result
|
| 52 |
-
assert "mechanism" in result
|
| 53 |
-
|
| 54 |
-
def test_expand_synonyms_long_covid(self):
|
| 55 |
-
"""Test Long COVID synonym expansion."""
|
| 56 |
-
result = expand_synonyms("long covid treatment")
|
| 57 |
-
assert "PASC" in result or "post-COVID" in result
|
| 58 |
-
|
| 59 |
-
def test_expand_synonyms_alzheimer(self):
|
| 60 |
-
"""Test Alzheimer's synonym expansion."""
|
| 61 |
-
result = expand_synonyms("alzheimer drug")
|
| 62 |
-
assert "Alzheimer" in result
|
| 63 |
-
|
| 64 |
-
def test_expand_synonyms_preserves_unknown(self):
|
| 65 |
-
"""Test that unknown terms are preserved."""
|
| 66 |
-
result = expand_synonyms("metformin diabetes")
|
| 67 |
-
assert "metformin" in result
|
| 68 |
-
assert "diabetes" in result
|
| 69 |
-
|
| 70 |
-
def test_preprocess_query_full_pipeline(self):
|
| 71 |
-
"""Test complete preprocessing pipeline."""
|
| 72 |
-
raw = "What medications show promise for Long COVID?"
|
| 73 |
-
result = preprocess_query(raw)
|
| 74 |
-
|
| 75 |
-
# Should not contain question words
|
| 76 |
-
assert "what" not in result.lower()
|
| 77 |
-
assert "show" not in result.lower()
|
| 78 |
-
assert "promise" not in result.lower()
|
| 79 |
-
|
| 80 |
-
# Should contain expanded terms
|
| 81 |
-
assert "PASC" in result or "post-COVID" in result or "long covid" in result.lower()
|
| 82 |
-
assert "medications" in result.lower() or "drug" in result.lower()
|
| 83 |
-
|
| 84 |
-
def test_preprocess_query_removes_punctuation(self):
|
| 85 |
-
"""Test that question marks are removed."""
|
| 86 |
-
result = preprocess_query("Is metformin safe?")
|
| 87 |
-
assert "?" not in result
|
| 88 |
-
|
| 89 |
-
def test_preprocess_query_handles_empty(self):
|
| 90 |
-
"""Test handling of empty/whitespace queries."""
|
| 91 |
-
assert preprocess_query("") == ""
|
| 92 |
-
assert preprocess_query(" ") == ""
|
| 93 |
-
|
| 94 |
-
def test_preprocess_query_already_clean(self):
|
| 95 |
-
"""Test that clean queries pass through."""
|
| 96 |
-
clean = "metformin diabetes mechanism"
|
| 97 |
-
result = preprocess_query(clean)
|
| 98 |
-
assert "metformin" in result
|
| 99 |
-
assert "diabetes" in result
|
| 100 |
-
assert "mechanism" in result
|
| 101 |
-
```
|
| 102 |
-
|
| 103 |
-
### Step 2: Implement Query Utils
|
| 104 |
-
|
| 105 |
-
**File:** `src/tools/query_utils.py`
|
| 106 |
-
|
| 107 |
-
```python
|
| 108 |
-
"""Query preprocessing utilities for biomedical search."""
|
| 109 |
-
|
| 110 |
-
import re
|
| 111 |
-
from typing import ClassVar
|
| 112 |
-
|
| 113 |
-
# Question words and filler words to remove from natural-language queries.
QUESTION_WORDS: set[str] = {
    # Question starters
    "what", "which", "how", "why", "when", "where", "who", "whom",
    # Auxiliary verbs in questions
    "is", "are", "was", "were", "do", "does", "did", "can", "could",
    "would", "should", "will", "shall", "may", "might",
    # Filler words in natural questions
    "show", "promise", "help", "believe", "think", "suggest",
    "possible", "potential", "effective", "useful", "good",
    # Articles (remove but less aggressively)
    "the", "a", "an",
}

# Medical synonym expansions: lower-case trigger phrase -> list of synonyms
# that will be OR-grouped into the query.
SYNONYMS: dict[str, list[str]] = {
    "long covid": [
        "long COVID",
        "PASC",
        "post-acute sequelae of SARS-CoV-2",
        "post-COVID syndrome",
        "post-COVID-19 condition",
    ],
    "alzheimer": [
        "Alzheimer's disease",
        "Alzheimer disease",
        "AD",
        "Alzheimer dementia",
    ],
    "parkinson": [
        "Parkinson's disease",
        "Parkinson disease",
        "PD",
    ],
    "diabetes": [
        "diabetes mellitus",
        "type 2 diabetes",
        "T2DM",
        "diabetic",
    ],
    "cancer": [
        "cancer",
        "neoplasm",
        "tumor",
        "malignancy",
        "carcinoma",
    ],
    "heart disease": [
        "cardiovascular disease",
        "CVD",
        "coronary artery disease",
        "heart failure",
    ],
}


def strip_question_words(query: str) -> str:
    """
    Remove question words and filler terms from query.

    Args:
        query: Raw query string

    Returns:
        Lower-cased query with question words removed
    """
    words = query.lower().split()
    filtered = [w for w in words if w not in QUESTION_WORDS]
    return " ".join(filtered)


def expand_synonyms(query: str) -> str:
    """
    Expand medical terms to include synonyms.

    Each recognized term is replaced by an OR group, e.g.
    ``("diabetes mellitus" OR "type 2 diabetes" OR ...)``.

    BUG FIX: matching is now anchored on word boundaries. The previous
    substring match fired inside longer words (e.g. "parkinson" inside
    "parkinsonism", "diabetes" inside "prediabetes"), corrupting queries.

    Args:
        query: Query string

    Returns:
        Lower-cased query with synonym expansions in OR groups
    """
    result = query.lower()

    for term, expansions in SYNONYMS.items():
        # \b anchors prevent matches inside longer words.
        pattern = rf"\b{re.escape(term)}\b"
        if re.search(pattern, result):
            # Create OR group: ("term1" OR "term2" OR "term3")
            or_group = " OR ".join(f'"{exp}"' for exp in expansions)
            result = re.sub(pattern, f"({or_group})", result)

    return result
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
def preprocess_query(raw_query: str) -> str:
    """
    Full preprocessing pipeline for PubMed queries.

    Steps, in order:
      1. Strip whitespace and question marks, collapse runs of spaces.
      2. Remove question/filler words via ``strip_question_words``.
      3. Expand known medical synonyms via ``expand_synonyms``.

    Args:
        raw_query: Natural language query from user

    Returns:
        Optimized query for PubMed (empty string for blank input)
    """
    # Guard clause: blank input short-circuits to an empty query.
    if not raw_query or not raw_query.strip():
        return ""

    # Normalise punctuation and whitespace before token-level filtering.
    cleaned = re.sub(r"\s+", " ", raw_query.replace("?", "").strip())

    # Token-level filtering first, then phrase-level synonym expansion.
    return expand_synonyms(strip_question_words(cleaned)).strip()
|
| 234 |
-
```
|
| 235 |
-
|
| 236 |
-
### Step 3: Update PubMed Tool
|
| 237 |
-
|
| 238 |
-
**File:** `src/tools/pubmed.py` - Add preprocessing:
|
| 239 |
-
|
| 240 |
-
```python
|
| 241 |
-
# Add import at top:
|
| 242 |
-
from src.tools.query_utils import preprocess_query
|
| 243 |
-
|
| 244 |
-
# Update search method:
|
| 245 |
-
@retry(
|
| 246 |
-
stop=stop_after_attempt(3),
|
| 247 |
-
wait=wait_exponential(multiplier=1, min=1, max=10),
|
| 248 |
-
reraise=True,
|
| 249 |
-
)
|
| 250 |
-
async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
|
| 251 |
-
"""
|
| 252 |
-
Search PubMed and return evidence.
|
| 253 |
-
"""
|
| 254 |
-
await self._rate_limit()
|
| 255 |
-
|
| 256 |
-
# PREPROCESS QUERY
|
| 257 |
-
clean_query = preprocess_query(query)
|
| 258 |
-
if not clean_query:
|
| 259 |
-
clean_query = query # Fallback to original if preprocessing empties it
|
| 260 |
-
|
| 261 |
-
async with httpx.AsyncClient(timeout=30.0) as client:
|
| 262 |
-
search_params = self._build_params(
|
| 263 |
-
db="pubmed",
|
| 264 |
-
term=clean_query, # Use preprocessed query
|
| 265 |
-
retmax=max_results,
|
| 266 |
-
sort="relevance",
|
| 267 |
-
)
|
| 268 |
-
# ... rest unchanged
|
| 269 |
-
```
|
| 270 |
-
|
| 271 |
-
### Step 4: Update PubMed Tests
|
| 272 |
-
|
| 273 |
-
**File:** `tests/unit/tools/test_pubmed.py` - Add preprocessing test:
|
| 274 |
-
|
| 275 |
-
```python
|
| 276 |
-
@pytest.mark.asyncio
async def test_search_preprocesses_query(self, pubmed_tool, mock_httpx_client):
    """Natural-language queries must go through preprocessing before search."""
    # Integration-level smoke check only: the preprocessing behaviour
    # itself is exercised in test_query_utils.py.
    mock_httpx_client.get.return_value = httpx.Response(
        200,
        json={"esearchresult": {"idlist": []}},
    )

    # Feed a raw natural-language question through the tool.
    await pubmed_tool.search("What drugs help with Long COVID?")

    # The search must still reach the (mocked) HTTP layer.
    assert mock_httpx_client.get.called
|
| 292 |
-
```
|
| 293 |
-
|
| 294 |
-
---
|
| 295 |
-
|
| 296 |
-
## Verification
|
| 297 |
-
|
| 298 |
-
```bash
|
| 299 |
-
# Run query utils tests
|
| 300 |
-
uv run pytest tests/unit/tools/test_query_utils.py -v
|
| 301 |
-
|
| 302 |
-
# Run pubmed tests
|
| 303 |
-
uv run pytest tests/unit/tools/test_pubmed.py -v
|
| 304 |
-
|
| 305 |
-
# Run all tests
|
| 306 |
-
uv run pytest tests/unit/ -v
|
| 307 |
-
|
| 308 |
-
# Manual verification
|
| 309 |
-
uv run python -c "
|
| 310 |
-
from src.tools.query_utils import preprocess_query
|
| 311 |
-
|
| 312 |
-
queries = [
|
| 313 |
-
'What medications show promise for Long COVID?',
|
| 314 |
-
'Is metformin effective for cancer treatment?',
|
| 315 |
-
'How can we treat Alzheimer with existing drugs?',
|
| 316 |
-
]
|
| 317 |
-
|
| 318 |
-
for q in queries:
|
| 319 |
-
print(f'Input: {q}')
|
| 320 |
-
print(f'Output: {preprocess_query(q)}')
|
| 321 |
-
print()
|
| 322 |
-
"
|
| 323 |
-
```
|
| 324 |
-
|
| 325 |
-
Expected output:
|
| 326 |
-
```
|
| 327 |
-
Input: What medications show promise for Long COVID?
|
| 328 |
-
Output: medications ("long COVID" OR "PASC" OR "post-acute sequelae of SARS-CoV-2" OR "post-COVID syndrome" OR "post-COVID-19 condition")
|
| 329 |
-
|
| 330 |
-
Input: Is metformin effective for cancer treatment?
|
| 331 |
-
Output: metformin for ("cancer" OR "neoplasm" OR "tumor" OR "malignancy" OR "carcinoma") treatment
|
| 332 |
-
|
| 333 |
-
Input: How can we treat Alzheimer with existing drugs?
|
| 334 |
-
Output: we treat ("Alzheimer's disease" OR "Alzheimer disease" OR "AD" OR "Alzheimer dementia") with existing drugs
|
| 335 |
-
```
|
| 336 |
-
|
| 337 |
-
---
|
| 338 |
-
|
| 339 |
-
## Files Changed
|
| 340 |
-
|
| 341 |
-
| File | Action |
|
| 342 |
-
|------|--------|
|
| 343 |
-
| `src/tools/query_utils.py` | CREATE |
|
| 344 |
-
| `tests/unit/tools/test_query_utils.py` | CREATE |
|
| 345 |
-
| `src/tools/pubmed.py` | MODIFY (add preprocessing) |
|
| 346 |
-
| `tests/unit/tools/test_pubmed.py` | MODIFY (add integration test) |
|
| 347 |
-
|
| 348 |
-
---
|
| 349 |
-
|
| 350 |
-
## Future Enhancements (Out of Scope)
|
| 351 |
-
|
| 352 |
-
- MeSH term lookup via NCBI API
|
| 353 |
-
- Drug name normalization (brand → generic)
|
| 354 |
-
- Disease ontology integration (UMLS)
|
| 355 |
-
- Query intent classification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/bugs/PHASE_03_CLINICALTRIALS_FILTERING.md
DELETED
|
@@ -1,386 +0,0 @@
|
|
| 1 |
-
# Phase 03: ClinicalTrials.gov Filtering
|
| 2 |
-
|
| 3 |
-
**Priority:** P1 - High
|
| 4 |
-
**Effort:** 1-2 hours
|
| 5 |
-
**Dependencies:** None (can run parallel with Phase 01 & 02)
|
| 6 |
-
|
| 7 |
-
---
|
| 8 |
-
|
| 9 |
-
## Problem Statement
|
| 10 |
-
|
| 11 |
-
ClinicalTrials.gov returns ALL matching trials including:
|
| 12 |
-
- Withdrawn/Terminated trials (no useful data)
|
| 13 |
-
- Observational studies (not drug interventions)
|
| 14 |
-
- Phase 1 trials (safety only, no efficacy)
|
| 15 |
-
|
| 16 |
-
For drug repurposing, we need interventional studies with efficacy data.
|
| 17 |
-
|
| 18 |
-
## Success Criteria
|
| 19 |
-
|
| 20 |
-
- [ ] Only interventional studies returned
|
| 21 |
-
- [ ] Withdrawn/terminated trials filtered out
|
| 22 |
-
- [ ] Phase information included in results
|
| 23 |
-
- [ ] All existing tests pass
|
| 24 |
-
- [ ] New tests cover filtering
|
| 25 |
-
|
| 26 |
-
---
|
| 27 |
-
|
| 28 |
-
## TDD Implementation Order
|
| 29 |
-
|
| 30 |
-
### Step 1: Write Failing Tests
|
| 31 |
-
|
| 32 |
-
**File:** `tests/unit/tools/test_clinicaltrials.py` - Add filter tests:
|
| 33 |
-
|
| 34 |
-
```python
|
| 35 |
-
"""Unit tests for ClinicalTrials.gov tool."""
|
| 36 |
-
|
| 37 |
-
import pytest
|
| 38 |
-
from unittest.mock import patch, MagicMock
|
| 39 |
-
|
| 40 |
-
from src.tools.clinicaltrials import ClinicalTrialsTool
|
| 41 |
-
from src.utils.models import Evidence
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
@pytest.mark.unit
class TestClinicalTrialsTool:
    """Unit tests for ClinicalTrialsTool with the HTTP layer mocked."""

    @staticmethod
    def _mock_response(studies):
        """Return a MagicMock shaped like a successful requests.Response."""
        response = MagicMock()
        response.json.return_value = {"studies": studies}
        response.raise_for_status = MagicMock()
        return response

    @staticmethod
    def _make_study(nct_id, title, status, date, summary, phases, conditions, interventions):
        """Build a minimal ClinicalTrials.gov API v2 study record."""
        return {
            "protocolSection": {
                "identificationModule": {
                    "nctId": nct_id,
                    "briefTitle": title,
                },
                "statusModule": {
                    "overallStatus": status,
                    "startDateStruct": {"date": date},
                },
                "descriptionModule": {
                    "briefSummary": summary,
                },
                "designModule": {
                    "phases": phases,
                },
                "conditionsModule": {
                    "conditions": conditions,
                },
                "armsInterventionsModule": {
                    "interventions": interventions,
                },
            }
        }

    @pytest.fixture
    def tool(self):
        return ClinicalTrialsTool()

    def test_tool_name(self, tool):
        assert tool.name == "clinicaltrials"

    @pytest.mark.asyncio
    async def test_search_uses_filters(self, tool):
        """Test that search applies status and type filters."""
        with patch("requests.get", return_value=self._mock_response([])) as mock_get:
            await tool.search("test query", max_results=5)

            # Verify filters were applied to the outgoing request.
            call_args = mock_get.call_args
            params = call_args.kwargs.get("params", call_args[1].get("params", {}))

            # Should filter for active/completed studies.
            assert "filter.overallStatus" in params
            assert "COMPLETED" in params["filter.overallStatus"]
            assert "RECRUITING" in params["filter.overallStatus"]

            # Should filter for interventional studies.
            assert "filter.studyType" in params
            assert "INTERVENTIONAL" in params["filter.studyType"]

    @pytest.mark.asyncio
    async def test_search_returns_evidence(self, tool):
        """Test that search returns Evidence objects."""
        study = self._make_study(
            "NCT12345678",
            "Metformin for Long COVID Treatment",
            "COMPLETED",
            "2023-01-01",
            "A study examining metformin for Long COVID symptoms.",
            ["PHASE2", "PHASE3"],
            ["Long COVID", "PASC"],
            [{"name": "Metformin"}],
        )

        with patch("requests.get", return_value=self._mock_response([study])):
            hits = await tool.search("long covid metformin", max_results=5)

        assert len(hits) == 1
        assert isinstance(hits[0], Evidence)
        assert "Metformin" in hits[0].citation.title
        assert "PHASE2" in hits[0].content or "Phase" in hits[0].content

    @pytest.mark.asyncio
    async def test_search_includes_phase_info(self, tool):
        """Test that phase information is included in content."""
        study = self._make_study(
            "NCT12345678",
            "Test Study",
            "RECRUITING",
            "2024-01-01",
            "Test summary.",
            ["PHASE3"],
            ["Test"],
            [],
        )

        with patch("requests.get", return_value=self._mock_response([study])):
            hits = await tool.search("test", max_results=5)

        # Phase should be surfaced in the evidence content.
        assert "PHASE3" in hits[0].content or "Phase 3" in hits[0].content

    @pytest.mark.asyncio
    async def test_search_empty_results(self, tool):
        """Test handling of empty results."""
        with patch("requests.get", return_value=self._mock_response([])):
            hits = await tool.search("nonexistent xyz 12345", max_results=5)
            assert hits == []
| 162 |
-
|
| 163 |
-
|
| 164 |
-
@pytest.mark.integration
class TestClinicalTrialsIntegration:
    """Integration tests that hit the real ClinicalTrials.gov API."""

    @pytest.mark.asyncio
    async def test_real_api_returns_interventional(self):
        """The live API should return interventional studies."""
        tool = ClinicalTrialsTool()
        results = await tool.search("long covid treatment", max_results=3)

        # The query is broad enough that some hits are expected.
        assert len(results) > 0

        # At least one result should read like an interventional study.
        combined = " ".join(r.content.lower() for r in results)
        assert any(
            keyword in combined
            for keyword in ("intervention", "treatment", "drug", "phase")
        )
|
| 186 |
-
```
|
| 187 |
-
|
| 188 |
-
### Step 2: Update ClinicalTrials Tool
|
| 189 |
-
|
| 190 |
-
**File:** `src/tools/clinicaltrials.py` - Add filters:
|
| 191 |
-
|
| 192 |
-
```python
|
| 193 |
-
"""ClinicalTrials.gov search tool using API v2."""
|
| 194 |
-
|
| 195 |
-
import asyncio
|
| 196 |
-
from typing import Any, ClassVar
|
| 197 |
-
|
| 198 |
-
import requests
|
| 199 |
-
from tenacity import retry, stop_after_attempt, wait_exponential
|
| 200 |
-
|
| 201 |
-
from src.utils.exceptions import SearchError
|
| 202 |
-
from src.utils.models import Citation, Evidence
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
class ClinicalTrialsTool:
    """Search tool for ClinicalTrials.gov.

    Note: Uses `requests` library instead of `httpx` because ClinicalTrials.gov's
    WAF blocks httpx's TLS fingerprint. The `requests` library is not blocked.
    See: https://clinicaltrials.gov/data-api/api
    """

    BASE_URL = "https://clinicaltrials.gov/api/v2/studies"

    # Fields requested from the API v2 `fields` parameter.
    FIELDS: ClassVar[list[str]] = [
        "NCTId",
        "BriefTitle",
        "Phase",
        "OverallStatus",
        "Condition",
        "InterventionName",
        "StartDate",
        "BriefSummary",
    ]

    # Status filter: Only active/completed studies with potential data.
    STATUS_FILTER = "COMPLETED|ACTIVE_NOT_RECRUITING|RECRUITING|ENROLLING_BY_INVITATION"

    # Study type filter: Only interventional (drug/treatment studies).
    STUDY_TYPE_FILTER = "INTERVENTIONAL"

    @property
    def name(self) -> str:
        """Tool identifier used for registration and logging."""
        return "clinicaltrials"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        reraise=True,
    )
    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        """Search ClinicalTrials.gov for interventional studies.

        Args:
            query: Search query (e.g., "metformin alzheimer")
            max_results: Maximum results to return (max 100)

        Returns:
            List of Evidence objects from clinical trials

        Raises:
            SearchError: If the API request fails or returns an HTTP error.
        """
        params: dict[str, str | int] = {
            "query.term": query,
            "pageSize": min(max_results, 100),
            "fields": "|".join(self.FIELDS),
            # FILTERS - Only interventional, active/completed studies
            "filter.overallStatus": self.STATUS_FILTER,
            "filter.studyType": self.STUDY_TYPE_FILTER,
        }

        try:
            # Run blocking requests.get in a separate thread for async compatibility
            response = await asyncio.to_thread(
                requests.get,
                self.BASE_URL,
                params=params,
                headers={"User-Agent": "DeepCritical-Research-Agent/1.0"},
                timeout=30,
            )
            response.raise_for_status()

            data = response.json()
            studies = data.get("studies", [])
            return [self._study_to_evidence(study) for study in studies[:max_results]]

        except requests.HTTPError as e:
            raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
        except requests.RequestException as e:
            raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e

    def _study_to_evidence(self, study: dict[str, Any]) -> Evidence:
        """Convert a clinical trial study record to Evidence.

        All lookups use `.get` with defaults because the API omits
        modules that have no data for a given study.
        """
        # Navigate nested structure
        protocol = study.get("protocolSection", {})
        id_module = protocol.get("identificationModule", {})
        status_module = protocol.get("statusModule", {})
        desc_module = protocol.get("descriptionModule", {})
        design_module = protocol.get("designModule", {})
        conditions_module = protocol.get("conditionsModule", {})
        arms_module = protocol.get("armsInterventionsModule", {})

        nct_id = id_module.get("nctId", "Unknown")
        title = id_module.get("briefTitle", "Untitled Study")
        status = status_module.get("overallStatus", "Unknown")
        start_date = status_module.get("startDateStruct", {}).get("date", "Unknown")

        # Get phase (might be a list); report only the first entry.
        phases = design_module.get("phases", [])
        phase = phases[0] if phases else "Not Applicable"

        # Get conditions (cap at three for readability)
        conditions = conditions_module.get("conditions", [])
        conditions_str = ", ".join(conditions[:3]) if conditions else "Unknown"

        # Get interventions. BUG FIX: skip entries with empty/missing names
        # so the joined string never contains dangling commas (", , ").
        interventions = arms_module.get("interventions", [])
        intervention_names = [
            n for n in (i.get("name", "") for i in interventions) if n
        ][:3]
        interventions_str = ", ".join(intervention_names) if intervention_names else "Unknown"

        # Get summary. BUG FIX: only append an ellipsis when the summary was
        # actually truncated (the old code always appended "...").
        summary = desc_module.get("briefSummary", "No summary available.")
        summary_snippet = summary[:500] + ("..." if len(summary) > 500 else "")

        # Build content with key trial info
        content = (
            f"{summary_snippet} "
            f"Trial Phase: {phase}. "
            f"Status: {status}. "
            f"Conditions: {conditions_str}. "
            f"Interventions: {interventions_str}."
        )

        return Evidence(
            content=content[:2000],
            citation=Citation(
                source="clinicaltrials",
                title=title[:500],
                url=f"https://clinicaltrials.gov/study/{nct_id}",
                date=start_date,
                authors=[],  # Trials don't have traditional authors
            ),
            relevance=0.85,  # Trials are highly relevant for repurposing
        )
|
| 333 |
-
```
|
| 334 |
-
|
| 335 |
-
---
|
| 336 |
-
|
| 337 |
-
## Verification
|
| 338 |
-
|
| 339 |
-
```bash
|
| 340 |
-
# Run clinicaltrials tests
|
| 341 |
-
uv run pytest tests/unit/tools/test_clinicaltrials.py -v
|
| 342 |
-
|
| 343 |
-
# Run integration test (real API)
|
| 344 |
-
uv run pytest tests/unit/tools/test_clinicaltrials.py::TestClinicalTrialsIntegration -v
|
| 345 |
-
|
| 346 |
-
# Run all tests
|
| 347 |
-
uv run pytest tests/unit/ -v
|
| 348 |
-
|
| 349 |
-
# Manual verification
|
| 350 |
-
uv run python -c "
|
| 351 |
-
import asyncio
|
| 352 |
-
from src.tools.clinicaltrials import ClinicalTrialsTool
|
| 353 |
-
|
| 354 |
-
tool = ClinicalTrialsTool()
|
| 355 |
-
results = asyncio.run(tool.search('long covid treatment', 3))
|
| 356 |
-
|
| 357 |
-
for r in results:
|
| 358 |
-
print(f'Title: {r.citation.title}')
|
| 359 |
-
print(f'Content: {r.content[:200]}...')
|
| 360 |
-
print()
|
| 361 |
-
"
|
| 362 |
-
```
|
| 363 |
-
|
| 364 |
-
---
|
| 365 |
-
|
| 366 |
-
## Files Changed
|
| 367 |
-
|
| 368 |
-
| File | Action |
|
| 369 |
-
|------|--------|
|
| 370 |
-
| `src/tools/clinicaltrials.py` | MODIFY (add filters) |
|
| 371 |
-
| `tests/unit/tools/test_clinicaltrials.py` | MODIFY (add filter tests) |
|
| 372 |
-
|
| 373 |
-
---
|
| 374 |
-
|
| 375 |
-
## API Filter Reference
|
| 376 |
-
|
| 377 |
-
ClinicalTrials.gov API v2 supports these filters:
|
| 378 |
-
|
| 379 |
-
| Parameter | Values | Purpose |
|
| 380 |
-
|-----------|--------|---------|
|
| 381 |
-
| `filter.overallStatus` | COMPLETED, RECRUITING, etc. | Trial status |
|
| 382 |
-
| `filter.studyType` | INTERVENTIONAL, OBSERVATIONAL | Study design |
|
| 383 |
-
| `filter.phase` | PHASE1, PHASE2, PHASE3, PHASE4 | Trial phase |
|
| 384 |
-
| `filter.geo` | Country codes | Geographic filter |
|
| 385 |
-
|
| 386 |
-
See: https://clinicaltrials.gov/data-api/api
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/rate_limiting_demo.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Demo script to verify rate limiting works correctly."""
|
| 3 |
+
|
| 4 |
+
import asyncio
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
from src.tools.pubmed import PubMedTool
|
| 8 |
+
from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter, reset_pubmed_limiter
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
async def test_basic_limiter():
    """Exercise a bare RateLimiter and print the pacing of requests."""
    banner = "=" * 60
    print(banner)
    print("Rate Limiting Demo")
    print(banner)

    # Test 1: six acquisitions through a 3-per-second limiter.
    print("\n[Test 1] Testing 3/second limiter...")
    throttle = RateLimiter("3/second")

    started = time.monotonic()
    for request_no in range(1, 7):
        await throttle.acquire()
        print(f" Request {request_no} at {time.monotonic() - started:.2f}s")

    total = time.monotonic() - started
    print(f" Total time for 6 requests: {total:.2f}s (expected ~2s)")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
async def test_pubmed_limiter():
    """Show that independently built PubMed tools share one limiter."""
    print("\n[Test 2] Testing PubMed limiter (shared)...")

    # Start from a clean slate so the shared limiter is rebuilt.
    reset_pubmed_limiter()

    # Without an API key the limiter should be configured at 3/sec.
    shared = get_pubmed_limiter(api_key=None)
    print(f" Rate without key: {shared.rate}")

    # Two separately constructed tools must reuse the same limiter object.
    first, second = PubMedTool(), PubMedTool()
    print(f" Tools share limiter: {first._limiter is second._limiter}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
async def test_concurrent_requests():
    """Demonstrate that the limiter spreads out a burst of concurrent tasks."""
    print("\n[Test 3] Testing concurrent request limiting...")

    throttle = RateLimiter("5/second")

    async def timestamped_request(i: int):
        # Each task records the moment it got through the limiter.
        await throttle.acquire()
        return time.monotonic()

    begin = time.monotonic()
    # Fire 10 requests at once; the limiter should pace them out.
    completion = await asyncio.gather(*(timestamped_request(n) for n in range(10)))

    # Offsets relative to launch, in completion order.
    offsets = sorted(t - begin for t in completion)
    print(f" Request times: {[f'{t:.2f}s' for t in offsets]}")

    print(f" All 10 requests completed in {max(offsets):.2f}s (expected ~2s)")
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
async def main():
    """Run every rate-limiting demo in sequence."""
    for demo in (test_basic_limiter, test_pubmed_limiter, test_concurrent_requests):
        await demo()

    print("\n" + "=" * 60)
    print("Demo complete!")


if __name__ == "__main__":
    asyncio.run(main())
|
pyproject.toml
CHANGED
|
@@ -25,6 +25,8 @@ dependencies = [
|
|
| 25 |
"structlog>=24.1", # Structured logging
|
| 26 |
"requests>=2.32.5", # ClinicalTrials.gov (httpx blocked by WAF)
|
| 27 |
"pydantic-graph>=1.22.0",
|
|
|
|
|
|
|
| 28 |
]
|
| 29 |
|
| 30 |
[project.optional-dependencies]
|
|
@@ -44,7 +46,7 @@ dev = [
|
|
| 44 |
"pre-commit>=3.7",
|
| 45 |
]
|
| 46 |
magentic = [
|
| 47 |
-
"agent-framework-core>=1.0.0b251120,<2.0.0", #
|
| 48 |
]
|
| 49 |
embeddings = [
|
| 50 |
"chromadb>=0.4.0",
|
|
|
|
| 25 |
"structlog>=24.1", # Structured logging
|
| 26 |
"requests>=2.32.5", # ClinicalTrials.gov (httpx blocked by WAF)
|
| 27 |
"pydantic-graph>=1.22.0",
|
| 28 |
+
"limits>=3.0", # Rate limiting
|
| 29 |
+
"duckduckgo-search>=5.0", # Web search
|
| 30 |
]
|
| 31 |
|
| 32 |
[project.optional-dependencies]
|
|
|
|
| 46 |
"pre-commit>=3.7",
|
| 47 |
]
|
| 48 |
magentic = [
|
| 49 |
+
"agent-framework-core>=1.0.0b251120,<2.0.0", # Microsoft Agent Framework (PyPI)
|
| 50 |
]
|
| 51 |
embeddings = [
|
| 52 |
"chromadb>=0.4.0",
|
requirements.txt
CHANGED
|
@@ -7,6 +7,12 @@ pydantic-ai>=0.0.16
|
|
| 7 |
openai>=1.0.0
|
| 8 |
anthropic>=0.18.0
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# HTTP & Parsing
|
| 11 |
httpx>=0.27
|
| 12 |
beautifulsoup4>=4.12
|
|
@@ -20,6 +26,7 @@ python-dotenv>=1.0
|
|
| 20 |
tenacity>=8.2
|
| 21 |
structlog>=24.1
|
| 22 |
requests>=2.32.5
|
|
|
|
| 23 |
|
| 24 |
# Optional: Modal for code execution
|
| 25 |
modal>=0.63.0
|
|
|
|
| 7 |
openai>=1.0.0
|
| 8 |
anthropic>=0.18.0
|
| 9 |
|
| 10 |
+
# Multi-agent orchestration (Advanced mode)
|
| 11 |
+
agent-framework-core>=1.0.0b251120
|
| 12 |
+
|
| 13 |
+
# Web search
|
| 14 |
+
duckduckgo-search>=5.0
|
| 15 |
+
|
| 16 |
# HTTP & Parsing
|
| 17 |
httpx>=0.27
|
| 18 |
beautifulsoup4>=4.12
|
|
|
|
| 26 |
tenacity>=8.2
|
| 27 |
structlog>=24.1
|
| 28 |
requests>=2.32.5
|
| 29 |
+
limits>=3.0 # Rate limiting (Phase 17)
|
| 30 |
|
| 31 |
# Optional: Modal for code execution
|
| 32 |
modal>=0.63.0
|
src/agent_factory/judges.py
CHANGED
|
@@ -8,8 +8,10 @@ import structlog
|
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
from pydantic_ai import Agent
|
| 10 |
from pydantic_ai.models.anthropic import AnthropicModel
|
|
|
|
| 11 |
from pydantic_ai.models.openai import OpenAIModel
|
| 12 |
from pydantic_ai.providers.anthropic import AnthropicProvider
|
|
|
|
| 13 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 14 |
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
| 15 |
|
|
@@ -36,6 +38,12 @@ def get_model() -> Any:
|
|
| 36 |
provider = AnthropicProvider(api_key=settings.anthropic_api_key)
|
| 37 |
return AnthropicModel(settings.anthropic_model, provider=provider)
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
if llm_provider != "openai":
|
| 40 |
logger.warning("Unknown LLM provider, defaulting to OpenAI", provider=llm_provider)
|
| 41 |
|
|
@@ -434,7 +442,7 @@ class MockJudgeHandler:
|
|
| 434 |
clinical_evidence_score=clinical_score,
|
| 435 |
clinical_reasoning=(
|
| 436 |
f"Demo mode: {evidence_count} sources retrieved from PubMed, "
|
| 437 |
-
"ClinicalTrials.gov, and
|
| 438 |
),
|
| 439 |
drug_candidates=drug_candidates,
|
| 440 |
key_findings=key_findings,
|
|
|
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
from pydantic_ai import Agent
|
| 10 |
from pydantic_ai.models.anthropic import AnthropicModel
|
| 11 |
+
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 12 |
from pydantic_ai.models.openai import OpenAIModel
|
| 13 |
from pydantic_ai.providers.anthropic import AnthropicProvider
|
| 14 |
+
from pydantic_ai.providers.huggingface import HuggingFaceProvider
|
| 15 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 16 |
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
| 17 |
|
|
|
|
| 38 |
provider = AnthropicProvider(api_key=settings.anthropic_api_key)
|
| 39 |
return AnthropicModel(settings.anthropic_model, provider=provider)
|
| 40 |
|
| 41 |
+
if llm_provider == "huggingface":
|
| 42 |
+
# Free tier - uses HF_TOKEN from environment if available
|
| 43 |
+
model_name = settings.huggingface_model or "meta-llama/Llama-3.1-70B-Instruct"
|
| 44 |
+
hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
|
| 45 |
+
return HuggingFaceModel(model_name, provider=hf_provider)
|
| 46 |
+
|
| 47 |
if llm_provider != "openai":
|
| 48 |
logger.warning("Unknown LLM provider, defaulting to OpenAI", provider=llm_provider)
|
| 49 |
|
|
|
|
| 442 |
clinical_evidence_score=clinical_score,
|
| 443 |
clinical_reasoning=(
|
| 444 |
f"Demo mode: {evidence_count} sources retrieved from PubMed, "
|
| 445 |
+
"ClinicalTrials.gov, and Europe PMC. Full analysis requires LLM API key."
|
| 446 |
),
|
| 447 |
drug_candidates=drug_candidates,
|
| 448 |
key_findings=key_findings,
|
src/agents/code_executor_agent.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Code execution agent using Modal."""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
|
| 5 |
+
import structlog
|
| 6 |
+
from agent_framework import ChatAgent, ai_function
|
| 7 |
+
from agent_framework.openai import OpenAIChatClient
|
| 8 |
+
|
| 9 |
+
from src.tools.code_execution import get_code_executor
|
| 10 |
+
from src.utils.config import settings
|
| 11 |
+
|
| 12 |
+
logger = structlog.get_logger()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@ai_function # type: ignore[arg-type, misc]
|
| 16 |
+
async def execute_python_code(code: str) -> str:
|
| 17 |
+
"""Execute Python code in a secure sandbox.
|
| 18 |
+
|
| 19 |
+
Args:
|
| 20 |
+
code: The Python code to execute.
|
| 21 |
+
|
| 22 |
+
Returns:
|
| 23 |
+
The standard output and standard error of the execution.
|
| 24 |
+
"""
|
| 25 |
+
logger.info("Code execution starting", code_length=len(code))
|
| 26 |
+
executor = get_code_executor()
|
| 27 |
+
loop = asyncio.get_running_loop()
|
| 28 |
+
|
| 29 |
+
# Run in executor to avoid blocking
|
| 30 |
+
try:
|
| 31 |
+
result = await loop.run_in_executor(None, lambda: executor.execute(code))
|
| 32 |
+
if result["success"]:
|
| 33 |
+
logger.info("Code execution succeeded")
|
| 34 |
+
return f"Stdout:\n{result['stdout']}"
|
| 35 |
+
else:
|
| 36 |
+
logger.warning("Code execution failed", error=result.get("error"))
|
| 37 |
+
return f"Error:\n{result['error']}\nStderr:\n{result['stderr']}"
|
| 38 |
+
except Exception as e:
|
| 39 |
+
logger.error("Code execution exception", error=str(e))
|
| 40 |
+
return f"Execution failed: {e}"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def create_code_executor_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent:
|
| 44 |
+
"""Create a code executor agent.
|
| 45 |
+
|
| 46 |
+
Args:
|
| 47 |
+
chat_client: Optional custom chat client.
|
| 48 |
+
|
| 49 |
+
Returns:
|
| 50 |
+
ChatAgent configured for code execution.
|
| 51 |
+
"""
|
| 52 |
+
client = chat_client or OpenAIChatClient(
|
| 53 |
+
model_id=settings.openai_model,
|
| 54 |
+
api_key=settings.openai_api_key,
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
return ChatAgent(
|
| 58 |
+
name="CodeExecutorAgent",
|
| 59 |
+
description="Executes Python code for data analysis, calculation, and simulation.",
|
| 60 |
+
instructions="""You are a code execution expert.
|
| 61 |
+
When asked to analyze data or perform calculations, write Python code and execute it.
|
| 62 |
+
Use libraries like pandas, numpy, scipy, matplotlib.
|
| 63 |
+
|
| 64 |
+
Always output the code you want to execute using the `execute_python_code` tool.
|
| 65 |
+
Check the output and interpret the results.""",
|
| 66 |
+
chat_client=client,
|
| 67 |
+
tools=[execute_python_code],
|
| 68 |
+
temperature=0.0, # Strict code generation
|
| 69 |
+
)
|
src/agents/judge_agent_llm.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLM Judge for sub-iterations."""
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
import structlog
|
| 6 |
+
from pydantic_ai import Agent
|
| 7 |
+
|
| 8 |
+
from src.agent_factory.judges import get_model
|
| 9 |
+
from src.utils.models import JudgeAssessment
|
| 10 |
+
|
| 11 |
+
logger = structlog.get_logger()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class LLMSubIterationJudge:
|
| 15 |
+
"""Judge that uses an LLM to assess sub-iteration results."""
|
| 16 |
+
|
| 17 |
+
def __init__(self) -> None:
|
| 18 |
+
self.model = get_model()
|
| 19 |
+
self.agent = Agent(
|
| 20 |
+
model=self.model,
|
| 21 |
+
output_type=JudgeAssessment,
|
| 22 |
+
system_prompt="""You are a strict judge evaluating a research task.
|
| 23 |
+
|
| 24 |
+
Evaluate if the result is sufficient to answer the task.
|
| 25 |
+
Provide scores and detailed reasoning.
|
| 26 |
+
If not sufficient, suggest next steps.""",
|
| 27 |
+
retries=3,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
async def assess(self, task: str, result: Any, history: list[Any]) -> JudgeAssessment:
|
| 31 |
+
"""Assess the result using LLM."""
|
| 32 |
+
logger.info("LLM judge assessing result", task=task[:100], history_len=len(history))
|
| 33 |
+
|
| 34 |
+
prompt = f"""Task: {task}
|
| 35 |
+
|
| 36 |
+
Current Result:
|
| 37 |
+
{str(result)[:4000]}
|
| 38 |
+
|
| 39 |
+
History of previous attempts: {len(history)}
|
| 40 |
+
|
| 41 |
+
Evaluate validity and sufficiency."""
|
| 42 |
+
|
| 43 |
+
run_result = await self.agent.run(prompt)
|
| 44 |
+
logger.info("LLM judge assessment complete", sufficient=run_result.output.sufficient)
|
| 45 |
+
return run_result.output
|
src/agents/magentic_agents.py
CHANGED
|
@@ -29,7 +29,7 @@ def create_search_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgen
|
|
| 29 |
return ChatAgent(
|
| 30 |
name="SearchAgent",
|
| 31 |
description=(
|
| 32 |
-
"Searches biomedical databases (PubMed, ClinicalTrials.gov,
|
| 33 |
"for drug repurposing evidence"
|
| 34 |
),
|
| 35 |
instructions="""You are a biomedical search specialist. When asked to find evidence:
|
|
|
|
| 29 |
return ChatAgent(
|
| 30 |
name="SearchAgent",
|
| 31 |
description=(
|
| 32 |
+
"Searches biomedical databases (PubMed, ClinicalTrials.gov, Europe PMC) "
|
| 33 |
"for drug repurposing evidence"
|
| 34 |
),
|
| 35 |
instructions="""You are a biomedical search specialist. When asked to find evidence:
|
src/agents/retrieval_agent.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Retrieval agent for web search and context management."""
|
| 2 |
+
|
| 3 |
+
import structlog
|
| 4 |
+
from agent_framework import ChatAgent, ai_function
|
| 5 |
+
from agent_framework.openai import OpenAIChatClient
|
| 6 |
+
|
| 7 |
+
from src.state import get_magentic_state
|
| 8 |
+
from src.tools.web_search import WebSearchTool
|
| 9 |
+
from src.utils.config import settings
|
| 10 |
+
|
| 11 |
+
logger = structlog.get_logger()
|
| 12 |
+
|
| 13 |
+
_web_search = WebSearchTool()
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@ai_function # type: ignore[arg-type, misc]
|
| 17 |
+
async def search_web(query: str, max_results: int = 10) -> str:
|
| 18 |
+
"""Search the web using DuckDuckGo.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
query: Search keywords.
|
| 22 |
+
max_results: Maximum results to return (default 10).
|
| 23 |
+
|
| 24 |
+
Returns:
|
| 25 |
+
Formatted search results.
|
| 26 |
+
"""
|
| 27 |
+
logger.info("Web search starting", query=query, max_results=max_results)
|
| 28 |
+
state = get_magentic_state()
|
| 29 |
+
|
| 30 |
+
results = await _web_search.search(query, max_results)
|
| 31 |
+
if not results.evidence:
|
| 32 |
+
logger.info("Web search returned no results", query=query)
|
| 33 |
+
return f"No web results found for: {query}"
|
| 34 |
+
|
| 35 |
+
# Update state
|
| 36 |
+
# We add *all* found results to state
|
| 37 |
+
new_count = state.add_evidence(results.evidence)
|
| 38 |
+
logger.info(
|
| 39 |
+
"Web search complete",
|
| 40 |
+
query=query,
|
| 41 |
+
results_found=len(results.evidence),
|
| 42 |
+
new_evidence=new_count,
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
# Use embedding service for deduplication/indexing if available
|
| 46 |
+
if state.embedding_service:
|
| 47 |
+
# This method also adds to vector DB as a side effect for unique items
|
| 48 |
+
await state.embedding_service.deduplicate(results.evidence)
|
| 49 |
+
|
| 50 |
+
output = [f"Found {len(results.evidence)} web results ({new_count} new stored):\n"]
|
| 51 |
+
for i, r in enumerate(results.evidence[:max_results], 1):
|
| 52 |
+
output.append(f"{i}. **{r.citation.title}**")
|
| 53 |
+
output.append(f" Source: {r.citation.url}")
|
| 54 |
+
output.append(f" {r.content[:300]}...\n")
|
| 55 |
+
|
| 56 |
+
return "\n".join(output)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def create_retrieval_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent:
|
| 60 |
+
"""Create a retrieval agent.
|
| 61 |
+
|
| 62 |
+
Args:
|
| 63 |
+
chat_client: Optional custom chat client.
|
| 64 |
+
|
| 65 |
+
Returns:
|
| 66 |
+
ChatAgent configured for retrieval.
|
| 67 |
+
"""
|
| 68 |
+
client = chat_client or OpenAIChatClient(
|
| 69 |
+
model_id=settings.openai_model,
|
| 70 |
+
api_key=settings.openai_api_key,
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
return ChatAgent(
|
| 74 |
+
name="RetrievalAgent",
|
| 75 |
+
description="Searches the web and manages context/evidence.",
|
| 76 |
+
instructions="""You are a retrieval specialist.
|
| 77 |
+
Use `search_web` to find information on the internet.
|
| 78 |
+
Your goal is to gather relevant evidence for the research task.
|
| 79 |
+
Always summarize what you found.""",
|
| 80 |
+
chat_client=client,
|
| 81 |
+
tools=[search_web],
|
| 82 |
+
)
|
src/app.py
CHANGED
|
@@ -31,7 +31,7 @@ def configure_orchestrator(
|
|
| 31 |
|
| 32 |
Args:
|
| 33 |
use_mock: If True, use MockJudgeHandler (no API key needed)
|
| 34 |
-
mode: Orchestrator mode ("simple" or "
|
| 35 |
user_api_key: Optional user-provided API key (BYOK)
|
| 36 |
api_provider: API provider ("openai" or "anthropic")
|
| 37 |
|
|
@@ -115,7 +115,7 @@ async def research_agent(
|
|
| 115 |
Args:
|
| 116 |
message: User's research question
|
| 117 |
history: Chat history (Gradio format)
|
| 118 |
-
mode: Orchestrator mode ("simple" or "
|
| 119 |
api_key: Optional user-provided API key (BYOK - Bring Your Own Key)
|
| 120 |
api_provider: API provider ("openai" or "anthropic")
|
| 121 |
|
|
@@ -135,10 +135,11 @@ async def research_agent(
|
|
| 135 |
has_user_key = bool(user_api_key)
|
| 136 |
has_paid_key = has_openai or has_anthropic or has_user_key
|
| 137 |
|
| 138 |
-
#
|
| 139 |
-
if mode == "
|
| 140 |
yield (
|
| 141 |
-
"⚠️ **Warning**:
|
|
|
|
| 142 |
)
|
| 143 |
mode = "simple"
|
| 144 |
|
|
@@ -186,78 +187,68 @@ async def research_agent(
|
|
| 186 |
yield f"❌ **Error**: {e!s}"
|
| 187 |
|
| 188 |
|
| 189 |
-
def create_demo() ->
|
| 190 |
"""
|
| 191 |
Create the Gradio demo interface with MCP support.
|
| 192 |
|
| 193 |
Returns:
|
| 194 |
Configured Gradio Blocks interface with MCP server enabled
|
| 195 |
"""
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
"
|
| 202 |
-
"
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
"",
|
| 214 |
-
"openai",
|
| 215 |
-
],
|
| 216 |
-
[
|
| 217 |
-
"Is metformin effective for treating cancer?",
|
| 218 |
-
"simple",
|
| 219 |
-
"",
|
| 220 |
-
"openai",
|
| 221 |
-
],
|
| 222 |
-
[
|
| 223 |
-
"What medications show promise for Long COVID treatment?",
|
| 224 |
-
"simple",
|
| 225 |
-
"",
|
| 226 |
-
"openai",
|
| 227 |
-
],
|
| 228 |
],
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
label="Orchestrator Mode",
|
| 235 |
-
info="Simple: Linear | Magentic: Multi-Agent (OpenAI)",
|
| 236 |
-
),
|
| 237 |
-
gr.Textbox(
|
| 238 |
-
label="🔑 API Key (Optional - BYOK)",
|
| 239 |
-
placeholder="sk-... or sk-ant-...",
|
| 240 |
-
type="password",
|
| 241 |
-
info="Enter your own API key. Never stored.",
|
| 242 |
-
),
|
| 243 |
-
gr.Radio(
|
| 244 |
-
choices=["openai", "anthropic"],
|
| 245 |
-
value="openai",
|
| 246 |
-
label="API Provider",
|
| 247 |
-
info="Select the provider for your API key",
|
| 248 |
-
),
|
| 249 |
],
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
return demo
|
| 263 |
|
|
|
|
| 31 |
|
| 32 |
Args:
|
| 33 |
use_mock: If True, use MockJudgeHandler (no API key needed)
|
| 34 |
+
mode: Orchestrator mode ("simple" or "advanced")
|
| 35 |
user_api_key: Optional user-provided API key (BYOK)
|
| 36 |
api_provider: API provider ("openai" or "anthropic")
|
| 37 |
|
|
|
|
| 115 |
Args:
|
| 116 |
message: User's research question
|
| 117 |
history: Chat history (Gradio format)
|
| 118 |
+
mode: Orchestrator mode ("simple" or "advanced")
|
| 119 |
api_key: Optional user-provided API key (BYOK - Bring Your Own Key)
|
| 120 |
api_provider: API provider ("openai" or "anthropic")
|
| 121 |
|
|
|
|
| 135 |
has_user_key = bool(user_api_key)
|
| 136 |
has_paid_key = has_openai or has_anthropic or has_user_key
|
| 137 |
|
| 138 |
+
# Advanced mode requires OpenAI specifically (due to agent-framework binding)
|
| 139 |
+
if mode == "advanced" and not (has_openai or (has_user_key and api_provider == "openai")):
|
| 140 |
yield (
|
| 141 |
+
"⚠️ **Warning**: Advanced mode currently requires OpenAI API key. "
|
| 142 |
+
"Falling back to simple mode.\n\n"
|
| 143 |
)
|
| 144 |
mode = "simple"
|
| 145 |
|
|
|
|
| 187 |
yield f"❌ **Error**: {e!s}"
|
| 188 |
|
| 189 |
|
| 190 |
+
def create_demo() -> gr.ChatInterface:
|
| 191 |
"""
|
| 192 |
Create the Gradio demo interface with MCP support.
|
| 193 |
|
| 194 |
Returns:
|
| 195 |
Configured Gradio Blocks interface with MCP server enabled
|
| 196 |
"""
|
| 197 |
+
# 1. Unwrapped ChatInterface (Fixes Accordion Bug)
|
| 198 |
+
demo = gr.ChatInterface(
|
| 199 |
+
fn=research_agent,
|
| 200 |
+
title="🧬 DeepCritical",
|
| 201 |
+
description=(
|
| 202 |
+
"*AI-Powered Drug Repurposing Agent — searches PubMed, "
|
| 203 |
+
"ClinicalTrials.gov & Europe PMC*\n\n"
|
| 204 |
+
"---\n"
|
| 205 |
+
"*Research tool only — not for medical advice.* \n"
|
| 206 |
+
"**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`"
|
| 207 |
+
),
|
| 208 |
+
examples=[
|
| 209 |
+
[
|
| 210 |
+
"What drugs could be repurposed for Alzheimer's disease?",
|
| 211 |
+
"simple",
|
| 212 |
+
"",
|
| 213 |
+
"openai",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
],
|
| 215 |
+
[
|
| 216 |
+
"Is metformin effective for treating cancer?",
|
| 217 |
+
"simple",
|
| 218 |
+
"",
|
| 219 |
+
"openai",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
],
|
| 221 |
+
[
|
| 222 |
+
"What medications show promise for Long COVID treatment?",
|
| 223 |
+
"simple",
|
| 224 |
+
"",
|
| 225 |
+
"openai",
|
| 226 |
+
],
|
| 227 |
+
],
|
| 228 |
+
additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False),
|
| 229 |
+
additional_inputs=[
|
| 230 |
+
gr.Radio(
|
| 231 |
+
choices=["simple", "advanced"],
|
| 232 |
+
value="simple",
|
| 233 |
+
label="Orchestrator Mode",
|
| 234 |
+
info=(
|
| 235 |
+
"Simple: Linear (Free Tier Friendly) | Advanced: Multi-Agent (Requires OpenAI)"
|
| 236 |
+
),
|
| 237 |
+
),
|
| 238 |
+
gr.Textbox(
|
| 239 |
+
label="🔑 API Key (Optional - BYOK)",
|
| 240 |
+
placeholder="sk-... or sk-ant-...",
|
| 241 |
+
type="password",
|
| 242 |
+
info="Enter your own API key. Never stored.",
|
| 243 |
+
),
|
| 244 |
+
gr.Radio(
|
| 245 |
+
choices=["openai", "anthropic"],
|
| 246 |
+
value="openai",
|
| 247 |
+
label="API Provider",
|
| 248 |
+
info="Select the provider for your API key",
|
| 249 |
+
),
|
| 250 |
+
],
|
| 251 |
+
)
|
| 252 |
|
| 253 |
return demo
|
| 254 |
|
src/middleware/sub_iteration.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Middleware for orchestrating sub-iterations with research teams and judges."""
|
| 2 |
+
|
| 3 |
+
from typing import Any, Protocol
|
| 4 |
+
|
| 5 |
+
import structlog
|
| 6 |
+
|
| 7 |
+
from src.utils.models import AgentEvent, JudgeAssessment
|
| 8 |
+
|
| 9 |
+
logger = structlog.get_logger()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class SubIterationTeam(Protocol):
|
| 13 |
+
"""Protocol for a research team that executes a sub-task."""
|
| 14 |
+
|
| 15 |
+
async def execute(self, task: str) -> Any:
|
| 16 |
+
"""Execute the sub-task and return a result."""
|
| 17 |
+
...
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class SubIterationJudge(Protocol):
|
| 21 |
+
"""Protocol for a judge that evaluates the sub-task result."""
|
| 22 |
+
|
| 23 |
+
async def assess(self, task: str, result: Any, history: list[Any]) -> JudgeAssessment:
|
| 24 |
+
"""Assess the quality of the result."""
|
| 25 |
+
...
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class SubIterationMiddleware:
|
| 29 |
+
"""
|
| 30 |
+
Middleware that manages a sub-iteration loop:
|
| 31 |
+
1. Orchestrator delegates to a Research Team.
|
| 32 |
+
2. Research Team produces a result.
|
| 33 |
+
3. Judge evaluates the result.
|
| 34 |
+
4. Loop continues until Judge approves or max iterations reached.
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
def __init__(
|
| 38 |
+
self,
|
| 39 |
+
team: SubIterationTeam,
|
| 40 |
+
judge: SubIterationJudge,
|
| 41 |
+
max_iterations: int = 3,
|
| 42 |
+
):
|
| 43 |
+
self.team = team
|
| 44 |
+
self.judge = judge
|
| 45 |
+
self.max_iterations = max_iterations
|
| 46 |
+
|
| 47 |
+
async def run(
|
| 48 |
+
self,
|
| 49 |
+
task: str,
|
| 50 |
+
event_callback: Any = None, # Optional callback for streaming events
|
| 51 |
+
) -> tuple[Any, JudgeAssessment | None]:
|
| 52 |
+
"""
|
| 53 |
+
Run the sub-iteration loop.
|
| 54 |
+
|
| 55 |
+
Args:
|
| 56 |
+
task: The research task or question.
|
| 57 |
+
event_callback: Async callable to report events (e.g. to UI).
|
| 58 |
+
|
| 59 |
+
Returns:
|
| 60 |
+
Tuple of (best_result, final_assessment).
|
| 61 |
+
"""
|
| 62 |
+
history: list[Any] = []
|
| 63 |
+
best_result: Any = None
|
| 64 |
+
final_assessment: JudgeAssessment | None = None
|
| 65 |
+
|
| 66 |
+
for i in range(1, self.max_iterations + 1):
|
| 67 |
+
logger.info("Sub-iteration starting", iteration=i, task=task)
|
| 68 |
+
|
| 69 |
+
if event_callback:
|
| 70 |
+
await event_callback(
|
| 71 |
+
AgentEvent(
|
| 72 |
+
type="looping",
|
| 73 |
+
message=f"Sub-iteration {i}: Executing task...",
|
| 74 |
+
iteration=i,
|
| 75 |
+
)
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
# 1. Team Execution
|
| 79 |
+
try:
|
| 80 |
+
result = await self.team.execute(task)
|
| 81 |
+
history.append(result)
|
| 82 |
+
best_result = result # Assume latest is best for now
|
| 83 |
+
except Exception as e:
|
| 84 |
+
logger.error("Sub-iteration execution failed", error=str(e))
|
| 85 |
+
if event_callback:
|
| 86 |
+
await event_callback(
|
| 87 |
+
AgentEvent(
|
| 88 |
+
type="error",
|
| 89 |
+
message=f"Sub-iteration execution failed: {e}",
|
| 90 |
+
iteration=i,
|
| 91 |
+
)
|
| 92 |
+
)
|
| 93 |
+
return best_result, final_assessment
|
| 94 |
+
|
| 95 |
+
# 2. Judge Assessment
|
| 96 |
+
try:
|
| 97 |
+
assessment = await self.judge.assess(task, result, history)
|
| 98 |
+
final_assessment = assessment
|
| 99 |
+
except Exception as e:
|
| 100 |
+
logger.error("Sub-iteration judge failed", error=str(e))
|
| 101 |
+
if event_callback:
|
| 102 |
+
await event_callback(
|
| 103 |
+
AgentEvent(
|
| 104 |
+
type="error",
|
| 105 |
+
message=f"Sub-iteration judge failed: {e}",
|
| 106 |
+
iteration=i,
|
| 107 |
+
)
|
| 108 |
+
)
|
| 109 |
+
return best_result, final_assessment
|
| 110 |
+
|
| 111 |
+
# 3. Decision
|
| 112 |
+
if assessment.sufficient:
|
| 113 |
+
logger.info("Sub-iteration sufficient", iteration=i)
|
| 114 |
+
return best_result, assessment
|
| 115 |
+
|
| 116 |
+
# If not sufficient, we might refine the task for the next iteration
|
| 117 |
+
# For this implementation, we assume the team is smart enough or the task stays same
|
| 118 |
+
# but we could append feedback to the task.
|
| 119 |
+
|
| 120 |
+
feedback = assessment.reasoning
|
| 121 |
+
logger.info("Sub-iteration insufficient", feedback=feedback)
|
| 122 |
+
|
| 123 |
+
if event_callback:
|
| 124 |
+
await event_callback(
|
| 125 |
+
AgentEvent(
|
| 126 |
+
type="looping",
|
| 127 |
+
message=(
|
| 128 |
+
f"Sub-iteration {i} result insufficient. Feedback: {feedback[:100]}..."
|
| 129 |
+
),
|
| 130 |
+
iteration=i,
|
| 131 |
+
)
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
logger.warning("Sub-iteration max iterations reached", task=task)
|
| 135 |
+
return best_result, final_assessment
|
src/orchestrator_factory.py
CHANGED
|
@@ -9,12 +9,29 @@ from src.legacy_orchestrator import (
|
|
| 9 |
)
|
| 10 |
from src.utils.models import OrchestratorConfig
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
def create_orchestrator(
|
| 14 |
search_handler: SearchHandlerProtocol | None = None,
|
| 15 |
judge_handler: JudgeHandlerProtocol | None = None,
|
| 16 |
config: OrchestratorConfig | None = None,
|
| 17 |
-
mode: Literal["simple", "magentic"] =
|
| 18 |
) -> Any:
|
| 19 |
"""
|
| 20 |
Create an orchestrator instance.
|
|
@@ -23,25 +40,19 @@ def create_orchestrator(
|
|
| 23 |
search_handler: The search handler (required for simple mode)
|
| 24 |
judge_handler: The judge handler (required for simple mode)
|
| 25 |
config: Optional configuration
|
| 26 |
-
mode: "simple"
|
| 27 |
|
| 28 |
Returns:
|
| 29 |
Orchestrator instance
|
| 30 |
-
|
| 31 |
-
Note:
|
| 32 |
-
Magentic mode does NOT use search_handler/judge_handler.
|
| 33 |
-
It creates ChatAgent instances with internal LLMs that call tools directly.
|
| 34 |
"""
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
from src.orchestrator_magentic import MagenticOrchestrator
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
pass
|
| 45 |
|
| 46 |
# Simple mode requires handlers
|
| 47 |
if search_handler is None or judge_handler is None:
|
|
@@ -52,3 +63,17 @@ def create_orchestrator(
|
|
| 52 |
judge_handler=judge_handler,
|
| 53 |
config=config,
|
| 54 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
)
|
| 10 |
from src.utils.models import OrchestratorConfig
|
| 11 |
|
| 12 |
+
import structlog
|
| 13 |
+
|
| 14 |
+
logger = structlog.get_logger()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _get_magentic_orchestrator_class() -> Any:
|
| 18 |
+
"""Import MagenticOrchestrator lazily to avoid hard dependency."""
|
| 19 |
+
try:
|
| 20 |
+
from src.orchestrator_magentic import MagenticOrchestrator
|
| 21 |
+
|
| 22 |
+
return MagenticOrchestrator
|
| 23 |
+
except ImportError as e:
|
| 24 |
+
logger.error("Failed to import MagenticOrchestrator", error=str(e))
|
| 25 |
+
raise ValueError(
|
| 26 |
+
"Advanced mode requires agent-framework-core. Please install it or use mode='simple'."
|
| 27 |
+
) from e
|
| 28 |
+
|
| 29 |
|
| 30 |
def create_orchestrator(
|
| 31 |
search_handler: SearchHandlerProtocol | None = None,
|
| 32 |
judge_handler: JudgeHandlerProtocol | None = None,
|
| 33 |
config: OrchestratorConfig | None = None,
|
| 34 |
+
mode: Literal["simple", "magentic", "advanced"] | None = None,
|
| 35 |
) -> Any:
|
| 36 |
"""
|
| 37 |
Create an orchestrator instance.
|
|
|
|
| 40 |
search_handler: The search handler (required for simple mode)
|
| 41 |
judge_handler: The judge handler (required for simple mode)
|
| 42 |
config: Optional configuration
|
| 43 |
+
mode: "simple", "magentic", "advanced" or None (auto-detect)
|
| 44 |
|
| 45 |
Returns:
|
| 46 |
Orchestrator instance
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
"""
|
| 48 |
+
effective_mode = _determine_mode(mode)
|
| 49 |
+
logger.info("Creating orchestrator", mode=effective_mode)
|
|
|
|
| 50 |
|
| 51 |
+
if effective_mode == "advanced":
|
| 52 |
+
orchestrator_cls = _get_magentic_orchestrator_class()
|
| 53 |
+
return orchestrator_cls(
|
| 54 |
+
max_rounds=config.max_iterations if config else 10,
|
| 55 |
+
)
|
|
|
|
| 56 |
|
| 57 |
# Simple mode requires handlers
|
| 58 |
if search_handler is None or judge_handler is None:
|
|
|
|
| 63 |
judge_handler=judge_handler,
|
| 64 |
config=config,
|
| 65 |
)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _determine_mode(explicit_mode: str | None) -> str:
|
| 69 |
+
"""Determine which mode to use."""
|
| 70 |
+
if explicit_mode:
|
| 71 |
+
if explicit_mode in ("magentic", "advanced"):
|
| 72 |
+
return "advanced"
|
| 73 |
+
return "simple"
|
| 74 |
+
|
| 75 |
+
# Auto-detect: advanced if paid API key available
|
| 76 |
+
if settings.has_openai_key:
|
| 77 |
+
return "advanced"
|
| 78 |
+
|
| 79 |
+
return "simple"
|
src/orchestrator_hierarchical.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Hierarchical orchestrator using middleware and sub-teams."""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
from collections.abc import AsyncGenerator
|
| 5 |
+
|
| 6 |
+
import structlog
|
| 7 |
+
|
| 8 |
+
from src.agents.judge_agent_llm import LLMSubIterationJudge
|
| 9 |
+
from src.agents.magentic_agents import create_search_agent
|
| 10 |
+
from src.middleware.sub_iteration import SubIterationMiddleware, SubIterationTeam
|
| 11 |
+
from src.services.embeddings import get_embedding_service
|
| 12 |
+
from src.state import init_magentic_state
|
| 13 |
+
from src.utils.models import AgentEvent
|
| 14 |
+
|
| 15 |
+
logger = structlog.get_logger()
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class ResearchTeam(SubIterationTeam):
    """Adapts a Magentic ChatAgent to the SubIterationTeam protocol."""

    def __init__(self) -> None:
        # A single search agent backs this "team".
        self.agent = create_search_agent()

    async def execute(self, task: str) -> str:
        """Run the task and return the latest assistant reply's text."""
        response = await self.agent.run(task)
        messages = response.messages or []
        # Walk newest-first so the most recent assistant message wins.
        reply = next(
            (
                str(msg.text)
                for msg in reversed(messages)
                if msg.role == "assistant" and msg.text
            ),
            None,
        )
        return reply if reply is not None else "No response from agent."
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class HierarchicalOrchestrator:
    """Orchestrator that uses hierarchical teams and sub-iterations.

    Wires a single ResearchTeam and an LLM judge into SubIterationMiddleware
    and streams the middleware's progress as AgentEvents.
    """

    def __init__(self) -> None:
        # The middleware drives the team/judge pair, looping
        # team.execute -> judge.assess for at most 5 sub-iterations.
        self.team = ResearchTeam()
        self.judge = LLMSubIterationJudge()
        self.middleware = SubIterationMiddleware(self.team, self.judge, max_iterations=5)

    async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
        """Run the research workflow for *query*, yielding progress events.

        Events emitted by the middleware (via callback) are forwarded as they
        arrive; a final "complete" event carries the result text plus the
        judge's last assessment, or an "error" event on failure.
        """
        logger.info("Starting hierarchical orchestrator", query=query)

        # Best-effort: wire up embeddings-backed state; fall back to the
        # default state when the embedding service cannot be created.
        try:
            service = get_embedding_service()
            init_magentic_state(service)
        except Exception as e:
            logger.warning(
                "Embedding service initialization failed, using default state",
                error=str(e),
            )
            init_magentic_state()

        yield AgentEvent(type="started", message=f"Starting research: {query}")

        # The middleware reports progress through a push-style callback;
        # bridge it into this pull-style async generator with a queue.
        queue: asyncio.Queue[AgentEvent | None] = asyncio.Queue()

        async def event_callback(event: AgentEvent) -> None:
            await queue.put(event)

        task_future = asyncio.create_task(self.middleware.run(query, event_callback))

        # Forward queued events while the middleware task runs. Each pass
        # races "next event" against "task finished" so we never block on an
        # empty queue after the task has completed.
        while not task_future.done():
            get_event = asyncio.create_task(queue.get())
            done, _ = await asyncio.wait(
                {task_future, get_event}, return_when=asyncio.FIRST_COMPLETED
            )

            if get_event in done:
                event = get_event.result()
                if event:
                    yield event
            else:
                # The task finished first; drop the still-pending queue.get().
                get_event.cancel()

        # Process remaining events
        while not queue.empty():
            ev = queue.get_nowait()
            if ev:
                yield ev

        try:
            # The middleware returns (result_text, final_assessment_or_None);
            # awaiting here also re-raises any exception it died with.
            result, assessment = await task_future

            assessment_text = assessment.reasoning if assessment else "None"
            yield AgentEvent(
                type="complete",
                message=(
                    f"Research complete.\n\nResult:\n{result}\n\nAssessment:\n{assessment_text}"
                ),
                data={"assessment": assessment.model_dump() if assessment else None},
            )
        except Exception as e:
            logger.error("Orchestrator failed", error=str(e))
            yield AgentEvent(type="error", message=f"Orchestrator failed: {e}")
|
src/orchestrator_magentic.py
CHANGED
|
@@ -128,7 +128,7 @@ class MagenticOrchestrator:
|
|
| 128 |
task = f"""Research drug repurposing opportunities for: {query}
|
| 129 |
|
| 130 |
Workflow:
|
| 131 |
-
1. SearchAgent: Find evidence from PubMed, ClinicalTrials.gov, and
|
| 132 |
2. HypothesisAgent: Generate mechanistic hypotheses (Drug -> Target -> Pathway -> Effect)
|
| 133 |
3. JudgeAgent: Evaluate if evidence is sufficient
|
| 134 |
4. If insufficient -> SearchAgent refines search based on gaps
|
|
@@ -158,10 +158,41 @@ The final output should be a structured research report."""
|
|
| 158 |
iteration=iteration,
|
| 159 |
)
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
def _process_event(self, event: Any, iteration: int) -> AgentEvent | None:
|
| 162 |
"""Process workflow event into AgentEvent."""
|
| 163 |
if isinstance(event, MagenticOrchestratorMessageEvent):
|
| 164 |
-
text =
|
| 165 |
if text:
|
| 166 |
return AgentEvent(
|
| 167 |
type="judging",
|
|
@@ -171,7 +202,7 @@ The final output should be a structured research report."""
|
|
| 171 |
|
| 172 |
elif isinstance(event, MagenticAgentMessageEvent):
|
| 173 |
agent_name = event.agent_id or "unknown"
|
| 174 |
-
text =
|
| 175 |
|
| 176 |
event_type = "judging"
|
| 177 |
if "search" in agent_name.lower():
|
|
@@ -190,7 +221,7 @@ The final output should be a structured research report."""
|
|
| 190 |
)
|
| 191 |
|
| 192 |
elif isinstance(event, MagenticFinalResultEvent):
|
| 193 |
-
text = event.message
|
| 194 |
return AgentEvent(
|
| 195 |
type="complete",
|
| 196 |
message=text,
|
|
|
|
| 128 |
task = f"""Research drug repurposing opportunities for: {query}
|
| 129 |
|
| 130 |
Workflow:
|
| 131 |
+
1. SearchAgent: Find evidence from PubMed, ClinicalTrials.gov, and Europe PMC
|
| 132 |
2. HypothesisAgent: Generate mechanistic hypotheses (Drug -> Target -> Pathway -> Effect)
|
| 133 |
3. JudgeAgent: Evaluate if evidence is sufficient
|
| 134 |
4. If insufficient -> SearchAgent refines search based on gaps
|
|
|
|
| 158 |
iteration=iteration,
|
| 159 |
)
|
| 160 |
|
| 161 |
+
def _extract_text(self, message: Any) -> str:
|
| 162 |
+
"""
|
| 163 |
+
Defensively extract text from a message object.
|
| 164 |
+
|
| 165 |
+
Fixes bug where message.text might return the object itself or its repr.
|
| 166 |
+
"""
|
| 167 |
+
if not message:
|
| 168 |
+
return ""
|
| 169 |
+
|
| 170 |
+
# Priority 1: .content (often the raw string or list of content)
|
| 171 |
+
if hasattr(message, "content") and message.content:
|
| 172 |
+
content = message.content
|
| 173 |
+
# If it's a list (e.g., Multi-modal), join text parts
|
| 174 |
+
if isinstance(content, list):
|
| 175 |
+
return " ".join([str(c.text) for c in content if hasattr(c, "text")])
|
| 176 |
+
return str(content)
|
| 177 |
+
|
| 178 |
+
# Priority 2: .text (standard, but sometimes buggy/missing)
|
| 179 |
+
if hasattr(message, "text") and message.text:
|
| 180 |
+
# Verify it's not the object itself or a repr string
|
| 181 |
+
text = str(message.text)
|
| 182 |
+
if text.startswith("<") and "object at" in text:
|
| 183 |
+
# Likely a repr string, ignore if possible
|
| 184 |
+
pass
|
| 185 |
+
else:
|
| 186 |
+
return text
|
| 187 |
+
|
| 188 |
+
# Fallback: If we can't find clean text, return str(message)
|
| 189 |
+
# taking care to avoid infinite recursion if str() calls .text
|
| 190 |
+
return str(message)
|
| 191 |
+
|
| 192 |
def _process_event(self, event: Any, iteration: int) -> AgentEvent | None:
|
| 193 |
"""Process workflow event into AgentEvent."""
|
| 194 |
if isinstance(event, MagenticOrchestratorMessageEvent):
|
| 195 |
+
text = self._extract_text(event.message)
|
| 196 |
if text:
|
| 197 |
return AgentEvent(
|
| 198 |
type="judging",
|
|
|
|
| 202 |
|
| 203 |
elif isinstance(event, MagenticAgentMessageEvent):
|
| 204 |
agent_name = event.agent_id or "unknown"
|
| 205 |
+
text = self._extract_text(event.message)
|
| 206 |
|
| 207 |
event_type = "judging"
|
| 208 |
if "search" in agent_name.lower():
|
|
|
|
| 221 |
)
|
| 222 |
|
| 223 |
elif isinstance(event, MagenticFinalResultEvent):
|
| 224 |
+
text = self._extract_text(event.message) if event.message else "No result"
|
| 225 |
return AgentEvent(
|
| 226 |
type="complete",
|
| 227 |
message=text,
|
src/state/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""State package - re-exports from agents.state for compatibility."""
|
| 2 |
+
|
| 3 |
+
from src.agents.state import (
|
| 4 |
+
MagenticState,
|
| 5 |
+
get_magentic_state,
|
| 6 |
+
init_magentic_state,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
__all__ = ["MagenticState", "get_magentic_state", "init_magentic_state"]
|
src/tools/__init__.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
"""Search tools package."""
|
| 2 |
|
| 3 |
from src.tools.base import SearchTool
|
|
|
|
|
|
|
| 4 |
from src.tools.pubmed import PubMedTool
|
| 5 |
from src.tools.rag_tool import RAGTool, create_rag_tool
|
| 6 |
from src.tools.search_handler import SearchHandler
|
|
|
|
| 1 |
"""Search tools package."""
|
| 2 |
|
| 3 |
from src.tools.base import SearchTool
|
| 4 |
+
from src.tools.clinicaltrials import ClinicalTrialsTool
|
| 5 |
+
from src.tools.europepmc import EuropePMCTool
|
| 6 |
from src.tools.pubmed import PubMedTool
|
| 7 |
from src.tools.rag_tool import RAGTool, create_rag_tool
|
| 8 |
from src.tools.search_handler import SearchHandler
|
src/tools/pubmed.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
"""PubMed search tool using NCBI E-utilities."""
|
| 2 |
|
| 3 |
-
import asyncio
|
| 4 |
from typing import Any
|
| 5 |
|
| 6 |
import httpx
|
|
@@ -8,6 +7,7 @@ import xmltodict
|
|
| 8 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
| 9 |
|
| 10 |
from src.tools.query_utils import preprocess_query
|
|
|
|
| 11 |
from src.utils.config import settings
|
| 12 |
from src.utils.exceptions import RateLimitError, SearchError
|
| 13 |
from src.utils.models import Citation, Evidence
|
|
@@ -17,7 +17,6 @@ class PubMedTool:
|
|
| 17 |
"""Search tool for PubMed/NCBI."""
|
| 18 |
|
| 19 |
BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
|
| 20 |
-
RATE_LIMIT_DELAY = 0.34 # ~3 requests/sec without API key
|
| 21 |
HTTP_TOO_MANY_REQUESTS = 429
|
| 22 |
|
| 23 |
def __init__(self, api_key: str | None = None) -> None:
|
|
@@ -25,7 +24,9 @@ class PubMedTool:
|
|
| 25 |
# Ignore placeholder values from .env.example
|
| 26 |
if self.api_key == "your-ncbi-key-here":
|
| 27 |
self.api_key = None
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
|
| 30 |
@property
|
| 31 |
def name(self) -> str:
|
|
@@ -33,12 +34,7 @@ class PubMedTool:
|
|
| 33 |
|
| 34 |
async def _rate_limit(self) -> None:
|
| 35 |
"""Enforce NCBI rate limiting."""
|
| 36 |
-
|
| 37 |
-
now = loop.time()
|
| 38 |
-
elapsed = now - self._last_request_time
|
| 39 |
-
if elapsed < self.RATE_LIMIT_DELAY:
|
| 40 |
-
await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
|
| 41 |
-
self._last_request_time = loop.time()
|
| 42 |
|
| 43 |
def _build_params(self, **kwargs: Any) -> dict[str, Any]:
|
| 44 |
"""Build request params with optional API key."""
|
|
|
|
| 1 |
"""PubMed search tool using NCBI E-utilities."""
|
| 2 |
|
|
|
|
| 3 |
from typing import Any
|
| 4 |
|
| 5 |
import httpx
|
|
|
|
| 7 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
| 8 |
|
| 9 |
from src.tools.query_utils import preprocess_query
|
| 10 |
+
from src.tools.rate_limiter import get_pubmed_limiter
|
| 11 |
from src.utils.config import settings
|
| 12 |
from src.utils.exceptions import RateLimitError, SearchError
|
| 13 |
from src.utils.models import Citation, Evidence
|
|
|
|
| 17 |
"""Search tool for PubMed/NCBI."""
|
| 18 |
|
| 19 |
BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
|
|
|
|
| 20 |
HTTP_TOO_MANY_REQUESTS = 429
|
| 21 |
|
| 22 |
def __init__(self, api_key: str | None = None) -> None:
|
|
|
|
| 24 |
# Ignore placeholder values from .env.example
|
| 25 |
if self.api_key == "your-ncbi-key-here":
|
| 26 |
self.api_key = None
|
| 27 |
+
|
| 28 |
+
# Use shared rate limiter
|
| 29 |
+
self._limiter = get_pubmed_limiter(self.api_key)
|
| 30 |
|
| 31 |
@property
|
| 32 |
def name(self) -> str:
|
|
|
|
| 34 |
|
| 35 |
async def _rate_limit(self) -> None:
|
| 36 |
"""Enforce NCBI rate limiting."""
|
| 37 |
+
await self._limiter.acquire()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
def _build_params(self, **kwargs: Any) -> dict[str, Any]:
|
| 40 |
"""Build request params with optional API key."""
|
src/tools/rate_limiter.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Rate limiting utilities using the limits library."""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
from typing import ClassVar
|
| 5 |
+
|
| 6 |
+
from limits import RateLimitItem, parse
|
| 7 |
+
from limits.storage import MemoryStorage
|
| 8 |
+
from limits.strategies import MovingWindowRateLimiter
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class RateLimiter:
    """
    Async-compatible rate limiter built on the ``limits`` library.

    A moving-window strategy backed by in-memory storage gives smooth
    limiting without fixed-window burst artifacts.
    """

    def __init__(self, rate: str) -> None:
        """
        Initialize the limiter.

        Args:
            rate: Rate string such as "3/second" or "10/second".
        """
        self.rate = rate
        self._storage = MemoryStorage()
        self._limiter = MovingWindowRateLimiter(self._storage)
        self._rate_limit: RateLimitItem = parse(rate)
        # One shared identity so every caller draws from the same budget.
        self._identity = "default"

    async def acquire(self, wait: bool = True) -> bool:
        """
        Acquire permission to make a request.

        ASYNC-SAFE: waits with asyncio.sleep() -- never time.sleep() -- so
        other coroutines (UI, parallel searches) keep running while this one
        polls for a free slot.

        Args:
            wait: When False, return immediately instead of waiting.

        Returns:
            True once allowed; False only when wait=False and the limit is hit.
        """
        # hit() is a fast synchronous check (~microseconds per call).
        while not self._limiter.hit(self._rate_limit, self._identity):
            if not wait:
                return False
            # Yield to the event loop; 10 ms polling keeps latency low
            # without busy-spinning.
            await asyncio.sleep(0.01)
        return True

    def reset(self) -> None:
        """Clear all recorded hits (test helper)."""
        self._storage.reset()
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# Singleton limiter for PubMed/NCBI
_pubmed_limiter: RateLimiter | None = None


def get_pubmed_limiter(api_key: str | None = None) -> RateLimiter:
    """
    Get the shared PubMed rate limiter.

    Rate depends on whether an API key is provided:
    - Without key: 3 requests/second
    - With key: 10 requests/second

    Bug fix: previously the limiter was latched at whatever rate the FIRST
    caller established, so if the first PubMedTool had no key, every later
    keyed caller stayed stuck at 3/second despite the documented 10/second.
    The limiter is now upgraded in place once any caller supplies a key.
    It is never downgraded: all callers share one budget, and the keyed
    quota is the one NCBI grants for the account.

    Args:
        api_key: NCBI API key (optional)

    Returns:
        Shared RateLimiter instance
    """
    global _pubmed_limiter

    rate = "10/second" if api_key else "3/second"
    if _pubmed_limiter is None:
        _pubmed_limiter = RateLimiter(rate)
    elif api_key and _pubmed_limiter.rate != "10/second":
        # A key became available after a keyless limiter was created:
        # upgrade to the higher keyed allowance.
        _pubmed_limiter = RateLimiter(rate)

    return _pubmed_limiter


def reset_pubmed_limiter() -> None:
    """Reset the shared PubMed limiter (for testing)."""
    global _pubmed_limiter
    _pubmed_limiter = None
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# Factory for other APIs
|
| 97 |
+
class RateLimiterFactory:
    """Registry of named, shared rate limiters for different APIs."""

    # One limiter per api_name, shared process-wide.
    _limiters: ClassVar[dict[str, RateLimiter]] = {}

    @classmethod
    def get(cls, api_name: str, rate: str) -> RateLimiter:
        """
        Return the limiter registered under *api_name*, creating it on first use.

        Args:
            api_name: Unique identifier for the API.
            rate: Rate limit string (e.g., "10/second"); only consulted when
                the limiter is first created.

        Returns:
            RateLimiter instance (shared for the same api_name).
        """
        limiter = cls._limiters.get(api_name)
        if limiter is None:
            limiter = RateLimiter(rate)
            cls._limiters[api_name] = limiter
        return limiter

    @classmethod
    def reset_all(cls) -> None:
        """Drop every registered limiter (for testing)."""
        cls._limiters.clear()
|
src/tools/web_search.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Web search tool using DuckDuckGo."""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
|
| 5 |
+
import structlog
|
| 6 |
+
from duckduckgo_search import DDGS
|
| 7 |
+
|
| 8 |
+
from src.utils.models import Citation, Evidence, SearchResult
|
| 9 |
+
|
| 10 |
+
logger = structlog.get_logger()
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class WebSearchTool:
    """Tool for searching the web using DuckDuckGo."""

    def __init__(self) -> None:
        self._ddgs = DDGS()

    async def search(self, query: str, max_results: int = 10) -> SearchResult:
        """Execute a web search and wrap the hits as Evidence.

        Runs the blocking DDGS client inside a thread-pool executor so the
        event loop stays responsive. On failure, returns an empty
        SearchResult with the error recorded instead of raising.
        """
        try:
            loop = asyncio.get_running_loop()

            def _do_search() -> list[dict[str, str]]:
                # text() returns an iterator; materialize it inside the worker
                # thread so all network I/O happens off the event loop.
                return list(self._ddgs.text(query, max_results=max_results))

            raw_results = await loop.run_in_executor(None, _do_search)

            evidence = []
            for r in raw_results:
                # BUG FIX: Evidence.content requires a non-empty string
                # (min_length=1), so a hit with an empty "body" used to raise
                # a validation error that the broad except below turned into
                # failure of the WHOLE search. Fall back to the title, and
                # skip hits with no usable text at all.
                content = r.get("body") or r.get("title")
                if not content:
                    continue
                ev = Evidence(
                    content=content,
                    citation=Citation(
                        title=r.get("title", "No Title"),
                        url=r.get("href", ""),
                        source="web",
                        date="Unknown",
                        authors=[],
                    ),
                    relevance=0.0,
                )
                evidence.append(ev)

            return SearchResult(
                query=query, evidence=evidence, sources_searched=["web"], total_found=len(evidence)
            )

        except Exception as e:
            logger.error("Web search failed", error=str(e))
            return SearchResult(
                query=query, evidence=[], sources_searched=["web"], total_found=0, errors=[str(e)]
            )
|
src/utils/config.py
CHANGED
|
@@ -23,13 +23,20 @@ class Settings(BaseSettings):
|
|
| 23 |
# LLM Configuration
|
| 24 |
openai_api_key: str | None = Field(default=None, description="OpenAI API key")
|
| 25 |
anthropic_api_key: str | None = Field(default=None, description="Anthropic API key")
|
| 26 |
-
llm_provider: Literal["openai", "anthropic"] = Field(
|
| 27 |
default="openai", description="Which LLM provider to use"
|
| 28 |
)
|
| 29 |
openai_model: str = Field(default="gpt-5.1", description="OpenAI model name")
|
| 30 |
anthropic_model: str = Field(
|
| 31 |
default="claude-sonnet-4-5-20250929", description="Anthropic model"
|
| 32 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# Embedding Configuration
|
| 35 |
# Note: OpenAI embeddings require OPENAI_API_KEY (Anthropic has no embeddings API)
|
|
@@ -175,10 +182,15 @@ class Settings(BaseSettings):
|
|
| 175 |
"""Check if Anthropic API key is available."""
|
| 176 |
return bool(self.anthropic_api_key)
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
@property
|
| 179 |
def has_any_llm_key(self) -> bool:
|
| 180 |
"""Check if any LLM API key is available."""
|
| 181 |
-
return self.has_openai_key or self.has_anthropic_key
|
| 182 |
|
| 183 |
@property
|
| 184 |
def has_huggingface_key(self) -> bool:
|
|
|
|
| 23 |
# LLM Configuration
|
| 24 |
openai_api_key: str | None = Field(default=None, description="OpenAI API key")
|
| 25 |
anthropic_api_key: str | None = Field(default=None, description="Anthropic API key")
|
| 26 |
+
llm_provider: Literal["openai", "anthropic", "huggingface"] = Field(
|
| 27 |
default="openai", description="Which LLM provider to use"
|
| 28 |
)
|
| 29 |
openai_model: str = Field(default="gpt-5.1", description="OpenAI model name")
|
| 30 |
anthropic_model: str = Field(
|
| 31 |
default="claude-sonnet-4-5-20250929", description="Anthropic model"
|
| 32 |
)
|
| 33 |
+
# HuggingFace (free tier)
|
| 34 |
+
huggingface_model: str | None = Field(
|
| 35 |
+
default="meta-llama/Llama-3.1-70B-Instruct", description="HuggingFace model name"
|
| 36 |
+
)
|
| 37 |
+
hf_token: str | None = Field(
|
| 38 |
+
default=None, alias="HF_TOKEN", description="HuggingFace API token"
|
| 39 |
+
)
|
| 40 |
|
| 41 |
# Embedding Configuration
|
| 42 |
# Note: OpenAI embeddings require OPENAI_API_KEY (Anthropic has no embeddings API)
|
|
|
|
| 182 |
"""Check if Anthropic API key is available."""
|
| 183 |
return bool(self.anthropic_api_key)
|
| 184 |
|
| 185 |
+
@property
|
| 186 |
+
def has_huggingface_key(self) -> bool:
|
| 187 |
+
"""Check if HuggingFace token is available."""
|
| 188 |
+
return bool(self.hf_token)
|
| 189 |
+
|
| 190 |
@property
|
| 191 |
def has_any_llm_key(self) -> bool:
|
| 192 |
"""Check if any LLM API key is available."""
|
| 193 |
+
return self.has_openai_key or self.has_anthropic_key or self.has_huggingface_key
|
| 194 |
|
| 195 |
@property
|
| 196 |
def has_huggingface_key(self) -> bool:
|
src/utils/models.py
CHANGED
|
@@ -36,6 +36,10 @@ class Evidence(BaseModel):
|
|
| 36 |
content: str = Field(min_length=1, description="The actual text content")
|
| 37 |
citation: Citation
|
| 38 |
relevance: float = Field(default=0.0, ge=0.0, le=1.0, description="Relevance score 0-1")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
model_config = {"frozen": True}
|
| 41 |
|
|
|
|
| 36 |
content: str = Field(min_length=1, description="The actual text content")
|
| 37 |
citation: Citation
|
| 38 |
relevance: float = Field(default=0.0, ge=0.0, le=1.0, description="Relevance score 0-1")
|
| 39 |
+
metadata: dict[str, Any] = Field(
|
| 40 |
+
default_factory=dict,
|
| 41 |
+
description="Additional metadata (e.g., cited_by_count, concepts, is_open_access)",
|
| 42 |
+
)
|
| 43 |
|
| 44 |
model_config = {"frozen": True}
|
| 45 |
|
tests/integration/test_dual_mode_e2e.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""End-to-End Integration Tests for Dual-Mode Architecture."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import AsyncMock, MagicMock, patch
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
pytestmark = [pytest.mark.integration, pytest.mark.slow]
|
| 8 |
+
|
| 9 |
+
from src.orchestrator_factory import create_orchestrator
|
| 10 |
+
from src.utils.models import Citation, Evidence, OrchestratorConfig
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@pytest.fixture
def mock_search_handler():
    """Search handler stub whose execute() returns one canned Evidence item."""
    citation = Citation(title="Test Paper", url="http://test", date="2024", source="pubmed")
    item = Evidence(citation=citation, content="Metformin increases lifespan in mice.")
    handler = MagicMock()
    handler.execute = AsyncMock(return_value=[item])
    return handler
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@pytest.fixture
def mock_judge_handler():
    """Judge handler stub whose assess() always deems evidence sufficient."""
    # Canned assessment: sufficient evidence, ready to synthesize.
    assessment = MagicMock(sufficient=True, recommendation="synthesize")
    handler = MagicMock()
    handler.assess = AsyncMock(return_value=assessment)
    return handler
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@pytest.mark.asyncio
async def test_simple_mode_e2e(mock_search_handler, mock_judge_handler):
    """Simple-mode orchestration emits events and drives both handlers."""
    orch = create_orchestrator(
        search_handler=mock_search_handler,
        judge_handler=mock_judge_handler,
        mode="simple",
        config=OrchestratorConfig(max_iterations=1),
    )

    # Drain the full event stream from a single run.
    events = [event async for event in orch.run("Test query")]

    assert len(events) > 0
    assert mock_search_handler.execute.called
    assert mock_judge_handler.assess.called
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@pytest.mark.asyncio
async def test_advanced_mode_explicit_instantiation():
    """Test explicit Advanced Mode instantiation (not auto-detect).

    Exercises the explicit mode="advanced" path: the orchestrator must be
    constructible when that mode is requested directly. Settings are patched
    so any internal checks pass, and the agent factories are patched so no
    real API calls happen during init.
    """
    with (
        patch("src.orchestrator_factory.settings") as mock_settings,
        patch("src.agents.magentic_agents.OpenAIChatClient"),
        patch("src.orchestrator_magentic.create_search_agent"),
        patch("src.orchestrator_magentic.create_judge_agent"),
        patch("src.orchestrator_magentic.create_hypothesis_agent"),
        patch("src.orchestrator_magentic.create_report_agent"),
    ):
        # Settings patch ensures factory checks pass (even though mode is explicit)
        mock_settings.has_openai_key = True
        orch = create_orchestrator(mode="advanced")
        assert orch is not None
tests/integration/test_modal.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Integration tests for Modal (requires credentials)."""
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
|
@@ -7,9 +7,18 @@ from src.utils.config import settings
|
|
| 7 |
# Check if any LLM API key is available
|
| 8 |
_llm_available = bool(settings.openai_api_key or settings.anthropic_api_key)
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
@pytest.mark.integration
|
| 12 |
-
@pytest.mark.skipif(not
|
|
|
|
| 13 |
class TestModalIntegration:
|
| 14 |
"""Integration tests requiring Modal credentials."""
|
| 15 |
|
|
|
|
| 1 |
+
"""Integration tests for Modal (requires credentials and modal package)."""
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
|
|
|
| 7 |
# Check if any LLM API key is available
|
| 8 |
_llm_available = bool(settings.openai_api_key or settings.anthropic_api_key)
|
| 9 |
|
| 10 |
+
# Check if modal package is installed
|
| 11 |
+
try:
|
| 12 |
+
import modal # noqa: F401
|
| 13 |
+
|
| 14 |
+
_modal_installed = True
|
| 15 |
+
except ImportError:
|
| 16 |
+
_modal_installed = False
|
| 17 |
+
|
| 18 |
|
| 19 |
@pytest.mark.integration
|
| 20 |
+
@pytest.mark.skipif(not _modal_installed, reason="Modal package not installed")
|
| 21 |
+
@pytest.mark.skipif(not settings.modal_available, reason="Modal credentials not configured")
|
| 22 |
class TestModalIntegration:
|
| 23 |
"""Integration tests requiring Modal credentials."""
|
| 24 |
|
tests/unit/agent_factory/test_judges_factory.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unit tests for Judge Factory and Model Selection."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import patch
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
pytestmark = pytest.mark.unit
|
| 8 |
+
from pydantic_ai.models.anthropic import AnthropicModel
|
| 9 |
+
|
| 10 |
+
# We expect this import to exist after we implement it, or we mock it if it's not there yet
|
| 11 |
+
# For TDD, we assume we will use the library class
|
| 12 |
+
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 13 |
+
from pydantic_ai.models.openai import OpenAIModel
|
| 14 |
+
|
| 15 |
+
from src.agent_factory.judges import get_model
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@pytest.fixture
def mock_settings():
    """Yield an autospec'd stand-in for the judges module's settings object."""
    # autospec=True keeps attribute access honest: setting attributes the
    # real Settings object lacks would raise instead of passing silently.
    with patch("src.agent_factory.judges.settings", autospec=True) as mock_settings:
        yield mock_settings
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def test_get_model_openai(mock_settings):
    """get_model returns an OpenAIModel when the provider is 'openai'."""
    mock_settings.llm_provider = "openai"
    mock_settings.openai_model = "gpt-5.1"
    mock_settings.openai_api_key = "sk-test"

    result = get_model()

    assert isinstance(result, OpenAIModel)
    assert result.model_name == "gpt-5.1"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def test_get_model_anthropic(mock_settings):
    """get_model returns an AnthropicModel when the provider is 'anthropic'."""
    mock_settings.llm_provider = "anthropic"
    mock_settings.anthropic_model = "claude-sonnet-4-5-20250929"
    mock_settings.anthropic_api_key = "sk-ant-test"

    result = get_model()

    assert isinstance(result, AnthropicModel)
    assert result.model_name == "claude-sonnet-4-5-20250929"
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def test_get_model_huggingface(mock_settings):
    """get_model returns a HuggingFaceModel when the provider is 'huggingface'."""
    mock_settings.llm_provider = "huggingface"
    mock_settings.huggingface_model = "meta-llama/Llama-3.1-70B-Instruct"
    mock_settings.hf_token = "hf_test_token"

    result = get_model()

    assert isinstance(result, HuggingFaceModel)
    assert result.model_name == "meta-llama/Llama-3.1-70B-Instruct"
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def test_get_model_default_fallback(mock_settings):
    """Test fallback to OpenAI if provider is unknown."""
    mock_settings.llm_provider = "unknown_provider"
    mock_settings.openai_api_key = "sk-test"
    mock_settings.openai_model = "gpt-5.1"

    model = get_model()
    assert isinstance(model, OpenAIModel)
    # Unlike the other provider tests, the original never pinned the model
    # name, so a fallback to the wrong OpenAI model would still pass. Pin it.
    assert model.model_name == "gpt-5.1"
|