Joseph Pollack committed: fix interface

(This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full set.)
- .env copy.example +0 -124
- .env.example +93 -17
- .github/README.md +0 -25
- .github/workflows/ci.yml +7 -7
- .pre-commit-config.yaml +4 -4
- docs/api/agents.md +2 -0
- docs/api/models.md +2 -0
- docs/api/orchestrators.md +2 -0
- docs/api/services.md +2 -0
- docs/api/tools.md +2 -0
- docs/architecture/agents.md +2 -0
- docs/architecture/middleware.md +2 -0
- docs/architecture/services.md +2 -0
- docs/architecture/tools.md +2 -0
- docs/contributing/code-quality.md +2 -0
- docs/contributing/code-style.md +2 -0
- docs/contributing/error-handling.md +2 -0
- docs/contributing/implementation-patterns.md +2 -0
- docs/contributing/index.md +2 -0
- docs/contributing/prompt-engineering.md +2 -0
- docs/contributing/testing.md +2 -0
- docs/getting-started/examples.md +2 -0
- docs/getting-started/installation.md +2 -0
- docs/getting-started/mcp-integration.md +2 -0
- docs/getting-started/quick-start.md +2 -0
- docs/license.md +2 -0
- docs/overview/architecture.md +2 -0
- docs/overview/features.md +2 -0
- docs/team.md +2 -0
- pyproject.toml +1 -0
- requirements.txt +31 -18
- src/agent_factory/judges.py +15 -2
- src/agents/hypothesis_agent.py +1 -1
- src/agents/input_parser.py +1 -1
- src/agents/judge_agent_llm.py +2 -2
- src/agents/knowledge_gap.py +1 -1
- src/agents/long_writer.py +1 -1
- src/agents/proofreader.py +2 -2
- src/agents/report_agent.py +1 -1
- src/agents/thinking.py +2 -2
- src/agents/tool_selector.py +1 -1
- src/agents/writer.py +2 -2
- src/app.py +44 -12
- src/orchestrator/planner_agent.py +1 -1
- src/services/llamaindex_rag.py +2 -1
- src/services/statistical_analyzer.py +1 -1
- tests/integration/test_rag_integration.py +5 -2
- tests/unit/agent_factory/test_judges.py +3 -1
- tests/unit/agents/test_hypothesis_agent.py +13 -12
- tests/unit/agents/test_input_parser.py +36 -6
.env copy.example
DELETED
@@ -1,124 +0,0 @@
(The entire 124-line file was deleted. Its contents are identical to the rewritten .env.example, shown in full in the next section.)
.env.example
CHANGED
File rewritten (+93 -17). Summary: the provider comment now lists "openai", "anthropic", and "huggingface"; the model-name examples are reordered (OPENAI_MODEL before ANTHROPIC_MODEL); the old "HUGGINGFACE (FREE TIER)" section becomes a full HUGGINGFACE CONFIGURATION section with a default model, fallback chain, and UI overrides; the embedding settings move into a dedicated EMBEDDING CONFIGURATION section with a new local default (BAAI/bge-small-en-v1.5 instead of all-MiniLM-L6-v2); and new sections are added for graph execution, budget limits, web search providers, Modal, ChromaDB, and the RAG service. (Several removed lines are truncated in this view.)

New file contents:

# ============== LLM CONFIGURATION ==============

# Provider: "openai", "anthropic", or "huggingface"
LLM_PROVIDER=openai

# API Keys (at least one required for full LLM analysis)
OPENAI_API_KEY=sk-your-key-here
ANTHROPIC_API_KEY=sk-ant-your-key-here

# Model names (optional - sensible defaults set in config.py)
# OPENAI_MODEL=gpt-5.1
# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929

# ============== HUGGINGFACE CONFIGURATION ==============

# HuggingFace Token - enables gated models and higher rate limits
# Get yours at: https://huggingface.co/settings/tokens
#
# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
#
# For HuggingFace Spaces deployment:
# Set this as a "Secret" in Space Settings -> Variables and secrets
# Users/judges don't need their own token - the Space secret is used
#
HF_TOKEN=hf_your-token-here
# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)

# Default HuggingFace model for inference (gated, requires auth)
# Can be overridden in UI dropdown
# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking

# Fallback models for HuggingFace Inference API (comma-separated)
# Models are tried in order until one succeeds
# Format: model1,model2,model3
# Latest reasoning models first, then reliable fallbacks
# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct

# Override model/provider selection (optional, usually set via UI)
# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
# HF_PROVIDER=hyperbolic

# ============== EMBEDDING CONFIGURATION ==============

# Embedding Provider: "openai", "local", or "huggingface"
# Default: "local" (no API key required)
EMBEDDING_PROVIDER=local

# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
OPENAI_EMBEDDING_MODEL=text-embedding-3-small

# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
# BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2
LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5

# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# ============== AGENT CONFIGURATION ==============

MAX_ITERATIONS=10
SEARCH_TIMEOUT=30
LOG_LEVEL=INFO

# Graph-based execution (experimental)
# USE_GRAPH_EXECUTION=false

# Budget & Rate Limiting
# DEFAULT_TOKEN_LIMIT=100000
# DEFAULT_TIME_LIMIT_MINUTES=10
# DEFAULT_ITERATIONS_LIMIT=10

# ============== WEB SEARCH CONFIGURATION ==============

# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
# Default: "duckduckgo" (no API key required)
WEB_SEARCH_PROVIDER=duckduckgo

# Serper API Key (for Google search via Serper)
# SERPER_API_KEY=your-serper-key-here

# SearchXNG Host URL (for self-hosted search)
# SEARCHXNG_HOST=http://localhost:8080

# Brave Search API Key
# BRAVE_API_KEY=your-brave-key-here

# Tavily API Key
# TAVILY_API_KEY=your-tavily-key-here

# ============== EXTERNAL SERVICES ==============

# PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec)
NCBI_API_KEY=your-ncbi-key-here

# Modal (optional - for secure code execution sandbox)
# MODAL_TOKEN_ID=your-modal-token-id
# MODAL_TOKEN_SECRET=your-modal-token-secret

# ============== VECTOR DATABASE (ChromaDB) ==============

# ChromaDB storage path
CHROMA_DB_PATH=./chroma_db

# Persist ChromaDB to disk (default: true)
# CHROMA_DB_PERSIST=true

# Remote ChromaDB server (optional)
# CHROMA_DB_HOST=localhost
# CHROMA_DB_PORT=8000

# ============== RAG SERVICE CONFIGURATION ==============

# ChromaDB collection name for RAG
# RAG_COLLECTION_NAME=deepcritical_evidence

# Number of top results to retrieve from RAG
# RAG_SIMILARITY_TOP_K=5

# Automatically ingest evidence into RAG
# RAG_AUTO_INGEST=true
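For orientation, here is a minimal sketch of how a .env file like this is typically loaded with pydantic-settings (which the project depends on). The field names below simply mirror the variables above; the repository's actual config.py may name things differently.

    # sketch.py - illustrative pydantic-settings loader for the .env above
    from pydantic_settings import BaseSettings, SettingsConfigDict

    class Settings(BaseSettings):
        # Read .env, ignore variables we haven't modeled here
        model_config = SettingsConfigDict(env_file=".env", extra="ignore")

        llm_provider: str = "openai"            # LLM_PROVIDER
        openai_api_key: str | None = None       # OPENAI_API_KEY
        anthropic_api_key: str | None = None    # ANTHROPIC_API_KEY
        hf_token: str | None = None             # HF_TOKEN
        embedding_provider: str = "local"       # EMBEDDING_PROVIDER
        web_search_provider: str = "duckduckgo" # WEB_SEARCH_PROVIDER
        max_iterations: int = 10                # MAX_ITERATIONS
        search_timeout: int = 30                # SEARCH_TIMEOUT

    settings = Settings()
    print(settings.llm_provider)

Matching is case-insensitive, so MAX_ITERATIONS in the file populates max_iterations on the model.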
.github/README.md
CHANGED
@@ -1,28 +1,3 @@
The HuggingFace Spaces YAML frontmatter (25 lines) was removed from the README:

----
-title: DeepCritical
-emoji: 🧬
-colorFrom: blue
-colorTo: purple
-sdk: gradio
-sdk_version: "6.0.1"
-python_version: "3.11"
-app_file: src/app.py
-hf_oauth: true
-hf_oauth_expiration_minutes: 480
-hf_oauth_scopes:
-  - inference-api
-pinned: true
-license: mit
-tags:
-  - mcp-in-action-track-enterprise
-  - mcp-hackathon
-  - drug-repurposing
-  - biomedical-ai
-  - pydantic-ai
-  - llamaindex
-  - modal
----
-
 <div align="center">

 [![GitHub](https://img.shields.io/badge/GitHub-DeepCritical-181717?logo=github)](https://github.com/DeepCritical/GradioDemo)
.github/workflows/ci.yml
CHANGED
@@ -33,19 +33,19 @@ jobs:
       - name: Lint with ruff
         continue-on-error: true
         run: |
-          uv run ruff check . --exclude tests
-          uv run ruff format --check . --exclude tests
+          uv run ruff check . --exclude tests --exclude reference_repos
+          uv run ruff format --check . --exclude tests --exclude reference_repos

       - name: Type check with mypy
         continue-on-error: true
         run: |
-          uv run mypy src
+          uv run mypy src --ignore-missing-imports

-      - name: Run unit tests (No ...)   (old step name truncated in this view)
+      - name: Run unit tests (No OpenAI/Anthropic, HuggingFace only)
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
-          uv run pytest tests/unit/ -v -m "not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml
+          uv run pytest tests/unit/ -v -m "not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml

@@ -61,11 +61,11 @@ jobs:
           uv run pytest tests/integration/ -v -m "huggingface and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
         continue-on-error: true # Allow failures if HF_TOKEN not set

-      - name: Run non-OpenAI integration tests (excluding embedding providers)
+      - name: Run non-OpenAI/Anthropic integration tests (excluding embedding providers)
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
-          uv run pytest tests/integration/ -v -m "integration and not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
+          uv run pytest tests/integration/ -v -m "integration and not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
         continue-on-error: true # Allow failures if dependencies not available

       - name: Upload coverage reports to Codecov
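The -m expressions above only select cleanly (and without PytestUnknownMarkWarning) if the custom marks are registered. A minimal sketch of the registration these selections assume; the project's actual pytest configuration is not shown in this commit, so the descriptions are illustrative:

    [tool.pytest.ini_options]
    markers = [
        "openai: requires an OpenAI API key",
        "anthropic: requires an Anthropic API key",
        "huggingface: calls the HuggingFace Inference API",
        "embedding_provider: exercises a specific embedding provider",
        "integration: runs against live services",
    ]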
.pre-commit-config.yaml
CHANGED
@@ -1,16 +1,16 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.   (old pin truncated in this view)
+    rev: v0.14.7 # Compatible with ruff>=0.14.6 (matches CI)
     hooks:
       - id: ruff
-        args: [--fix, --exclude, tests]
+        args: [--fix, --exclude, tests, --exclude, reference_repos]
         exclude: ^reference_repos/
       - id: ruff-format
-        args: [--exclude, tests]
+        args: [--exclude, tests, --exclude, reference_repos]
         exclude: ^reference_repos/

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.   (old pin truncated in this view)
+    rev: v1.18.2 # Matches CI version mypy>=1.18.2
     hooks:
       - id: mypy
         files: ^src/
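With the hook revisions now pinned to match the CI tool versions, the same gates can be checked locally with:

    uv run pre-commit run --all-files

Future bumps should move the rev: pins and the CI/requirements pins together; pre-commit autoupdate regenerates the former.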
docs/* (24 files)
CHANGED

The following documentation pages each received the same trivial change - two blank lines appended at end of file (+2 -0):

docs/api/agents.md, docs/api/models.md, docs/api/orchestrators.md, docs/api/services.md, docs/api/tools.md, docs/architecture/agents.md, docs/architecture/middleware.md, docs/architecture/services.md, docs/architecture/tools.md, docs/contributing/code-quality.md, docs/contributing/code-style.md, docs/contributing/error-handling.md, docs/contributing/implementation-patterns.md, docs/contributing/index.md, docs/contributing/prompt-engineering.md, docs/contributing/testing.md, docs/getting-started/examples.md, docs/getting-started/installation.md, docs/getting-started/mcp-integration.md, docs/getting-started/quick-start.md, docs/license.md, docs/overview/architecture.md, docs/overview/features.md, docs/team.md
pyproject.toml
CHANGED
@@ -29,6 +29,7 @@ dependencies = [
     "tokenizers>=0.22.0,<=0.23.0",
     "transformers>=4.57.2",
     "chromadb>=0.4.0",
+    "rpds-py>=0.29.0", # Python implementation of rpds (required by chromadb on Windows)
     "sentence-transformers>=2.2.0",
     "numpy<2.0",
     "agent-framework-core>=1.0.0b251120,<2.0.0",
requirements.txt
CHANGED
File reorganized and extended (+31 -18). Several removed lines (old LlamaIndex pins and section comments) are truncated in this view. Key changes: anthropic and huggingface-hub become direct dependencies, gradio gains the oauth extra, pydantic-graph is pinned, the LlamaIndex RAG stack is pinned explicitly, rpds-py and numpy<2.0 join the embeddings/vector-store section, and modal plus pydantic-ai-slim[huggingface] are added.

New contents of the changed region (lines 9-61):

pydantic-settings>=2.2
pydantic-ai>=0.0.16

# OPTIONAL AI Providers
openai>=1.0.0
anthropic>=0.18.0

# HTTP & Parsing
httpx>=0.27
beautifulsoup4>=4.12
xmltodict>=0.13

# HuggingFace Hub
huggingface-hub>=0.20.0

# UI (Gradio with MCP server support)
gradio[mcp,oauth]>=6.0.0

# Utils
python-dotenv>=1.0
tenacity>=8.2
structlog>=24.1
requests>=2.32.5
limits>=3.0 # Rate limiting
pydantic-graph>=1.22.0

# Web search
duckduckgo-search>=5.0

# Multi-agent orchestration (Advanced mode)
agent-framework-core>=1.0.0b251120,<2.0.0

# LlamaIndex RAG
llama-index-llms-huggingface>=0.6.1
llama-index-llms-huggingface-api>=0.6.1
llama-index-vector-stores-chroma>=0.5.3
llama-index>=0.14.8
llama-index-llms-openai>=0.6.9
llama-index-embeddings-openai>=0.5.1

# Embeddings & Vector Store
tokenizers>=0.22.0,<=0.23.0
transformers>=4.57.2
chromadb>=0.4.0
rpds-py>=0.29.0 # Python implementation of rpds (required by chromadb on Windows)
sentence-transformers>=2.2.0
numpy<2.0

# Optional: Modal for code execution
modal>=0.63.0

# Pydantic AI with HuggingFace support
pydantic-ai-slim[huggingface]>=0.0.18
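Shipping huggingface-hub directly is what makes the HF_FALLBACK_MODELS behavior from .env.example ("models are tried in order until one succeeds") implementable. A minimal sketch of that loop, assuming the chat-completion API of recent huggingface-hub versions; the function name and error handling are illustrative, not the project's actual implementation:

    # fallback.py - illustrative "try models in order" loop for HF_FALLBACK_MODELS
    import os

    from huggingface_hub import InferenceClient

    def chat_with_fallback(prompt: str) -> str:
        models = os.environ.get("HF_FALLBACK_MODELS", "HuggingFaceH4/zephyr-7b-beta").split(",")
        client = InferenceClient(token=os.environ.get("HF_TOKEN"))
        last_error: Exception | None = None
        for model in models:
            try:
                out = client.chat_completion(
                    messages=[{"role": "user", "content": prompt}],
                    model=model.strip(),
                    max_tokens=512,
                )
                return out.choices[0].message.content
            except Exception as e:  # gated model, 404, rate limit, ...
                last_error = e      # remember the failure and try the next model
        raise RuntimeError(f"All fallback models failed: {last_error}")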
src/agent_factory/judges.py
CHANGED
@@ -8,10 +8,18 @@ from typing import Any
 import structlog
 from huggingface_hub import InferenceClient
 from pydantic_ai import Agent
-from pydantic_ai.models.anthropic import AnthropicModel
 from pydantic_ai.models.openai import OpenAIModel  # type: ignore[attr-defined]
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

+# Try to import AnthropicModel (may not be available if anthropic package is missing)
+try:
+    from pydantic_ai.models.anthropic import AnthropicModel
+
+    _ANTHROPIC_AVAILABLE = True
+except ImportError:
+    AnthropicModel = None  # type: ignore[assignment, misc]
+    _ANTHROPIC_AVAILABLE = False
+
 # Try to import HuggingFace support (may not be available in all pydantic-ai versions)
 # According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires
 # pydantic-ai with huggingface extra or pydantic-ai-slim[huggingface]

@@ -50,6 +58,11 @@ def get_model() -> Any:
     llm_provider = settings.llm_provider

     if llm_provider == "anthropic":
+        if not _ANTHROPIC_AVAILABLE:
+            raise ImportError(
+                "Anthropic models are not available. "
+                "Please install with: uv add 'pydantic-ai[anthropic]' or use 'openai'/'huggingface' as the LLM provider."
+            )
         return AnthropicModel(settings.anthropic_model, api_key=settings.anthropic_api_key)  # type: ignore[call-arg]

     if llm_provider == "huggingface":

@@ -144,7 +157,7 @@ class JudgeHandler:
         try:
             # Run the agent with structured output
             result = await self.agent.run(user_prompt)
-            assessment = result.   (attribute name truncated in this view)
+            assessment = result.data

             logger.info(
                 "Assessment complete",
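A hypothetical test (not part of this commit) sketching how the new guard could be exercised; it assumes judges exposes a module-level settings object, as the diff suggests, and that its attributes can be monkeypatched:

    import pytest

    from src.agent_factory import judges

    def test_get_model_errors_without_anthropic(monkeypatch):
        # Simulate the environment where the anthropic extra is not installed.
        monkeypatch.setattr(judges, "_ANTHROPIC_AVAILABLE", False)
        monkeypatch.setattr(judges.settings, "llm_provider", "anthropic", raising=False)
        with pytest.raises(ImportError, match="Anthropic models are not available"):
            judges.get_model()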
src/agents/hypothesis_agent.py, src/agents/input_parser.py, src/agents/judge_agent_llm.py, src/agents/knowledge_gap.py, src/agents/long_writer.py, src/agents/proofreader.py, src/agents/report_agent.py, src/agents/thinking.py, src/agents/tool_selector.py, src/agents/writer.py
CHANGED

All ten agent modules receive the same interface fix: after result = await self.agent.run(...), the structured output is now read from result.data (the replaced attribute access is truncated in this view). Where mypy cannot see the attribute on the run result, the new lines carry type: ignore suppressions. Representative hunks (src/agents/writer.py, @@ -136,7 and @@ -145,7):

         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            report = result.   (attribute name truncated in this view)
+            report = result.data  # type: ignore[attr-defined]

             # Validate output
             if not report or not report.strip():

             self.logger.info("Report written", length=len(report), attempt=attempt + 1)

-            return report
+            return report  # type: ignore[no-any-return]

         except (TimeoutError, ConnectionError) as e:
             # Transient errors - retry

The same pattern lands in each file:
- hypothesis_agent.py (@@ -75,7): assessment = result.data  # type: ignore[attr-defined]
- input_parser.py (@@ -92,7): parsed_query = result.data
- judge_agent_llm.py (@@ -41,5): logger.info("LLM judge assessment complete", sufficient=run_result.data.sufficient)  # type: ignore[attr-defined] and return run_result.data  # type: ignore[no-any-return,attr-defined]
- knowledge_gap.py (@@ -113,7): evaluation = result.data
- long_writer.py (@@ -176,7): output = result.data
- proofreader.py (@@ -133,7 and @@ -142,7): final_report = result.data  # type: ignore[attr-defined]; return final_report  # type: ignore[no-any-return]
- report_agent.py (@@ -91,7): report = result.data  # type: ignore[attr-defined]
- thinking.py (@@ -112,11): observations = result.data  # type: ignore[attr-defined]; return observations  # type: ignore[no-any-return]
- tool_selector.py (@@ -117,7): selection_plan = result.data
- writer.py: shown above
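The common thread in all ten files is how pydantic-ai returns structured output. A minimal sketch in the 0.0.x API this repository pins (pydantic-ai>=0.0.16), where the validated object lives on result.data; note that later pydantic-ai releases renamed result_type/.data to output_type/.output, which is exactly the kind of interface drift this commit is working around:

    from pydantic import BaseModel
    from pydantic_ai import Agent

    class Verdict(BaseModel):
        sufficient: bool
        reasoning: str

    # "openai:gpt-4o" is an illustrative model id, not the project's configured model.
    agent = Agent("openai:gpt-4o", result_type=Verdict)

    async def judge(prompt: str) -> Verdict:
        result = await agent.run(prompt)
        return result.data  # validated Verdict instance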
src/app.py
CHANGED
@@ -172,20 +172,29 @@ def event_to_chat_message(event: AgentEvent) -> dict[str, Any]:
         "content": event.message,
     }

-    # Build metadata for accordion
+    # Build metadata for accordion according to Gradio ChatMessage spec
+    # Metadata keys: title (str), status ("pending"|"done"), log (str), duration (float)
+    # See: https://www.gradio.app/guides/agents-and-tool-usage
     metadata: dict[str, Any] = {}
+
+    # Title is required for accordion display - must be string
     if config["title"]:
-        metadata["title"] = config["title"]
+        metadata["title"] = str(config["title"])

     # Set status (pending shows spinner, done is collapsed)
+    # Must be exactly "pending" or "done" per Gradio spec
     if config["status"] == "pending":
         metadata["status"] = "pending"
+    elif config["status"] == "done":
+        metadata["status"] = "done"

-    # Add duration if available in data
+    # Add duration if available in data (must be float)
     if event.data and isinstance(event.data, dict) and "duration" in event.data:
-        (old assignment truncated in this view)
+        duration = event.data["duration"]
+        if isinstance(duration, int | float):
+            metadata["duration"] = float(duration)

-    # Add log info (iteration number, etc.)
+    # Add log info (iteration number, etc.) - must be string
     log_parts: list[str] = []
     if event.iteration > 0:
         log_parts.append(f"Iteration {event.iteration}")

@@ -198,12 +207,22 @@ def event_to_chat_message(event: AgentEvent) -> dict[str, Any]:
         metadata["log"] = " | ".join(log_parts)

     # Return as dict format for Gradio Chatbot compatibility
-    # (old comment truncated in this view)
+    # According to Gradio docs: https://www.gradio.app/guides/agents-and-tool-usage
+    # ChatMessage format: {"role": "assistant", "content": "...", "metadata": {...}}
+    # Metadata must have "title" key for accordion display
+    # Valid metadata keys: title (str), status ("pending"|"done"), log (str), duration (float)
     result: dict[str, Any] = {
         "role": "assistant",
         "content": event.message,
     }
-    if (old condition truncated in this view)
+    # Only add metadata if it has a title (required for accordion display)
+    # Ensure metadata values match Gradio's expected types
+    if metadata and metadata.get("title"):
+        # Ensure status is valid if present
+        if "status" in metadata:
+            status = metadata["status"]
+            if status not in ("pending", "done"):
+                metadata["status"] = "done"  # Default to "done" if invalid
         result["metadata"] = metadata
     return result

@@ -455,10 +474,11 @@ async def research_agent(
             yield msg

     except Exception as e:
+        # Return error message without metadata to avoid issues during example caching
+        # Metadata can cause validation errors when Gradio caches examples
         yield {
             "role": "assistant",
-            "content": f"❌ **Error**: {e!s}",
-            "metadata": {"title": "❌ Error", "status": "done"},
+            "content": f"❌ **Error**: {e!s}\n\n*Please check your configuration and try again.*",
         }

@@ -681,9 +701,21 @@ def create_demo() -> gr.Blocks:
             "**Sign in with HuggingFace** above to access premium models and providers."
         ),
         examples=[
-            (three plain-string examples, truncated in this view)
+            # When additional_inputs are provided, examples must be lists of lists
+            # Each inner list: [message, mode, hf_model, hf_provider]
+            [
+                "What drugs could be repurposed for Alzheimer's disease?",
+                "iterative",
+                None,
+                None,
+            ],
+            ["Is metformin effective for treating cancer?", "iterative", None, None],
+            [
+                "What medications show promise for Long COVID treatment?",
+                "iterative",
+                None,
+                None,
+            ],
         ],
         additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False),
         additional_inputs=[
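For context, a self-contained sketch of the ChatMessage-with-metadata shape that the updated event_to_chat_message produces; in messages mode Gradio renders such a dict as a collapsible accordion, with a spinner while status is "pending". The agent function here is a stand-in, not the app's real one:

    import time

    import gradio as gr

    def fake_agent(message, history):
        start = time.time()
        # First yield: accordion with spinner while the "tool" runs
        yield {
            "role": "assistant",
            "content": "Searching PubMed...",
            "metadata": {"title": "🔎 Search", "status": "pending"},
        }
        time.sleep(0.5)  # stand-in for real work
        # Second yield: collapsed accordion with log and duration
        yield {
            "role": "assistant",
            "content": "Found 12 papers.",
            "metadata": {
                "title": "🔎 Search",
                "status": "done",
                "log": "Iteration 1",
                "duration": round(time.time() - start, 2),
            },
        }

    demo = gr.ChatInterface(fake_agent, type="messages")
    # demo.launch()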
src/orchestrator/planner_agent.py
CHANGED
@@ -109,7 +109,7 @@ class PlannerAgent:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            report_plan = result.   (attribute name truncated in this view)
+            report_plan = result.data

             # Validate report plan
             if not report_plan.report_outline:
src/services/llamaindex_rag.py
CHANGED
@@ -136,7 +136,8 @@ class LlamaIndexRAGService:
             }
         except ImportError as e:
             raise ImportError(
-                "LlamaIndex dependencies not installed.   (rest of old message truncated in this view)
+                "LlamaIndex dependencies not installed. Required packages: chromadb, llama-index, "
+                "and their dependencies. If rpds is missing, try: uv pip install rpds-py"
             ) from e

     def _configure_embeddings(
src/services/statistical_analyzer.py
CHANGED
@@ -135,7 +135,7 @@ Generate executable Python code to analyze this evidence."""
         # Generate code
         agent = self._get_agent()
         code_result = await agent.run(prompt)
-        generated_code = code_result.   (attribute name truncated in this view)
+        generated_code = code_result.data  # type: ignore[attr-defined]

         # Execute in Modal sandbox
         loop = asyncio.get_running_loop()
tests/integration/test_rag_integration.py
CHANGED
@@ -121,9 +121,12 @@ class TestRAGServiceIntegration:
             assert len(response) > 0
             assert "python" in response.lower()
         except Exception as e:
-            # If model is not available (404), skip the test
-            (old condition truncated in this view)
+            # If model is not available (404) or authentication required (401), skip the test
+            error_str = str(e)
+            if "404" in error_str or "Not Found" in error_str:
                 pytest.skip(f"HuggingFace model not available via inference API: {e}")
+            if "401" in error_str or "Unauthorized" in error_str or "Invalid username or password" in error_str:
+                pytest.skip(f"HuggingFace authentication required but not available: {e}")
             raise

         # Cleanup
tests/unit/agent_factory/test_judges.py
CHANGED
@@ -34,6 +34,7 @@ class TestJudgeHandler:

         # Mock the PydanticAI agent
         mock_result = MagicMock()
+        type(mock_result).data = mock_assessment  # pydantic-ai uses .data for structured output
         mock_result.output = mock_assessment

         with (

@@ -88,7 +89,8 @@ class TestJudgeHandler:
         )

         mock_result = MagicMock()
-        mock_result.   (old assignment truncated in this view)
+        mock_result.data = mock_assessment
+        mock_result.output = mock_assessment  # Some code may use .output

         with (
             patch("src.agent_factory.judges.get_model") as mock_get_model,
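The type(mock_result).data = ... form is deliberate: unittest.mock documents that property-like attributes must be attached to the mock's type, and each Mock instance gets its own subclass, so the assignment does not leak to other mocks. A small self-contained illustration of both forms used in these tests:

    from unittest.mock import MagicMock, PropertyMock

    mock_result = MagicMock()

    # Simple attribute: fine when production code just reads `result.data`.
    mock_result.data = {"sufficient": True}
    assert mock_result.data == {"sufficient": True}

    # Property-style: attach to the *type*, the documented pattern for properties.
    prop = PropertyMock(return_value={"sufficient": True})
    type(mock_result).data = prop
    assert mock_result.data == {"sufficient": True}
    prop.assert_called_once()  # property access is recorded as a call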
tests/unit/agents/test_hypothesis_agent.py
CHANGED
@@ -28,18 +28,17 @@ def sample_evidence():

 @pytest.fixture
 def mock_assessment():
+    primary_hyp = MechanismHypothesis(
+        drug="Metformin",
+        target="AMPK",
+        pathway="mTOR inhibition",
+        effect="Reduced cancer cell proliferation",
+        confidence=0.75,
+        search_suggestions=["metformin AMPK cancer", "mTOR cancer therapy"],
+    )
     return HypothesisAssessment(
-        hypotheses=[
-            (inline MechanismHypothesis construction with the same fields, partially truncated in this view)
-        ],
-        primary_hypothesis=None,
+        hypotheses=[primary_hyp],
+        primary_hypothesis=primary_hyp,  # Set primary hypothesis
         knowledge_gaps=["Clinical trial data needed"],
         recommended_searches=["metformin clinical trial cancer"],
     )

@@ -54,8 +53,9 @@ async def test_hypothesis_agent_generates_hypotheses(sample_evidence, mock_assessment):
     with patch("src.agents.hypothesis_agent.Agent") as mock_agent_class:
         mock_get_model.return_value = MagicMock()  # Mock model
         mock_result = MagicMock()
+        type(mock_result).data = mock_assessment  # pydantic-ai uses .data for structured output
         mock_result.output = mock_assessment
-        # pydantic-ai Agent returns an object with .   (comment truncated in this view)
+        # pydantic-ai Agent returns an object with .data for structured output
         mock_agent_class.return_value.run = AsyncMock(return_value=mock_result)

         agent = HypothesisAgent(store)

@@ -94,6 +94,7 @@ async def test_hypothesis_agent_uses_embeddings(sample_evidence, mock_assessment):
         mock_format.return_value = "Prompt"

         mock_result = MagicMock()
+        type(mock_result).data = mock_assessment  # pydantic-ai uses .data for structured output
         mock_result.output = mock_assessment
         mock_agent_class.return_value.run = AsyncMock(return_value=mock_result)
tests/unit/agents/test_input_parser.py
CHANGED
@@ -18,6 +18,13 @@ def mock_model() -> MagicMock:
     return model


+@pytest.fixture(autouse=True)
+def patch_infer_model(mock_model: MagicMock):
+    """Auto-patch infer_model for all tests to avoid OpenAI API key requirements."""
+    with patch("pydantic_ai.models.infer_model", return_value=mock_model):
+        yield
+
+
 @pytest.fixture
 def mock_parsed_query_iterative() -> ParsedQuery:
     """Create a mock ParsedQuery for iterative mode."""

@@ -51,7 +58,9 @@ def mock_agent_result_iterative(
     mock_parsed_query_iterative: ParsedQuery,
 ) -> RunResult[ParsedQuery]:
     """Create a mock agent result for iterative mode."""
-    result = MagicMock(   (old construction truncated in this view)
+    result = MagicMock()
+    # Configure the mock to return the actual output when .data is accessed
+    type(result).data = mock_parsed_query_iterative
     result.output = mock_parsed_query_iterative
     return result

@@ -61,7 +70,9 @@ def mock_agent_result_deep(
     mock_parsed_query_deep: ParsedQuery,
 ) -> RunResult[ParsedQuery]:
     """Create a mock agent result for deep mode."""
-    result = MagicMock(   (old construction truncated in this view)
+    result = MagicMock()
+    # Configure the mock to return the actual output when .data is accessed
+    type(result).data = mock_parsed_query_deep
     result.output = mock_parsed_query_deep
     return result

@@ -72,33 +83,52 @@ def input_parser_agent(mock_model: MagicMock) -> InputParserAgent:
     return InputParserAgent(model=mock_model)


+@pytest.fixture(autouse=True)
+def patch_infer_model(mock_model: MagicMock):
+    """Auto-patch infer_model for all tests to avoid OpenAI API key requirements."""
+    with patch("pydantic_ai.models.infer_model", return_value=mock_model):
+        yield
+
+
 class TestInputParserAgentInit:
     """Test InputParserAgent initialization."""

-    (old test signature truncated in this view)
+    @patch("pydantic_ai.models.infer_model")
+    def test_input_parser_agent_init_with_model(
+        self, mock_infer_model: MagicMock, mock_model: MagicMock
+    ) -> None:
         """Test InputParserAgent initialization with provided model."""
+        mock_infer_model.return_value = mock_model
         agent = InputParserAgent(model=mock_model)
         assert agent.model == mock_model
         assert agent.agent is not None

     @patch("src.agents.input_parser.get_model")
+    @patch("pydantic_ai.models.infer_model")
     def test_input_parser_agent_init_without_model(
         self,
+        mock_infer_model: MagicMock,
+        mock_get_model: MagicMock,
+        mock_model: MagicMock,
     ) -> None:
         """Test InputParserAgent initialization without model (uses default)."""
         mock_get_model.return_value = mock_model
+        mock_infer_model.return_value = mock_model
         agent = InputParserAgent()
         assert agent.model == mock_model
         mock_get_model.assert_called_once()

+    @patch("pydantic_ai.models.infer_model")
     def test_input_parser_agent_has_correct_system_prompt(
-        self,
+        self, mock_infer_model: MagicMock, mock_model: MagicMock
     ) -> None:
         """Test that InputParserAgent has correct system prompt."""
+        mock_infer_model.return_value = mock_model
+        agent = InputParserAgent(model=mock_model)
         # System prompt should contain key instructions
         # In pydantic_ai, system_prompt is a property that returns the prompt string
         # For mocked agents, we check that the agent was created with a system prompt
-        assert    (old assertion truncated in this view)
+        assert agent.agent is not None
         # The actual system prompt is set during agent creation
         # We verify the agent exists and was properly initialized
         # Note: Direct access to system_prompt may not work with mocks