Joseph Pollack committed on
Commit cb48bd4 · unverified · 1 Parent(s): 448c679

fix interface

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .env copy.example +0 -124
  2. .env.example +93 -17
  3. .github/README.md +0 -25
  4. .github/workflows/ci.yml +7 -7
  5. .pre-commit-config.yaml +4 -4
  6. docs/api/agents.md +2 -0
  7. docs/api/models.md +2 -0
  8. docs/api/orchestrators.md +2 -0
  9. docs/api/services.md +2 -0
  10. docs/api/tools.md +2 -0
  11. docs/architecture/agents.md +2 -0
  12. docs/architecture/middleware.md +2 -0
  13. docs/architecture/services.md +2 -0
  14. docs/architecture/tools.md +2 -0
  15. docs/contributing/code-quality.md +2 -0
  16. docs/contributing/code-style.md +2 -0
  17. docs/contributing/error-handling.md +2 -0
  18. docs/contributing/implementation-patterns.md +2 -0
  19. docs/contributing/index.md +2 -0
  20. docs/contributing/prompt-engineering.md +2 -0
  21. docs/contributing/testing.md +2 -0
  22. docs/getting-started/examples.md +2 -0
  23. docs/getting-started/installation.md +2 -0
  24. docs/getting-started/mcp-integration.md +2 -0
  25. docs/getting-started/quick-start.md +2 -0
  26. docs/license.md +2 -0
  27. docs/overview/architecture.md +2 -0
  28. docs/overview/features.md +2 -0
  29. docs/team.md +2 -0
  30. pyproject.toml +1 -0
  31. requirements.txt +31 -18
  32. src/agent_factory/judges.py +15 -2
  33. src/agents/hypothesis_agent.py +1 -1
  34. src/agents/input_parser.py +1 -1
  35. src/agents/judge_agent_llm.py +2 -2
  36. src/agents/knowledge_gap.py +1 -1
  37. src/agents/long_writer.py +1 -1
  38. src/agents/proofreader.py +2 -2
  39. src/agents/report_agent.py +1 -1
  40. src/agents/thinking.py +2 -2
  41. src/agents/tool_selector.py +1 -1
  42. src/agents/writer.py +2 -2
  43. src/app.py +44 -12
  44. src/orchestrator/planner_agent.py +1 -1
  45. src/services/llamaindex_rag.py +2 -1
  46. src/services/statistical_analyzer.py +1 -1
  47. tests/integration/test_rag_integration.py +5 -2
  48. tests/unit/agent_factory/test_judges.py +3 -1
  49. tests/unit/agents/test_hypothesis_agent.py +13 -12
  50. tests/unit/agents/test_input_parser.py +36 -6
.env copy.example DELETED
@@ -1,124 +0,0 @@
-# ============== LLM CONFIGURATION ==============
-
-# Provider: "openai", "anthropic", or "huggingface"
-LLM_PROVIDER=openai
-
-# API Keys (at least one required for full LLM analysis)
-OPENAI_API_KEY=sk-your-key-here
-ANTHROPIC_API_KEY=sk-ant-your-key-here
-
-# Model names (optional - sensible defaults set in config.py)
-# OPENAI_MODEL=gpt-5.1
-# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
-
-# ============== HUGGINGFACE CONFIGURATION ==============
-
-# HuggingFace Token - enables gated models and higher rate limits
-# Get yours at: https://huggingface.co/settings/tokens
-#
-# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
-# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
-#
-# For HuggingFace Spaces deployment:
-# Set this as a "Secret" in Space Settings -> Variables and secrets
-# Users/judges don't need their own token - the Space secret is used
-#
-HF_TOKEN=hf_your-token-here
-# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)
-
-# Default HuggingFace model for inference (gated, requires auth)
-# Can be overridden in UI dropdown
-# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
-HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
-
-# Fallback models for HuggingFace Inference API (comma-separated)
-# Models are tried in order until one succeeds
-# Format: model1,model2,model3
-# Latest reasoning models first, then reliable fallbacks
-# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
-# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
-HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct
-
-# Override model/provider selection (optional, usually set via UI)
-# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
-# HF_PROVIDER=hyperbolic
-
-# ============== EMBEDDING CONFIGURATION ==============
-
-# Embedding Provider: "openai", "local", or "huggingface"
-# Default: "local" (no API key required)
-EMBEDDING_PROVIDER=local
-
-# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
-OPENAI_EMBEDDING_MODEL=text-embedding-3-small
-
-# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
-# BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2
-LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
-
-# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
-HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
-
-# ============== AGENT CONFIGURATION ==============
-
-MAX_ITERATIONS=10
-SEARCH_TIMEOUT=30
-LOG_LEVEL=INFO
-
-# Graph-based execution (experimental)
-# USE_GRAPH_EXECUTION=false
-
-# Budget & Rate Limiting
-# DEFAULT_TOKEN_LIMIT=100000
-# DEFAULT_TIME_LIMIT_MINUTES=10
-# DEFAULT_ITERATIONS_LIMIT=10
-
-# ============== WEB SEARCH CONFIGURATION ==============
-
-# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
-# Default: "duckduckgo" (no API key required)
-WEB_SEARCH_PROVIDER=duckduckgo
-
-# Serper API Key (for Google search via Serper)
-# SERPER_API_KEY=your-serper-key-here
-
-# SearchXNG Host URL (for self-hosted search)
-# SEARCHXNG_HOST=http://localhost:8080
-
-# Brave Search API Key
-# BRAVE_API_KEY=your-brave-key-here
-
-# Tavily API Key
-# TAVILY_API_KEY=your-tavily-key-here
-
-# ============== EXTERNAL SERVICES ==============
-
-# PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec)
-NCBI_API_KEY=your-ncbi-key-here
-
-# Modal (optional - for secure code execution sandbox)
-# MODAL_TOKEN_ID=your-modal-token-id
-# MODAL_TOKEN_SECRET=your-modal-token-secret
-
-# ============== VECTOR DATABASE (ChromaDB) ==============
-
-# ChromaDB storage path
-CHROMA_DB_PATH=./chroma_db
-
-# Persist ChromaDB to disk (default: true)
-# CHROMA_DB_PERSIST=true
-
-# Remote ChromaDB server (optional)
-# CHROMA_DB_HOST=localhost
-# CHROMA_DB_PORT=8000
-
-# ============== RAG SERVICE CONFIGURATION ==============
-
-# ChromaDB collection name for RAG
-# RAG_COLLECTION_NAME=deepcritical_evidence
-
-# Number of top results to retrieve from RAG
-# RAG_SIMILARITY_TOP_K=5
-
-# Automatically ingest evidence into RAG
-# RAG_AUTO_INGEST=true
.env.example CHANGED
@@ -1,6 +1,6 @@
 # ============== LLM CONFIGURATION ==============
 
-# Provider: "openai" or "anthropic"
+# Provider: "openai", "anthropic", or "huggingface"
 LLM_PROVIDER=openai
 
 # API Keys (at least one required for full LLM analysis)
@@ -8,30 +8,56 @@ OPENAI_API_KEY=sk-your-key-here
 ANTHROPIC_API_KEY=sk-ant-your-key-here
 
 # Model names (optional - sensible defaults set in config.py)
-# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
 # OPENAI_MODEL=gpt-5.1
+# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
 
-# ============== EMBEDDINGS ==============
-
-# OpenAI Embedding Model (used if LLM_PROVIDER is openai and performing RAG/Embeddings)
-OPENAI_EMBEDDING_MODEL=text-embedding-3-small
-
-# Local Embedding Model (used for local/offline embeddings)
-LOCAL_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
-
-# ============== HUGGINGFACE (FREE TIER) ==============
+# ============== HUGGINGFACE CONFIGURATION ==============
 
-# HuggingFace Token - enables Llama 3.1 (best quality free model)
+# HuggingFace Token - enables gated models and higher rate limits
 # Get yours at: https://huggingface.co/settings/tokens
 #
-# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta)
-# WITH HF_TOKEN: Uses Llama 3.1 8B Instruct (requires accepting license)
+# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
+# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
 #
 # For HuggingFace Spaces deployment:
 # Set this as a "Secret" in Space Settings -> Variables and secrets
 # Users/judges don't need their own token - the Space secret is used
 #
 HF_TOKEN=hf_your-token-here
+# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)
+
+# Default HuggingFace model for inference (gated, requires auth)
+# Can be overridden in UI dropdown
+# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
+HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
+
+# Fallback models for HuggingFace Inference API (comma-separated)
+# Models are tried in order until one succeeds
+# Format: model1,model2,model3
+# Latest reasoning models first, then reliable fallbacks
+# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
+# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
+HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct
+
+# Override model/provider selection (optional, usually set via UI)
+# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
+# HF_PROVIDER=hyperbolic
+
+# ============== EMBEDDING CONFIGURATION ==============
+
+# Embedding Provider: "openai", "local", or "huggingface"
+# Default: "local" (no API key required)
+EMBEDDING_PROVIDER=local
+
+# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
+OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+
+# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
+# BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2
+LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
+
+# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
+HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 
 # ============== AGENT CONFIGURATION ==============
 
@@ -39,10 +65,60 @@ MAX_ITERATIONS=10
 SEARCH_TIMEOUT=30
 LOG_LEVEL=INFO
 
+# Graph-based execution (experimental)
+# USE_GRAPH_EXECUTION=false
+
+# Budget & Rate Limiting
+# DEFAULT_TOKEN_LIMIT=100000
+# DEFAULT_TIME_LIMIT_MINUTES=10
+# DEFAULT_ITERATIONS_LIMIT=10
+
+# ============== WEB SEARCH CONFIGURATION ==============
+
+# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
+# Default: "duckduckgo" (no API key required)
+WEB_SEARCH_PROVIDER=duckduckgo
+
+# Serper API Key (for Google search via Serper)
+# SERPER_API_KEY=your-serper-key-here
+
+# SearchXNG Host URL (for self-hosted search)
+# SEARCHXNG_HOST=http://localhost:8080
+
+# Brave Search API Key
+# BRAVE_API_KEY=your-brave-key-here
+
+# Tavily API Key
+# TAVILY_API_KEY=your-tavily-key-here
+
 # ============== EXTERNAL SERVICES ==============
 
-# PubMed (optional - higher rate limits)
+# PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec)
 NCBI_API_KEY=your-ncbi-key-here
 
-# Vector Database (optional - for LlamaIndex RAG)
+# Modal (optional - for secure code execution sandbox)
+# MODAL_TOKEN_ID=your-modal-token-id
+# MODAL_TOKEN_SECRET=your-modal-token-secret
+
+# ============== VECTOR DATABASE (ChromaDB) ==============
+
+# ChromaDB storage path
 CHROMA_DB_PATH=./chroma_db
+
+# Persist ChromaDB to disk (default: true)
+# CHROMA_DB_PERSIST=true
+
+# Remote ChromaDB server (optional)
+# CHROMA_DB_HOST=localhost
+# CHROMA_DB_PORT=8000
+
+# ============== RAG SERVICE CONFIGURATION ==============
+
+# ChromaDB collection name for RAG
+# RAG_COLLECTION_NAME=deepcritical_evidence
+
+# Number of top results to retrieve from RAG
+# RAG_SIMILARITY_TOP_K=5
+
+# Automatically ingest evidence into RAG
+# RAG_AUTO_INGEST=true
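
For context, a minimal sketch of how these variables could be consumed with pydantic-settings; the project's config.py is not shown in this diff, so the Settings class and field names below are assumptions, not its actual code:

from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    # Reads .env; extra="ignore" keeps the optional vars truly optional
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    llm_provider: str = "openai"             # LLM_PROVIDER
    embedding_provider: str = "local"        # EMBEDDING_PROVIDER
    web_search_provider: str = "duckduckgo"  # WEB_SEARCH_PROVIDER
    hf_fallback_models: str = ""             # HF_FALLBACK_MODELS (comma-separated)

settings = Settings()
# HF_FALLBACK_MODELS is documented above as "model1,model2,model3"
fallback_models = [m.strip() for m in settings.hf_fallback_models.split(",") if m.strip()]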
.github/README.md CHANGED
@@ -1,28 +1,3 @@
----
-title: DeepCritical
-emoji: 🧬
-colorFrom: blue
-colorTo: purple
-sdk: gradio
-sdk_version: "6.0.1"
-python_version: "3.11"
-app_file: src/app.py
-hf_oauth: true
-hf_oauth_expiration_minutes: 480
-hf_oauth_scopes:
-  - inference-api
-pinned: true
-license: mit
-tags:
-  - mcp-in-action-track-enterprise
-  - mcp-hackathon
-  - drug-repurposing
-  - biomedical-ai
-  - pydantic-ai
-  - llamaindex
-  - modal
----
-
 <div align="center">
 
 [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=🐙%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
.github/workflows/ci.yml CHANGED
@@ -33,19 +33,19 @@ jobs:
       - name: Lint with ruff
         continue-on-error: true
         run: |
-          uv run ruff check . --exclude tests
-          uv run ruff format --check . --exclude tests
+          uv run ruff check . --exclude tests --exclude reference_repos
+          uv run ruff format --check . --exclude tests --exclude reference_repos
 
       - name: Type check with mypy
         continue-on-error: true
         run: |
-          uv run mypy src
+          uv run mypy src --ignore-missing-imports
 
-      - name: Run unit tests (No Black Box Apis)
+      - name: Run unit tests (No OpenAI/Anthropic, HuggingFace only)
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
-          uv run pytest tests/unit/ -v -m "not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml
+          uv run pytest tests/unit/ -v -m "not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml
 
       - name: Run local embeddings tests
         env:
@@ -61,11 +61,11 @@ jobs:
           uv run pytest tests/integration/ -v -m "huggingface and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
         continue-on-error: true  # Allow failures if HF_TOKEN not set
 
-      - name: Run non-OpenAI integration tests (excluding embedding providers)
+      - name: Run non-OpenAI/Anthropic integration tests (excluding embedding providers)
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
-          uv run pytest tests/integration/ -v -m "integration and not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
+          uv run pytest tests/integration/ -v -m "integration and not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
         continue-on-error: true  # Allow failures if dependencies not available
 
       - name: Upload coverage reports to Codecov
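
The -m filters above deselect tests by marker rather than by path. A hedged sketch of what an opted-out test might look like; the marker names come from the CI flags, while the test itself is hypothetical and assumes the markers are registered in pyproject.toml or pytest.ini:

import pytest

@pytest.mark.anthropic
def test_anthropic_judge_live() -> None:
    # Deselected in CI by -m "not openai and not anthropic and not embedding_provider"
    pytest.skip("illustrative placeholder; a real test would call the live API")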
.pre-commit-config.yaml CHANGED
@@ -1,16 +1,16 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.4
+    rev: v0.14.7  # Compatible with ruff>=0.14.6 (matches CI)
     hooks:
       - id: ruff
-        args: [--fix, --exclude, tests]
+        args: [--fix, --exclude, tests, --exclude, reference_repos]
         exclude: ^reference_repos/
       - id: ruff-format
-        args: [--exclude, tests]
+        args: [--exclude, tests, --exclude, reference_repos]
         exclude: ^reference_repos/
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.10.0
+    rev: v1.18.2  # Matches CI version mypy>=1.18.2
     hooks:
       - id: mypy
         files: ^src/
docs/api/agents.md CHANGED
@@ -262,3 +262,5 @@ def create_input_parser_agent(model: Any | None = None) -> InputParserAgent
+
+

docs/api/models.md CHANGED
@@ -240,3 +240,5 @@ class BudgetStatus(BaseModel):
+
+

docs/api/orchestrators.md CHANGED
@@ -187,3 +187,5 @@ Runs Magentic orchestration.
+
+

docs/api/services.md CHANGED
@@ -193,3 +193,5 @@ Analyzes a hypothesis using statistical methods.
+
+

docs/api/tools.md CHANGED
@@ -227,3 +227,5 @@ Searches multiple tools in parallel.
+
+

docs/architecture/agents.md CHANGED
@@ -184,3 +184,5 @@ Factory functions:
+
+

docs/architecture/middleware.md CHANGED
@@ -134,3 +134,5 @@ All middleware components use `ContextVar` for thread-safe isolation:
+
+

docs/architecture/services.md CHANGED
@@ -134,3 +134,5 @@ if settings.has_openai_key:
+
+

docs/architecture/tools.md CHANGED
@@ -167,3 +167,5 @@ search_handler = SearchHandler(
+
+

docs/contributing/code-quality.md CHANGED
@@ -73,3 +73,5 @@ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
+
+

docs/contributing/code-style.md CHANGED
@@ -53,3 +53,5 @@ result = await loop.run_in_executor(None, cpu_bound_function, args)
+
+

docs/contributing/error-handling.md CHANGED
@@ -61,3 +61,5 @@ except httpx.HTTPError as e:
+
+

docs/contributing/implementation-patterns.md CHANGED
@@ -76,3 +76,5 @@ def get_embedding_service() -> EmbeddingService:
+
+

docs/contributing/index.md CHANGED
@@ -155,3 +155,5 @@ Thank you for contributing to DeepCritical!
+
+

docs/contributing/prompt-engineering.md CHANGED
@@ -61,3 +61,5 @@ This document outlines prompt engineering guidelines and citation validation rules
+
+

docs/contributing/testing.md CHANGED
@@ -57,3 +57,5 @@ async def test_real_pubmed_search():
+
+

docs/getting-started/examples.md CHANGED
@@ -201,3 +201,5 @@ USE_GRAPH_EXECUTION=true
+
+

docs/getting-started/installation.md CHANGED
@@ -140,3 +140,5 @@ uv run pre-commit install
+
+

docs/getting-started/mcp-integration.md CHANGED
@@ -207,3 +207,5 @@ You can configure multiple DeepCritical instances:
+
+

docs/getting-started/quick-start.md CHANGED
@@ -111,3 +111,5 @@ What are the active clinical trials investigating Alzheimer's disease treatments
+
+

docs/license.md CHANGED
@@ -31,3 +31,5 @@ SOFTWARE.
+
+

docs/overview/architecture.md CHANGED
@@ -188,3 +188,5 @@ The system supports complex research workflows through:
+
+

docs/overview/features.md CHANGED
@@ -140,3 +140,5 @@ DeepCritical provides a comprehensive set of features for AI-assisted research:
+
+

docs/team.md CHANGED
@@ -36,3 +36,5 @@ We welcome contributions! See the [Contributing Guide](contributing/index.md) for details.
+
+
pyproject.toml CHANGED
@@ -29,6 +29,7 @@ dependencies = [
     "tokenizers>=0.22.0,<=0.23.0",
     "transformers>=4.57.2",
     "chromadb>=0.4.0",
+    "rpds-py>=0.29.0",  # Python implementation of rpds (required by chromadb on Windows)
     "sentence-transformers>=2.2.0",
     "numpy<2.0",
     "agent-framework-core>=1.0.0b251120,<2.0.0",
requirements.txt CHANGED
@@ -9,40 +9,53 @@ pydantic>=2.7
 pydantic-settings>=2.2
 pydantic-ai>=0.0.16
 
-
 # OPTIONAL AI Providers
 openai>=1.0.0
-# anthropic>=0.18.0
-
-# Multi-agent orchestration (Advanced mode)
-agent-framework-core>=1.0.0b251120
-
-# Web search
-duckduckgo-search>=5.0
+anthropic>=0.18.0
 
 # HTTP & Parsing
 httpx>=0.27
 beautifulsoup4>=4.12
 xmltodict>=0.13
 
+# HuggingFace Hub
+huggingface-hub>=0.20.0
+
 # UI (Gradio with MCP server support)
-gradio[mcp]>=6.0.0
+gradio[mcp,oauth]>=6.0.0
 
 # Utils
 python-dotenv>=1.0
 tenacity>=8.2
 structlog>=24.1
 requests>=2.32.5
-limits>=3.0  # Rate limiting
+limits>=3.0  # Rate limiting
+pydantic-graph>=1.22.0
+
+# Web search
+duckduckgo-search>=5.0
 
-# Optional: Modal for code execution
-modal>=0.63.0
+# Multi-agent orchestration (Advanced mode)
+agent-framework-core>=1.0.0b251120,<2.0.0
 
-# Optional: LlamaIndex RAG
-llama-index>=0.11.0
-llama-index-llms-openai
-llama-index-llms-huggingface
-llama-index-embeddings-openai
-llama-index-vector-stores-chroma
+# LlamaIndex RAG
+llama-index-llms-huggingface>=0.6.1
+llama-index-llms-huggingface-api>=0.6.1
+llama-index-vector-stores-chroma>=0.5.3
+llama-index>=0.14.8
+llama-index-llms-openai>=0.6.9
+llama-index-embeddings-openai>=0.5.1
+
+# Embeddings & Vector Store
+tokenizers>=0.22.0,<=0.23.0
+transformers>=4.57.2
 chromadb>=0.4.0
+rpds-py>=0.29.0  # Python implementation of rpds (required by chromadb on Windows)
 sentence-transformers>=2.2.0
+numpy<2.0
+
+# Optional: Modal for code execution
+modal>=0.63.0
+
+# Pydantic AI with HuggingFace support
+pydantic-ai-slim[huggingface]>=0.0.18
src/agent_factory/judges.py CHANGED
@@ -8,10 +8,18 @@ from typing import Any
 import structlog
 from huggingface_hub import InferenceClient
 from pydantic_ai import Agent
-from pydantic_ai.models.anthropic import AnthropicModel
 from pydantic_ai.models.openai import OpenAIModel  # type: ignore[attr-defined]
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
 
+# Try to import AnthropicModel (may not be available if anthropic package is missing)
+try:
+    from pydantic_ai.models.anthropic import AnthropicModel
+
+    _ANTHROPIC_AVAILABLE = True
+except ImportError:
+    AnthropicModel = None  # type: ignore[assignment, misc]
+    _ANTHROPIC_AVAILABLE = False
+
 # Try to import HuggingFace support (may not be available in all pydantic-ai versions)
 # According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires
 # pydantic-ai with huggingface extra or pydantic-ai-slim[huggingface]
@@ -50,6 +58,11 @@ def get_model() -> Any:
     llm_provider = settings.llm_provider
 
     if llm_provider == "anthropic":
+        if not _ANTHROPIC_AVAILABLE:
+            raise ImportError(
+                "Anthropic models are not available. "
+                "Please install with: uv add 'pydantic-ai[anthropic]' or use 'openai'/'huggingface' as the LLM provider."
+            )
         return AnthropicModel(settings.anthropic_model, api_key=settings.anthropic_api_key)  # type: ignore[call-arg]
 
     if llm_provider == "huggingface":
@@ -144,7 +157,7 @@ class JudgeHandler:
         try:
             # Run the agent with structured output
             result = await self.agent.run(user_prompt)
-            assessment = result.output  # type: ignore[attr-defined]
+            assessment = result.data
 
             logger.info(
                 "Assessment complete",
src/agents/hypothesis_agent.py CHANGED
@@ -75,7 +75,7 @@ class HypothesisAgent(BaseAgent):  # type: ignore[misc]
         # Generate hypotheses with diverse evidence selection
         prompt = await format_hypothesis_prompt(query, evidence, embeddings=self._embeddings)
         result = await self._get_agent().run(prompt)
-        assessment = result.output  # pydantic-ai returns .output for structured output
+        assessment = result.data  # type: ignore[attr-defined]
 
         # Store hypotheses in shared context
         existing = self._evidence_store.get("hypotheses", [])
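
This commit swaps result.output for result.data across the agents: pydantic-ai has exposed the structured result as .data in older releases and .output in newer ones. A hedged, version-tolerant accessor, not part of the diff; the helper name is hypothetical:

from typing import Any


def structured_result(result: Any) -> Any:
    # Prefer whichever attribute this pydantic-ai version provides
    for attr in ("output", "data"):
        if hasattr(result, attr):
            return getattr(result, attr)
    raise AttributeError("agent run result has neither .output nor .data")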
src/agents/input_parser.py CHANGED
@@ -92,7 +92,7 @@ class InputParserAgent:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            parsed_query = result.output
+            parsed_query = result.data
 
             # Validate parsed query
             if not parsed_query.original_query:
src/agents/judge_agent_llm.py CHANGED
@@ -41,5 +41,5 @@ History of previous attempts: {len(history)}
 Evaluate validity and sufficiency."""
 
         run_result = await self.agent.run(prompt)
-        logger.info("LLM judge assessment complete", sufficient=run_result.output.sufficient)
-        return run_result.output  # type: ignore[no-any-return]
+        logger.info("LLM judge assessment complete", sufficient=run_result.data.sufficient)  # type: ignore[attr-defined]
+        return run_result.data  # type: ignore[no-any-return,attr-defined]
src/agents/knowledge_gap.py CHANGED
@@ -113,7 +113,7 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            evaluation = result.output
+            evaluation = result.data
 
             self.logger.info(
                 "Knowledge gap evaluation complete",
src/agents/long_writer.py CHANGED
@@ -176,7 +176,7 @@ class LongWriterAgent:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            output = result.output
+            output = result.data
 
             # Validate output
             if not output or not isinstance(output, LongWriterOutput):
src/agents/proofreader.py CHANGED
@@ -133,7 +133,7 @@ REPORT DRAFT:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            final_report = result.output
+            final_report = result.data  # type: ignore[attr-defined]
 
             # Validate output
             if not final_report or not final_report.strip():
@@ -142,7 +142,7 @@ REPORT DRAFT:
 
             self.logger.info("Report proofread", length=len(final_report), attempt=attempt + 1)
 
-            return final_report
+            return final_report  # type: ignore[no-any-return]
 
         except (TimeoutError, ConnectionError) as e:
             # Transient errors - retry
src/agents/report_agent.py CHANGED
@@ -91,7 +91,7 @@ class ReportAgent(BaseAgent):  # type: ignore[misc]
         )
 
         result = await self._get_agent().run(prompt)
-        report = result.output
+        report = result.data  # type: ignore[attr-defined]
 
         # ═══════════════════════════════════════════════════════════════════
         # 🚨 CRITICAL: Validate citations to prevent hallucination
src/agents/thinking.py CHANGED
@@ -112,11 +112,11 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            observations = result.output
+            observations = result.data  # type: ignore[attr-defined]
 
             self.logger.info("Observations generated", length=len(observations))
 
-            return observations
+            return observations  # type: ignore[no-any-return]
 
         except Exception as e:
             self.logger.error("Observation generation failed", error=str(e))
src/agents/tool_selector.py CHANGED
@@ -117,7 +117,7 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            selection_plan = result.output
+            selection_plan = result.data
 
             self.logger.info(
                 "Tool selection complete",
src/agents/writer.py CHANGED
@@ -136,7 +136,7 @@ FINDINGS:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            report = result.output
+            report = result.data  # type: ignore[attr-defined]
 
             # Validate output
             if not report or not report.strip():
@@ -145,7 +145,7 @@ FINDINGS:
 
             self.logger.info("Report written", length=len(report), attempt=attempt + 1)
 
-            return report
+            return report  # type: ignore[no-any-return]
 
         except (TimeoutError, ConnectionError) as e:
             # Transient errors - retry
src/app.py CHANGED
@@ -172,20 +172,29 @@ def event_to_chat_message(event: AgentEvent) -> dict[str, Any]:
         "content": event.message,
     }
 
-    # Build metadata for accordion
+    # Build metadata for accordion according to Gradio ChatMessage spec
+    # Metadata keys: title (str), status ("pending"|"done"), log (str), duration (float)
+    # See: https://www.gradio.app/guides/agents-and-tool-usage
     metadata: dict[str, Any] = {}
+
+    # Title is required for accordion display - must be string
     if config["title"]:
-        metadata["title"] = config["title"]
+        metadata["title"] = str(config["title"])
 
     # Set status (pending shows spinner, done is collapsed)
+    # Must be exactly "pending" or "done" per Gradio spec
     if config["status"] == "pending":
         metadata["status"] = "pending"
+    elif config["status"] == "done":
+        metadata["status"] = "done"
 
-    # Add duration if available in data
+    # Add duration if available in data (must be float)
     if event.data and isinstance(event.data, dict) and "duration" in event.data:
-        metadata["duration"] = event.data["duration"]
+        duration = event.data["duration"]
+        if isinstance(duration, int | float):
+            metadata["duration"] = float(duration)
 
-    # Add log info (iteration number, etc.)
+    # Add log info (iteration number, etc.) - must be string
     log_parts: list[str] = []
     if event.iteration > 0:
         log_parts.append(f"Iteration {event.iteration}")
@@ -198,12 +207,22 @@ def event_to_chat_message(event: AgentEvent) -> dict[str, Any]:
         metadata["log"] = " | ".join(log_parts)
 
     # Return as dict format for Gradio Chatbot compatibility
-    # Gradio Chatbot expects dict format, not gr.ChatMessage objects
+    # According to Gradio docs: https://www.gradio.app/guides/agents-and-tool-usage
+    # ChatMessage format: {"role": "assistant", "content": "...", "metadata": {...}}
+    # Metadata must have "title" key for accordion display
+    # Valid metadata keys: title (str), status ("pending"|"done"), log (str), duration (float)
     result: dict[str, Any] = {
         "role": "assistant",
        "content": event.message,
    }
-    if metadata:
+    # Only add metadata if it has a title (required for accordion display)
+    # Ensure metadata values match Gradio's expected types
+    if metadata and metadata.get("title"):
+        # Ensure status is valid if present
+        if "status" in metadata:
+            status = metadata["status"]
+            if status not in ("pending", "done"):
+                metadata["status"] = "done"  # Default to "done" if invalid
         result["metadata"] = metadata
     return result
 
@@ -455,10 +474,11 @@ async def research_agent(
             yield msg
 
     except Exception as e:
+        # Return error message without metadata to avoid issues during example caching
+        # Metadata can cause validation errors when Gradio caches examples
        yield {
             "role": "assistant",
-            "content": f"❌ **Error**: {e!s}",
-            "metadata": {"title": "❌ Error", "status": "done"},
+            "content": f"❌ **Error**: {e!s}\n\n*Please check your configuration and try again.*",
         }
 
 
@@ -681,9 +701,21 @@ def create_demo() -> gr.Blocks:
             "**Sign in with HuggingFace** above to access premium models and providers."
         ),
         examples=[
-            ["What drugs could be repurposed for Alzheimer's disease?", "simple"],
-            ["Is metformin effective for treating cancer?", "simple"],
-            ["What medications show promise for Long COVID treatment?", "simple"],
+            # When additional_inputs are provided, examples must be lists of lists
+            # Each inner list: [message, mode, hf_model, hf_provider]
+            [
+                "What drugs could be repurposed for Alzheimer's disease?",
+                "iterative",
+                None,
+                None,
+            ],
+            ["Is metformin effective for treating cancer?", "iterative", None, None],
+            [
+                "What medications show promise for Long COVID treatment?",
+                "iterative",
+                None,
+                None,
+            ],
         ],
         additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False),
         additional_inputs=[
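
The metadata contract the diff enforces can be summarized with one literal message; a minimal sketch of a dict that satisfies it (the values are illustrative, not from the app):

msg = {
    "role": "assistant",
    "content": "Searching PubMed for metformin trials...",
    "metadata": {
        "title": "🔍 Literature Search",  # str; required for the accordion to render
        "status": "pending",              # "pending" shows a spinner, "done" collapses
        "log": "Iteration 2",             # str
        "duration": 1.5,                  # float, seconds
    },
}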
src/orchestrator/planner_agent.py CHANGED
@@ -109,7 +109,7 @@ class PlannerAgent:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            report_plan = result.output
+            report_plan = result.data
 
             # Validate report plan
             if not report_plan.report_outline:
src/services/llamaindex_rag.py CHANGED
@@ -136,7 +136,8 @@ class LlamaIndexRAGService:
             }
         except ImportError as e:
             raise ImportError(
-                "LlamaIndex dependencies not installed. Run: uv sync --extra modal"
+                "LlamaIndex dependencies not installed. Required packages: chromadb, llama-index, "
+                "and their dependencies. If rpds is missing, try: uv pip install rpds-py"
             ) from e
 
     def _configure_embeddings(
src/services/statistical_analyzer.py CHANGED
@@ -135,7 +135,7 @@ Generate executable Python code to analyze this evidence."""
         # Generate code
         agent = self._get_agent()
         code_result = await agent.run(prompt)
-        generated_code = code_result.output
+        generated_code = code_result.data  # type: ignore[attr-defined]
 
         # Execute in Modal sandbox
         loop = asyncio.get_running_loop()
tests/integration/test_rag_integration.py CHANGED
@@ -121,9 +121,12 @@ class TestRAGServiceIntegration:
             assert len(response) > 0
             assert "python" in response.lower()
         except Exception as e:
-            # If model is not available (404), skip the test
-            if "404" in str(e) or "Not Found" in str(e):
+            # If model is not available (404) or authentication required (401), skip the test
+            error_str = str(e)
+            if "404" in error_str or "Not Found" in error_str:
                 pytest.skip(f"HuggingFace model not available via inference API: {e}")
+            if "401" in error_str or "Unauthorized" in error_str or "Invalid username or password" in error_str:
+                pytest.skip(f"HuggingFace authentication required but not available: {e}")
             raise
 
         # Cleanup
tests/unit/agent_factory/test_judges.py CHANGED
@@ -34,6 +34,7 @@ class TestJudgeHandler:
 
         # Mock the PydanticAI agent
         mock_result = MagicMock()
+        type(mock_result).data = mock_assessment  # pydantic-ai uses .data for structured output
         mock_result.output = mock_assessment
 
         with (
@@ -88,7 +89,8 @@ class TestJudgeHandler:
         )
 
         mock_result = MagicMock()
-        mock_result.output = mock_assessment
+        mock_result.data = mock_assessment
+        mock_result.output = mock_assessment  # Some code may use .output
 
         with (
             patch("src.agent_factory.judges.get_model") as mock_get_model,
tests/unit/agents/test_hypothesis_agent.py CHANGED
@@ -28,18 +28,17 @@ def sample_evidence():
 
 @pytest.fixture
 def mock_assessment():
+    primary_hyp = MechanismHypothesis(
+        drug="Metformin",
+        target="AMPK",
+        pathway="mTOR inhibition",
+        effect="Reduced cancer cell proliferation",
+        confidence=0.75,
+        search_suggestions=["metformin AMPK cancer", "mTOR cancer therapy"],
+    )
     return HypothesisAssessment(
-        hypotheses=[
-            MechanismHypothesis(
-                drug="Metformin",
-                target="AMPK",
-                pathway="mTOR inhibition",
-                effect="Reduced cancer cell proliferation",
-                confidence=0.75,
-                search_suggestions=["metformin AMPK cancer", "mTOR cancer therapy"],
-            )
-        ],
-        primary_hypothesis=None,
+        hypotheses=[primary_hyp],
+        primary_hypothesis=primary_hyp,  # Set primary hypothesis
         knowledge_gaps=["Clinical trial data needed"],
         recommended_searches=["metformin clinical trial cancer"],
     )
@@ -54,8 +53,9 @@ async def test_hypothesis_agent_generates_hypotheses(sample_evidence, mock_assessment):
     with patch("src.agents.hypothesis_agent.Agent") as mock_agent_class:
         mock_get_model.return_value = MagicMock()  # Mock model
         mock_result = MagicMock()
+        type(mock_result).data = mock_assessment  # pydantic-ai uses .data for structured output
         mock_result.output = mock_assessment
-        # pydantic-ai Agent returns an object with .output for structured output
+        # pydantic-ai Agent returns an object with .data for structured output
         mock_agent_class.return_value.run = AsyncMock(return_value=mock_result)
 
         agent = HypothesisAgent(store)
@@ -94,6 +94,7 @@ async def test_hypothesis_agent_uses_embeddings(sample_evidence, mock_assessment):
         mock_format.return_value = "Prompt"
 
         mock_result = MagicMock()
+        type(mock_result).data = mock_assessment  # pydantic-ai uses .data for structured output
         mock_result.output = mock_assessment
         mock_agent_class.return_value.run = AsyncMock(return_value=mock_result)
 
tests/unit/agents/test_input_parser.py CHANGED
@@ -18,6 +18,13 @@ def mock_model() -> MagicMock:
     return model
 
 
+@pytest.fixture(autouse=True)
+def patch_infer_model(mock_model: MagicMock):
+    """Auto-patch infer_model for all tests to avoid OpenAI API key requirements."""
+    with patch("pydantic_ai.models.infer_model", return_value=mock_model):
+        yield
+
+
 @pytest.fixture
 def mock_parsed_query_iterative() -> ParsedQuery:
     """Create a mock ParsedQuery for iterative mode."""
@@ -51,7 +58,9 @@ def mock_agent_result_iterative(
     mock_parsed_query_iterative: ParsedQuery,
 ) -> RunResult[ParsedQuery]:
     """Create a mock agent result for iterative mode."""
-    result = MagicMock(spec=RunResult)
+    result = MagicMock()
+    # Configure the mock to return the actual output when .data is accessed
+    type(result).data = mock_parsed_query_iterative
     result.output = mock_parsed_query_iterative
     return result
 
@@ -61,7 +70,9 @@ def mock_agent_result_deep(
     mock_parsed_query_deep: ParsedQuery,
 ) -> RunResult[ParsedQuery]:
     """Create a mock agent result for deep mode."""
-    result = MagicMock(spec=RunResult)
+    result = MagicMock()
+    # Configure the mock to return the actual output when .data is accessed
+    type(result).data = mock_parsed_query_deep
     result.output = mock_parsed_query_deep
     return result
 
@@ -72,33 +83,52 @@ def input_parser_agent(mock_model: MagicMock) -> InputParserAgent:
     return InputParserAgent(model=mock_model)
 
 
+@pytest.fixture(autouse=True)
+def patch_infer_model(mock_model: MagicMock):
+    """Auto-patch infer_model for all tests to avoid OpenAI API key requirements."""
+    with patch("pydantic_ai.models.infer_model", return_value=mock_model):
+        yield
+
+
 class TestInputParserAgentInit:
     """Test InputParserAgent initialization."""
 
-    def test_input_parser_agent_init_with_model(self, mock_model: MagicMock) -> None:
+    @patch("pydantic_ai.models.infer_model")
+    def test_input_parser_agent_init_with_model(
+        self, mock_infer_model: MagicMock, mock_model: MagicMock
+    ) -> None:
         """Test InputParserAgent initialization with provided model."""
+        mock_infer_model.return_value = mock_model
         agent = InputParserAgent(model=mock_model)
         assert agent.model == mock_model
         assert agent.agent is not None
 
     @patch("src.agents.input_parser.get_model")
+    @patch("pydantic_ai.models.infer_model")
     def test_input_parser_agent_init_without_model(
-        self, mock_get_model: MagicMock, mock_model: MagicMock
+        self,
+        mock_infer_model: MagicMock,
+        mock_get_model: MagicMock,
+        mock_model: MagicMock,
     ) -> None:
         """Test InputParserAgent initialization without model (uses default)."""
         mock_get_model.return_value = mock_model
+        mock_infer_model.return_value = mock_model
         agent = InputParserAgent()
         assert agent.model == mock_model
         mock_get_model.assert_called_once()
 
+    @patch("pydantic_ai.models.infer_model")
     def test_input_parser_agent_has_correct_system_prompt(
-        self, input_parser_agent: InputParserAgent
+        self, mock_infer_model: MagicMock, mock_model: MagicMock
     ) -> None:
         """Test that InputParserAgent has correct system prompt."""
+        mock_infer_model.return_value = mock_model
+        agent = InputParserAgent(model=mock_model)
         # System prompt should contain key instructions
         # In pydantic_ai, system_prompt is a property that returns the prompt string
         # For mocked agents, we check that the agent was created with a system prompt
-        assert input_parser_agent.agent is not None
+        assert agent.agent is not None
         # The actual system prompt is set during agent creation
         # We verify the agent exists and was properly initialized
         # Note: Direct access to system_prompt may not work with mocks
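
A condensed sketch of the autouse pattern these tests adopt, for reference; the fixture name is illustrative, while the patch target mirrors the diff:

from unittest.mock import MagicMock, patch

import pytest


@pytest.fixture(autouse=True)
def no_real_models():
    # Every test in the module runs inside this patch, so pydantic-ai never
    # tries to infer a real model (and never demands an OPENAI_API_KEY)
    with patch("pydantic_ai.models.infer_model", return_value=MagicMock()):
        yield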