Joseph Pollack commited on
Commit
ce644a9
Β·
unverified Β·
1 Parent(s): ca3a4f7

adds or improves : interface , tests, docs , ci , precommit , build , and demo

Browse files
This view is limited to 50 files because it contains too many changes. Β  See raw diff
Files changed (50) hide show
  1. .env copy.example +124 -0
  2. .github/README.md +3 -145
  3. .github/workflows/ci.yml +14 -7
  4. .github/workflows/docs.yml +6 -3
  5. .gitignore +2 -0
  6. .pre-commit-hooks/run_pytest_with_sync.py +109 -3
  7. README.md +3 -2
  8. docs/api/agents.md +3 -0
  9. docs/api/models.md +3 -0
  10. docs/api/orchestrators.md +3 -0
  11. docs/api/services.md +3 -0
  12. docs/api/tools.md +3 -0
  13. docs/architecture/agents.md +3 -0
  14. docs/architecture/middleware.md +3 -0
  15. docs/architecture/services.md +3 -0
  16. docs/architecture/tools.md +3 -0
  17. docs/contributing/code-quality.md +3 -0
  18. docs/contributing/code-style.md +3 -0
  19. docs/contributing/error-handling.md +3 -0
  20. docs/contributing/implementation-patterns.md +3 -0
  21. docs/contributing/index.md +3 -0
  22. docs/contributing/prompt-engineering.md +3 -0
  23. docs/contributing/testing.md +3 -0
  24. docs/getting-started/examples.md +3 -0
  25. docs/getting-started/installation.md +3 -0
  26. docs/getting-started/mcp-integration.md +3 -0
  27. docs/getting-started/quick-start.md +3 -0
  28. docs/license.md +3 -0
  29. docs/overview/architecture.md +3 -0
  30. docs/overview/features.md +3 -0
  31. docs/team.md +3 -0
  32. mkdocs.yml +2 -2
  33. pyproject.toml +50 -41
  34. requirements.txt +1 -1
  35. src/agent_factory/judges.py +123 -37
  36. src/agents/hypothesis_agent.py +2 -2
  37. src/agents/input_parser.py +3 -3
  38. src/agents/judge_agent_llm.py +3 -3
  39. src/agents/knowledge_gap.py +3 -3
  40. src/agents/long_writer.py +3 -3
  41. src/agents/report_agent.py +2 -2
  42. src/agents/tool_selector.py +3 -3
  43. src/app.py +223 -18
  44. src/legacy_orchestrator.py +17 -3
  45. src/orchestrator/planner_agent.py +3 -3
  46. src/services/llamaindex_rag.py +1 -1
  47. src/services/statistical_analyzer.py +2 -2
  48. src/utils/config.py +12 -4
  49. src/utils/huggingface_chat_client.py +2 -2
  50. src/utils/inference_models.py +627 -0
.env copy.example ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============== LLM CONFIGURATION ==============
2
+
3
+ # Provider: "openai", "anthropic", or "huggingface"
4
+ LLM_PROVIDER=openai
5
+
6
+ # API Keys (at least one required for full LLM analysis)
7
+ OPENAI_API_KEY=sk-your-key-here
8
+ ANTHROPIC_API_KEY=sk-ant-your-key-here
9
+
10
+ # Model names (optional - sensible defaults set in config.py)
11
+ # OPENAI_MODEL=gpt-5.1
12
+ # ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
13
+
14
+ # ============== HUGGINGFACE CONFIGURATION ==============
15
+
16
+ # HuggingFace Token - enables gated models and higher rate limits
17
+ # Get yours at: https://huggingface.co/settings/tokens
18
+ #
19
+ # WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
20
+ # WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
21
+ #
22
+ # For HuggingFace Spaces deployment:
23
+ # Set this as a "Secret" in Space Settings -> Variables and secrets
24
+ # Users/judges don't need their own token - the Space secret is used
25
+ #
26
+ HF_TOKEN=hf_your-token-here
27
+ # Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)
28
+
29
+ # Default HuggingFace model for inference (gated, requires auth)
30
+ # Can be overridden in UI dropdown
31
+ # Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
32
+ HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
33
+
34
+ # Fallback models for HuggingFace Inference API (comma-separated)
35
+ # Models are tried in order until one succeeds
36
+ # Format: model1,model2,model3
37
+ # Latest reasoning models first, then reliable fallbacks
38
+ # Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
39
+ # Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
40
+ HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct
41
+
42
+ # Override model/provider selection (optional, usually set via UI)
43
+ # HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
44
+ # HF_PROVIDER=hyperbolic
45
+
46
+ # ============== EMBEDDING CONFIGURATION ==============
47
+
48
+ # Embedding Provider: "openai", "local", or "huggingface"
49
+ # Default: "local" (no API key required)
50
+ EMBEDDING_PROVIDER=local
51
+
52
+ # OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
53
+ OPENAI_EMBEDDING_MODEL=text-embedding-3-small
54
+
55
+ # Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
56
+ # BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2
57
+ LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
58
+
59
+ # HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
60
+ HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
61
+
62
+ # ============== AGENT CONFIGURATION ==============
63
+
64
+ MAX_ITERATIONS=10
65
+ SEARCH_TIMEOUT=30
66
+ LOG_LEVEL=INFO
67
+
68
+ # Graph-based execution (experimental)
69
+ # USE_GRAPH_EXECUTION=false
70
+
71
+ # Budget & Rate Limiting
72
+ # DEFAULT_TOKEN_LIMIT=100000
73
+ # DEFAULT_TIME_LIMIT_MINUTES=10
74
+ # DEFAULT_ITERATIONS_LIMIT=10
75
+
76
+ # ============== WEB SEARCH CONFIGURATION ==============
77
+
78
+ # Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
79
+ # Default: "duckduckgo" (no API key required)
80
+ WEB_SEARCH_PROVIDER=duckduckgo
81
+
82
+ # Serper API Key (for Google search via Serper)
83
+ # SERPER_API_KEY=your-serper-key-here
84
+
85
+ # SearchXNG Host URL (for self-hosted search)
86
+ # SEARCHXNG_HOST=http://localhost:8080
87
+
88
+ # Brave Search API Key
89
+ # BRAVE_API_KEY=your-brave-key-here
90
+
91
+ # Tavily API Key
92
+ # TAVILY_API_KEY=your-tavily-key-here
93
+
94
+ # ============== EXTERNAL SERVICES ==============
95
+
96
+ # PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec)
97
+ NCBI_API_KEY=your-ncbi-key-here
98
+
99
+ # Modal (optional - for secure code execution sandbox)
100
+ # MODAL_TOKEN_ID=your-modal-token-id
101
+ # MODAL_TOKEN_SECRET=your-modal-token-secret
102
+
103
+ # ============== VECTOR DATABASE (ChromaDB) ==============
104
+
105
+ # ChromaDB storage path
106
+ CHROMA_DB_PATH=./chroma_db
107
+
108
+ # Persist ChromaDB to disk (default: true)
109
+ # CHROMA_DB_PERSIST=true
110
+
111
+ # Remote ChromaDB server (optional)
112
+ # CHROMA_DB_HOST=localhost
113
+ # CHROMA_DB_PORT=8000
114
+
115
+ # ============== RAG SERVICE CONFIGURATION ==============
116
+
117
+ # ChromaDB collection name for RAG
118
+ # RAG_COLLECTION_NAME=deepcritical_evidence
119
+
120
+ # Number of top results to retrieve from RAG
121
+ # RAG_SIMILARITY_TOP_K=5
122
+
123
+ # Automatically ingest evidence into RAG
124
+ # RAG_AUTO_INGEST=true
.github/README.md CHANGED
@@ -28,29 +28,11 @@ tags:
28
  [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=πŸ™%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
29
  [![Documentation](https://img.shields.io/badge/πŸ“š%20Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](docs/index.md)
30
  [![Demo](https://img.shields.io/badge/πŸš€%20Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
31
- [![CodeCov](https://img.shields.io/badge/πŸ“Š%20Coverage-F01F7A?style=for-the-badge&logo=codecov&logoColor=white&labelColor=F01F7A&color=F01F7A)](https://codecov.io/gh/DeepCritical/GradioDemo)
32
  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
33
 
34
-
35
  </div>
36
 
37
-
38
- # DeepCritical
39
-
40
- ## Intro
41
-
42
- ## Features
43
-
44
- - **Multi-Source Search**: PubMed, ClinicalTrials.gov, bioRxiv/medRxiv
45
- - **MCP Integration**: Use our tools from Claude Desktop or any MCP client
46
- - **HuggingFace OAuth**: Sign in with your HuggingFace account to automatically use your API token
47
- - **Modal Sandbox**: Secure execution of AI-generated statistical code
48
- - **LlamaIndex RAG**: Semantic search and evidence synthesis
49
- - **HuggingfaceInference**: Free tier support with automatic fallback
50
- - **HuggingfaceMCP Custom Config To Use Community Tools**:
51
- - **Strongly Typed Composable Graphs**:
52
- - **Specialized Research Teams of Agents**:
53
-
54
  ## Quick Start
55
 
56
  ### 1. Environment Setup
@@ -60,14 +42,14 @@ tags:
60
  pip install uv
61
 
62
  # Sync dependencies
63
- uv sync
64
  ```
65
 
66
  ### 2. Run the UI
67
 
68
  ```bash
69
  # Start the Gradio app
70
- uv run gradio run src/app.py
71
  ```
72
 
73
  Open your browser to `http://localhost:7860`.
@@ -80,11 +62,6 @@ Open your browser to `http://localhost:7860`.
80
  - No need to manually enter API keys when logged in
81
  - OAuth token is used only for the current session and never stored
82
 
83
- **Manual API Key (BYOK)**:
84
- - You can still provide your own API key in the Settings accordion
85
- - Supports HuggingFace, OpenAI, or Anthropic API keys
86
- - Manual keys take priority over OAuth tokens
87
-
88
  ### 4. Connect via MCP
89
 
90
  This application exposes a Model Context Protocol (MCP) server, allowing you to use its search tools directly from Claude Desktop or other MCP clients.
@@ -102,122 +79,3 @@ Add this to your `claude_desktop_config.json`:
102
  }
103
  }
104
  ```
105
-
106
- **Available Tools**:
107
- - `search_pubmed`: Search peer-reviewed biomedical literature.
108
- - `search_clinical_trials`: Search ClinicalTrials.gov.
109
- - `search_biorxiv`: Search bioRxiv/medRxiv preprints.
110
- - `search_all`: Search all sources simultaneously.
111
- - `analyze_hypothesis`: Secure statistical analysis using Modal sandboxes.
112
-
113
-
114
- ## Architecture
115
-
116
- DeepCritical uses a Vertical Slice Architecture:
117
-
118
- 1. **Search Slice**: Retrieving evidence from PubMed, ClinicalTrials.gov, and bioRxiv.
119
- 2. **Judge Slice**: Evaluating evidence quality using LLMs.
120
- 3. **Orchestrator Slice**: Managing the research loop and UI.
121
-
122
- - iterativeResearch
123
- - deepResearch
124
- - researchTeam
125
-
126
- ### Iterative Research
127
-
128
- ```mermaid
129
- sequenceDiagram
130
- participant IterativeFlow
131
- participant ThinkingAgent
132
- participant KnowledgeGapAgent
133
- participant ToolSelector
134
- participant ToolExecutor
135
- participant JudgeHandler
136
- participant WriterAgent
137
-
138
- IterativeFlow->>IterativeFlow: run(query)
139
-
140
- loop Until complete or max_iterations
141
- IterativeFlow->>ThinkingAgent: generate_observations()
142
- ThinkingAgent-->>IterativeFlow: observations
143
-
144
- IterativeFlow->>KnowledgeGapAgent: evaluate_gaps()
145
- KnowledgeGapAgent-->>IterativeFlow: KnowledgeGapOutput
146
-
147
- alt Research complete
148
- IterativeFlow->>WriterAgent: create_final_report()
149
- WriterAgent-->>IterativeFlow: final_report
150
- else Gaps remain
151
- IterativeFlow->>ToolSelector: select_agents(gap)
152
- ToolSelector-->>IterativeFlow: AgentSelectionPlan
153
-
154
- IterativeFlow->>ToolExecutor: execute_tool_tasks()
155
- ToolExecutor-->>IterativeFlow: ToolAgentOutput[]
156
-
157
- IterativeFlow->>JudgeHandler: assess_evidence()
158
- JudgeHandler-->>IterativeFlow: should_continue
159
- end
160
- end
161
- ```
162
-
163
-
164
- ### Deep Research
165
-
166
- ```mermaid
167
- sequenceDiagram
168
- actor User
169
- participant GraphOrchestrator
170
- participant InputParser
171
- participant GraphBuilder
172
- participant GraphExecutor
173
- participant Agent
174
- participant BudgetTracker
175
- participant WorkflowState
176
-
177
- User->>GraphOrchestrator: run(query)
178
- GraphOrchestrator->>InputParser: detect_research_mode(query)
179
- InputParser-->>GraphOrchestrator: mode (iterative/deep)
180
- GraphOrchestrator->>GraphBuilder: build_graph(mode)
181
- GraphBuilder-->>GraphOrchestrator: ResearchGraph
182
- GraphOrchestrator->>WorkflowState: init_workflow_state()
183
- GraphOrchestrator->>BudgetTracker: create_budget()
184
- GraphOrchestrator->>GraphExecutor: _execute_graph(graph)
185
-
186
- loop For each node in graph
187
- GraphExecutor->>Agent: execute_node(agent_node)
188
- Agent->>Agent: process_input
189
- Agent-->>GraphExecutor: result
190
- GraphExecutor->>WorkflowState: update_state(result)
191
- GraphExecutor->>BudgetTracker: add_tokens(used)
192
- GraphExecutor->>BudgetTracker: check_budget()
193
- alt Budget exceeded
194
- GraphExecutor->>GraphOrchestrator: emit(error_event)
195
- else Continue
196
- GraphExecutor->>GraphOrchestrator: emit(progress_event)
197
- end
198
- end
199
-
200
- GraphOrchestrator->>User: AsyncGenerator[AgentEvent]
201
- ```
202
-
203
- ### Research Team
204
-
205
- Critical Deep Research Agent
206
-
207
- ## Development
208
-
209
- ### Run Tests
210
-
211
- ```bash
212
- uv run pytest
213
- ```
214
-
215
- ### Run Checks
216
-
217
- ```bash
218
- make check
219
- ```
220
-
221
- ## Links
222
-
223
- - [GitHub Repository](https://github.com/DeepCritical/GradioDemo)
 
28
  [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=πŸ™%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
29
  [![Documentation](https://img.shields.io/badge/πŸ“š%20Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](docs/index.md)
30
  [![Demo](https://img.shields.io/badge/πŸš€%20Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
31
+ [![codecov](https://codecov.io/gh/DeepCritical/GradioDemo/graph/badge.svg?token=B1f05RCGpz)](https://codecov.io/gh/DeepCritical/GradioDemo)
32
  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
33
 
 
34
  </div>
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ## Quick Start
37
 
38
  ### 1. Environment Setup
 
42
  pip install uv
43
 
44
  # Sync dependencies
45
+ uv sync --all-extras
46
  ```
47
 
48
  ### 2. Run the UI
49
 
50
  ```bash
51
  # Start the Gradio app
52
+ uv run gradio gradio src/app.py
53
  ```
54
 
55
  Open your browser to `http://localhost:7860`.
 
62
  - No need to manually enter API keys when logged in
63
  - OAuth token is used only for the current session and never stored
64
 
 
 
 
 
 
65
  ### 4. Connect via MCP
66
 
67
  This application exposes a Model Context Protocol (MCP) server, allowing you to use its search tools directly from Claude Desktop or other MCP clients.
 
79
  }
80
  }
81
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/ci.yml CHANGED
@@ -2,9 +2,9 @@ name: CI
2
 
3
  on:
4
  push:
5
- branches: [main, develop]
6
  pull_request:
7
- branches: [main, develop]
8
 
9
  jobs:
10
  test:
@@ -28,7 +28,7 @@ jobs:
28
 
29
  - name: Install dependencies
30
  run: |
31
- uv sync --dev
32
 
33
  - name: Lint with ruff
34
  run: |
@@ -43,25 +43,32 @@ jobs:
43
  env:
44
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
45
  run: |
46
- uv run pytest tests/unit/ -v -m "not openai and not embedding_provider" --tb=short -p no:logfire
47
 
48
  - name: Run local embeddings tests
49
  env:
50
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
51
  run: |
52
- uv run pytest tests/ -v -m "local_embeddings" --tb=short -p no:logfire || true
53
  continue-on-error: true # Allow failures if dependencies not available
54
 
55
  - name: Run HuggingFace integration tests
56
  env:
57
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
58
  run: |
59
- uv run pytest tests/integration/ -v -m "huggingface and not embedding_provider" --tb=short -p no:logfire || true
60
  continue-on-error: true # Allow failures if HF_TOKEN not set
61
 
62
  - name: Run non-OpenAI integration tests (excluding embedding providers)
63
  env:
64
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
65
  run: |
66
- uv run pytest tests/integration/ -v -m "integration and not openai and not embedding_provider" --tb=short -p no:logfire || true
67
  continue-on-error: true # Allow failures if dependencies not available
 
 
 
 
 
 
 
 
2
 
3
  on:
4
  push:
5
+ branches: [main, dev]
6
  pull_request:
7
+ branches: [main, dev]
8
 
9
  jobs:
10
  test:
 
28
 
29
  - name: Install dependencies
30
  run: |
31
+ uv sync --extra dev
32
 
33
  - name: Lint with ruff
34
  run: |
 
43
  env:
44
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
45
  run: |
46
+ uv run pytest tests/unit/ -v -m "not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml
47
 
48
  - name: Run local embeddings tests
49
  env:
50
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
51
  run: |
52
+ uv run pytest tests/ -v -m "local_embeddings" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
53
  continue-on-error: true # Allow failures if dependencies not available
54
 
55
  - name: Run HuggingFace integration tests
56
  env:
57
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
58
  run: |
59
+ uv run pytest tests/integration/ -v -m "huggingface and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
60
  continue-on-error: true # Allow failures if HF_TOKEN not set
61
 
62
  - name: Run non-OpenAI integration tests (excluding embedding providers)
63
  env:
64
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
65
  run: |
66
+ uv run pytest tests/integration/ -v -m "integration and not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
67
  continue-on-error: true # Allow failures if dependencies not available
68
+
69
+ - name: Upload coverage reports to Codecov
70
+ uses: codecov/codecov-action@v5
71
+ continue-on-error: true
72
+ with:
73
+ token: ${{ secrets.CODECOV_TOKEN }}
74
+ slug: DeepCritical/GradioDemo
.github/workflows/docs.yml CHANGED
@@ -32,12 +32,13 @@ jobs:
32
  python-version: '3.11'
33
 
34
  - name: Install uv
35
- run: |
36
- pip install uv
 
37
 
38
  - name: Install dependencies
39
  run: |
40
- uv sync --all-extras --dev
41
 
42
  - name: Build documentation
43
  run: |
@@ -49,7 +50,9 @@ jobs:
49
  with:
50
  github_token: ${{ secrets.GITHUB_TOKEN }}
51
  publish_dir: ./site
 
52
  cname: false
 
53
 
54
 
55
 
 
32
  python-version: '3.11'
33
 
34
  - name: Install uv
35
+ uses: astral-sh/setup-uv@v5
36
+ with:
37
+ version: "latest"
38
 
39
  - name: Install dependencies
40
  run: |
41
+ uv sync --extra dev
42
 
43
  - name: Build documentation
44
  run: |
 
50
  with:
51
  github_token: ${{ secrets.GITHUB_TOKEN }}
52
  publish_dir: ./site
53
+ publish_branch: gh-pages
54
  cname: false
55
+ keep_files: false
56
 
57
 
58
 
.gitignore CHANGED
@@ -72,6 +72,8 @@ logs/
72
  .pytest_cache/
73
  .mypy_cache/
74
  .coverage
 
 
75
  htmlcov/
76
 
77
  # Database files
 
72
  .pytest_cache/
73
  .mypy_cache/
74
  .coverage
75
+ .coverage.*
76
+ coverage.xml
77
  htmlcov/
78
 
79
  # Database files
.pre-commit-hooks/run_pytest_with_sync.py CHANGED
@@ -1,8 +1,109 @@
1
  #!/usr/bin/env python3
2
  """Cross-platform pytest runner that syncs dependencies before running tests."""
3
 
 
4
  import subprocess
5
  import sys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  def run_command(
@@ -28,7 +129,6 @@ def run_command(
28
  def main() -> int:
29
  """Main entry point."""
30
  import os
31
- from pathlib import Path
32
 
33
  # Get the project root (where pyproject.toml is)
34
  script_dir = Path(__file__).parent
@@ -37,6 +137,10 @@ def main() -> int:
37
  # Change to project root to ensure uv works correctly
38
  os.chdir(project_root)
39
 
 
 
 
 
40
  # Check if uv is available
41
  if run_command(["uv", "--version"], check=False) != 0:
42
  print("Error: uv not found. Please install uv: https://github.com/astral-sh/uv")
@@ -48,8 +152,8 @@ def main() -> int:
48
 
49
  # Sync dependencies - always include dev
50
  # Note: embeddings dependencies are now in main dependencies, not optional
51
- # So we just sync with --dev for all test types
52
- sync_cmd = ["uv", "sync", "--dev"]
53
 
54
  print(f"Syncing dependencies for {test_type} tests...")
55
  if run_command(sync_cmd, cwd=project_root) != 0:
@@ -65,6 +169,7 @@ def main() -> int:
65
  "--tb=short",
66
  "-p",
67
  "no:logfire",
 
68
  ]
69
  elif test_type == "embeddings":
70
  pytest_args = [
@@ -75,6 +180,7 @@ def main() -> int:
75
  "--tb=short",
76
  "-p",
77
  "no:logfire",
 
78
  ]
79
  else:
80
  pytest_args = []
 
1
  #!/usr/bin/env python3
2
  """Cross-platform pytest runner that syncs dependencies before running tests."""
3
 
4
+ import shutil
5
  import subprocess
6
  import sys
7
+ from pathlib import Path
8
+
9
+
10
+ def clean_caches(project_root: Path) -> None:
11
+ """Remove pytest and Python cache directories and files.
12
+
13
+ Only scans specific directories (src/, tests/) to avoid resource
14
+ exhaustion from scanning large directories like .venv on Windows.
15
+ """
16
+ # Directories to scan for caches (only project code, not dependencies)
17
+ scan_dirs = ["src", "tests", ".pre-commit-hooks"]
18
+
19
+ # Directories to exclude (to avoid resource issues)
20
+ exclude_dirs = {
21
+ ".venv",
22
+ "venv",
23
+ "ENV",
24
+ "env",
25
+ ".git",
26
+ "node_modules",
27
+ "dist",
28
+ "build",
29
+ ".eggs",
30
+ "reference_repos",
31
+ "folder",
32
+ }
33
+
34
+ cache_patterns = [
35
+ ".pytest_cache",
36
+ "__pycache__",
37
+ "*.pyc",
38
+ "*.pyo",
39
+ "*.pyd",
40
+ ".mypy_cache",
41
+ ".ruff_cache",
42
+ ]
43
+
44
+ def should_exclude(path: Path) -> bool:
45
+ """Check if a path should be excluded from cache cleanup."""
46
+ # Check if any parent directory is in exclude list
47
+ for parent in path.parents:
48
+ if parent.name in exclude_dirs:
49
+ return True
50
+ # Check if the path itself is excluded
51
+ if path.name in exclude_dirs:
52
+ return True
53
+ return False
54
+
55
+ cleaned = []
56
+
57
+ # Only scan specific directories to avoid resource exhaustion
58
+ for scan_dir in scan_dirs:
59
+ scan_path = project_root / scan_dir
60
+ if not scan_path.exists():
61
+ continue
62
+
63
+ for pattern in cache_patterns:
64
+ if "*" in pattern:
65
+ # Handle glob patterns for files
66
+ try:
67
+ for cache_file in scan_path.rglob(pattern):
68
+ if should_exclude(cache_file):
69
+ continue
70
+ try:
71
+ if cache_file.is_file():
72
+ cache_file.unlink()
73
+ cleaned.append(str(cache_file.relative_to(project_root)))
74
+ except OSError:
75
+ pass # Ignore errors (file might be locked or already deleted)
76
+ except OSError:
77
+ pass # Ignore errors during directory traversal
78
+ else:
79
+ # Handle directory patterns
80
+ try:
81
+ for cache_dir in scan_path.rglob(pattern):
82
+ if should_exclude(cache_dir):
83
+ continue
84
+ try:
85
+ if cache_dir.is_dir():
86
+ shutil.rmtree(cache_dir, ignore_errors=True)
87
+ cleaned.append(str(cache_dir.relative_to(project_root)))
88
+ except OSError:
89
+ pass # Ignore errors (directory might be locked)
90
+ except OSError:
91
+ pass # Ignore errors during directory traversal
92
+
93
+ # Also clean root-level caches (like .pytest_cache in project root)
94
+ for pattern in [".pytest_cache", ".mypy_cache", ".ruff_cache"]:
95
+ cache_path = project_root / pattern
96
+ if cache_path.exists() and cache_path.is_dir():
97
+ try:
98
+ shutil.rmtree(cache_path, ignore_errors=True)
99
+ cleaned.append(pattern)
100
+ except OSError:
101
+ pass
102
+
103
+ if cleaned:
104
+ print(f"Cleaned {len(cleaned)} cache items")
105
+ else:
106
+ print("No cache files found to clean")
107
 
108
 
109
  def run_command(
 
129
  def main() -> int:
130
  """Main entry point."""
131
  import os
 
132
 
133
  # Get the project root (where pyproject.toml is)
134
  script_dir = Path(__file__).parent
 
137
  # Change to project root to ensure uv works correctly
138
  os.chdir(project_root)
139
 
140
+ # Clean caches before running tests
141
+ print("Cleaning pytest and Python caches...")
142
+ clean_caches(project_root)
143
+
144
  # Check if uv is available
145
  if run_command(["uv", "--version"], check=False) != 0:
146
  print("Error: uv not found. Please install uv: https://github.com/astral-sh/uv")
 
152
 
153
  # Sync dependencies - always include dev
154
  # Note: embeddings dependencies are now in main dependencies, not optional
155
+ # Use --extra dev for [project.optional-dependencies].dev (not --dev which is for [dependency-groups])
156
+ sync_cmd = ["uv", "sync", "--extra", "dev"]
157
 
158
  print(f"Syncing dependencies for {test_type} tests...")
159
  if run_command(sync_cmd, cwd=project_root) != 0:
 
169
  "--tb=short",
170
  "-p",
171
  "no:logfire",
172
+ "--cache-clear", # Clear pytest cache before running
173
  ]
174
  elif test_type == "embeddings":
175
  pytest_args = [
 
180
  "--tb=short",
181
  "-p",
182
  "no:logfire",
183
+ "--cache-clear", # Clear pytest cache before running
184
  ]
185
  else:
186
  pytest_args = []
README.md CHANGED
@@ -35,7 +35,7 @@ tags:
35
  [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=πŸ™%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
36
  [![Documentation](https://img.shields.io/badge/πŸ“š%20Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](docs/index.md)
37
  [![Demo](https://img.shields.io/badge/πŸš€%20Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
38
- [![CodeCov](https://img.shields.io/badge/πŸ“Š%20Coverage-F01F7A?style=for-the-badge&logo=codecov&logoColor=white&labelColor=F01F7A&color=F01F7A)](https://codecov.io/gh/DeepCritical/GradioDemo)
39
  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
40
 
41
 
@@ -76,6 +76,7 @@ For this hackathon we're proposing a simple yet powerful Deep Research Agent tha
76
  - [] Create Deep Critical Drug Reporposing / Discovery Demo
77
  - [] Create Deep Critical Literal Review
78
  - [] Create Deep Critical Hypothesis Generator
 
79
 
80
  ## Completed
81
 
@@ -118,5 +119,5 @@ For this hackathon we're proposing a simple yet powerful Deep Research Agent tha
118
  [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=πŸ™%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
119
  [![Documentation](https://img.shields.io/badge/πŸ“š%20Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](docs/index.md)
120
  [![Demo](https://img.shields.io/badge/πŸš€%20Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
121
- [![CodeCov](https://img.shields.io/badge/πŸ“Š%20Coverage-F01F7A?style=for-the-badge&logo=codecov&logoColor=white&labelColor=F01F7A&color=F01F7A)](https://codecov.io/gh/DeepCritical/GradioDemo)
122
  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
 
35
  [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=πŸ™%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
36
  [![Documentation](https://img.shields.io/badge/πŸ“š%20Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](docs/index.md)
37
  [![Demo](https://img.shields.io/badge/πŸš€%20Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
38
+ [![codecov](https://codecov.io/gh/DeepCritical/GradioDemo/graph/badge.svg?token=B1f05RCGpz)](https://codecov.io/gh/DeepCritical/GradioDemo)
39
  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
40
 
41
 
 
76
  - [] Create Deep Critical Drug Reporposing / Discovery Demo
77
  - [] Create Deep Critical Literal Review
78
  - [] Create Deep Critical Hypothesis Generator
79
+ - [] Create PyPi Package
80
 
81
  ## Completed
82
 
 
119
  [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=πŸ™%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
120
  [![Documentation](https://img.shields.io/badge/πŸ“š%20Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](docs/index.md)
121
  [![Demo](https://img.shields.io/badge/πŸš€%20Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
122
+ [![codecov](https://codecov.io/gh/DeepCritical/GradioDemo/graph/badge.svg?token=B1f05RCGpz)](https://codecov.io/gh/DeepCritical/GradioDemo)
123
  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
docs/api/agents.md CHANGED
@@ -258,3 +258,6 @@ def create_input_parser_agent(model: Any | None = None) -> InputParserAgent
258
 
259
 
260
 
 
 
 
 
258
 
259
 
260
 
261
+
262
+
263
+
docs/api/models.md CHANGED
@@ -236,3 +236,6 @@ class BudgetStatus(BaseModel):
236
 
237
 
238
 
 
 
 
 
236
 
237
 
238
 
239
+
240
+
241
+
docs/api/orchestrators.md CHANGED
@@ -183,3 +183,6 @@ Runs Magentic orchestration.
183
 
184
 
185
 
 
 
 
 
183
 
184
 
185
 
186
+
187
+
188
+
docs/api/services.md CHANGED
@@ -189,3 +189,6 @@ Analyzes a hypothesis using statistical methods.
189
 
190
 
191
 
 
 
 
 
189
 
190
 
191
 
192
+
193
+
194
+
docs/api/tools.md CHANGED
@@ -223,3 +223,6 @@ Searches multiple tools in parallel.
223
 
224
 
225
 
 
 
 
 
223
 
224
 
225
 
226
+
227
+
228
+
docs/architecture/agents.md CHANGED
@@ -180,3 +180,6 @@ Factory functions:
180
 
181
 
182
 
 
 
 
 
180
 
181
 
182
 
183
+
184
+
185
+
docs/architecture/middleware.md CHANGED
@@ -130,3 +130,6 @@ All middleware components use `ContextVar` for thread-safe isolation:
130
 
131
 
132
 
 
 
 
 
130
 
131
 
132
 
133
+
134
+
135
+
docs/architecture/services.md CHANGED
@@ -130,3 +130,6 @@ if settings.has_openai_key:
130
 
131
 
132
 
 
 
 
 
130
 
131
 
132
 
133
+
134
+
135
+
docs/architecture/tools.md CHANGED
@@ -163,3 +163,6 @@ search_handler = SearchHandler(
163
 
164
 
165
 
 
 
 
 
163
 
164
 
165
 
166
+
167
+
168
+
docs/contributing/code-quality.md CHANGED
@@ -69,3 +69,6 @@ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
69
 
70
 
71
 
 
 
 
 
69
 
70
 
71
 
72
+
73
+
74
+
docs/contributing/code-style.md CHANGED
@@ -49,3 +49,6 @@ result = await loop.run_in_executor(None, cpu_bound_function, args)
49
 
50
 
51
 
 
 
 
 
49
 
50
 
51
 
52
+
53
+
54
+
docs/contributing/error-handling.md CHANGED
@@ -57,3 +57,6 @@ except httpx.HTTPError as e:
57
 
58
 
59
 
 
 
 
 
57
 
58
 
59
 
60
+
61
+
62
+
docs/contributing/implementation-patterns.md CHANGED
@@ -72,3 +72,6 @@ def get_embedding_service() -> EmbeddingService:
72
 
73
 
74
 
 
 
 
 
72
 
73
 
74
 
75
+
76
+
77
+
docs/contributing/index.md CHANGED
@@ -151,3 +151,6 @@ Thank you for contributing to DeepCritical!
151
 
152
 
153
 
 
 
 
 
151
 
152
 
153
 
154
+
155
+
156
+
docs/contributing/prompt-engineering.md CHANGED
@@ -57,3 +57,6 @@ This document outlines prompt engineering guidelines and citation validation rul
57
 
58
 
59
 
 
 
 
 
57
 
58
 
59
 
60
+
61
+
62
+
docs/contributing/testing.md CHANGED
@@ -53,3 +53,6 @@ async def test_real_pubmed_search():
53
 
54
 
55
 
 
 
 
 
53
 
54
 
55
 
56
+
57
+
58
+
docs/getting-started/examples.md CHANGED
@@ -197,3 +197,6 @@ USE_GRAPH_EXECUTION=true
197
 
198
 
199
 
 
 
 
 
197
 
198
 
199
 
200
+
201
+
202
+
docs/getting-started/installation.md CHANGED
@@ -136,3 +136,6 @@ uv run pre-commit install
136
 
137
 
138
 
 
 
 
 
136
 
137
 
138
 
139
+
140
+
141
+
docs/getting-started/mcp-integration.md CHANGED
@@ -203,3 +203,6 @@ You can configure multiple DeepCritical instances:
203
 
204
 
205
 
 
 
 
 
203
 
204
 
205
 
206
+
207
+
208
+
docs/getting-started/quick-start.md CHANGED
@@ -107,3 +107,6 @@ What are the active clinical trials investigating Alzheimer's disease treatments
107
 
108
 
109
 
 
 
 
 
107
 
108
 
109
 
110
+
111
+
112
+
docs/license.md CHANGED
@@ -27,3 +27,6 @@ SOFTWARE.
27
 
28
 
29
 
 
 
 
 
27
 
28
 
29
 
30
+
31
+
32
+
docs/overview/architecture.md CHANGED
@@ -184,3 +184,6 @@ The system supports complex research workflows through:
184
 
185
 
186
 
 
 
 
 
184
 
185
 
186
 
187
+
188
+
189
+
docs/overview/features.md CHANGED
@@ -136,3 +136,6 @@ DeepCritical provides a comprehensive set of features for AI-assisted research:
136
 
137
 
138
 
 
 
 
 
136
 
137
 
138
 
139
+
140
+
141
+
docs/team.md CHANGED
@@ -32,3 +32,6 @@ We welcome contributions! See the [Contributing Guide](contributing/index.md) fo
32
 
33
 
34
 
 
 
 
 
32
 
33
 
34
 
35
+
36
+
37
+
mkdocs.yml CHANGED
@@ -62,8 +62,8 @@ markdown_extensions:
62
  - pymdownx.tasklist:
63
  custom_checkbox: true
64
  - pymdownx.emoji:
65
- emoji_index: !!python/name:material.extensions.emoji.twemoji
66
- emoji_generator: !!python/name:material.extensions.emoji.to_svg
67
  - admonition
68
  - pymdownx.details
69
  - pymdownx.superfences
 
62
  - pymdownx.tasklist:
63
  custom_checkbox: true
64
  - pymdownx.emoji:
65
+ emoji_generator: !!python/name:pymdownx.emoji.to_svg
66
+ emoji_index: !!python/name:pymdownx.emoji.twemoji
67
  - admonition
68
  - pymdownx.details
69
  - pymdownx.superfences
pyproject.toml CHANGED
@@ -5,21 +5,16 @@ description = "AI-Native Drug Repurposing Research Agent"
5
  readme = "README.md"
6
  requires-python = ">=3.11"
7
  dependencies = [
8
- # Core
9
  "pydantic>=2.7",
10
  "pydantic-settings>=2.2",
11
  "pydantic-ai>=0.0.16",
12
- # AI Providers
13
  "openai>=1.0.0",
14
  "anthropic>=0.18.0",
15
- # HTTP & Parsing
16
- "httpx>=0.27",
17
- "beautifulsoup4>=4.12",
18
- "xmltodict>=0.13",
19
- "huggingface-hub>=0.20.0",
20
- # UI
21
- "gradio[mcp,oauth]>=6.0.0",
22
- # Utils
23
  "python-dotenv>=1.0", # .env loading
24
  "tenacity>=8.2", # Retry logic
25
  "structlog>=24.1", # Structured logging
@@ -40,28 +35,30 @@ dependencies = [
40
  "modal>=0.63.0",
41
  "llama-index-llms-openai>=0.6.9",
42
  "llama-index-embeddings-openai>=0.5.1",
 
 
 
43
  ]
44
 
45
  [project.optional-dependencies]
46
  dev = [
47
- # Testing
48
- "pytest>=8.0",
49
- "pytest-asyncio>=0.23",
50
- "pytest-sugar>=1.0",
51
- "pytest-cov>=5.0",
52
- "pytest-mock>=3.12",
53
- "respx>=0.21",
54
- "typer>=0.9.0",
55
-
56
- # Quality
57
- "ruff>=0.4.0",
58
- "mypy>=1.10",
59
  "pre-commit>=3.7",
60
- # Documentation
61
  "mkdocs>=1.5.0",
62
- "mkdocs-material>=9.0.0",
63
- "mkdocs-mermaid2-plugin>=1.1.0",
64
- "mkdocs-minify-plugin>=0.7.0",
 
 
 
65
  ]
66
 
67
  [build-system]
@@ -106,6 +103,9 @@ ignore = [
106
  "RUF100", # Unused noqa (version differences between local/CI)
107
  ]
108
 
 
 
 
109
  [tool.ruff.lint.isort]
110
  known-first-party = ["src"]
111
 
@@ -123,6 +123,7 @@ exclude = [
123
  "^reference_repos/",
124
  "^examples/",
125
  "^folder/",
 
126
  ]
127
 
128
  # ============== PYTEST CONFIG ==============
@@ -136,6 +137,25 @@ addopts = [
136
  "-p",
137
  "no:logfire",
138
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  markers = [
140
  "unit: Unit tests (mocked)",
141
  "integration: Integration tests (real APIs)",
@@ -149,7 +169,10 @@ markers = [
149
  # ============== COVERAGE CONFIG ==============
150
  [tool.coverage.run]
151
  source = ["src"]
152
- omit = ["*/__init__.py"]
 
 
 
153
 
154
  [tool.coverage.report]
155
  exclude_lines = [
@@ -157,17 +180,3 @@ exclude_lines = [
157
  "if TYPE_CHECKING:",
158
  "raise NotImplementedError",
159
  ]
160
-
161
- [dependency-groups]
162
- dev = [
163
- "mkdocs-codeinclude-plugin>=0.2.1",
164
- "mkdocs-macros-plugin>=1.5.0",
165
- "pytest>=9.0.1",
166
- "pytest-asyncio>=1.3.0",
167
- "pytest-cov>=7.0.0",
168
- "pytest-mock>=3.15.1",
169
- "pytest-sugar>=1.1.1",
170
- "respx>=0.22.0",
171
- "structlog>=25.5.0",
172
- "ty>=0.0.1a28",
173
- ]
 
5
  readme = "README.md"
6
  requires-python = ">=3.11"
7
  dependencies = [
 
8
  "pydantic>=2.7",
9
  "pydantic-settings>=2.2",
10
  "pydantic-ai>=0.0.16",
 
11
  "openai>=1.0.0",
12
  "anthropic>=0.18.0",
13
+ "httpx>=0.27",
14
+ "beautifulsoup4>=4.12",
15
+ "xmltodict>=0.13",
16
+ "huggingface-hub>=0.20.0",
17
+ "gradio[mcp,oauth]>=6.0.0",
 
 
 
18
  "python-dotenv>=1.0", # .env loading
19
  "tenacity>=8.2", # Retry logic
20
  "structlog>=24.1", # Structured logging
 
35
  "modal>=0.63.0",
36
  "llama-index-llms-openai>=0.6.9",
37
  "llama-index-embeddings-openai>=0.5.1",
38
+ "pydantic-ai-slim[huggingface]>=0.0.18",
39
+ "pytest>=9.0.1",
40
+ "pytest-cov>=7.0.0",
41
  ]
42
 
43
  [project.optional-dependencies]
44
  dev = [
45
+ "pytest>=9.0.1",
46
+ "pytest-asyncio>=1.3.0",
47
+ "pytest-sugar>=1.1.1",
48
+ "pytest-cov>=7.0.0",
49
+ "pytest-mock>=3.15.1",
50
+ "respx>=0.22.0",
51
+ "typer>=0.9.0",
52
+ "ruff>=0.14.6",
53
+ "mypy>=1.18.2",
 
 
 
54
  "pre-commit>=3.7",
 
55
  "mkdocs>=1.5.0",
56
+ "mkdocs-material>=9.7.0",
57
+ "mkdocs-mermaid2-plugin>=1.2.3",
58
+ "mkdocs-minify-plugin>=0.8.0",
59
+ "mkdocs-codeinclude-plugin>=0.2.1",
60
+ "mkdocs-macros-plugin>=1.5.0",
61
+ "pymdown-extensions>=10.17.2",
62
  ]
63
 
64
  [build-system]
 
103
  "RUF100", # Unused noqa (version differences between local/CI)
104
  ]
105
 
106
+ [tool.ruff.lint.per-file-ignores]
107
+ "src/app.py" = ["PLR0915"] # Too many statements (Gradio UI setup is complex)
108
+
109
  [tool.ruff.lint.isort]
110
  known-first-party = ["src"]
111
 
 
123
  "^reference_repos/",
124
  "^examples/",
125
  "^folder/",
126
+ "^src/app\\.py$", # Gradio UI setup - ignore mypy checks
127
  ]
128
 
129
  # ============== PYTEST CONFIG ==============
 
137
  "-p",
138
  "no:logfire",
139
  ]
140
+ # Suppress known warnings that don't indicate test failures
141
+ # These are from third-party libraries and don't affect test correctness
142
+ filterwarnings = [
143
+ # Pydantic deprecation warnings from unittest.mock introspection
144
+ # These occur when mock tries to introspect Pydantic models
145
+ "ignore::pydantic.warnings.PydanticDeprecatedSince20",
146
+ "ignore::pydantic.warnings.PydanticDeprecatedSince211",
147
+ # Gradio UI warnings (not relevant for unit tests)
148
+ "ignore::UserWarning:gradio.components.dropdown",
149
+ "ignore::UserWarning:gradio.oauth",
150
+ # Pattern-based filters for Pydantic deprecation messages (catch-all)
151
+ "ignore:The `__fields__` attribute is deprecated.*",
152
+ "ignore:The `__fields_set__` attribute is deprecated.*",
153
+ "ignore:Accessing the 'model_computed_fields' attribute.*",
154
+ "ignore:Accessing the 'model_fields' attribute.*",
155
+ # Also catch warnings from unittest.mock module
156
+ "ignore::DeprecationWarning:unittest.mock",
157
+ ]
158
+ # Note: pytest only runs test files, so source files don't need exclusion
159
  markers = [
160
  "unit: Unit tests (mocked)",
161
  "integration: Integration tests (real APIs)",
 
169
  # ============== COVERAGE CONFIG ==============
170
  [tool.coverage.run]
171
  source = ["src"]
172
+ omit = [
173
+ "*/__init__.py",
174
+ "src/app.py", # Exclude Gradio UI from coverage
175
+ ]
176
 
177
  [tool.coverage.report]
178
  exclude_lines = [
 
180
  "if TYPE_CHECKING:",
181
  "raise NotImplementedError",
182
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -26,7 +26,7 @@ beautifulsoup4>=4.12
26
  xmltodict>=0.13
27
 
28
  # UI (Gradio with MCP server support)
29
- # gradio[mcp]>=6.0.0
30
 
31
  # Utils
32
  python-dotenv>=1.0
 
26
  xmltodict>=0.13
27
 
28
  # UI (Gradio with MCP server support)
29
+ gradio[mcp]>=6.0.0
30
 
31
  # Utils
32
  python-dotenv>=1.0
src/agent_factory/judges.py CHANGED
@@ -2,19 +2,34 @@
2
 
3
  import asyncio
4
  import json
5
- from typing import Any, ClassVar
 
6
 
7
  import structlog
8
  from huggingface_hub import InferenceClient
9
  from pydantic_ai import Agent
10
  from pydantic_ai.models.anthropic import AnthropicModel
11
- from pydantic_ai.models.huggingface import HuggingFaceModel
12
- from pydantic_ai.models.openai import OpenAIChatModel as OpenAIModel
13
- from pydantic_ai.providers.anthropic import AnthropicProvider
14
- from pydantic_ai.providers.huggingface import HuggingFaceProvider
15
- from pydantic_ai.providers.openai import OpenAIProvider
16
  from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  from src.prompts.judge import (
19
  SYSTEM_PROMPT,
20
  format_empty_evidence_prompt,
@@ -35,26 +50,43 @@ def get_model() -> Any:
35
  llm_provider = settings.llm_provider
36
 
37
  if llm_provider == "anthropic":
38
- provider = AnthropicProvider(api_key=settings.anthropic_api_key)
39
- return AnthropicModel(settings.anthropic_model, provider=provider)
40
 
41
  if llm_provider == "huggingface":
42
- # Free tier - uses HF_TOKEN from environment if available
43
- model_name = settings.huggingface_model or "meta-llama/Llama-3.1-8B-Instruct"
44
- hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
45
- return HuggingFaceModel(model_name, provider=hf_provider)
 
 
 
 
 
 
 
 
 
46
 
47
  if llm_provider == "openai":
48
- openai_provider = OpenAIProvider(api_key=settings.openai_api_key)
49
- return OpenAIModel(settings.openai_model, provider=openai_provider)
50
 
51
  # Default to HuggingFace if provider is unknown or not specified
52
  if llm_provider != "huggingface":
53
  logger.warning("Unknown LLM provider, defaulting to HuggingFace", provider=llm_provider)
54
 
55
- model_name = settings.huggingface_model or "meta-llama/Llama-3.1-8B-Instruct"
56
- hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
57
- return HuggingFaceModel(model_name, provider=hf_provider)
 
 
 
 
 
 
 
 
 
 
58
 
59
 
60
  class JudgeHandler:
@@ -72,9 +104,9 @@ class JudgeHandler:
72
  model: Optional PydanticAI model. If None, uses config default.
73
  """
74
  self.model = model or get_model()
75
- self.agent = Agent(
76
  model=self.model,
77
- output_type=JudgeAssessment,
78
  system_prompt=SYSTEM_PROMPT,
79
  retries=3,
80
  )
@@ -112,7 +144,7 @@ class JudgeHandler:
112
  try:
113
  # Run the agent with structured output
114
  result = await self.agent.run(user_prompt)
115
- assessment = result.output
116
 
117
  logger.info(
118
  "Assessment complete",
@@ -121,7 +153,7 @@ class JudgeHandler:
121
  confidence=assessment.confidence,
122
  )
123
 
124
- return assessment
125
 
126
  except Exception as e:
127
  logger.error("Assessment failed", error=str(e))
@@ -167,25 +199,58 @@ class JudgeHandler:
167
  class HFInferenceJudgeHandler:
168
  """
169
  JudgeHandler using HuggingFace Inference API for FREE LLM calls.
170
- Defaults to Llama-3.1-8B-Instruct (requires HF_TOKEN) or falls back to public models.
 
 
 
 
 
 
171
  """
172
 
173
- FALLBACK_MODELS: ClassVar[list[str]] = [
174
- "meta-llama/Llama-3.1-8B-Instruct", # Primary (Gated)
175
- "mistralai/Mistral-7B-Instruct-v0.3", # Secondary
176
- "HuggingFaceH4/zephyr-7b-beta", # Fallback (Ungated)
177
- ]
 
 
 
 
 
178
 
179
- def __init__(self, model_id: str | None = None) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  """
181
  Initialize with HF Inference client.
182
 
183
  Args:
184
  model_id: Optional specific model ID. If None, uses FALLBACK_MODELS chain.
 
 
 
 
185
  """
186
  self.model_id = model_id
187
- # Will automatically use HF_TOKEN from env if available
188
- self.client = InferenceClient()
 
 
189
  self.call_count = 0
190
  self.last_question: str | None = None
191
  self.last_evidence: list[Evidence] | None = None
@@ -209,7 +274,7 @@ class HFInferenceJudgeHandler:
209
  else:
210
  user_prompt = format_empty_evidence_prompt(question)
211
 
212
- models_to_try: list[str] = [self.model_id] if self.model_id else self.FALLBACK_MODELS
213
  last_error: Exception | None = None
214
 
215
  for model in models_to_try:
@@ -261,14 +326,35 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
261
  ]
262
 
263
  # Use chat_completion (conversational task - supported by all models)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  response = await loop.run_in_executor(
265
  None,
266
- lambda: self.client.chat_completion(
267
- messages=messages,
268
- model=model,
269
- max_tokens=1024,
270
- temperature=0.1,
271
- ),
272
  )
273
 
274
  # Extract content from response
 
2
 
3
  import asyncio
4
  import json
5
+ import os
6
+ from typing import Any
7
 
8
  import structlog
9
  from huggingface_hub import InferenceClient
10
  from pydantic_ai import Agent
11
  from pydantic_ai.models.anthropic import AnthropicModel
12
+ from pydantic_ai.models.openai import OpenAIModel # type: ignore[attr-defined]
 
 
 
 
13
  from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
14
 
15
+ # Try to import HuggingFace support (may not be available in all pydantic-ai versions)
16
+ # According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires
17
+ # pydantic-ai with huggingface extra or pydantic-ai-slim[huggingface]
18
+ # There are two ways to use HuggingFace:
19
+ # 1. Inference API: HuggingFaceModel with HuggingFaceProvider (uses AsyncInferenceClient internally)
20
+ # 2. Local models: Would use transformers directly (not via pydantic-ai)
21
+ try:
22
+ from huggingface_hub import AsyncInferenceClient
23
+ from pydantic_ai.models.huggingface import HuggingFaceModel
24
+ from pydantic_ai.providers.huggingface import HuggingFaceProvider
25
+
26
+ _HUGGINGFACE_AVAILABLE = True
27
+ except ImportError:
28
+ HuggingFaceModel = None # type: ignore[assignment, misc]
29
+ HuggingFaceProvider = None # type: ignore[assignment, misc]
30
+ AsyncInferenceClient = None # type: ignore[assignment, misc]
31
+ _HUGGINGFACE_AVAILABLE = False
32
+
33
  from src.prompts.judge import (
34
  SYSTEM_PROMPT,
35
  format_empty_evidence_prompt,
 
50
  llm_provider = settings.llm_provider
51
 
52
  if llm_provider == "anthropic":
53
+ return AnthropicModel(settings.anthropic_model, api_key=settings.anthropic_api_key) # type: ignore[call-arg]
 
54
 
55
  if llm_provider == "huggingface":
56
+ if not _HUGGINGFACE_AVAILABLE:
57
+ raise ImportError(
58
+ "HuggingFace models are not available in this version of pydantic-ai. "
59
+ "Please install with: uv add 'pydantic-ai[huggingface]' or use 'openai'/'anthropic' as the LLM provider."
60
+ )
61
+ # Inference API - uses HuggingFace Inference API via AsyncInferenceClient
62
+ # Per https://ai.pydantic.dev/models/huggingface/#configure-the-provider
63
+ model_name = settings.huggingface_model or "Qwen/Qwen3-Next-80B-A3B-Thinking"
64
+ # Create AsyncInferenceClient for inference API
65
+ hf_client = AsyncInferenceClient(api_key=settings.hf_token) # type: ignore[misc]
66
+ # Pass client to HuggingFaceProvider for inference API usage
67
+ provider = HuggingFaceProvider(hf_client=hf_client) # type: ignore[misc]
68
+ return HuggingFaceModel(model_name, provider=provider) # type: ignore[misc]
69
 
70
  if llm_provider == "openai":
71
+ return OpenAIModel(settings.openai_model, api_key=settings.openai_api_key) # type: ignore[call-overload]
 
72
 
73
  # Default to HuggingFace if provider is unknown or not specified
74
  if llm_provider != "huggingface":
75
  logger.warning("Unknown LLM provider, defaulting to HuggingFace", provider=llm_provider)
76
 
77
+ if not _HUGGINGFACE_AVAILABLE:
78
+ raise ImportError(
79
+ "HuggingFace models are not available in this version of pydantic-ai. "
80
+ "Please install with: uv add 'pydantic-ai[huggingface]' or set LLM_PROVIDER to 'openai'/'anthropic'."
81
+ )
82
+ # Inference API - uses HuggingFace Inference API via AsyncInferenceClient
83
+ # Per https://ai.pydantic.dev/models/huggingface/#configure-the-provider
84
+ model_name = settings.huggingface_model or "Qwen/Qwen3-Next-80B-A3B-Thinking"
85
+ # Create AsyncInferenceClient for inference API
86
+ hf_client = AsyncInferenceClient(api_key=settings.hf_token) # type: ignore[misc]
87
+ # Pass client to HuggingFaceProvider for inference API usage
88
+ provider = HuggingFaceProvider(hf_client=hf_client) # type: ignore[misc]
89
+ return HuggingFaceModel(model_name, provider=provider) # type: ignore[misc]
90
 
91
 
92
  class JudgeHandler:
 
104
  model: Optional PydanticAI model. If None, uses config default.
105
  """
106
  self.model = model or get_model()
107
+ self.agent = Agent( # type: ignore[call-overload]
108
  model=self.model,
109
+ result_type=JudgeAssessment,
110
  system_prompt=SYSTEM_PROMPT,
111
  retries=3,
112
  )
 
144
  try:
145
  # Run the agent with structured output
146
  result = await self.agent.run(user_prompt)
147
+ assessment = result.output # type: ignore[attr-defined]
148
 
149
  logger.info(
150
  "Assessment complete",
 
153
  confidence=assessment.confidence,
154
  )
155
 
156
+ return assessment # type: ignore[no-any-return]
157
 
158
  except Exception as e:
159
  logger.error("Assessment failed", error=str(e))
 
199
  class HFInferenceJudgeHandler:
200
  """
201
  JudgeHandler using HuggingFace Inference API for FREE LLM calls.
202
+
203
+ Models are loaded from environment variable HF_FALLBACK_MODELS (comma-separated)
204
+ or use defaults based on currently available inference providers:
205
+ - meta-llama/Llama-3.1-8B-Instruct (gated, multiple providers)
206
+ - HuggingFaceH4/zephyr-7b-beta (ungated, featherless-ai)
207
+ - Qwen/Qwen2-7B-Instruct (ungated, featherless-ai)
208
+ - google/gemma-2-2b-it (gated, nebius)
209
  """
210
 
211
+ @classmethod
212
+ def _get_fallback_models(cls) -> list[str]:
213
+ """Get fallback models from env var or use defaults."""
214
+ from src.utils.config import settings
215
+
216
+ # Get from env var or settings
217
+ models_str = os.getenv("HF_FALLBACK_MODELS") or settings.huggingface_fallback_models
218
+
219
+ # Parse comma-separated list
220
+ models = [m.strip() for m in models_str.split(",") if m.strip()]
221
 
222
+ # Default fallback if empty
223
+ if not models:
224
+ models = [
225
+ "meta-llama/Llama-3.1-8B-Instruct", # Primary (Gated, multiple providers)
226
+ "HuggingFaceH4/zephyr-7b-beta", # Fallback (Ungated, featherless-ai)
227
+ "Qwen/Qwen2-7B-Instruct", # Fallback (Ungated, featherless-ai)
228
+ "google/gemma-2-2b-it", # Fallback (Gated, nebius)
229
+ ]
230
+
231
+ return models
232
+
233
+ def __init__(
234
+ self,
235
+ model_id: str | None = None,
236
+ api_key: str | None = None,
237
+ provider: str | None = None,
238
+ ) -> None:
239
  """
240
  Initialize with HF Inference client.
241
 
242
  Args:
243
  model_id: Optional specific model ID. If None, uses FALLBACK_MODELS chain.
244
+ api_key: Optional HuggingFace API key (OAuth token or HF_TOKEN).
245
+ If provided, will use authenticated access for gated models.
246
+ provider: Optional inference provider name (e.g., "novita", "nebius").
247
+ If provided, will use that specific provider.
248
  """
249
  self.model_id = model_id
250
+ self.api_key = api_key
251
+ self.provider = provider
252
+ # Use provided API key, or fall back to env var, or use no auth
253
+ self.client = InferenceClient(token=api_key) if api_key else InferenceClient()
254
  self.call_count = 0
255
  self.last_question: str | None = None
256
  self.last_evidence: list[Evidence] | None = None
 
274
  else:
275
  user_prompt = format_empty_evidence_prompt(question)
276
 
277
+ models_to_try: list[str] = [self.model_id] if self.model_id else self._get_fallback_models()
278
  last_error: Exception | None = None
279
 
280
  for model in models_to_try:
 
326
  ]
327
 
328
  # Use chat_completion (conversational task - supported by all models)
329
+ # HuggingFace Inference Providers format: "model-id:provider" or use provider parameter
330
+ # According to docs: https://huggingface.co/docs/inference-providers
331
+ model_to_use = model
332
+ provider_param = None
333
+ if self.provider:
334
+ # Format: model-id:provider for explicit provider selection
335
+ model_to_use = f"{model}:{self.provider}"
336
+ # Alternative: pass provider as separate parameter (if client supports it)
337
+ provider_param = self.provider
338
+
339
+ # Build chat_completion call
340
+ call_kwargs = {
341
+ "messages": messages,
342
+ "model": model_to_use,
343
+ "max_tokens": 1024,
344
+ "temperature": 0.1,
345
+ }
346
+ # Add provider parameter if client supports it (some clients use this instead of model:provider)
347
+ if provider_param and hasattr(self.client.chat_completion, "__code__"):
348
+ # Check if provider parameter is supported
349
+ try:
350
+ call_kwargs["provider"] = provider_param
351
+ except TypeError:
352
+ # Provider not supported as parameter, use model:provider format
353
+ pass
354
+
355
  response = await loop.run_in_executor(
356
  None,
357
+ lambda: self.client.chat_completion(**call_kwargs), # type: ignore[call-overload]
 
 
 
 
 
358
  )
359
 
360
  # Extract content from response
src/agents/hypothesis_agent.py CHANGED
@@ -40,9 +40,9 @@ class HypothesisAgent(BaseAgent): # type: ignore[misc]
40
  def _get_agent(self) -> Agent[None, HypothesisAssessment]:
41
  """Lazy initialization of LLM agent to avoid requiring API keys at import."""
42
  if self._agent is None:
43
- self._agent = Agent(
44
  model=get_model(), # Uses configured LLM (OpenAI/Anthropic)
45
- output_type=HypothesisAssessment,
46
  system_prompt=SYSTEM_PROMPT,
47
  )
48
  return self._agent
 
40
  def _get_agent(self) -> Agent[None, HypothesisAssessment]:
41
  """Lazy initialization of LLM agent to avoid requiring API keys at import."""
42
  if self._agent is None:
43
+ self._agent = Agent( # type: ignore[call-overload]
44
  model=get_model(), # Uses configured LLM (OpenAI/Anthropic)
45
+ result_type=HypothesisAssessment,
46
  system_prompt=SYSTEM_PROMPT,
47
  )
48
  return self._agent
src/agents/input_parser.py CHANGED
@@ -64,9 +64,9 @@ class InputParserAgent:
64
  self.logger = logger
65
 
66
  # Initialize Pydantic AI Agent
67
- self.agent = Agent(
68
  model=self.model,
69
- output_type=ParsedQuery,
70
  system_prompt=SYSTEM_PROMPT,
71
  retries=3,
72
  )
@@ -117,7 +117,7 @@ class InputParserAgent:
117
  questions=len(parsed_query.research_questions),
118
  )
119
 
120
- return parsed_query
121
 
122
  except Exception as e:
123
  self.logger.error("Query parsing failed", error=str(e), query=query[:100])
 
64
  self.logger = logger
65
 
66
  # Initialize Pydantic AI Agent
67
+ self.agent = Agent( # type: ignore[call-overload]
68
  model=self.model,
69
+ result_type=ParsedQuery,
70
  system_prompt=SYSTEM_PROMPT,
71
  retries=3,
72
  )
 
117
  questions=len(parsed_query.research_questions),
118
  )
119
 
120
+ return parsed_query # type: ignore[no-any-return]
121
 
122
  except Exception as e:
123
  self.logger.error("Query parsing failed", error=str(e), query=query[:100])
src/agents/judge_agent_llm.py CHANGED
@@ -16,9 +16,9 @@ class LLMSubIterationJudge:
16
 
17
  def __init__(self) -> None:
18
  self.model = get_model()
19
- self.agent = Agent(
20
  model=self.model,
21
- output_type=JudgeAssessment,
22
  system_prompt="""You are a strict judge evaluating a research task.
23
 
24
  Evaluate if the result is sufficient to answer the task.
@@ -42,4 +42,4 @@ Evaluate validity and sufficiency."""
42
 
43
  run_result = await self.agent.run(prompt)
44
  logger.info("LLM judge assessment complete", sufficient=run_result.output.sufficient)
45
- return run_result.output
 
16
 
17
  def __init__(self) -> None:
18
  self.model = get_model()
19
+ self.agent = Agent( # type: ignore[call-overload]
20
  model=self.model,
21
+ result_type=JudgeAssessment,
22
  system_prompt="""You are a strict judge evaluating a research task.
23
 
24
  Evaluate if the result is sufficient to answer the task.
 
42
 
43
  run_result = await self.agent.run(prompt)
44
  logger.info("LLM judge assessment complete", sufficient=run_result.output.sufficient)
45
+ return run_result.output # type: ignore[no-any-return]
src/agents/knowledge_gap.py CHANGED
@@ -56,9 +56,9 @@ class KnowledgeGapAgent:
56
  self.logger = logger
57
 
58
  # Initialize Pydantic AI Agent
59
- self.agent = Agent(
60
  model=self.model,
61
- output_type=KnowledgeGapOutput,
62
  system_prompt=SYSTEM_PROMPT,
63
  retries=3,
64
  )
@@ -121,7 +121,7 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
121
  gaps_count=len(evaluation.outstanding_gaps),
122
  )
123
 
124
- return evaluation
125
 
126
  except Exception as e:
127
  self.logger.error("Knowledge gap evaluation failed", error=str(e))
 
56
  self.logger = logger
57
 
58
  # Initialize Pydantic AI Agent
59
+ self.agent = Agent( # type: ignore[call-overload]
60
  model=self.model,
61
+ result_type=KnowledgeGapOutput,
62
  system_prompt=SYSTEM_PROMPT,
63
  retries=3,
64
  )
 
121
  gaps_count=len(evaluation.outstanding_gaps),
122
  )
123
 
124
+ return evaluation # type: ignore[no-any-return]
125
 
126
  except Exception as e:
127
  self.logger.error("Knowledge gap evaluation failed", error=str(e))
src/agents/long_writer.py CHANGED
@@ -84,9 +84,9 @@ class LongWriterAgent:
84
  self.logger = logger
85
 
86
  # Initialize Pydantic AI Agent
87
- self.agent = Agent(
88
  model=self.model,
89
- output_type=LongWriterOutput,
90
  system_prompt=SYSTEM_PROMPT,
91
  retries=3,
92
  )
@@ -193,7 +193,7 @@ class LongWriterAgent:
193
  attempt=attempt + 1,
194
  )
195
 
196
- return output
197
 
198
  except (TimeoutError, ConnectionError) as e:
199
  # Transient errors - retry
 
84
  self.logger = logger
85
 
86
  # Initialize Pydantic AI Agent
87
+ self.agent = Agent( # type: ignore[call-overload]
88
  model=self.model,
89
+ result_type=LongWriterOutput,
90
  system_prompt=SYSTEM_PROMPT,
91
  retries=3,
92
  )
 
193
  attempt=attempt + 1,
194
  )
195
 
196
+ return output # type: ignore[no-any-return]
197
 
198
  except (TimeoutError, ConnectionError) as e:
199
  # Transient errors - retry
src/agents/report_agent.py CHANGED
@@ -41,9 +41,9 @@ class ReportAgent(BaseAgent): # type: ignore[misc]
41
  def _get_agent(self) -> Agent[None, ResearchReport]:
42
  """Lazy initialization of LLM agent to avoid requiring API keys at import."""
43
  if self._agent is None:
44
- self._agent = Agent(
45
  model=get_model(),
46
- output_type=ResearchReport,
47
  system_prompt=SYSTEM_PROMPT,
48
  )
49
  return self._agent
 
41
  def _get_agent(self) -> Agent[None, ResearchReport]:
42
  """Lazy initialization of LLM agent to avoid requiring API keys at import."""
43
  if self._agent is None:
44
+ self._agent = Agent( # type: ignore[call-overload]
45
  model=get_model(),
46
+ result_type=ResearchReport,
47
  system_prompt=SYSTEM_PROMPT,
48
  )
49
  return self._agent
src/agents/tool_selector.py CHANGED
@@ -68,9 +68,9 @@ class ToolSelectorAgent:
68
  self.logger = logger
69
 
70
  # Initialize Pydantic AI Agent
71
- self.agent = Agent(
72
  model=self.model,
73
- output_type=AgentSelectionPlan,
74
  system_prompt=SYSTEM_PROMPT,
75
  retries=3,
76
  )
@@ -125,7 +125,7 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
125
  agents=[task.agent for task in selection_plan.tasks],
126
  )
127
 
128
- return selection_plan
129
 
130
  except Exception as e:
131
  self.logger.error("Tool selection failed", error=str(e))
 
68
  self.logger = logger
69
 
70
  # Initialize Pydantic AI Agent
71
+ self.agent = Agent( # type: ignore[call-overload]
72
  model=self.model,
73
+ result_type=AgentSelectionPlan,
74
  system_prompt=SYSTEM_PROMPT,
75
  retries=3,
76
  )
 
125
  agents=[task.agent for task in selection_plan.tasks],
126
  )
127
 
128
+ return selection_plan # type: ignore[no-any-return]
129
 
130
  except Exception as e:
131
  self.logger.error("Tool selection failed", error=str(e))
src/app.py CHANGED
@@ -5,8 +5,24 @@ from collections.abc import AsyncGenerator
5
  from typing import Any
6
 
7
  import gradio as gr
8
- from pydantic_ai.models.huggingface import HuggingFaceModel
9
- from pydantic_ai.providers.huggingface import HuggingFaceProvider
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, MockJudgeHandler
12
  from src.orchestrator_factory import create_orchestrator
@@ -15,6 +31,7 @@ from src.tools.europepmc import EuropePMCTool
15
  from src.tools.pubmed import PubMedTool
16
  from src.tools.search_handler import SearchHandler
17
  from src.utils.config import settings
 
18
  from src.utils.models import AgentEvent, OrchestratorConfig
19
 
20
 
@@ -22,6 +39,8 @@ def configure_orchestrator(
22
  use_mock: bool = False,
23
  mode: str = "simple",
24
  oauth_token: str | None = None,
 
 
25
  ) -> tuple[Any, str]:
26
  """
27
  Create an orchestrator instance.
@@ -30,6 +49,8 @@ def configure_orchestrator(
30
  use_mock: If True, use MockJudgeHandler (no API key needed)
31
  mode: Orchestrator mode ("simple" or "advanced")
32
  oauth_token: Optional OAuth token from HuggingFace login
 
 
33
 
34
  Returns:
35
  Tuple of (Orchestrator instance, backend_name)
@@ -59,11 +80,27 @@ def configure_orchestrator(
59
  # Priority: oauth_token > env vars
60
  effective_api_key = oauth_token
61
  if effective_api_key or (os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")):
62
- model: HuggingFaceModel | None = None
63
  if effective_api_key:
64
- model_name = settings.huggingface_model or "meta-llama/Llama-3.1-8B-Instruct"
65
- hf_provider = HuggingFaceProvider(api_key=effective_api_key)
66
- model = HuggingFaceModel(model_name, provider=hf_provider)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  backend_info = "API (HuggingFace OAuth)"
68
  else:
69
  backend_info = "API (Env Config)"
@@ -72,8 +109,19 @@ def configure_orchestrator(
72
 
73
  # 3. Free Tier (HuggingFace Inference)
74
  else:
75
- judge_handler = HFInferenceJudgeHandler()
76
- backend_info = "Free Tier (Llama 3.1 / Mistral)"
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  orchestrator = create_orchestrator(
79
  search_handler=search_handler,
@@ -332,6 +380,8 @@ async def research_agent(
332
  message: str,
333
  history: list[dict[str, Any]],
334
  mode: str = "simple",
 
 
335
  request: gr.Request | None = None,
336
  ) -> AsyncGenerator[gr.ChatMessage | list[gr.ChatMessage], None]:
337
  """
@@ -341,6 +391,8 @@ async def research_agent(
341
  message: User's research question
342
  history: Chat history (Gradio format)
343
  mode: Orchestrator mode ("simple" or "advanced")
 
 
344
  request: Gradio request object containing OAuth information
345
 
346
  Yields:
@@ -372,10 +424,13 @@ async def research_agent(
372
  try:
373
  # use_mock=False - let configure_orchestrator decide based on available keys
374
  # It will use: OAuth token > Env vars > HF Inference (free tier)
 
375
  orchestrator, backend_name = configure_orchestrator(
376
  use_mock=False, # Never use mock in production - HF Inference is the free fallback
377
  mode=effective_mode,
378
  oauth_token=oauth_token,
 
 
379
  )
380
 
381
  yield gr.ChatMessage(
@@ -407,7 +462,162 @@ def create_demo() -> gr.Blocks:
407
  with gr.Row():
408
  gr.LoginButton()
409
 
410
- # Chat interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
  gr.ChatInterface(
412
  fn=research_agent,
413
  title="🧬 DeepCritical",
@@ -417,7 +627,7 @@ def create_demo() -> gr.Blocks:
417
  "---\n"
418
  "*Research tool only β€” not for medical advice.* \n"
419
  "**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`\n\n"
420
- "**Sign in with HuggingFace** above to use your account's API token automatically."
421
  ),
422
  examples=[
423
  ["What drugs could be repurposed for Alzheimer's disease?", "simple"],
@@ -426,14 +636,9 @@ def create_demo() -> gr.Blocks:
426
  ],
427
  additional_inputs_accordion=gr.Accordion(label="βš™οΈ Settings", open=False),
428
  additional_inputs=[
429
- gr.Radio(
430
- choices=["simple", "advanced"],
431
- value="simple",
432
- label="Orchestrator Mode",
433
- info=(
434
- "Simple: Linear (Free Tier Friendly) | Advanced: Multi-Agent (Requires OpenAI - not available without manual config)"
435
- ),
436
- ),
437
  ],
438
  )
439
 
 
5
  from typing import Any
6
 
7
  import gradio as gr
8
+
9
+ # Try to import HuggingFace support (may not be available in all pydantic-ai versions)
10
+ # According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires
11
+ # pydantic-ai with huggingface extra or pydantic-ai-slim[huggingface]
12
+ # There are two ways to use HuggingFace:
13
+ # 1. Inference API: HuggingFaceModel with HuggingFaceProvider (uses AsyncInferenceClient internally)
14
+ # 2. Local models: Would use transformers directly (not via pydantic-ai)
15
+ try:
16
+ from huggingface_hub import AsyncInferenceClient
17
+ from pydantic_ai.models.huggingface import HuggingFaceModel
18
+ from pydantic_ai.providers.huggingface import HuggingFaceProvider
19
+
20
+ _HUGGINGFACE_AVAILABLE = True
21
+ except ImportError:
22
+ HuggingFaceModel = None # type: ignore[assignment, misc]
23
+ HuggingFaceProvider = None # type: ignore[assignment, misc]
24
+ AsyncInferenceClient = None # type: ignore[assignment, misc]
25
+ _HUGGINGFACE_AVAILABLE = False
26
 
27
  from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, MockJudgeHandler
28
  from src.orchestrator_factory import create_orchestrator
 
31
  from src.tools.pubmed import PubMedTool
32
  from src.tools.search_handler import SearchHandler
33
  from src.utils.config import settings
34
+ from src.utils.inference_models import get_available_models, get_available_providers
35
  from src.utils.models import AgentEvent, OrchestratorConfig
36
 
37
 
 
39
  use_mock: bool = False,
40
  mode: str = "simple",
41
  oauth_token: str | None = None,
42
+ hf_model: str | None = None,
43
+ hf_provider: str | None = None,
44
  ) -> tuple[Any, str]:
45
  """
46
  Create an orchestrator instance.
 
49
  use_mock: If True, use MockJudgeHandler (no API key needed)
50
  mode: Orchestrator mode ("simple" or "advanced")
51
  oauth_token: Optional OAuth token from HuggingFace login
52
+ hf_model: Selected HuggingFace model ID
53
+ hf_provider: Selected inference provider
54
 
55
  Returns:
56
  Tuple of (Orchestrator instance, backend_name)
 
80
  # Priority: oauth_token > env vars
81
  effective_api_key = oauth_token
82
  if effective_api_key or (os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")):
83
+ model: Any | None = None
84
  if effective_api_key:
85
+ # Use selected model or fall back to env var/settings
86
+ model_name = (
87
+ hf_model
88
+ or os.getenv("HF_MODEL")
89
+ or settings.huggingface_model
90
+ or "Qwen/Qwen3-Next-80B-A3B-Thinking"
91
+ )
92
+ if not _HUGGINGFACE_AVAILABLE:
93
+ raise ImportError(
94
+ "HuggingFace models are not available in this version of pydantic-ai. "
95
+ "Please install with: uv add 'pydantic-ai[huggingface]' or use 'openai'/'anthropic' as the LLM provider."
96
+ )
97
+ # Inference API - uses HuggingFace Inference API via AsyncInferenceClient
98
+ # Per https://ai.pydantic.dev/models/huggingface/#configure-the-provider
99
+ # Create AsyncInferenceClient for inference API
100
+ hf_client = AsyncInferenceClient(api_key=effective_api_key) # type: ignore[misc]
101
+ # Pass client to HuggingFaceProvider for inference API usage
102
+ provider = HuggingFaceProvider(hf_client=hf_client) # type: ignore[misc]
103
+ model = HuggingFaceModel(model_name, provider=provider) # type: ignore[misc]
104
  backend_info = "API (HuggingFace OAuth)"
105
  else:
106
  backend_info = "API (Env Config)"
 
109
 
110
  # 3. Free Tier (HuggingFace Inference)
111
  else:
112
+ # Pass OAuth token if available (even if not in env vars)
113
+ # This allows OAuth login to work with free tier models
114
+ # Use selected model and provider if provided
115
+ judge_handler = HFInferenceJudgeHandler(
116
+ model_id=hf_model,
117
+ api_key=oauth_token,
118
+ provider=hf_provider,
119
+ )
120
+ model_display = hf_model.split("/")[-1] if hf_model else "Default"
121
+ provider_display = hf_provider or "auto"
122
+ backend_info = f"Free Tier ({model_display} via {provider_display})" + (
123
+ " (OAuth)" if oauth_token else ""
124
+ )
125
 
126
  orchestrator = create_orchestrator(
127
  search_handler=search_handler,
 
380
  message: str,
381
  history: list[dict[str, Any]],
382
  mode: str = "simple",
383
+ hf_model: str | None = None,
384
+ hf_provider: str | None = None,
385
  request: gr.Request | None = None,
386
  ) -> AsyncGenerator[gr.ChatMessage | list[gr.ChatMessage], None]:
387
  """
 
391
  message: User's research question
392
  history: Chat history (Gradio format)
393
  mode: Orchestrator mode ("simple" or "advanced")
394
+ hf_model: Selected HuggingFace model ID (from dropdown)
395
+ hf_provider: Selected inference provider (from dropdown)
396
  request: Gradio request object containing OAuth information
397
 
398
  Yields:
 
424
  try:
425
  # use_mock=False - let configure_orchestrator decide based on available keys
426
  # It will use: OAuth token > Env vars > HF Inference (free tier)
427
+ # hf_model and hf_provider come from dropdown, so they're guaranteed to be valid
428
  orchestrator, backend_name = configure_orchestrator(
429
  use_mock=False, # Never use mock in production - HF Inference is the free fallback
430
  mode=effective_mode,
431
  oauth_token=oauth_token,
432
+ hf_model=hf_model, # Can be None, will use defaults in configure_orchestrator
433
+ hf_provider=hf_provider, # Can be None, will use defaults in configure_orchestrator
434
  )
435
 
436
  yield gr.ChatMessage(
 
462
  with gr.Row():
463
  gr.LoginButton()
464
 
465
+ # Get initial model/provider lists (no auth by default)
466
+ # Check if user has auth to determine which model list to use
467
+ has_auth = bool(os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY"))
468
+
469
+ # Get the appropriate model list based on user's actual auth status
470
+ # CRITICAL: Use the list that matches the user's auth status to avoid mismatches
471
+ if has_auth:
472
+ # User has auth - get models available with auth (includes gated models)
473
+ initial_models = get_available_models(has_auth=True)
474
+ # Fallback to unauthenticated models if auth list is empty (shouldn't happen, but be safe)
475
+ if not initial_models:
476
+ initial_models = get_available_models(has_auth=False)
477
+ else:
478
+ # User doesn't have auth - only get unauthenticated models (ungated only)
479
+ initial_models = get_available_models(has_auth=False)
480
+
481
+ # Extract available model IDs (first element of tuples) - this is what Gradio uses as values
482
+ available_model_ids = [m[0] for m in initial_models] if initial_models else []
483
+
484
+ # Prefer latest reasoning models if available, otherwise use fallback
485
+ preferred_models = [
486
+ "Qwen/Qwen3-Next-80B-A3B-Thinking",
487
+ "Qwen/Qwen3-Next-80B-A3B-Instruct",
488
+ "meta-llama/Llama-3.3-70B-Instruct",
489
+ ]
490
+
491
+ # Find first available preferred model from the actual available models list
492
+ # CRITICAL: Only use models that are actually in available_model_ids
493
+ initial_model_id = None
494
+ for preferred in preferred_models:
495
+ if preferred in available_model_ids:
496
+ initial_model_id = preferred
497
+ break
498
+
499
+ # Fall back to first available model from the actual list
500
+ # CRITICAL: Always use a model that's guaranteed to be in available_model_ids
501
+ if not initial_model_id:
502
+ if available_model_ids:
503
+ initial_model_id = available_model_ids[0] # First model ID from available list
504
+ else:
505
+ # No models available - this shouldn't happen, but handle gracefully
506
+ initial_model_id = None
507
+
508
+ # Final safety check: ensure initial_model_id is actually in the available models
509
+ # This is the last line of defense - if it's not in the list, use the first available
510
+ if initial_model_id and initial_model_id not in available_model_ids:
511
+ if available_model_ids:
512
+ initial_model_id = available_model_ids[0]
513
+ else:
514
+ initial_model_id = None
515
+
516
+ # Get providers for the selected model (only if we have a valid model)
517
+ initial_providers = []
518
+ initial_provider = None
519
+ if initial_model_id:
520
+ initial_providers = get_available_providers(initial_model_id, has_auth=has_auth)
521
+ # Ensure we have a valid provider value that's in the choices
522
+ if initial_providers:
523
+ initial_provider = initial_providers[0][0] # Use first provider's ID
524
+ # Safety check: ensure provider is in the list
525
+ available_provider_ids = [p[0] for p in initial_providers]
526
+ if initial_provider not in available_provider_ids:
527
+ initial_provider = initial_providers[0][0] if initial_providers else None
528
+
529
+ # Create dropdowns for model and provider selection
530
+ # Note: Components can be in a hidden row and still work with ChatInterface additional_inputs
531
+ # The visible=False just hides the row itself, but components are still accessible
532
+ with gr.Row(visible=False):
533
+ mode_radio = gr.Radio(
534
+ choices=["simple", "advanced"],
535
+ value="simple",
536
+ label="Orchestrator Mode",
537
+ info="Simple: Linear | Advanced: Multi-Agent (Requires OpenAI)",
538
+ )
539
+
540
+ # Final validation: ensure value is in choices before creating dropdown
541
+ # Gradio requires the value to be exactly one of the choice values (first element of tuples)
542
+ # CRITICAL: Always default to the first available choice to ensure value is always valid
543
+ # Extract model IDs from choices (first element of each tuple)
544
+ model_ids_in_choices = [m[0] for m in initial_models] if initial_models else []
545
+
546
+ # Determine the model value - must be in model_ids_in_choices
547
+ if initial_models and model_ids_in_choices:
548
+ # First try to use initial_model_id if it's valid
549
+ if initial_model_id and initial_model_id in model_ids_in_choices:
550
+ model_value = initial_model_id
551
+ else:
552
+ # Fallback to first available model - guarantees a valid value
553
+ model_value = model_ids_in_choices[0]
554
+ else:
555
+ # No models available - set to None (empty dropdown)
556
+ model_value = None
557
+
558
+ # Absolute final check: if we have choices but model_value is None or invalid, use first choice
559
+ if initial_models and model_ids_in_choices:
560
+ if not model_value or model_value not in model_ids_in_choices:
561
+ model_value = model_ids_in_choices[0]
562
+
563
+ hf_model_dropdown = gr.Dropdown(
564
+ choices=initial_models if initial_models else [],
565
+ value=model_value, # Always set to a valid value from choices (or None if empty)
566
+ label="πŸ€– Reasoning Model",
567
+ info="Select AI model for evidence assessment. Sign in to access gated models.",
568
+ interactive=True,
569
+ allow_custom_value=False, # Only allow values from choices
570
+ )
571
+
572
+ # Final validation for provider: ensure value is in choices
573
+ # CRITICAL: Always default to the first available choice to ensure value is always valid
574
+ provider_ids_in_choices = [p[0] for p in initial_providers] if initial_providers else []
575
+ provider_value = None
576
+ if initial_providers and provider_ids_in_choices:
577
+ # First try to use the preferred provider if it's available
578
+ if initial_provider and initial_provider in provider_ids_in_choices:
579
+ provider_value = initial_provider
580
+ else:
581
+ # Fallback to first available provider - this ensures we always have a valid value
582
+ provider_value = provider_ids_in_choices[0]
583
+
584
+ # Absolute final check: if we have choices but provider_value is None or invalid, use first choice
585
+ if initial_providers and provider_ids_in_choices:
586
+ if not provider_value or provider_value not in provider_ids_in_choices:
587
+ provider_value = provider_ids_in_choices[0]
588
+
589
+ hf_provider_dropdown = gr.Dropdown(
590
+ choices=initial_providers if initial_providers else [],
591
+ value=provider_value, # Always set to a valid value from choices (or None if empty)
592
+ label="⚑ Inference Provider",
593
+ info="Select provider for model execution. Some require authentication.",
594
+ interactive=True,
595
+ allow_custom_value=False, # Only allow values from choices
596
+ )
597
+
598
+ # Update providers when model changes
599
+ def update_providers(model_id: str, request: gr.Request | None = None) -> gr.Dropdown:
600
+ """Update provider list when model changes."""
601
+ # Check if user is authenticated
602
+ oauth_token, _ = extract_oauth_info(request)
603
+ has_auth = bool(
604
+ oauth_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
605
+ )
606
+
607
+ providers = get_available_providers(model_id, has_auth=has_auth)
608
+ if providers:
609
+ # Always set value to first provider to ensure it's valid
610
+ return gr.Dropdown(choices=providers, value=providers[0][0])
611
+ # If no providers, return empty dropdown with no value
612
+ return gr.Dropdown(choices=[], value=None)
613
+
614
+ hf_model_dropdown.change(
615
+ fn=update_providers,
616
+ inputs=[hf_model_dropdown],
617
+ outputs=[hf_provider_dropdown],
618
+ )
619
+
620
+ # Chat interface with model/provider selection
621
  gr.ChatInterface(
622
  fn=research_agent,
623
  title="🧬 DeepCritical",
 
627
  "---\n"
628
  "*Research tool only β€” not for medical advice.* \n"
629
  "**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`\n\n"
630
+ "**Sign in with HuggingFace** above to access premium models and providers."
631
  ),
632
  examples=[
633
  ["What drugs could be repurposed for Alzheimer's disease?", "simple"],
 
636
  ],
637
  additional_inputs_accordion=gr.Accordion(label="βš™οΈ Settings", open=False),
638
  additional_inputs=[
639
+ mode_radio,
640
+ hf_model_dropdown,
641
+ hf_provider_dropdown,
 
 
 
 
 
642
  ],
643
  )
644
 
src/legacy_orchestrator.py CHANGED
@@ -101,12 +101,26 @@ class Orchestrator:
101
  return evidence
102
 
103
  try:
104
- # Deduplicate using semantic similarity
105
- unique_evidence: list[Evidence] = await embeddings.deduplicate(evidence, threshold=0.85)
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  logger.info(
107
  "Deduplicated evidence",
108
  before=len(evidence),
109
- after=len(unique_evidence),
 
110
  )
111
  return unique_evidence
112
  except Exception as e:
 
101
  return evidence
102
 
103
  try:
104
+ # First, deduplicate by URL (exact duplicates) from current evidence batch
105
+ # This prevents the same URL from appearing multiple times in one batch
106
+ seen_urls: set[str] = set()
107
+ unique_by_url: list[Evidence] = []
108
+ for e in evidence:
109
+ if e.citation.url not in seen_urls:
110
+ unique_by_url.append(e)
111
+ seen_urls.add(e.citation.url)
112
+
113
+ # Then, deduplicate using semantic similarity with stricter threshold
114
+ # threshold=0.95 means only remove near-identical content (distance < 0.05)
115
+ # This prevents over-filtering while still removing true duplicates
116
+ unique_evidence: list[Evidence] = await embeddings.deduplicate(
117
+ unique_by_url, threshold=0.95
118
+ )
119
  logger.info(
120
  "Deduplicated evidence",
121
  before=len(evidence),
122
+ after_url=len(unique_by_url),
123
+ after_semantic=len(unique_evidence),
124
  )
125
  return unique_evidence
126
  except Exception as e:
src/orchestrator/planner_agent.py CHANGED
@@ -80,9 +80,9 @@ class PlannerAgent:
80
  raise ConfigurationError("crawl_tool must be callable")
81
 
82
  # Initialize Pydantic AI Agent
83
- self.agent = Agent(
84
  model=self.model,
85
- output_type=ReportPlan,
86
  system_prompt=SYSTEM_PROMPT,
87
  tools=[self.web_search_tool, self.crawl_tool],
88
  retries=3,
@@ -136,7 +136,7 @@ class PlannerAgent:
136
  has_background=bool(report_plan.background_context),
137
  )
138
 
139
- return report_plan
140
 
141
  except Exception as e:
142
  self.logger.error("Planning failed", error=str(e), query=query[:100])
 
80
  raise ConfigurationError("crawl_tool must be callable")
81
 
82
  # Initialize Pydantic AI Agent
83
+ self.agent = Agent( # type: ignore[call-overload]
84
  model=self.model,
85
+ result_type=ReportPlan,
86
  system_prompt=SYSTEM_PROMPT,
87
  tools=[self.web_search_tool, self.crawl_tool],
88
  retries=3,
 
136
  has_background=bool(report_plan.background_context),
137
  )
138
 
139
+ return report_plan # type: ignore[no-any-return]
140
 
141
  except Exception as e:
142
  self.logger.error("Planning failed", error=str(e), query=query[:100])
src/services/llamaindex_rag.py CHANGED
@@ -202,7 +202,7 @@ class LlamaIndexRAGService:
202
  def _configure_llm(self, huggingface_llm: Any, openai_llm: Any) -> None:
203
  """Configure LLM for query synthesis."""
204
  if huggingface_llm is not None and (settings.hf_token or settings.huggingface_api_key):
205
- model_name = settings.huggingface_model or "meta-llama/Llama-3.1-8B-Instruct"
206
  token = settings.hf_token or settings.huggingface_api_key
207
 
208
  # Check if it's HuggingFaceInferenceAPI (API-based) or HuggingFaceLLM (local)
 
202
  def _configure_llm(self, huggingface_llm: Any, openai_llm: Any) -> None:
203
  """Configure LLM for query synthesis."""
204
  if huggingface_llm is not None and (settings.hf_token or settings.huggingface_api_key):
205
+ model_name = settings.huggingface_model or "Qwen/Qwen3-Next-80B-A3B-Thinking"
206
  token = settings.hf_token or settings.huggingface_api_key
207
 
208
  # Check if it's HuggingFaceInferenceAPI (API-based) or HuggingFaceLLM (local)
src/services/statistical_analyzer.py CHANGED
@@ -71,9 +71,9 @@ class StatisticalAnalyzer:
71
  """Lazy initialization of LLM agent for code generation."""
72
  if self._agent is None:
73
  library_versions = get_sandbox_library_prompt()
74
- self._agent = Agent(
75
  model=get_model(),
76
- output_type=str,
77
  system_prompt=f"""You are a biomedical data scientist.
78
 
79
  Generate Python code to analyze research evidence and test hypotheses.
 
71
  """Lazy initialization of LLM agent for code generation."""
72
  if self._agent is None:
73
  library_versions = get_sandbox_library_prompt()
74
+ self._agent = Agent( # type: ignore[call-overload]
75
  model=get_model(),
76
+ result_type=str,
77
  system_prompt=f"""You are a biomedical data scientist.
78
 
79
  Generate Python code to analyze research evidence and test hypotheses.
src/utils/config.py CHANGED
@@ -41,8 +41,9 @@ class Settings(BaseSettings):
41
  description="OpenAI embedding model (used by LlamaIndex RAG)",
42
  )
43
  local_embedding_model: str = Field(
44
- default="all-MiniLM-L6-v2",
45
- description="Local sentence-transformers model (used by EmbeddingService)",
 
46
  )
47
  embedding_provider: Literal["openai", "local", "huggingface"] = Field(
48
  default="local",
@@ -58,8 +59,15 @@ class Settings(BaseSettings):
58
  default=None, description="HuggingFace API token (HF_TOKEN or HUGGINGFACE_API_KEY)"
59
  )
60
  huggingface_model: str = Field(
61
- default="meta-llama/Llama-3.1-8B-Instruct",
62
- description="Default HuggingFace model ID for inference",
 
 
 
 
 
 
 
63
  )
64
 
65
  # PubMed Configuration
 
41
  description="OpenAI embedding model (used by LlamaIndex RAG)",
42
  )
43
  local_embedding_model: str = Field(
44
+ default="BAAI/bge-small-en-v1.5",
45
+ description="Local sentence-transformers model (used by EmbeddingService). "
46
+ "BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2.",
47
  )
48
  embedding_provider: Literal["openai", "local", "huggingface"] = Field(
49
  default="local",
 
59
  default=None, description="HuggingFace API token (HF_TOKEN or HUGGINGFACE_API_KEY)"
60
  )
61
  huggingface_model: str = Field(
62
+ default="Qwen/Qwen3-Next-80B-A3B-Thinking",
63
+ description="Default HuggingFace model ID for inference (gated, requires auth). "
64
+ "Latest reasoning model with advanced thinking capabilities.",
65
+ )
66
+ huggingface_fallback_models: str = Field(
67
+ default="Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct",
68
+ description="Comma-separated list of fallback HuggingFace models for inference API. "
69
+ "Models are tried in order until one succeeds. "
70
+ "Default: Latest reasoning models (Qwen3-Next, Llama-3.3) followed by reliable fallbacks.",
71
  )
72
 
73
  # PubMed Configuration
src/utils/huggingface_chat_client.py CHANGED
@@ -28,14 +28,14 @@ class HuggingFaceChatClient:
28
 
29
  def __init__(
30
  self,
31
- model_name: str = "meta-llama/Llama-3.1-8B-Instruct",
32
  api_key: str | None = None,
33
  provider: str = "auto",
34
  ) -> None:
35
  """Initialize HuggingFace chat client.
36
 
37
  Args:
38
- model_name: HuggingFace model identifier (e.g., "meta-llama/Llama-3.1-8B-Instruct")
39
  api_key: Optional HF_TOKEN for gated models. If None, uses environment token.
40
  provider: Provider name or "auto" for automatic selection.
41
  Options: "auto", "cerebras", "together", "sambanova", etc.
 
28
 
29
  def __init__(
30
  self,
31
+ model_name: str = "Qwen/Qwen3-Next-80B-A3B-Thinking",
32
  api_key: str | None = None,
33
  provider: str = "auto",
34
  ) -> None:
35
  """Initialize HuggingFace chat client.
36
 
37
  Args:
38
+ model_name: HuggingFace model identifier (e.g., "Qwen/Qwen3-Next-80B-A3B-Thinking")
39
  api_key: Optional HF_TOKEN for gated models. If None, uses environment token.
40
  provider: Provider name or "auto" for automatic selection.
41
  Options: "auto", "cerebras", "together", "sambanova", etc.
src/utils/inference_models.py ADDED
@@ -0,0 +1,627 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration for HuggingFace Inference Providers models.
2
+
3
+ Based on: https://huggingface.co/inference/models
4
+
5
+ This module provides model and provider configurations with verification
6
+ capabilities to ensure models are actually available on selected providers.
7
+ """
8
+
9
+ from typing import TypedDict
10
+
11
+
12
+ class ModelProvider(TypedDict):
13
+ """Provider information for a model."""
14
+
15
+ name: str
16
+ input_cost: float | None # $/1M tokens
17
+ output_cost: float | None # $/1M tokens
18
+ latency: float | None # seconds
19
+ throughput: float | None # tokens/second
20
+ supports_tools: bool
21
+ supports_structured: bool
22
+ requires_auth: bool # Whether this provider requires authentication
23
+
24
+
25
+ class InferenceModel(TypedDict):
26
+ """Model configuration with available providers."""
27
+
28
+ model_id: str
29
+ display_name: str
30
+ providers: dict[str, ModelProvider]
31
+ requires_auth: bool # Whether the model itself requires authentication (gated)
32
+ description: str
33
+
34
+
35
+ # Latest Reasoning Models from https://huggingface.co/inference/models
36
+ # Updated with latest reasoning models (Qwen3-Next, Qwen3-235B, Llama-3.3, etc.)
37
+ INFERENCE_MODELS: dict[str, InferenceModel] = {
38
+ # Top-tier reasoning models (latest)
39
+ "Qwen/Qwen3-Next-80B-A3B-Thinking": {
40
+ "model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking",
41
+ "display_name": "Qwen3-Next-80B-A3B-Thinking",
42
+ "requires_auth": True, # Gated
43
+ "description": "Qwen's latest reasoning model - Advanced thinking capabilities, 262K context",
44
+ "providers": {
45
+ "together": {
46
+ "name": "together",
47
+ "input_cost": 0.15,
48
+ "output_cost": 1.5,
49
+ "latency": 0.48,
50
+ "throughput": 202.0,
51
+ "supports_tools": True,
52
+ "supports_structured": True,
53
+ "requires_auth": True,
54
+ },
55
+ "together-fastest": {
56
+ "name": "together-fastest",
57
+ "input_cost": 0.15,
58
+ "output_cost": 1.5,
59
+ "latency": 0.48,
60
+ "throughput": 202.0,
61
+ "supports_tools": True,
62
+ "supports_structured": True,
63
+ "requires_auth": True,
64
+ },
65
+ },
66
+ },
67
+ "Qwen/Qwen3-Next-80B-A3B-Instruct": {
68
+ "model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct",
69
+ "display_name": "Qwen3-Next-80B-A3B-Instruct",
70
+ "requires_auth": True, # Gated
71
+ "description": "Qwen's latest instruction model - High performance, 262K context",
72
+ "providers": {
73
+ "together": {
74
+ "name": "together",
75
+ "input_cost": 0.15,
76
+ "output_cost": 1.5,
77
+ "latency": 0.60,
78
+ "throughput": 153.0,
79
+ "supports_tools": True,
80
+ "supports_structured": True,
81
+ "requires_auth": True,
82
+ },
83
+ "together-fastest": {
84
+ "name": "together-fastest",
85
+ "input_cost": 0.15,
86
+ "output_cost": 1.5,
87
+ "latency": 0.60,
88
+ "throughput": 153.0,
89
+ "supports_tools": True,
90
+ "supports_structured": True,
91
+ "requires_auth": True,
92
+ },
93
+ },
94
+ },
95
+ "Qwen/Qwen3-235B-A22B-Instruct-2507": {
96
+ "model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
97
+ "display_name": "Qwen3-235B-A22B-Instruct",
98
+ "requires_auth": True, # Gated
99
+ "description": "Qwen's massive 235B model - Ultra-high performance, 262K context",
100
+ "providers": {
101
+ "cerebras": {
102
+ "name": "cerebras",
103
+ "input_cost": 0.6,
104
+ "output_cost": 1.2,
105
+ "latency": 0.23,
106
+ "throughput": 509.0,
107
+ "supports_tools": True,
108
+ "supports_structured": False,
109
+ "requires_auth": True,
110
+ },
111
+ "cerebras-fastest": {
112
+ "name": "cerebras-fastest",
113
+ "input_cost": 0.6,
114
+ "output_cost": 1.2,
115
+ "latency": 0.23,
116
+ "throughput": 509.0,
117
+ "supports_tools": True,
118
+ "supports_structured": False,
119
+ "requires_auth": True,
120
+ },
121
+ "together": {
122
+ "name": "together",
123
+ "input_cost": 0.2,
124
+ "output_cost": 0.6,
125
+ "latency": 0.39,
126
+ "throughput": 42.0,
127
+ "supports_tools": True,
128
+ "supports_structured": True,
129
+ "requires_auth": True,
130
+ },
131
+ },
132
+ },
133
+ "Qwen/Qwen3-235B-A22B-Thinking-2507": {
134
+ "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
135
+ "display_name": "Qwen3-235B-A22B-Thinking",
136
+ "requires_auth": True, # Gated
137
+ "description": "Qwen's massive 235B reasoning model - Advanced thinking, 262K context",
138
+ "providers": {
139
+ "cerebras": {
140
+ "name": "cerebras",
141
+ "input_cost": None,
142
+ "output_cost": None,
143
+ "latency": None,
144
+ "throughput": None,
145
+ "supports_tools": False,
146
+ "supports_structured": False,
147
+ "requires_auth": True,
148
+ },
149
+ },
150
+ },
151
+ "meta-llama/Llama-3.3-70B-Instruct": {
152
+ "model_id": "meta-llama/Llama-3.3-70B-Instruct",
153
+ "display_name": "Llama 3.3 70B Instruct",
154
+ "requires_auth": True, # Gated
155
+ "description": "Meta's latest Llama 3.3 - High performance, tools support",
156
+ "providers": {
157
+ "cerebras": {
158
+ "name": "cerebras",
159
+ "input_cost": 0.85,
160
+ "output_cost": 1.2,
161
+ "latency": 0.35,
162
+ "throughput": 948.0,
163
+ "supports_tools": True,
164
+ "supports_structured": False,
165
+ "requires_auth": True,
166
+ },
167
+ "cerebras-fastest": {
168
+ "name": "cerebras-fastest",
169
+ "input_cost": 0.85,
170
+ "output_cost": 1.2,
171
+ "latency": 0.35,
172
+ "throughput": 948.0,
173
+ "supports_tools": True,
174
+ "supports_structured": False,
175
+ "requires_auth": True,
176
+ },
177
+ },
178
+ },
179
+ "openai/gpt-oss-120b": {
180
+ "model_id": "openai/gpt-oss-120b",
181
+ "display_name": "GPT-OSS-120B",
182
+ "requires_auth": True, # Gated
183
+ "description": "OpenAI's open-source 120B model - Ultra-fast inference",
184
+ "providers": {
185
+ "cerebras": {
186
+ "name": "cerebras",
187
+ "input_cost": 0.25,
188
+ "output_cost": 0.69,
189
+ "latency": 0.23,
190
+ "throughput": 1051.0,
191
+ "supports_tools": True,
192
+ "supports_structured": False,
193
+ "requires_auth": True,
194
+ },
195
+ "cerebras-fastest": {
196
+ "name": "cerebras-fastest",
197
+ "input_cost": 0.25,
198
+ "output_cost": 0.69,
199
+ "latency": 0.23,
200
+ "throughput": 1051.0,
201
+ "supports_tools": True,
202
+ "supports_structured": False,
203
+ "requires_auth": True,
204
+ },
205
+ },
206
+ },
207
+ "CohereLabs/command-a-reasoning-08-2025": {
208
+ "model_id": "CohereLabs/command-a-reasoning-08-2025",
209
+ "display_name": "Command A Reasoning 08-2025",
210
+ "requires_auth": True, # Gated
211
+ "description": "Cohere's latest reasoning model - Specialized for reasoning tasks",
212
+ "providers": {
213
+ "cohere": {
214
+ "name": "cohere",
215
+ "input_cost": None,
216
+ "output_cost": None,
217
+ "latency": 0.18,
218
+ "throughput": 94.0,
219
+ "supports_tools": True,
220
+ "supports_structured": False,
221
+ "requires_auth": True,
222
+ },
223
+ },
224
+ },
225
+ "zai-org/GLM-4.6": {
226
+ "model_id": "zai-org/GLM-4.6",
227
+ "display_name": "GLM-4.6",
228
+ "requires_auth": True, # Gated
229
+ "description": "ZAI's GLM-4.6 - High performance reasoning model",
230
+ "providers": {
231
+ "cerebras": {
232
+ "name": "cerebras",
233
+ "input_cost": None,
234
+ "output_cost": None,
235
+ "latency": 0.27,
236
+ "throughput": 381.0,
237
+ "supports_tools": True,
238
+ "supports_structured": False,
239
+ "requires_auth": True,
240
+ },
241
+ "cerebras-fastest": {
242
+ "name": "cerebras-fastest",
243
+ "input_cost": None,
244
+ "output_cost": None,
245
+ "latency": 0.27,
246
+ "throughput": 381.0,
247
+ "supports_tools": True,
248
+ "supports_structured": False,
249
+ "requires_auth": True,
250
+ },
251
+ "zai-org": {
252
+ "name": "zai-org",
253
+ "input_cost": None,
254
+ "output_cost": None,
255
+ "latency": 3.08,
256
+ "throughput": 54.0,
257
+ "supports_tools": True,
258
+ "supports_structured": False,
259
+ "requires_auth": True,
260
+ },
261
+ },
262
+ },
263
+ "meta-llama/Llama-3.1-8B-Instruct": {
264
+ "model_id": "meta-llama/Llama-3.1-8B-Instruct",
265
+ "display_name": "Llama 3.1 8B Instruct",
266
+ "requires_auth": True, # Gated
267
+ "description": "Meta's Llama 3.1 8B - Fast, efficient reasoning",
268
+ "providers": {
269
+ "novita": {
270
+ "name": "novita",
271
+ "input_cost": 0.02,
272
+ "output_cost": 0.05,
273
+ "latency": 0.64,
274
+ "throughput": 84.0,
275
+ "supports_tools": False,
276
+ "supports_structured": False,
277
+ "requires_auth": True,
278
+ },
279
+ "nebius": {
280
+ "name": "nebius",
281
+ "input_cost": 0.03,
282
+ "output_cost": 0.09,
283
+ "latency": 0.35,
284
+ "throughput": 194.0,
285
+ "supports_tools": False,
286
+ "supports_structured": True,
287
+ "requires_auth": True,
288
+ },
289
+ "cerebras": {
290
+ "name": "cerebras",
291
+ "input_cost": 0.1,
292
+ "output_cost": 0.1,
293
+ "latency": 0.33,
294
+ "throughput": 1148.0,
295
+ "supports_tools": False,
296
+ "supports_structured": False,
297
+ "requires_auth": True,
298
+ },
299
+ "sambanova": {
300
+ "name": "sambanova",
301
+ "input_cost": 0.1,
302
+ "output_cost": 0.2,
303
+ "latency": 0.85,
304
+ "throughput": 527.0,
305
+ "supports_tools": True,
306
+ "supports_structured": True,
307
+ "requires_auth": True,
308
+ },
309
+ },
310
+ },
311
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
312
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
313
+ "display_name": "DeepSeek R1 Distill Llama 70B",
314
+ "requires_auth": True, # Gated
315
+ "description": "DeepSeek's reasoning model - Advanced chain-of-thought",
316
+ "providers": {
317
+ "novita": {
318
+ "name": "novita",
319
+ "input_cost": 0.64,
320
+ "output_cost": 0.64,
321
+ "latency": 1.21,
322
+ "throughput": 31.0,
323
+ "supports_tools": False,
324
+ "supports_structured": False,
325
+ "requires_auth": True,
326
+ },
327
+ "sambanova": {
328
+ "name": "sambanova",
329
+ "input_cost": 0.7,
330
+ "output_cost": 1.4,
331
+ "latency": 2.67,
332
+ "throughput": 158.0,
333
+ "supports_tools": False,
334
+ "supports_structured": False,
335
+ "requires_auth": True,
336
+ },
337
+ "nscale": {
338
+ "name": "nscale",
339
+ "input_cost": 0.75,
340
+ "output_cost": 0.75,
341
+ "latency": 1.24,
342
+ "throughput": 16.0,
343
+ "supports_tools": False,
344
+ "supports_structured": False,
345
+ "requires_auth": True,
346
+ },
347
+ },
348
+ },
349
+ "moonshotai/Kimi-K2-Thinking": {
350
+ "model_id": "moonshotai/Kimi-K2-Thinking",
351
+ "display_name": "Kimi K2 Thinking",
352
+ "requires_auth": True, # Gated
353
+ "description": "Moonshot AI's thinking model - Long context reasoning",
354
+ "providers": {
355
+ "novita": {
356
+ "name": "novita",
357
+ "input_cost": 0.48,
358
+ "output_cost": 2.0,
359
+ "latency": 1.60,
360
+ "throughput": 16.0,
361
+ "supports_tools": True,
362
+ "supports_structured": False,
363
+ "requires_auth": True,
364
+ },
365
+ "nebius": {
366
+ "name": "nebius",
367
+ "input_cost": 0.6,
368
+ "output_cost": 2.5,
369
+ "latency": 0.34,
370
+ "throughput": 87.0,
371
+ "supports_tools": True,
372
+ "supports_structured": True,
373
+ "requires_auth": True,
374
+ },
375
+ "together": {
376
+ "name": "together",
377
+ "input_cost": 1.2,
378
+ "output_cost": 4.0,
379
+ "latency": 0.86,
380
+ "throughput": 97.0,
381
+ "supports_tools": True,
382
+ "supports_structured": True,
383
+ "requires_auth": True,
384
+ },
385
+ },
386
+ },
387
+ "allenai/Olmo-3-7B-Instruct": {
388
+ "model_id": "allenai/Olmo-3-7B-Instruct",
389
+ "display_name": "Olmo 3 7B Instruct",
390
+ "requires_auth": False, # Ungated
391
+ "description": "AllenAI's open model - Good reasoning, no auth needed",
392
+ "providers": {
393
+ "publicai": {
394
+ "name": "publicai",
395
+ "input_cost": None,
396
+ "output_cost": None,
397
+ "latency": 1.78,
398
+ "throughput": 36.0,
399
+ "supports_tools": True,
400
+ "supports_structured": True,
401
+ "requires_auth": False,
402
+ },
403
+ },
404
+ },
405
+ "Qwen/Qwen2-7B-Instruct": {
406
+ "model_id": "Qwen/Qwen2-7B-Instruct",
407
+ "display_name": "Qwen2 7B Instruct",
408
+ "requires_auth": False, # Ungated
409
+ "description": "Qwen's efficient model - Fast, no authentication",
410
+ "providers": {
411
+ "featherless-ai": {
412
+ "name": "featherless-ai",
413
+ "input_cost": None,
414
+ "output_cost": None,
415
+ "latency": None,
416
+ "throughput": None,
417
+ "supports_tools": False,
418
+ "supports_structured": False,
419
+ "requires_auth": False,
420
+ },
421
+ },
422
+ },
423
+ "HuggingFaceH4/zephyr-7b-beta": {
424
+ "model_id": "HuggingFaceH4/zephyr-7b-beta",
425
+ "display_name": "Zephyr 7B Beta",
426
+ "requires_auth": False, # Ungated
427
+ "description": "HuggingFace's fine-tuned model - Free tier friendly",
428
+ "providers": {
429
+ "featherless-ai": {
430
+ "name": "featherless-ai",
431
+ "input_cost": None,
432
+ "output_cost": None,
433
+ "latency": None,
434
+ "throughput": None,
435
+ "supports_tools": False,
436
+ "supports_structured": False,
437
+ "requires_auth": False,
438
+ },
439
+ },
440
+ },
441
+ "google/gemma-2-2b-it": {
442
+ "model_id": "google/gemma-2-2b-it",
443
+ "display_name": "Gemma 2 2B IT",
444
+ "requires_auth": True, # Gated
445
+ "description": "Google's compact model - Small but capable",
446
+ "providers": {
447
+ "nebius": {
448
+ "name": "nebius",
449
+ "input_cost": None,
450
+ "output_cost": None,
451
+ "latency": None,
452
+ "throughput": None,
453
+ "supports_tools": False,
454
+ "supports_structured": False,
455
+ "requires_auth": True,
456
+ },
457
+ },
458
+ },
459
+ "microsoft/Phi-3-mini-4k-instruct": {
460
+ "model_id": "microsoft/Phi-3-mini-4k-instruct",
461
+ "display_name": "Phi-3 Mini 4K Instruct",
462
+ "requires_auth": False, # Ungated
463
+ "description": "Microsoft's efficient model - Fast inference",
464
+ "providers": {
465
+ "featherless-ai": {
466
+ "name": "featherless-ai",
467
+ "input_cost": None,
468
+ "output_cost": None,
469
+ "latency": None,
470
+ "throughput": None,
471
+ "supports_tools": False,
472
+ "supports_structured": False,
473
+ "requires_auth": False,
474
+ },
475
+ },
476
+ },
477
+ }
478
+
479
+
480
+ def get_available_models(has_auth: bool = False) -> list[tuple[str, str]]:
481
+ """
482
+ Get list of available models based on authentication status.
483
+
484
+ Args:
485
+ has_auth: Whether user has authentication (OAuth or HF_TOKEN)
486
+
487
+ Returns:
488
+ List of (model_id, display_name) tuples for dropdown
489
+ """
490
+ models = []
491
+ for model_id, model_info in INFERENCE_MODELS.items():
492
+ # If no auth, only show ungated models
493
+ if not has_auth and model_info["requires_auth"]:
494
+ continue
495
+ models.append((model_id, model_info["display_name"]))
496
+ return models
497
+
498
+
499
+ def get_available_providers(model_id: str, has_auth: bool = False) -> list[tuple[str, str]]:
500
+ """
501
+ Get list of available providers for a model based on authentication.
502
+
503
+ This is a convenience wrapper around get_available_providers_verified
504
+ that doesn't perform async verification.
505
+
506
+ Args:
507
+ model_id: The model ID
508
+ has_auth: Whether user has authentication
509
+
510
+ Returns:
511
+ List of (provider_name, display_name) tuples for dropdown
512
+ """
513
+ return get_available_providers_verified(model_id, has_auth=has_auth, verify=False)
514
+
515
+
516
+ def get_model_info(model_id: str) -> InferenceModel | None:
517
+ """Get model information."""
518
+ return INFERENCE_MODELS.get(model_id)
519
+
520
+
521
+ def get_provider_info(model_id: str, provider_name: str) -> ModelProvider | None:
522
+ """Get provider information for a model."""
523
+ model = INFERENCE_MODELS.get(model_id)
524
+ if not model:
525
+ return None
526
+ return model["providers"].get(provider_name)
527
+
528
+
529
+ def verify_provider_availability(
530
+ model_id: str,
531
+ provider_name: str,
532
+ ) -> bool:
533
+ """
534
+ Verify that a model is available on the specified provider (static check).
535
+
536
+ This function checks the static configuration to see if a provider
537
+ is listed for the model. For dynamic verification via API calls,
538
+ use verify_provider_availability_async().
539
+
540
+ Args:
541
+ model_id: The model ID to verify
542
+ provider_name: The provider name to verify
543
+
544
+ Returns:
545
+ True if the model is configured for the provider, False otherwise
546
+ """
547
+ model_config = INFERENCE_MODELS.get(model_id)
548
+ if not model_config:
549
+ return False
550
+ providers = model_config.get("providers", {})
551
+ return provider_name in providers
552
+
553
+
554
+ async def verify_provider_availability_async(
555
+ model_id: str,
556
+ provider_name: str,
557
+ api_key: str | None = None,
558
+ ) -> bool:
559
+ """
560
+ Verify that a model is actually available on the specified provider via API.
561
+
562
+ This function attempts to check if the model/provider combination is valid
563
+ by making a lightweight API call to the HuggingFace Inference API.
564
+
565
+ Note: This is an async function and should be called from an async context.
566
+ For synchronous checks, use verify_provider_availability().
567
+
568
+ Args:
569
+ model_id: The model ID to verify
570
+ provider_name: The provider name to verify
571
+ api_key: Optional API key for authentication (uses env vars if not provided)
572
+
573
+ Returns:
574
+ True if the model is available on the provider, False otherwise
575
+ """
576
+ # For now, fall back to static check
577
+ # TODO: Implement actual API verification when needed
578
+ return verify_provider_availability(model_id, provider_name)
579
+
580
+
581
+ def get_available_providers_verified(
582
+ model_id: str,
583
+ has_auth: bool = False,
584
+ api_key: str | None = None,
585
+ verify: bool = False,
586
+ ) -> list[tuple[str, str]]:
587
+ """
588
+ Get list of available providers for a model with optional verification.
589
+
590
+ Args:
591
+ model_id: The model ID
592
+ has_auth: Whether user has authentication
593
+ api_key: Optional API key for verification
594
+ verify: Whether to verify provider availability (async, requires api_key)
595
+
596
+ Returns:
597
+ List of (provider_name, display_name) tuples for dropdown
598
+ """
599
+ if model_id not in INFERENCE_MODELS:
600
+ return []
601
+
602
+ model = INFERENCE_MODELS[model_id]
603
+ providers = []
604
+
605
+ for provider_name, provider_info in model["providers"].items():
606
+ # If no auth, only show providers that don't require auth
607
+ if not has_auth and provider_info["requires_auth"]:
608
+ continue
609
+
610
+ # Create display name with cost/latency info
611
+ display_parts = [provider_name]
612
+ if provider_info["latency"]:
613
+ display_parts.append(f"{provider_info['latency']:.2f}s")
614
+ if provider_info["input_cost"]:
615
+ display_parts.append(f"${provider_info['input_cost']}/1M")
616
+ if provider_info["supports_tools"]:
617
+ display_parts.append("πŸ”§")
618
+ if provider_info["supports_structured"]:
619
+ display_parts.append("πŸ“Š")
620
+ display_name = " | ".join(display_parts)
621
+
622
+ providers.append((provider_name, display_name))
623
+
624
+ # Note: If verify=True, this should be called from an async context
625
+ # For now, we return static providers. Async verification can be done separately.
626
+
627
+ return providers