Joseph Pollack
committed on
adds or improves: interface, tests, docs, ci, precommit, build, and demo
This view is limited to 50 files because it contains too many changes. See raw diff.
- .env copy.example +124 -0
- .github/README.md +3 -145
- .github/workflows/ci.yml +14 -7
- .github/workflows/docs.yml +6 -3
- .gitignore +2 -0
- .pre-commit-hooks/run_pytest_with_sync.py +109 -3
- README.md +3 -2
- docs/api/agents.md +3 -0
- docs/api/models.md +3 -0
- docs/api/orchestrators.md +3 -0
- docs/api/services.md +3 -0
- docs/api/tools.md +3 -0
- docs/architecture/agents.md +3 -0
- docs/architecture/middleware.md +3 -0
- docs/architecture/services.md +3 -0
- docs/architecture/tools.md +3 -0
- docs/contributing/code-quality.md +3 -0
- docs/contributing/code-style.md +3 -0
- docs/contributing/error-handling.md +3 -0
- docs/contributing/implementation-patterns.md +3 -0
- docs/contributing/index.md +3 -0
- docs/contributing/prompt-engineering.md +3 -0
- docs/contributing/testing.md +3 -0
- docs/getting-started/examples.md +3 -0
- docs/getting-started/installation.md +3 -0
- docs/getting-started/mcp-integration.md +3 -0
- docs/getting-started/quick-start.md +3 -0
- docs/license.md +3 -0
- docs/overview/architecture.md +3 -0
- docs/overview/features.md +3 -0
- docs/team.md +3 -0
- mkdocs.yml +2 -2
- pyproject.toml +50 -41
- requirements.txt +1 -1
- src/agent_factory/judges.py +123 -37
- src/agents/hypothesis_agent.py +2 -2
- src/agents/input_parser.py +3 -3
- src/agents/judge_agent_llm.py +3 -3
- src/agents/knowledge_gap.py +3 -3
- src/agents/long_writer.py +3 -3
- src/agents/report_agent.py +2 -2
- src/agents/tool_selector.py +3 -3
- src/app.py +223 -18
- src/legacy_orchestrator.py +17 -3
- src/orchestrator/planner_agent.py +3 -3
- src/services/llamaindex_rag.py +1 -1
- src/services/statistical_analyzer.py +2 -2
- src/utils/config.py +12 -4
- src/utils/huggingface_chat_client.py +2 -2
- src/utils/inference_models.py +627 -0
.env copy.example
ADDED
@@ -0,0 +1,124 @@
+# ============== LLM CONFIGURATION ==============
+
+# Provider: "openai", "anthropic", or "huggingface"
+LLM_PROVIDER=openai
+
+# API Keys (at least one required for full LLM analysis)
+OPENAI_API_KEY=sk-your-key-here
+ANTHROPIC_API_KEY=sk-ant-your-key-here
+
+# Model names (optional - sensible defaults set in config.py)
+# OPENAI_MODEL=gpt-5.1
+# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
+
+# ============== HUGGINGFACE CONFIGURATION ==============
+
+# HuggingFace Token - enables gated models and higher rate limits
+# Get yours at: https://huggingface.co/settings/tokens
+#
+# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
+# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
+#
+# For HuggingFace Spaces deployment:
+# Set this as a "Secret" in Space Settings -> Variables and secrets
+# Users/judges don't need their own token - the Space secret is used
+#
+HF_TOKEN=hf_your-token-here
+# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)
+
+# Default HuggingFace model for inference (gated, requires auth)
+# Can be overridden in UI dropdown
+# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
+HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
+
+# Fallback models for HuggingFace Inference API (comma-separated)
+# Models are tried in order until one succeeds
+# Format: model1,model2,model3
+# Latest reasoning models first, then reliable fallbacks
+# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
+# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
+HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct
+
+# Override model/provider selection (optional, usually set via UI)
+# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
+# HF_PROVIDER=hyperbolic
+
+# ============== EMBEDDING CONFIGURATION ==============
+
+# Embedding Provider: "openai", "local", or "huggingface"
+# Default: "local" (no API key required)
+EMBEDDING_PROVIDER=local
+
+# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
+OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+
+# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
+# BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2
+LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
+
+# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
+HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+
+# ============== AGENT CONFIGURATION ==============
+
+MAX_ITERATIONS=10
+SEARCH_TIMEOUT=30
+LOG_LEVEL=INFO
+
+# Graph-based execution (experimental)
+# USE_GRAPH_EXECUTION=false
+
+# Budget & Rate Limiting
+# DEFAULT_TOKEN_LIMIT=100000
+# DEFAULT_TIME_LIMIT_MINUTES=10
+# DEFAULT_ITERATIONS_LIMIT=10
+
+# ============== WEB SEARCH CONFIGURATION ==============
+
+# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
+# Default: "duckduckgo" (no API key required)
+WEB_SEARCH_PROVIDER=duckduckgo
+
+# Serper API Key (for Google search via Serper)
+# SERPER_API_KEY=your-serper-key-here
+
+# SearchXNG Host URL (for self-hosted search)
+# SEARCHXNG_HOST=http://localhost:8080
+
+# Brave Search API Key
+# BRAVE_API_KEY=your-brave-key-here
+
+# Tavily API Key
+# TAVILY_API_KEY=your-tavily-key-here
+
+# ============== EXTERNAL SERVICES ==============
+
+# PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec)
+NCBI_API_KEY=your-ncbi-key-here
+
+# Modal (optional - for secure code execution sandbox)
+# MODAL_TOKEN_ID=your-modal-token-id
+# MODAL_TOKEN_SECRET=your-modal-token-secret
+
+# ============== VECTOR DATABASE (ChromaDB) ==============
+
+# ChromaDB storage path
+CHROMA_DB_PATH=./chroma_db
+
+# Persist ChromaDB to disk (default: true)
+# CHROMA_DB_PERSIST=true
+
+# Remote ChromaDB server (optional)
+# CHROMA_DB_HOST=localhost
+# CHROMA_DB_PORT=8000
+
+# ============== RAG SERVICE CONFIGURATION ==============
+
+# ChromaDB collection name for RAG
+# RAG_COLLECTION_NAME=deepcritical_evidence
+
+# Number of top results to retrieve from RAG
+# RAG_SIMILARITY_TOP_K=5
+
+# Automatically ingest evidence into RAG
+# RAG_AUTO_INGEST=true
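The fallback chain in `HF_FALLBACK_MODELS` is consumed as a comma-separated list, tried in order. A minimal sketch of how these variables could be read with pydantic-settings follows; the real `Settings` class lives in `src/utils/config.py` and is not part of this diff, so the class below and its field names (mirroring `settings.hf_token`, `settings.huggingface_model`, `settings.huggingface_fallback_models` referenced later in `judges.py`) are an assumption, not the project's actual implementation.

```python
# Sketch only: the real Settings class is in src/utils/config.py (not shown in this diff).
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class SketchSettings(BaseSettings):
    # Environment variables are matched case-insensitively against field names.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    llm_provider: str = "openai"  # LLM_PROVIDER
    hf_token: str | None = None  # HF_TOKEN
    huggingface_model: str = "Qwen/Qwen3-Next-80B-A3B-Thinking"  # HUGGINGFACE_MODEL
    huggingface_fallback_models: str = Field(
        default="", validation_alias="HF_FALLBACK_MODELS"
    )

    def fallback_models(self) -> list[str]:
        # The comma-separated chain is tried in order until one model succeeds.
        return [m.strip() for m in self.huggingface_fallback_models.split(",") if m.strip()]


if __name__ == "__main__":
    settings = SketchSettings()
    print(settings.llm_provider, settings.fallback_models())
```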
.github/README.md
CHANGED
@@ -28,29 +28,11 @@ tags:
 [](https://github.com/DeepCritical/GradioDemo)
 [](docs/index.md)
 [](https://huggingface.co/spaces/DataQuests/DeepCritical)
-
+[](https://codecov.io/gh/DeepCritical/GradioDemo)
 [](https://discord.gg/qdfnvSPcqP)
 
 </div>
 
-
-# DeepCritical
-
-## Intro
-
-## Features
-
-- **Multi-Source Search**: PubMed, ClinicalTrials.gov, bioRxiv/medRxiv
-- **MCP Integration**: Use our tools from Claude Desktop or any MCP client
-- **HuggingFace OAuth**: Sign in with your HuggingFace account to automatically use your API token
-- **Modal Sandbox**: Secure execution of AI-generated statistical code
-- **LlamaIndex RAG**: Semantic search and evidence synthesis
-- **HuggingfaceInference**: Free tier support with automatic fallback
-- **HuggingfaceMCP Custom Config To Use Community Tools**:
-- **Strongly Typed Composable Graphs**:
-- **Specialized Research Teams of Agents**:
-
 ## Quick Start
 
 ### 1. Environment Setup
@@ -60,14 +42,14 @@ tags:
 pip install uv
 
 # Sync dependencies
-uv sync
+uv sync --all-extras
 ```
 
 ### 2. Run the UI
 
 ```bash
 # Start the Gradio app
-uv run gradio
+uv run gradio src/app.py
 ```
 
 Open your browser to `http://localhost:7860`.
@@ -80,11 +62,6 @@ Open your browser to `http://localhost:7860`.
 - No need to manually enter API keys when logged in
 - OAuth token is used only for the current session and never stored
 
-**Manual API Key (BYOK)**:
-- You can still provide your own API key in the Settings accordion
-- Supports HuggingFace, OpenAI, or Anthropic API keys
-- Manual keys take priority over OAuth tokens
-
 ### 4. Connect via MCP
 
 This application exposes a Model Context Protocol (MCP) server, allowing you to use its search tools directly from Claude Desktop or other MCP clients.
@@ -102,122 +79,3 @@ Add this to your `claude_desktop_config.json`:
 }
 }
 ```
-
-**Available Tools**:
-- `search_pubmed`: Search peer-reviewed biomedical literature.
-- `search_clinical_trials`: Search ClinicalTrials.gov.
-- `search_biorxiv`: Search bioRxiv/medRxiv preprints.
-- `search_all`: Search all sources simultaneously.
-- `analyze_hypothesis`: Secure statistical analysis using Modal sandboxes.
-
-## Architecture
-
-DeepCritical uses a Vertical Slice Architecture:
-
-1. **Search Slice**: Retrieving evidence from PubMed, ClinicalTrials.gov, and bioRxiv.
-2. **Judge Slice**: Evaluating evidence quality using LLMs.
-3. **Orchestrator Slice**: Managing the research loop and UI.
-
-- iterativeResearch
-- deepResearch
-- researchTeam
-
-### Iterative Research
-
-```mermaid
-sequenceDiagram
-    participant IterativeFlow
-    participant ThinkingAgent
-    participant KnowledgeGapAgent
-    participant ToolSelector
-    participant ToolExecutor
-    participant JudgeHandler
-    participant WriterAgent
-
-    IterativeFlow->>IterativeFlow: run(query)
-
-    loop Until complete or max_iterations
-        IterativeFlow->>ThinkingAgent: generate_observations()
-        ThinkingAgent-->>IterativeFlow: observations
-
-        IterativeFlow->>KnowledgeGapAgent: evaluate_gaps()
-        KnowledgeGapAgent-->>IterativeFlow: KnowledgeGapOutput
-
-        alt Research complete
-            IterativeFlow->>WriterAgent: create_final_report()
-            WriterAgent-->>IterativeFlow: final_report
-        else Gaps remain
-            IterativeFlow->>ToolSelector: select_agents(gap)
-            ToolSelector-->>IterativeFlow: AgentSelectionPlan
-
-            IterativeFlow->>ToolExecutor: execute_tool_tasks()
-            ToolExecutor-->>IterativeFlow: ToolAgentOutput[]
-
-            IterativeFlow->>JudgeHandler: assess_evidence()
-            JudgeHandler-->>IterativeFlow: should_continue
-        end
-    end
-```
-
-### Deep Research
-
-```mermaid
-sequenceDiagram
-    actor User
-    participant GraphOrchestrator
-    participant InputParser
-    participant GraphBuilder
-    participant GraphExecutor
-    participant Agent
-    participant BudgetTracker
-    participant WorkflowState
-
-    User->>GraphOrchestrator: run(query)
-    GraphOrchestrator->>InputParser: detect_research_mode(query)
-    InputParser-->>GraphOrchestrator: mode (iterative/deep)
-    GraphOrchestrator->>GraphBuilder: build_graph(mode)
-    GraphBuilder-->>GraphOrchestrator: ResearchGraph
-    GraphOrchestrator->>WorkflowState: init_workflow_state()
-    GraphOrchestrator->>BudgetTracker: create_budget()
-    GraphOrchestrator->>GraphExecutor: _execute_graph(graph)
-
-    loop For each node in graph
-        GraphExecutor->>Agent: execute_node(agent_node)
-        Agent->>Agent: process_input
-        Agent-->>GraphExecutor: result
-        GraphExecutor->>WorkflowState: update_state(result)
-        GraphExecutor->>BudgetTracker: add_tokens(used)
-        GraphExecutor->>BudgetTracker: check_budget()
-        alt Budget exceeded
-            GraphExecutor->>GraphOrchestrator: emit(error_event)
-        else Continue
-            GraphExecutor->>GraphOrchestrator: emit(progress_event)
-        end
-    end
-
-    GraphOrchestrator->>User: AsyncGenerator[AgentEvent]
-```
-
-### Research Team
-
-Critical Deep Research Agent
-
-## Development
-
-### Run Tests
-
-```bash
-uv run pytest
-```
-
-### Run Checks
-
-```bash
-make check
-```
-
-## Links
-
-- [GitHub Repository](https://github.com/DeepCritical/GradioDemo)
.github/workflows/ci.yml
CHANGED
@@ -2,9 +2,9 @@ name: CI
 
 on:
   push:
-    branches: [main,
+    branches: [main, dev]
   pull_request:
-    branches: [main,
+    branches: [main, dev]
 
 jobs:
   test:
@@ -28,7 +28,7 @@ jobs:
 
       - name: Install dependencies
        run: |
-          uv sync --dev
+          uv sync --extra dev
 
       - name: Lint with ruff
        run: |
@@ -43,25 +43,32 @@ jobs:
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
-          uv run pytest tests/unit/ -v -m "not openai and not embedding_provider" --tb=short -p no:logfire
+          uv run pytest tests/unit/ -v -m "not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml
 
      - name: Run local embeddings tests
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
-          uv run pytest tests/ -v -m "local_embeddings" --tb=short -p no:logfire || true
+          uv run pytest tests/ -v -m "local_embeddings" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
        continue-on-error: true  # Allow failures if dependencies not available
 
      - name: Run HuggingFace integration tests
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
-          uv run pytest tests/integration/ -v -m "huggingface and not embedding_provider" --tb=short -p no:logfire || true
+          uv run pytest tests/integration/ -v -m "huggingface and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
        continue-on-error: true  # Allow failures if HF_TOKEN not set
 
      - name: Run non-OpenAI integration tests (excluding embedding providers)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
-          uv run pytest tests/integration/ -v -m "integration and not openai and not embedding_provider" --tb=short -p no:logfire || true
+          uv run pytest tests/integration/ -v -m "integration and not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
        continue-on-error: true  # Allow failures if dependencies not available
+
+      - name: Upload coverage reports to Codecov
+        uses: codecov/codecov-action@v5
+        continue-on-error: true
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          slug: DeepCritical/GradioDemo
.github/workflows/docs.yml
CHANGED
@@ -32,12 +32,13 @@ jobs:
        python-version: '3.11'
 
      - name: Install uv
-
-
+        uses: astral-sh/setup-uv@v5
+        with:
+          version: "latest"
 
      - name: Install dependencies
        run: |
-          uv sync --
+          uv sync --extra dev
 
      - name: Build documentation
        run: |
@@ -49,7 +50,9 @@ jobs:
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./site
+          publish_branch: gh-pages
          cname: false
+          keep_files: false
 
.gitignore
CHANGED
@@ -72,6 +72,8 @@ logs/
 .pytest_cache/
 .mypy_cache/
 .coverage
+.coverage.*
+coverage.xml
 htmlcov/
 
 # Database files
.pre-commit-hooks/run_pytest_with_sync.py
CHANGED
@@ -1,8 +1,109 @@
 #!/usr/bin/env python3
 """Cross-platform pytest runner that syncs dependencies before running tests."""
 
+import shutil
 import subprocess
 import sys
+from pathlib import Path
+
+
+def clean_caches(project_root: Path) -> None:
+    """Remove pytest and Python cache directories and files.
+
+    Only scans specific directories (src/, tests/) to avoid resource
+    exhaustion from scanning large directories like .venv on Windows.
+    """
+    # Directories to scan for caches (only project code, not dependencies)
+    scan_dirs = ["src", "tests", ".pre-commit-hooks"]
+
+    # Directories to exclude (to avoid resource issues)
+    exclude_dirs = {
+        ".venv",
+        "venv",
+        "ENV",
+        "env",
+        ".git",
+        "node_modules",
+        "dist",
+        "build",
+        ".eggs",
+        "reference_repos",
+        "folder",
+    }
+
+    cache_patterns = [
+        ".pytest_cache",
+        "__pycache__",
+        "*.pyc",
+        "*.pyo",
+        "*.pyd",
+        ".mypy_cache",
+        ".ruff_cache",
+    ]
+
+    def should_exclude(path: Path) -> bool:
+        """Check if a path should be excluded from cache cleanup."""
+        # Check if any parent directory is in exclude list
+        for parent in path.parents:
+            if parent.name in exclude_dirs:
+                return True
+        # Check if the path itself is excluded
+        if path.name in exclude_dirs:
+            return True
+        return False
+
+    cleaned = []
+
+    # Only scan specific directories to avoid resource exhaustion
+    for scan_dir in scan_dirs:
+        scan_path = project_root / scan_dir
+        if not scan_path.exists():
+            continue
+
+        for pattern in cache_patterns:
+            if "*" in pattern:
+                # Handle glob patterns for files
+                try:
+                    for cache_file in scan_path.rglob(pattern):
+                        if should_exclude(cache_file):
+                            continue
+                        try:
+                            if cache_file.is_file():
+                                cache_file.unlink()
+                                cleaned.append(str(cache_file.relative_to(project_root)))
+                        except OSError:
+                            pass  # Ignore errors (file might be locked or already deleted)
+                except OSError:
+                    pass  # Ignore errors during directory traversal
+            else:
+                # Handle directory patterns
+                try:
+                    for cache_dir in scan_path.rglob(pattern):
+                        if should_exclude(cache_dir):
+                            continue
+                        try:
+                            if cache_dir.is_dir():
+                                shutil.rmtree(cache_dir, ignore_errors=True)
+                                cleaned.append(str(cache_dir.relative_to(project_root)))
+                        except OSError:
+                            pass  # Ignore errors (directory might be locked)
+                except OSError:
+                    pass  # Ignore errors during directory traversal
+
+    # Also clean root-level caches (like .pytest_cache in project root)
+    for pattern in [".pytest_cache", ".mypy_cache", ".ruff_cache"]:
+        cache_path = project_root / pattern
+        if cache_path.exists() and cache_path.is_dir():
+            try:
+                shutil.rmtree(cache_path, ignore_errors=True)
+                cleaned.append(pattern)
+            except OSError:
+                pass
+
+    if cleaned:
+        print(f"Cleaned {len(cleaned)} cache items")
+    else:
+        print("No cache files found to clean")
 
 
 def run_command(
@@ -28,7 +129,6 @@ def run_command(
 def main() -> int:
     """Main entry point."""
     import os
-    from pathlib import Path
 
     # Get the project root (where pyproject.toml is)
     script_dir = Path(__file__).parent
@@ -37,6 +137,10 @@ def main() -> int:
     # Change to project root to ensure uv works correctly
     os.chdir(project_root)
 
+    # Clean caches before running tests
+    print("Cleaning pytest and Python caches...")
+    clean_caches(project_root)
+
     # Check if uv is available
     if run_command(["uv", "--version"], check=False) != 0:
         print("Error: uv not found. Please install uv: https://github.com/astral-sh/uv")
@@ -48,8 +152,8 @@ def main() -> int:
 
     # Sync dependencies - always include dev
     # Note: embeddings dependencies are now in main dependencies, not optional
-    #
-    sync_cmd = ["uv", "sync", "--dev"]
+    # Use --extra dev for [project.optional-dependencies].dev (not --dev which is for [dependency-groups])
+    sync_cmd = ["uv", "sync", "--extra", "dev"]
 
     print(f"Syncing dependencies for {test_type} tests...")
     if run_command(sync_cmd, cwd=project_root) != 0:
@@ -65,6 +169,7 @@ def main() -> int:
             "--tb=short",
             "-p",
             "no:logfire",
+            "--cache-clear",  # Clear pytest cache before running
         ]
     elif test_type == "embeddings":
         pytest_args = [
@@ -75,6 +180,7 @@ def main() -> int:
             "--tb=short",
             "-p",
             "no:logfire",
+            "--cache-clear",  # Clear pytest cache before running
        ]
    else:
        pytest_args = []
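To exercise the new cache cleanup without going through pre-commit, a rough sketch follows. The hook sits in `.pre-commit-hooks/`, which is not an importable package, so the module is loaded by file path; this standalone invocation is an assumption for illustration and is not part of the commit itself.

```python
# Hypothetical direct invocation of clean_caches() added in this commit.
# The hook lives under .pre-commit-hooks/, which is not an importable package,
# so the module is loaded from its file path instead of via "import".
import importlib.util
from pathlib import Path

hook_path = Path(".pre-commit-hooks") / "run_pytest_with_sync.py"
spec = importlib.util.spec_from_file_location("run_pytest_with_sync", hook_path)
assert spec is not None and spec.loader is not None
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

# Removes .pytest_cache, __pycache__, *.pyc and similar under src/, tests/,
# and .pre-commit-hooks/, while skipping .venv and the other excluded directories.
module.clean_caches(Path.cwd())
```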
README.md
CHANGED
@@ -35,7 +35,7 @@ tags:
 [](https://github.com/DeepCritical/GradioDemo)
 [](docs/index.md)
 [](https://huggingface.co/spaces/DataQuests/DeepCritical)
-
+[](https://codecov.io/gh/DeepCritical/GradioDemo)
 [](https://discord.gg/qdfnvSPcqP)
 
 
@@ -76,6 +76,7 @@ For this hackathon we're proposing a simple yet powerful Deep Research Agent tha
 - [] Create Deep Critical Drug Reporposing / Discovery Demo
 - [] Create Deep Critical Literal Review
 - [] Create Deep Critical Hypothesis Generator
+- [] Create PyPi Package
 
 ## Completed
 
@@ -118,5 +119,5 @@ For this hackathon we're proposing a simple yet powerful Deep Research Agent tha
 [](https://github.com/DeepCritical/GradioDemo)
 [](docs/index.md)
 [](https://huggingface.co/spaces/DataQuests/DeepCritical)
-
+[](https://codecov.io/gh/DeepCritical/GradioDemo)
 [](https://discord.gg/qdfnvSPcqP)
docs/api/agents.md
CHANGED
@@ -258,3 +258,6 @@ def create_input_parser_agent(model: Any | None = None) -> InputParserAgent
+
+
+
docs/api/models.md
CHANGED
@@ -236,3 +236,6 @@ class BudgetStatus(BaseModel):
+
+
+
docs/api/orchestrators.md
CHANGED
@@ -183,3 +183,6 @@ Runs Magentic orchestration.
+
+
+
docs/api/services.md
CHANGED
@@ -189,3 +189,6 @@ Analyzes a hypothesis using statistical methods.
+
+
+
docs/api/tools.md
CHANGED
@@ -223,3 +223,6 @@ Searches multiple tools in parallel.
+
+
+
docs/architecture/agents.md
CHANGED
@@ -180,3 +180,6 @@ Factory functions:
+
+
+
docs/architecture/middleware.md
CHANGED
@@ -130,3 +130,6 @@ All middleware components use `ContextVar` for thread-safe isolation:
+
+
+
docs/architecture/services.md
CHANGED
@@ -130,3 +130,6 @@ if settings.has_openai_key:
+
+
+
docs/architecture/tools.md
CHANGED
@@ -163,3 +163,6 @@ search_handler = SearchHandler(
+
+
+
docs/contributing/code-quality.md
CHANGED
@@ -69,3 +69,6 @@ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
+
+
+
docs/contributing/code-style.md
CHANGED
@@ -49,3 +49,6 @@ result = await loop.run_in_executor(None, cpu_bound_function, args)
+
+
+
docs/contributing/error-handling.md
CHANGED
@@ -57,3 +57,6 @@ except httpx.HTTPError as e:
+
+
+
docs/contributing/implementation-patterns.md
CHANGED
@@ -72,3 +72,6 @@ def get_embedding_service() -> EmbeddingService:
+
+
+
docs/contributing/index.md
CHANGED
@@ -151,3 +151,6 @@ Thank you for contributing to DeepCritical!
+
+
+
docs/contributing/prompt-engineering.md
CHANGED
@@ -57,3 +57,6 @@ This document outlines prompt engineering guidelines and citation validation rul
+
+
+
docs/contributing/testing.md
CHANGED
@@ -53,3 +53,6 @@ async def test_real_pubmed_search():
+
+
+
docs/getting-started/examples.md
CHANGED
@@ -197,3 +197,6 @@ USE_GRAPH_EXECUTION=true
+
+
+
docs/getting-started/installation.md
CHANGED
@@ -136,3 +136,6 @@ uv run pre-commit install
+
+
+
docs/getting-started/mcp-integration.md
CHANGED
@@ -203,3 +203,6 @@ You can configure multiple DeepCritical instances:
+
+
+
docs/getting-started/quick-start.md
CHANGED
@@ -107,3 +107,6 @@ What are the active clinical trials investigating Alzheimer's disease treatments
+
+
+
docs/license.md
CHANGED
@@ -27,3 +27,6 @@ SOFTWARE.
+
+
+
docs/overview/architecture.md
CHANGED
@@ -184,3 +184,6 @@ The system supports complex research workflows through:
+
+
+
docs/overview/features.md
CHANGED
@@ -136,3 +136,6 @@ DeepCritical provides a comprehensive set of features for AI-assisted research:
+
+
+
docs/team.md
CHANGED
@@ -32,3 +32,6 @@ We welcome contributions! See the [Contributing Guide](contributing/index.md) fo
+
+
+
mkdocs.yml
CHANGED
@@ -62,8 +62,8 @@ markdown_extensions:
   - pymdownx.tasklist:
       custom_checkbox: true
   - pymdownx.emoji:
-
-
+      emoji_generator: !!python/name:pymdownx.emoji.to_svg
+      emoji_index: !!python/name:pymdownx.emoji.twemoji
   - admonition
   - pymdownx.details
   - pymdownx.superfences
pyproject.toml
CHANGED
@@ -5,21 +5,16 @@ description = "AI-Native Drug Repurposing Research Agent"
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
-    # Core
     "pydantic>=2.7",
     "pydantic-settings>=2.2",
     "pydantic-ai>=0.0.16",
-    # AI Providers
     "openai>=1.0.0",
     "anthropic>=0.18.0",
-    "
-    "
-    "
-    "
-    # UI
-    "gradio[mcp,oauth]>=6.0.0",
-    # Utils
+    "httpx>=0.27",
+    "beautifulsoup4>=4.12",
+    "xmltodict>=0.13",
+    "huggingface-hub>=0.20.0",
+    "gradio[mcp,oauth]>=6.0.0",
     "python-dotenv>=1.0",  # .env loading
     "tenacity>=8.2",  # Retry logic
     "structlog>=24.1",  # Structured logging
@@ -40,28 +35,30 @@ dependencies = [
     "modal>=0.63.0",
     "llama-index-llms-openai>=0.6.9",
     "llama-index-embeddings-openai>=0.5.1",
+    "pydantic-ai-slim[huggingface]>=0.0.18",
+    "pytest>=9.0.1",
+    "pytest-cov>=7.0.0",
 ]
 
 [project.optional-dependencies]
 dev = [
-
-    "pytest>=
-    "pytest-
-    "pytest-
-    "pytest-
-    "
-    "
-    "
-
-    # Quality
-    "ruff>=0.4.0",
-    "mypy>=1.10",
+    "pytest>=9.0.1",
+    "pytest-asyncio>=1.3.0",
+    "pytest-sugar>=1.1.1",
+    "pytest-cov>=7.0.0",
+    "pytest-mock>=3.15.1",
+    "respx>=0.22.0",
+    "typer>=0.9.0",
+    "ruff>=0.14.6",
+    "mypy>=1.18.2",
     "pre-commit>=3.7",
-    # Documentation
     "mkdocs>=1.5.0",
-    "mkdocs-material>=9.
-    "mkdocs-mermaid2-plugin>=1.
-    "mkdocs-minify-plugin>=0.
+    "mkdocs-material>=9.7.0",
+    "mkdocs-mermaid2-plugin>=1.2.3",
+    "mkdocs-minify-plugin>=0.8.0",
+    "mkdocs-codeinclude-plugin>=0.2.1",
+    "mkdocs-macros-plugin>=1.5.0",
+    "pymdown-extensions>=10.17.2",
 ]
 
 [build-system]
@@ -106,6 +103,9 @@ ignore = [
     "RUF100",  # Unused noqa (version differences between local/CI)
 ]
 
+[tool.ruff.lint.per-file-ignores]
+"src/app.py" = ["PLR0915"]  # Too many statements (Gradio UI setup is complex)
+
 [tool.ruff.lint.isort]
 known-first-party = ["src"]
 
@@ -123,6 +123,7 @@ exclude = [
     "^reference_repos/",
     "^examples/",
     "^folder/",
+    "^src/app\\.py$",  # Gradio UI setup - ignore mypy checks
 ]
 
 # ============== PYTEST CONFIG ==============
@@ -136,6 +137,25 @@ addopts = [
     "-p",
     "no:logfire",
 ]
+# Suppress known warnings that don't indicate test failures
+# These are from third-party libraries and don't affect test correctness
+filterwarnings = [
+    # Pydantic deprecation warnings from unittest.mock introspection
+    # These occur when mock tries to introspect Pydantic models
+    "ignore::pydantic.warnings.PydanticDeprecatedSince20",
+    "ignore::pydantic.warnings.PydanticDeprecatedSince211",
+    # Gradio UI warnings (not relevant for unit tests)
+    "ignore::UserWarning:gradio.components.dropdown",
+    "ignore::UserWarning:gradio.oauth",
+    # Pattern-based filters for Pydantic deprecation messages (catch-all)
+    "ignore:The `__fields__` attribute is deprecated.*",
+    "ignore:The `__fields_set__` attribute is deprecated.*",
+    "ignore:Accessing the 'model_computed_fields' attribute.*",
+    "ignore:Accessing the 'model_fields' attribute.*",
+    # Also catch warnings from unittest.mock module
+    "ignore::DeprecationWarning:unittest.mock",
+]
+# Note: pytest only runs test files, so source files don't need exclusion
 markers = [
     "unit: Unit tests (mocked)",
     "integration: Integration tests (real APIs)",
@@ -149,7 +169,10 @@ markers = [
 # ============== COVERAGE CONFIG ==============
 [tool.coverage.run]
 source = ["src"]
-omit = [
+omit = [
+    "*/__init__.py",
+    "src/app.py",  # Exclude Gradio UI from coverage
+]
 
 [tool.coverage.report]
 exclude_lines = [
@@ -157,17 +180,3 @@ exclude_lines = [
     "if TYPE_CHECKING:",
     "raise NotImplementedError",
 ]
-
-[dependency-groups]
-dev = [
-    "mkdocs-codeinclude-plugin>=0.2.1",
-    "mkdocs-macros-plugin>=1.5.0",
-    "pytest>=9.0.1",
-    "pytest-asyncio>=1.3.0",
-    "pytest-cov>=7.0.0",
-    "pytest-mock>=3.15.1",
-    "pytest-sugar>=1.1.1",
-    "respx>=0.22.0",
-    "structlog>=25.5.0",
-    "ty>=0.0.1a28",
-]
requirements.txt
CHANGED
@@ -26,7 +26,7 @@ beautifulsoup4>=4.12
 xmltodict>=0.13
 
 # UI (Gradio with MCP server support)
-
+gradio[mcp]>=6.0.0
 
 # Utils
 python-dotenv>=1.0
src/agent_factory/judges.py
CHANGED
|
@@ -2,19 +2,34 @@
|
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import json
|
| 5 |
-
|
|
|
|
| 6 |
|
| 7 |
import structlog
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
from pydantic_ai import Agent
|
| 10 |
from pydantic_ai.models.anthropic import AnthropicModel
|
| 11 |
-
from pydantic_ai.models.
|
| 12 |
-
from pydantic_ai.models.openai import OpenAIChatModel as OpenAIModel
|
| 13 |
-
from pydantic_ai.providers.anthropic import AnthropicProvider
|
| 14 |
-
from pydantic_ai.providers.huggingface import HuggingFaceProvider
|
| 15 |
-
from pydantic_ai.providers.openai import OpenAIProvider
|
| 16 |
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
from src.prompts.judge import (
|
| 19 |
SYSTEM_PROMPT,
|
| 20 |
format_empty_evidence_prompt,
|
|
@@ -35,26 +50,43 @@ def get_model() -> Any:
|
|
| 35 |
llm_provider = settings.llm_provider
|
| 36 |
|
| 37 |
if llm_provider == "anthropic":
|
| 38 |
-
|
| 39 |
-
return AnthropicModel(settings.anthropic_model, provider=provider)
|
| 40 |
|
| 41 |
if llm_provider == "huggingface":
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
if llm_provider == "openai":
|
| 48 |
-
|
| 49 |
-
return OpenAIModel(settings.openai_model, provider=openai_provider)
|
| 50 |
|
| 51 |
# Default to HuggingFace if provider is unknown or not specified
|
| 52 |
if llm_provider != "huggingface":
|
| 53 |
logger.warning("Unknown LLM provider, defaulting to HuggingFace", provider=llm_provider)
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
class JudgeHandler:
|
|
@@ -72,9 +104,9 @@ class JudgeHandler:
|
|
| 72 |
model: Optional PydanticAI model. If None, uses config default.
|
| 73 |
"""
|
| 74 |
self.model = model or get_model()
|
| 75 |
-
self.agent = Agent(
|
| 76 |
model=self.model,
|
| 77 |
-
|
| 78 |
system_prompt=SYSTEM_PROMPT,
|
| 79 |
retries=3,
|
| 80 |
)
|
|
@@ -112,7 +144,7 @@ class JudgeHandler:
|
|
| 112 |
try:
|
| 113 |
# Run the agent with structured output
|
| 114 |
result = await self.agent.run(user_prompt)
|
| 115 |
-
assessment = result.output
|
| 116 |
|
| 117 |
logger.info(
|
| 118 |
"Assessment complete",
|
|
@@ -121,7 +153,7 @@ class JudgeHandler:
|
|
| 121 |
confidence=assessment.confidence,
|
| 122 |
)
|
| 123 |
|
| 124 |
-
return assessment
|
| 125 |
|
| 126 |
except Exception as e:
|
| 127 |
logger.error("Assessment failed", error=str(e))
|
|
@@ -167,25 +199,58 @@ class JudgeHandler:
|
|
| 167 |
class HFInferenceJudgeHandler:
|
| 168 |
"""
|
| 169 |
JudgeHandler using HuggingFace Inference API for FREE LLM calls.
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
"""
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
"
|
| 176 |
-
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
"""
|
| 181 |
Initialize with HF Inference client.
|
| 182 |
|
| 183 |
Args:
|
| 184 |
model_id: Optional specific model ID. If None, uses FALLBACK_MODELS chain.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
"""
|
| 186 |
self.model_id = model_id
|
| 187 |
-
|
| 188 |
-
self.
|
|
|
|
|
|
|
| 189 |
self.call_count = 0
|
| 190 |
self.last_question: str | None = None
|
| 191 |
self.last_evidence: list[Evidence] | None = None
|
|
@@ -209,7 +274,7 @@ class HFInferenceJudgeHandler:
|
|
| 209 |
else:
|
| 210 |
user_prompt = format_empty_evidence_prompt(question)
|
| 211 |
|
| 212 |
-
models_to_try: list[str] = [self.model_id] if self.model_id else self.
|
| 213 |
last_error: Exception | None = None
|
| 214 |
|
| 215 |
for model in models_to_try:
|
|
@@ -261,14 +326,35 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
|
|
| 261 |
]
|
| 262 |
|
| 263 |
# Use chat_completion (conversational task - supported by all models)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
response = await loop.run_in_executor(
|
| 265 |
None,
|
| 266 |
-
lambda: self.client.chat_completion(
|
| 267 |
-
messages=messages,
|
| 268 |
-
model=model,
|
| 269 |
-
max_tokens=1024,
|
| 270 |
-
temperature=0.1,
|
| 271 |
-
),
|
| 272 |
)
|
| 273 |
|
| 274 |
# Extract content from response
|
|
|
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import json
|
| 5 |
+
import os
|
| 6 |
+
from typing import Any
|
| 7 |
|
| 8 |
import structlog
|
| 9 |
from huggingface_hub import InferenceClient
|
| 10 |
from pydantic_ai import Agent
|
| 11 |
from pydantic_ai.models.anthropic import AnthropicModel
|
| 12 |
+
from pydantic_ai.models.openai import OpenAIModel # type: ignore[attr-defined]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
| 14 |
|
| 15 |
+
# Try to import HuggingFace support (may not be available in all pydantic-ai versions)
|
| 16 |
+
# According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires
|
| 17 |
+
# pydantic-ai with huggingface extra or pydantic-ai-slim[huggingface]
|
| 18 |
+
# There are two ways to use HuggingFace:
|
| 19 |
+
# 1. Inference API: HuggingFaceModel with HuggingFaceProvider (uses AsyncInferenceClient internally)
|
| 20 |
+
# 2. Local models: Would use transformers directly (not via pydantic-ai)
|
| 21 |
+
try:
|
| 22 |
+
from huggingface_hub import AsyncInferenceClient
|
| 23 |
+
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 24 |
+
from pydantic_ai.providers.huggingface import HuggingFaceProvider
|
| 25 |
+
|
| 26 |
+
_HUGGINGFACE_AVAILABLE = True
|
| 27 |
+
except ImportError:
|
| 28 |
+
HuggingFaceModel = None # type: ignore[assignment, misc]
|
| 29 |
+
HuggingFaceProvider = None # type: ignore[assignment, misc]
|
| 30 |
+
AsyncInferenceClient = None # type: ignore[assignment, misc]
|
| 31 |
+
_HUGGINGFACE_AVAILABLE = False
|
| 32 |
+
|
| 33 |
from src.prompts.judge import (
|
| 34 |
SYSTEM_PROMPT,
|
| 35 |
format_empty_evidence_prompt,
|
|
|
|
| 50 |
llm_provider = settings.llm_provider
|
| 51 |
|
| 52 |
if llm_provider == "anthropic":
|
| 53 |
+
return AnthropicModel(settings.anthropic_model, api_key=settings.anthropic_api_key) # type: ignore[call-arg]
|
|
|
|
| 54 |
|
| 55 |
if llm_provider == "huggingface":
|
| 56 |
+
if not _HUGGINGFACE_AVAILABLE:
|
| 57 |
+
raise ImportError(
|
| 58 |
+
"HuggingFace models are not available in this version of pydantic-ai. "
|
| 59 |
+
"Please install with: uv add 'pydantic-ai[huggingface]' or use 'openai'/'anthropic' as the LLM provider."
|
| 60 |
+
)
|
| 61 |
+
# Inference API - uses HuggingFace Inference API via AsyncInferenceClient
|
| 62 |
+
# Per https://ai.pydantic.dev/models/huggingface/#configure-the-provider
|
| 63 |
+
model_name = settings.huggingface_model or "Qwen/Qwen3-Next-80B-A3B-Thinking"
|
| 64 |
+
# Create AsyncInferenceClient for inference API
|
| 65 |
+
hf_client = AsyncInferenceClient(api_key=settings.hf_token) # type: ignore[misc]
|
| 66 |
+
# Pass client to HuggingFaceProvider for inference API usage
|
| 67 |
+
provider = HuggingFaceProvider(hf_client=hf_client) # type: ignore[misc]
|
| 68 |
+
return HuggingFaceModel(model_name, provider=provider) # type: ignore[misc]
|
| 69 |
|
| 70 |
if llm_provider == "openai":
|
| 71 |
+
return OpenAIModel(settings.openai_model, api_key=settings.openai_api_key) # type: ignore[call-overload]
|
|
|
|
| 72 |
|
| 73 |
# Default to HuggingFace if provider is unknown or not specified
|
| 74 |
if llm_provider != "huggingface":
|
| 75 |
logger.warning("Unknown LLM provider, defaulting to HuggingFace", provider=llm_provider)
|
| 76 |
|
| 77 |
+
if not _HUGGINGFACE_AVAILABLE:
|
| 78 |
+
raise ImportError(
|
| 79 |
+
"HuggingFace models are not available in this version of pydantic-ai. "
|
| 80 |
+
"Please install with: uv add 'pydantic-ai[huggingface]' or set LLM_PROVIDER to 'openai'/'anthropic'."
|
| 81 |
+
)
|
| 82 |
+
# Inference API - uses HuggingFace Inference API via AsyncInferenceClient
|
| 83 |
+
# Per https://ai.pydantic.dev/models/huggingface/#configure-the-provider
|
| 84 |
+
model_name = settings.huggingface_model or "Qwen/Qwen3-Next-80B-A3B-Thinking"
|
| 85 |
+
# Create AsyncInferenceClient for inference API
|
| 86 |
+
hf_client = AsyncInferenceClient(api_key=settings.hf_token) # type: ignore[misc]
|
| 87 |
+
# Pass client to HuggingFaceProvider for inference API usage
|
| 88 |
+
provider = HuggingFaceProvider(hf_client=hf_client) # type: ignore[misc]
|
| 89 |
+
return HuggingFaceModel(model_name, provider=provider) # type: ignore[misc]
|
| 90 |
|
| 91 |
|
| 92 |
class JudgeHandler:
|
|
|
|
| 104 |
model: Optional PydanticAI model. If None, uses config default.
|
| 105 |
"""
|
| 106 |
         self.model = model or get_model()
+        self.agent = Agent(  # type: ignore[call-overload]
             model=self.model,
+            result_type=JudgeAssessment,
             system_prompt=SYSTEM_PROMPT,
             retries=3,
         )

         try:
             # Run the agent with structured output
             result = await self.agent.run(user_prompt)
+            assessment = result.output  # type: ignore[attr-defined]

             logger.info(
                 "Assessment complete",
                 confidence=assessment.confidence,
             )

+            return assessment  # type: ignore[no-any-return]

         except Exception as e:
             logger.error("Assessment failed", error=str(e))

 class HFInferenceJudgeHandler:
     """
     JudgeHandler using HuggingFace Inference API for FREE LLM calls.
+
+    Models are loaded from environment variable HF_FALLBACK_MODELS (comma-separated)
+    or use defaults based on currently available inference providers:
+    - meta-llama/Llama-3.1-8B-Instruct (gated, multiple providers)
+    - HuggingFaceH4/zephyr-7b-beta (ungated, featherless-ai)
+    - Qwen/Qwen2-7B-Instruct (ungated, featherless-ai)
+    - google/gemma-2-2b-it (gated, nebius)
     """

+    @classmethod
+    def _get_fallback_models(cls) -> list[str]:
+        """Get fallback models from env var or use defaults."""
+        from src.utils.config import settings
+
+        # Get from env var or settings
+        models_str = os.getenv("HF_FALLBACK_MODELS") or settings.huggingface_fallback_models
+
+        # Parse comma-separated list
+        models = [m.strip() for m in models_str.split(",") if m.strip()]

+        # Default fallback if empty
+        if not models:
+            models = [
+                "meta-llama/Llama-3.1-8B-Instruct",  # Primary (gated, multiple providers)
+                "HuggingFaceH4/zephyr-7b-beta",  # Fallback (ungated, featherless-ai)
+                "Qwen/Qwen2-7B-Instruct",  # Fallback (ungated, featherless-ai)
+                "google/gemma-2-2b-it",  # Fallback (gated, nebius)
+            ]
+
+        return models
+
+    def __init__(
+        self,
+        model_id: str | None = None,
+        api_key: str | None = None,
+        provider: str | None = None,
+    ) -> None:
         """
         Initialize with HF Inference client.

         Args:
             model_id: Optional specific model ID. If None, uses FALLBACK_MODELS chain.
+            api_key: Optional HuggingFace API key (OAuth token or HF_TOKEN).
+                If provided, will use authenticated access for gated models.
+            provider: Optional inference provider name (e.g., "novita", "nebius").
+                If provided, will use that specific provider.
         """
         self.model_id = model_id
+        self.api_key = api_key
+        self.provider = provider
+        # Use provided API key, or fall back to env var, or use no auth
+        self.client = InferenceClient(token=api_key) if api_key else InferenceClient()
         self.call_count = 0
         self.last_question: str | None = None
         self.last_evidence: list[Evidence] | None = None

         else:
             user_prompt = format_empty_evidence_prompt(question)

+        models_to_try: list[str] = [self.model_id] if self.model_id else self._get_fallback_models()
         last_error: Exception | None = None

         for model in models_to_try:

             ]

             # Use chat_completion (conversational task - supported by all models)
+            # HuggingFace Inference Providers format: "model-id:provider" or use provider parameter
+            # According to docs: https://huggingface.co/docs/inference-providers
+            model_to_use = model
+            provider_param = None
+            if self.provider:
+                # Format: model-id:provider for explicit provider selection
+                model_to_use = f"{model}:{self.provider}"
+                # Alternative: pass provider as separate parameter (if client supports it)
+                provider_param = self.provider
+
+            # Build chat_completion call
+            call_kwargs = {
+                "messages": messages,
+                "model": model_to_use,
+                "max_tokens": 1024,
+                "temperature": 0.1,
+            }
+            # Add provider parameter if client supports it (some clients use this instead of model:provider)
+            if provider_param and hasattr(self.client.chat_completion, "__code__"):
+                # Check if provider parameter is supported
+                try:
+                    call_kwargs["provider"] = provider_param
+                except TypeError:
+                    # Provider not supported as parameter, use model:provider format
+                    pass
+
             response = await loop.run_in_executor(
                 None,
                 lambda: self.client.chat_completion(**call_kwargs),  # type: ignore[call-overload]
             )

             # Extract content from response
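As a reading aid (not part of this commit), here is a minimal sketch of the fallback chain the handler above implements: parse the comma-separated HF_FALLBACK_MODELS value, optionally pin a provider via the "model:provider" suffix, and try each model until one call succeeds. The call_model callable is a hypothetical stand-in for the chat_completion wrapper; only the parsing and retry order are taken from the diff.

import os
from collections.abc import Callable

DEFAULT_FALLBACKS = [
    "meta-llama/Llama-3.1-8B-Instruct",
    "HuggingFaceH4/zephyr-7b-beta",
    "Qwen/Qwen2-7B-Instruct",
    "google/gemma-2-2b-it",
]

def fallback_models() -> list[str]:
    # Comma-separated env var wins; otherwise fall back to the defaults above
    raw = os.getenv("HF_FALLBACK_MODELS", "")
    models = [m.strip() for m in raw.split(",") if m.strip()]
    return models or DEFAULT_FALLBACKS

def first_successful(call_model: Callable[[str], str], provider: str | None = None) -> str:
    # Try each model in order; pin a provider with the "model:provider" suffix
    last_error: Exception | None = None
    for model in fallback_models():
        target = f"{model}:{provider}" if provider else model
        try:
            return call_model(target)  # hypothetical wrapper around chat_completion
        except Exception as exc:  # keep trying the next model in the chain
            last_error = exc
    raise RuntimeError("All fallback models failed") from last_error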
src/agents/hypothesis_agent.py
CHANGED

@@ -40,9 +40,9 @@ class HypothesisAgent(BaseAgent):  # type: ignore[misc]
     def _get_agent(self) -> Agent[None, HypothesisAssessment]:
         """Lazy initialization of LLM agent to avoid requiring API keys at import."""
         if self._agent is None:
-            self._agent = Agent(
+            self._agent = Agent(  # type: ignore[call-overload]
                 model=get_model(),  # Uses configured LLM (OpenAI/Anthropic)
-
+                result_type=HypothesisAssessment,
                 system_prompt=SYSTEM_PROMPT,
             )
         return self._agent
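For orientation, the same change is applied to each of the agent modules in this commit: the Pydantic AI Agent is constructed with a result_type so run() returns a validated Pydantic model instead of raw text. Below is a minimal sketch of that pattern, assuming a pydantic-ai version whose Agent accepts result_type and exposes result.output (the type: ignore markers in the diff suggest the installed version's signatures differ); SimpleAssessment and the model string are made up for illustration.

from pydantic import BaseModel
from pydantic_ai import Agent

class SimpleAssessment(BaseModel):
    sufficient: bool
    confidence: float

# Mirrors the pattern in the diffs: structured output via result_type
agent = Agent(  # type: ignore[call-overload]
    model="openai:gpt-4o-mini",  # illustrative; the project resolves this via get_model()
    result_type=SimpleAssessment,
    system_prompt="Judge whether the evidence answers the question.",
    retries=3,
)

async def judge(prompt: str) -> SimpleAssessment:
    run_result = await agent.run(prompt)
    return run_result.output  # validated SimpleAssessment instance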
src/agents/input_parser.py
CHANGED

@@ -64,9 +64,9 @@ class InputParserAgent:
         self.logger = logger

         # Initialize Pydantic AI Agent
-        self.agent = Agent(
+        self.agent = Agent(  # type: ignore[call-overload]
             model=self.model,
-
+            result_type=ParsedQuery,
             system_prompt=SYSTEM_PROMPT,
             retries=3,
         )

@@ -117,7 +117,7 @@ class InputParserAgent:
             questions=len(parsed_query.research_questions),
         )

-            return parsed_query
+            return parsed_query  # type: ignore[no-any-return]

         except Exception as e:
             self.logger.error("Query parsing failed", error=str(e), query=query[:100])
src/agents/judge_agent_llm.py
CHANGED

@@ -16,9 +16,9 @@ class LLMSubIterationJudge:

     def __init__(self) -> None:
         self.model = get_model()
-        self.agent = Agent(
+        self.agent = Agent(  # type: ignore[call-overload]
             model=self.model,
-
+            result_type=JudgeAssessment,
             system_prompt="""You are a strict judge evaluating a research task.

Evaluate if the result is sufficient to answer the task.

@@ -42,4 +42,4 @@ Evaluate validity and sufficiency."""

         run_result = await self.agent.run(prompt)
         logger.info("LLM judge assessment complete", sufficient=run_result.output.sufficient)
-        return run_result.output
+        return run_result.output  # type: ignore[no-any-return]
src/agents/knowledge_gap.py
CHANGED

@@ -56,9 +56,9 @@ class KnowledgeGapAgent:
         self.logger = logger

         # Initialize Pydantic AI Agent
-        self.agent = Agent(
+        self.agent = Agent(  # type: ignore[call-overload]
             model=self.model,
-
+            result_type=KnowledgeGapOutput,
             system_prompt=SYSTEM_PROMPT,
             retries=3,
         )

@@ -121,7 +121,7 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
             gaps_count=len(evaluation.outstanding_gaps),
         )

-            return evaluation
+            return evaluation  # type: ignore[no-any-return]

         except Exception as e:
             self.logger.error("Knowledge gap evaluation failed", error=str(e))
src/agents/long_writer.py
CHANGED

@@ -84,9 +84,9 @@ class LongWriterAgent:
         self.logger = logger

         # Initialize Pydantic AI Agent
-        self.agent = Agent(
+        self.agent = Agent(  # type: ignore[call-overload]
             model=self.model,
-
+            result_type=LongWriterOutput,
             system_prompt=SYSTEM_PROMPT,
             retries=3,
         )

@@ -193,7 +193,7 @@ class LongWriterAgent:
             attempt=attempt + 1,
         )

-            return output
+            return output  # type: ignore[no-any-return]

         except (TimeoutError, ConnectionError) as e:
             # Transient errors - retry
src/agents/report_agent.py
CHANGED

@@ -41,9 +41,9 @@ class ReportAgent(BaseAgent):  # type: ignore[misc]
     def _get_agent(self) -> Agent[None, ResearchReport]:
         """Lazy initialization of LLM agent to avoid requiring API keys at import."""
         if self._agent is None:
-            self._agent = Agent(
+            self._agent = Agent(  # type: ignore[call-overload]
                 model=get_model(),
-
+                result_type=ResearchReport,
                 system_prompt=SYSTEM_PROMPT,
             )
         return self._agent
src/agents/tool_selector.py
CHANGED

@@ -68,9 +68,9 @@ class ToolSelectorAgent:
         self.logger = logger

         # Initialize Pydantic AI Agent
-        self.agent = Agent(
+        self.agent = Agent(  # type: ignore[call-overload]
             model=self.model,
-
+            result_type=AgentSelectionPlan,
             system_prompt=SYSTEM_PROMPT,
             retries=3,
         )

@@ -125,7 +125,7 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
             agents=[task.agent for task in selection_plan.tasks],
         )

-            return selection_plan
+            return selection_plan  # type: ignore[no-any-return]

         except Exception as e:
             self.logger.error("Tool selection failed", error=str(e))
src/app.py
CHANGED

@@ -5,8 +5,24 @@ from collections.abc import AsyncGenerator
 from typing import Any

 import gradio as gr
-
-
+
+# Try to import HuggingFace support (may not be available in all pydantic-ai versions)
+# According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires
+# pydantic-ai with huggingface extra or pydantic-ai-slim[huggingface]
+# There are two ways to use HuggingFace:
+# 1. Inference API: HuggingFaceModel with HuggingFaceProvider (uses AsyncInferenceClient internally)
+# 2. Local models: would use transformers directly (not via pydantic-ai)
+try:
+    from huggingface_hub import AsyncInferenceClient
+    from pydantic_ai.models.huggingface import HuggingFaceModel
+    from pydantic_ai.providers.huggingface import HuggingFaceProvider
+
+    _HUGGINGFACE_AVAILABLE = True
+except ImportError:
+    HuggingFaceModel = None  # type: ignore[assignment, misc]
+    HuggingFaceProvider = None  # type: ignore[assignment, misc]
+    AsyncInferenceClient = None  # type: ignore[assignment, misc]
+    _HUGGINGFACE_AVAILABLE = False

 from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, MockJudgeHandler
 from src.orchestrator_factory import create_orchestrator

@@ -15,6 +31,7 @@ from src.tools.europepmc import EuropePMCTool
 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
 from src.utils.config import settings
+from src.utils.inference_models import get_available_models, get_available_providers
 from src.utils.models import AgentEvent, OrchestratorConfig

@@ -22,6 +39,8 @@ def configure_orchestrator(
     use_mock: bool = False,
     mode: str = "simple",
     oauth_token: str | None = None,
+    hf_model: str | None = None,
+    hf_provider: str | None = None,
 ) -> tuple[Any, str]:
     """
     Create an orchestrator instance.

@@ -30,6 +49,8 @@
         use_mock: If True, use MockJudgeHandler (no API key needed)
         mode: Orchestrator mode ("simple" or "advanced")
         oauth_token: Optional OAuth token from HuggingFace login
+        hf_model: Selected HuggingFace model ID
+        hf_provider: Selected inference provider

     Returns:
         Tuple of (Orchestrator instance, backend_name)

@@ -59,11 +80,27 @@
     # Priority: oauth_token > env vars
     effective_api_key = oauth_token
     if effective_api_key or (os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")):
-        model:
+        model: Any | None = None
         if effective_api_key:
-
-
-
+            # Use selected model or fall back to env var/settings
+            model_name = (
+                hf_model
+                or os.getenv("HF_MODEL")
+                or settings.huggingface_model
+                or "Qwen/Qwen3-Next-80B-A3B-Thinking"
+            )
+            if not _HUGGINGFACE_AVAILABLE:
+                raise ImportError(
+                    "HuggingFace models are not available in this version of pydantic-ai. "
+                    "Please install with: uv add 'pydantic-ai[huggingface]' or use 'openai'/'anthropic' as the LLM provider."
+                )
+            # Inference API - uses HuggingFace Inference API via AsyncInferenceClient
+            # Per https://ai.pydantic.dev/models/huggingface/#configure-the-provider
+            # Create AsyncInferenceClient for inference API
+            hf_client = AsyncInferenceClient(api_key=effective_api_key)  # type: ignore[misc]
+            # Pass client to HuggingFaceProvider for inference API usage
+            provider = HuggingFaceProvider(hf_client=hf_client)  # type: ignore[misc]
+            model = HuggingFaceModel(model_name, provider=provider)  # type: ignore[misc]
             backend_info = "API (HuggingFace OAuth)"
         else:
             backend_info = "API (Env Config)"

@@ -72,8 +109,19 @@

     # 3. Free Tier (HuggingFace Inference)
     else:
-
-
+        # Pass OAuth token if available (even if not in env vars)
+        # This allows OAuth login to work with free tier models
+        # Use selected model and provider if provided
+        judge_handler = HFInferenceJudgeHandler(
+            model_id=hf_model,
+            api_key=oauth_token,
+            provider=hf_provider,
+        )
+        model_display = hf_model.split("/")[-1] if hf_model else "Default"
+        provider_display = hf_provider or "auto"
+        backend_info = f"Free Tier ({model_display} via {provider_display})" + (
+            " (OAuth)" if oauth_token else ""
+        )

     orchestrator = create_orchestrator(
         search_handler=search_handler,

@@ -332,6 +380,8 @@ async def research_agent(
     message: str,
     history: list[dict[str, Any]],
     mode: str = "simple",
+    hf_model: str | None = None,
+    hf_provider: str | None = None,
     request: gr.Request | None = None,
 ) -> AsyncGenerator[gr.ChatMessage | list[gr.ChatMessage], None]:
     """

@@ -341,6 +391,8 @@
         message: User's research question
         history: Chat history (Gradio format)
         mode: Orchestrator mode ("simple" or "advanced")
+        hf_model: Selected HuggingFace model ID (from dropdown)
+        hf_provider: Selected inference provider (from dropdown)
         request: Gradio request object containing OAuth information

     Yields:

@@ -372,10 +424,13 @@
     try:
         # use_mock=False - let configure_orchestrator decide based on available keys
        # It will use: OAuth token > Env vars > HF Inference (free tier)
+        # hf_model and hf_provider come from dropdowns, so they're guaranteed to be valid
         orchestrator, backend_name = configure_orchestrator(
             use_mock=False,  # Never use mock in production - HF Inference is the free fallback
             mode=effective_mode,
             oauth_token=oauth_token,
+            hf_model=hf_model,  # Can be None, will use defaults in configure_orchestrator
+            hf_provider=hf_provider,  # Can be None, will use defaults in configure_orchestrator
         )

         yield gr.ChatMessage(

@@ -407,7 +462,162 @@ def create_demo() -> gr.Blocks:
     with gr.Row():
         gr.LoginButton()

-    #
+    # Get initial model/provider lists (no auth by default)
+    # Check if user has auth to determine which model list to use
+    has_auth = bool(os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY"))
+
+    # Get the appropriate model list based on user's actual auth status
+    # CRITICAL: Use the list that matches the user's auth status to avoid mismatches
+    if has_auth:
+        # User has auth - get models available with auth (includes gated models)
+        initial_models = get_available_models(has_auth=True)
+        # Fallback to unauthenticated models if auth list is empty (shouldn't happen, but be safe)
+        if not initial_models:
+            initial_models = get_available_models(has_auth=False)
+    else:
+        # User doesn't have auth - only get unauthenticated models (ungated only)
+        initial_models = get_available_models(has_auth=False)
+
+    # Extract available model IDs (first element of tuples) - this is what Gradio uses as values
+    available_model_ids = [m[0] for m in initial_models] if initial_models else []
+
+    # Prefer latest reasoning models if available, otherwise use fallback
+    preferred_models = [
+        "Qwen/Qwen3-Next-80B-A3B-Thinking",
+        "Qwen/Qwen3-Next-80B-A3B-Instruct",
+        "meta-llama/Llama-3.3-70B-Instruct",
+    ]
+
+    # Find first available preferred model from the actual available models list
+    # CRITICAL: Only use models that are actually in available_model_ids
+    initial_model_id = None
+    for preferred in preferred_models:
+        if preferred in available_model_ids:
+            initial_model_id = preferred
+            break
+
+    # Fall back to first available model from the actual list
+    # CRITICAL: Always use a model that's guaranteed to be in available_model_ids
+    if not initial_model_id:
+        if available_model_ids:
+            initial_model_id = available_model_ids[0]  # First model ID from available list
+        else:
+            # No models available - this shouldn't happen, but handle gracefully
+            initial_model_id = None
+
+    # Final safety check: ensure initial_model_id is actually in the available models
+    # This is the last line of defense - if it's not in the list, use the first available
+    if initial_model_id and initial_model_id not in available_model_ids:
+        if available_model_ids:
+            initial_model_id = available_model_ids[0]
+        else:
+            initial_model_id = None
+
+    # Get providers for the selected model (only if we have a valid model)
+    initial_providers = []
+    initial_provider = None
+    if initial_model_id:
+        initial_providers = get_available_providers(initial_model_id, has_auth=has_auth)
+        # Ensure we have a valid provider value that's in the choices
+        if initial_providers:
+            initial_provider = initial_providers[0][0]  # Use first provider's ID
+            # Safety check: ensure provider is in the list
+            available_provider_ids = [p[0] for p in initial_providers]
+            if initial_provider not in available_provider_ids:
+                initial_provider = initial_providers[0][0] if initial_providers else None
+
+    # Create dropdowns for model and provider selection
+    # Note: Components can be in a hidden row and still work with ChatInterface additional_inputs
+    # The visible=False just hides the row itself, but components are still accessible
+    with gr.Row(visible=False):
+        mode_radio = gr.Radio(
+            choices=["simple", "advanced"],
+            value="simple",
+            label="Orchestrator Mode",
+            info="Simple: Linear | Advanced: Multi-Agent (Requires OpenAI)",
+        )
+
+        # Final validation: ensure value is in choices before creating dropdown
+        # Gradio requires the value to be exactly one of the choice values (first element of tuples)
+        # CRITICAL: Always default to the first available choice to ensure value is always valid
+        # Extract model IDs from choices (first element of each tuple)
+        model_ids_in_choices = [m[0] for m in initial_models] if initial_models else []
+
+        # Determine the model value - must be in model_ids_in_choices
+        if initial_models and model_ids_in_choices:
+            # First try to use initial_model_id if it's valid
+            if initial_model_id and initial_model_id in model_ids_in_choices:
+                model_value = initial_model_id
+            else:
+                # Fallback to first available model - guarantees a valid value
+                model_value = model_ids_in_choices[0]
+        else:
+            # No models available - set to None (empty dropdown)
+            model_value = None
+
+        # Absolute final check: if we have choices but model_value is None or invalid, use first choice
+        if initial_models and model_ids_in_choices:
+            if not model_value or model_value not in model_ids_in_choices:
+                model_value = model_ids_in_choices[0]
+
+        hf_model_dropdown = gr.Dropdown(
+            choices=initial_models if initial_models else [],
+            value=model_value,  # Always set to a valid value from choices (or None if empty)
+            label="🤖 Reasoning Model",
+            info="Select AI model for evidence assessment. Sign in to access gated models.",
+            interactive=True,
+            allow_custom_value=False,  # Only allow values from choices
+        )
+
+        # Final validation for provider: ensure value is in choices
+        # CRITICAL: Always default to the first available choice to ensure value is always valid
+        provider_ids_in_choices = [p[0] for p in initial_providers] if initial_providers else []
+        provider_value = None
+        if initial_providers and provider_ids_in_choices:
+            # First try to use the preferred provider if it's available
+            if initial_provider and initial_provider in provider_ids_in_choices:
+                provider_value = initial_provider
+            else:
+                # Fallback to first available provider - this ensures we always have a valid value
+                provider_value = provider_ids_in_choices[0]
+
+        # Absolute final check: if we have choices but provider_value is None or invalid, use first choice
+        if initial_providers and provider_ids_in_choices:
+            if not provider_value or provider_value not in provider_ids_in_choices:
+                provider_value = provider_ids_in_choices[0]
+
+        hf_provider_dropdown = gr.Dropdown(
+            choices=initial_providers if initial_providers else [],
+            value=provider_value,  # Always set to a valid value from choices (or None if empty)
+            label="⚡ Inference Provider",
+            info="Select provider for model execution. Some require authentication.",
+            interactive=True,
+            allow_custom_value=False,  # Only allow values from choices
+        )
+
+    # Update providers when model changes
+    def update_providers(model_id: str, request: gr.Request | None = None) -> gr.Dropdown:
+        """Update provider list when model changes."""
+        # Check if user is authenticated
+        oauth_token, _ = extract_oauth_info(request)
+        has_auth = bool(
+            oauth_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
+        )
+
+        providers = get_available_providers(model_id, has_auth=has_auth)
+        if providers:
+            # Always set value to first provider to ensure it's valid
+            return gr.Dropdown(choices=providers, value=providers[0][0])
+        # If no providers, return empty dropdown with no value
+        return gr.Dropdown(choices=[], value=None)
+
+    hf_model_dropdown.change(
+        fn=update_providers,
+        inputs=[hf_model_dropdown],
+        outputs=[hf_provider_dropdown],
+    )
+
+    # Chat interface with model/provider selection
     gr.ChatInterface(
         fn=research_agent,
         title="🧬 DeepCritical",

@@ -417,7 +627,7 @@
         "---\n"
         "*Research tool only – not for medical advice.*  \n"
         "**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`\n\n"
-        "**Sign in with HuggingFace** above to
+        "**Sign in with HuggingFace** above to access premium models and providers."
     ),
     examples=[
         ["What drugs could be repurposed for Alzheimer's disease?", "simple"],

@@ -426,14 +636,9 @@
     ],
     additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False),
     additional_inputs=[
-
-
-
-        label="Orchestrator Mode",
-        info=(
-            "Simple: Linear (Free Tier Friendly) | Advanced: Multi-Agent (Requires OpenAI - not available without manual config)"
-        ),
-    ),
+        mode_radio,
+        hf_model_dropdown,
+        hf_provider_dropdown,
     ],
 )
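The configure_orchestrator() hunk above wires pydantic-ai to the HuggingFace Inference API. As an aside, a condensed sketch of that wiring, assuming pydantic-ai is installed with the huggingface extra; the helper name build_hf_model and the environment fallbacks are illustrative, the three library calls mirror the diff.

import os

from huggingface_hub import AsyncInferenceClient
from pydantic_ai.models.huggingface import HuggingFaceModel
from pydantic_ai.providers.huggingface import HuggingFaceProvider

def build_hf_model(model_name: str | None = None, token: str | None = None) -> HuggingFaceModel:
    # Resolve the model name the same way the hunk does: explicit > env > default
    name = model_name or os.getenv("HF_MODEL") or "Qwen/Qwen3-Next-80B-A3B-Thinking"
    # The async client talks to the Inference API; the provider hands it to pydantic-ai
    client = AsyncInferenceClient(api_key=token or os.getenv("HF_TOKEN"))
    provider = HuggingFaceProvider(hf_client=client)
    return HuggingFaceModel(name, provider=provider)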
src/legacy_orchestrator.py
CHANGED

@@ -101,12 +101,26 @@ class Orchestrator:
         return evidence

     try:
-        #
-
+        # First, deduplicate by URL (exact duplicates) from current evidence batch
+        # This prevents the same URL from appearing multiple times in one batch
+        seen_urls: set[str] = set()
+        unique_by_url: list[Evidence] = []
+        for e in evidence:
+            if e.citation.url not in seen_urls:
+                unique_by_url.append(e)
+                seen_urls.add(e.citation.url)
+
+        # Then, deduplicate using semantic similarity with stricter threshold
+        # threshold=0.95 means only remove near-identical content (distance < 0.05)
+        # This prevents over-filtering while still removing true duplicates
+        unique_evidence: list[Evidence] = await embeddings.deduplicate(
+            unique_by_url, threshold=0.95
+        )
         logger.info(
             "Deduplicated evidence",
             before=len(evidence),
-
+            after_url=len(unique_by_url),
+            after_semantic=len(unique_evidence),
         )
         return unique_evidence
     except Exception as e:
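To make the two-stage strategy in this hunk easy to follow outside the orchestrator, here is a self-contained sketch: exact URL de-duplication first, then a similarity pass that drops items whose similarity to an already-kept item reaches the threshold. The embed callable and the plain cosine similarity are illustrative stand-ins for the project's embedding service, not its actual API.

from collections.abc import Callable

def dedupe(items: list[dict], embed: Callable[[str], list[float]], threshold: float = 0.95) -> list[dict]:
    # Stage 1: drop exact URL duplicates, keeping the first occurrence
    seen: set[str] = set()
    by_url: list[dict] = []
    for item in items:
        if item["url"] not in seen:
            by_url.append(item)
            seen.add(item["url"])

    # Stage 2: drop near-identical texts (cosine similarity >= threshold)
    kept: list[dict] = []
    kept_vecs: list[list[float]] = []
    for item in by_url:
        vec = embed(item["text"])  # illustrative embedding call
        if all(_cosine(vec, other) < threshold for other in kept_vecs):
            kept.append(item)
            kept_vecs.append(vec)
    return kept

def _cosine(a: list[float], b: list[float]) -> float:
    num = sum(x * y for x, y in zip(a, b))
    den = (sum(x * x for x in a) ** 0.5) * (sum(y * y for y in b) ** 0.5) or 1.0
    return num / den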
src/orchestrator/planner_agent.py
CHANGED

@@ -80,9 +80,9 @@ class PlannerAgent:
         raise ConfigurationError("crawl_tool must be callable")

         # Initialize Pydantic AI Agent
-        self.agent = Agent(
+        self.agent = Agent(  # type: ignore[call-overload]
             model=self.model,
-
+            result_type=ReportPlan,
             system_prompt=SYSTEM_PROMPT,
             tools=[self.web_search_tool, self.crawl_tool],
             retries=3,

@@ -136,7 +136,7 @@ class PlannerAgent:
             has_background=bool(report_plan.background_context),
         )

-            return report_plan
+            return report_plan  # type: ignore[no-any-return]

         except Exception as e:
             self.logger.error("Planning failed", error=str(e), query=query[:100])
src/services/llamaindex_rag.py
CHANGED

@@ -202,7 +202,7 @@ class LlamaIndexRAGService:
     def _configure_llm(self, huggingface_llm: Any, openai_llm: Any) -> None:
         """Configure LLM for query synthesis."""
         if huggingface_llm is not None and (settings.hf_token or settings.huggingface_api_key):
-            model_name = settings.huggingface_model or "
+            model_name = settings.huggingface_model or "Qwen/Qwen3-Next-80B-A3B-Thinking"
             token = settings.hf_token or settings.huggingface_api_key

             # Check if it's HuggingFaceInferenceAPI (API-based) or HuggingFaceLLM (local)
src/services/statistical_analyzer.py
CHANGED

@@ -71,9 +71,9 @@ class StatisticalAnalyzer:
         """Lazy initialization of LLM agent for code generation."""
         if self._agent is None:
             library_versions = get_sandbox_library_prompt()
-            self._agent = Agent(
+            self._agent = Agent(  # type: ignore[call-overload]
                 model=get_model(),
-
+                result_type=str,
                 system_prompt=f"""You are a biomedical data scientist.

Generate Python code to analyze research evidence and test hypotheses.
src/utils/config.py
CHANGED

@@ -41,8 +41,9 @@ class Settings(BaseSettings):
         description="OpenAI embedding model (used by LlamaIndex RAG)",
     )
     local_embedding_model: str = Field(
-        default="
-        description="Local sentence-transformers model (used by EmbeddingService)"
+        default="BAAI/bge-small-en-v1.5",
+        description="Local sentence-transformers model (used by EmbeddingService). "
+        "BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2.",
     )
     embedding_provider: Literal["openai", "local", "huggingface"] = Field(
         default="local",

@@ -58,8 +59,15 @@
         default=None, description="HuggingFace API token (HF_TOKEN or HUGGINGFACE_API_KEY)"
     )
     huggingface_model: str = Field(
-        default="
-        description="Default HuggingFace model ID for inference"
+        default="Qwen/Qwen3-Next-80B-A3B-Thinking",
+        description="Default HuggingFace model ID for inference (gated, requires auth). "
+        "Latest reasoning model with advanced thinking capabilities.",
+    )
+    huggingface_fallback_models: str = Field(
+        default="Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct",
+        description="Comma-separated list of fallback HuggingFace models for inference API. "
+        "Models are tried in order until one succeeds. "
+        "Default: Latest reasoning models (Qwen3-Next, Llama-3.3) followed by reliable fallbacks.",
     )

     # PubMed Configuration
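A minimal sketch of how the new fallback setting is meant to be consumed, assuming pydantic-settings: the field stays a plain string so it can be overridden with a single environment variable, and callers split it on commas (as _get_fallback_models() does in judges.py). DemoSettings and fallback_list are illustrative names, not the project's Settings class.

from pydantic import Field
from pydantic_settings import BaseSettings

class DemoSettings(BaseSettings):
    # Overridable via the HUGGINGFACE_FALLBACK_MODELS environment variable
    huggingface_fallback_models: str = Field(
        default="Qwen/Qwen3-Next-80B-A3B-Thinking,meta-llama/Llama-3.1-8B-Instruct",
        description="Comma-separated fallback models, tried in order",
    )

    def fallback_list(self) -> list[str]:
        return [m.strip() for m in self.huggingface_fallback_models.split(",") if m.strip()]

# e.g. export HUGGINGFACE_FALLBACK_MODELS="HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct"
print(DemoSettings().fallback_list())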
src/utils/huggingface_chat_client.py
CHANGED

@@ -28,14 +28,14 @@ class HuggingFaceChatClient:

     def __init__(
         self,
-        model_name: str = "
+        model_name: str = "Qwen/Qwen3-Next-80B-A3B-Thinking",
         api_key: str | None = None,
         provider: str = "auto",
     ) -> None:
         """Initialize HuggingFace chat client.

         Args:
-            model_name: HuggingFace model identifier (e.g., "
+            model_name: HuggingFace model identifier (e.g., "Qwen/Qwen3-Next-80B-A3B-Thinking")
             api_key: Optional HF_TOKEN for gated models. If None, uses environment token.
             provider: Provider name or "auto" for automatic selection.
                 Options: "auto", "cerebras", "together", "sambanova", etc.
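For reference, a construction-only usage sketch of the client whose defaults change above; only the constructor arguments shown in the diff are used, and the token value is a placeholder.

from src.utils.huggingface_chat_client import HuggingFaceChatClient

# Defaults: Qwen/Qwen3-Next-80B-A3B-Thinking with automatic provider selection
client = HuggingFaceChatClient()

# Pin a specific provider and pass an explicit token for gated models
pinned = HuggingFaceChatClient(
    model_name="meta-llama/Llama-3.3-70B-Instruct",
    api_key="hf_xxx",  # placeholder; leave None to use the environment token
    provider="cerebras",
)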
src/utils/inference_models.py
ADDED
|
@@ -0,0 +1,627 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration for HuggingFace Inference Providers models.
|
| 2 |
+
|
| 3 |
+
Based on: https://huggingface.co/inference/models
|
| 4 |
+
|
| 5 |
+
This module provides model and provider configurations with verification
|
| 6 |
+
capabilities to ensure models are actually available on selected providers.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from typing import TypedDict
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class ModelProvider(TypedDict):
|
| 13 |
+
"""Provider information for a model."""
|
| 14 |
+
|
| 15 |
+
name: str
|
| 16 |
+
input_cost: float | None # $/1M tokens
|
| 17 |
+
output_cost: float | None # $/1M tokens
|
| 18 |
+
latency: float | None # seconds
|
| 19 |
+
throughput: float | None # tokens/second
|
| 20 |
+
supports_tools: bool
|
| 21 |
+
supports_structured: bool
|
| 22 |
+
requires_auth: bool # Whether this provider requires authentication
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class InferenceModel(TypedDict):
|
| 26 |
+
"""Model configuration with available providers."""
|
| 27 |
+
|
| 28 |
+
model_id: str
|
| 29 |
+
display_name: str
|
| 30 |
+
providers: dict[str, ModelProvider]
|
| 31 |
+
requires_auth: bool # Whether the model itself requires authentication (gated)
|
| 32 |
+
description: str
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# Latest Reasoning Models from https://huggingface.co/inference/models
|
| 36 |
+
# Updated with latest reasoning models (Qwen3-Next, Qwen3-235B, Llama-3.3, etc.)
|
| 37 |
+
INFERENCE_MODELS: dict[str, InferenceModel] = {
|
| 38 |
+
# Top-tier reasoning models (latest)
|
| 39 |
+
"Qwen/Qwen3-Next-80B-A3B-Thinking": {
|
| 40 |
+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking",
|
| 41 |
+
"display_name": "Qwen3-Next-80B-A3B-Thinking",
|
| 42 |
+
"requires_auth": True, # Gated
|
| 43 |
+
"description": "Qwen's latest reasoning model - Advanced thinking capabilities, 262K context",
|
| 44 |
+
"providers": {
|
| 45 |
+
"together": {
|
| 46 |
+
"name": "together",
|
| 47 |
+
"input_cost": 0.15,
|
| 48 |
+
"output_cost": 1.5,
|
| 49 |
+
"latency": 0.48,
|
| 50 |
+
"throughput": 202.0,
|
| 51 |
+
"supports_tools": True,
|
| 52 |
+
"supports_structured": True,
|
| 53 |
+
"requires_auth": True,
|
| 54 |
+
},
|
| 55 |
+
"together-fastest": {
|
| 56 |
+
"name": "together-fastest",
|
| 57 |
+
"input_cost": 0.15,
|
| 58 |
+
"output_cost": 1.5,
|
| 59 |
+
"latency": 0.48,
|
| 60 |
+
"throughput": 202.0,
|
| 61 |
+
"supports_tools": True,
|
| 62 |
+
"supports_structured": True,
|
| 63 |
+
"requires_auth": True,
|
| 64 |
+
},
|
| 65 |
+
},
|
| 66 |
+
},
|
| 67 |
+
"Qwen/Qwen3-Next-80B-A3B-Instruct": {
|
| 68 |
+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct",
|
| 69 |
+
"display_name": "Qwen3-Next-80B-A3B-Instruct",
|
| 70 |
+
"requires_auth": True, # Gated
|
| 71 |
+
"description": "Qwen's latest instruction model - High performance, 262K context",
|
| 72 |
+
"providers": {
|
| 73 |
+
"together": {
|
| 74 |
+
"name": "together",
|
| 75 |
+
"input_cost": 0.15,
|
| 76 |
+
"output_cost": 1.5,
|
| 77 |
+
"latency": 0.60,
|
| 78 |
+
"throughput": 153.0,
|
| 79 |
+
"supports_tools": True,
|
| 80 |
+
"supports_structured": True,
|
| 81 |
+
"requires_auth": True,
|
| 82 |
+
},
|
| 83 |
+
"together-fastest": {
|
| 84 |
+
"name": "together-fastest",
|
| 85 |
+
"input_cost": 0.15,
|
| 86 |
+
"output_cost": 1.5,
|
| 87 |
+
"latency": 0.60,
|
| 88 |
+
"throughput": 153.0,
|
| 89 |
+
"supports_tools": True,
|
| 90 |
+
"supports_structured": True,
|
| 91 |
+
"requires_auth": True,
|
| 92 |
+
},
|
| 93 |
+
},
|
| 94 |
+
},
|
| 95 |
+
"Qwen/Qwen3-235B-A22B-Instruct-2507": {
|
| 96 |
+
"model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
|
| 97 |
+
"display_name": "Qwen3-235B-A22B-Instruct",
|
| 98 |
+
"requires_auth": True, # Gated
|
| 99 |
+
"description": "Qwen's massive 235B model - Ultra-high performance, 262K context",
|
| 100 |
+
"providers": {
|
| 101 |
+
"cerebras": {
|
| 102 |
+
"name": "cerebras",
|
| 103 |
+
"input_cost": 0.6,
|
| 104 |
+
"output_cost": 1.2,
|
| 105 |
+
"latency": 0.23,
|
| 106 |
+
"throughput": 509.0,
|
| 107 |
+
"supports_tools": True,
|
| 108 |
+
"supports_structured": False,
|
| 109 |
+
"requires_auth": True,
|
| 110 |
+
},
|
| 111 |
+
"cerebras-fastest": {
|
| 112 |
+
"name": "cerebras-fastest",
|
| 113 |
+
"input_cost": 0.6,
|
| 114 |
+
"output_cost": 1.2,
|
| 115 |
+
"latency": 0.23,
|
| 116 |
+
"throughput": 509.0,
|
| 117 |
+
"supports_tools": True,
|
| 118 |
+
"supports_structured": False,
|
| 119 |
+
"requires_auth": True,
|
| 120 |
+
},
|
| 121 |
+
"together": {
|
| 122 |
+
"name": "together",
|
| 123 |
+
"input_cost": 0.2,
|
| 124 |
+
"output_cost": 0.6,
|
| 125 |
+
"latency": 0.39,
|
| 126 |
+
"throughput": 42.0,
|
| 127 |
+
"supports_tools": True,
|
| 128 |
+
"supports_structured": True,
|
| 129 |
+
"requires_auth": True,
|
| 130 |
+
},
|
| 131 |
+
},
|
| 132 |
+
},
|
| 133 |
+
"Qwen/Qwen3-235B-A22B-Thinking-2507": {
|
| 134 |
+
"model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
| 135 |
+
"display_name": "Qwen3-235B-A22B-Thinking",
|
| 136 |
+
"requires_auth": True, # Gated
|
| 137 |
+
"description": "Qwen's massive 235B reasoning model - Advanced thinking, 262K context",
|
| 138 |
+
"providers": {
|
| 139 |
+
"cerebras": {
|
| 140 |
+
"name": "cerebras",
|
| 141 |
+
"input_cost": None,
|
| 142 |
+
"output_cost": None,
|
| 143 |
+
"latency": None,
|
| 144 |
+
"throughput": None,
|
| 145 |
+
"supports_tools": False,
|
| 146 |
+
"supports_structured": False,
|
| 147 |
+
"requires_auth": True,
|
| 148 |
+
},
|
| 149 |
+
},
|
| 150 |
+
},
|
| 151 |
+
"meta-llama/Llama-3.3-70B-Instruct": {
|
| 152 |
+
"model_id": "meta-llama/Llama-3.3-70B-Instruct",
|
| 153 |
+
"display_name": "Llama 3.3 70B Instruct",
|
| 154 |
+
"requires_auth": True, # Gated
|
| 155 |
+
"description": "Meta's latest Llama 3.3 - High performance, tools support",
|
| 156 |
+
"providers": {
|
| 157 |
+
"cerebras": {
|
| 158 |
+
"name": "cerebras",
|
| 159 |
+
"input_cost": 0.85,
|
| 160 |
+
"output_cost": 1.2,
|
| 161 |
+
"latency": 0.35,
|
| 162 |
+
"throughput": 948.0,
|
| 163 |
+
"supports_tools": True,
|
| 164 |
+
"supports_structured": False,
|
| 165 |
+
"requires_auth": True,
|
| 166 |
+
},
|
| 167 |
+
"cerebras-fastest": {
|
| 168 |
+
"name": "cerebras-fastest",
|
| 169 |
+
"input_cost": 0.85,
|
| 170 |
+
"output_cost": 1.2,
|
| 171 |
+
"latency": 0.35,
|
| 172 |
+
"throughput": 948.0,
|
| 173 |
+
"supports_tools": True,
|
| 174 |
+
"supports_structured": False,
|
| 175 |
+
"requires_auth": True,
|
| 176 |
+
},
|
| 177 |
+
},
|
| 178 |
+
},
|
| 179 |
+
"openai/gpt-oss-120b": {
|
| 180 |
+
"model_id": "openai/gpt-oss-120b",
|
| 181 |
+
"display_name": "GPT-OSS-120B",
|
| 182 |
+
"requires_auth": True, # Gated
|
| 183 |
+
"description": "OpenAI's open-source 120B model - Ultra-fast inference",
|
| 184 |
+
"providers": {
|
| 185 |
+
"cerebras": {
|
| 186 |
+
"name": "cerebras",
|
| 187 |
+
"input_cost": 0.25,
|
| 188 |
+
"output_cost": 0.69,
|
| 189 |
+
"latency": 0.23,
|
| 190 |
+
"throughput": 1051.0,
|
| 191 |
+
"supports_tools": True,
|
| 192 |
+
"supports_structured": False,
|
| 193 |
+
"requires_auth": True,
|
| 194 |
+
},
|
| 195 |
+
"cerebras-fastest": {
|
| 196 |
+
"name": "cerebras-fastest",
|
| 197 |
+
"input_cost": 0.25,
|
| 198 |
+
"output_cost": 0.69,
|
| 199 |
+
"latency": 0.23,
|
| 200 |
+
"throughput": 1051.0,
|
| 201 |
+
"supports_tools": True,
|
| 202 |
+
"supports_structured": False,
|
| 203 |
+
"requires_auth": True,
|
| 204 |
+
},
|
| 205 |
+
},
|
| 206 |
+
},
|
| 207 |
+
"CohereLabs/command-a-reasoning-08-2025": {
|
| 208 |
+
"model_id": "CohereLabs/command-a-reasoning-08-2025",
|
| 209 |
+
"display_name": "Command A Reasoning 08-2025",
|
| 210 |
+
"requires_auth": True, # Gated
|
| 211 |
+
"description": "Cohere's latest reasoning model - Specialized for reasoning tasks",
|
| 212 |
+
"providers": {
|
| 213 |
+
"cohere": {
|
| 214 |
+
"name": "cohere",
|
| 215 |
+
"input_cost": None,
|
| 216 |
+
"output_cost": None,
|
| 217 |
+
"latency": 0.18,
|
| 218 |
+
"throughput": 94.0,
|
| 219 |
+
"supports_tools": True,
|
| 220 |
+
"supports_structured": False,
|
| 221 |
+
"requires_auth": True,
|
| 222 |
+
},
|
| 223 |
+
},
|
| 224 |
+
},
|
| 225 |
+
"zai-org/GLM-4.6": {
|
| 226 |
+
"model_id": "zai-org/GLM-4.6",
|
| 227 |
+
"display_name": "GLM-4.6",
|
| 228 |
+
"requires_auth": True, # Gated
|
| 229 |
+
"description": "ZAI's GLM-4.6 - High performance reasoning model",
|
| 230 |
+
"providers": {
|
| 231 |
+
"cerebras": {
|
| 232 |
+
"name": "cerebras",
|
| 233 |
+
"input_cost": None,
|
| 234 |
+
"output_cost": None,
|
| 235 |
+
"latency": 0.27,
|
| 236 |
+
"throughput": 381.0,
|
| 237 |
+
"supports_tools": True,
|
| 238 |
+
"supports_structured": False,
|
| 239 |
+
"requires_auth": True,
|
| 240 |
+
},
|
| 241 |
+
"cerebras-fastest": {
|
| 242 |
+
"name": "cerebras-fastest",
|
| 243 |
+
"input_cost": None,
|
| 244 |
+
"output_cost": None,
|
| 245 |
+
"latency": 0.27,
|
| 246 |
+
"throughput": 381.0,
|
| 247 |
+
"supports_tools": True,
|
| 248 |
+
"supports_structured": False,
|
| 249 |
+
"requires_auth": True,
|
| 250 |
+
},
|
| 251 |
+
"zai-org": {
|
| 252 |
+
"name": "zai-org",
|
| 253 |
+
"input_cost": None,
|
| 254 |
+
"output_cost": None,
|
| 255 |
+
"latency": 3.08,
|
| 256 |
+
"throughput": 54.0,
|
| 257 |
+
"supports_tools": True,
|
| 258 |
+
"supports_structured": False,
|
| 259 |
+
"requires_auth": True,
|
| 260 |
+
},
|
| 261 |
+
},
|
| 262 |
+
},
|
| 263 |
+
"meta-llama/Llama-3.1-8B-Instruct": {
|
| 264 |
+
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
| 265 |
+
"display_name": "Llama 3.1 8B Instruct",
|
| 266 |
+
"requires_auth": True, # Gated
|
| 267 |
+
"description": "Meta's Llama 3.1 8B - Fast, efficient reasoning",
|
| 268 |
+
"providers": {
|
| 269 |
+
"novita": {
|
| 270 |
+
"name": "novita",
|
| 271 |
+
"input_cost": 0.02,
|
| 272 |
+
"output_cost": 0.05,
|
| 273 |
+
"latency": 0.64,
|
| 274 |
+
"throughput": 84.0,
|
| 275 |
+
"supports_tools": False,
|
| 276 |
+
"supports_structured": False,
|
| 277 |
+
"requires_auth": True,
|
| 278 |
+
},
|
| 279 |
+
"nebius": {
|
| 280 |
+
"name": "nebius",
|
| 281 |
+
"input_cost": 0.03,
|
| 282 |
+
"output_cost": 0.09,
|
| 283 |
+
"latency": 0.35,
|
| 284 |
+
"throughput": 194.0,
|
| 285 |
+
"supports_tools": False,
|
| 286 |
+
"supports_structured": True,
|
| 287 |
+
"requires_auth": True,
|
| 288 |
+
},
|
| 289 |
+
"cerebras": {
|
| 290 |
+
"name": "cerebras",
|
| 291 |
+
"input_cost": 0.1,
|
| 292 |
+
"output_cost": 0.1,
|
| 293 |
+
"latency": 0.33,
|
| 294 |
+
"throughput": 1148.0,
|
| 295 |
+
"supports_tools": False,
|
| 296 |
+
"supports_structured": False,
|
| 297 |
+
"requires_auth": True,
|
| 298 |
+
},
|
| 299 |
+
"sambanova": {
|
| 300 |
+
"name": "sambanova",
|
| 301 |
+
"input_cost": 0.1,
|
| 302 |
+
"output_cost": 0.2,
|
| 303 |
+
"latency": 0.85,
|
| 304 |
+
"throughput": 527.0,
|
| 305 |
+
"supports_tools": True,
|
| 306 |
+
"supports_structured": True,
|
| 307 |
+
"requires_auth": True,
|
| 308 |
+
},
|
| 309 |
+
},
|
| 310 |
+
},
|
| 311 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
|
| 312 |
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
|
| 313 |
+
"display_name": "DeepSeek R1 Distill Llama 70B",
|
| 314 |
+
"requires_auth": True, # Gated
|
| 315 |
+
"description": "DeepSeek's reasoning model - Advanced chain-of-thought",
|
| 316 |
+
"providers": {
|
| 317 |
+
"novita": {
|
| 318 |
+
"name": "novita",
|
                "input_cost": 0.64,
                "output_cost": 0.64,
                "latency": 1.21,
                "throughput": 31.0,
                "supports_tools": False,
                "supports_structured": False,
                "requires_auth": True,
            },
            "sambanova": {
                "name": "sambanova",
                "input_cost": 0.7,
                "output_cost": 1.4,
                "latency": 2.67,
                "throughput": 158.0,
                "supports_tools": False,
                "supports_structured": False,
                "requires_auth": True,
            },
            "nscale": {
                "name": "nscale",
                "input_cost": 0.75,
                "output_cost": 0.75,
                "latency": 1.24,
                "throughput": 16.0,
                "supports_tools": False,
                "supports_structured": False,
                "requires_auth": True,
            },
        },
    },
    "moonshotai/Kimi-K2-Thinking": {
        "model_id": "moonshotai/Kimi-K2-Thinking",
        "display_name": "Kimi K2 Thinking",
        "requires_auth": True,  # Gated
        "description": "Moonshot AI's thinking model - Long context reasoning",
        "providers": {
            "novita": {
                "name": "novita",
                "input_cost": 0.48,
                "output_cost": 2.0,
                "latency": 1.60,
                "throughput": 16.0,
                "supports_tools": True,
                "supports_structured": False,
                "requires_auth": True,
            },
            "nebius": {
                "name": "nebius",
                "input_cost": 0.6,
                "output_cost": 2.5,
                "latency": 0.34,
                "throughput": 87.0,
                "supports_tools": True,
                "supports_structured": True,
                "requires_auth": True,
            },
            "together": {
                "name": "together",
                "input_cost": 1.2,
                "output_cost": 4.0,
                "latency": 0.86,
                "throughput": 97.0,
                "supports_tools": True,
                "supports_structured": True,
                "requires_auth": True,
            },
        },
    },
    "allenai/Olmo-3-7B-Instruct": {
        "model_id": "allenai/Olmo-3-7B-Instruct",
        "display_name": "Olmo 3 7B Instruct",
        "requires_auth": False,  # Ungated
        "description": "AllenAI's open model - Good reasoning, no auth needed",
        "providers": {
            "publicai": {
                "name": "publicai",
                "input_cost": None,
                "output_cost": None,
                "latency": 1.78,
                "throughput": 36.0,
                "supports_tools": True,
                "supports_structured": True,
                "requires_auth": False,
            },
        },
    },
    "Qwen/Qwen2-7B-Instruct": {
        "model_id": "Qwen/Qwen2-7B-Instruct",
        "display_name": "Qwen2 7B Instruct",
        "requires_auth": False,  # Ungated
        "description": "Qwen's efficient model - Fast, no authentication",
        "providers": {
            "featherless-ai": {
                "name": "featherless-ai",
                "input_cost": None,
                "output_cost": None,
                "latency": None,
                "throughput": None,
                "supports_tools": False,
                "supports_structured": False,
                "requires_auth": False,
            },
        },
    },
    "HuggingFaceH4/zephyr-7b-beta": {
        "model_id": "HuggingFaceH4/zephyr-7b-beta",
        "display_name": "Zephyr 7B Beta",
        "requires_auth": False,  # Ungated
        "description": "HuggingFace's fine-tuned model - Free tier friendly",
        "providers": {
            "featherless-ai": {
                "name": "featherless-ai",
                "input_cost": None,
                "output_cost": None,
                "latency": None,
                "throughput": None,
                "supports_tools": False,
                "supports_structured": False,
                "requires_auth": False,
            },
        },
    },
    "google/gemma-2-2b-it": {
        "model_id": "google/gemma-2-2b-it",
        "display_name": "Gemma 2 2B IT",
        "requires_auth": True,  # Gated
        "description": "Google's compact model - Small but capable",
        "providers": {
            "nebius": {
                "name": "nebius",
                "input_cost": None,
                "output_cost": None,
                "latency": None,
                "throughput": None,
                "supports_tools": False,
                "supports_structured": False,
                "requires_auth": True,
            },
        },
    },
    "microsoft/Phi-3-mini-4k-instruct": {
        "model_id": "microsoft/Phi-3-mini-4k-instruct",
        "display_name": "Phi-3 Mini 4K Instruct",
        "requires_auth": False,  # Ungated
        "description": "Microsoft's efficient model - Fast inference",
        "providers": {
            "featherless-ai": {
                "name": "featherless-ai",
                "input_cost": None,
                "output_cost": None,
                "latency": None,
                "throughput": None,
                "supports_tools": False,
                "supports_structured": False,
                "requires_auth": False,
            },
        },
    },
}

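Each entry in INFERENCE_MODELS carries per-provider cost, latency, throughput, and capability flags, so callers can rank providers without extra API calls. A minimal sketch of that kind of lookup, assuming the module's import path matches this diff; the cheapest_tool_provider helper is illustrative and not part of the module:

from src.utils.inference_models import INFERENCE_MODELS  # import path assumed from this diff

def cheapest_tool_provider(model_id: str) -> str | None:
    """Illustrative helper: pick the lowest-cost provider that supports tool calling."""
    model = INFERENCE_MODELS.get(model_id)
    if not model:
        return None
    candidates = [
        (info["input_cost"], name)
        for name, info in model["providers"].items()
        if info["supports_tools"] and info["input_cost"] is not None
    ]
    return min(candidates)[1] if candidates else None

# e.g. cheapest_tool_provider("moonshotai/Kimi-K2-Thinking") -> "novita" per the table above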
def get_available_models(has_auth: bool = False) -> list[tuple[str, str]]:
    """
    Get list of available models based on authentication status.

    Args:
        has_auth: Whether user has authentication (OAuth or HF_TOKEN)

    Returns:
        List of (model_id, display_name) tuples for dropdown
    """
    models = []
    for model_id, model_info in INFERENCE_MODELS.items():
        # If no auth, only show ungated models
        if not has_auth and model_info["requires_auth"]:
            continue
        models.append((model_id, model_info["display_name"]))
    return models

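The (model_id, display_name) pairs returned here drop straight into a dropdown's choices list. A short usage sketch, with the import path assumed from this diff:

import os

from src.utils.inference_models import get_available_models  # import path assumed from this diff

has_auth = bool(os.getenv("HF_TOKEN"))
for model_id, display_name in get_available_models(has_auth=has_auth):
    print(f"{display_name:<24} {model_id}")
# Without a token this lists only ungated entries such as Olmo 3, Qwen2, Zephyr, and Phi-3.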
def get_available_providers(model_id: str, has_auth: bool = False) -> list[tuple[str, str]]:
    """
    Get list of available providers for a model based on authentication.

    This is a convenience wrapper around get_available_providers_verified
    that doesn't perform async verification.

    Args:
        model_id: The model ID
        has_auth: Whether user has authentication

    Returns:
        List of (provider_name, display_name) tuples for dropdown
    """
    return get_available_providers_verified(model_id, has_auth=has_auth, verify=False)


def get_model_info(model_id: str) -> InferenceModel | None:
    """Get model information."""
    return INFERENCE_MODELS.get(model_id)


def get_provider_info(model_id: str, provider_name: str) -> ModelProvider | None:
    """Get provider information for a model."""
    model = INFERENCE_MODELS.get(model_id)
    if not model:
        return None
    return model["providers"].get(provider_name)

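Both lookups return None instead of raising on unknown keys, so callers can guard with a single truthiness check. A quick sketch of reading one provider's metadata, with values taken from the registry above and the import path assumed:

from src.utils.inference_models import get_model_info, get_provider_info  # import path assumed

model = get_model_info("moonshotai/Kimi-K2-Thinking")
provider = get_provider_info("moonshotai/Kimi-K2-Thinking", "nebius")
if model and provider:
    print(model["display_name"], provider["input_cost"], provider["supports_structured"])
    # -> Kimi K2 Thinking 0.6 True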
def verify_provider_availability(
    model_id: str,
    provider_name: str,
) -> bool:
    """
    Verify that a model is available on the specified provider (static check).

    This function checks the static configuration to see if a provider
    is listed for the model. For dynamic verification via API calls,
    use verify_provider_availability_async().

    Args:
        model_id: The model ID to verify
        provider_name: The provider name to verify

    Returns:
        True if the model is configured for the provider, False otherwise
    """
    model_config = INFERENCE_MODELS.get(model_id)
    if not model_config:
        return False
    providers = model_config.get("providers", {})
    return provider_name in providers


async def verify_provider_availability_async(
    model_id: str,
    provider_name: str,
    api_key: str | None = None,
) -> bool:
    """
    Verify that a model is actually available on the specified provider via API.

    This function attempts to check if the model/provider combination is valid
    by making a lightweight API call to the HuggingFace Inference API.

    Note: This is an async function and should be called from an async context.
    For synchronous checks, use verify_provider_availability().

    Args:
        model_id: The model ID to verify
        provider_name: The provider name to verify
        api_key: Optional API key for authentication (uses env vars if not provided)

    Returns:
        True if the model is available on the provider, False otherwise
    """
    # For now, fall back to static check
    # TODO: Implement actual API verification when needed
    return verify_provider_availability(model_id, provider_name)

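Because the async variant currently defers to the static table, it can be exercised without a token or network access. A minimal sketch of driving it from synchronous code; the asyncio.run wrapper and import path are assumptions, not part of this commit:

import asyncio

from src.utils.inference_models import verify_provider_availability_async  # import path assumed

ok = asyncio.run(
    verify_provider_availability_async("allenai/Olmo-3-7B-Instruct", "publicai")
)
print(ok)  # True - "publicai" is listed for Olmo 3 in the static registry above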
def get_available_providers_verified(
    model_id: str,
    has_auth: bool = False,
    api_key: str | None = None,
    verify: bool = False,
) -> list[tuple[str, str]]:
    """
    Get list of available providers for a model with optional verification.

    Args:
        model_id: The model ID
        has_auth: Whether user has authentication
        api_key: Optional API key for verification
        verify: Whether to verify provider availability (async, requires api_key)

    Returns:
        List of (provider_name, display_name) tuples for dropdown
    """
    if model_id not in INFERENCE_MODELS:
        return []

    model = INFERENCE_MODELS[model_id]
    providers = []

    for provider_name, provider_info in model["providers"].items():
        # If no auth, only show providers that don't require auth
        if not has_auth and provider_info["requires_auth"]:
            continue

        # Create display name with cost/latency info
        display_parts = [provider_name]
        if provider_info["latency"]:
            display_parts.append(f"{provider_info['latency']:.2f}s")
        if provider_info["input_cost"]:
            display_parts.append(f"${provider_info['input_cost']}/1M")
        if provider_info["supports_tools"]:
            display_parts.append("🔧")
        if provider_info["supports_structured"]:
            display_parts.append("📋")
        display_name = " | ".join(display_parts)

        providers.append((provider_name, display_name))

    # Note: If verify=True, this should be called from an async context
    # For now, we return static providers. Async verification can be done separately.

    return providers
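The display string packs provider name, latency, price, and capability icons into a single dropdown label. A short sketch of what it produces for a gated model when the caller is authenticated; output inferred from the code and registry above, import path assumed:

from src.utils.inference_models import get_available_providers_verified  # import path assumed

for name, label in get_available_providers_verified("moonshotai/Kimi-K2-Thinking", has_auth=True):
    print(name, "->", label)
# nebius -> "nebius | 0.34s | $0.6/1M" plus the tool/structured icons, per the flags above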