Joseph Pollack committed: fix interface

(This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full set.)
- .env copy.example +0 -124
- .env.example +93 -17
- .github/README.md +0 -25
- .github/workflows/ci.yml +7 -7
- .pre-commit-config.yaml +4 -4
- docs/api/agents.md +2 -0
- docs/api/models.md +2 -0
- docs/api/orchestrators.md +2 -0
- docs/api/services.md +2 -0
- docs/api/tools.md +2 -0
- docs/architecture/agents.md +2 -0
- docs/architecture/middleware.md +2 -0
- docs/architecture/services.md +2 -0
- docs/architecture/tools.md +2 -0
- docs/contributing/code-quality.md +2 -0
- docs/contributing/code-style.md +2 -0
- docs/contributing/error-handling.md +2 -0
- docs/contributing/implementation-patterns.md +2 -0
- docs/contributing/index.md +2 -0
- docs/contributing/prompt-engineering.md +2 -0
- docs/contributing/testing.md +2 -0
- docs/getting-started/examples.md +2 -0
- docs/getting-started/installation.md +2 -0
- docs/getting-started/mcp-integration.md +2 -0
- docs/getting-started/quick-start.md +2 -0
- docs/license.md +2 -0
- docs/overview/architecture.md +2 -0
- docs/overview/features.md +2 -0
- docs/team.md +2 -0
- pyproject.toml +1 -0
- requirements.txt +31 -18
- src/agent_factory/judges.py +15 -2
- src/agents/hypothesis_agent.py +1 -1
- src/agents/input_parser.py +1 -1
- src/agents/judge_agent_llm.py +2 -2
- src/agents/knowledge_gap.py +1 -1
- src/agents/long_writer.py +1 -1
- src/agents/proofreader.py +2 -2
- src/agents/report_agent.py +1 -1
- src/agents/thinking.py +2 -2
- src/agents/tool_selector.py +1 -1
- src/agents/writer.py +2 -2
- src/app.py +44 -12
- src/orchestrator/planner_agent.py +1 -1
- src/services/llamaindex_rag.py +2 -1
- src/services/statistical_analyzer.py +1 -1
- tests/integration/test_rag_integration.py +5 -2
- tests/unit/agent_factory/test_judges.py +3 -1
- tests/unit/agents/test_hypothesis_agent.py +13 -12
- tests/unit/agents/test_input_parser.py +36 -6
.env copy.example
DELETED
@@ -1,124 +0,0 @@
(The entire 124-line file was deleted. Its contents are identical to the rewritten .env.example, shown in full in the next section.)
.env.example
CHANGED
File rewritten (+93 -17). Summary: the provider comment now lists "openai", "anthropic", and "huggingface"; the model-name examples are reordered (OPENAI_MODEL before ANTHROPIC_MODEL); the old "HUGGINGFACE (FREE TIER)" section becomes a full HUGGINGFACE CONFIGURATION section with a default model, fallback chain, and UI overrides; the embedding settings move into a dedicated EMBEDDING CONFIGURATION section with a new local default (BAAI/bge-small-en-v1.5 instead of all-MiniLM-L6-v2); and new sections are added for graph execution, budget limits, web search providers, Modal, ChromaDB, and the RAG service. (Several removed lines are truncated in this view.)

New file contents:

# ============== LLM CONFIGURATION ==============

# Provider: "openai", "anthropic", or "huggingface"
LLM_PROVIDER=openai

# API Keys (at least one required for full LLM analysis)
OPENAI_API_KEY=sk-your-key-here
ANTHROPIC_API_KEY=sk-ant-your-key-here

# Model names (optional - sensible defaults set in config.py)
# OPENAI_MODEL=gpt-5.1
# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929

# ============== HUGGINGFACE CONFIGURATION ==============

# HuggingFace Token - enables gated models and higher rate limits
# Get yours at: https://huggingface.co/settings/tokens
#
# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
#
# For HuggingFace Spaces deployment:
# Set this as a "Secret" in Space Settings -> Variables and secrets
# Users/judges don't need their own token - the Space secret is used
#
HF_TOKEN=hf_your-token-here
# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)

# Default HuggingFace model for inference (gated, requires auth)
# Can be overridden in UI dropdown
# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking

# Fallback models for HuggingFace Inference API (comma-separated)
# Models are tried in order until one succeeds
# Format: model1,model2,model3
# Latest reasoning models first, then reliable fallbacks
# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct

# Override model/provider selection (optional, usually set via UI)
# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
# HF_PROVIDER=hyperbolic

# ============== EMBEDDING CONFIGURATION ==============

# Embedding Provider: "openai", "local", or "huggingface"
# Default: "local" (no API key required)
EMBEDDING_PROVIDER=local

# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
OPENAI_EMBEDDING_MODEL=text-embedding-3-small

# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
# BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2
LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5

# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# ============== AGENT CONFIGURATION ==============

MAX_ITERATIONS=10
SEARCH_TIMEOUT=30
LOG_LEVEL=INFO

# Graph-based execution (experimental)
# USE_GRAPH_EXECUTION=false

# Budget & Rate Limiting
# DEFAULT_TOKEN_LIMIT=100000
# DEFAULT_TIME_LIMIT_MINUTES=10
# DEFAULT_ITERATIONS_LIMIT=10

# ============== WEB SEARCH CONFIGURATION ==============

# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
# Default: "duckduckgo" (no API key required)
WEB_SEARCH_PROVIDER=duckduckgo

# Serper API Key (for Google search via Serper)
# SERPER_API_KEY=your-serper-key-here

# SearchXNG Host URL (for self-hosted search)
# SEARCHXNG_HOST=http://localhost:8080

# Brave Search API Key
# BRAVE_API_KEY=your-brave-key-here

# Tavily API Key
# TAVILY_API_KEY=your-tavily-key-here

# ============== EXTERNAL SERVICES ==============

# PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec)
NCBI_API_KEY=your-ncbi-key-here

# Modal (optional - for secure code execution sandbox)
# MODAL_TOKEN_ID=your-modal-token-id
# MODAL_TOKEN_SECRET=your-modal-token-secret

# ============== VECTOR DATABASE (ChromaDB) ==============

# ChromaDB storage path
CHROMA_DB_PATH=./chroma_db

# Persist ChromaDB to disk (default: true)
# CHROMA_DB_PERSIST=true

# Remote ChromaDB server (optional)
# CHROMA_DB_HOST=localhost
# CHROMA_DB_PORT=8000

# ============== RAG SERVICE CONFIGURATION ==============

# ChromaDB collection name for RAG
# RAG_COLLECTION_NAME=deepcritical_evidence

# Number of top results to retrieve from RAG
# RAG_SIMILARITY_TOP_K=5

# Automatically ingest evidence into RAG
# RAG_AUTO_INGEST=true
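For orientation, here is a minimal sketch of how a .env file like this is typically loaded with pydantic-settings (which the project depends on). The field names below simply mirror the variables above; the repository's actual config.py may name things differently.

    # sketch.py - illustrative pydantic-settings loader for the .env above
    from pydantic_settings import BaseSettings, SettingsConfigDict

    class Settings(BaseSettings):
        # Read .env, ignore variables we haven't modeled here
        model_config = SettingsConfigDict(env_file=".env", extra="ignore")

        llm_provider: str = "openai"            # LLM_PROVIDER
        openai_api_key: str | None = None       # OPENAI_API_KEY
        anthropic_api_key: str | None = None    # ANTHROPIC_API_KEY
        hf_token: str | None = None             # HF_TOKEN
        embedding_provider: str = "local"       # EMBEDDING_PROVIDER
        web_search_provider: str = "duckduckgo" # WEB_SEARCH_PROVIDER
        max_iterations: int = 10                # MAX_ITERATIONS
        search_timeout: int = 30                # SEARCH_TIMEOUT

    settings = Settings()
    print(settings.llm_provider)

Matching is case-insensitive, so MAX_ITERATIONS in the file populates max_iterations on the model.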
.github/README.md
CHANGED
@@ -1,28 +1,3 @@
The HuggingFace Spaces YAML frontmatter (25 lines) was removed from the README:

----
-title: DeepCritical
-emoji: 🧬
-colorFrom: blue
-colorTo: purple
-sdk: gradio
-sdk_version: "6.0.1"
-python_version: "3.11"
-app_file: src/app.py
-hf_oauth: true
-hf_oauth_expiration_minutes: 480
-hf_oauth_scopes:
-  - inference-api
-pinned: true
-license: mit
-tags:
-  - mcp-in-action-track-enterprise
-  - mcp-hackathon
-  - drug-repurposing
-  - biomedical-ai
-  - pydantic-ai
-  - llamaindex
-  - modal
----
-
 <div align="center">

 [![GitHub](https://img.shields.io/badge/GitHub-DeepCritical-181717?logo=github)](https://github.com/DeepCritical/GradioDemo)
.github/workflows/ci.yml
CHANGED
@@ -33,19 +33,19 @@ jobs:
       - name: Lint with ruff
         continue-on-error: true
         run: |
-          uv run ruff check . --exclude tests
-          uv run ruff format --check . --exclude tests
+          uv run ruff check . --exclude tests --exclude reference_repos
+          uv run ruff format --check . --exclude tests --exclude reference_repos

       - name: Type check with mypy
         continue-on-error: true
         run: |
-          uv run mypy src
+          uv run mypy src --ignore-missing-imports

-      - name: Run unit tests (No ...)   (old step name truncated in this view)
+      - name: Run unit tests (No OpenAI/Anthropic, HuggingFace only)
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
-          uv run pytest tests/unit/ -v -m "not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml
+          uv run pytest tests/unit/ -v -m "not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml

@@ -61,11 +61,11 @@ jobs:
           uv run pytest tests/integration/ -v -m "huggingface and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
         continue-on-error: true # Allow failures if HF_TOKEN not set

-      - name: Run non-OpenAI integration tests (excluding embedding providers)
+      - name: Run non-OpenAI/Anthropic integration tests (excluding embedding providers)
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
-          uv run pytest tests/integration/ -v -m "integration and not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
+          uv run pytest tests/integration/ -v -m "integration and not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
         continue-on-error: true # Allow failures if dependencies not available

       - name: Upload coverage reports to Codecov
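The -m expressions above only select cleanly (and without PytestUnknownMarkWarning) if the custom marks are registered. A minimal sketch of the registration these selections assume; the project's actual pytest configuration is not shown in this commit, so the descriptions are illustrative:

    [tool.pytest.ini_options]
    markers = [
        "openai: requires an OpenAI API key",
        "anthropic: requires an Anthropic API key",
        "huggingface: calls the HuggingFace Inference API",
        "embedding_provider: exercises a specific embedding provider",
        "integration: runs against live services",
    ]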
.pre-commit-config.yaml
CHANGED
@@ -1,16 +1,16 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.   (old pin truncated in this view)
+    rev: v0.14.7 # Compatible with ruff>=0.14.6 (matches CI)
     hooks:
       - id: ruff
-        args: [--fix, --exclude, tests]
+        args: [--fix, --exclude, tests, --exclude, reference_repos]
         exclude: ^reference_repos/
       - id: ruff-format
-        args: [--exclude, tests]
+        args: [--exclude, tests, --exclude, reference_repos]
         exclude: ^reference_repos/

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.   (old pin truncated in this view)
+    rev: v1.18.2 # Matches CI version mypy>=1.18.2
     hooks:
       - id: mypy
         files: ^src/
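With the hook revisions now pinned to match the CI tool versions, the same gates can be checked locally with:

    uv run pre-commit run --all-files

Future bumps should move the rev: pins and the CI/requirements pins together; pre-commit autoupdate regenerates the former.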
docs/* (24 files)
CHANGED

The following documentation pages each received the same trivial change - two blank lines appended at end of file (+2 -0):

docs/api/agents.md, docs/api/models.md, docs/api/orchestrators.md, docs/api/services.md, docs/api/tools.md, docs/architecture/agents.md, docs/architecture/middleware.md, docs/architecture/services.md, docs/architecture/tools.md, docs/contributing/code-quality.md, docs/contributing/code-style.md, docs/contributing/error-handling.md, docs/contributing/implementation-patterns.md, docs/contributing/index.md, docs/contributing/prompt-engineering.md, docs/contributing/testing.md, docs/getting-started/examples.md, docs/getting-started/installation.md, docs/getting-started/mcp-integration.md, docs/getting-started/quick-start.md, docs/license.md, docs/overview/architecture.md, docs/overview/features.md, docs/team.md
pyproject.toml
CHANGED
@@ -29,6 +29,7 @@ dependencies = [
     "tokenizers>=0.22.0,<=0.23.0",
     "transformers>=4.57.2",
     "chromadb>=0.4.0",
+    "rpds-py>=0.29.0", # Python implementation of rpds (required by chromadb on Windows)
     "sentence-transformers>=2.2.0",
     "numpy<2.0",
     "agent-framework-core>=1.0.0b251120,<2.0.0",
requirements.txt
CHANGED
File reorganized and extended (+31 -18). Several removed lines (old LlamaIndex pins and section comments) are truncated in this view. Key changes: anthropic and huggingface-hub become direct dependencies, gradio gains the oauth extra, pydantic-graph is pinned, the LlamaIndex RAG stack is pinned explicitly, rpds-py and numpy<2.0 join the embeddings/vector-store section, and modal plus pydantic-ai-slim[huggingface] are added.

New contents of the changed region (lines 9-61):

pydantic-settings>=2.2
pydantic-ai>=0.0.16

# OPTIONAL AI Providers
openai>=1.0.0
anthropic>=0.18.0

# HTTP & Parsing
httpx>=0.27
beautifulsoup4>=4.12
xmltodict>=0.13

# HuggingFace Hub
huggingface-hub>=0.20.0

# UI (Gradio with MCP server support)
gradio[mcp,oauth]>=6.0.0

# Utils
python-dotenv>=1.0
tenacity>=8.2
structlog>=24.1
requests>=2.32.5
limits>=3.0 # Rate limiting
pydantic-graph>=1.22.0

# Web search
duckduckgo-search>=5.0

# Multi-agent orchestration (Advanced mode)
agent-framework-core>=1.0.0b251120,<2.0.0

# LlamaIndex RAG
llama-index-llms-huggingface>=0.6.1
llama-index-llms-huggingface-api>=0.6.1
llama-index-vector-stores-chroma>=0.5.3
llama-index>=0.14.8
llama-index-llms-openai>=0.6.9
llama-index-embeddings-openai>=0.5.1

# Embeddings & Vector Store
tokenizers>=0.22.0,<=0.23.0
transformers>=4.57.2
chromadb>=0.4.0
rpds-py>=0.29.0 # Python implementation of rpds (required by chromadb on Windows)
sentence-transformers>=2.2.0
numpy<2.0

# Optional: Modal for code execution
modal>=0.63.0

# Pydantic AI with HuggingFace support
pydantic-ai-slim[huggingface]>=0.0.18
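Shipping huggingface-hub directly is what makes the HF_FALLBACK_MODELS behavior from .env.example ("models are tried in order until one succeeds") implementable. A minimal sketch of that loop, assuming the chat-completion API of recent huggingface-hub versions; the function name and error handling are illustrative, not the project's actual implementation:

    # fallback.py - illustrative "try models in order" loop for HF_FALLBACK_MODELS
    import os

    from huggingface_hub import InferenceClient

    def chat_with_fallback(prompt: str) -> str:
        models = os.environ.get("HF_FALLBACK_MODELS", "HuggingFaceH4/zephyr-7b-beta").split(",")
        client = InferenceClient(token=os.environ.get("HF_TOKEN"))
        last_error: Exception | None = None
        for model in models:
            try:
                out = client.chat_completion(
                    messages=[{"role": "user", "content": prompt}],
                    model=model.strip(),
                    max_tokens=512,
                )
                return out.choices[0].message.content
            except Exception as e:  # gated model, 404, rate limit, ...
                last_error = e      # remember the failure and try the next model
        raise RuntimeError(f"All fallback models failed: {last_error}")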
src/agent_factory/judges.py
CHANGED
@@ -8,10 +8,18 @@ from typing import Any
 import structlog
 from huggingface_hub import InferenceClient
 from pydantic_ai import Agent
-from pydantic_ai.models.anthropic import AnthropicModel
 from pydantic_ai.models.openai import OpenAIModel  # type: ignore[attr-defined]
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

+# Try to import AnthropicModel (may not be available if anthropic package is missing)
+try:
+    from pydantic_ai.models.anthropic import AnthropicModel
+
+    _ANTHROPIC_AVAILABLE = True
+except ImportError:
+    AnthropicModel = None  # type: ignore[assignment, misc]
+    _ANTHROPIC_AVAILABLE = False
+
 # Try to import HuggingFace support (may not be available in all pydantic-ai versions)
 # According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires
 # pydantic-ai with huggingface extra or pydantic-ai-slim[huggingface]

@@ -50,6 +58,11 @@ def get_model() -> Any:
     llm_provider = settings.llm_provider

     if llm_provider == "anthropic":
+        if not _ANTHROPIC_AVAILABLE:
+            raise ImportError(
+                "Anthropic models are not available. "
+                "Please install with: uv add 'pydantic-ai[anthropic]' or use 'openai'/'huggingface' as the LLM provider."
+            )
         return AnthropicModel(settings.anthropic_model, api_key=settings.anthropic_api_key)  # type: ignore[call-arg]

     if llm_provider == "huggingface":

@@ -144,7 +157,7 @@ class JudgeHandler:
         try:
             # Run the agent with structured output
             result = await self.agent.run(user_prompt)
-            assessment = result.   (attribute name truncated in this view)
+            assessment = result.data

             logger.info(
                 "Assessment complete",
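A hypothetical test (not part of this commit) sketching how the new guard could be exercised; it assumes judges exposes a module-level settings object, as the diff suggests, and that its attributes can be monkeypatched:

    import pytest

    from src.agent_factory import judges

    def test_get_model_errors_without_anthropic(monkeypatch):
        # Simulate the environment where the anthropic extra is not installed.
        monkeypatch.setattr(judges, "_ANTHROPIC_AVAILABLE", False)
        monkeypatch.setattr(judges.settings, "llm_provider", "anthropic", raising=False)
        with pytest.raises(ImportError, match="Anthropic models are not available"):
            judges.get_model()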
src/agents/hypothesis_agent.py, src/agents/input_parser.py, src/agents/judge_agent_llm.py, src/agents/knowledge_gap.py, src/agents/long_writer.py, src/agents/proofreader.py, src/agents/report_agent.py, src/agents/thinking.py, src/agents/tool_selector.py, src/agents/writer.py
CHANGED

All ten agent modules receive the same interface fix: after result = await self.agent.run(...), the structured output is now read from result.data (the replaced attribute access is truncated in this view). Where mypy cannot see the attribute on the run result, the new lines carry type: ignore suppressions. Representative hunks (src/agents/writer.py, @@ -136,7 and @@ -145,7):

         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            report = result.   (attribute name truncated in this view)
+            report = result.data  # type: ignore[attr-defined]

             # Validate output
             if not report or not report.strip():

             self.logger.info("Report written", length=len(report), attempt=attempt + 1)

-            return report
+            return report  # type: ignore[no-any-return]

         except (TimeoutError, ConnectionError) as e:
             # Transient errors - retry

The same pattern lands in each file:
- hypothesis_agent.py (@@ -75,7): assessment = result.data  # type: ignore[attr-defined]
- input_parser.py (@@ -92,7): parsed_query = result.data
- judge_agent_llm.py (@@ -41,5): logger.info("LLM judge assessment complete", sufficient=run_result.data.sufficient)  # type: ignore[attr-defined] and return run_result.data  # type: ignore[no-any-return,attr-defined]
- knowledge_gap.py (@@ -113,7): evaluation = result.data
- long_writer.py (@@ -176,7): output = result.data
- proofreader.py (@@ -133,7 and @@ -142,7): final_report = result.data  # type: ignore[attr-defined]; return final_report  # type: ignore[no-any-return]
- report_agent.py (@@ -91,7): report = result.data  # type: ignore[attr-defined]
- thinking.py (@@ -112,11): observations = result.data  # type: ignore[attr-defined]; return observations  # type: ignore[no-any-return]
- tool_selector.py (@@ -117,7): selection_plan = result.data
- writer.py: shown above
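The common thread in all ten files is how pydantic-ai returns structured output. A minimal sketch in the 0.0.x API this repository pins (pydantic-ai>=0.0.16), where the validated object lives on result.data; note that later pydantic-ai releases renamed result_type/.data to output_type/.output, which is exactly the kind of interface drift this commit is working around:

    from pydantic import BaseModel
    from pydantic_ai import Agent

    class Verdict(BaseModel):
        sufficient: bool
        reasoning: str

    # "openai:gpt-4o" is an illustrative model id, not the project's configured model.
    agent = Agent("openai:gpt-4o", result_type=Verdict)

    async def judge(prompt: str) -> Verdict:
        result = await agent.run(prompt)
        return result.data  # validated Verdict instance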
src/app.py
CHANGED
@@ -172,20 +172,29 @@ def event_to_chat_message(event: AgentEvent) -> dict[str, Any]:
         "content": event.message,
     }

-    # Build metadata for accordion
+    # Build metadata for accordion according to Gradio ChatMessage spec
+    # Metadata keys: title (str), status ("pending"|"done"), log (str), duration (float)
+    # See: https://www.gradio.app/guides/agents-and-tool-usage
     metadata: dict[str, Any] = {}
+
+    # Title is required for accordion display - must be string
     if config["title"]:
-        metadata["title"] = config["title"]
+        metadata["title"] = str(config["title"])

     # Set status (pending shows spinner, done is collapsed)
+    # Must be exactly "pending" or "done" per Gradio spec
     if config["status"] == "pending":
         metadata["status"] = "pending"
+    elif config["status"] == "done":
+        metadata["status"] = "done"

-    # Add duration if available in data
+    # Add duration if available in data (must be float)
     if event.data and isinstance(event.data, dict) and "duration" in event.data:
-        (old assignment truncated in this view)
+        duration = event.data["duration"]
+        if isinstance(duration, int | float):
+            metadata["duration"] = float(duration)

-    # Add log info (iteration number, etc.)
+    # Add log info (iteration number, etc.) - must be string
     log_parts: list[str] = []
     if event.iteration > 0:
         log_parts.append(f"Iteration {event.iteration}")

@@ -198,12 +207,22 @@ def event_to_chat_message(event: AgentEvent) -> dict[str, Any]:
         metadata["log"] = " | ".join(log_parts)

     # Return as dict format for Gradio Chatbot compatibility
-    # (old comment truncated in this view)
+    # According to Gradio docs: https://www.gradio.app/guides/agents-and-tool-usage
+    # ChatMessage format: {"role": "assistant", "content": "...", "metadata": {...}}
+    # Metadata must have "title" key for accordion display
+    # Valid metadata keys: title (str), status ("pending"|"done"), log (str), duration (float)
     result: dict[str, Any] = {
         "role": "assistant",
         "content": event.message,
     }
-    if (old condition truncated in this view)
+    # Only add metadata if it has a title (required for accordion display)
+    # Ensure metadata values match Gradio's expected types
+    if metadata and metadata.get("title"):
+        # Ensure status is valid if present
+        if "status" in metadata:
+            status = metadata["status"]
+            if status not in ("pending", "done"):
+                metadata["status"] = "done"  # Default to "done" if invalid
         result["metadata"] = metadata
     return result

@@ -455,10 +474,11 @@ async def research_agent(
             yield msg

     except Exception as e:
+        # Return error message without metadata to avoid issues during example caching
+        # Metadata can cause validation errors when Gradio caches examples
         yield {
             "role": "assistant",
-            "content": f"❌ **Error**: {e!s}",
-            "metadata": {"title": "❌ Error", "status": "done"},
+            "content": f"❌ **Error**: {e!s}\n\n*Please check your configuration and try again.*",
         }

@@ -681,9 +701,21 @@ def create_demo() -> gr.Blocks:
             "**Sign in with HuggingFace** above to access premium models and providers."
         ),
         examples=[
-            (three plain-string examples, truncated in this view)
+            # When additional_inputs are provided, examples must be lists of lists
+            # Each inner list: [message, mode, hf_model, hf_provider]
+            [
+                "What drugs could be repurposed for Alzheimer's disease?",
+                "iterative",
+                None,
+                None,
+            ],
+            ["Is metformin effective for treating cancer?", "iterative", None, None],
+            [
+                "What medications show promise for Long COVID treatment?",
+                "iterative",
+                None,
+                None,
+            ],
         ],
         additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False),
         additional_inputs=[
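For context, a self-contained sketch of the ChatMessage-with-metadata shape that the updated event_to_chat_message produces; in messages mode Gradio renders such a dict as a collapsible accordion, with a spinner while status is "pending". The agent function here is a stand-in, not the app's real one:

    import time

    import gradio as gr

    def fake_agent(message, history):
        start = time.time()
        # First yield: accordion with spinner while the "tool" runs
        yield {
            "role": "assistant",
            "content": "Searching PubMed...",
            "metadata": {"title": "🔎 Search", "status": "pending"},
        }
        time.sleep(0.5)  # stand-in for real work
        # Second yield: collapsed accordion with log and duration
        yield {
            "role": "assistant",
            "content": "Found 12 papers.",
            "metadata": {
                "title": "🔎 Search",
                "status": "done",
                "log": "Iteration 1",
                "duration": round(time.time() - start, 2),
            },
        }

    demo = gr.ChatInterface(fake_agent, type="messages")
    # demo.launch()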
src/orchestrator/planner_agent.py
CHANGED
@@ -109,7 +109,7 @@ class PlannerAgent:
         try:
             # Run the agent
             result = await self.agent.run(user_message)
-            report_plan = result.   (attribute name truncated in this view)
+            report_plan = result.data

             # Validate report plan
             if not report_plan.report_outline:
src/services/llamaindex_rag.py
CHANGED
@@ -136,7 +136,8 @@ class LlamaIndexRAGService:
             }
         except ImportError as e:
             raise ImportError(
-                "LlamaIndex dependencies not installed.   (rest of old message truncated in this view)
+                "LlamaIndex dependencies not installed. Required packages: chromadb, llama-index, "
+                "and their dependencies. If rpds is missing, try: uv pip install rpds-py"
             ) from e

     def _configure_embeddings(
src/services/statistical_analyzer.py
CHANGED
@@ -135,7 +135,7 @@ Generate executable Python code to analyze this evidence."""
         # Generate code
         agent = self._get_agent()
         code_result = await agent.run(prompt)
-        generated_code = code_result.   (attribute name truncated in this view)
+        generated_code = code_result.data  # type: ignore[attr-defined]

         # Execute in Modal sandbox
         loop = asyncio.get_running_loop()
tests/integration/test_rag_integration.py
CHANGED
@@ -121,9 +121,12 @@ class TestRAGServiceIntegration:
             assert len(response) > 0
             assert "python" in response.lower()
         except Exception as e:
-            # If model is not available (404), skip the test
-            (old condition truncated in this view)
+            # If model is not available (404) or authentication required (401), skip the test
+            error_str = str(e)
+            if "404" in error_str or "Not Found" in error_str:
                 pytest.skip(f"HuggingFace model not available via inference API: {e}")
+            if "401" in error_str or "Unauthorized" in error_str or "Invalid username or password" in error_str:
+                pytest.skip(f"HuggingFace authentication required but not available: {e}")
             raise

         # Cleanup
tests/unit/agent_factory/test_judges.py
CHANGED
@@ -34,6 +34,7 @@ class TestJudgeHandler:

         # Mock the PydanticAI agent
         mock_result = MagicMock()
+        type(mock_result).data = mock_assessment  # pydantic-ai uses .data for structured output
         mock_result.output = mock_assessment

         with (

@@ -88,7 +89,8 @@ class TestJudgeHandler:
         )

         mock_result = MagicMock()
-        mock_result.   (old assignment truncated in this view)
+        mock_result.data = mock_assessment
+        mock_result.output = mock_assessment  # Some code may use .output

         with (
             patch("src.agent_factory.judges.get_model") as mock_get_model,
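The type(mock_result).data = ... form is deliberate: unittest.mock documents that property-like attributes must be attached to the mock's type, and each Mock instance gets its own subclass, so the assignment does not leak to other mocks. A small self-contained illustration of both forms used in these tests:

    from unittest.mock import MagicMock, PropertyMock

    mock_result = MagicMock()

    # Simple attribute: fine when production code just reads `result.data`.
    mock_result.data = {"sufficient": True}
    assert mock_result.data == {"sufficient": True}

    # Property-style: attach to the *type*, the documented pattern for properties.
    prop = PropertyMock(return_value={"sufficient": True})
    type(mock_result).data = prop
    assert mock_result.data == {"sufficient": True}
    prop.assert_called_once()  # property access is recorded as a call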
tests/unit/agents/test_hypothesis_agent.py
CHANGED
@@ -28,18 +28,17 @@ def sample_evidence():

 @pytest.fixture
 def mock_assessment():
+    primary_hyp = MechanismHypothesis(
+        drug="Metformin",
+        target="AMPK",
+        pathway="mTOR inhibition",
+        effect="Reduced cancer cell proliferation",
+        confidence=0.75,
+        search_suggestions=["metformin AMPK cancer", "mTOR cancer therapy"],
+    )
     return HypothesisAssessment(
-        hypotheses=[
-            (inline MechanismHypothesis construction with the same fields, partially truncated in this view)
-        ],
-        primary_hypothesis=None,
+        hypotheses=[primary_hyp],
+        primary_hypothesis=primary_hyp,  # Set primary hypothesis
         knowledge_gaps=["Clinical trial data needed"],
         recommended_searches=["metformin clinical trial cancer"],
     )

@@ -54,8 +53,9 @@ async def test_hypothesis_agent_generates_hypotheses(sample_evidence, mock_assessment):
     with patch("src.agents.hypothesis_agent.Agent") as mock_agent_class:
         mock_get_model.return_value = MagicMock()  # Mock model
         mock_result = MagicMock()
+        type(mock_result).data = mock_assessment  # pydantic-ai uses .data for structured output
         mock_result.output = mock_assessment
-        # pydantic-ai Agent returns an object with .   (comment truncated in this view)
+        # pydantic-ai Agent returns an object with .data for structured output
         mock_agent_class.return_value.run = AsyncMock(return_value=mock_result)

         agent = HypothesisAgent(store)

@@ -94,6 +94,7 @@ async def test_hypothesis_agent_uses_embeddings(sample_evidence, mock_assessment):
         mock_format.return_value = "Prompt"

         mock_result = MagicMock()
+        type(mock_result).data = mock_assessment  # pydantic-ai uses .data for structured output
         mock_result.output = mock_assessment
         mock_agent_class.return_value.run = AsyncMock(return_value=mock_result)
tests/unit/agents/test_input_parser.py
CHANGED
@@ -18,6 +18,13 @@ def mock_model() -> MagicMock:
     return model


+@pytest.fixture(autouse=True)
+def patch_infer_model(mock_model: MagicMock):
+    """Auto-patch infer_model for all tests to avoid OpenAI API key requirements."""
+    with patch("pydantic_ai.models.infer_model", return_value=mock_model):
+        yield
+
+
 @pytest.fixture
 def mock_parsed_query_iterative() -> ParsedQuery:
     """Create a mock ParsedQuery for iterative mode."""

@@ -51,7 +58,9 @@ def mock_agent_result_iterative(
     mock_parsed_query_iterative: ParsedQuery,
 ) -> RunResult[ParsedQuery]:
     """Create a mock agent result for iterative mode."""
-    result = MagicMock(   (old construction truncated in this view)
+    result = MagicMock()
+    # Configure the mock to return the actual output when .data is accessed
+    type(result).data = mock_parsed_query_iterative
     result.output = mock_parsed_query_iterative
     return result

@@ -61,7 +70,9 @@ def mock_agent_result_deep(
     mock_parsed_query_deep: ParsedQuery,
 ) -> RunResult[ParsedQuery]:
     """Create a mock agent result for deep mode."""
-    result = MagicMock(   (old construction truncated in this view)
+    result = MagicMock()
+    # Configure the mock to return the actual output when .data is accessed
+    type(result).data = mock_parsed_query_deep
     result.output = mock_parsed_query_deep
     return result

@@ -72,33 +83,52 @@ def input_parser_agent(mock_model: MagicMock) -> InputParserAgent:
     return InputParserAgent(model=mock_model)


+@pytest.fixture(autouse=True)
+def patch_infer_model(mock_model: MagicMock):
+    """Auto-patch infer_model for all tests to avoid OpenAI API key requirements."""
+    with patch("pydantic_ai.models.infer_model", return_value=mock_model):
+        yield
+
+
 class TestInputParserAgentInit:
     """Test InputParserAgent initialization."""

-    (old test signature truncated in this view)
+    @patch("pydantic_ai.models.infer_model")
+    def test_input_parser_agent_init_with_model(
+        self, mock_infer_model: MagicMock, mock_model: MagicMock
+    ) -> None:
         """Test InputParserAgent initialization with provided model."""
+        mock_infer_model.return_value = mock_model
         agent = InputParserAgent(model=mock_model)
         assert agent.model == mock_model
         assert agent.agent is not None

     @patch("src.agents.input_parser.get_model")
+    @patch("pydantic_ai.models.infer_model")
     def test_input_parser_agent_init_without_model(
         self,
+        mock_infer_model: MagicMock,
+        mock_get_model: MagicMock,
+        mock_model: MagicMock,
     ) -> None:
         """Test InputParserAgent initialization without model (uses default)."""
         mock_get_model.return_value = mock_model
+        mock_infer_model.return_value = mock_model
         agent = InputParserAgent()
         assert agent.model == mock_model
         mock_get_model.assert_called_once()

+    @patch("pydantic_ai.models.infer_model")
     def test_input_parser_agent_has_correct_system_prompt(
-        self,
+        self, mock_infer_model: MagicMock, mock_model: MagicMock
     ) -> None:
         """Test that InputParserAgent has correct system prompt."""
+        mock_infer_model.return_value = mock_model
+        agent = InputParserAgent(model=mock_model)
         # System prompt should contain key instructions
         # In pydantic_ai, system_prompt is a property that returns the prompt string
         # For mocked agents, we check that the agent was created with a system prompt
-        assert    (old assertion truncated in this view)
+        assert agent.agent is not None
         # The actual system prompt is set during agent creation
         # We verify the agent exists and was properly initialized
         # Note: Direct access to system_prompt may not work with mocks