diff --git a/.env copy.example b/.env copy.example deleted file mode 100644 index b8061357538326dd7fad717c627cdcfa5c0b3eb9..0000000000000000000000000000000000000000 --- a/.env copy.example +++ /dev/null @@ -1,124 +0,0 @@ -# ============== LLM CONFIGURATION ============== - -# Provider: "openai", "anthropic", or "huggingface" -LLM_PROVIDER=openai - -# API Keys (at least one required for full LLM analysis) -OPENAI_API_KEY=sk-your-key-here -ANTHROPIC_API_KEY=sk-ant-your-key-here - -# Model names (optional - sensible defaults set in config.py) -# OPENAI_MODEL=gpt-5.1 -# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929 - -# ============== HUGGINGFACE CONFIGURATION ============== - -# HuggingFace Token - enables gated models and higher rate limits -# Get yours at: https://huggingface.co/settings/tokens -# -# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B) -# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers -# -# For HuggingFace Spaces deployment: -# Set this as a "Secret" in Space Settings -> Variables and secrets -# Users/judges don't need their own token - the Space secret is used -# -HF_TOKEN=hf_your-token-here -# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN) - -# Default HuggingFace model for inference (gated, requires auth) -# Can be overridden in UI dropdown -# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct -HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking - -# Fallback models for HuggingFace Inference API (comma-separated) -# Models are tried in order until one succeeds -# Format: model1,model2,model3 -# Latest reasoning models first, then reliable fallbacks -# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B -# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated) -HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct - -# Override model/provider selection (optional, usually set via UI) -# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking -# HF_PROVIDER=hyperbolic - -# ============== EMBEDDING CONFIGURATION ============== - -# Embedding Provider: "openai", "local", or "huggingface" -# Default: "local" (no API key required) -EMBEDDING_PROVIDER=local - -# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai) -OPENAI_EMBEDDING_MODEL=text-embedding-3-small - -# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local) -# BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2 -LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5 - -# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface) -HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 - -# ============== AGENT CONFIGURATION ============== - -MAX_ITERATIONS=10 -SEARCH_TIMEOUT=30 -LOG_LEVEL=INFO - -# Graph-based execution (experimental) -# USE_GRAPH_EXECUTION=false - -# Budget & Rate Limiting -# DEFAULT_TOKEN_LIMIT=100000 -# DEFAULT_TIME_LIMIT_MINUTES=10 -# DEFAULT_ITERATIONS_LIMIT=10 - -# ============== WEB SEARCH CONFIGURATION ============== - -# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo" -# Default: "duckduckgo" (no API key required) -WEB_SEARCH_PROVIDER=duckduckgo - -# Serper API Key (for Google search via Serper) -# SERPER_API_KEY=your-serper-key-here - -# SearchXNG Host URL (for self-hosted search) -# SEARCHXNG_HOST=http://localhost:8080 - -# Brave Search API Key -# BRAVE_API_KEY=your-brave-key-here - -# Tavily API Key -# TAVILY_API_KEY=your-tavily-key-here - -# ============== EXTERNAL SERVICES ============== - -# PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec) -NCBI_API_KEY=your-ncbi-key-here - -# Modal (optional - for secure code execution sandbox) -# MODAL_TOKEN_ID=your-modal-token-id -# MODAL_TOKEN_SECRET=your-modal-token-secret - -# ============== VECTOR DATABASE (ChromaDB) ============== - -# ChromaDB storage path -CHROMA_DB_PATH=./chroma_db - -# Persist ChromaDB to disk (default: true) -# CHROMA_DB_PERSIST=true - -# Remote ChromaDB server (optional) -# CHROMA_DB_HOST=localhost -# CHROMA_DB_PORT=8000 - -# ============== RAG SERVICE CONFIGURATION ============== - -# ChromaDB collection name for RAG -# RAG_COLLECTION_NAME=deepcritical_evidence - -# Number of top results to retrieve from RAG -# RAG_SIMILARITY_TOP_K=5 - -# Automatically ingest evidence into RAG -# RAG_AUTO_INGEST=true diff --git a/.env.example b/.env.example index cfea522c8e49c8e8de6145965e6269cbd616b788..b8061357538326dd7fad717c627cdcfa5c0b3eb9 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,6 @@ # ============== LLM CONFIGURATION ============== -# Provider: "openai" or "anthropic" +# Provider: "openai", "anthropic", or "huggingface" LLM_PROVIDER=openai # API Keys (at least one required for full LLM analysis) @@ -8,30 +8,56 @@ OPENAI_API_KEY=sk-your-key-here ANTHROPIC_API_KEY=sk-ant-your-key-here # Model names (optional - sensible defaults set in config.py) -# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929 # OPENAI_MODEL=gpt-5.1 +# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929 -# ============== EMBEDDINGS ============== - -# OpenAI Embedding Model (used if LLM_PROVIDER is openai and performing RAG/Embeddings) -OPENAI_EMBEDDING_MODEL=text-embedding-3-small - -# Local Embedding Model (used for local/offline embeddings) -LOCAL_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 - -# ============== HUGGINGFACE (FREE TIER) ============== +# ============== HUGGINGFACE CONFIGURATION ============== -# HuggingFace Token - enables Llama 3.1 (best quality free model) +# HuggingFace Token - enables gated models and higher rate limits # Get yours at: https://huggingface.co/settings/tokens -# -# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta) -# WITH HF_TOKEN: Uses Llama 3.1 8B Instruct (requires accepting license) +# +# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B) +# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers # # For HuggingFace Spaces deployment: # Set this as a "Secret" in Space Settings -> Variables and secrets # Users/judges don't need their own token - the Space secret is used # HF_TOKEN=hf_your-token-here +# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN) + +# Default HuggingFace model for inference (gated, requires auth) +# Can be overridden in UI dropdown +# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct +HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking + +# Fallback models for HuggingFace Inference API (comma-separated) +# Models are tried in order until one succeeds +# Format: model1,model2,model3 +# Latest reasoning models first, then reliable fallbacks +# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B +# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated) +HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct + +# Override model/provider selection (optional, usually set via UI) +# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking +# HF_PROVIDER=hyperbolic + +# ============== EMBEDDING CONFIGURATION ============== + +# Embedding Provider: "openai", "local", or "huggingface" +# Default: "local" (no API key required) +EMBEDDING_PROVIDER=local + +# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai) +OPENAI_EMBEDDING_MODEL=text-embedding-3-small + +# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local) +# BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2 +LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5 + +# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface) +HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 # ============== AGENT CONFIGURATION ============== @@ -39,10 +65,60 @@ MAX_ITERATIONS=10 SEARCH_TIMEOUT=30 LOG_LEVEL=INFO +# Graph-based execution (experimental) +# USE_GRAPH_EXECUTION=false + +# Budget & Rate Limiting +# DEFAULT_TOKEN_LIMIT=100000 +# DEFAULT_TIME_LIMIT_MINUTES=10 +# DEFAULT_ITERATIONS_LIMIT=10 + +# ============== WEB SEARCH CONFIGURATION ============== + +# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo" +# Default: "duckduckgo" (no API key required) +WEB_SEARCH_PROVIDER=duckduckgo + +# Serper API Key (for Google search via Serper) +# SERPER_API_KEY=your-serper-key-here + +# SearchXNG Host URL (for self-hosted search) +# SEARCHXNG_HOST=http://localhost:8080 + +# Brave Search API Key +# BRAVE_API_KEY=your-brave-key-here + +# Tavily API Key +# TAVILY_API_KEY=your-tavily-key-here + # ============== EXTERNAL SERVICES ============== -# PubMed (optional - higher rate limits) +# PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec) NCBI_API_KEY=your-ncbi-key-here -# Vector Database (optional - for LlamaIndex RAG) +# Modal (optional - for secure code execution sandbox) +# MODAL_TOKEN_ID=your-modal-token-id +# MODAL_TOKEN_SECRET=your-modal-token-secret + +# ============== VECTOR DATABASE (ChromaDB) ============== + +# ChromaDB storage path CHROMA_DB_PATH=./chroma_db + +# Persist ChromaDB to disk (default: true) +# CHROMA_DB_PERSIST=true + +# Remote ChromaDB server (optional) +# CHROMA_DB_HOST=localhost +# CHROMA_DB_PORT=8000 + +# ============== RAG SERVICE CONFIGURATION ============== + +# ChromaDB collection name for RAG +# RAG_COLLECTION_NAME=deepcritical_evidence + +# Number of top results to retrieve from RAG +# RAG_SIMILARITY_TOP_K=5 + +# Automatically ingest evidence into RAG +# RAG_AUTO_INGEST=true diff --git a/.github/README.md b/.github/README.md index 7f9634bf5d792f81f450f88f0d607ab1fe3f2956..c573b60ebe7f6f8264b6e31d32793100968a1965 100644 --- a/.github/README.md +++ b/.github/README.md @@ -1,28 +1,3 @@ ---- -title: DeepCritical -emoji: 🧬 -colorFrom: blue -colorTo: purple -sdk: gradio -sdk_version: "6.0.1" -python_version: "3.11" -app_file: src/app.py -hf_oauth: true -hf_oauth_expiration_minutes: 480 -hf_oauth_scopes: - - inference-api -pinned: true -license: mit -tags: - - mcp-in-action-track-enterprise - - mcp-hackathon - - drug-repurposing - - biomedical-ai - - pydantic-ai - - llamaindex - - modal ---- -
[![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=🐙%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cfeb4ebd2ef145b6f7da1af5e790d430bae99eff..4481459350e12a71337b5c32f804f13a24b33c62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,19 +33,19 @@ jobs: - name: Lint with ruff continue-on-error: true run: | - uv run ruff check . --exclude tests - uv run ruff format --check . --exclude tests + uv run ruff check . --exclude tests --exclude reference_repos + uv run ruff format --check . --exclude tests --exclude reference_repos - name: Type check with mypy continue-on-error: true run: | - uv run mypy src + uv run mypy src --ignore-missing-imports - - name: Run unit tests (No Black Box Apis) + - name: Run unit tests (No OpenAI/Anthropic, HuggingFace only) env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | - uv run pytest tests/unit/ -v -m "not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml + uv run pytest tests/unit/ -v -m "not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml - name: Run local embeddings tests env: @@ -61,11 +61,11 @@ jobs: uv run pytest tests/integration/ -v -m "huggingface and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true continue-on-error: true # Allow failures if HF_TOKEN not set - - name: Run non-OpenAI integration tests (excluding embedding providers) + - name: Run non-OpenAI/Anthropic integration tests (excluding embedding providers) env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | - uv run pytest tests/integration/ -v -m "integration and not openai and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true + uv run pytest tests/integration/ -v -m "integration and not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true continue-on-error: true # Allow failures if dependencies not available - name: Upload coverage reports to Codecov diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0d08dd3bf813709c4c4df5a8fc5f6ebdb16c84f3..66993b5ec97b1bfa659fc9cdc9b3a323372d56ee 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,16 +1,16 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.4 + rev: v0.14.7 # Compatible with ruff>=0.14.6 (matches CI) hooks: - id: ruff - args: [--fix, --exclude, tests] + args: [--fix, --exclude, tests, --exclude, reference_repos] exclude: ^reference_repos/ - id: ruff-format - args: [--exclude, tests] + args: [--exclude, tests, --exclude, reference_repos] exclude: ^reference_repos/ - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.18.2 # Matches CI version mypy>=1.18.2 hooks: - id: mypy files: ^src/ diff --git a/docs/api/agents.md b/docs/api/agents.md index 9670001b3025c26ba041371a2faded2153b01ea8..8f0fa38939da25884c2dfef878ca84f94c7762fb 100644 --- a/docs/api/agents.md +++ b/docs/api/agents.md @@ -262,3 +262,5 @@ def create_input_parser_agent(model: Any | None = None) -> InputParserAgent + + diff --git a/docs/api/models.md b/docs/api/models.md index 22c35704b4bd1c5b30aea3f60166d594838a7350..f226647a52dc2d324877ce12e9311feffb8df591 100644 --- a/docs/api/models.md +++ b/docs/api/models.md @@ -240,3 +240,5 @@ class BudgetStatus(BaseModel): + + diff --git a/docs/api/orchestrators.md b/docs/api/orchestrators.md index 9c241236c7473b0e48f8e899ecd809553f3f5a8d..27c52249fc18fbcdb893036cdfcb4472e5d2f99e 100644 --- a/docs/api/orchestrators.md +++ b/docs/api/orchestrators.md @@ -187,3 +187,5 @@ Runs Magentic orchestration. + + diff --git a/docs/api/services.md b/docs/api/services.md index f276a342b2f7b998ce5a3a8e0610cc44c315b3cc..30edfc557afb8872d4262c5cdb4ebb2e149f46af 100644 --- a/docs/api/services.md +++ b/docs/api/services.md @@ -193,3 +193,5 @@ Analyzes a hypothesis using statistical methods. + + diff --git a/docs/api/tools.md b/docs/api/tools.md index b86993babad67b25cb06712a3136a69232cd2bbf..b93cd31e37e7a31413fec0ec282424fe6ae0ca82 100644 --- a/docs/api/tools.md +++ b/docs/api/tools.md @@ -227,3 +227,5 @@ Searches multiple tools in parallel. + + diff --git a/docs/architecture/agents.md b/docs/architecture/agents.md index d6599f11288888234009e325f1d20e695d7367fa..b65da9e379c329fc478bf7c9fe3ff4ca4c40745a 100644 --- a/docs/architecture/agents.md +++ b/docs/architecture/agents.md @@ -184,3 +184,5 @@ Factory functions: + + diff --git a/docs/architecture/middleware.md b/docs/architecture/middleware.md index 9d2f570d342774807910f450bceb49f08d79391c..82058ccf979591845b8c5ab87e42913ce8a62458 100644 --- a/docs/architecture/middleware.md +++ b/docs/architecture/middleware.md @@ -134,3 +134,5 @@ All middleware components use `ContextVar` for thread-safe isolation: + + diff --git a/docs/architecture/services.md b/docs/architecture/services.md index 1c9ca8099840c455f8f9d9aeff22151d90f26167..fda7c8367aac5c7f2a907f2c45372a91d7a7fc64 100644 --- a/docs/architecture/services.md +++ b/docs/architecture/services.md @@ -134,3 +134,5 @@ if settings.has_openai_key: + + diff --git a/docs/architecture/tools.md b/docs/architecture/tools.md index e3ab4820b5ca3146939393ea86f0cd56c2fc7e2e..7ddbe7eaaf0a579ddba89c63506ba37560d33405 100644 --- a/docs/architecture/tools.md +++ b/docs/architecture/tools.md @@ -167,3 +167,5 @@ search_handler = SearchHandler( + + diff --git a/docs/contributing/code-quality.md b/docs/contributing/code-quality.md index 003b98aa4aa58b0e6479863860c18db19609546e..b15ec66c60f46d285179fd83f5abc14a695a2a20 100644 --- a/docs/contributing/code-quality.md +++ b/docs/contributing/code-quality.md @@ -73,3 +73,5 @@ async def search(self, query: str, max_results: int = 10) -> list[Evidence]: + + diff --git a/docs/contributing/code-style.md b/docs/contributing/code-style.md index 6de664edcf801cad33e4a034a3af85a28b09f9ca..6a0ca8c0d62f7cff541a2abef854ffe49fa89ef8 100644 --- a/docs/contributing/code-style.md +++ b/docs/contributing/code-style.md @@ -53,3 +53,5 @@ result = await loop.run_in_executor(None, cpu_bound_function, args) + + diff --git a/docs/contributing/error-handling.md b/docs/contributing/error-handling.md index b1b55441cde24c94f54f3576d645e6b0731c7348..5d3ead5b23c77d8970f236b460b5668a40a1d566 100644 --- a/docs/contributing/error-handling.md +++ b/docs/contributing/error-handling.md @@ -61,3 +61,5 @@ except httpx.HTTPError as e: + + diff --git a/docs/contributing/implementation-patterns.md b/docs/contributing/implementation-patterns.md index 4f4075561edd03263e723e84cee784927ebc6cb6..d2cf076c39f24f6f42611c9bbd0bcff4ff05ee8a 100644 --- a/docs/contributing/implementation-patterns.md +++ b/docs/contributing/implementation-patterns.md @@ -76,3 +76,5 @@ def get_embedding_service() -> EmbeddingService: + + diff --git a/docs/contributing/index.md b/docs/contributing/index.md index 5c13e76d0b3310847b800160c64e21c232a8bb98..6fab401289f8a568b36096eb201bfe0453b3a6d3 100644 --- a/docs/contributing/index.md +++ b/docs/contributing/index.md @@ -155,3 +155,5 @@ Thank you for contributing to DeepCritical! + + diff --git a/docs/contributing/prompt-engineering.md b/docs/contributing/prompt-engineering.md index d02e67c11b449b0d4c24c54eb796155550f186d8..a1bae2444bb669cddb7d1e3c81081422420ee820 100644 --- a/docs/contributing/prompt-engineering.md +++ b/docs/contributing/prompt-engineering.md @@ -61,3 +61,5 @@ This document outlines prompt engineering guidelines and citation validation rul + + diff --git a/docs/contributing/testing.md b/docs/contributing/testing.md index 393a7f7efc638574a35812ba82f0176f00f89ab1..ebb1b21477c34a34c39cd8d49e1d898b684527ab 100644 --- a/docs/contributing/testing.md +++ b/docs/contributing/testing.md @@ -57,3 +57,5 @@ async def test_real_pubmed_search(): + + diff --git a/docs/getting-started/examples.md b/docs/getting-started/examples.md index 214f12f4f5d7b7d4ae8c09ba14af8a43f45ec448..e71e7b8360070341f38f526d1e2df344980e246a 100644 --- a/docs/getting-started/examples.md +++ b/docs/getting-started/examples.md @@ -201,3 +201,5 @@ USE_GRAPH_EXECUTION=true + + diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index b29e03881c75941b1d034081e434da9fddb544ff..861e1ef751221b4844daad8221430067a71699e1 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -140,3 +140,5 @@ uv run pre-commit install + + diff --git a/docs/getting-started/mcp-integration.md b/docs/getting-started/mcp-integration.md index 87b2294fca6d956a37b9b47ecf6bceae2d476f94..28cb0806a9b669212221c13367a0326b7de0d14b 100644 --- a/docs/getting-started/mcp-integration.md +++ b/docs/getting-started/mcp-integration.md @@ -207,3 +207,5 @@ You can configure multiple DeepCritical instances: + + diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index ce36c4b6cc2c5492e12064747b0939895be67107..9c927dbe5cb373d4c4a289ca626d25c72d39610e 100644 --- a/docs/getting-started/quick-start.md +++ b/docs/getting-started/quick-start.md @@ -111,3 +111,5 @@ What are the active clinical trials investigating Alzheimer's disease treatments + + diff --git a/docs/license.md b/docs/license.md index 96da2dd2b44cb7d16e348309109d864255f6c9d4..18466be89051cf1fbcf15385a2eddb2875276a13 100644 --- a/docs/license.md +++ b/docs/license.md @@ -31,3 +31,5 @@ SOFTWARE. + + diff --git a/docs/overview/architecture.md b/docs/overview/architecture.md index 7d66e309012d9a27211f930b07884878ef01c070..e3c55c3d7eda510f0aca206f9113a4fef2055c71 100644 --- a/docs/overview/architecture.md +++ b/docs/overview/architecture.md @@ -188,3 +188,5 @@ The system supports complex research workflows through: + + diff --git a/docs/overview/features.md b/docs/overview/features.md index 9516164162c92122352771ea063e99f4dab70c0e..c5bbe713deee9b4c5e98aed945bd84cfe55da8e5 100644 --- a/docs/overview/features.md +++ b/docs/overview/features.md @@ -140,3 +140,5 @@ DeepCritical provides a comprehensive set of features for AI-assisted research: + + diff --git a/docs/team.md b/docs/team.md index e1a8bf6bfee5b0df95800884d68fd5e0205be006..e6901a846f7dafd627375238c5d4284ad05fe4c5 100644 --- a/docs/team.md +++ b/docs/team.md @@ -36,3 +36,5 @@ We welcome contributions! See the [Contributing Guide](contributing/index.md) fo + + diff --git a/pyproject.toml b/pyproject.toml index 2c0458ecbaeb6b07c27ba1fc61cb498811bcbf97..d262e758d8c6d5581b3ef6aae0123c13b59105bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "tokenizers>=0.22.0,<=0.23.0", "transformers>=4.57.2", "chromadb>=0.4.0", + "rpds-py>=0.29.0", # Python implementation of rpds (required by chromadb on Windows) "sentence-transformers>=2.2.0", "numpy<2.0", "agent-framework-core>=1.0.0b251120,<2.0.0", diff --git a/requirements.txt b/requirements.txt index 21bdb3ca584609dd2ab695444e7eae639ca34b79..a50255a27c2a7e2568e6328e9f632f125eb609a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,40 +9,53 @@ pydantic>=2.7 pydantic-settings>=2.2 pydantic-ai>=0.0.16 - # OPTIONAL AI Providers openai>=1.0.0 -# anthropic>=0.18.0 - -# Multi-agent orchestration (Advanced mode) -agent-framework-core>=1.0.0b251120 - -# Web search -duckduckgo-search>=5.0 +anthropic>=0.18.0 # HTTP & Parsing httpx>=0.27 beautifulsoup4>=4.12 xmltodict>=0.13 +# HuggingFace Hub +huggingface-hub>=0.20.0 + # UI (Gradio with MCP server support) -gradio[mcp]>=6.0.0 +gradio[mcp,oauth]>=6.0.0 # Utils python-dotenv>=1.0 tenacity>=8.2 structlog>=24.1 requests>=2.32.5 -limits>=3.0 # Rate limiting +limits>=3.0 # Rate limiting +pydantic-graph>=1.22.0 -# Optional: Modal for code execution -modal>=0.63.0 +# Web search +duckduckgo-search>=5.0 -# Optional: LlamaIndex RAG -llama-index>=0.11.0 -llama-index-llms-openai -llama-index-llms-huggingface -llama-index-embeddings-openai -llama-index-vector-stores-chroma +# Multi-agent orchestration (Advanced mode) +agent-framework-core>=1.0.0b251120,<2.0.0 + +# LlamaIndex RAG +llama-index-llms-huggingface>=0.6.1 +llama-index-llms-huggingface-api>=0.6.1 +llama-index-vector-stores-chroma>=0.5.3 +llama-index>=0.14.8 +llama-index-llms-openai>=0.6.9 +llama-index-embeddings-openai>=0.5.1 + +# Embeddings & Vector Store +tokenizers>=0.22.0,<=0.23.0 +transformers>=4.57.2 chromadb>=0.4.0 +rpds-py>=0.29.0 # Python implementation of rpds (required by chromadb on Windows) sentence-transformers>=2.2.0 +numpy<2.0 + +# Optional: Modal for code execution +modal>=0.63.0 + +# Pydantic AI with HuggingFace support +pydantic-ai-slim[huggingface]>=0.0.18 diff --git a/src/agent_factory/judges.py b/src/agent_factory/judges.py index 8413d678d1994b89e3f16e6a81bc3de4c8981934..9cd0e14eff838d5ca65bc71c348f2be3fc1c5973 100644 --- a/src/agent_factory/judges.py +++ b/src/agent_factory/judges.py @@ -8,10 +8,18 @@ from typing import Any import structlog from huggingface_hub import InferenceClient from pydantic_ai import Agent -from pydantic_ai.models.anthropic import AnthropicModel from pydantic_ai.models.openai import OpenAIModel # type: ignore[attr-defined] from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential +# Try to import AnthropicModel (may not be available if anthropic package is missing) +try: + from pydantic_ai.models.anthropic import AnthropicModel + + _ANTHROPIC_AVAILABLE = True +except ImportError: + AnthropicModel = None # type: ignore[assignment, misc] + _ANTHROPIC_AVAILABLE = False + # Try to import HuggingFace support (may not be available in all pydantic-ai versions) # According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires # pydantic-ai with huggingface extra or pydantic-ai-slim[huggingface] @@ -50,6 +58,11 @@ def get_model() -> Any: llm_provider = settings.llm_provider if llm_provider == "anthropic": + if not _ANTHROPIC_AVAILABLE: + raise ImportError( + "Anthropic models are not available. " + "Please install with: uv add 'pydantic-ai[anthropic]' or use 'openai'/'huggingface' as the LLM provider." + ) return AnthropicModel(settings.anthropic_model, api_key=settings.anthropic_api_key) # type: ignore[call-arg] if llm_provider == "huggingface": @@ -144,7 +157,7 @@ class JudgeHandler: try: # Run the agent with structured output result = await self.agent.run(user_prompt) - assessment = result.output # type: ignore[attr-defined] + assessment = result.data logger.info( "Assessment complete", diff --git a/src/agents/hypothesis_agent.py b/src/agents/hypothesis_agent.py index b806396f36243cf81c6020f3b361a6724e75ea02..d946e7e2c72db190bdbaf6393ddf80bf7004676f 100644 --- a/src/agents/hypothesis_agent.py +++ b/src/agents/hypothesis_agent.py @@ -75,7 +75,7 @@ class HypothesisAgent(BaseAgent): # type: ignore[misc] # Generate hypotheses with diverse evidence selection prompt = await format_hypothesis_prompt(query, evidence, embeddings=self._embeddings) result = await self._get_agent().run(prompt) - assessment = result.output # pydantic-ai returns .output for structured output + assessment = result.data # type: ignore[attr-defined] # Store hypotheses in shared context existing = self._evidence_store.get("hypotheses", []) diff --git a/src/agents/input_parser.py b/src/agents/input_parser.py index 897dd4c31fb4079dadd6d362a69def3813f36318..0f23f7092c8744b5a3429452e8b93919fd6abf88 100644 --- a/src/agents/input_parser.py +++ b/src/agents/input_parser.py @@ -92,7 +92,7 @@ class InputParserAgent: try: # Run the agent result = await self.agent.run(user_message) - parsed_query = result.output + parsed_query = result.data # Validate parsed query if not parsed_query.original_query: diff --git a/src/agents/judge_agent_llm.py b/src/agents/judge_agent_llm.py index 52ab9e5519703b18579de22a770e28a97bad27bd..78447df1f0489ece4002fa01287c3bde6353317f 100644 --- a/src/agents/judge_agent_llm.py +++ b/src/agents/judge_agent_llm.py @@ -41,5 +41,5 @@ History of previous attempts: {len(history)} Evaluate validity and sufficiency.""" run_result = await self.agent.run(prompt) - logger.info("LLM judge assessment complete", sufficient=run_result.output.sufficient) - return run_result.output # type: ignore[no-any-return] + logger.info("LLM judge assessment complete", sufficient=run_result.data.sufficient) # type: ignore[attr-defined] + return run_result.data # type: ignore[no-any-return,attr-defined] diff --git a/src/agents/knowledge_gap.py b/src/agents/knowledge_gap.py index 2b4b118b885a1d41238a7a63a54997bcc875a0dc..ad3769d1a403998e908b770525230632fe4aebef 100644 --- a/src/agents/knowledge_gap.py +++ b/src/agents/knowledge_gap.py @@ -113,7 +113,7 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: try: # Run the agent result = await self.agent.run(user_message) - evaluation = result.output + evaluation = result.data self.logger.info( "Knowledge gap evaluation complete", diff --git a/src/agents/long_writer.py b/src/agents/long_writer.py index 8b03a5263e4dea685bc8e07023444a5525ca6223..9014d2b0903798be07499db42fa5689cb5a889a5 100644 --- a/src/agents/long_writer.py +++ b/src/agents/long_writer.py @@ -176,7 +176,7 @@ class LongWriterAgent: try: # Run the agent result = await self.agent.run(user_message) - output = result.output + output = result.data # Validate output if not output or not isinstance(output, LongWriterOutput): diff --git a/src/agents/proofreader.py b/src/agents/proofreader.py index 72aeaf77881a9e8498ada3cb288b8edf135ddf4b..3d85ce1405376ff2a54178ddf52e7eaabacfab9b 100644 --- a/src/agents/proofreader.py +++ b/src/agents/proofreader.py @@ -133,7 +133,7 @@ REPORT DRAFT: try: # Run the agent result = await self.agent.run(user_message) - final_report = result.output + final_report = result.data # type: ignore[attr-defined] # Validate output if not final_report or not final_report.strip(): @@ -142,7 +142,7 @@ REPORT DRAFT: self.logger.info("Report proofread", length=len(final_report), attempt=attempt + 1) - return final_report + return final_report # type: ignore[no-any-return] except (TimeoutError, ConnectionError) as e: # Transient errors - retry diff --git a/src/agents/report_agent.py b/src/agents/report_agent.py index 2d86de86ed646f0f5f7ba870e280b993c072dc40..fbff4d948b94a313ff63cf0169b7fefbe3aad110 100644 --- a/src/agents/report_agent.py +++ b/src/agents/report_agent.py @@ -91,7 +91,7 @@ class ReportAgent(BaseAgent): # type: ignore[misc] ) result = await self._get_agent().run(prompt) - report = result.output + report = result.data # type: ignore[attr-defined] # ═══════════════════════════════════════════════════════════════════ # 🚨 CRITICAL: Validate citations to prevent hallucination diff --git a/src/agents/thinking.py b/src/agents/thinking.py index 230c5801fb6bc6822fa155a2b953046a3b5d0729..bf0c84952b14d7e572134758d28ec2dd711a8c2f 100644 --- a/src/agents/thinking.py +++ b/src/agents/thinking.py @@ -112,11 +112,11 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: try: # Run the agent result = await self.agent.run(user_message) - observations = result.output + observations = result.data # type: ignore[attr-defined] self.logger.info("Observations generated", length=len(observations)) - return observations + return observations # type: ignore[no-any-return] except Exception as e: self.logger.error("Observation generation failed", error=str(e)) diff --git a/src/agents/tool_selector.py b/src/agents/tool_selector.py index 7137906f762786d228bef1a5691712627ee0ff38..dd3aac43006bbd614115b7a687ee2f84e25b5d79 100644 --- a/src/agents/tool_selector.py +++ b/src/agents/tool_selector.py @@ -117,7 +117,7 @@ HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: try: # Run the agent result = await self.agent.run(user_message) - selection_plan = result.output + selection_plan = result.data self.logger.info( "Tool selection complete", diff --git a/src/agents/writer.py b/src/agents/writer.py index 73690f15f415bea42b8dfeb3d681b786b50b59a5..418a5105527b7ae5a7d91f53e2aed1ac7a0b83df 100644 --- a/src/agents/writer.py +++ b/src/agents/writer.py @@ -136,7 +136,7 @@ FINDINGS: try: # Run the agent result = await self.agent.run(user_message) - report = result.output + report = result.data # type: ignore[attr-defined] # Validate output if not report or not report.strip(): @@ -145,7 +145,7 @@ FINDINGS: self.logger.info("Report written", length=len(report), attempt=attempt + 1) - return report + return report # type: ignore[no-any-return] except (TimeoutError, ConnectionError) as e: # Transient errors - retry diff --git a/src/app.py b/src/app.py index 7275673a06bb7f938288ff5a2b95e9d50927229e..d88d931d1dc17d0a85fc47160d58752f00d5824d 100644 --- a/src/app.py +++ b/src/app.py @@ -172,20 +172,29 @@ def event_to_chat_message(event: AgentEvent) -> dict[str, Any]: "content": event.message, } - # Build metadata for accordion + # Build metadata for accordion according to Gradio ChatMessage spec + # Metadata keys: title (str), status ("pending"|"done"), log (str), duration (float) + # See: https://www.gradio.app/guides/agents-and-tool-usage metadata: dict[str, Any] = {} + + # Title is required for accordion display - must be string if config["title"]: - metadata["title"] = config["title"] + metadata["title"] = str(config["title"]) # Set status (pending shows spinner, done is collapsed) + # Must be exactly "pending" or "done" per Gradio spec if config["status"] == "pending": metadata["status"] = "pending" + elif config["status"] == "done": + metadata["status"] = "done" - # Add duration if available in data + # Add duration if available in data (must be float) if event.data and isinstance(event.data, dict) and "duration" in event.data: - metadata["duration"] = event.data["duration"] + duration = event.data["duration"] + if isinstance(duration, int | float): + metadata["duration"] = float(duration) - # Add log info (iteration number, etc.) + # Add log info (iteration number, etc.) - must be string log_parts: list[str] = [] if event.iteration > 0: log_parts.append(f"Iteration {event.iteration}") @@ -198,12 +207,22 @@ def event_to_chat_message(event: AgentEvent) -> dict[str, Any]: metadata["log"] = " | ".join(log_parts) # Return as dict format for Gradio Chatbot compatibility - # Gradio Chatbot expects dict format, not gr.ChatMessage objects + # According to Gradio docs: https://www.gradio.app/guides/agents-and-tool-usage + # ChatMessage format: {"role": "assistant", "content": "...", "metadata": {...}} + # Metadata must have "title" key for accordion display + # Valid metadata keys: title (str), status ("pending"|"done"), log (str), duration (float) result: dict[str, Any] = { "role": "assistant", "content": event.message, } - if metadata: + # Only add metadata if it has a title (required for accordion display) + # Ensure metadata values match Gradio's expected types + if metadata and metadata.get("title"): + # Ensure status is valid if present + if "status" in metadata: + status = metadata["status"] + if status not in ("pending", "done"): + metadata["status"] = "done" # Default to "done" if invalid result["metadata"] = metadata return result @@ -455,10 +474,11 @@ async def research_agent( yield msg except Exception as e: + # Return error message without metadata to avoid issues during example caching + # Metadata can cause validation errors when Gradio caches examples yield { "role": "assistant", - "content": f"❌ **Error**: {e!s}", - "metadata": {"title": "❌ Error", "status": "done"}, + "content": f"❌ **Error**: {e!s}\n\n*Please check your configuration and try again.*", } @@ -681,9 +701,21 @@ def create_demo() -> gr.Blocks: "**Sign in with HuggingFace** above to access premium models and providers." ), examples=[ - ["What drugs could be repurposed for Alzheimer's disease?", "simple"], - ["Is metformin effective for treating cancer?", "simple"], - ["What medications show promise for Long COVID treatment?", "simple"], + # When additional_inputs are provided, examples must be lists of lists + # Each inner list: [message, mode, hf_model, hf_provider] + [ + "What drugs could be repurposed for Alzheimer's disease?", + "iterative", + None, + None, + ], + ["Is metformin effective for treating cancer?", "iterative", None, None], + [ + "What medications show promise for Long COVID treatment?", + "iterative", + None, + None, + ], ], additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False), additional_inputs=[ diff --git a/src/orchestrator/planner_agent.py b/src/orchestrator/planner_agent.py index ea560afab9eab23c8fbe22bcd551b63a9b0f4398..110c1bade21c99a72f1c2b3b6d9cb96c2a2b89a5 100644 --- a/src/orchestrator/planner_agent.py +++ b/src/orchestrator/planner_agent.py @@ -109,7 +109,7 @@ class PlannerAgent: try: # Run the agent result = await self.agent.run(user_message) - report_plan = result.output + report_plan = result.data # Validate report plan if not report_plan.report_outline: diff --git a/src/services/llamaindex_rag.py b/src/services/llamaindex_rag.py index 322a6bc4afaa6534431daa96814ef48ee4731f31..00a6da967e191f1f4b2d0dfc4a29fb82f71feed4 100644 --- a/src/services/llamaindex_rag.py +++ b/src/services/llamaindex_rag.py @@ -136,7 +136,8 @@ class LlamaIndexRAGService: } except ImportError as e: raise ImportError( - "LlamaIndex dependencies not installed. Run: uv sync --extra modal" + "LlamaIndex dependencies not installed. Required packages: chromadb, llama-index, " + "and their dependencies. If rpds is missing, try: uv pip install rpds-py" ) from e def _configure_embeddings( diff --git a/src/services/statistical_analyzer.py b/src/services/statistical_analyzer.py index d43cfa1056ca2248807d54ae50dc7e4dcb025118..09d446c3ba7ebac75c97b6ac8cfa5f606998ea51 100644 --- a/src/services/statistical_analyzer.py +++ b/src/services/statistical_analyzer.py @@ -135,7 +135,7 @@ Generate executable Python code to analyze this evidence.""" # Generate code agent = self._get_agent() code_result = await agent.run(prompt) - generated_code = code_result.output + generated_code = code_result.data # type: ignore[attr-defined] # Execute in Modal sandbox loop = asyncio.get_running_loop() diff --git a/tests/integration/test_rag_integration.py b/tests/integration/test_rag_integration.py index 38d3f6ec09900ec3645af00bb181054577c6ce51..6a525bb1ab565c096a04d63316ae393c8ea19421 100644 --- a/tests/integration/test_rag_integration.py +++ b/tests/integration/test_rag_integration.py @@ -121,9 +121,12 @@ class TestRAGServiceIntegration: assert len(response) > 0 assert "python" in response.lower() except Exception as e: - # If model is not available (404), skip the test - if "404" in str(e) or "Not Found" in str(e): + # If model is not available (404) or authentication required (401), skip the test + error_str = str(e) + if "404" in error_str or "Not Found" in error_str: pytest.skip(f"HuggingFace model not available via inference API: {e}") + if "401" in error_str or "Unauthorized" in error_str or "Invalid username or password" in error_str: + pytest.skip(f"HuggingFace authentication required but not available: {e}") raise # Cleanup diff --git a/tests/unit/agent_factory/test_judges.py b/tests/unit/agent_factory/test_judges.py index c2075cdaa3b0d103d5a6b5f5fedb4c0c876356ce..342aa68997467d1fe88d1d31e579ff848d2d3528 100644 --- a/tests/unit/agent_factory/test_judges.py +++ b/tests/unit/agent_factory/test_judges.py @@ -34,6 +34,7 @@ class TestJudgeHandler: # Mock the PydanticAI agent mock_result = MagicMock() + type(mock_result).data = mock_assessment # pydantic-ai uses .data for structured output mock_result.output = mock_assessment with ( @@ -88,7 +89,8 @@ class TestJudgeHandler: ) mock_result = MagicMock() - mock_result.output = mock_assessment + mock_result.data = mock_assessment + mock_result.output = mock_assessment # Some code may use .output with ( patch("src.agent_factory.judges.get_model") as mock_get_model, diff --git a/tests/unit/agents/test_hypothesis_agent.py b/tests/unit/agents/test_hypothesis_agent.py index be9b8768b5fed19198ee3d721fff50dd9de8c44c..69772bb1397600b170891a9629499f3aba41bd89 100644 --- a/tests/unit/agents/test_hypothesis_agent.py +++ b/tests/unit/agents/test_hypothesis_agent.py @@ -28,18 +28,17 @@ def sample_evidence(): @pytest.fixture def mock_assessment(): + primary_hyp = MechanismHypothesis( + drug="Metformin", + target="AMPK", + pathway="mTOR inhibition", + effect="Reduced cancer cell proliferation", + confidence=0.75, + search_suggestions=["metformin AMPK cancer", "mTOR cancer therapy"], + ) return HypothesisAssessment( - hypotheses=[ - MechanismHypothesis( - drug="Metformin", - target="AMPK", - pathway="mTOR inhibition", - effect="Reduced cancer cell proliferation", - confidence=0.75, - search_suggestions=["metformin AMPK cancer", "mTOR cancer therapy"], - ) - ], - primary_hypothesis=None, + hypotheses=[primary_hyp], + primary_hypothesis=primary_hyp, # Set primary hypothesis knowledge_gaps=["Clinical trial data needed"], recommended_searches=["metformin clinical trial cancer"], ) @@ -54,8 +53,9 @@ async def test_hypothesis_agent_generates_hypotheses(sample_evidence, mock_asses with patch("src.agents.hypothesis_agent.Agent") as mock_agent_class: mock_get_model.return_value = MagicMock() # Mock model mock_result = MagicMock() + type(mock_result).data = mock_assessment # pydantic-ai uses .data for structured output mock_result.output = mock_assessment - # pydantic-ai Agent returns an object with .output for structured output + # pydantic-ai Agent returns an object with .data for structured output mock_agent_class.return_value.run = AsyncMock(return_value=mock_result) agent = HypothesisAgent(store) @@ -94,6 +94,7 @@ async def test_hypothesis_agent_uses_embeddings(sample_evidence, mock_assessment mock_format.return_value = "Prompt" mock_result = MagicMock() + type(mock_result).data = mock_assessment # pydantic-ai uses .data for structured output mock_result.output = mock_assessment mock_agent_class.return_value.run = AsyncMock(return_value=mock_result) diff --git a/tests/unit/agents/test_input_parser.py b/tests/unit/agents/test_input_parser.py index fd4f4a240c4c2387bad38b079952f2c5af04a35c..ea2736a9519875e4905399bbbb33659aafe3674f 100644 --- a/tests/unit/agents/test_input_parser.py +++ b/tests/unit/agents/test_input_parser.py @@ -18,6 +18,13 @@ def mock_model() -> MagicMock: return model +@pytest.fixture(autouse=True) +def patch_infer_model(mock_model: MagicMock): + """Auto-patch infer_model for all tests to avoid OpenAI API key requirements.""" + with patch("pydantic_ai.models.infer_model", return_value=mock_model): + yield + + @pytest.fixture def mock_parsed_query_iterative() -> ParsedQuery: """Create a mock ParsedQuery for iterative mode.""" @@ -51,7 +58,9 @@ def mock_agent_result_iterative( mock_parsed_query_iterative: ParsedQuery, ) -> RunResult[ParsedQuery]: """Create a mock agent result for iterative mode.""" - result = MagicMock(spec=RunResult) + result = MagicMock() + # Configure the mock to return the actual output when .data is accessed + type(result).data = mock_parsed_query_iterative result.output = mock_parsed_query_iterative return result @@ -61,7 +70,9 @@ def mock_agent_result_deep( mock_parsed_query_deep: ParsedQuery, ) -> RunResult[ParsedQuery]: """Create a mock agent result for deep mode.""" - result = MagicMock(spec=RunResult) + result = MagicMock() + # Configure the mock to return the actual output when .data is accessed + type(result).data = mock_parsed_query_deep result.output = mock_parsed_query_deep return result @@ -72,33 +83,52 @@ def input_parser_agent(mock_model: MagicMock) -> InputParserAgent: return InputParserAgent(model=mock_model) +@pytest.fixture(autouse=True) +def patch_infer_model(mock_model: MagicMock): + """Auto-patch infer_model for all tests to avoid OpenAI API key requirements.""" + with patch("pydantic_ai.models.infer_model", return_value=mock_model): + yield + + class TestInputParserAgentInit: """Test InputParserAgent initialization.""" - def test_input_parser_agent_init_with_model(self, mock_model: MagicMock) -> None: + @patch("pydantic_ai.models.infer_model") + def test_input_parser_agent_init_with_model( + self, mock_infer_model: MagicMock, mock_model: MagicMock + ) -> None: """Test InputParserAgent initialization with provided model.""" + mock_infer_model.return_value = mock_model agent = InputParserAgent(model=mock_model) assert agent.model == mock_model assert agent.agent is not None @patch("src.agents.input_parser.get_model") + @patch("pydantic_ai.models.infer_model") def test_input_parser_agent_init_without_model( - self, mock_get_model: MagicMock, mock_model: MagicMock + self, + mock_infer_model: MagicMock, + mock_get_model: MagicMock, + mock_model: MagicMock, ) -> None: """Test InputParserAgent initialization without model (uses default).""" mock_get_model.return_value = mock_model + mock_infer_model.return_value = mock_model agent = InputParserAgent() assert agent.model == mock_model mock_get_model.assert_called_once() + @patch("pydantic_ai.models.infer_model") def test_input_parser_agent_has_correct_system_prompt( - self, input_parser_agent: InputParserAgent + self, mock_infer_model: MagicMock, mock_model: MagicMock ) -> None: """Test that InputParserAgent has correct system prompt.""" + mock_infer_model.return_value = mock_model + agent = InputParserAgent(model=mock_model) # System prompt should contain key instructions # In pydantic_ai, system_prompt is a property that returns the prompt string # For mocked agents, we check that the agent was created with a system prompt - assert input_parser_agent.agent is not None + assert agent.agent is not None # The actual system prompt is set during agent creation # We verify the agent exists and was properly initialized # Note: Direct access to system_prompt may not work with mocks diff --git a/tests/unit/agents/test_long_writer.py b/tests/unit/agents/test_long_writer.py index 771c27e66b4fa8618cd0a80cdb7d63fdf6d447c7..a2d4bee0c5b62a2c82ef6fd970f4b76b7b676938 100644 --- a/tests/unit/agents/test_long_writer.py +++ b/tests/unit/agents/test_long_writer.py @@ -17,6 +17,13 @@ def mock_model() -> MagicMock: return model +@pytest.fixture(autouse=True) +def patch_infer_model(mock_model: MagicMock): + """Auto-patch infer_model for all tests to avoid OpenAI API key requirements.""" + with patch("pydantic_ai.models.infer_model", return_value=mock_model): + yield + + @pytest.fixture def mock_long_writer_output() -> LongWriterOutput: """Create a mock LongWriterOutput.""" @@ -31,7 +38,9 @@ def mock_agent_result( mock_long_writer_output: LongWriterOutput, ) -> RunResult[LongWriterOutput]: """Create a mock agent result.""" - result = MagicMock(spec=RunResult) + result = MagicMock() + # Configure the mock to return the actual output when .data is accessed + type(result).data = mock_long_writer_output result.output = mock_long_writer_output return result @@ -340,9 +349,11 @@ class TestWriteReport: references=["[1] https://example.com/2"], ) - result1 = MagicMock(spec=RunResult) + result1 = MagicMock() + type(result1).data = output1 # pydantic-ai uses .data for structured output result1.output = output1 - result2 = MagicMock(spec=RunResult) + result2 = MagicMock() + type(result2).data = output2 # pydantic-ai uses .data for structured output result2.output = output2 results = [result1, result2] long_writer_agent.agent.run = AsyncMock(side_effect=results) diff --git a/tests/unit/agents/test_proofreader.py b/tests/unit/agents/test_proofreader.py index bb21aa50c6c04981ad6ecd55462ba87a9a675cf3..18eab15d3137732f8a9409b681ceb917d5886917 100644 --- a/tests/unit/agents/test_proofreader.py +++ b/tests/unit/agents/test_proofreader.py @@ -18,6 +18,13 @@ def mock_model() -> MagicMock: return model +@pytest.fixture(autouse=True) +def patch_infer_model(mock_model: MagicMock): + """Auto-patch infer_model for all tests to avoid OpenAI API key requirements.""" + with patch("pydantic_ai.models.infer_model", return_value=mock_model): + yield + + @pytest.fixture def mock_agent_result() -> RunResult[Any]: """Create a mock agent result.""" diff --git a/tests/unit/agents/test_report_agent.py b/tests/unit/agents/test_report_agent.py index 4121dd22c7f389e661dd8d4aa2f85eac5a8b33c5..426519371021df25461219ba8a99559805bb3627 100644 --- a/tests/unit/agents/test_report_agent.py +++ b/tests/unit/agents/test_report_agent.py @@ -102,6 +102,7 @@ async def test_report_agent_generates_report( ): mock_get_model.return_value = MagicMock() mock_result = MagicMock() + type(mock_result).data = mock_report # pydantic-ai uses .data for structured output mock_result.output = mock_report mock_agent_class.return_value.run = AsyncMock(return_value=mock_result) diff --git a/tests/unit/agents/test_writer.py b/tests/unit/agents/test_writer.py index 752ca923c294acc6649cd50a5e788ccc856f12c4..975f7a19bad715319ed1058746454d950f338aeb 100644 --- a/tests/unit/agents/test_writer.py +++ b/tests/unit/agents/test_writer.py @@ -18,6 +18,13 @@ def mock_model() -> MagicMock: return model +@pytest.fixture(autouse=True) +def patch_infer_model(mock_model: MagicMock): + """Auto-patch infer_model for all tests to avoid OpenAI API key requirements.""" + with patch("pydantic_ai.models.infer_model", return_value=mock_model): + yield + + @pytest.fixture def mock_agent_result() -> RunResult[Any]: """Create a mock agent result.""" diff --git a/tests/unit/middleware/__init__.py b/tests/unit/middleware/__init__.py index 6471c91c5166c60c60a919b6cdf145781acfca7d..65aa34a2f83ea7450a83f3ed81fd3fa659075b85 100644 --- a/tests/unit/middleware/__init__.py +++ b/tests/unit/middleware/__init__.py @@ -7,3 +7,5 @@ + + diff --git a/tests/unit/middleware/test_budget_tracker_phase7.py b/tests/unit/middleware/test_budget_tracker_phase7.py index 8d881e807fd1aec37400f2f6f0244d8fff7efab2..1821ace8788cf1c2e6ec3410d37a072823a676bb 100644 --- a/tests/unit/middleware/test_budget_tracker_phase7.py +++ b/tests/unit/middleware/test_budget_tracker_phase7.py @@ -165,3 +165,5 @@ class TestIterationTokenTracking: + + diff --git a/tests/unit/middleware/test_state_machine.py b/tests/unit/middleware/test_state_machine.py index 730efc03904c9fbaffe584c041abeafec0eda1e3..b014e65fc2134be57bdbb0dbbc50a7d3da392046 100644 --- a/tests/unit/middleware/test_state_machine.py +++ b/tests/unit/middleware/test_state_machine.py @@ -362,3 +362,5 @@ class TestContextVarIsolation: + + diff --git a/tests/unit/middleware/test_workflow_manager.py b/tests/unit/middleware/test_workflow_manager.py index ebfef154f343e86ce748c301d6a795c2d17e8bd8..8df1c7af664918acd8b9abb6630cf69cf4aac966 100644 --- a/tests/unit/middleware/test_workflow_manager.py +++ b/tests/unit/middleware/test_workflow_manager.py @@ -292,3 +292,5 @@ class TestWorkflowManager: + + diff --git a/tests/unit/orchestrator/__init__.py b/tests/unit/orchestrator/__init__.py index ea3c8051b5f54fa058738b93afd3c2268613d964..f4189d35b5a34adaedb86cd55366a755e2b421ff 100644 --- a/tests/unit/orchestrator/__init__.py +++ b/tests/unit/orchestrator/__init__.py @@ -7,3 +7,5 @@ + + diff --git a/tests/unit/orchestrator/test_graph_orchestrator.py b/tests/unit/orchestrator/test_graph_orchestrator.py index 4136663f577f8a52cdbc22b888f8719322d8547e..3aa33202203294d53d986d6e9aa3b4d548bcee3c 100644 --- a/tests/unit/orchestrator/test_graph_orchestrator.py +++ b/tests/unit/orchestrator/test_graph_orchestrator.py @@ -209,10 +209,12 @@ class TestGraphOrchestrator: from src.orchestrator.research_flow import IterativeResearchFlow # Create flow and patch its run method to raise exception - original_flow = IterativeResearchFlow( - max_iterations=2, - max_time_minutes=5, - ) + mock_judge = MagicMock() + with patch("src.orchestrator.research_flow.create_judge_handler", return_value=mock_judge): + original_flow = IterativeResearchFlow( + max_iterations=2, + max_time_minutes=5, + ) orchestrator._iterative_flow = original_flow with patch.object(original_flow, "run", side_effect=Exception("Test error")): diff --git a/tests/unit/orchestrator/test_planner_agent.py b/tests/unit/orchestrator/test_planner_agent.py index 9d479e7a4af0bf3866b500d6468e07897d275fb0..00142f885da873abfa5ae1741347a683c2612d3b 100644 --- a/tests/unit/orchestrator/test_planner_agent.py +++ b/tests/unit/orchestrator/test_planner_agent.py @@ -20,7 +20,7 @@ class TestPlannerAgent: def mock_agent_run_result(self): """Create a mock agent run result.""" mock_result = MagicMock() - mock_result.output = ReportPlan( + report_plan = ReportPlan( background_context="Python is a programming language.", report_outline=[ ReportPlanSection( @@ -34,6 +34,8 @@ class TestPlannerAgent: ], report_title="Python Programming Language Overview", ) + type(mock_result).data = report_plan # pydantic-ai uses .data for structured output + mock_result.output = report_plan return mock_result @pytest.mark.asyncio @@ -63,11 +65,13 @@ class TestPlannerAgent: async def test_planner_agent_handles_empty_outline(self, mock_model): """PlannerAgent should return fallback plan when outline is empty.""" mock_result = MagicMock() - mock_result.output = ReportPlan( + report_plan = ReportPlan( background_context="Some context", report_outline=[], # Empty outline report_title="Test Report", ) + type(mock_result).data = report_plan # pydantic-ai uses .data for structured output + mock_result.output = report_plan mock_agent = AsyncMock() mock_agent.run = AsyncMock(return_value=mock_result) diff --git a/tests/unit/orchestrator/test_research_flow.py b/tests/unit/orchestrator/test_research_flow.py index c9a8f407ec2027feadddc0df68d615197001b3b4..1d5a11d6b7a06728b3dca1afe451ef4008f78196 100644 --- a/tests/unit/orchestrator/test_research_flow.py +++ b/tests/unit/orchestrator/test_research_flow.py @@ -1,6 +1,6 @@ """Unit tests for ResearchFlow classes.""" -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -31,6 +31,27 @@ class TestIterativeResearchFlow: @pytest.fixture def flow(self, mock_agents): """Create an IterativeResearchFlow with mocked agents.""" + from src.utils.models import JudgeAssessment, AssessmentDetails + + mock_judge = MagicMock() + # Mock judge assessment - default to insufficient so loops continue + default_assessment = JudgeAssessment( + details=AssessmentDetails( + mechanism_score=5, + mechanism_reasoning="Test reasoning for mechanism assessment", + clinical_evidence_score=5, + clinical_reasoning="Test reasoning for clinical evidence assessment", + drug_candidates=[], + key_findings=[], + ), + sufficient=False, + confidence=0.5, + recommendation="continue", + next_search_queries=[], + reasoning="Test assessment for research flow testing purposes", + ) + mock_judge.assess = AsyncMock(return_value=default_assessment) + with ( patch("src.orchestrator.research_flow.create_knowledge_gap_agent") as mock_kg, patch("src.orchestrator.research_flow.create_tool_selector_agent") as mock_ts, @@ -38,14 +59,18 @@ class TestIterativeResearchFlow: patch("src.orchestrator.research_flow.create_writer_agent") as mock_writer, patch("src.orchestrator.research_flow.execute_tool_tasks") as mock_execute, patch("src.orchestrator.research_flow.get_rag_service") as mock_rag, + patch("src.orchestrator.research_flow.create_judge_handler", return_value=mock_judge), ): mock_kg.return_value = mock_agents["knowledge_gap"] mock_ts.return_value = mock_agents["tool_selector"] mock_thinking.return_value = mock_agents["thinking"] mock_writer.return_value = mock_agents["writer"] - mock_execute.return_value = { - "task_1": ToolAgentOutput(output="Finding 1", sources=["url1"]), - } + # execute_tool_tasks is async, so make the mock async + async def mock_execute_async(*args, **kwargs): + return { + "task_1": ToolAgentOutput(output="Finding 1", sources=["url1"]), + } + mock_execute.side_effect = mock_execute_async # Mock RAG service to return None to avoid ChromaDB initialization mock_rag.return_value = None @@ -54,6 +79,26 @@ class TestIterativeResearchFlow: @pytest.mark.asyncio async def test_iterative_flow_completes_when_research_complete(self, flow, mock_agents): """IterativeResearchFlow should complete when research is marked complete.""" + from src.utils.models import JudgeAssessment, AssessmentDetails + + # Mock judge to return sufficient=True so loop completes + sufficient_assessment = JudgeAssessment( + details=AssessmentDetails( + mechanism_score=8, + mechanism_reasoning="Strong evidence for mechanism of action", + clinical_evidence_score=7, + clinical_reasoning="Good support from clinical studies", + drug_candidates=["TestDrug"], + key_findings=["Finding 1"], + ), + sufficient=True, + confidence=0.9, + recommendation="synthesize", + next_search_queries=[], + reasoning="Evidence is sufficient", + ) + flow.judge_handler.assess = AsyncMock(return_value=sufficient_assessment) + # Mock knowledge gap agent to return complete mock_agents["knowledge_gap"].evaluate = AsyncMock( return_value=KnowledgeGapOutput( @@ -202,10 +247,32 @@ class TestDeepResearchFlow: @pytest.fixture def flow(self, mock_agents): """Create a DeepResearchFlow with mocked agents.""" + from src.utils.models import JudgeAssessment, AssessmentDetails + + mock_judge = MagicMock() + # Mock judge assessment - default to insufficient so loops continue + default_assessment = JudgeAssessment( + details=AssessmentDetails( + mechanism_score=5, + mechanism_reasoning="Test reasoning for mechanism assessment", + clinical_evidence_score=5, + clinical_reasoning="Test reasoning for clinical evidence assessment", + drug_candidates=[], + key_findings=[], + ), + sufficient=False, + confidence=0.5, + recommendation="continue", + next_search_queries=[], + reasoning="Test assessment for research flow testing purposes", + ) + mock_judge.assess = AsyncMock(return_value=default_assessment) + with ( patch("src.orchestrator.research_flow.create_planner_agent") as mock_planner, patch("src.orchestrator.research_flow.create_long_writer_agent") as mock_long_writer, patch("src.orchestrator.research_flow.create_proofreader_agent") as mock_proofreader, + patch("src.orchestrator.research_flow.create_judge_handler", return_value=mock_judge), ): mock_planner.return_value = mock_agents["planner"] mock_long_writer.return_value = mock_agents["long_writer"] diff --git a/tests/unit/services/test_statistical_analyzer.py b/tests/unit/services/test_statistical_analyzer.py index d5b2e39aad7c8e29a3f72d9d8b90c53e7294b4cd..978397530a5948e0c1faeeb31069374424471506 100644 --- a/tests/unit/services/test_statistical_analyzer.py +++ b/tests/unit/services/test_statistical_analyzer.py @@ -54,9 +54,10 @@ class TestStatisticalAnalyzer: patch.object(analyzer, "_get_code_executor") as mock_executor, ): # Mock LLM - mock_agent.return_value.run = AsyncMock( - return_value=MagicMock(output="print('SUPPORTED')") - ) + mock_code_result = MagicMock() + type(mock_code_result).data = "print('SUPPORTED')" # pydantic-ai uses .data + mock_code_result.output = "print('SUPPORTED')" + mock_agent.return_value.run = AsyncMock(return_value=mock_code_result) # Mock Modal mock_executor.return_value.execute.return_value = { diff --git a/tests/unit/test_app_smoke.py b/tests/unit/test_app_smoke.py index 74e88245814f12c1d80af1975ddf25b5b0dd634f..22fbed5f9ecb80ca7d55ea75a5aad3d2c25a3ee9 100644 --- a/tests/unit/test_app_smoke.py +++ b/tests/unit/test_app_smoke.py @@ -28,6 +28,11 @@ class TestAppSmoke: # OAuth dependencies may not be available in test environment # This is acceptable - OAuth is optional functionality + # Also skip if HF_TOKEN is not set (required for Gradio OAuth mocking) + import os + if not os.getenv("HF_TOKEN"): + pytest.skip("HF_TOKEN not set - required for Gradio OAuth mocking in tests") + try: demo = create_demo() assert demo is not None @@ -35,6 +40,10 @@ class TestAppSmoke: if "oauth" in str(e).lower() or "itsdangerous" in str(e).lower(): pytest.skip(f"OAuth dependencies not available: {e}") raise + except ValueError as e: + if "HF_TOKEN" in str(e) or "huggingface-cli login" in str(e): + pytest.skip(f"HF authentication not available: {e}") + raise def test_mcp_tools_importable(self) -> None: """MCP tool functions should be importable. diff --git a/uv.lock b/uv.lock index 8d086d578d5116973daac51e7abfc7f81162796d..797f9971f239a55a0d02a3ccf246dbb9f226f2da 100644 --- a/uv.lock +++ b/uv.lock @@ -1032,6 +1032,7 @@ dependencies = [ { name = "pytest-cov" }, { name = "python-dotenv" }, { name = "requests" }, + { name = "rpds-py" }, { name = "sentence-transformers" }, { name = "structlog" }, { name = "tenacity" }, @@ -1105,6 +1106,7 @@ requires-dist = [ { name = "python-dotenv", specifier = ">=1.0" }, { name = "requests", specifier = ">=2.32.5" }, { name = "respx", marker = "extra == 'dev'", specifier = ">=0.22.0" }, + { name = "rpds-py", specifier = ">=0.29.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.6" }, { name = "sentence-transformers", specifier = ">=2.2.0" }, { name = "structlog", specifier = ">=24.1" },