Spaces:
Running
Running
Merge pull request #11 from The-Obstacle-Is-The-Way/feat/phase7-hypothesis-agent
Browse files- src/agents/hypothesis_agent.py +144 -0
- src/orchestrator_magentic.py +152 -121
- src/prompts/hypothesis.py +68 -0
- src/utils/models.py +41 -0
- src/utils/text_utils.py +132 -0
- tests/unit/agents/test_hypothesis_agent.py +105 -0
- tests/unit/utils/test_text_utils.py +133 -0
src/agents/hypothesis_agent.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Hypothesis agent for mechanistic reasoning."""
|
| 2 |
+
|
| 3 |
+
from collections.abc import AsyncIterable
|
| 4 |
+
from typing import TYPE_CHECKING, Any
|
| 5 |
+
|
| 6 |
+
from agent_framework import (
|
| 7 |
+
AgentRunResponse,
|
| 8 |
+
AgentRunResponseUpdate,
|
| 9 |
+
AgentThread,
|
| 10 |
+
BaseAgent,
|
| 11 |
+
ChatMessage,
|
| 12 |
+
Role,
|
| 13 |
+
)
|
| 14 |
+
from pydantic_ai import Agent
|
| 15 |
+
|
| 16 |
+
from src.agent_factory.judges import get_model
|
| 17 |
+
from src.prompts.hypothesis import SYSTEM_PROMPT, format_hypothesis_prompt
|
| 18 |
+
from src.utils.models import HypothesisAssessment
|
| 19 |
+
|
| 20 |
+
if TYPE_CHECKING:
|
| 21 |
+
from src.services.embeddings import EmbeddingService
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class HypothesisAgent(BaseAgent):  # type: ignore[misc]
    """Generates mechanistic hypotheses based on evidence.

    Participates in the Magentic workflow: reads shared evidence from
    ``evidence_store["current"]``, asks an LLM (via pydantic-ai) for
    structured ``HypothesisAssessment`` output, and appends the resulting
    hypotheses to ``evidence_store["hypotheses"]`` for other agents.
    """

    def __init__(
        self,
        evidence_store: dict[str, Any],
        embedding_service: "EmbeddingService | None" = None,  # NEW: for diverse selection
    ) -> None:
        """Create the agent.

        Args:
            evidence_store: Shared mutable store; this agent reads the
                "current" key and writes the "hypotheses" key.
            embedding_service: Optional service used for diverse (MMR)
                evidence selection when building the prompt.
        """
        super().__init__(
            name="HypothesisAgent",
            description="Generates scientific hypotheses about drug mechanisms to guide research",
        )
        self._evidence_store = evidence_store
        self._embeddings = embedding_service  # Used for MMR evidence selection
        self._agent: Agent[None, HypothesisAssessment] | None = None  # Lazy init

    def _get_agent(self) -> Agent[None, HypothesisAssessment]:
        """Lazy initialization of LLM agent to avoid requiring API keys at import."""
        if self._agent is None:
            self._agent = Agent(
                model=get_model(),  # Uses configured LLM (OpenAI/Anthropic)
                output_type=HypothesisAssessment,
                system_prompt=SYSTEM_PROMPT,
            )
        return self._agent

    async def run(
        self,
        messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
        *,
        thread: AgentThread | None = None,
        **kwargs: Any,
    ) -> AgentRunResponse:
        """Generate hypotheses based on current evidence.

        Returns an assistant message summarizing the hypotheses; the full
        structured assessment is attached under
        ``additional_properties["assessment"]``.  If no evidence has been
        collected yet, returns an instructional message instead of calling
        the LLM.
        """
        # Extract query
        query = self._extract_query(messages)

        # Get current evidence
        evidence = self._evidence_store.get("current", [])

        if not evidence:
            # Guard: hypothesizing without evidence would just hallucinate.
            return AgentRunResponse(
                messages=[
                    ChatMessage(
                        role=Role.ASSISTANT,
                        text="No evidence available yet. Search for evidence first.",
                    )
                ],
                response_id="hypothesis-no-evidence",
            )

        # Generate hypotheses with diverse evidence selection
        prompt = await format_hypothesis_prompt(query, evidence, embeddings=self._embeddings)
        result = await self._get_agent().run(prompt)
        assessment = result.output  # pydantic-ai returns .output for structured output

        # Store hypotheses in shared context
        # (appended, not replaced, so earlier rounds are preserved)
        existing = self._evidence_store.get("hypotheses", [])
        self._evidence_store["hypotheses"] = existing + assessment.hypotheses

        # Format response
        response_text = self._format_response(assessment)

        return AgentRunResponse(
            messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
            response_id=f"hypothesis-{len(assessment.hypotheses)}",
            additional_properties={"assessment": assessment.model_dump()},
        )

    def _format_response(self, assessment: HypothesisAssessment) -> str:
        """Format hypothesis assessment as markdown.

        Sections: all hypotheses, then (if present) the primary hypothesis,
        knowledge gaps, and recommended next searches.
        """
        lines = ["## Generated Hypotheses\n"]

        for i, h in enumerate(assessment.hypotheses, 1):
            lines.append(f"### Hypothesis {i} (Confidence: {h.confidence:.0%})")
            lines.append(f"**Mechanism**: {h.drug} -> {h.target} -> {h.pathway} -> {h.effect}")
            lines.append(f"**Suggested searches**: {', '.join(h.search_suggestions)}\n")

        if assessment.primary_hypothesis:
            lines.append("### Primary Hypothesis")
            h = assessment.primary_hypothesis
            lines.append(f"{h.drug} -> {h.target} -> {h.pathway} -> {h.effect}\n")

        if assessment.knowledge_gaps:
            lines.append("### Knowledge Gaps")
            for gap in assessment.knowledge_gaps:
                lines.append(f"- {gap}")

        if assessment.recommended_searches:
            lines.append("\n### Recommended Next Searches")
            for search in assessment.recommended_searches:
                lines.append(f"- `{search}`")

        return "\n".join(lines)

    def _extract_query(
        self, messages: str | ChatMessage | list[str] | list[ChatMessage] | None
    ) -> str:
        """Extract query from messages.

        Accepts the union of shapes the framework may pass; for a list,
        scans from newest to oldest and returns the first USER message
        (or the first plain string encountered).  Returns "" if nothing
        usable is found.
        """
        if isinstance(messages, str):
            return messages
        elif isinstance(messages, ChatMessage):
            return messages.text or ""
        elif isinstance(messages, list):
            for msg in reversed(messages):
                if isinstance(msg, ChatMessage) and msg.role == Role.USER:
                    return msg.text or ""
                elif isinstance(msg, str):
                    return msg
        return ""

    async def run_stream(
        self,
        messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
        *,
        thread: AgentThread | None = None,
        **kwargs: Any,
    ) -> AsyncIterable[AgentRunResponseUpdate]:
        """Streaming wrapper.

        This agent does not stream token-by-token: it runs ``run`` to
        completion and yields the whole response as a single update.
        """
        result = await self.run(messages, thread=thread, **kwargs)
        yield AgentRunResponseUpdate(messages=result.messages, response_id=result.response_id)
|
src/orchestrator_magentic.py
CHANGED
|
@@ -6,8 +6,13 @@ the agent_framework provides an AnthropicChatClient.
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
from collections.abc import AsyncGenerator
|
|
|
|
| 9 |
|
| 10 |
import structlog
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from agent_framework import (
|
| 12 |
MagenticAgentDeltaEvent,
|
| 13 |
MagenticAgentMessageEvent,
|
|
@@ -18,6 +23,7 @@ from agent_framework import (
|
|
| 18 |
)
|
| 19 |
from agent_framework.openai import OpenAIChatClient
|
| 20 |
|
|
|
|
| 21 |
from src.agents.judge_agent import JudgeAgent
|
| 22 |
from src.agents.search_agent import SearchAgent
|
| 23 |
from src.orchestrator import JudgeHandlerProtocol, SearchHandlerProtocol
|
|
@@ -28,6 +34,11 @@ from src.utils.models import AgentEvent, Evidence
|
|
| 28 |
logger = structlog.get_logger()
|
| 29 |
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
class MagenticOrchestrator:
|
| 32 |
"""
|
| 33 |
Magentic-based orchestrator - same API as Orchestrator.
|
|
@@ -51,50 +62,38 @@ class MagenticOrchestrator:
|
|
| 51 |
self._max_rounds = max_rounds
|
| 52 |
self._evidence_store: dict[str, list[Evidence]] = {"current": []}
|
| 53 |
|
| 54 |
-
|
| 55 |
-
"""
|
| 56 |
-
Run the Magentic workflow - same API as simple Orchestrator.
|
| 57 |
-
|
| 58 |
-
Yields AgentEvent objects for real-time UI updates.
|
| 59 |
-
"""
|
| 60 |
-
logger.info("Starting Magentic orchestrator", query=query)
|
| 61 |
-
|
| 62 |
-
yield AgentEvent(
|
| 63 |
-
type="started",
|
| 64 |
-
message=f"Starting research (Magentic mode): {query}",
|
| 65 |
-
iteration=0,
|
| 66 |
-
)
|
| 67 |
-
|
| 68 |
-
# Initialize embedding service (optional)
|
| 69 |
-
embedding_service = None
|
| 70 |
try:
|
| 71 |
from src.services.embeddings import get_embedding_service
|
| 72 |
|
| 73 |
-
|
| 74 |
logger.info("Embedding service enabled")
|
|
|
|
| 75 |
except ImportError:
|
| 76 |
logger.info("Embedding service not available (dependencies missing)")
|
| 77 |
except Exception as e:
|
| 78 |
logger.warning("Failed to initialize embedding service", error=str(e))
|
|
|
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
judge_agent
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
# Note: MagenticBuilder requires OpenAI - validate key exists
|
| 88 |
if not settings.openai_api_key:
|
| 89 |
raise ConfigurationError(
|
| 90 |
"Magentic mode requires OPENAI_API_KEY. "
|
| 91 |
"Set the key or use mode='simple' with Anthropic."
|
| 92 |
)
|
| 93 |
|
| 94 |
-
|
| 95 |
MagenticBuilder()
|
| 96 |
.participants(
|
| 97 |
searcher=search_agent,
|
|
|
|
| 98 |
judge=judge_agent,
|
| 99 |
)
|
| 100 |
.with_standard_manager(
|
|
@@ -108,114 +107,67 @@ class MagenticOrchestrator:
|
|
| 108 |
.build()
|
| 109 |
)
|
| 110 |
|
| 111 |
-
|
|
|
|
| 112 |
semantic_note = ""
|
| 113 |
-
if
|
| 114 |
semantic_note = """
|
| 115 |
The system has semantic search enabled. When evidence is found:
|
| 116 |
1. Related concepts will be automatically surfaced
|
| 117 |
2. Duplicates are removed by meaning, not just URL
|
| 118 |
3. Use the surfaced related concepts to refine searches
|
| 119 |
"""
|
| 120 |
-
|
| 121 |
-
task = f"""Research drug repurposing opportunities for: {query}
|
| 122 |
{semantic_note}
|
| 123 |
-
|
| 124 |
-
1.
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
-
|
| 133 |
-
-
|
| 134 |
-
- Specific drug candidates
|
| 135 |
"""
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
iteration = 0
|
| 138 |
try:
|
| 139 |
-
# workflow.run_stream returns an async generator of workflow events
|
| 140 |
-
# We use 'await' in the for loop for async generator
|
| 141 |
async for event in workflow.run_stream(task):
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
event.message.text
|
| 148 |
-
if event.message and hasattr(event.message, "text")
|
| 149 |
-
else ""
|
| 150 |
-
)
|
| 151 |
-
# kind might be 'plan', 'instruction', etc.
|
| 152 |
-
kind = getattr(event, "kind", "manager")
|
| 153 |
-
|
| 154 |
-
if message_text:
|
| 155 |
-
yield AgentEvent(
|
| 156 |
-
type="judging",
|
| 157 |
-
message=f"Manager ({kind}): {message_text[:100]}...",
|
| 158 |
-
iteration=iteration,
|
| 159 |
-
)
|
| 160 |
-
|
| 161 |
-
elif isinstance(event, MagenticAgentMessageEvent):
|
| 162 |
-
# Complete agent response
|
| 163 |
-
iteration += 1
|
| 164 |
-
agent_name = event.agent_id or "unknown"
|
| 165 |
-
msg_text = (
|
| 166 |
-
event.message.text
|
| 167 |
-
if event.message and hasattr(event.message, "text")
|
| 168 |
-
else ""
|
| 169 |
-
)
|
| 170 |
-
|
| 171 |
-
if "search" in agent_name.lower():
|
| 172 |
-
# Check if we found evidence (based on SearchAgent logic)
|
| 173 |
-
yield AgentEvent(
|
| 174 |
-
type="search_complete",
|
| 175 |
-
message=f"Search agent: {msg_text[:100]}...",
|
| 176 |
-
iteration=iteration,
|
| 177 |
-
)
|
| 178 |
-
elif "judge" in agent_name.lower():
|
| 179 |
-
yield AgentEvent(
|
| 180 |
-
type="judge_complete",
|
| 181 |
-
message=f"Judge agent: {msg_text[:100]}...",
|
| 182 |
-
iteration=iteration,
|
| 183 |
-
)
|
| 184 |
-
|
| 185 |
-
elif isinstance(event, MagenticFinalResultEvent):
|
| 186 |
-
# Final workflow result
|
| 187 |
-
final_text = (
|
| 188 |
-
event.message.text
|
| 189 |
-
if event.message and hasattr(event.message, "text")
|
| 190 |
-
else "No result"
|
| 191 |
-
)
|
| 192 |
-
yield AgentEvent(
|
| 193 |
-
type="complete",
|
| 194 |
-
message=final_text,
|
| 195 |
-
data={"iterations": iteration},
|
| 196 |
-
iteration=iteration,
|
| 197 |
-
)
|
| 198 |
-
|
| 199 |
-
elif isinstance(event, MagenticAgentDeltaEvent):
|
| 200 |
-
# Streaming token chunks from agents (optional "typing" effect)
|
| 201 |
-
# Only emit if we have actual text content
|
| 202 |
-
if event.text:
|
| 203 |
-
yield AgentEvent(
|
| 204 |
-
type="streaming",
|
| 205 |
-
message=event.text,
|
| 206 |
-
data={"agent_id": event.agent_id},
|
| 207 |
-
iteration=iteration,
|
| 208 |
-
)
|
| 209 |
-
|
| 210 |
-
elif isinstance(event, WorkflowOutputEvent):
|
| 211 |
-
# Alternative final output event
|
| 212 |
-
if event.data:
|
| 213 |
-
yield AgentEvent(
|
| 214 |
-
type="complete",
|
| 215 |
-
message=str(event.data),
|
| 216 |
-
iteration=iteration,
|
| 217 |
-
)
|
| 218 |
-
|
| 219 |
except Exception as e:
|
| 220 |
logger.error("Magentic workflow failed", error=str(e))
|
| 221 |
yield AgentEvent(
|
|
@@ -223,3 +175,82 @@ Focus on finding:
|
|
| 223 |
message=f"Workflow error: {e!s}",
|
| 224 |
iteration=iteration,
|
| 225 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
from collections.abc import AsyncGenerator
|
| 9 |
+
from typing import TYPE_CHECKING, Any
|
| 10 |
|
| 11 |
import structlog
|
| 12 |
+
|
| 13 |
+
if TYPE_CHECKING:
|
| 14 |
+
from src.services.embeddings import EmbeddingService
|
| 15 |
+
|
| 16 |
from agent_framework import (
|
| 17 |
MagenticAgentDeltaEvent,
|
| 18 |
MagenticAgentMessageEvent,
|
|
|
|
| 23 |
)
|
| 24 |
from agent_framework.openai import OpenAIChatClient
|
| 25 |
|
| 26 |
+
from src.agents.hypothesis_agent import HypothesisAgent
|
| 27 |
from src.agents.judge_agent import JudgeAgent
|
| 28 |
from src.agents.search_agent import SearchAgent
|
| 29 |
from src.orchestrator import JudgeHandlerProtocol, SearchHandlerProtocol
|
|
|
|
| 34 |
logger = structlog.get_logger()
|
| 35 |
|
| 36 |
|
| 37 |
+
def _truncate(text: str, max_len: int = 100) -> str:
|
| 38 |
+
"""Truncate text with ellipsis only if needed."""
|
| 39 |
+
return f"{text[:max_len]}..." if len(text) > max_len else text
|
| 40 |
+
|
| 41 |
+
|
| 42 |
class MagenticOrchestrator:
|
| 43 |
"""
|
| 44 |
Magentic-based orchestrator - same API as Orchestrator.
|
|
|
|
| 62 |
self._max_rounds = max_rounds
|
| 63 |
self._evidence_store: dict[str, list[Evidence]] = {"current": []}
|
| 64 |
|
| 65 |
+
    def _init_embedding_service(self) -> "EmbeddingService | None":
        """Initialize embedding service if available.

        Returns the embedding service, or None when the optional embedding
        dependencies are missing or initialization fails.  Failure is
        deliberately non-fatal: the orchestrator degrades to non-semantic
        search rather than crashing at startup.
        """
        try:
            # Imported lazily so the orchestrator still works when the
            # optional embedding dependencies are not installed.
            from src.services.embeddings import get_embedding_service

            service = get_embedding_service()
            logger.info("Embedding service enabled")
            return service
        except ImportError:
            # Optional dependency absent -- run without semantic features.
            logger.info("Embedding service not available (dependencies missing)")
        except Exception as e:
            # Any other startup error (e.g. model load) is logged and swallowed.
            logger.warning("Failed to initialize embedding service", error=str(e))
        return None
|
| 78 |
|
| 79 |
+
def _build_workflow(
|
| 80 |
+
self,
|
| 81 |
+
search_agent: SearchAgent,
|
| 82 |
+
hypothesis_agent: HypothesisAgent,
|
| 83 |
+
judge_agent: JudgeAgent,
|
| 84 |
+
) -> Any:
|
| 85 |
+
"""Build the Magentic workflow with participants."""
|
|
|
|
| 86 |
if not settings.openai_api_key:
|
| 87 |
raise ConfigurationError(
|
| 88 |
"Magentic mode requires OPENAI_API_KEY. "
|
| 89 |
"Set the key or use mode='simple' with Anthropic."
|
| 90 |
)
|
| 91 |
|
| 92 |
+
return (
|
| 93 |
MagenticBuilder()
|
| 94 |
.participants(
|
| 95 |
searcher=search_agent,
|
| 96 |
+
hypothesizer=hypothesis_agent,
|
| 97 |
judge=judge_agent,
|
| 98 |
)
|
| 99 |
.with_standard_manager(
|
|
|
|
| 107 |
.build()
|
| 108 |
)
|
| 109 |
|
| 110 |
+
def _format_task(self, query: str, has_embeddings: bool) -> str:
|
| 111 |
+
"""Format the task instruction for the manager."""
|
| 112 |
semantic_note = ""
|
| 113 |
+
if has_embeddings:
|
| 114 |
semantic_note = """
|
| 115 |
The system has semantic search enabled. When evidence is found:
|
| 116 |
1. Related concepts will be automatically surfaced
|
| 117 |
2. Duplicates are removed by meaning, not just URL
|
| 118 |
3. Use the surfaced related concepts to refine searches
|
| 119 |
"""
|
| 120 |
+
return f"""Research drug repurposing opportunities for: {query}
|
|
|
|
| 121 |
{semantic_note}
|
| 122 |
+
Workflow:
|
| 123 |
+
1. SearcherAgent: Find initial evidence from PubMed and web. SEND ONLY A SIMPLE KEYWORD QUERY.
|
| 124 |
+
2. HypothesisAgent: Generate mechanistic hypotheses (Drug -> Target -> Pathway -> Effect).
|
| 125 |
+
3. SearcherAgent: Use hypothesis-suggested queries for targeted search.
|
| 126 |
+
4. JudgeAgent: Evaluate if evidence supports hypotheses.
|
| 127 |
+
5. Repeat until confident or max rounds.
|
| 128 |
+
|
| 129 |
+
Focus on:
|
| 130 |
+
- Identifying specific molecular targets
|
| 131 |
+
- Understanding mechanism of action
|
| 132 |
+
- Finding supporting/contradicting evidence for hypotheses
|
|
|
|
| 133 |
"""
|
| 134 |
|
| 135 |
+
async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
|
| 136 |
+
"""
|
| 137 |
+
Run the Magentic workflow - same API as simple Orchestrator.
|
| 138 |
+
|
| 139 |
+
Yields AgentEvent objects for real-time UI updates.
|
| 140 |
+
"""
|
| 141 |
+
logger.info("Starting Magentic orchestrator", query=query)
|
| 142 |
+
|
| 143 |
+
yield AgentEvent(
|
| 144 |
+
type="started",
|
| 145 |
+
message=f"Starting research (Magentic mode): {query}",
|
| 146 |
+
iteration=0,
|
| 147 |
+
)
|
| 148 |
+
|
| 149 |
+
# Initialize services and agents
|
| 150 |
+
embedding_service = self._init_embedding_service()
|
| 151 |
+
search_agent = SearchAgent(
|
| 152 |
+
self._search_handler, self._evidence_store, embedding_service=embedding_service
|
| 153 |
+
)
|
| 154 |
+
judge_agent = JudgeAgent(self._judge_handler, self._evidence_store)
|
| 155 |
+
hypothesis_agent = HypothesisAgent(
|
| 156 |
+
self._evidence_store, embedding_service=embedding_service
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
# Build workflow and task
|
| 160 |
+
workflow = self._build_workflow(search_agent, hypothesis_agent, judge_agent)
|
| 161 |
+
task = self._format_task(query, embedding_service is not None)
|
| 162 |
+
|
| 163 |
iteration = 0
|
| 164 |
try:
|
|
|
|
|
|
|
| 165 |
async for event in workflow.run_stream(task):
|
| 166 |
+
agent_event = self._process_event(event, iteration)
|
| 167 |
+
if agent_event:
|
| 168 |
+
if isinstance(event, MagenticAgentMessageEvent):
|
| 169 |
+
iteration += 1
|
| 170 |
+
yield agent_event
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
except Exception as e:
|
| 172 |
logger.error("Magentic workflow failed", error=str(e))
|
| 173 |
yield AgentEvent(
|
|
|
|
| 175 |
message=f"Workflow error: {e!s}",
|
| 176 |
iteration=iteration,
|
| 177 |
)
|
| 178 |
+
|
| 179 |
+
def _process_event(self, event: Any, iteration: int) -> AgentEvent | None:
|
| 180 |
+
"""Process a workflow event and return an AgentEvent if applicable."""
|
| 181 |
+
if isinstance(event, MagenticOrchestratorMessageEvent):
|
| 182 |
+
message_text = (
|
| 183 |
+
event.message.text if event.message and hasattr(event.message, "text") else ""
|
| 184 |
+
)
|
| 185 |
+
kind = getattr(event, "kind", "manager")
|
| 186 |
+
if message_text:
|
| 187 |
+
return AgentEvent(
|
| 188 |
+
type="judging",
|
| 189 |
+
message=f"Manager ({kind}): {_truncate(message_text)}",
|
| 190 |
+
iteration=iteration,
|
| 191 |
+
)
|
| 192 |
+
|
| 193 |
+
elif isinstance(event, MagenticAgentMessageEvent):
|
| 194 |
+
agent_name = event.agent_id or "unknown"
|
| 195 |
+
msg_text = (
|
| 196 |
+
event.message.text if event.message and hasattr(event.message, "text") else ""
|
| 197 |
+
)
|
| 198 |
+
return self._agent_message_event(agent_name, msg_text, iteration + 1)
|
| 199 |
+
|
| 200 |
+
elif isinstance(event, MagenticFinalResultEvent):
|
| 201 |
+
final_text = (
|
| 202 |
+
event.message.text
|
| 203 |
+
if event.message and hasattr(event.message, "text")
|
| 204 |
+
else "No result"
|
| 205 |
+
)
|
| 206 |
+
return AgentEvent(
|
| 207 |
+
type="complete",
|
| 208 |
+
message=final_text,
|
| 209 |
+
data={"iterations": iteration},
|
| 210 |
+
iteration=iteration,
|
| 211 |
+
)
|
| 212 |
+
|
| 213 |
+
elif isinstance(event, MagenticAgentDeltaEvent):
|
| 214 |
+
if event.text:
|
| 215 |
+
return AgentEvent(
|
| 216 |
+
type="streaming",
|
| 217 |
+
message=event.text,
|
| 218 |
+
data={"agent_id": event.agent_id},
|
| 219 |
+
iteration=iteration,
|
| 220 |
+
)
|
| 221 |
+
|
| 222 |
+
elif isinstance(event, WorkflowOutputEvent):
|
| 223 |
+
if event.data:
|
| 224 |
+
return AgentEvent(
|
| 225 |
+
type="complete",
|
| 226 |
+
message=str(event.data),
|
| 227 |
+
iteration=iteration,
|
| 228 |
+
)
|
| 229 |
+
|
| 230 |
+
return None
|
| 231 |
+
|
| 232 |
+
def _agent_message_event(self, agent_name: str, msg_text: str, iteration: int) -> AgentEvent:
|
| 233 |
+
"""Create an AgentEvent for an agent message."""
|
| 234 |
+
if "search" in agent_name.lower():
|
| 235 |
+
return AgentEvent(
|
| 236 |
+
type="search_complete",
|
| 237 |
+
message=f"Search agent: {_truncate(msg_text)}",
|
| 238 |
+
iteration=iteration,
|
| 239 |
+
)
|
| 240 |
+
elif "hypothes" in agent_name.lower():
|
| 241 |
+
return AgentEvent(
|
| 242 |
+
type="hypothesizing",
|
| 243 |
+
message=f"Hypothesis agent: {_truncate(msg_text)}",
|
| 244 |
+
iteration=iteration,
|
| 245 |
+
)
|
| 246 |
+
elif "judge" in agent_name.lower():
|
| 247 |
+
return AgentEvent(
|
| 248 |
+
type="judge_complete",
|
| 249 |
+
message=f"Judge agent: {_truncate(msg_text)}",
|
| 250 |
+
iteration=iteration,
|
| 251 |
+
)
|
| 252 |
+
return AgentEvent(
|
| 253 |
+
type="judging",
|
| 254 |
+
message=f"{agent_name}: {_truncate(msg_text)}",
|
| 255 |
+
iteration=iteration,
|
| 256 |
+
)
|
src/prompts/hypothesis.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Prompts for Hypothesis Agent."""
|
| 2 |
+
|
| 3 |
+
from typing import TYPE_CHECKING
|
| 4 |
+
|
| 5 |
+
from src.utils.text_utils import select_diverse_evidence, truncate_at_sentence
|
| 6 |
+
|
| 7 |
+
if TYPE_CHECKING:
|
| 8 |
+
from src.services.embeddings import EmbeddingService
|
| 9 |
+
from src.utils.models import Evidence
|
| 10 |
+
|
| 11 |
+
SYSTEM_PROMPT = """You are a biomedical research scientist specializing in drug repurposing.
|
| 12 |
+
|
| 13 |
+
Your role is to generate mechanistic hypotheses based on evidence.
|
| 14 |
+
|
| 15 |
+
A good hypothesis:
|
| 16 |
+
1. Proposes a MECHANISM: Drug -> Target -> Pathway -> Effect
|
| 17 |
+
2. Is TESTABLE: Can be supported or refuted by literature search
|
| 18 |
+
3. Is SPECIFIC: Names actual molecular targets and pathways
|
| 19 |
+
4. Generates SEARCH QUERIES: Helps find more evidence
|
| 20 |
+
|
| 21 |
+
Example hypothesis format:
|
| 22 |
+
- Drug: Metformin
|
| 23 |
+
- Target: AMPK (AMP-activated protein kinase)
|
| 24 |
+
- Pathway: mTOR inhibition -> autophagy activation
|
| 25 |
+
- Effect: Enhanced clearance of amyloid-beta in Alzheimer's
|
| 26 |
+
- Confidence: 0.7
|
| 27 |
+
- Search suggestions: ["metformin AMPK brain", "autophagy amyloid clearance"]
|
| 28 |
+
|
| 29 |
+
Be specific. Use actual gene/protein names when possible."""
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
async def format_hypothesis_prompt(
    query: str, evidence: list["Evidence"], embeddings: "EmbeddingService | None" = None
) -> str:
    """Build the LLM prompt for hypothesis generation.

    Evidence is chosen by diversity-aware selection (MMR when an embedding
    service is supplied, relevance ordering otherwise) instead of simply
    taking the first N items, and each abstract is shortened at a sentence
    boundary so the prompt stays readable.

    Args:
        query: The research query
        evidence: All collected evidence
        embeddings: Optional EmbeddingService for diverse selection
    """
    # n=10 keeps the evidence section within a reasonable context budget.
    chosen = await select_diverse_evidence(evidence, n=10, query=query, embeddings=embeddings)

    # One markdown bullet per paper: bold title, source, sentence-truncated body.
    bullet_lines = []
    for item in chosen:
        summary = truncate_at_sentence(item.content, 300)
        bullet_lines.append(f"- **{item.citation.title}** ({item.citation.source}): {summary}")
    evidence_text = "\n".join(bullet_lines)

    return f"""Based on the following evidence about "{query}", generate mechanistic hypotheses.

## Evidence ({len(chosen)} papers selected for diversity)
{evidence_text}

## Task
1. Identify potential drug targets mentioned in the evidence
2. Propose mechanism hypotheses (Drug -> Target -> Pathway -> Effect)
3. Rate confidence based on evidence strength
4. Suggest searches to test each hypothesis

Generate 2-4 hypotheses, prioritized by confidence."""
|
src/utils/models.py
CHANGED
|
@@ -107,6 +107,7 @@ class AgentEvent(BaseModel):
|
|
| 107 |
"complete",
|
| 108 |
"error",
|
| 109 |
"streaming",
|
|
|
|
| 110 |
]
|
| 111 |
message: str
|
| 112 |
data: Any = None
|
|
@@ -126,11 +127,51 @@ class AgentEvent(BaseModel):
|
|
| 126 |
"complete": "🎉",
|
| 127 |
"error": "❌",
|
| 128 |
"streaming": "📡",
|
|
|
|
| 129 |
}
|
| 130 |
icon = icons.get(self.type, "•")
|
| 131 |
return f"{icon} **{self.type.upper()}**: {self.message}"
|
| 132 |
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
class OrchestratorConfig(BaseModel):
|
| 135 |
"""Configuration for the orchestrator."""
|
| 136 |
|
|
|
|
| 107 |
"complete",
|
| 108 |
"error",
|
| 109 |
"streaming",
|
| 110 |
+
"hypothesizing", # NEW for Phase 7
|
| 111 |
]
|
| 112 |
message: str
|
| 113 |
data: Any = None
|
|
|
|
| 127 |
"complete": "🎉",
|
| 128 |
"error": "❌",
|
| 129 |
"streaming": "📡",
|
| 130 |
+
"hypothesizing": "🔬", # NEW
|
| 131 |
}
|
| 132 |
icon = icons.get(self.type, "•")
|
| 133 |
return f"{icon} **{self.type.upper()}**: {self.message}"
|
| 134 |
|
| 135 |
|
| 136 |
+
class MechanismHypothesis(BaseModel):
    """A scientific hypothesis about drug mechanism.

    Encodes a causal chain Drug -> Target -> Pathway -> Effect with a
    confidence score and the evidence/search bookkeeping around it.
    """

    drug: str = Field(description="The drug being studied")
    target: str = Field(description="Molecular target (e.g., AMPK, mTOR)")
    pathway: str = Field(description="Biological pathway affected")
    effect: str = Field(description="Downstream effect on disease")
    confidence: float = Field(ge=0, le=1, description="Confidence in hypothesis")
    supporting_evidence: list[str] = Field(
        default_factory=list, description="PMIDs or URLs supporting this hypothesis"
    )
    contradicting_evidence: list[str] = Field(
        default_factory=list, description="PMIDs or URLs contradicting this hypothesis"
    )
    search_suggestions: list[str] = Field(
        default_factory=list, description="Suggested searches to test this hypothesis"
    )

    def to_search_queries(self) -> list[str]:
        """Generate search queries to test this hypothesis.

        Returns pairwise links of the mechanism chain followed by the
        model's own suggested searches.
        """
        queries = [
            f"{self.drug} {self.target}",
            f"{self.target} {self.pathway}",
            f"{self.pathway} {self.effect}",
        ]
        queries.extend(self.search_suggestions)
        return queries
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
class HypothesisAssessment(BaseModel):
    """Assessment of evidence against hypotheses.

    This is the structured output type of the hypothesis agent's LLM call.
    """

    # All hypotheses generated this round; the prompt asks the model to
    # order them by confidence.
    hypotheses: list[MechanismHypothesis]
    primary_hypothesis: MechanismHypothesis | None = Field(
        default=None, description="Most promising hypothesis based on current evidence"
    )
    knowledge_gaps: list[str] = Field(description="What we don't know yet")
    recommended_searches: list[str] = Field(description="Searches to fill knowledge gaps")
|
| 173 |
+
|
| 174 |
+
|
| 175 |
class OrchestratorConfig(BaseModel):
|
| 176 |
"""Configuration for the orchestrator."""
|
| 177 |
|
src/utils/text_utils.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Text processing utilities for evidence handling."""
|
| 2 |
+
|
| 3 |
+
from typing import TYPE_CHECKING
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
if TYPE_CHECKING:
|
| 8 |
+
from src.services.embeddings import EmbeddingService
|
| 9 |
+
from src.utils.models import Evidence
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def truncate_at_sentence(text: str, max_chars: int = 300) -> str:
    """Shorten ``text`` to roughly ``max_chars`` characters at a natural break.

    Preference order: a real sentence terminator, then any period, then a word
    boundary (with a trailing ellipsis). Boundaries in the first half of the
    budget are ignored so the excerpt is not cut too aggressively.

    Args:
        text: The text to truncate.
        max_chars: Soft character budget (default 300).

    Returns:
        The original text when it fits, otherwise a readable prefix.
    """
    if len(text) <= max_chars:
        return text

    window = text[:max_chars]
    halfway = max_chars // 2

    # Prefer a sentence terminator, checked in fixed priority order. The
    # separator must sit at the end of a sentence inside the window.
    for terminator in (". ", "! ", "? ", ".\n", "!\n", "?\n"):
        cut = window.rfind(terminator)
        if cut > halfway:  # skip boundaries that would drop too much text
            return text[: cut + 1].strip()

    # Next best: any period at all (e.g. one at the very end of the window).
    cut = window.rfind(".")
    if cut > halfway:
        return text[: cut + 1].strip()

    # Otherwise break on the last word boundary and mark the truncation.
    cut = window.rfind(" ")
    if cut > 0:
        return text[:cut].strip() + "..."

    return window + "..."
| 46 |
+
|
| 47 |
+
|
| 48 |
+
async def select_diverse_evidence(
    evidence: list["Evidence"], n: int, query: str, embeddings: "EmbeddingService | None" = None
) -> list["Evidence"]:
    """Pick the ``n`` evidence items that best balance relevance and diversity.

    With an embedding service available, selection runs Maximal Marginal
    Relevance (MMR); without one, it falls back to sorting by the stored
    keyword relevance score.

    Args:
        evidence: All available evidence.
        n: Number of items to select.
        query: Original query used for semantic relevance scoring.
        embeddings: Optional EmbeddingService enabling semantic diversity.

    Returns:
        The selected evidence items, relevant yet mutually diverse.
    """
    if not evidence:
        return []

    if n >= len(evidence):
        return evidence

    if embeddings is None:
        # No semantic signal: rank purely by the model's relevance field.
        ranked = sorted(evidence, key=lambda item: item.relevance, reverse=True)
        return ranked[:n]

    # MMR trade-off: higher weight favors query relevance, lower favors diversity.
    tradeoff = 0.7

    query_vec = await embeddings.embed(query)
    item_vecs = await embeddings.embed_batch([item.content for item in evidence])

    def _cosine(u: list[float], v: list[float]) -> float:
        """Cosine similarity, returning 0.0 for zero-norm vectors."""
        vec_u, vec_v = np.array(u), np.array(v)
        norm_product = float(np.linalg.norm(vec_u) * np.linalg.norm(vec_v))
        if norm_product == 0:
            return 0.0
        return float(np.dot(vec_u, vec_v) / norm_product)

    # Semantic similarity of each item to the query (not the keyword score).
    query_sims = [_cosine(query_vec, vec) for vec in item_vecs]

    # Greedy MMR: score = tradeoff * relevance - (1 - tradeoff) * redundancy.
    chosen: list[int] = []
    candidates = set(range(len(evidence)))

    for _ in range(n):
        top_idx = -1
        top_score = float("-inf")

        for cand in candidates:
            # Redundancy: strongest similarity to anything already chosen.
            if chosen:
                redundancy = max(_cosine(item_vecs[cand], item_vecs[pick]) for pick in chosen)
            else:
                redundancy = 0

            score = tradeoff * query_sims[cand] - (1 - tradeoff) * redundancy
            if score > top_score:
                top_score = score
                top_idx = cand

        if top_idx >= 0:
            chosen.append(top_idx)
            candidates.remove(top_idx)

    return [evidence[i] for i in chosen]
|
tests/unit/agents/test_hypothesis_agent.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unit tests for HypothesisAgent."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import AsyncMock, MagicMock, patch
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
from agent_framework import AgentRunResponse
|
| 7 |
+
|
| 8 |
+
from src.agents.hypothesis_agent import HypothesisAgent
|
| 9 |
+
from src.utils.models import Citation, Evidence, HypothesisAssessment, MechanismHypothesis
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@pytest.fixture
def sample_evidence():
    """Single-item evidence list describing the metformin/AMPK mechanism."""
    pubmed_citation = Citation(
        source="pubmed",
        title="Metformin and AMPK",
        url="https://pubmed.ncbi.nlm.nih.gov/12345/",
        date="2023",
    )
    return [
        Evidence(
            content="Metformin activates AMPK, which inhibits mTOR signaling...",
            citation=pubmed_citation,
        )
    ]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@pytest.fixture
def mock_assessment():
    """Canned assessment with one metformin hypothesis and a known gap."""
    metformin_hypothesis = MechanismHypothesis(
        drug="Metformin",
        target="AMPK",
        pathway="mTOR inhibition",
        effect="Reduced cancer cell proliferation",
        confidence=0.75,
        search_suggestions=["metformin AMPK cancer", "mTOR cancer therapy"],
    )
    return HypothesisAssessment(
        hypotheses=[metformin_hypothesis],
        primary_hypothesis=None,
        knowledge_gaps=["Clinical trial data needed"],
        recommended_searches=["metformin clinical trial cancer"],
    )
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@pytest.mark.asyncio
async def test_hypothesis_agent_generates_hypotheses(sample_evidence, mock_assessment):
    """HypothesisAgent should generate mechanistic hypotheses."""
    store = {"current": sample_evidence, "hypotheses": []}

    with patch("src.agents.hypothesis_agent.get_model") as fake_get_model, patch(
        "src.agents.hypothesis_agent.Agent"
    ) as fake_agent_cls:
        fake_get_model.return_value = MagicMock()  # stand-in model object
        llm_result = MagicMock()
        # pydantic-ai exposes structured output via the .output attribute.
        llm_result.output = mock_assessment
        fake_agent_cls.return_value.run = AsyncMock(return_value=llm_result)

        response = await HypothesisAgent(store).run("metformin cancer")

    assert isinstance(response, AgentRunResponse)
    assert "AMPK" in response.messages[0].text
    assert len(store["hypotheses"]) == 1
    assert store["hypotheses"][0].drug == "Metformin"
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@pytest.mark.asyncio
async def test_hypothesis_agent_no_evidence():
    """HypothesisAgent should handle empty evidence gracefully."""
    store = {"current": [], "hypotheses": []}

    # Empty evidence short-circuits before any model call, so no mocks are needed.
    response = await HypothesisAgent(store).run("test query")

    assert "No evidence" in response.messages[0].text
    assert not store["hypotheses"]
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
@pytest.mark.asyncio
async def test_hypothesis_agent_uses_embeddings(sample_evidence, mock_assessment):
    """HypothesisAgent should pass embeddings to prompt formatter."""
    store = {"current": sample_evidence, "hypotheses": []}
    fake_embeddings = MagicMock()

    with patch("src.agents.hypothesis_agent.get_model") as fake_get_model, patch(
        "src.agents.hypothesis_agent.Agent"
    ) as fake_agent_cls, patch(
        "src.agents.hypothesis_agent.format_hypothesis_prompt"
    ) as fake_format:
        fake_get_model.return_value = MagicMock()  # stand-in model object
        fake_format.return_value = "Prompt"

        llm_result = MagicMock()
        llm_result.output = mock_assessment
        fake_agent_cls.return_value.run = AsyncMock(return_value=llm_result)

        await HypothesisAgent(store, embedding_service=fake_embeddings).run("query")

    # The formatter must receive (query, evidence, ..., embeddings=service).
    fake_format.assert_called_once()
    args, kwargs = fake_format.call_args
    assert kwargs["embeddings"] == fake_embeddings
    assert args[0] == "query"  # first positional arg is the query
    assert args[1] == sample_evidence  # second positional arg is the evidence
|
tests/unit/utils/test_text_utils.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unit tests for text utilities."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import AsyncMock, MagicMock
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
from src.utils.models import Citation, Evidence
|
| 8 |
+
from src.utils.text_utils import select_diverse_evidence, truncate_at_sentence
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class TestTextUtils:
    """Behavioral tests for truncate_at_sentence and select_diverse_evidence."""

    def test_truncate_at_sentence_short(self):
        """Text already under the limit is returned untouched."""
        short = "This is a short sentence."
        assert truncate_at_sentence(short, 100) == short

    def test_truncate_at_sentence_boundary(self):
        """Truncation snaps back to the last complete sentence."""
        text = "First sentence. Second sentence. Third sentence."
        # Budget lands partway into the third sentence.
        cutoff = len("First sentence. Second sentence") + 5
        assert truncate_at_sentence(text, cutoff) == "First sentence. Second sentence."

    def test_truncate_at_sentence_fallback_period(self):
        """Abbreviation periods early in the text do not confuse the cut."""
        text = "Dr. Smith went to the store. He bought apples."
        # Budget lands inside "He bought".
        cutoff = len("Dr. Smith went to the store.") + 5
        assert truncate_at_sentence(text, cutoff) == "Dr. Smith went to the store."

    def test_truncate_at_sentence_fallback_word(self):
        """With no punctuation in range, break at a word and add an ellipsis."""
        text = "This is a very long sentence without any punctuation marks until the very end"
        clipped = truncate_at_sentence(text, 20)
        assert clipped == "This is a very long..."
        # The appended ellipsis may push slightly past the raw limit.
        assert len(clipped) <= 20 + 3

    @pytest.mark.asyncio
    async def test_select_diverse_evidence_no_embeddings(self):
        """Without an embedding service, selection is a plain relevance sort."""
        evidence = [
            self._make_evidence("A", 0.9),
            self._make_evidence("B", 0.1),
            self._make_evidence("C", 0.8),
        ]

        chosen = await select_diverse_evidence(evidence, n=2, query="test", embeddings=None)

        assert len(chosen) == 2
        # Descending relevance: A (0.9) then C (0.8); B (0.1) is dropped.
        assert [e.content for e in chosen] == ["A", "C"]

    @pytest.mark.asyncio
    async def test_select_diverse_evidence_mmr(self):
        """With embeddings, MMR drops near-duplicates in favor of diverse items."""
        # Geometry: A and B share one vector (clones), C is orthogonal to them,
        # and the query sits at 45 degrees -- equally similar (0.707) to both
        # directions. MMR should pick A first, then the diverse C, skipping B.
        query_vector = [0.707, 0.707]
        content_vectors = {"A": [1.0, 0.0], "B": [1.0, 0.0], "C": [0.0, 1.0]}

        async def fake_embed(text):
            return query_vector if text == "query" else [0.0, 0.0]

        async def fake_embed_batch(texts):
            return [content_vectors.get(t, [0.0, 0.0]) for t in texts]

        fake_embeddings = MagicMock()
        fake_embeddings.embed = AsyncMock(side_effect=fake_embed)
        fake_embeddings.embed_batch = AsyncMock(side_effect=fake_embed_batch)

        evidence = [
            self._make_evidence("A", 0.9),
            self._make_evidence("B", 0.9),
            self._make_evidence("C", 0.9),
        ]

        chosen = await select_diverse_evidence(
            evidence, n=2, query="query", embeddings=fake_embeddings
        )

        assert len(chosen) == 2
        assert [e.content for e in chosen] == ["A", "C"]

    @staticmethod
    def _make_evidence(content, relevance):
        """Build a minimal Evidence record whose citation mirrors its content."""
        return Evidence(
            content=content,
            relevance=relevance,
            citation=Citation(source="web", title=content, url=content.lower(), date="2023"),
        )
|