nothingworry committed
Commit 6d531e9 · 1 Parent(s): 31f3625

Multi-Tool Parallel Execution

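For orientation, this commit teaches the planner and the orchestrator a second step shape. A minimal sketch of both shapes, following the formats defined in the diffs below (the query strings are invented examples):

    # Sequential plan (existing behavior): each step runs in order.
    steps = [
        {"tool": "rag", "input": {"query": "internal leave policy"}},
        {"tool": "web", "input": {"query": "internal leave policy"}},
        {"tool": "llm", "input": {"query": "internal leave policy"}},
    ]

    # Parallel plan (new): one step carries both queries, and the
    # orchestrator runs RAG and Web concurrently before LLM synthesis.
    steps = [
        {"parallel": {"rag": "internal leave policy",
                      "web": "current statutory leave rules"}},
        {"tool": "llm", "input": {"query": "internal leave policy"}},
    ]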
backend/api/services/agent_orchestrator.py CHANGED
@@ -23,6 +23,7 @@ from .llm_client import LLMClient
 from ..mcp_clients.mcp_client import MCPClient
 from .tool_scoring import ToolScoringService
 from ..storage.analytics_store import AnalyticsStore
+from .result_merger import merge_parallel_results, format_merged_context_for_prompt
 import time
 
 
@@ -661,126 +662,322 @@ Response:"""
                                  reasoning_trace: List[Dict[str, Any]],
                                  pre_fetched_rag: Optional[Dict[str, Any]] = None) -> AgentResponse:
         """
-        Execute multiple tools in sequence and synthesize results with LLM.
+        Execute multiple tools in sequence or parallel and synthesize results with LLM.
+        Supports parallel execution when steps are marked with "parallel" flag.
         """
+        start_time = time.time()
         rag_data = None
         web_data = None
         admin_data = None
         collected_data = []
+        tools_used = []
+        total_tokens = 0
 
-        parallel_tasks = {}
-        rag_parallel_query = self._first_query_for_tool(steps, "rag", req.message)
-        web_parallel_query = self._first_query_for_tool(steps, "web", req.message)
-        if rag_parallel_query and web_parallel_query and rag_parallel_query == web_parallel_query:
-            if not pre_fetched_rag:
-                parallel_tasks["rag"] = asyncio.create_task(self.mcp.call_rag(req.tenant_id, rag_parallel_query))
-            parallel_tasks["web"] = asyncio.create_task(self.mcp.call_web(req.tenant_id, web_parallel_query))
-
-        # Execute each step in sequence
+        # Check if any step has parallel execution flag
+        parallel_step = None
         for step_info in steps:
-            tool_name = step_info.get("tool")
-            step_input = step_info.get("input") or {}
-            query = step_input.get("query") or req.message
+            if step_info.get("parallel"):
+                parallel_step = step_info
+                break
 
-            try:
-                if tool_name == "rag":
-                    # Reuse pre-fetched RAG if available, otherwise fetch
-                    if pre_fetched_rag and query == rag_parallel_query:
-                        rag_resp = pre_fetched_rag
-                        tool_traces.append({"tool": "rag", "response": rag_resp, "note": "used_pre_fetched"})
-                    elif parallel_tasks.get("rag") and query == rag_parallel_query:
-                        rag_resp = await parallel_tasks["rag"]
-                        tool_traces.append({"tool": "rag", "response": rag_resp, "note": "parallel"})
-                    else:
-                        rag_resp = await self.mcp.call_rag(req.tenant_id, query)
-                        tool_traces.append({"tool": "rag", "response": rag_resp})
-                    rag_data = rag_resp
-                    reasoning_trace.append({
-                        "step": "tool_execution",
-                        "tool": "rag",
-                        "hit_count": len(self._extract_hits(rag_resp)),
-                        "summary": self._summarize_hits(rag_resp, limit=2)
-                    })
-                    # Extract snippets for prompt
-                    if isinstance(rag_resp, dict):
-                        hits = rag_resp.get("results") or rag_resp.get("hits") or []
-                        for h in hits[:5]:
-                            txt = h.get("text") or h.get("content") or str(h)
-                            collected_data.append(f"[RAG] {txt}")
-
-                elif tool_name == "web":
-                    if parallel_tasks.get("web") and query == web_parallel_query:
-                        web_resp = await parallel_tasks["web"]
-                        tool_traces.append({"tool": "web", "response": web_resp, "note": "parallel"})
+        # Handle parallel execution if detected
+        if parallel_step and parallel_step.get("parallel"):
+            parallel_config = parallel_step.get("parallel")
+            parallel_tasks = {}
+            start_time_parallel = time.time()
+
+            # Prepare parallel tasks
+            if "rag" in parallel_config:
+                rag_query = parallel_config["rag"]
+                if pre_fetched_rag:
+                    # Use pre-fetched RAG if available - create a simple async function
+                    async def get_prefetched_rag():
+                        return pre_fetched_rag
+                    parallel_tasks["rag"] = get_prefetched_rag()
+                else:
+                    parallel_tasks["rag"] = self.mcp.call_rag(req.tenant_id, rag_query)
+
+            if "web" in parallel_config:
+                web_query = parallel_config["web"]
+                parallel_tasks["web"] = self.mcp.call_web(req.tenant_id, web_query)
+
+            # Execute tools in parallel
+            if parallel_tasks:
+                reasoning_trace.append({
+                    "step": "parallel_execution",
+                    "tools": list(parallel_tasks.keys()),
+                    "mode": "parallel"
+                })
+
+                parallel_results = await self.run_parallel_tools(parallel_tasks)
+                parallel_latency_ms = int((time.time() - start_time_parallel) * 1000)
+
+                # Process RAG results
+                if "rag" in parallel_results:
+                    rag_result = parallel_results["rag"]
+                    if isinstance(rag_result, Exception):
+                        tool_traces.append({"tool": "rag", "error": str(rag_result), "note": "parallel"})
+                        reasoning_trace.append({
+                            "step": "tool_execution",
+                            "tool": "rag",
+                            "status": "error",
+                            "error": str(rag_result),
+                            "latency_ms": parallel_latency_ms
+                        })
+                        self.analytics.log_tool_usage(
+                            tenant_id=req.tenant_id,
+                            tool_name="rag",
+                            latency_ms=parallel_latency_ms,
+                            success=False,
+                            error_message=str(rag_result)[:200],
+                            user_id=req.user_id
+                        )
                     else:
-                        web_resp = await self.mcp.call_web(req.tenant_id, query)
-                        tool_traces.append({"tool": "web", "response": web_resp})
-                    web_data = web_resp
-                    reasoning_trace.append({
-                        "step": "tool_execution",
-                        "tool": "web",
-                        "hit_count": len(self._extract_hits(web_resp)),
-                        "summary": self._summarize_hits(web_resp, limit=2)
-                    })
-                    # Extract snippets for prompt
-                    if isinstance(web_resp, dict):
-                        hits = web_resp.get("results") or web_resp.get("items") or []
-                        for h in hits[:5]:
-                            title = h.get("title") or h.get("headline") or ""
-                            snippet = h.get("snippet") or h.get("summary") or h.get("text") or ""
-                            url = h.get("url") or h.get("link") or ""
-                            collected_data.append(f"[WEB] {title}\n{snippet}\nSource: {url}")
-
-                elif tool_name == "admin":
-                    admin_resp = await self.mcp.call_admin(req.tenant_id, query)
-                    tool_traces.append({"tool": "admin", "response": admin_resp})
-                    admin_data = admin_resp
-                    collected_data.append(f"[ADMIN] {json.dumps(admin_resp)}")
-                    reasoning_trace.append({
-                        "step": "tool_execution",
-                        "tool": "admin",
-                        "status": "completed"
-                    })
+                        rag_data = rag_result
+                        tools_used.append("rag")
+                        tool_traces.append({"tool": "rag", "response": rag_result, "note": "parallel"})
+                        hits_count = len(self._extract_hits(rag_result))
+                        avg_score = None
+                        top_score = None
+                        if hits_count > 0:
+                            scores = [h.get("score", 0.0) for h in self._extract_hits(rag_result) if isinstance(h, dict) and "score" in h]
+                            if scores:
+                                avg_score = sum(scores) / len(scores)
+                                top_score = max(scores)
+                        self.analytics.log_rag_search(
+                            tenant_id=req.tenant_id,
+                            query=req.message[:500],
+                            hits_count=hits_count,
+                            avg_score=avg_score,
+                            top_score=top_score,
+                            latency_ms=parallel_latency_ms
+                        )
+                        self.analytics.log_tool_usage(
+                            tenant_id=req.tenant_id,
+                            tool_name="rag",
+                            latency_ms=parallel_latency_ms,
+                            success=True,
+                            user_id=req.user_id
+                        )
+                        reasoning_trace.append({
+                            "step": "tool_execution",
+                            "tool": "rag",
+                            "hit_count": hits_count,
+                            "summary": self._summarize_hits(rag_result, limit=2),
+                            "latency_ms": parallel_latency_ms,
+                            "mode": "parallel"
+                        })
 
-                elif tool_name == "llm":
-                    # LLM is always last - synthesize all collected data
-                    break
+                # Process Web results
+                if "web" in parallel_results:
+                    web_result = parallel_results["web"]
+                    if isinstance(web_result, Exception):
+                        tool_traces.append({"tool": "web", "error": str(web_result), "note": "parallel"})
+                        reasoning_trace.append({
+                            "step": "tool_execution",
+                            "tool": "web",
+                            "status": "error",
+                            "error": str(web_result),
+                            "latency_ms": parallel_latency_ms
+                        })
+                        self.analytics.log_tool_usage(
+                            tenant_id=req.tenant_id,
+                            tool_name="web",
+                            latency_ms=parallel_latency_ms,
+                            success=False,
+                            error_message=str(web_result)[:200],
+                            user_id=req.user_id
+                        )
+                    else:
+                        web_data = web_result
+                        tools_used.append("web")
+                        tool_traces.append({"tool": "web", "response": web_result, "note": "parallel"})
+                        hits_count = len(self._extract_hits(web_result))
+                        self.analytics.log_tool_usage(
+                            tenant_id=req.tenant_id,
+                            tool_name="web",
+                            latency_ms=parallel_latency_ms,
+                            success=True,
+                            user_id=req.user_id
+                        )
+                        reasoning_trace.append({
+                            "step": "tool_execution",
+                            "tool": "web",
+                            "hit_count": hits_count,
+                            "summary": self._summarize_hits(web_result, limit=2),
+                            "latency_ms": parallel_latency_ms,
+                            "mode": "parallel"
+                        })
 
-            except Exception as e:
-                tool_traces.append({"tool": tool_name, "error": str(e)})
-                # Continue with other tools even if one fails
+                # Merge parallel results
+                merged_context = merge_parallel_results(parallel_results)
+                sources_list = list(set(e.get("source") for e in merged_context if e.get("source"))) if merged_context else []
                 reasoning_trace.append({
-                    "step": "error",
-                    "tool": tool_name,
-                    "error": str(e)
+                    "step": "result_merger",
+                    "merged_items": len(merged_context),
+                    "sources": sources_list
                 })
 
-        # Build comprehensive prompt with all collected data
-        data_section = "\n---\n".join(collected_data) if collected_data else ""
-
+                # Format merged context for prompt
+                data_section = format_merged_context_for_prompt(merged_context, max_items=10)
+            else:
+                data_section = ""
+
+        else:
+            # Sequential execution (original logic)
+            parallel_tasks = {}
+            rag_parallel_query = self._first_query_for_tool(steps, "rag", req.message)
+            web_parallel_query = self._first_query_for_tool(steps, "web", req.message)
+            if rag_parallel_query and web_parallel_query and rag_parallel_query == web_parallel_query:
+                if not pre_fetched_rag:
+                    parallel_tasks["rag"] = asyncio.create_task(self.mcp.call_rag(req.tenant_id, rag_parallel_query))
+                parallel_tasks["web"] = asyncio.create_task(self.mcp.call_web(req.tenant_id, web_parallel_query))
+
+            # Execute each step in sequence
+            for step_info in steps:
+                tool_name = step_info.get("tool")
+                step_input = step_info.get("input") or {}
+                query = step_input.get("query") or req.message
+
+                try:
+                    if tool_name == "rag":
+                        # Reuse pre-fetched RAG if available, otherwise fetch
+                        if pre_fetched_rag and query == rag_parallel_query:
+                            rag_resp = pre_fetched_rag
+                            tool_traces.append({"tool": "rag", "response": rag_resp, "note": "used_pre_fetched"})
+                        elif parallel_tasks.get("rag") and query == rag_parallel_query:
+                            rag_resp = await parallel_tasks["rag"]
+                            tool_traces.append({"tool": "rag", "response": rag_resp, "note": "parallel"})
+                        else:
+                            rag_resp = await self.mcp.call_rag(req.tenant_id, query)
+                            tool_traces.append({"tool": "rag", "response": rag_resp})
+                        rag_data = rag_resp
+                        tools_used.append("rag")
+                        reasoning_trace.append({
+                            "step": "tool_execution",
+                            "tool": "rag",
+                            "hit_count": len(self._extract_hits(rag_resp)),
+                            "summary": self._summarize_hits(rag_resp, limit=2)
+                        })
+                        # Extract snippets for prompt
+                        if isinstance(rag_resp, dict):
+                            hits = rag_resp.get("results") or rag_resp.get("hits") or []
+                            for h in hits[:5]:
+                                txt = h.get("text") or h.get("content") or str(h)
+                                collected_data.append(f"[RAG] {txt}")
+
+                    elif tool_name == "web":
+                        if parallel_tasks.get("web") and query == web_parallel_query:
+                            web_resp = await parallel_tasks["web"]
+                            tool_traces.append({"tool": "web", "response": web_resp, "note": "parallel"})
+                        else:
+                            web_resp = await self.mcp.call_web(req.tenant_id, query)
+                            tool_traces.append({"tool": "web", "response": web_resp})
+                        web_data = web_resp
+                        tools_used.append("web")
+                        reasoning_trace.append({
+                            "step": "tool_execution",
+                            "tool": "web",
+                            "hit_count": len(self._extract_hits(web_resp)),
+                            "summary": self._summarize_hits(web_resp, limit=2)
+                        })
+                        # Extract snippets for prompt
+                        if isinstance(web_resp, dict):
+                            hits = web_resp.get("results") or web_resp.get("items") or []
+                            for h in hits[:5]:
+                                title = h.get("title") or h.get("headline") or ""
+                                snippet = h.get("snippet") or h.get("summary") or h.get("text") or ""
+                                url = h.get("url") or h.get("link") or ""
+                                collected_data.append(f"[WEB] {title}\n{snippet}\nSource: {url}")
+
+                    elif tool_name == "admin":
+                        admin_resp = await self.mcp.call_admin(req.tenant_id, query)
+                        tool_traces.append({"tool": "admin", "response": admin_resp})
+                        admin_data = admin_resp
+                        tools_used.append("admin")
+                        collected_data.append(f"[ADMIN] {json.dumps(admin_resp)}")
+                        reasoning_trace.append({
+                            "step": "tool_execution",
+                            "tool": "admin",
+                            "status": "completed"
+                        })
+
+                    elif tool_name == "llm":
+                        # LLM is always last - synthesize all collected data
+                        break
+
+                except Exception as e:
+                    tool_traces.append({"tool": tool_name, "error": str(e)})
+                    # Continue with other tools even if one fails
+                    reasoning_trace.append({
+                        "step": "error",
+                        "tool": tool_name,
+                        "error": str(e)
+                    })
+
+            # Build comprehensive prompt with all collected data
+            data_section = "\n---\n".join(collected_data) if collected_data else ""
+
+        # Build final prompt
         if data_section:
             prompt = (
                 f"You are an assistant helping tenant {req.tenant_id}.\n\n"
-                f"The following information has been gathered from multiple sources:\n\n"
+                f"## Information Collected\n"
+                f"The following details have been gathered from multiple reliable sources:\n"
                 f"{data_section}\n\n"
-                f"User question: {req.message}\n\n"
-                f"Provide a comprehensive, accurate answer using the information above. "
-                f"Cite sources where appropriate (RAG for internal docs, WEB for online sources)."
+                f"## User Request\n"
+                f"{req.message}\n\n"
+                f"## Your Task\n"
+                f"Use the information above to directly address the user's request. "
+                f"Focus on giving the user exactly what they need—clear guidance, accurate facts, "
+                f"and practical steps whenever possible. If the information is incomplete, explain "
+                f"what can and cannot be concluded from the available data."
             )
+
         else:
             # No data collected, just answer the question
             prompt = req.message
 
         # Final LLM synthesis
         try:
+            llm_start = time.time()
             llm_out = await self.llm.simple_call(prompt, temperature=req.temperature)
+            llm_latency_ms = int((time.time() - llm_start) * 1000)
+            tools_used.append("llm")
+
+            estimated_tokens = len(llm_out) // 4 + len(prompt) // 4
+            total_tokens += estimated_tokens
+
+            self.analytics.log_tool_usage(
+                tenant_id=req.tenant_id,
+                tool_name="llm",
+                latency_ms=llm_latency_ms,
+                tokens_used=estimated_tokens,
+                success=True,
+                user_id=req.user_id
+            )
+
+            total_latency_ms = int((time.time() - start_time) * 1000)
+            self.analytics.log_agent_query(
+                tenant_id=req.tenant_id,
+                message_preview=req.message[:200],
+                intent="multi_step",
+                tools_used=tools_used,
+                total_tokens=total_tokens,
+                total_latency_ms=total_latency_ms,
+                success=True,
+                user_id=req.user_id
+            )
+
             return AgentResponse(
                 text=llm_out,
                 decision=decision,
                 tool_traces=tool_traces,
                 reasoning_trace=reasoning_trace + [{
                     "step": "llm_response",
-                    "mode": "multi_step"
+                    "mode": "multi_step_parallel" if parallel_step else "multi_step",
+                    "latency_ms": llm_latency_ms,
+                    "estimated_tokens": estimated_tokens
                 }]
             )
         except Exception as e:
@@ -826,10 +1023,21 @@ Response:"""
             snippets.append(f"{title}\n{snippet}\nSource: {url}")
 
         snippet_text = "\n---\n".join(snippets) or ""
+        # prompt = (
+        #     f"You are an assistant with access to recent web search results. Use the following results to answer.\n{snippet_text}\n\n"
+        #     f"User question: {req.message}\nAnswer succinctly and indicate which results you used."
+        # )
         prompt = (
-            f"You are an assistant with access to recent web search results. Use the following results to answer.\n{snippet_text}\n\n"
-            f"User question: {req.message}\nAnswer succinctly and indicate which results you used."
+            f"You are an assistant with access to recent web search results.\n\n"
+            f"## Search Results\n"
+            f"{snippet_text}\n\n"
+            f"## User Question\n"
+            f"{req.message}\n\n"
+            f"## Your Task\n"
+            f"Provide a clear, accurate, and succinct answer based on the search results above. "
+            f"Indicate which results you used in your reasoning."
        )
+
         return prompt
 
     @staticmethod
@@ -849,6 +1057,30 @@ Response:"""
                 summaries.append(snippet[:160])
         return summaries
 
+    async def run_parallel_tools(self, tasks: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Run multiple tools in parallel using asyncio.gather.
+
+        Args:
+            tasks: Dictionary mapping tool names to coroutines, e.g.:
+                   {"rag": rag_coro, "web": web_coro}
+
+        Returns:
+            Dictionary mapping tool names to results, e.g.:
+            {"rag": rag_result, "web": web_result}
+            Exceptions are returned as values if a tool fails.
+        """
+        if not tasks:
+            return {}
+
+        names = list(tasks.keys())
+        coros = [tasks[name] for name in names]
+
+        # Run all coroutines in parallel, return exceptions instead of raising
+        results = await asyncio.gather(*coros, return_exceptions=True)
+
+        return {names[i]: results[i] for i in range(len(names))}
+
     @staticmethod
     def _first_query_for_tool(steps: List[Dict[str, Any]], tool_name: str, default_query: str) -> Optional[str]:
         for step in steps:
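The new run_parallel_tools helper is a thin wrapper over asyncio.gather(..., return_exceptions=True): a failing tool surfaces as an Exception value in the result dict instead of cancelling its sibling, which is what lets the error branches above log one tool's failure while still using the other's results. A self-contained sketch of the same pattern (fake_rag and failing_web are hypothetical stand-ins for self.mcp.call_rag and self.mcp.call_web):

    import asyncio

    async def run_parallel_tools(tasks):
        # Same contract as the method above: tool names in, results or Exceptions out.
        names = list(tasks.keys())
        results = await asyncio.gather(*(tasks[n] for n in names), return_exceptions=True)
        return dict(zip(names, results))

    async def fake_rag(query):
        return {"hits": [{"text": f"doc about {query}", "score": 0.9}]}

    async def failing_web(query):
        raise TimeoutError("search backend unreachable")  # simulated failure

    async def main():
        out = await run_parallel_tools({
            "rag": fake_rag("leave policy"),
            "web": failing_web("leave policy"),
        })
        for name, res in out.items():
            if isinstance(res, Exception):
                print(name, "failed:", res)  # web failed: search backend unreachable
            else:
                print(name, "ok:", res)      # rag ok: {'hits': [...]}

    asyncio.run(main())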
backend/api/services/result_merger.py ADDED
@@ -0,0 +1,136 @@
+"""
+Result Merger Utility
+
+Merges and ranks results from parallel tool execution (RAG + Web).
+"""
+
+from typing import List, Dict, Any, Optional
+
+
+def merge_parallel_results(results: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """
+    Merge results from parallel tool execution (RAG + Web).
+
+    Args:
+        results: Dictionary with keys like "rag" and "web" containing tool outputs
+
+    Returns:
+        List of merged context entries, sorted by score (descending)
+    """
+    final_context = []
+
+    # Extract RAG results
+    if "rag" in results and results["rag"]:
+        rag_data = results["rag"]
+
+        # Handle different RAG response formats
+        if isinstance(rag_data, dict):
+            hits = rag_data.get("results") or rag_data.get("hits") or []
+        elif isinstance(rag_data, list):
+            hits = rag_data
+        else:
+            hits = []
+
+        for hit in hits:
+            if isinstance(hit, dict):
+                content = hit.get("text") or hit.get("content") or str(hit)
+                score = hit.get("score", 0.0)
+                doc_id = hit.get("doc_id") or hit.get("id")
+                source = hit.get("source") or hit.get("url") or "internal_doc"
+            else:
+                content = str(hit)
+                score = 0.5  # Default score for non-dict hits
+                doc_id = None
+                source = "internal_doc"
+
+            if content:
+                final_context.append({
+                    "source": "internal_policy",
+                    "text": content,
+                    "score": float(score),
+                    "doc_id": doc_id,
+                    "source_url": source if isinstance(source, str) else None
+                })
+
+    # Extract Web results
+    if "web" in results and results["web"]:
+        web_data = results["web"]
+
+        # Handle different Web response formats
+        if isinstance(web_data, dict):
+            items = web_data.get("results") or web_data.get("items") or []
+        elif isinstance(web_data, list):
+            items = web_data
+        else:
+            items = []
+
+        for item in items:
+            if isinstance(item, dict):
+                title = item.get("title") or item.get("headline") or ""
+                snippet = item.get("snippet") or item.get("summary") or item.get("text") or ""
+                url = item.get("url") or item.get("link") or ""
+                # Web results get a baseline confidence score
+                score = item.get("score", 0.5)
+            else:
+                title = ""
+                snippet = str(item)
+                url = ""
+                score = 0.5
+
+            if snippet or title:
+                # Combine title and snippet for better context
+                text = f"{title}\n{snippet}" if title else snippet
+                final_context.append({
+                    "source": "live_web",
+                    "text": text,
+                    "score": float(score),
+                    "url": url,
+                    "title": title
+                })
+
+    # Sort by score descending (highest relevance first)
+    final_context.sort(key=lambda x: x["score"], reverse=True)
+
+    return final_context
+
+
+def format_merged_context_for_prompt(merged_context: List[Dict[str, Any]],
+                                     max_items: int = 10) -> str:
+    """
+    Format merged context into a readable prompt section.
+
+    Args:
+        merged_context: List of merged context entries from merge_parallel_results
+        max_items: Maximum number of items to include
+
+    Returns:
+        Formatted string ready for LLM prompt
+    """
+    if not merged_context:
+        return ""
+
+    sections = []
+    for entry in merged_context[:max_items]:
+        source_label = entry.get("source", "unknown")
+        text = entry.get("text", "")
+        score = entry.get("score", 0.0)
+
+        # Format based on source type
+        if source_label == "internal_policy":
+            source_url = entry.get("source_url")
+            if source_url:
+                sections.append(f"[INTERNAL DOCUMENT - {source_url}]\n{text}")
+            else:
+                sections.append(f"[INTERNAL DOCUMENT]\n{text}")
+        elif source_label == "live_web":
+            url = entry.get("url", "")
+            title = entry.get("title", "")
+            if url:
+                sections.append(f"[WEB SOURCE - {url}]\n{title}\n{text}")
+            else:
+                sections.append(f"[WEB SOURCE]\n{title}\n{text}")
+        else:
+            sections.append(f"[{source_label.upper()}]\n{text}")
+
+    return "\n\n---\n\n".join(sections)
+
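A quick worked example of the two helpers with toy payloads (the payload shapes mirror what the orchestrator passes in, all values are invented, and the module is assumed importable as result_merger):

    from result_merger import merge_parallel_results, format_merged_context_for_prompt

    results = {
        "rag": {"hits": [{"text": "PTO accrues at 1.5 days per month.",
                          "score": 0.92, "doc_id": "hr-7"}]},
        "web": {"items": [{"title": "Statutory leave 2024",
                           "snippet": "Minimum of 20 paid days.",
                           "url": "https://example.com/leave"}]},
    }

    merged = merge_parallel_results(results)
    print([(e["source"], e["score"]) for e in merged])
    # [('internal_policy', 0.92), ('live_web', 0.5)]  (sorted by score; web defaults to 0.5)

    print(format_merged_context_for_prompt(merged, max_items=10))
    # [INTERNAL DOCUMENT - internal_doc]   <- the hit has no source/url, so the
    # PTO accrues at 1.5 days per month.      "internal_doc" fallback becomes the label
    #
    # ---
    #
    # [WEB SOURCE - https://example.com/leave]
    # Statutory leave 2024                 <- title appears once from the entry and
    # Statutory leave 2024                    once inside the merged text field
    # Minimum of 20 paid days.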
backend/api/services/tool_selector.py CHANGED
@@ -80,10 +80,13 @@ class ToolSelector:
         # ---------------------------------
         # 6. Use LLM to enhance plan if we have partial steps or complex query
         # ---------------------------------
+        # Check if we should use parallel execution (both RAG and Web needed)
+        should_parallel = needs_rag and needs_web and (needs_multiple or rag_score >= 0.55 and web_score >= 0.55)
+
         if self.llm_client and (needs_multiple or (needs_rag and needs_web) or len(steps) == 0):
             plan_prompt = f"""
 You are an enterprise MCP agent.
-You can select MULTIPLE tools in sequence to provide comprehensive answers.
+You can select MULTIPLE tools in sequence OR in parallel to provide comprehensive answers.
 
 TOOLS:
 - rag → private knowledge retrieval (use for internal/company docs)
@@ -101,8 +104,22 @@ Determine which tools are needed. You can select:
 - Web + LLM (public fact questions)
 - RAG + Web + LLM (comprehensive questions needing both sources)
 
-Return a JSON list describing the steps, e.g.:
+IMPORTANT: If the query needs BOTH internal docs (RAG) AND current/live info (Web),
+you can mark them for parallel execution by using a "parallel" step.
 
+Return a JSON list describing the steps. For parallel execution, use:
+[
+  {{
+    "parallel": {{
+      "rag": "query for internal docs",
+      "web": "query for live info"
+    }},
+    "reason": "Need both internal and live information simultaneously"
+  }},
+  {{"tool": "llm", "reason": "Synthesize all information"}}
+]
+
+For sequential execution, use:
 [
   {{"tool": "rag", "reason": "Need internal documentation"}},
   {{"tool": "web", "reason": "Need current public information"}},
@@ -125,27 +142,96 @@ Only return the JSON array. Do not include markdown formatting.
 
                 steps_json = json.loads(out)
 
-                # Replace steps with LLM-planned steps (excluding LLM, we'll add it at end)
-                steps = [
-                    step(s["tool"], {"query": text})
-                    for s in steps_json if s.get("tool") != "llm"
-                ]
+                # Check if LLM returned a parallel step
+                has_parallel = any("parallel" in s for s in steps_json)
+
+                if has_parallel:
+                    # Extract parallel step and convert to our format
+                    parallel_step = None
+                    other_steps = []
+                    for s in steps_json:
+                        if "parallel" in s:
+                            parallel_step = {"parallel": s["parallel"]}
+                        elif s.get("tool") != "llm":
+                            other_steps.append(step(s["tool"], {"query": text}))
+
+                    if parallel_step:
+                        steps = [parallel_step] + other_steps
+                    else:
+                        # Fallback: convert to regular steps
+                        steps = [
+                            step(s["tool"], {"query": text})
+                            for s in steps_json if s.get("tool") != "llm"
+                        ]
+                else:
+                    # Replace steps with LLM-planned steps (excluding LLM, we'll add it at end)
+                    steps = [
+                        step(s["tool"], {"query": text})
+                        for s in steps_json if s.get("tool") != "llm"
+                    ]
             except Exception as e:
-                # If LLM planning fails, keep existing steps or use fallback
-                if not steps:
+                # If LLM planning fails, check if we should create parallel step manually
+                if should_parallel and needs_rag and needs_web:
+                    # Create parallel step manually
+                    steps = [{
+                        "parallel": {
+                            "rag": text,
+                            "web": text
+                        }
+                    }]
+                elif not steps:
                     steps = []
 
         # ---------------------------------
-        # 7. Always end with LLM synthesis
+        # 7. If we have both RAG and Web but no parallel step, consider creating one
+        # ---------------------------------
+        if should_parallel and needs_rag and needs_web:
+            # Check if we already have a parallel step
+            has_parallel = any("parallel" in s for s in steps)
+            if not has_parallel:
+                # Replace sequential RAG and Web steps with a parallel step
+                new_steps = []
+                rag_query = text
+                web_query = text
+
+                # Extract queries from existing steps if available
+                for s in steps:
+                    if s.get("tool") == "rag":
+                        rag_query = s.get("input", {}).get("query", text)
+                    elif s.get("tool") == "web":
+                        web_query = s.get("input", {}).get("query", text)
+
+                # Create parallel step
+                new_steps.append({
+                    "parallel": {
+                        "rag": rag_query,
+                        "web": web_query
+                    }
+                })
+
+                # Keep other non-RAG/Web steps
+                for s in steps:
+                    if s.get("tool") not in ["rag", "web"]:
+                        new_steps.append(s)
+
+                steps = new_steps
+
+        # ---------------------------------
+        # 8. Always end with LLM synthesis
         # ---------------------------------
-        if not steps or steps[-1]["tool"] != "llm":
+        if not steps or (isinstance(steps[-1], dict) and steps[-1].get("tool") != "llm" and "parallel" not in steps[-1]):
             steps.append(step("llm", {
                 "rag_data": rag_results if rag_has_data else None,
                 "query": text
             }))
 
         # Build reason string showing the tool sequence
-        tool_names = [s["tool"] for s in steps]
+        tool_names = []
+        for s in steps:
+            if "parallel" in s:
+                tool_names.append("parallel(RAG+Web)")
+            elif isinstance(s, dict) and "tool" in s:
+                tool_names.append(s["tool"])
         reason = f"multi-tool plan: {' → '.join(tool_names)} | scores={tool_scores}"
 
         return _multi_step(steps, reason)
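The step-7 rewrite is easiest to see end to end: when scoring says both tools are needed but the plan came back sequential, the rag and web steps collapse into a single parallel step. A standalone sketch of that normalization (step() is assumed to build {"tool": ..., "input": {...}} as elsewhere in this file):

    def step(tool, payload):
        # assumed shape of the module's step() helper
        return {"tool": tool, "input": payload}

    text = "VPN policy and current OpenVPN CVEs"
    steps = [step("rag", {"query": text}), step("web", {"query": text})]

    # Step 7: fold sequential rag+web into one parallel step, keep the rest.
    if not any("parallel" in s for s in steps):
        rag_q = next((s["input"]["query"] for s in steps if s.get("tool") == "rag"), text)
        web_q = next((s["input"]["query"] for s in steps if s.get("tool") == "web"), text)
        others = [s for s in steps if s.get("tool") not in ("rag", "web")]
        steps = [{"parallel": {"rag": rag_q, "web": web_q}}] + others

    print(steps)
    # [{'parallel': {'rag': 'VPN policy and current OpenVPN CVEs',
    #                'web': 'VPN policy and current OpenVPN CVEs'}}]

Note that with the committed step-8 condition, a trailing parallel step suppresses the extra llm step; synthesis still happens because the orchestrator's final LLM call is unconditional.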
data/analytics.db CHANGED
Binary files a/data/analytics.db and b/data/analytics.db differ