Spaces:
Running
Running
| """Web search tool using DuckDuckGo.""" | |
| import asyncio | |
| from typing import Any | |
| from duckduckgo_search import DDGS | |
| from src.utils.exceptions import SearchError | |
| from src.utils.models import Citation, Evidence | |
class WebTool:
    """Search tool for general web search via DuckDuckGo.

    The underlying ``duckduckgo_search`` client is synchronous, so the public
    coroutine :meth:`search` delegates the blocking work to a worker thread.
    """

    # Truncation rationale: LLM context limits + cost optimization
    # - Content: 1000 chars (~250 tokens) - web snippets are shorter than abstracts
    # - Title: 500 chars covers most web page titles
    _MAX_CONTENT_CHARS = 1000
    _MAX_TITLE_CHARS = 500

    def __init__(self) -> None:
        """Create the tool; it holds no per-instance state."""

    def name(self) -> str:
        """Return the identifier of this tool (``"web"``)."""
        return "web"

    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        """Search DuckDuckGo and return the hits as evidence.

        Note: duckduckgo-search is synchronous, so the lookup runs in a worker
        thread to avoid blocking the event loop.

        Args:
            query: Search string forwarded to DuckDuckGo.
            max_results: Upper bound on the number of hits requested.

        Returns:
            One ``Evidence`` item per search hit.

        Raises:
            SearchError: If the lookup or the result conversion fails for any
                reason; the original exception is chained as the cause.
        """
        try:
            return await asyncio.to_thread(self._sync_search, query, max_results)
        except Exception as e:
            raise SearchError(f"Web search failed: {e}") from e

    @staticmethod
    def _truncate(value: Any, limit: int, default: str = "") -> str:
        """Return ``value`` as text cut to ``limit`` characters.

        ``None`` is replaced by ``default`` (returned as-is) so that a result
        carrying an explicit null field does not break slicing. Non-string
        values are converted with ``str()`` before truncation.
        """
        if value is None:
            return default
        text = value if isinstance(value, str) else str(value)
        return text[:limit]

    def _sync_search(self, query: str, max_results: int) -> list[Evidence]:
        """Run the blocking DuckDuckGo text search and convert the hits.

        Each hit is read through its ``body``, ``title`` and ``href`` keys.
        Missing or ``None`` fields fall back to ``""`` (body, url) or
        ``"Unknown"`` (title). Date and authors are not read from the hit, so
        they are filled with placeholders.
        """
        with DDGS() as ddgs:
            # Materialise inside the ``with`` block so the client is still open
            # while results are being consumed.
            results: list[dict[str, Any]] = list(ddgs.text(query, max_results=max_results))

        return [
            Evidence(
                content=self._truncate(result.get("body"), self._MAX_CONTENT_CHARS),
                citation=Citation(
                    source="web",
                    title=self._truncate(
                        result.get("title"), self._MAX_TITLE_CHARS, "Unknown"
                    ),
                    # ``or ""`` also covers an explicit None href.
                    url=result.get("href") or "",
                    date="Unknown",
                    authors=[],
                ),
            )
            for result in results
        ]