Spaces:
Running
Running
| """Integration tests for research flows. | |
| These tests require API keys and may make real API calls. | |
| Marked with @pytest.mark.integration to skip in unit test runs. | |
| """ | |
| import pytest | |
| from src.agent_factory.agents import ( | |
| create_deep_flow, | |
| create_iterative_flow, | |
| create_planner_agent, | |
| ) | |
| from src.orchestrator.graph_orchestrator import create_graph_orchestrator | |
| from src.utils.config import settings | |
# The module docstring promises this marker so unit-test runs can deselect
# the class with ``-m "not integration"``.
@pytest.mark.integration
class TestPlannerAgentIntegration:
    """Integration tests for the agent returned by ``create_planner_agent``.

    Every test sends a real prompt through the agent, so each one is skipped
    when neither an OpenAI nor an Anthropic key is configured.

    NOTE(review): the tests are coroutines; this assumes the project's pytest
    configuration runs ``async def`` tests (e.g. an asyncio plugin) -- confirm.
    """

    @pytest.fixture(autouse=True)
    def _require_llm_api_key(self) -> None:
        """Skip the current test unless an OpenAI or Anthropic key is set."""
        if not (settings.has_openai_key or settings.has_anthropic_key):
            pytest.skip("No OpenAI or Anthropic API key available")

    async def test_planner_agent_creates_plan(self) -> None:
        """The plan has a title and a non-empty outline with a usable first section."""
        planner = create_planner_agent()

        plan = await planner.run(
            "What are the main features of Python programming language?"
        )

        assert plan.report_title
        assert len(plan.report_outline) > 0
        first_section = plan.report_outline[0]
        assert first_section.title
        assert first_section.key_question

    async def test_planner_agent_includes_background_context(self) -> None:
        """The plan carries background context longer than 50 characters."""
        planner = create_planner_agent()

        plan = await planner.run("Explain quantum computing basics")

        assert plan.background_context
        # 50 characters is the threshold used here for "substantial" context.
        assert len(plan.background_context) > 50
# The module docstring promises this marker so unit-test runs can deselect
# the class with ``-m "not integration"``.
@pytest.mark.integration
class TestIterativeResearchFlowIntegration:
    """Integration tests for the flow returned by ``create_iterative_flow``.

    Every test runs a real query through the flow, so each one is skipped
    when neither an OpenAI nor an Anthropic key is configured.

    NOTE(review): the tests are coroutines; this assumes the project's pytest
    configuration runs ``async def`` tests (e.g. an asyncio plugin) -- confirm.
    """

    @pytest.fixture(autouse=True)
    def _require_llm_api_key(self) -> None:
        """Skip the current test unless an OpenAI or Anthropic key is set."""
        if not (settings.has_openai_key or settings.has_anthropic_key):
            pytest.skip("No OpenAI or Anthropic API key available")

    async def test_iterative_flow_completes_simple_query(self) -> None:
        """A simple factual query yields a non-empty, on-topic string."""
        flow = create_iterative_flow(max_iterations=2, max_time_minutes=2)

        report = await flow.run(
            query="What is the capital of France?",
            output_length="A short paragraph",
        )

        assert isinstance(report, str)
        assert len(report) > 0
        # Lenient topical check: the answer should name Paris, but a report
        # that only mentions France is also accepted.
        lowered = report.lower()
        assert "paris" in lowered or "france" in lowered

    async def test_iterative_flow_respects_max_iterations(self) -> None:
        """With ``max_iterations=1`` the flow's iteration counter stays <= 1."""
        flow = create_iterative_flow(max_iterations=1, max_time_minutes=5)

        report = await flow.run(query="What are the main features of Python?")

        assert isinstance(report, str)
        # The flow may finish early or stop exactly at the cap.
        assert flow.iteration <= 1

    async def test_iterative_flow_with_background_context(self) -> None:
        """Passing ``background_context`` still yields a non-empty string."""
        flow = create_iterative_flow(max_iterations=2, max_time_minutes=2)

        report = await flow.run(
            query="What is machine learning?",
            background_context="Machine learning is a subset of artificial intelligence.",
        )

        assert isinstance(report, str)
        assert len(report) > 0
# The module docstring promises this marker so unit-test runs can deselect
# the class with ``-m "not integration"``.
@pytest.mark.integration
class TestDeepResearchFlowIntegration:
    """Integration tests for the flow returned by ``create_deep_flow``.

    Every test runs a real query through the flow, so each one is skipped
    when neither an OpenAI nor an Anthropic key is configured.

    NOTE(review): the tests are coroutines; this assumes the project's pytest
    configuration runs ``async def`` tests (e.g. an asyncio plugin) -- confirm.
    """

    @pytest.fixture(autouse=True)
    def _require_llm_api_key(self) -> None:
        """Skip the current test unless an OpenAI or Anthropic key is set."""
        if not (settings.has_openai_key or settings.has_anthropic_key):
            pytest.skip("No OpenAI or Anthropic API key available")

    async def test_deep_flow_creates_multi_section_report(self) -> None:
        """The report is longer than 100 characters and contains a Markdown header."""
        flow = create_deep_flow(
            max_iterations=1,  # Keep it short for testing
            max_time_minutes=3,
        )

        report = await flow.run(
            "What are the main features of Python programming language?"
        )

        assert isinstance(report, str)
        assert len(report) > 100  # Should have substantial content
        # Any Markdown header level contains "#", so one membership test
        # covers "#", "##", and deeper.
        assert "#" in report

    async def test_deep_flow_uses_long_writer(self) -> None:
        """With ``use_long_writer=True`` the flow returns a non-empty string."""
        flow = create_deep_flow(
            max_iterations=1,
            max_time_minutes=3,
            use_long_writer=True,
        )

        report = await flow.run("Explain the basics of quantum computing")

        assert isinstance(report, str)
        assert len(report) > 0

    async def test_deep_flow_uses_proofreader_when_specified(self) -> None:
        """With ``use_long_writer=False`` the flow returns a non-empty string."""
        flow = create_deep_flow(
            max_iterations=1,
            max_time_minutes=3,
            use_long_writer=False,
        )

        report = await flow.run("What is artificial intelligence?")

        assert isinstance(report, str)
        assert len(report) > 0
# The module docstring promises this marker so unit-test runs can deselect
# the class with ``-m "not integration"``.
@pytest.mark.integration
class TestGraphOrchestratorIntegration:
    """Integration tests for the orchestrator from ``create_graph_orchestrator``.

    Each test drains the orchestrator's async event stream and checks which
    event types were emitted. Tests are skipped when neither an OpenAI nor an
    Anthropic key is configured.

    NOTE(review): the tests are coroutines; this assumes the project's pytest
    configuration runs ``async def`` tests (e.g. an asyncio plugin) -- confirm.
    """

    @pytest.fixture(autouse=True)
    def _require_llm_api_key(self) -> None:
        """Skip the current test unless an OpenAI or Anthropic key is set."""
        if not (settings.has_openai_key or settings.has_anthropic_key):
            pytest.skip("No OpenAI or Anthropic API key available")

    async def test_graph_orchestrator_iterative_mode(self) -> None:
        """``mode="iterative"`` emits both a "started" and a "complete" event."""
        orchestrator = create_graph_orchestrator(
            mode="iterative",
            max_iterations=1,
            max_time_minutes=2,
        )

        events = [event async for event in orchestrator.run("What is Python?")]

        assert len(events) > 0
        event_types = [event.type for event in events]
        assert "started" in event_types
        assert "complete" in event_types

    async def test_graph_orchestrator_deep_mode(self) -> None:
        """``mode="deep"`` emits both a "started" and a "complete" event."""
        orchestrator = create_graph_orchestrator(
            mode="deep",
            max_iterations=1,
            max_time_minutes=3,
        )

        events = [
            event
            async for event in orchestrator.run("What are the main features of Python?")
        ]

        assert len(events) > 0
        event_types = [event.type for event in events]
        assert "started" in event_types
        assert "complete" in event_types

    async def test_graph_orchestrator_auto_mode(self) -> None:
        """``mode="auto"`` reaches a "complete" event."""
        orchestrator = create_graph_orchestrator(
            mode="auto",
            max_iterations=1,
            max_time_minutes=2,
        )

        events = [event async for event in orchestrator.run("What is Python?")]

        assert len(events) > 0
        # Only completion is checked: the run should finish successfully
        # regardless of which research mode gets selected.
        event_types = [event.type for event in events]
        assert "complete" in event_types
# The module docstring promises this marker so unit-test runs can deselect
# the class with ``-m "not integration"``.
@pytest.mark.integration
class TestGraphOrchestrationIntegration:
    """Integration tests that pass ``use_graph`` to the flow/orchestrator factories.

    Tests are skipped when neither an OpenAI nor an Anthropic key is
    configured.

    NOTE(review): the tests are coroutines; this assumes the project's pytest
    configuration runs ``async def`` tests (e.g. an asyncio plugin) -- confirm.
    """

    @pytest.fixture(autouse=True)
    def _require_llm_api_key(self) -> None:
        """Skip the current test unless an OpenAI or Anthropic key is set."""
        if not (settings.has_openai_key or settings.has_anthropic_key):
            pytest.skip("No OpenAI or Anthropic API key available")

    async def test_iterative_flow_with_graph_execution(self) -> None:
        """An iterative flow built with ``use_graph=True`` answers a simple query."""
        flow = create_iterative_flow(
            max_iterations=1,
            max_time_minutes=2,
            use_graph=True,
        )

        report = await flow.run(query="What is the capital of France?")

        assert isinstance(report, str)
        assert len(report) > 0
        # Lenient topical check: the answer should name Paris, but a report
        # that only mentions France is also accepted.
        lowered = report.lower()
        assert "paris" in lowered or "france" in lowered

    async def test_deep_flow_with_graph_execution(self) -> None:
        """A deep flow built with ``use_graph=True`` returns over 100 characters."""
        flow = create_deep_flow(
            max_iterations=1,
            max_time_minutes=3,
            use_graph=True,
        )

        report = await flow.run(
            "What are the main features of Python programming language?"
        )

        assert isinstance(report, str)
        assert len(report) > 100  # Should have substantial content

    async def test_graph_orchestrator_with_graph_execution(self) -> None:
        """Iterative orchestrator with ``use_graph=True`` completes with a report."""
        orchestrator = create_graph_orchestrator(
            mode="iterative",
            max_iterations=1,
            max_time_minutes=2,
            use_graph=True,
        )

        events = [event async for event in orchestrator.run("What is Python?")]

        assert len(events) > 0
        event_types = [event.type for event in events]
        assert "started" in event_types
        assert "complete" in event_types

        # The first "complete" event's message is treated as the final report;
        # its existence is guaranteed by the assertion just above.
        final_report = next(e for e in events if e.type == "complete").message
        assert isinstance(final_report, str)
        assert len(final_report) > 0

    async def test_graph_orchestrator_parallel_execution(self) -> None:
        """Deep orchestrator with ``use_graph=True`` emits "started" and "complete".

        NOTE(review): the test name refers to parallel execution, but only the
        event types are checked here; parallelism itself is not observed.
        """
        orchestrator = create_graph_orchestrator(
            mode="deep",
            max_iterations=1,
            max_time_minutes=3,
            use_graph=True,
        )

        events = [
            event
            async for event in orchestrator.run("What are the main features of Python?")
        ]

        assert len(events) > 0
        event_types = [event.type for event in events]
        assert "started" in event_types
        assert "complete" in event_types

    async def test_graph_vs_chain_execution_comparison(self) -> None:
        """Both ``use_graph=True`` and ``use_graph=False`` give a valid, on-topic answer."""
        query = "What is the capital of France?"

        # Graph execution first, then agent chains -- same order as before.
        for use_graph in (True, False):
            flow = create_iterative_flow(
                max_iterations=1,
                max_time_minutes=2,
                use_graph=use_graph,
            )

            report = await flow.run(query=query)

            assert isinstance(report, str), f"use_graph={use_graph}"
            assert len(report) > 0, f"use_graph={use_graph}"
            # Lenient topical check: Paris expected, France also accepted.
            lowered = report.lower()
            assert "paris" in lowered or "france" in lowered, f"use_graph={use_graph}"
# The module docstring promises this marker so unit-test runs can deselect
# the class with ``-m "not integration"``.
@pytest.mark.integration
class TestReportSynthesisIntegration:
    """Integration tests for the final report produced by flows and orchestrators.

    Several checks are deliberately soft (non-empty / minimum length) because
    the report's exact content and format vary between runs. Tests are skipped
    when neither an OpenAI nor an Anthropic key is configured.

    NOTE(review): the tests are coroutines; this assumes the project's pytest
    configuration runs ``async def`` tests (e.g. an asyncio plugin) -- confirm.
    """

    @pytest.fixture(autouse=True)
    def _require_llm_api_key(self) -> None:
        """Skip the current test unless an OpenAI or Anthropic key is set."""
        if not (settings.has_openai_key or settings.has_anthropic_key):
            pytest.skip("No OpenAI or Anthropic API key available")

    async def test_iterative_flow_generates_report(self) -> None:
        """The iterative flow returns an on-topic report longer than 50 characters."""
        flow = create_iterative_flow(max_iterations=1, max_time_minutes=2)

        report = await flow.run(
            query="What is the capital of France?",
            output_length="A short paragraph",
        )

        assert isinstance(report, str)
        # A minimum length stands in for "has some structure or content";
        # it also covers the non-empty check.
        assert len(report) > 50
        # Lenient topical check: Paris expected, France also accepted.
        lowered = report.lower()
        assert "paris" in lowered or "france" in lowered

    async def test_iterative_flow_includes_citations(self) -> None:
        """Soft check: a report is generated for a query that may yield citations.

        Citations (``[1]``-style or a References section) may be absent
        depending on the findings, so only non-emptiness is asserted.
        """
        flow = create_iterative_flow(max_iterations=1, max_time_minutes=2)

        report = await flow.run(
            query="What is machine learning?",
            output_length="A short paragraph",
        )

        assert isinstance(report, str)
        assert len(report) > 0

    async def test_iterative_flow_handles_empty_findings(self) -> None:
        """A query unlikely to produce findings still returns a string."""
        flow = create_iterative_flow(max_iterations=1, max_time_minutes=1)

        # Use a query that might not return findings quickly.
        report = await flow.run(
            query="Test query with no findings",
            output_length="A short paragraph",
        )

        # Only the type is checked: the report may be minimal (even empty).
        assert isinstance(report, str)

    async def test_deep_flow_with_long_writer(self) -> None:
        """With ``use_long_writer=True`` the report is substantial and structured."""
        flow = create_deep_flow(
            max_iterations=1,
            max_time_minutes=3,
            use_long_writer=True,
        )

        report = await flow.run(
            "What are the main features of Python programming language?"
        )

        assert isinstance(report, str)
        assert len(report) > 100  # Should have substantial content

        # Structure indicators: any Markdown header ("#" covers "##" too),
        # a table of contents, or an introduction.
        lowered = report.lower()
        has_structure = (
            "#" in report
            or "table of contents" in lowered
            or "introduction" in lowered
        )
        # A long-enough report is accepted even without explicit markers.
        assert has_structure or len(report) > 200

    async def test_deep_flow_creates_sections(self) -> None:
        """Soft check: the deep flow's report exceeds 100 characters.

        Section headers are not inspected; length stands in for structure.
        """
        flow = create_deep_flow(
            max_iterations=1,
            max_time_minutes=3,
            use_long_writer=True,
        )

        report = await flow.run("Explain the basics of quantum computing")

        assert isinstance(report, str)
        assert len(report) > 100

    async def test_deep_flow_aggregates_references(self) -> None:
        """Soft check: the deep flow's report exceeds 100 characters.

        References may not always be present, so no references section or
        citation format is asserted.
        """
        flow = create_deep_flow(
            max_iterations=1,
            max_time_minutes=3,
            use_long_writer=True,
        )

        report = await flow.run(
            "What are the main features of Python programming language?"
        )

        assert isinstance(report, str)
        assert len(report) > 100

    async def test_deep_flow_with_proofreader(self) -> None:
        """With ``use_long_writer=False`` the report exceeds 50 characters."""
        flow = create_deep_flow(
            max_iterations=1,
            max_time_minutes=3,
            use_long_writer=False,  # Use proofreader instead
        )

        report = await flow.run("What is artificial intelligence?")

        assert isinstance(report, str)
        # The length floor stands in for "polished, structured" output and
        # also covers the non-empty check.
        assert len(report) > 50

    async def test_proofreader_removes_duplicates(self) -> None:
        """Soft check: a report is generated on the ``use_long_writer=False`` path.

        Duplicate removal itself is not measured here.
        """
        flow = create_deep_flow(
            max_iterations=1,
            max_time_minutes=3,
            use_long_writer=False,
        )

        report = await flow.run("Explain machine learning basics")

        assert isinstance(report, str)
        assert len(report) > 0

    async def test_proofreader_adds_summary(self) -> None:
        """Soft check: a report is generated on the ``use_long_writer=False`` path.

        The summary format may vary, so no summary marker is asserted.
        """
        flow = create_deep_flow(
            max_iterations=1,
            max_time_minutes=3,
            use_long_writer=False,
        )

        report = await flow.run("What is Python programming language?")

        assert isinstance(report, str)
        assert len(report) > 0

    async def test_graph_orchestrator_uses_writer_agents(self) -> None:
        """Iterative orchestrator with ``use_graph=False`` completes with an on-topic report."""
        orchestrator = create_graph_orchestrator(
            mode="iterative",
            max_iterations=1,
            max_time_minutes=2,
            use_graph=False,  # Use agent chains to test writer integration
        )

        events = [
            event
            async for event in orchestrator.run("What is the capital of France?")
        ]

        assert len(events) > 0
        event_types = [event.type for event in events]
        assert "started" in event_types
        assert "complete" in event_types

        # The first "complete" event's message is treated as the final report;
        # its existence is guaranteed by the assertion just above.
        final_report = next(e for e in events if e.type == "complete").message
        assert isinstance(final_report, str)
        assert len(final_report) > 0
        # Lenient topical check: Paris expected, France also accepted.
        lowered = final_report.lower()
        assert "paris" in lowered or "france" in lowered

    async def test_graph_orchestrator_uses_long_writer_in_deep_mode(self) -> None:
        """Deep orchestrator with ``use_graph=False`` completes with over 100 characters."""
        orchestrator = create_graph_orchestrator(
            mode="deep",
            max_iterations=1,
            max_time_minutes=3,
            use_graph=False,  # Use agent chains
        )

        events = [
            event
            async for event in orchestrator.run("What are the main features of Python?")
        ]

        assert len(events) > 0
        event_types = [event.type for event in events]
        assert "started" in event_types
        assert "complete" in event_types

        # The first "complete" event's message is treated as the final report.
        final_report = next(e for e in events if e.type == "complete").message
        assert isinstance(final_report, str)
        # The length floor stands in for "structured content" and also covers
        # the non-empty check.
        assert len(final_report) > 100