From b627b4319ae19de687bbe98144ea94ec346a40b2 Mon Sep 17 00:00:00 2001 From: oho Date: Tue, 13 Jan 2026 14:24:15 +0100 Subject: [PATCH] test: add comprehensive integration tests for trading system Add 14 integration tests covering: - Full agent coordination flow (Research -> Risk -> Debate -> Execution) - Multi-agent debate system with multiple personas - Confidence calibration tracking - Signal aggregation from multiple sources - Event calendar integration - End-to-end trading cycle with mocked broker - Risk rejection scenarios - Error handling with LLM failures - Performance and latency tracking All tests use MockLLMProvider matching the real LLMResponse interface. Total test count: 328 passed, 2 skipped. --- tests/test_integration.py | 797 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 797 insertions(+) create mode 100644 tests/test_integration.py diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 0000000..4486909 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,797 @@ +""" +Integration Tests for Alpha Arena Trading System. + +These tests verify that all components work together correctly: +- Multi-agent coordination (Research, Risk, Execution, Reflection) +- Debate system integration +- Signal aggregation +- Full trading loop +""" + +import asyncio +import json +import pytest +from datetime import datetime, timedelta +from decimal import Decimal +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +from src.core.types import ( + MarketState, OrderBook, PriceLevel, Signal, SignalType, + Edge, Confidence +) +from src.core.config import AgentConfig +from src.agents.coordinator import AgentCoordinator, CoordinatorDecision +from src.agents.base import AgentRole, AgentResponse +from src.agents.debate import ( + DebateOrchestrator, DebatePersona, DebatePosition, + DebateResult, ConfidenceCalibrator +) +from src.signals.aggregator import SignalAggregator, SignalSource, AggregatedSignal +from src.signals.events import EventCalendar, MarketEvent, EventType, EventImpact + +pytestmark = [pytest.mark.integration] + + +# ============================================================================ +# Mock LLM Provider +# ============================================================================ + +class MockLLMResponse: + """Mock LLM response object matching LLMResponse interface.""" + def __init__(self, content: str, tokens_used: int = 100): + self.content = content + self.model = "mock-model" + self.provider = "mock" + self.tokens_input = tokens_used // 2 + self.tokens_output = tokens_used // 2 + self.tokens_used = tokens_used + self.latency_ms = 50 + self.cost_estimate = Decimal("0.001") + self.finish_reason = "stop" + self.timestamp = datetime.utcnow() + self.metadata = {} + + @property + def total_tokens(self) -> int: + return self.tokens_input + self.tokens_output + + +class MockLLMProvider: + """Mock LLM provider that returns structured responses for agents.""" + + def __init__(self, response_map: dict[str, str] | None = None): + self.response_map = response_map or {} + self.call_history: list[dict] = [] + self.default_model = "mock-model" + self.call_count = 0 + + async def complete( + self, + messages: list | None = None, + system: str | None = None, + json_mode: bool = False, + **kwargs + ) -> MockLLMResponse: + """Return mock response based on system prompt.""" + self.call_count += 1 + + # Extract prompt from messages if provided + prompt = "" + if messages: + prompt = str(messages[0].content if hasattr(messages[0], 'content') else messages[0])[:200] + + self.call_history.append({ + "prompt": prompt, + "system": system[:100] if system else None, + }) + + # Determine response based on system prompt + content = self._get_response_content(system) + return MockLLMResponse(content=content, tokens_used=100) + + def _get_response_content(self, system_prompt: str | None) -> str: + """Get response content based on system prompt.""" + if system_prompt: + system_lower = system_prompt.lower() + if "research" in system_lower: + return self._research_response() + elif "risk" in system_lower: + return self._risk_response() + elif "execution" in system_lower: + return self._execution_response() + elif "reflection" in system_lower: + return self._reflection_response() + elif "debate" in system_lower: + return self._debate_response() + + # Default response + return json.dumps({"response": "mock response"}) + + def _research_response(self) -> str: + """Mock research agent response.""" + return json.dumps({ + "opportunities": [ + { + "market_id": "test_market_001", + "question": "Will test event occur?", + "direction": "YES", + "current_price": 0.45, + "fair_value_estimate": 0.65, + "confidence": 0.75, + "edge_percentage": 20, + "edge_source": "fundamental_analysis", + "reasoning": "Strong fundamentals suggest YES outcome", + "catalyst": "Upcoming announcement", + "timeframe": "1-2 weeks", + }, + ], + "market_analysis": "Market showing bullish signals", + "risk_factors": ["Market volatility", "Low liquidity"], + }) + + def _risk_response(self) -> str: + """Mock risk agent response.""" + return json.dumps({ + "approved_trades": [ + { + "market_id": "test_market_001", + "approved": True, + "recommended_size_usdc": 25, + "max_size_usdc": 50, + "kelly_fraction": 0.15, + "risk_score": 0.3, + "reasoning": "Acceptable risk-reward ratio", + }, + ], + "portfolio_risk": { + "current_exposure": 0.25, + "max_recommended": 0.5, + "correlation_risk": "low", + }, + "warnings": [], + }) + + def _execution_response(self) -> str: + """Mock execution agent response.""" + return json.dumps({ + "execution_plan": [ + { + "market_id": "test_market_001", + "strategy": "limit_order", + "entry_price": 0.44, + "slippage_estimate": 0.01, + "time_in_force": "GTC", + "split_orders": False, + "urgency": "normal", + }, + ], + "market_conditions": "favorable", + "recommended_timing": "immediate", + }) + + def _reflection_response(self) -> str: + """Mock reflection agent response.""" + return json.dumps({ + "lessons_learned": [ + "Entry timing was optimal", + "Position sizing was appropriate", + ], + "strategy_adjustments": [], + "confidence_calibration": 0.82, + }) + + def _debate_response(self) -> str: + """Mock debate response.""" + return json.dumps({ + "position": "bullish", + "confidence": 0.72, + "arguments": [ + "Strong market momentum", + "Favorable risk-reward", + ], + "counterarguments_addressed": [ + "Volatility risk is manageable", + ], + }) + + +# ============================================================================ +# Fixtures +# ============================================================================ + +@pytest.fixture +def mock_llm_provider(): + """Create mock LLM provider.""" + return MockLLMProvider() + + +@pytest.fixture +def mock_provider_registry(mock_llm_provider): + """Create mock provider registry.""" + registry = MagicMock() + registry.get_for_model.return_value = (mock_llm_provider, "mock-model") + registry.get_active.return_value = mock_llm_provider + return registry + + +@pytest.fixture +def mock_memory_manager(): + """Create mock memory manager.""" + memory = MagicMock() + memory.get_context_for_decision = AsyncMock(return_value="Historical context: Previous trades were profitable") + memory.get_performance_context = AsyncMock(return_value="Win rate: 65%, Sharpe: 1.2") + memory.get_episodes_for_reflection = AsyncMock(return_value=[]) + memory.store_decision = AsyncMock() + memory.update_calibration = AsyncMock() + return memory + + +@pytest.fixture +def agent_config(): + """Create agent configuration.""" + return AgentConfig( + research_agent_model="mock-model", + risk_agent_model="mock-model", + execution_agent_model="mock-model", + reflection_agent_model="mock-model", + enable_multi_agent_debate=True, + debate_rounds=2, + enable_reflection=True, + ) + + +@pytest.fixture +def sample_markets(): + """Create sample market states for testing.""" + return [ + MarketState( + market_id="test_market_001", + condition_id="cond_001", + question="Will test event occur by end of month?", + category="crypto", + yes_price=Decimal("0.45"), + no_price=Decimal("0.55"), + yes_token_id="yes_token_001", + no_token_id="no_token_001", + volume_24h=Decimal("50000"), + liquidity=Decimal("25000"), + end_date=datetime.utcnow() + timedelta(days=30), + yes_book=OrderBook( + token_id="yes_token_001", + bids=[PriceLevel(price=Decimal("0.44"), size=Decimal("1000"))], + asks=[PriceLevel(price=Decimal("0.46"), size=Decimal("1000"))], + ), + no_book=OrderBook( + token_id="no_token_001", + bids=[PriceLevel(price=Decimal("0.54"), size=Decimal("1000"))], + asks=[PriceLevel(price=Decimal("0.56"), size=Decimal("1000"))], + ), + ), + MarketState( + market_id="test_market_002", + condition_id="cond_002", + question="Will secondary event happen?", + category="politics", + yes_price=Decimal("0.60"), + no_price=Decimal("0.40"), + yes_token_id="yes_token_002", + no_token_id="no_token_002", + volume_24h=Decimal("30000"), + liquidity=Decimal("15000"), + end_date=datetime.utcnow() + timedelta(days=14), + yes_book=OrderBook( + token_id="yes_token_002", + bids=[PriceLevel(price=Decimal("0.59"), size=Decimal("500"))], + asks=[PriceLevel(price=Decimal("0.61"), size=Decimal("500"))], + ), + no_book=OrderBook( + token_id="no_token_002", + bids=[PriceLevel(price=Decimal("0.39"), size=Decimal("500"))], + asks=[PriceLevel(price=Decimal("0.41"), size=Decimal("500"))], + ), + ), + ] + + +# ============================================================================ +# Integration Tests: Agent Coordinator +# ============================================================================ + +class TestAgentCoordinatorIntegration: + """Test full agent coordination flow.""" + + @pytest.mark.asyncio + async def test_full_decision_cycle( + self, + mock_provider_registry, + mock_memory_manager, + agent_config, + sample_markets, + ): + """ + Test complete decision cycle: + Research -> Risk -> (Debate) -> Execution -> Final Decision + """ + coordinator = AgentCoordinator( + provider_registry=mock_provider_registry, + memory=mock_memory_manager, + config=agent_config, + ) + + await coordinator.initialize() + + decision = await coordinator.decide( + markets=sample_markets, + portfolio_value=Decimal("10000"), + positions=[], + ) + + # Verify decision structure + assert isinstance(decision, CoordinatorDecision) + assert decision.total_latency_ms >= 0 + assert decision.total_tokens >= 0 + + # Verify all agents participated + assert AgentRole.RESEARCH in decision.agent_responses + assert AgentRole.RISK in decision.agent_responses + assert AgentRole.EXECUTION in decision.agent_responses + + # Verify signals were generated + assert isinstance(decision.signals, list) + + # Verify consensus confidence is reasonable + assert Decimal("0") <= decision.consensus_confidence <= Decimal("1") + + @pytest.mark.asyncio + async def test_decision_with_no_opportunities( + self, + mock_memory_manager, + agent_config, + sample_markets, + ): + """Test decision when research finds no opportunities.""" + # Create provider that returns empty opportunities + provider = MockLLMProvider() + provider._research_response = lambda: json.dumps({ + "opportunities": [], + "market_analysis": "No compelling opportunities found", + }) + + registry = MagicMock() + registry.get_for_model.return_value = (provider, "mock-model") + + coordinator = AgentCoordinator( + provider_registry=registry, + memory=mock_memory_manager, + config=agent_config, + ) + + await coordinator.initialize() + + decision = await coordinator.decide( + markets=sample_markets, + portfolio_value=Decimal("10000"), + positions=[], + ) + + # Should return early with no signals + assert decision.signals == [] + assert decision.consensus_confidence == Decimal("0") + + @pytest.mark.asyncio + async def test_decision_with_existing_positions( + self, + mock_provider_registry, + mock_memory_manager, + agent_config, + sample_markets, + ): + """Test decision when portfolio has existing positions.""" + from src.broker.base import Position + + existing_positions = [ + Position( + market_id="existing_001", + token_id="token_001", + outcome="YES", + size=Decimal("100"), + avg_entry_price=Decimal("0.50"), + current_price=Decimal("0.55"), + unrealized_pnl=Decimal("5"), + ), + ] + + coordinator = AgentCoordinator( + provider_registry=mock_provider_registry, + memory=mock_memory_manager, + config=agent_config, + ) + + await coordinator.initialize() + + decision = await coordinator.decide( + markets=sample_markets, + portfolio_value=Decimal("10000"), + positions=existing_positions, + ) + + # Decision should still work with existing positions + assert isinstance(decision, CoordinatorDecision) + assert AgentRole.RESEARCH in decision.agent_responses + + +# ============================================================================ +# Integration Tests: Debate System +# ============================================================================ + +class TestDebateSystemIntegration: + """Test debate system integration with trading decisions.""" + + @pytest.mark.asyncio + async def test_debate_with_multiple_personas(self, mock_llm_provider, sample_markets): + """Test debate orchestrator with multiple personas.""" + # Create providers dict for all personas + providers = { + DebatePersona.OPTIMIST: mock_llm_provider, + DebatePersona.PESSIMIST: mock_llm_provider, + DebatePersona.FUNDAMENTALIST: mock_llm_provider, + DebatePersona.DEVILS_ADVOCATE: mock_llm_provider, + } + + orchestrator = DebateOrchestrator( + providers=providers, + max_rounds=2, + ) + + # Use correct method signature: debate() not run_debate() + result = await orchestrator.debate( + market=sample_markets[0], + context="Market context for debate", + ) + + assert isinstance(result, DebateResult) + assert result.total_rounds >= 0 # Correct attribute name + + def test_confidence_calibrator_tracking(self): + """Test confidence calibrator tracks predictions correctly.""" + calibrator = ConfidenceCalibrator() + + # Add predictions using correct signature + calibrator.record_prediction( + market_id="market_001", + persona=DebatePersona.OPTIMIST, + predicted_prob=Decimal("0.75"), + confidence=Decimal("0.8"), + timestamp=datetime.utcnow(), + ) + calibrator.record_prediction( + market_id="market_002", + persona=DebatePersona.PESSIMIST, + predicted_prob=Decimal("0.40"), + confidence=Decimal("0.7"), + timestamp=datetime.utcnow(), + ) + + # Record outcomes (bool: True=YES resolved, False=NO resolved) + calibrator.record_outcome("market_001", outcome=True) # YES + calibrator.record_outcome("market_002", outcome=False) # NO + + # Check calibration report + report = calibrator.get_calibration_report() + assert "total_predictions" in report + assert report["total_predictions"] == 2 + + +# ============================================================================ +# Integration Tests: Signal Aggregation +# ============================================================================ + +class TestSignalAggregationIntegration: + """Test signal aggregation with multiple signal sources.""" + + @pytest.mark.asyncio + async def test_signal_aggregation_flow(self, sample_markets): + """Test signal aggregation from multiple sources.""" + # Create aggregator with mock dependencies + aggregator = SignalAggregator( + event_calendar=None, # Will use default + news_provider=None, # Will use default + ) + + market = sample_markets[0] + + # Aggregate signals for market using correct signature + result = await aggregator.aggregate_signals( + market_id=market.market_id, + market_question=market.question, + current_price=market.yes_price, + fetch_news=False, # Don't try to fetch real news in test + check_events=True, + ) + + # Result should have aggregated signal info + assert result is not None + assert isinstance(result, AggregatedSignal) + + +# ============================================================================ +# Integration Tests: Event Calendar +# ============================================================================ + +class TestEventCalendarIntegration: + """Test event calendar integration.""" + + def test_event_calendar_add_and_get(self): + """Test adding and retrieving events.""" + calendar = EventCalendar() + + # Add event with correct signature + event = MarketEvent( + event_type=EventType.EARNINGS, + title="Q4 Earnings Release", + description="Company quarterly earnings announcement", + timestamp=datetime.utcnow() + timedelta(hours=2), + impact=EventImpact.HIGH, + related_markets=["test_market_001"], + related_keywords=["earnings", "revenue"], + ) + calendar.add_event(event) + + # Get upcoming events + upcoming = calendar.get_upcoming_events(hours_ahead=24) + assert len(upcoming) >= 1 + + def test_event_calendar_market_impact(self): + """Test getting events for a market.""" + calendar = EventCalendar() + + # Add high-impact event (using correct EventType enum value) + event = MarketEvent( + event_type=EventType.FED_MEETING, + title="Fed Rate Decision", + description="Federal Reserve interest rate announcement", + timestamp=datetime.utcnow() + timedelta(hours=1), + impact=EventImpact.HIGH, + related_keywords=["fed", "interest", "rate"], + ) + calendar.add_event(event) + + # Get events for a market - should find related event by keyword + events = calendar.get_events_for_market( + market_question="Will the Fed raise rates?" + ) + # Should find the Fed event + assert isinstance(events, list) + assert len(events) >= 1 + + +# ============================================================================ +# Integration Tests: Full Trading Loop +# ============================================================================ + +class TestFullTradingLoopIntegration: + """Test the complete trading loop integration.""" + + @pytest.mark.asyncio + async def test_end_to_end_trading_cycle( + self, + mock_provider_registry, + mock_memory_manager, + agent_config, + sample_markets, + mock_broker, + ): + """ + Test complete trading cycle: + 1. Agent coordination produces signals + 2. Signals are validated + 3. Orders would be placed (mocked) + 4. Position tracking updates + """ + # Step 1: Initialize coordinator + coordinator = AgentCoordinator( + provider_registry=mock_provider_registry, + memory=mock_memory_manager, + config=agent_config, + ) + await coordinator.initialize() + + # Step 2: Get trading decision + decision = await coordinator.decide( + markets=sample_markets, + portfolio_value=Decimal("10000"), + positions=[], + ) + + # Step 3: Validate signals + for signal in decision.signals: + assert signal.market_id in [m.market_id for m in sample_markets] + assert signal.size_recommendation > Decimal("0") + assert signal.confidence.overall >= Decimal("0") + + # Step 4: Execute trades (mocked) + for signal in decision.signals: + if signal.signal_type == SignalType.BUY: + order = await mock_broker.place_order( + market_id=signal.market_id, + token_id=signal.token_id, + side="buy", + size=signal.size_recommendation, + price=signal.target_price, + ) + assert order["status"] == "filled" + + # Step 5: Verify broker state + assert len(mock_broker.orders) == len(decision.signals) + + @pytest.mark.asyncio + async def test_trading_loop_with_risk_rejection( + self, + mock_memory_manager, + agent_config, + sample_markets, + ): + """Test trading loop when risk agent rejects trades.""" + # Provider that returns no approved trades + provider = MockLLMProvider() + provider._risk_response = lambda: json.dumps({ + "approved_trades": [ + { + "market_id": "test_market_001", + "approved": False, + "reason": "Risk too high", + "risk_score": 0.85, + }, + ], + "warnings": ["Portfolio exposure exceeded"], + }) + + registry = MagicMock() + registry.get_for_model.return_value = (provider, "mock-model") + + coordinator = AgentCoordinator( + provider_registry=registry, + memory=mock_memory_manager, + config=agent_config, + ) + await coordinator.initialize() + + decision = await coordinator.decide( + markets=sample_markets, + portfolio_value=Decimal("10000"), + positions=[], + ) + + # No signals should be generated if risk rejects + assert decision.signals == [] + + @pytest.mark.asyncio + async def test_trading_loop_with_debate( + self, + mock_provider_registry, + mock_memory_manager, + sample_markets, + ): + """Test trading loop with debate enabled.""" + config = AgentConfig( + research_agent_model="mock-model", + risk_agent_model="mock-model", + execution_agent_model="mock-model", + reflection_agent_model="mock-model", + enable_multi_agent_debate=True, + debate_rounds=2, + enable_reflection=True, + ) + + coordinator = AgentCoordinator( + provider_registry=mock_provider_registry, + memory=mock_memory_manager, + config=config, + ) + await coordinator.initialize() + + decision = await coordinator.decide( + markets=sample_markets, + portfolio_value=Decimal("10000"), + positions=[], + ) + + # With debate enabled, should track rounds + assert isinstance(decision.debate_rounds, int) + + +# ============================================================================ +# Integration Tests: Error Handling +# ============================================================================ + +class TestErrorHandlingIntegration: + """Test error handling across integrated components.""" + + @pytest.mark.asyncio + async def test_llm_failure_raises_exception( + self, + mock_memory_manager, + agent_config, + sample_markets, + ): + """Test system handles LLM failures by raising exception.""" + # Provider that raises exception + provider = MockLLMProvider() + provider.complete = AsyncMock(side_effect=Exception("LLM API Error")) + + registry = MagicMock() + registry.get_for_model.return_value = (provider, "mock-model") + + coordinator = AgentCoordinator( + provider_registry=registry, + memory=mock_memory_manager, + config=agent_config, + ) + await coordinator.initialize() + + # Should raise exception on LLM failure + with pytest.raises(Exception): + await coordinator.decide( + markets=sample_markets, + portfolio_value=Decimal("10000"), + positions=[], + ) + + +# ============================================================================ +# Integration Tests: Performance +# ============================================================================ + +class TestPerformanceIntegration: + """Test performance-related integration scenarios.""" + + @pytest.mark.asyncio + async def test_decision_latency_tracking( + self, + mock_provider_registry, + mock_memory_manager, + agent_config, + sample_markets, + ): + """Test that latency is properly tracked.""" + coordinator = AgentCoordinator( + provider_registry=mock_provider_registry, + memory=mock_memory_manager, + config=agent_config, + ) + await coordinator.initialize() + + decision = await coordinator.decide( + markets=sample_markets, + portfolio_value=Decimal("10000"), + positions=[], + ) + + # Latency should be tracked + assert decision.total_latency_ms >= 0 + + @pytest.mark.asyncio + async def test_token_usage_tracking( + self, + mock_provider_registry, + mock_memory_manager, + agent_config, + sample_markets, + ): + """Test that token usage is properly tracked.""" + coordinator = AgentCoordinator( + provider_registry=mock_provider_registry, + memory=mock_memory_manager, + config=agent_config, + ) + await coordinator.initialize() + + decision = await coordinator.decide( + markets=sample_markets, + portfolio_value=Decimal("10000"), + positions=[], + ) + + # Token usage should be tracked + assert decision.total_tokens >= 0