""" Integration Tests for Alpha Arena Trading System. These tests verify that all components work together correctly: - Multi-agent coordination (Research, Risk, Execution, Reflection) - Debate system integration - Signal aggregation - Full trading loop """ import asyncio import json import pytest from datetime import datetime, timedelta from decimal import Decimal from typing import Any from unittest.mock import AsyncMock, MagicMock, patch from src.core.types import ( MarketState, OrderBook, PriceLevel, Signal, SignalType, Edge, Confidence ) from src.core.config import AgentConfig from src.agents.coordinator import AgentCoordinator, CoordinatorDecision from src.agents.base import AgentRole, AgentResponse from src.agents.debate import ( DebateOrchestrator, DebatePersona, DebatePosition, DebateResult, ConfidenceCalibrator ) from src.signals.aggregator import SignalAggregator, SignalSource, AggregatedSignal from src.signals.events import EventCalendar, MarketEvent, EventType, EventImpact pytestmark = [pytest.mark.integration] # ============================================================================ # Mock LLM Provider # ============================================================================ class MockLLMResponse: """Mock LLM response object matching LLMResponse interface.""" def __init__(self, content: str, tokens_used: int = 100): self.content = content self.model = "mock-model" self.provider = "mock" self.tokens_input = tokens_used // 2 self.tokens_output = tokens_used // 2 self.tokens_used = tokens_used self.latency_ms = 50 self.cost_estimate = Decimal("0.001") self.finish_reason = "stop" self.timestamp = datetime.utcnow() self.metadata = {} @property def total_tokens(self) -> int: return self.tokens_input + self.tokens_output class MockLLMProvider: """Mock LLM provider that returns structured responses for agents.""" def __init__(self, response_map: dict[str, str] | None = None): self.response_map = response_map or {} self.call_history: list[dict] = [] self.default_model = "mock-model" self.call_count = 0 async def complete( self, messages: list | None = None, system: str | None = None, json_mode: bool = False, **kwargs ) -> MockLLMResponse: """Return mock response based on system prompt.""" self.call_count += 1 # Extract prompt from messages if provided prompt = "" if messages: prompt = str(messages[0].content if hasattr(messages[0], 'content') else messages[0])[:200] self.call_history.append({ "prompt": prompt, "system": system[:100] if system else None, }) # Determine response based on system prompt content = self._get_response_content(system) return MockLLMResponse(content=content, tokens_used=100) def _get_response_content(self, system_prompt: str | None) -> str: """Get response content based on system prompt.""" if system_prompt: system_lower = system_prompt.lower() if "research" in system_lower: return self._research_response() elif "risk" in system_lower: return self._risk_response() elif "execution" in system_lower: return self._execution_response() elif "reflection" in system_lower: return self._reflection_response() elif "debate" in system_lower: return self._debate_response() # Default response return json.dumps({"response": "mock response"}) def _research_response(self) -> str: """Mock research agent response.""" return json.dumps({ "opportunities": [ { "market_id": "test_market_001", "question": "Will test event occur?", "direction": "YES", "current_price": 0.45, "fair_value_estimate": 0.65, "confidence": 0.75, "edge_percentage": 20, "edge_source": "fundamental_analysis", "reasoning": "Strong fundamentals suggest YES outcome", "catalyst": "Upcoming announcement", "timeframe": "1-2 weeks", }, ], "market_analysis": "Market showing bullish signals", "risk_factors": ["Market volatility", "Low liquidity"], }) def _risk_response(self) -> str: """Mock risk agent response.""" return json.dumps({ "approved_trades": [ { "market_id": "test_market_001", "approved": True, "recommended_size_usdc": 25, "max_size_usdc": 50, "kelly_fraction": 0.15, "risk_score": 0.3, "reasoning": "Acceptable risk-reward ratio", }, ], "portfolio_risk": { "current_exposure": 0.25, "max_recommended": 0.5, "correlation_risk": "low", }, "warnings": [], }) def _execution_response(self) -> str: """Mock execution agent response.""" return json.dumps({ "execution_plan": [ { "market_id": "test_market_001", "strategy": "limit_order", "entry_price": 0.44, "slippage_estimate": 0.01, "time_in_force": "GTC", "split_orders": False, "urgency": "normal", }, ], "market_conditions": "favorable", "recommended_timing": "immediate", }) def _reflection_response(self) -> str: """Mock reflection agent response.""" return json.dumps({ "lessons_learned": [ "Entry timing was optimal", "Position sizing was appropriate", ], "strategy_adjustments": [], "confidence_calibration": 0.82, }) def _debate_response(self) -> str: """Mock debate response.""" return json.dumps({ "position": "bullish", "confidence": 0.72, "arguments": [ "Strong market momentum", "Favorable risk-reward", ], "counterarguments_addressed": [ "Volatility risk is manageable", ], }) # ============================================================================ # Fixtures # ============================================================================ @pytest.fixture def mock_llm_provider(): """Create mock LLM provider.""" return MockLLMProvider() @pytest.fixture def mock_provider_registry(mock_llm_provider): """Create mock provider registry.""" registry = MagicMock() registry.get_for_model.return_value = (mock_llm_provider, "mock-model") registry.get_active.return_value = mock_llm_provider return registry @pytest.fixture def mock_memory_manager(): """Create mock memory manager.""" memory = MagicMock() memory.get_context_for_decision = AsyncMock(return_value="Historical context: Previous trades were profitable") memory.get_performance_context = AsyncMock(return_value="Win rate: 65%, Sharpe: 1.2") memory.get_episodes_for_reflection = AsyncMock(return_value=[]) memory.store_decision = AsyncMock() memory.update_calibration = AsyncMock() return memory @pytest.fixture def agent_config(): """Create agent configuration.""" return AgentConfig( research_agent_model="mock-model", risk_agent_model="mock-model", execution_agent_model="mock-model", reflection_agent_model="mock-model", enable_multi_agent_debate=True, debate_rounds=2, enable_reflection=True, ) @pytest.fixture def sample_markets(): """Create sample market states for testing.""" return [ MarketState( market_id="test_market_001", condition_id="cond_001", question="Will test event occur by end of month?", category="crypto", yes_price=Decimal("0.45"), no_price=Decimal("0.55"), yes_token_id="yes_token_001", no_token_id="no_token_001", volume_24h=Decimal("50000"), liquidity=Decimal("25000"), end_date=datetime.utcnow() + timedelta(days=30), yes_book=OrderBook( token_id="yes_token_001", bids=[PriceLevel(price=Decimal("0.44"), size=Decimal("1000"))], asks=[PriceLevel(price=Decimal("0.46"), size=Decimal("1000"))], ), no_book=OrderBook( token_id="no_token_001", bids=[PriceLevel(price=Decimal("0.54"), size=Decimal("1000"))], asks=[PriceLevel(price=Decimal("0.56"), size=Decimal("1000"))], ), ), MarketState( market_id="test_market_002", condition_id="cond_002", question="Will secondary event happen?", category="politics", yes_price=Decimal("0.60"), no_price=Decimal("0.40"), yes_token_id="yes_token_002", no_token_id="no_token_002", volume_24h=Decimal("30000"), liquidity=Decimal("15000"), end_date=datetime.utcnow() + timedelta(days=14), yes_book=OrderBook( token_id="yes_token_002", bids=[PriceLevel(price=Decimal("0.59"), size=Decimal("500"))], asks=[PriceLevel(price=Decimal("0.61"), size=Decimal("500"))], ), no_book=OrderBook( token_id="no_token_002", bids=[PriceLevel(price=Decimal("0.39"), size=Decimal("500"))], asks=[PriceLevel(price=Decimal("0.41"), size=Decimal("500"))], ), ), ] # ============================================================================ # Integration Tests: Agent Coordinator # ============================================================================ class TestAgentCoordinatorIntegration: """Test full agent coordination flow.""" @pytest.mark.asyncio async def test_full_decision_cycle( self, mock_provider_registry, mock_memory_manager, agent_config, sample_markets, ): """ Test complete decision cycle: Research -> Risk -> (Debate) -> Execution -> Final Decision """ coordinator = AgentCoordinator( provider_registry=mock_provider_registry, memory=mock_memory_manager, config=agent_config, ) await coordinator.initialize() decision = await coordinator.decide( markets=sample_markets, portfolio_value=Decimal("10000"), positions=[], ) # Verify decision structure assert isinstance(decision, CoordinatorDecision) assert decision.total_latency_ms >= 0 assert decision.total_tokens >= 0 # Verify all agents participated assert AgentRole.RESEARCH in decision.agent_responses assert AgentRole.RISK in decision.agent_responses assert AgentRole.EXECUTION in decision.agent_responses # Verify signals were generated assert isinstance(decision.signals, list) # Verify consensus confidence is reasonable assert Decimal("0") <= decision.consensus_confidence <= Decimal("1") @pytest.mark.asyncio async def test_decision_with_no_opportunities( self, mock_memory_manager, agent_config, sample_markets, ): """Test decision when research finds no opportunities.""" # Create provider that returns empty opportunities provider = MockLLMProvider() provider._research_response = lambda: json.dumps({ "opportunities": [], "market_analysis": "No compelling opportunities found", }) registry = MagicMock() registry.get_for_model.return_value = (provider, "mock-model") coordinator = AgentCoordinator( provider_registry=registry, memory=mock_memory_manager, config=agent_config, ) await coordinator.initialize() decision = await coordinator.decide( markets=sample_markets, portfolio_value=Decimal("10000"), positions=[], ) # Should return early with no signals assert decision.signals == [] assert decision.consensus_confidence == Decimal("0") @pytest.mark.asyncio async def test_decision_with_existing_positions( self, mock_provider_registry, mock_memory_manager, agent_config, sample_markets, ): """Test decision when portfolio has existing positions.""" from src.broker.base import Position existing_positions = [ Position( market_id="existing_001", token_id="token_001", outcome="YES", size=Decimal("100"), avg_entry_price=Decimal("0.50"), current_price=Decimal("0.55"), unrealized_pnl=Decimal("5"), ), ] coordinator = AgentCoordinator( provider_registry=mock_provider_registry, memory=mock_memory_manager, config=agent_config, ) await coordinator.initialize() decision = await coordinator.decide( markets=sample_markets, portfolio_value=Decimal("10000"), positions=existing_positions, ) # Decision should still work with existing positions assert isinstance(decision, CoordinatorDecision) assert AgentRole.RESEARCH in decision.agent_responses # ============================================================================ # Integration Tests: Debate System # ============================================================================ class TestDebateSystemIntegration: """Test debate system integration with trading decisions.""" @pytest.mark.asyncio async def test_debate_with_multiple_personas(self, mock_llm_provider, sample_markets): """Test debate orchestrator with multiple personas.""" # Create providers dict for all personas providers = { DebatePersona.OPTIMIST: mock_llm_provider, DebatePersona.PESSIMIST: mock_llm_provider, DebatePersona.FUNDAMENTALIST: mock_llm_provider, DebatePersona.DEVILS_ADVOCATE: mock_llm_provider, } orchestrator = DebateOrchestrator( providers=providers, max_rounds=2, ) # Use correct method signature: debate() not run_debate() result = await orchestrator.debate( market=sample_markets[0], context="Market context for debate", ) assert isinstance(result, DebateResult) assert result.total_rounds >= 0 # Correct attribute name def test_confidence_calibrator_tracking(self): """Test confidence calibrator tracks predictions correctly.""" calibrator = ConfidenceCalibrator() # Add predictions using correct signature calibrator.record_prediction( market_id="market_001", persona=DebatePersona.OPTIMIST, predicted_prob=Decimal("0.75"), confidence=Decimal("0.8"), timestamp=datetime.utcnow(), ) calibrator.record_prediction( market_id="market_002", persona=DebatePersona.PESSIMIST, predicted_prob=Decimal("0.40"), confidence=Decimal("0.7"), timestamp=datetime.utcnow(), ) # Record outcomes (bool: True=YES resolved, False=NO resolved) calibrator.record_outcome("market_001", outcome=True) # YES calibrator.record_outcome("market_002", outcome=False) # NO # Check calibration report report = calibrator.get_calibration_report() assert "total_predictions" in report assert report["total_predictions"] == 2 # ============================================================================ # Integration Tests: Signal Aggregation # ============================================================================ class TestSignalAggregationIntegration: """Test signal aggregation with multiple signal sources.""" @pytest.mark.asyncio async def test_signal_aggregation_flow(self, sample_markets): """Test signal aggregation from multiple sources.""" # Create aggregator with mock dependencies aggregator = SignalAggregator( event_calendar=None, # Will use default news_provider=None, # Will use default ) market = sample_markets[0] # Aggregate signals for market using correct signature result = await aggregator.aggregate_signals( market_id=market.market_id, market_question=market.question, current_price=market.yes_price, fetch_news=False, # Don't try to fetch real news in test check_events=True, ) # Result should have aggregated signal info assert result is not None assert isinstance(result, AggregatedSignal) # ============================================================================ # Integration Tests: Event Calendar # ============================================================================ class TestEventCalendarIntegration: """Test event calendar integration.""" def test_event_calendar_add_and_get(self): """Test adding and retrieving events.""" calendar = EventCalendar() # Add event with correct signature event = MarketEvent( event_type=EventType.EARNINGS, title="Q4 Earnings Release", description="Company quarterly earnings announcement", timestamp=datetime.utcnow() + timedelta(hours=2), impact=EventImpact.HIGH, related_markets=["test_market_001"], related_keywords=["earnings", "revenue"], ) calendar.add_event(event) # Get upcoming events upcoming = calendar.get_upcoming_events(hours_ahead=24) assert len(upcoming) >= 1 def test_event_calendar_market_impact(self): """Test getting events for a market.""" calendar = EventCalendar() # Add high-impact event (using correct EventType enum value) event = MarketEvent( event_type=EventType.FED_MEETING, title="Fed Rate Decision", description="Federal Reserve interest rate announcement", timestamp=datetime.utcnow() + timedelta(hours=1), impact=EventImpact.HIGH, related_keywords=["fed", "interest", "rate"], ) calendar.add_event(event) # Get events for a market - should find related event by keyword events = calendar.get_events_for_market( market_question="Will the Fed raise rates?" ) # Should find the Fed event assert isinstance(events, list) assert len(events) >= 1 # ============================================================================ # Integration Tests: Full Trading Loop # ============================================================================ class TestFullTradingLoopIntegration: """Test the complete trading loop integration.""" @pytest.mark.asyncio async def test_end_to_end_trading_cycle( self, mock_provider_registry, mock_memory_manager, agent_config, sample_markets, mock_broker, ): """ Test complete trading cycle: 1. Agent coordination produces signals 2. Signals are validated 3. Orders would be placed (mocked) 4. Position tracking updates """ # Step 1: Initialize coordinator coordinator = AgentCoordinator( provider_registry=mock_provider_registry, memory=mock_memory_manager, config=agent_config, ) await coordinator.initialize() # Step 2: Get trading decision decision = await coordinator.decide( markets=sample_markets, portfolio_value=Decimal("10000"), positions=[], ) # Step 3: Validate signals for signal in decision.signals: assert signal.market_id in [m.market_id for m in sample_markets] assert signal.size_recommendation > Decimal("0") assert signal.confidence.overall >= Decimal("0") # Step 4: Execute trades (mocked) for signal in decision.signals: if signal.signal_type == SignalType.BUY: order = await mock_broker.place_order( market_id=signal.market_id, token_id=signal.token_id, side="buy", size=signal.size_recommendation, price=signal.target_price, ) assert order["status"] == "filled" # Step 5: Verify broker state assert len(mock_broker.orders) == len(decision.signals) @pytest.mark.asyncio async def test_trading_loop_with_risk_rejection( self, mock_memory_manager, agent_config, sample_markets, ): """Test trading loop when risk agent rejects trades.""" # Provider that returns no approved trades provider = MockLLMProvider() provider._risk_response = lambda: json.dumps({ "approved_trades": [ { "market_id": "test_market_001", "approved": False, "reason": "Risk too high", "risk_score": 0.85, }, ], "warnings": ["Portfolio exposure exceeded"], }) registry = MagicMock() registry.get_for_model.return_value = (provider, "mock-model") coordinator = AgentCoordinator( provider_registry=registry, memory=mock_memory_manager, config=agent_config, ) await coordinator.initialize() decision = await coordinator.decide( markets=sample_markets, portfolio_value=Decimal("10000"), positions=[], ) # No signals should be generated if risk rejects assert decision.signals == [] @pytest.mark.asyncio async def test_trading_loop_with_debate( self, mock_provider_registry, mock_memory_manager, sample_markets, ): """Test trading loop with debate enabled.""" config = AgentConfig( research_agent_model="mock-model", risk_agent_model="mock-model", execution_agent_model="mock-model", reflection_agent_model="mock-model", enable_multi_agent_debate=True, debate_rounds=2, enable_reflection=True, ) coordinator = AgentCoordinator( provider_registry=mock_provider_registry, memory=mock_memory_manager, config=config, ) await coordinator.initialize() decision = await coordinator.decide( markets=sample_markets, portfolio_value=Decimal("10000"), positions=[], ) # With debate enabled, should track rounds assert isinstance(decision.debate_rounds, int) # ============================================================================ # Integration Tests: Error Handling # ============================================================================ class TestErrorHandlingIntegration: """Test error handling across integrated components.""" @pytest.mark.asyncio async def test_llm_failure_raises_exception( self, mock_memory_manager, agent_config, sample_markets, ): """Test system handles LLM failures by raising exception.""" # Provider that raises exception provider = MockLLMProvider() provider.complete = AsyncMock(side_effect=Exception("LLM API Error")) registry = MagicMock() registry.get_for_model.return_value = (provider, "mock-model") coordinator = AgentCoordinator( provider_registry=registry, memory=mock_memory_manager, config=agent_config, ) await coordinator.initialize() # Should raise exception on LLM failure with pytest.raises(Exception): await coordinator.decide( markets=sample_markets, portfolio_value=Decimal("10000"), positions=[], ) # ============================================================================ # Integration Tests: Performance # ============================================================================ class TestPerformanceIntegration: """Test performance-related integration scenarios.""" @pytest.mark.asyncio async def test_decision_latency_tracking( self, mock_provider_registry, mock_memory_manager, agent_config, sample_markets, ): """Test that latency is properly tracked.""" coordinator = AgentCoordinator( provider_registry=mock_provider_registry, memory=mock_memory_manager, config=agent_config, ) await coordinator.initialize() decision = await coordinator.decide( markets=sample_markets, portfolio_value=Decimal("10000"), positions=[], ) # Latency should be tracked assert decision.total_latency_ms >= 0 @pytest.mark.asyncio async def test_token_usage_tracking( self, mock_provider_registry, mock_memory_manager, agent_config, sample_markets, ): """Test that token usage is properly tracked.""" coordinator = AgentCoordinator( provider_registry=mock_provider_registry, memory=mock_memory_manager, config=agent_config, ) await coordinator.initialize() decision = await coordinator.decide( markets=sample_markets, portfolio_value=Decimal("10000"), positions=[], ) # Token usage should be tracked assert decision.total_tokens >= 0