test: add comprehensive integration tests for trading system

Add 14 integration tests covering:
- Full agent coordination flow (Research -> Risk -> Debate -> Execution)
- Multi-agent debate system with multiple personas
- Confidence calibration tracking
- Signal aggregation from multiple sources
- Event calendar integration
- End-to-end trading cycle with mocked broker
- Risk rejection scenarios
- Error handling with LLM failures
- Performance and latency tracking

All tests use MockLLMProvider matching the real LLMResponse interface.
Total test count: 328 passed, 2 skipped.
This commit is contained in:
oho 2026-01-13 14:24:15 +01:00
parent 78f9ed7c0c
commit b627b4319a

797
tests/test_integration.py Normal file
View file

@@ -0,0 +1,797 @@
"""
Integration Tests for Alpha Arena Trading System.
These tests verify that all components work together correctly:
- Multi-agent coordination (Research, Risk, Execution, Reflection)
- Debate system integration
- Signal aggregation
- Full trading loop
"""
import asyncio
import json
import pytest
from datetime import datetime, timedelta
from decimal import Decimal
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
from src.core.types import (
MarketState, OrderBook, PriceLevel, Signal, SignalType,
Edge, Confidence
)
from src.core.config import AgentConfig
from src.agents.coordinator import AgentCoordinator, CoordinatorDecision
from src.agents.base import AgentRole, AgentResponse
from src.agents.debate import (
DebateOrchestrator, DebatePersona, DebatePosition,
DebateResult, ConfidenceCalibrator
)
from src.signals.aggregator import SignalAggregator, SignalSource, AggregatedSignal
from src.signals.events import EventCalendar, MarketEvent, EventType, EventImpact
# Mark every test in this module as an integration test (select with `pytest -m integration`).
pytestmark = [pytest.mark.integration]
# ============================================================================
# Mock LLM Provider
# ============================================================================
class MockLLMResponse:
    """Mock LLM response object matching the real LLMResponse interface.

    Args:
        content: Text body of the mock completion.
        tokens_used: Total token count to report. Split between input and
            output so that ``total_tokens`` always equals ``tokens_used``.
    """

    def __init__(self, content: str, tokens_used: int = 100):
        self.content = content
        self.model = "mock-model"
        self.provider = "mock"
        # BUGFIX: the old `tokens_used // 2` split on both sides silently
        # dropped one token for odd totals (total_tokens != tokens_used).
        # Give the remainder to the output side so the sum is preserved.
        self.tokens_input = tokens_used // 2
        self.tokens_output = tokens_used - self.tokens_input
        self.tokens_used = tokens_used
        self.latency_ms = 50
        self.cost_estimate = Decimal("0.001")
        self.finish_reason = "stop"
        # NOTE(review): naive UTC timestamp to match the rest of the suite;
        # datetime.utcnow() is deprecated since 3.12 — consider
        # datetime.now(timezone.utc) if the real LLMResponse uses aware times.
        self.timestamp = datetime.utcnow()
        self.metadata = {}

    @property
    def total_tokens(self) -> int:
        """Sum of input and output tokens; equals ``tokens_used``."""
        return self.tokens_input + self.tokens_output
class MockLLMProvider:
    """Mock LLM provider returning canned structured responses per agent role."""

    def __init__(self, response_map: dict[str, str] | None = None):
        self.response_map = response_map or {}
        self.call_history: list[dict] = []
        self.default_model = "mock-model"
        self.call_count = 0

    async def complete(
        self,
        messages: list | None = None,
        system: str | None = None,
        json_mode: bool = False,
        **kwargs
    ) -> MockLLMResponse:
        """Record the call, then return a canned response keyed off `system`."""
        self.call_count += 1

        # Keep a truncated transcript of what was asked, for later assertions.
        prompt = ""
        if messages:
            head = messages[0]
            raw = head.content if hasattr(head, 'content') else head
            prompt = str(raw)[:200]
        self.call_history.append({
            "prompt": prompt,
            "system": system[:100] if system else None,
        })

        body = self._get_response_content(system)
        return MockLLMResponse(content=body, tokens_used=100)

    def _get_response_content(self, system_prompt: str | None) -> str:
        """Pick a canned response by the first role keyword in the system prompt."""
        if system_prompt:
            lowered = system_prompt.lower()
            # First keyword hit wins; order mirrors the original elif chain.
            # Responders are looked up on `self` at call time so tests can
            # monkey-patch e.g. `provider._research_response` per instance.
            dispatch = (
                ("research", self._research_response),
                ("risk", self._risk_response),
                ("execution", self._execution_response),
                ("reflection", self._reflection_response),
                ("debate", self._debate_response),
            )
            for keyword, responder in dispatch:
                if keyword in lowered:
                    return responder()
        # No system prompt, or no keyword matched.
        return json.dumps({"response": "mock response"})

    def _research_response(self) -> str:
        """Canned research-agent output: one YES opportunity with a 20% edge."""
        payload = {
            "opportunities": [
                {
                    "market_id": "test_market_001",
                    "question": "Will test event occur?",
                    "direction": "YES",
                    "current_price": 0.45,
                    "fair_value_estimate": 0.65,
                    "confidence": 0.75,
                    "edge_percentage": 20,
                    "edge_source": "fundamental_analysis",
                    "reasoning": "Strong fundamentals suggest YES outcome",
                    "catalyst": "Upcoming announcement",
                    "timeframe": "1-2 weeks",
                },
            ],
            "market_analysis": "Market showing bullish signals",
            "risk_factors": ["Market volatility", "Low liquidity"],
        }
        return json.dumps(payload)

    def _risk_response(self) -> str:
        """Canned risk-agent output approving the research trade."""
        payload = {
            "approved_trades": [
                {
                    "market_id": "test_market_001",
                    "approved": True,
                    "recommended_size_usdc": 25,
                    "max_size_usdc": 50,
                    "kelly_fraction": 0.15,
                    "risk_score": 0.3,
                    "reasoning": "Acceptable risk-reward ratio",
                },
            ],
            "portfolio_risk": {
                "current_exposure": 0.25,
                "max_recommended": 0.5,
                "correlation_risk": "low",
            },
            "warnings": [],
        }
        return json.dumps(payload)

    def _execution_response(self) -> str:
        """Canned execution-agent output: a single limit-order plan."""
        payload = {
            "execution_plan": [
                {
                    "market_id": "test_market_001",
                    "strategy": "limit_order",
                    "entry_price": 0.44,
                    "slippage_estimate": 0.01,
                    "time_in_force": "GTC",
                    "split_orders": False,
                    "urgency": "normal",
                },
            ],
            "market_conditions": "favorable",
            "recommended_timing": "immediate",
        }
        return json.dumps(payload)

    def _reflection_response(self) -> str:
        """Canned reflection-agent output with lessons learned."""
        payload = {
            "lessons_learned": [
                "Entry timing was optimal",
                "Position sizing was appropriate",
            ],
            "strategy_adjustments": [],
            "confidence_calibration": 0.82,
        }
        return json.dumps(payload)

    def _debate_response(self) -> str:
        """Canned debate-persona output taking a bullish position."""
        payload = {
            "position": "bullish",
            "confidence": 0.72,
            "arguments": [
                "Strong market momentum",
                "Favorable risk-reward",
            ],
            "counterarguments_addressed": [
                "Volatility risk is manageable",
            ],
        }
        return json.dumps(payload)
# ============================================================================
# Fixtures
# ============================================================================
@pytest.fixture
def mock_llm_provider():
    """Provide a fresh, un-configured MockLLMProvider for each test."""
    provider = MockLLMProvider()
    return provider
@pytest.fixture
def mock_provider_registry(mock_llm_provider):
    """Registry stub that hands back the mock provider for any model lookup."""
    stub = MagicMock()
    stub.get_for_model.return_value = (mock_llm_provider, "mock-model")
    stub.get_active.return_value = mock_llm_provider
    return stub
@pytest.fixture
def mock_memory_manager():
    """Memory-manager stub with canned async context/performance answers."""
    stub = MagicMock()
    # Async query methods return fixed strings/lists the agents can consume.
    stub.get_context_for_decision = AsyncMock(
        return_value="Historical context: Previous trades were profitable"
    )
    stub.get_performance_context = AsyncMock(return_value="Win rate: 65%, Sharpe: 1.2")
    stub.get_episodes_for_reflection = AsyncMock(return_value=[])
    # Write paths are recorded but do nothing.
    stub.store_decision = AsyncMock()
    stub.update_calibration = AsyncMock()
    return stub
@pytest.fixture
def agent_config():
    """Agent configuration wiring every agent role to the mock model."""
    model = "mock-model"
    return AgentConfig(
        research_agent_model=model,
        risk_agent_model=model,
        execution_agent_model=model,
        reflection_agent_model=model,
        enable_multi_agent_debate=True,
        debate_rounds=2,
        enable_reflection=True,
    )
@pytest.fixture
def sample_markets():
    """Two liquid sample MarketState objects (crypto and politics) for tests."""

    def single_level_book(token_id, bid, ask, depth):
        # One price level per side, equal size on bid and ask.
        return OrderBook(
            token_id=token_id,
            bids=[PriceLevel(price=Decimal(bid), size=Decimal(depth))],
            asks=[PriceLevel(price=Decimal(ask), size=Decimal(depth))],
        )

    crypto_market = MarketState(
        market_id="test_market_001",
        condition_id="cond_001",
        question="Will test event occur by end of month?",
        category="crypto",
        yes_price=Decimal("0.45"),
        no_price=Decimal("0.55"),
        yes_token_id="yes_token_001",
        no_token_id="no_token_001",
        volume_24h=Decimal("50000"),
        liquidity=Decimal("25000"),
        end_date=datetime.utcnow() + timedelta(days=30),
        yes_book=single_level_book("yes_token_001", "0.44", "0.46", "1000"),
        no_book=single_level_book("no_token_001", "0.54", "0.56", "1000"),
    )
    politics_market = MarketState(
        market_id="test_market_002",
        condition_id="cond_002",
        question="Will secondary event happen?",
        category="politics",
        yes_price=Decimal("0.60"),
        no_price=Decimal("0.40"),
        yes_token_id="yes_token_002",
        no_token_id="no_token_002",
        volume_24h=Decimal("30000"),
        liquidity=Decimal("15000"),
        end_date=datetime.utcnow() + timedelta(days=14),
        yes_book=single_level_book("yes_token_002", "0.59", "0.61", "500"),
        no_book=single_level_book("no_token_002", "0.39", "0.41", "500"),
    )
    return [crypto_market, politics_market]
# ============================================================================
# Integration Tests: Agent Coordinator
# ============================================================================
class TestAgentCoordinatorIntegration:
    """Exercise the full agent coordination pipeline end to end."""

    @pytest.mark.asyncio
    async def test_full_decision_cycle(
        self,
        mock_provider_registry,
        mock_memory_manager,
        agent_config,
        sample_markets,
    ):
        """Run the whole Research -> Risk -> (Debate) -> Execution chain."""
        coord = AgentCoordinator(
            provider_registry=mock_provider_registry,
            memory=mock_memory_manager,
            config=agent_config,
        )
        await coord.initialize()

        result = await coord.decide(
            markets=sample_markets,
            portfolio_value=Decimal("10000"),
            positions=[],
        )

        # The coordinator must hand back a well-formed decision object.
        assert isinstance(result, CoordinatorDecision)
        assert result.total_latency_ms >= 0
        assert result.total_tokens >= 0

        # Every core agent role should have contributed a response.
        for role in (AgentRole.RESEARCH, AgentRole.RISK, AgentRole.EXECUTION):
            assert role in result.agent_responses

        # Signals come back as a list and consensus confidence stays in [0, 1].
        assert isinstance(result.signals, list)
        assert result.consensus_confidence >= Decimal("0")
        assert result.consensus_confidence <= Decimal("1")

    @pytest.mark.asyncio
    async def test_decision_with_no_opportunities(
        self,
        mock_memory_manager,
        agent_config,
        sample_markets,
    ):
        """Research returning zero opportunities yields an empty decision."""
        # Override the research response so no opportunities are surfaced.
        quiet_provider = MockLLMProvider()
        quiet_provider._research_response = lambda: json.dumps({
            "opportunities": [],
            "market_analysis": "No compelling opportunities found",
        })
        stub_registry = MagicMock()
        stub_registry.get_for_model.return_value = (quiet_provider, "mock-model")

        coord = AgentCoordinator(
            provider_registry=stub_registry,
            memory=mock_memory_manager,
            config=agent_config,
        )
        await coord.initialize()

        result = await coord.decide(
            markets=sample_markets,
            portfolio_value=Decimal("10000"),
            positions=[],
        )

        # With nothing to trade, the coordinator should short-circuit.
        assert result.signals == []
        assert result.consensus_confidence == Decimal("0")

    @pytest.mark.asyncio
    async def test_decision_with_existing_positions(
        self,
        mock_provider_registry,
        mock_memory_manager,
        agent_config,
        sample_markets,
    ):
        """A non-empty portfolio must not break the decision flow."""
        from src.broker.base import Position

        open_positions = [
            Position(
                market_id="existing_001",
                token_id="token_001",
                outcome="YES",
                size=Decimal("100"),
                avg_entry_price=Decimal("0.50"),
                current_price=Decimal("0.55"),
                unrealized_pnl=Decimal("5"),
            ),
        ]

        coord = AgentCoordinator(
            provider_registry=mock_provider_registry,
            memory=mock_memory_manager,
            config=agent_config,
        )
        await coord.initialize()

        result = await coord.decide(
            markets=sample_markets,
            portfolio_value=Decimal("10000"),
            positions=open_positions,
        )

        # The decision pipeline still completes with open positions present.
        assert isinstance(result, CoordinatorDecision)
        assert AgentRole.RESEARCH in result.agent_responses
# ============================================================================
# Integration Tests: Debate System
# ============================================================================
class TestDebateSystemIntegration:
    """Verify the debate machinery plugs into trading decisions."""

    @pytest.mark.asyncio
    async def test_debate_with_multiple_personas(self, mock_llm_provider, sample_markets):
        """All four personas share one mock provider and debate a market."""
        persona_providers = {
            persona: mock_llm_provider
            for persona in (
                DebatePersona.OPTIMIST,
                DebatePersona.PESSIMIST,
                DebatePersona.FUNDAMENTALIST,
                DebatePersona.DEVILS_ADVOCATE,
            )
        }
        orchestrator = DebateOrchestrator(
            providers=persona_providers,
            max_rounds=2,
        )

        # The orchestrator exposes debate(), not run_debate().
        outcome = await orchestrator.debate(
            market=sample_markets[0],
            context="Market context for debate",
        )

        assert isinstance(outcome, DebateResult)
        # total_rounds is the attribute name on DebateResult.
        assert outcome.total_rounds >= 0

    def test_confidence_calibrator_tracking(self):
        """The calibrator should count recorded predictions and outcomes."""
        cal = ConfidenceCalibrator()

        # Record two predictions via the keyword-argument signature.
        predictions = [
            ("market_001", DebatePersona.OPTIMIST, "0.75", "0.8"),
            ("market_002", DebatePersona.PESSIMIST, "0.40", "0.7"),
        ]
        for market_id, persona, prob, conf in predictions:
            cal.record_prediction(
                market_id=market_id,
                persona=persona,
                predicted_prob=Decimal(prob),
                confidence=Decimal(conf),
                timestamp=datetime.utcnow(),
            )

        # Outcomes are booleans: True -> resolved YES, False -> resolved NO.
        cal.record_outcome("market_001", outcome=True)
        cal.record_outcome("market_002", outcome=False)

        report = cal.get_calibration_report()
        assert "total_predictions" in report
        assert report["total_predictions"] == 2
# ============================================================================
# Integration Tests: Signal Aggregation
# ============================================================================
class TestSignalAggregationIntegration:
    """Aggregate signals from several sources against sample markets."""

    @pytest.mark.asyncio
    async def test_signal_aggregation_flow(self, sample_markets):
        """The aggregator should produce an AggregatedSignal for a market."""
        aggregator = SignalAggregator(
            event_calendar=None,  # fall back to the default calendar
            news_provider=None,  # fall back to the default news source
        )
        target = sample_markets[0]

        # aggregate_signals() takes the market fields individually.
        aggregated = await aggregator.aggregate_signals(
            market_id=target.market_id,
            market_question=target.question,
            current_price=target.yes_price,
            fetch_news=False,  # never hit real news feeds from a test
            check_events=True,
        )

        # A concrete AggregatedSignal (never None) must come back.
        assert aggregated is not None
        assert isinstance(aggregated, AggregatedSignal)
# ============================================================================
# Integration Tests: Event Calendar
# ============================================================================
class TestEventCalendarIntegration:
    """Event calendar behaviour for upcoming and market-related events."""

    def test_event_calendar_add_and_get(self):
        """An added event should appear in the upcoming-events window."""
        cal = EventCalendar()

        earnings = MarketEvent(
            event_type=EventType.EARNINGS,
            title="Q4 Earnings Release",
            description="Company quarterly earnings announcement",
            timestamp=datetime.utcnow() + timedelta(hours=2),
            impact=EventImpact.HIGH,
            related_markets=["test_market_001"],
            related_keywords=["earnings", "revenue"],
        )
        cal.add_event(earnings)

        within_a_day = cal.get_upcoming_events(hours_ahead=24)
        assert len(within_a_day) >= 1

    def test_event_calendar_market_impact(self):
        """Keyword overlap should link an event to a market question."""
        cal = EventCalendar()

        # High-impact event using the FED_MEETING enum value.
        fed_event = MarketEvent(
            event_type=EventType.FED_MEETING,
            title="Fed Rate Decision",
            description="Federal Reserve interest rate announcement",
            timestamp=datetime.utcnow() + timedelta(hours=1),
            impact=EventImpact.HIGH,
            related_keywords=["fed", "interest", "rate"],
        )
        cal.add_event(fed_event)

        matches = cal.get_events_for_market(
            market_question="Will the Fed raise rates?"
        )

        # The keyword overlap on "fed"/"rate" should surface the event.
        assert isinstance(matches, list)
        assert len(matches) >= 1
# ============================================================================
# Integration Tests: Full Trading Loop
# ============================================================================
class TestFullTradingLoopIntegration:
    """Test the complete trading loop integration."""

    @pytest.mark.asyncio
    async def test_end_to_end_trading_cycle(
        self,
        mock_provider_registry,
        mock_memory_manager,
        agent_config,
        sample_markets,
        mock_broker,
    ):
        """
        Test complete trading cycle:
        1. Agent coordination produces signals
        2. Signals are validated
        3. Orders would be placed (mocked)
        4. Position tracking updates
        """
        # Step 1: Initialize coordinator
        coordinator = AgentCoordinator(
            provider_registry=mock_provider_registry,
            memory=mock_memory_manager,
            config=agent_config,
        )
        await coordinator.initialize()

        # Step 2: Get trading decision
        decision = await coordinator.decide(
            markets=sample_markets,
            portfolio_value=Decimal("10000"),
            positions=[],
        )

        # Step 3: Every signal must reference a known market with sane sizing
        for signal in decision.signals:
            assert signal.market_id in [m.market_id for m in sample_markets]
            assert signal.size_recommendation > Decimal("0")
            assert signal.confidence.overall >= Decimal("0")

        # Step 4: Execute trades (mocked) — only BUY signals place orders
        for signal in decision.signals:
            if signal.signal_type == SignalType.BUY:
                order = await mock_broker.place_order(
                    market_id=signal.market_id,
                    token_id=signal.token_id,
                    side="buy",
                    size=signal.size_recommendation,
                    price=signal.target_price,
                )
                assert order["status"] == "filled"

        # Step 5: Verify broker state.
        # BUGFIX: step 4 places orders only for BUY signals, so the broker
        # must be compared against the BUY count; the old
        # `len(decision.signals)` check fails whenever the coordinator emits
        # any non-BUY signal.
        buy_signals = [s for s in decision.signals if s.signal_type == SignalType.BUY]
        assert len(mock_broker.orders) == len(buy_signals)

    @pytest.mark.asyncio
    async def test_trading_loop_with_risk_rejection(
        self,
        mock_memory_manager,
        agent_config,
        sample_markets,
    ):
        """Test trading loop when risk agent rejects trades."""
        # Provider whose risk agent approves nothing.
        provider = MockLLMProvider()
        provider._risk_response = lambda: json.dumps({
            "approved_trades": [
                {
                    "market_id": "test_market_001",
                    "approved": False,
                    "reason": "Risk too high",
                    "risk_score": 0.85,
                },
            ],
            "warnings": ["Portfolio exposure exceeded"],
        })
        registry = MagicMock()
        registry.get_for_model.return_value = (provider, "mock-model")

        coordinator = AgentCoordinator(
            provider_registry=registry,
            memory=mock_memory_manager,
            config=agent_config,
        )
        await coordinator.initialize()

        decision = await coordinator.decide(
            markets=sample_markets,
            portfolio_value=Decimal("10000"),
            positions=[],
        )

        # No signals should be generated if risk rejects every trade.
        assert decision.signals == []

    @pytest.mark.asyncio
    async def test_trading_loop_with_debate(
        self,
        mock_provider_registry,
        mock_memory_manager,
        sample_markets,
    ):
        """Test trading loop with debate enabled."""
        config = AgentConfig(
            research_agent_model="mock-model",
            risk_agent_model="mock-model",
            execution_agent_model="mock-model",
            reflection_agent_model="mock-model",
            enable_multi_agent_debate=True,
            debate_rounds=2,
            enable_reflection=True,
        )
        coordinator = AgentCoordinator(
            provider_registry=mock_provider_registry,
            memory=mock_memory_manager,
            config=config,
        )
        await coordinator.initialize()

        decision = await coordinator.decide(
            markets=sample_markets,
            portfolio_value=Decimal("10000"),
            positions=[],
        )

        # With debate enabled, the round count should be tracked as an int.
        assert isinstance(decision.debate_rounds, int)
# ============================================================================
# Integration Tests: Error Handling
# ============================================================================
class TestErrorHandlingIntegration:
    """Failure propagation across integrated components."""

    @pytest.mark.asyncio
    async def test_llm_failure_raises_exception(
        self,
        mock_memory_manager,
        agent_config,
        sample_markets,
    ):
        """An LLM provider error should surface out of coordinator.decide()."""
        # Provider whose completion call always blows up.
        failing_provider = MockLLMProvider()
        failing_provider.complete = AsyncMock(side_effect=Exception("LLM API Error"))

        stub_registry = MagicMock()
        stub_registry.get_for_model.return_value = (failing_provider, "mock-model")

        coord = AgentCoordinator(
            provider_registry=stub_registry,
            memory=mock_memory_manager,
            config=agent_config,
        )
        await coord.initialize()

        # The provider failure must not be swallowed by the coordinator.
        with pytest.raises(Exception):
            await coord.decide(
                markets=sample_markets,
                portfolio_value=Decimal("10000"),
                positions=[],
            )
# ============================================================================
# Integration Tests: Performance
# ============================================================================
class TestPerformanceIntegration:
    """Latency and token accounting across a full decision cycle."""

    async def _run_decision(self, registry, memory, config, markets):
        # Shared driver: build, initialize, and run one coordinator decision.
        coord = AgentCoordinator(
            provider_registry=registry,
            memory=memory,
            config=config,
        )
        await coord.initialize()
        return await coord.decide(
            markets=markets,
            portfolio_value=Decimal("10000"),
            positions=[],
        )

    @pytest.mark.asyncio
    async def test_decision_latency_tracking(
        self,
        mock_provider_registry,
        mock_memory_manager,
        agent_config,
        sample_markets,
    ):
        """Total latency must be recorded on the decision."""
        result = await self._run_decision(
            mock_provider_registry, mock_memory_manager, agent_config, sample_markets
        )
        assert result.total_latency_ms >= 0

    @pytest.mark.asyncio
    async def test_token_usage_tracking(
        self,
        mock_provider_registry,
        mock_memory_manager,
        agent_config,
        sample_markets,
    ):
        """Cumulative token usage must be recorded on the decision."""
        result = await self._run_decision(
            mock_provider_registry, mock_memory_manager, agent_config, sample_markets
        )
        assert result.total_tokens >= 0