mirror of
https://github.com/saymrwulf/alpha-arena.git
synced 2026-05-14 20:37:51 +00:00
A comprehensive autonomous trading system for Polymarket prediction markets, featuring multi-LLM provider support, a native macOS menu bar app, and a web-based control dashboard. Key features: multi-agent trading system (Research, Risk, Execution, and Reflection agents); LLM provider flexibility (Anthropic, OpenAI, Google, xAI, and local models); automatic provider fallback chain for resilience; native Swift/SwiftUI macOS menu bar application; FastAPI web dashboard with real-time WebSocket updates; risk management with kill switch; technical indicators and market analysis.
363 lines
12 KiB
Python
363 lines
12 KiB
Python
"""Tests for PnL accounting and metrics."""
|
|
|
|
import asyncio
|
|
import tempfile
|
|
from datetime import datetime, timedelta
|
|
from decimal import Decimal
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from src.metrics.logger import (
|
|
DecisionLog,
|
|
MetricsLogger,
|
|
OrderLog,
|
|
PnLSnapshot,
|
|
)
|
|
|
|
|
|
@pytest.fixture
def temp_dir():
    """Yield the path of a scratch directory for test logs.

    The directory is created by ``tempfile.TemporaryDirectory`` and is
    cleaned up when the fixture is torn down.
    """
    scratch = tempfile.TemporaryDirectory()
    with scratch as scratch_path:
        yield scratch_path
|
|
|
|
|
|
@pytest.fixture
async def metrics_logger(temp_dir):
    """Provide a connected ``MetricsLogger`` whose files live under ``temp_dir``.

    The logger is connected before the test body runs and disconnected
    once the test has finished.
    """
    storage = {
        "jsonl_path": f"{temp_dir}/decisions.jsonl",
        "sqlite_path": f"{temp_dir}/metrics.db",
    }
    instance = MetricsLogger(**storage)
    await instance.connect()
    yield instance
    await instance.disconnect()
|
|
|
|
|
|
class TestPnLAccounting:
    """Cover PnL snapshot logging, high-water-mark tracking and drawdown math."""

    @pytest.mark.asyncio
    async def test_pnl_snapshot_logging(self, metrics_logger: MetricsLogger):
        """A logged snapshot should be the single entry in the PnL history."""
        fields = dict(
            timestamp=datetime.utcnow(),
            balance=Decimal("1000"),
            unrealized_pnl=Decimal("50"),
            realized_pnl=Decimal("25"),
            total_equity=Decimal("1075"),
            positions_value=Decimal("75"),
            drawdown_pct=Decimal("0"),
            high_water_mark=Decimal("1075"),
        )
        await metrics_logger.log_pnl_snapshot(PnLSnapshot(**fields))

        # Read the snapshot back through the logger
        recorded = await metrics_logger.get_pnl_history()
        assert len(recorded) == 1
        first_entry = recorded[0]
        assert first_entry["total_equity"] == 1075

    @pytest.mark.asyncio
    async def test_high_water_mark_tracking(self, metrics_logger: MetricsLogger):
        """The logger's high water mark should rise with equity and never fall."""
        # Columns: balance, realized_pnl, total_equity, drawdown_pct, high water mark.
        # The last column is both the value passed in the snapshot and the value
        # the logger is expected to hold afterwards.
        scenarios = (
            ("1000", "0", "1000", "0", "1000"),        # first snapshot sets HWM
            ("1100", "100", "1100", "0", "1100"),      # higher equity updates HWM
            ("1050", "50", "1050", "0.0455", "1100"),  # lower equity keeps HWM
        )
        for balance, realized, equity, drawdown, hwm in scenarios:
            await metrics_logger.log_pnl_snapshot(PnLSnapshot(
                timestamp=datetime.utcnow(),
                balance=Decimal(balance),
                unrealized_pnl=Decimal("0"),
                realized_pnl=Decimal(realized),
                total_equity=Decimal(equity),
                positions_value=Decimal("0"),
                drawdown_pct=Decimal(drawdown),
                high_water_mark=Decimal(hwm),
            ))
            assert metrics_logger._high_water_mark == Decimal(hwm)

    @pytest.mark.asyncio
    async def test_drawdown_calculation(self, metrics_logger: MetricsLogger):
        """Drawdown should be reported as a fraction of the high water mark."""
        # Pin the reference point the drawdown is measured from
        metrics_logger._high_water_mark = Decimal("1000")

        expectations = (
            (Decimal("900"), Decimal("0.1000")),   # 10% drawdown
            (Decimal("750"), Decimal("0.2500")),   # 25% drawdown
            (Decimal("1000"), Decimal("0.0000")),  # no drawdown at HWM
        )
        for equity, expected in expectations:
            observed = metrics_logger.calculate_drawdown(equity)
            assert observed == expected
|
|
|
|
|
|
class TestDecisionLogging:
    """Cover writing decisions and reading them back through the logger."""

    @pytest.mark.asyncio
    async def test_decision_log(self, metrics_logger: MetricsLogger):
        """A single logged decision should come back with its fields intact."""
        entry = DecisionLog(
            timestamp=datetime.utcnow(),
            iteration=1,
            balance=Decimal("1000"),
            positions_count=2,
            markets_analyzed=50,
            signals_generated=3,
            signals_executed=1,
            model_used="claude-sonnet-4-20250514",
            latency_ms=1500,
            tokens_used=2000,
            reasoning="Test reasoning",
            signals=[{"test": "signal"}],
        )
        await metrics_logger.log_decision(entry)

        # Fetch the most recent decision and check what was stored
        fetched = await metrics_logger.get_recent_decisions(limit=1)
        assert len(fetched) == 1
        latest = fetched[0]
        assert latest["iteration"] == 1
        assert latest["signals_executed"] == 1

    @pytest.mark.asyncio
    async def test_multiple_decisions(self, metrics_logger: MetricsLogger):
        """Five logged decisions should all be returned when the limit allows."""
        total = 5
        for step in range(total):
            record = DecisionLog(
                timestamp=datetime.utcnow(),
                iteration=step + 1,
                balance=Decimal("1000") + Decimal(str(step * 10)),
                positions_count=step,
                markets_analyzed=50,
                signals_generated=step,
                signals_executed=step,
                model_used="test-model",
                latency_ms=1000,
                tokens_used=1000,
                reasoning=f"Iteration {step + 1}",
            )
            await metrics_logger.log_decision(record)

        fetched = await metrics_logger.get_recent_decisions(limit=10)
        assert len(fetched) == 5
|
|
|
|
|
|
class TestOrderLogging:
    """Cover order logging to the JSONL file."""

    @pytest.mark.asyncio
    async def test_order_log(self, metrics_logger: MetricsLogger):
        """A logged order should leave its id and side in the JSONL file."""
        filled_order = OrderLog(
            timestamp=datetime.utcnow(),
            order_id="order_123",
            market_id="market_456",
            token_id="token_789",
            side="buy",
            size=Decimal("10"),
            price=Decimal("0.55"),
            status="filled",
            fill_size=Decimal("10"),
            fill_price=Decimal("0.55"),
            fee=Decimal("0.01"),
        )
        await metrics_logger.log_order(filled_order)

        # The check is text-based: the file must exist and mention the order
        log_file = Path(metrics_logger.jsonl_path)
        assert log_file.exists()

        text = log_file.read_text()
        for needle in ("order_123", "buy"):
            assert needle in text
|
|
|
|
|
|
class TestStatistics:
    """Cover the aggregated statistics reported by the logger."""

    @pytest.mark.asyncio
    async def test_statistics_aggregation(self, metrics_logger: MetricsLogger):
        """Statistics should reflect the logged decisions and latest PnL snapshot."""
        # Three decisions, each executing one of its two signals
        for step in range(3):
            scaled = 1000 + step * 100
            await metrics_logger.log_decision(DecisionLog(
                timestamp=datetime.utcnow(),
                iteration=step + 1,
                balance=Decimal("1000"),
                positions_count=0,
                markets_analyzed=50,
                signals_generated=2,
                signals_executed=1,
                model_used="test",
                latency_ms=scaled,
                tokens_used=scaled,
                reasoning="test",
            ))

        # One PnL snapshot supplying the equity and realized PnL figures
        pnl = PnLSnapshot(
            timestamp=datetime.utcnow(),
            balance=Decimal("1050"),
            unrealized_pnl=Decimal("25"),
            realized_pnl=Decimal("50"),
            total_equity=Decimal("1075"),
            positions_value=Decimal("25"),
            drawdown_pct=Decimal("0"),
            high_water_mark=Decimal("1075"),
        )
        await metrics_logger.log_pnl_snapshot(pnl)

        stats = await metrics_logger.get_statistics()

        expected = {
            "total_decisions": 3,
            "total_trades": 3,  # 1 per decision
            "current_equity": 1075,
            "realized_pnl": 50,
        }
        for key, value in expected.items():
            assert stats[key] == value
|
|
|
|
|
|
class TestPnLCalculations:
    """Test PnL calculation accuracy on ``Position`` objects."""

    def test_position_pnl(self):
        """Position-level PnL should equal (current - entry price) * size."""
        from src.broker.base import Position

        position = Position(
            market_id="test",
            token_id="test",
            outcome="YES",
            size=Decimal("100"),
            avg_entry_price=Decimal("0.50"),
            current_price=Decimal("0.60"),
            unrealized_pnl=Decimal("10"),  # (0.60 - 0.50) * 100
        )

        # Recompute the PnL from the price fields read back off the position
        expected_pnl = (position.current_price - position.avg_entry_price) * position.size
        assert expected_pnl == Decimal("10")
        # Tie the recomputed figure to the value the position actually carries;
        # without this the test would pass even if unrealized_pnl were wrong.
        assert position.unrealized_pnl == expected_pnl

    def test_portfolio_pnl(self):
        """Portfolio-level unrealized PnL should be the sum over positions."""
        from src.broker.base import Position

        positions = [
            Position(
                market_id="market1",
                token_id="token1",
                outcome="YES",
                size=Decimal("100"),
                avg_entry_price=Decimal("0.50"),
                current_price=Decimal("0.60"),
                unrealized_pnl=Decimal("10"),
            ),
            Position(
                market_id="market2",
                token_id="token2",
                outcome="NO",
                size=Decimal("50"),
                avg_entry_price=Decimal("0.40"),
                current_price=Decimal("0.35"),
                unrealized_pnl=Decimal("-2.5"),
            ),
        ]

        # One winning and one losing position: 10 + (-2.5)
        total_unrealized = sum(p.unrealized_pnl for p in positions)
        assert total_unrealized == Decimal("7.5")

    def test_realized_vs_unrealized(self):
        """Realized and unrealized PnL are separate fields that add up to total PnL."""
        from src.broker.base import Position

        # Position with both realized and unrealized
        position = Position(
            market_id="test",
            token_id="test",
            outcome="YES",
            size=Decimal("50"),  # After partial close
            avg_entry_price=Decimal("0.50"),
            current_price=Decimal("0.60"),
            unrealized_pnl=Decimal("5"),  # On remaining 50
            realized_pnl=Decimal("5"),  # From closed 50
        )

        total_pnl = position.unrealized_pnl + position.realized_pnl
        assert total_pnl == Decimal("10")
|
|
|
|
|
|
class TestEdgeCases:
    """Test edge cases in PnL accounting."""

    @pytest.mark.asyncio
    async def test_zero_balance(self, metrics_logger: MetricsLogger):
        """Should handle zero balance gracefully and still record the snapshot."""
        snapshot = PnLSnapshot(
            timestamp=datetime.utcnow(),
            balance=Decimal("0"),
            unrealized_pnl=Decimal("0"),
            realized_pnl=Decimal("-100"),
            total_equity=Decimal("0"),
            positions_value=Decimal("0"),
            drawdown_pct=Decimal("1"),  # 100% drawdown
            high_water_mark=Decimal("100"),
        )

        await metrics_logger.log_pnl_snapshot(snapshot)

        # Not raising is not enough: the snapshot must also be readable back,
        # checked the same way as in test_negative_pnl below.
        history = await metrics_logger.get_pnl_history()
        assert len(history) == 1
        assert history[0]["total_equity"] == 0

    @pytest.mark.asyncio
    async def test_negative_pnl(self, metrics_logger: MetricsLogger):
        """Should handle negative PnL correctly."""
        snapshot = PnLSnapshot(
            timestamp=datetime.utcnow(),
            balance=Decimal("900"),
            unrealized_pnl=Decimal("-50"),
            realized_pnl=Decimal("-50"),
            total_equity=Decimal("850"),
            positions_value=Decimal("50"),
            drawdown_pct=Decimal("0.15"),
            high_water_mark=Decimal("1000"),
        )

        await metrics_logger.log_pnl_snapshot(snapshot)

        history = await metrics_logger.get_pnl_history()
        assert len(history) == 1
        assert history[0]["unrealized_pnl"] == -50

    # Declared async with the asyncio marker, like every other test that uses
    # the async ``metrics_logger`` fixture, so the fixture is always resolved
    # to a connected logger rather than depending on how the plugin treats
    # synchronous tests. ``calculate_drawdown`` itself is called synchronously.
    @pytest.mark.asyncio
    async def test_drawdown_with_zero_hwm(self, metrics_logger: MetricsLogger):
        """Should report zero drawdown when the high water mark is zero."""
        metrics_logger._high_water_mark = Decimal("0")
        drawdown = metrics_logger.calculate_drawdown(Decimal("100"))
        assert drawdown == Decimal("0")
|