mirror of
https://github.com/saymrwulf/crisis.git
synced 2026-05-14 20:37:54 +00:00
A new sibling Python package, `crisis_agents`, that lifts the Crisis
protocol from "consensus between machines" to "consensus between AI
agents". Threat model: a team of sub-agents normally talks freely
with its orchestrator (the "mothership"); when the team's boundary
opens and an external agent of unknown trust joins, the mothership
activates the Crisis layer so byzantine equivocation is detectable.
Orchestration model (two orchestration phases, followed by a proof step):
Phase 1 — closed team, no Crisis: agents emit claims directly, the
mothership collects them flat.
Phase 2 — boundary opens: every subsequent claim is wrapped into a
Crisis Message with the agent's stable process_id and a PoW nonce,
delivered into per-agent LamportGraphs, and after each turn the
mothership scans for mutations via LamportGraph.find_mutations.
Phase 3 — proof: when an alarm fires, the mothership emits a
replayable JSON proof-of-malfeasance document with the contradictory
witnesses, their delivery sets, and DAG cross-references showing
which honest agents saw what.
Modules:
- claim.py Claim dataclass + JSON round-trip
- boundary.py membership tracker + open() trigger
- agent.py CrisisAgent abstract + MockAgent + MockByzantineAgent
(the latter equivocates by emitting two variants to
disjoint peer subsets at the same logical turn)
- mothership.py orchestrator driving both phases, building Crisis
Messages from Claims, per-agent LamportGraphs, log
- alarm.py scan_for_mutations: same-agent same-turn distinct
digests with non-identical delivery sets, verified
spacelike via LamportGraph.are_spacelike on the
honest-agent graphs
- proof.py build_proof + ProofDocument + JSON serializer +
verify_proof_self_consistent
- cli.py `crisis-agents demo` + `crisis-agents verify`
- scenarios/ fact_check: reference doc + 6 statements + scripted
honest/byzantine agents producing a deterministic
equivocation on statement s03
Tests: 50 new tests across test_claim, test_boundary, test_mothership,
test_alarm, test_proof, test_demo_fact_check. End-to-end test runs the
fact_check scenario, asserts exactly one alarm raised, proof is built,
re-serialized JSON passes self-consistency. Full suite (existing
crisis + new crisis_agents) green in 0.77s — 145 tests.
Out of scope (deliberately): visualization (separate CrisisViz upgrade
later), real TCP gossip (agents talk via in-process function calls in
the mothership), false-claim detection without equivocation (an
agent that consistently lies but never equivocates is out-voted, not
"caught"; catching it would require a ground-truth oracle).
Reuse from existing crisis package: Message, Vertex, LamportGraph,
LamportGraph.find_mutations, ProofOfWorkWeight, digest. The new code
is a thin adapter layer; the protocol substrate did the heavy lifting.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
104 lines
4.1 KiB
Python
104 lines
4.1 KiB
Python
"""Tests for the Claim payload dataclass."""
|
|
|
|
import json
|
|
|
|
import pytest
|
|
|
|
from crisis_agents.claim import Claim
|
|
|
|
|
|
class TestClaimConstruction:
    """Construction of well-formed Claim instances."""

    def test_basic_claim_roundtrip(self):
        """Every value handed to the constructor can be read back unchanged."""
        evidence = "The reference doc states this directly in paragraph 2."
        c = Claim(
            statement_id="s01",
            verdict="true",
            confidence=0.95,
            evidence=evidence,
            timestamp_logical=3,
        )
        assert c.statement_id == "s01"
        assert c.verdict == "true"
        # approx: confidence is a float, so avoid exact binary comparison.
        assert c.confidence == pytest.approx(0.95)
        # evidence and timestamp_logical are constructor inputs too; checking
        # them catches a dropped or swapped field.
        assert c.evidence == evidence
        assert c.timestamp_logical == 3
        # schema_version is not passed above, so 1 is whatever Claim supplies
        # on its own.
        assert c.schema_version == 1

    def test_unknown_verdict_accepted(self):
        """The verdict "unknown" is accepted by the constructor."""
        c = Claim(statement_id="s02", verdict="unknown", confidence=0.5,
                  evidence="no signal", timestamp_logical=0)
        assert c.verdict == "unknown"
|
|
|
|
|
|
class TestClaimValidation:
    """Each malformed field must make the Claim constructor raise ValueError."""

    def test_empty_statement_id_rejected(self):
        """An empty statement_id is refused; the error text names the field."""
        fields = dict(statement_id="", verdict="true", confidence=0.9,
                      evidence="x", timestamp_logical=0)
        with pytest.raises(ValueError, match="statement_id"):
            Claim(**fields)

    def test_invalid_verdict_rejected(self):
        """A verdict of "maybe" is refused; the error text names the field."""
        fields = dict(statement_id="s01", verdict="maybe", confidence=0.9,
                      evidence="x", timestamp_logical=0)
        with pytest.raises(ValueError, match="verdict"):
            Claim(**fields)

    def test_confidence_out_of_range(self):
        """Confidence values 1.5 and -0.1 are both refused."""
        # Same order as before: first the too-large value, then the negative.
        for bad_confidence in (1.5, -0.1):
            with pytest.raises(ValueError, match="confidence"):
                Claim(statement_id="s01", verdict="true",
                      confidence=bad_confidence, evidence="x",
                      timestamp_logical=0)

    def test_evidence_too_long(self):
        """A 500-character evidence string is refused as too long."""
        oversized = "x" * 500
        with pytest.raises(ValueError, match="evidence too long"):
            Claim(statement_id="s01", verdict="true", confidence=0.9,
                  evidence=oversized, timestamp_logical=0)

    def test_negative_timestamp_rejected(self):
        """A timestamp_logical of -1 is refused."""
        fields = dict(statement_id="s01", verdict="true", confidence=0.9,
                      evidence="x", timestamp_logical=-1)
        with pytest.raises(ValueError, match="timestamp"):
            Claim(**fields)
|
|
|
|
|
|
class TestPayloadRoundtrip:
    """Serialization of a Claim to bytes and back again."""

    def test_to_payload_returns_bytes(self):
        """to_payload() yields bytes that decode as UTF-8 JSON."""
        payload = Claim(statement_id="s01", verdict="true", confidence=0.9,
                        evidence="ok", timestamp_logical=2).to_payload()
        assert isinstance(payload, bytes)
        # The bytes must parse as JSON and carry the statement id.
        decoded = json.loads(payload.decode("utf-8"))
        assert decoded["statement_id"] == "s01"

    def test_from_payload_inverts_to_payload(self):
        """from_payload(to_payload(c)) compares equal to c."""
        original = Claim(statement_id="s04", verdict="false", confidence=0.72,
                         evidence="contradicted by para 3",
                         timestamp_logical=7)
        payload = original.to_payload()
        restored = Claim.from_payload(payload)
        assert restored == original

    def test_payload_is_deterministic(self):
        """Two claims built from the same values serialize to the same bytes.

        Byte-identical payloads are what lets identical messages be told
        apart from differing payloads, i.e. real equivocation.
        """
        fields = dict(statement_id="s01", verdict="true", confidence=0.9,
                      evidence="evidence here", timestamp_logical=1)
        first = Claim(**fields)
        second = Claim(**fields)
        assert first.to_payload() == second.to_payload()

    def test_from_payload_rejects_garbage(self):
        """Bytes that are not JSON raise ValueError mentioning "Claim JSON"."""
        garbage = b"not json"
        with pytest.raises(ValueError, match="Claim JSON"):
            Claim.from_payload(garbage)

    def test_from_payload_rejects_missing_fields(self):
        """A JSON object lacking statement_id raises TypeError."""
        # Valid JSON, but the required statement_id key is absent.
        incomplete = b'{"verdict":"true","confidence":0.9,"evidence":"x","timestamp_logical":0}'
        with pytest.raises(TypeError):
            Claim.from_payload(incomplete)

    def test_payload_size_is_bounded(self):
        """A claim carrying 280 characters of evidence stays under 500 bytes.

        NOTE(review): 280 is presumably the EVIDENCE_MAX_LEN cap defined in
        crisis_agents.claim -- confirm against that module.
        """
        evidence = "x" * 280
        claim = Claim(statement_id="s99", verdict="true", confidence=1.0,
                      evidence=evidence, timestamp_logical=999)
        payload = claim.to_payload()
        # 280 evidence characters plus JSON overhead and the small fields
        # should land well below the 500-byte ceiling.
        assert len(payload) < 500
|