mirror of
https://github.com/saymrwulf/crisis.git
synced 2026-05-14 20:37:54 +00:00
A new sibling Python package, `crisis_agents`, that lifts the Crisis
protocol from "consensus between machines" to "consensus between AI
agents". Threat model: a team of sub-agents normally talks freely
with its orchestrator (the "mothership"); when the team's boundary
opens and an external agent of unknown trust joins, the mothership
activates the Crisis layer so byzantine equivocation is detectable.
Orchestration model — two runtime phases plus a proof phase:
Phase 1 — closed team, no Crisis: agents emit claims directly, the
mothership collects them flat.
Phase 2 — boundary opens: every subsequent claim is wrapped into a
Crisis Message with the agent's stable process_id and a PoW nonce,
delivered into per-agent LamportGraphs, and after each turn the
mothership scans for mutations via LamportGraph.find_mutations.
Phase 3 — proof: when an alarm fires, the mothership emits a
replayable JSON proof-of-malfeasance document with the contradictory
witnesses, their delivery sets, and DAG cross-references showing
which honest agents saw what.
Modules:
- claim.py Claim dataclass + JSON round-trip
- boundary.py membership tracker + open() trigger
- agent.py CrisisAgent abstract + MockAgent + MockByzantineAgent
(the latter equivocates by emitting two variants to
disjoint peer subsets at the same logical turn)
- mothership.py orchestrator driving both phases, building Crisis
Messages from Claims, per-agent LamportGraphs, log
- alarm.py scan_for_mutations: same-agent same-turn distinct
digests with non-identical delivery sets, verified
spacelike via LamportGraph.are_spacelike on the
honest-agent graphs
- proof.py build_proof + ProofDocument + JSON serializer +
verify_proof_self_consistent
- cli.py `crisis-agents demo` + `crisis-agents verify`
- scenarios/ fact_check: reference doc + 6 statements + scripted
honest/byzantine agents producing a deterministic
equivocation on statement s03
Tests: 50 new tests across test_claim, test_boundary, test_mothership,
test_alarm, test_proof, test_demo_fact_check. End-to-end test runs the
fact_check scenario, asserts exactly one alarm raised, proof is built,
re-serialized JSON passes self-consistency. Full suite (existing
crisis + new crisis_agents) green in 0.77s — 145 tests.
Out of scope (deliberately): visualization (separate CrisisViz upgrade
later), real TCP gossip (agents talk via in-process function calls in
the mothership), false-claim detection without equivocation (an
agent that consistently lies but never equivocates is out-voted, not
"caught"; catching it would require a ground-truth oracle).
Reuse from existing crisis package: Message, Vertex, LamportGraph,
LamportGraph.find_mutations, ProofOfWorkWeight, digest. The new code
is a thin adapter layer; the protocol substrate did the heavy lifting.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
134 lines
5.1 KiB
Python
134 lines
5.1 KiB
Python
"""Tests for the Mothership orchestrator — closed phase + Crisis-phase wiring."""
|
|
|
|
import pytest
|
|
|
|
from crisis_agents.agent import MockAgent, MockByzantineAgent
|
|
from crisis_agents.claim import Claim
|
|
from crisis_agents.mothership import Mothership
|
|
|
|
|
|
def _claim(sid: str, verdict: str = "true", turn: int = 0, evidence: str = "ok") -> Claim:
    """Build a Claim fixture for the tests in this module.

    Args:
        sid: Value for the claim's ``statement_id``.
        verdict: Value for the claim's ``verdict``.
        turn: Value for the claim's ``timestamp_logical``.
        evidence: Value for the claim's ``evidence``.

    Returns:
        A Claim whose ``confidence`` is always 0.9.
    """
    fixture = Claim(
        statement_id=sid,
        # NOTE(review): the ignore presumably exists because Claim.verdict is
        # annotated more narrowly than ``str`` — confirm against claim.py.
        verdict=verdict,  # type: ignore[arg-type]
        confidence=0.9,
        evidence=evidence,
        timestamp_logical=turn,
    )
    return fixture
|
|
|
|
|
|
class TestClosedPhase:
    """Mothership behaviour while the boundary is still closed."""

    def test_no_dag_in_closed_phase(self):
        """The closed phase logs claims flat and allocates no graphs."""
        mothership = Mothership()
        for agent_name in ("a", "b"):
            mothership.add_agent(MockAgent(agent_name, [[_claim("s01")]]))

        outcome = mothership.run_closed_phase(num_turns=1)

        # One claim from each of the two agents ends up in the closed log.
        assert len(outcome.closed_log) == 2
        emitters = [entry.agent_name for entry in outcome.closed_log]
        assert "a" in emitters
        assert "b" in emitters

        # Nothing Crisis-related exists yet: no graphs, boundary still closed.
        assert mothership.all_graphs() == {}
        assert not mothership.boundary.is_open

    def test_multi_turn_observes_prior_claims(self):
        """Each turn's agents see the claims emitted in previous turns."""

        class WatcherAgent(MockAgent):
            """MockAgent that records how many claims it is handed per turn."""

            def __init__(self, name):
                script = [[_claim("s01")], [_claim("s02")]]
                super().__init__(name, script)
                self.received_per_turn: list[int] = []

            def next_turn(self, turn, received):
                # Record the size of the inbox before delegating to the script.
                self.received_per_turn.append(len(received))
                return super().next_turn(turn, received)

        watcher = WatcherAgent("watcher")
        bystander = MockAgent("other", [[_claim("s99")], [_claim("s99")]])

        mothership = Mothership()
        mothership.add_agent(watcher)
        mothership.add_agent(bystander)
        mothership.run_closed_phase(num_turns=2)

        # Turn 0: nothing has been emitted yet. Turn 1: the watcher is handed
        # the two claims emitted during turn 0 (one per agent).
        assert watcher.received_per_turn == [0, 2]

    def test_add_agent_after_open_rejected(self):
        """add_agent raises RuntimeError once the boundary has been opened."""
        mothership = Mothership()
        mothership.add_agent(MockAgent("a", [[_claim("s01")]]))
        mothership.open_boundary(MockAgent("byz", [[_claim("s01")]]))

        with pytest.raises(RuntimeError, match="cannot add_agent"):
            mothership.add_agent(MockAgent("late", []))
|
|
|
|
|
|
class TestCrisisPhaseWiring:
    """Mothership behaviour once the boundary has been opened."""

    def test_open_boundary_initializes_graphs(self):
        """Opening the boundary creates one empty graph per agent."""
        mothership = Mothership()
        for member in ("a", "b"):
            mothership.add_agent(MockAgent(member, [[_claim("s01")]]))
        mothership.open_boundary(MockAgent("d", [[_claim("s01")]]))

        # One graph per agent, including the joiner.
        graphs = mothership.all_graphs()
        assert set(graphs) == {"a", "b", "d"}

        # The crisis phase has not run yet, so every graph is still empty.
        vertex_counts = {name: graph.vertex_count() for name, graph in graphs.items()}
        assert vertex_counts == dict.fromkeys(graphs, 0)

    def test_run_crisis_phase_extends_per_agent_graphs(self):
        """A broadcast turn lands every agent's claim in every graph."""
        mothership = Mothership()
        for member in ("a", "b"):
            mothership.add_agent(MockAgent(member, [[_claim("s01")]]))
        mothership.open_boundary(MockAgent("d", [[_claim("s01")]]))

        outcome = mothership.run_crisis_phase(num_turns=1)

        # Each agent's graph should now contain three vertices
        # (broadcast claims from a, b, d delivered to everyone).
        vertex_counts = {
            name: mothership.graph_of(name).vertex_count()
            for name in ("a", "b", "d")
        }
        assert vertex_counts == {"a": 3, "b": 3, "d": 3}

        everyone = {"a", "b", "d"}
        assert len(outcome.crisis_log) == 3
        for entry in outcome.crisis_log:
            assert set(entry.delivered_to) == everyone

    def test_byzantine_equivocation_splits_delivery(self):
        """A MockByzantineAgent delivers two different claims to disjoint
        subsets — the foundation of the equivocation detection demo."""
        true_marker = b'"verdict":"true"'
        false_marker = b'"verdict":"false"'

        mothership = Mothership()
        mothership.add_agent(MockAgent("a", [[]]))
        mothership.add_agent(MockAgent("b", [[]]))

        variant_for_a = _claim("s01", verdict="true", evidence="to_a")
        variant_for_b = _claim("s01", verdict="false", evidence="to_b")
        equivocator = MockByzantineAgent(
            "d",
            scripted_pairs=[(variant_for_a, variant_for_b)],
            split_a={"a"},
            split_b={"b"},
        )
        mothership.open_boundary(equivocator)
        mothership.run_crisis_phase(num_turns=1)

        # Collect the raw payload bytes held in each agent's graph.
        payloads = {
            name: [vertex.payload for vertex in mothership.graph_of(name).all_vertices()]
            for name in ("a", "b", "d")
        }

        # a sees only the true-variant.
        assert any(true_marker in payload for payload in payloads["a"])
        assert not any(false_marker in payload for payload in payloads["a"])

        # b sees only the false-variant.
        assert any(false_marker in payload for payload in payloads["b"])
        assert not any(true_marker in payload for payload in payloads["b"])

        # d (the byzantine sender) sees neither variant: targeted delivery
        # skips the sender — see Mothership._emit docstring.
        assert len(payloads["d"]) == 0

        # But the crisis_log records both emissions so the mothership can
        # generate proofs from this perspective.
        assert len(mothership.run_result.crisis_log) == 2

    def test_run_crisis_phase_requires_open_boundary(self):
        """run_crisis_phase raises RuntimeError while the boundary is closed."""
        mothership = Mothership()
        mothership.add_agent(MockAgent("a", [[_claim("s01")]]))

        with pytest.raises(RuntimeError, match="boundary not yet open"):
            mothership.run_crisis_phase(num_turns=1)
|