mirror of
https://github.com/saymrwulf/crisis.git
synced 2026-05-14 20:37:54 +00:00
The previous driver imposed a synchronous turn-counted clock that the
Crisis paper explicitly forbids — Crisis is supposed to work in
asynchronous P2P networks, with any synchronicity being virtual and
derived inside the consensus algorithm from the DAG structure, not
imposed externally by a coordinator. This commit removes the wall clock.
What changed in the engine:
- `Mothership.run_crisis_phase(num_turns, gossip_rounds_per_turn)`
is replaced by `run_until_quiescent(max_steps=200)`. The loop
interleaves three concerns on each iteration — emissions, gossip,
and alarm emissions — until none make progress. Termination is by
quiescence, not by a fixed turn count. `max_steps` is a safety
bound (loop-iteration cap), not an exposed clock.
- `Mothership.run_closed_phase(num_turns)` becomes
`run_closed_phase(max_steps=50)`. Same quiescence model — the
closed-phase conversation runs until no agent has more to say.
- Agents grew `pending_alarm_claims()`: each agent checks its own
graph for un-alarmed mutations and produces AlarmClaims directly.
The driver loop calls this every iteration, so alarms emit and
propagate in the same loop as regular emissions and gossip — no
separate "alarm phase."
- `Mothership.emit_alarms_from_detectors()` and the explicit
`run_gossip_round()` step are no longer needed by callers; both
are subsumed by the async loop. `run_gossip_round()` stays as a
helper but tests no longer call it externally.
What changed in the agent interface:
- `CrisisAgent.next_turn(turn, received_claims)` becomes
`try_emit()` — no arguments. Agents in an async network don't see
a global tick. They decide based on their own internal state.
- `CrisisAgent.observe(claim)` is the new optional callback the
closed-phase loop uses to feed context into agents that care
(overridden by LiveClaudeAgent to populate its prompt buffer).
- `pending_alarm_claims()` is idempotent: an internal
`_already_alarmed` set tracks claims this agent has emitted, so
the loop calls it every step without flooding the network with
duplicate alarms.
What changed in the dataclass schema:
- `AlarmClaim.detected_at_turn` -> `emitted_at_step`. The word
"turn" implies a global clock; "step" is a per-agent sequence
number used only for log ordering — local, not networked.
- `ClosedPhaseEntry.turn` and `CrisisPhaseEntry.turn` -> `step`.
Same rename, same reasoning.
- `Scenario.closed_phase_turns` and `Scenario.crisis_phase_turns`
are gone. The scenario no longer prescribes how many turns; it
just provides agents and lets the async loop run them out.
What changed in the CLI:
- Phase 3 reports "drove to quiescence in N step(s)" with a
breakdown of regular emissions / gossip transfers / alarm
emissions, instead of "ran N turns".
- `QuiescenceReport` (new dataclass) carries the run statistics
back from `run_until_quiescent`/`run_closed_phase` — steps taken,
emissions made, gossip transfers, alarm claims emitted, plus
whether termination was via quiescence or max-step cap.
New regression tests (`test_async_quiescence.py`):
- `test_run_until_quiescent_terminates`: the loop must exit.
- `test_two_runs_produce_identical_final_state`: determinism check —
if anything in the loop depended on real wall time, this would
fail.
- `test_max_steps_bound_caps_runtime`: setting max_steps=1 exits
immediately and `QuiescenceReport.reached_quiescence` reflects
reality.
- `test_no_turn_argument_exposed_to_agents`: introspects
`CrisisAgent.try_emit` signature; fails if anyone re-adds a
`turn` parameter.
- `test_no_turn_field_on_alarmclaim`: introspects the dataclass
fields; fails if `detected_at_turn` reappears.
- `test_alarms_propagate_through_async_loop_alone`: the loop alone
(no manual `emit_alarms_from_detectors()` / `run_gossip_round()` calls) ratifies an alarm.
- `test_quiescence_report_counts_match_logs`: sanity check that
the report's emission count equals the crisis log length.
Suite: 163 -> 170 tests, all green in 0.79s.
Behavioral end-state is identical to the previous (synchronous)
version: same fact-check scenario, same byzantine equivocation, same
proof JSON shape, same three signers, same quorum-met outcome. The
difference is structural: the protocol now matches the paper's async
shape, and a future port to actual TCP gossip + concurrent agents
needs no change to this engine.
CrisisViz: still untouched. The `crisis_data.json` pipeline that
drives the visualizer is orthogonal.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
139 lines
4.9 KiB
Python
139 lines
4.9 KiB
Python
"""Tests for AlarmClaim + tally_alarms (the voting layer)."""
|
|
|
|
import pytest
|
|
|
|
from crisis_agents.agent import MockAgent, MockByzantineAgent
|
|
from crisis_agents.alarm import LocalAlarm
|
|
from crisis_agents.claim import Claim
|
|
from crisis_agents.mothership import Mothership
|
|
from crisis_agents.vote import (
|
|
AlarmClaim,
|
|
RatifiedAlarm,
|
|
collect_alarm_claims,
|
|
quorum_for,
|
|
tally_alarms,
|
|
)
|
|
|
|
|
|
def _claim(sid: str, verdict: str = "true", evidence: str = "ok") -> Claim:
    """Build a test Claim for statement ``sid``.

    Confidence is pinned at 0.9 and the logical timestamp at 0, so callers
    only vary the statement id, the verdict and the evidence text.
    """
    return Claim(
        statement_id=sid,
        verdict=verdict,  # type: ignore[arg-type]
        confidence=0.9,
        evidence=evidence,
        timestamp_logical=0,
    )
|
|
|
|
|
|
def _intro(name: str = "delta") -> Claim:
    """Build an ``intro:<name>`` Claim with verdict "unknown" and confidence 1.0."""
    intro_id = f"intro:{name}"
    intro_note = f"{name} joining the team"
    return Claim(
        statement_id=intro_id,
        verdict="unknown",
        confidence=1.0,
        evidence=intro_note,
        timestamp_logical=0,
    )
|
|
|
|
|
|
def _full_run() -> Mothership:
    """Run a 3-honest + 1-byzantine scenario to quiescence and return it.

    Agents "a", "b" and "c" are MockAgents; "d" is a MockByzantineAgent
    scripted with two conflicting claims about statement "s03" (verdict
    "true" paired with split {"a", "c"}, verdict "false" paired with
    split {"b"} -- presumably each split names the recipients of the
    corresponding claim).

    Returns:
        The Mothership after ``run_until_quiescent()`` has returned.
    """
    m = Mothership()
    for honest_name in ("a", "b", "c"):
        m.add_agent(MockAgent(honest_name, [[]]))

    intro = _intro()
    claim_for_ac = _claim("s03", verdict="true", evidence="to_ac")
    claim_for_b = _claim("s03", verdict="false", evidence="to_b")
    byz = MockByzantineAgent(
        "d",
        intro,
        scripted_pairs=[(claim_for_ac, claim_for_b)],
        split_a={"a", "c"},
        split_b={"b"},
    )
    m.open_boundary(byz)

    # A single call drives everything: no separate alarm-emission step or
    # extra gossip round is invoked from this helper.
    m.run_until_quiescent()
    return m
|
|
|
|
|
|
class TestQuorumThreshold:
    """Pins ``quorum_for`` against hand-computed thresholds."""

    def test_quorum_formulas(self):
        """``quorum_for(N)`` matches ceil(2N/3), the classic BFT threshold."""
        # boundary size -> expected quorum
        expected_by_size = {1: 1, 2: 2, 3: 2, 4: 3, 7: 5, 10: 7}
        for size, expected in expected_by_size.items():
            assert quorum_for(size) == expected
|
|
|
|
|
|
class TestAlarmClaimRoundtrip:
    """Serialization and construction checks for ``AlarmClaim``."""

    def test_serialize_deserialize(self):
        """``from_payload(to_payload(x))`` reproduces an equal AlarmClaim."""
        original = AlarmClaim(
            accused_process_id_hex="76468f93",
            statement_id="s03",
            witness_digests=("aaaa", "bbbb"),
            emitted_at_step=1,
        )
        payload = original.to_payload()
        restored = AlarmClaim.from_payload(payload)
        assert restored == original

    def test_from_local_alarm(self):
        """``from_local_alarm`` copies the accusation fields and records the step."""
        local = LocalAlarm(
            detector_name="a",
            detector_process_id_hex="11",
            accused_process_id_hex="22",
            statement_id="s03",
            witness_digests=("aa", "bb"),
        )
        built = AlarmClaim.from_local_alarm(local, emitted_at_step=5)

        expected_fields = {
            "accused_process_id_hex": "22",
            "statement_id": "s03",
            "witness_digests": ("aa", "bb"),
            "emitted_at_step": 5,
        }
        for field_name, expected in expected_fields.items():
            assert getattr(built, field_name) == expected

    def test_rejects_non_alarm_payload(self):
        """A regular Claim's payload is refused with a ValueError."""
        regular_claim = Claim(
            statement_id="s01",
            verdict="true",
            confidence=0.9,
            evidence="ok",
            timestamp_logical=0,
        )
        # Build the payload up front so only from_payload runs under raises().
        payload = regular_claim.to_payload()
        with pytest.raises(ValueError, match="not an AlarmClaim"):
            AlarmClaim.from_payload(payload)
|
|
|
|
|
|
class TestTallyAlarms:
    """Checks on alarm collection and quorum tallying over a finished run."""

    def test_collect_alarm_claims_finds_only_alarms(self):
        """Each honest agent's graph yields AlarmClaims from three distinct signers."""
        m = _full_run()
        for honest_name in ("a", "b", "c"):
            graph = m.agents[honest_name].graph
            distinct_signers = {who for who, _ in collect_alarm_claims(graph)}
            # One signer per honest agent is expected in every honest graph.
            assert len(distinct_signers) == 3

    def test_tally_meets_quorum(self):
        """With the boundary-derived threshold, every honest graph ratifies s03."""
        m = _full_run()
        # The threshold comes from the boundary size
        # (3 honest + 1 byzantine joined, per the original note).
        needed = quorum_for(m.boundary.size())
        for honest_name in ("a", "b", "c"):
            outcome = tally_alarms(
                m.agents[honest_name].graph,
                quorum_threshold=needed,
            )
            assert len(outcome) == 1
            alarm = outcome[0]
            assert isinstance(alarm, RatifiedAlarm)
            assert alarm.statement_id == "s03"
            assert alarm.signer_count >= needed
            assert alarm.quorum_threshold == needed

    def test_tally_blocks_single_signer(self):
        """Below-quorum support must not ratify.

        The threshold is forced to 4 (4 of 4) against the graph produced
        by ``_full_run``; the tally is expected to come back empty.
        """
        m = _full_run()
        outcome = tally_alarms(m.agents["a"].graph, quorum_threshold=4)
        assert outcome == []

    def test_mothership_ratified_alarms_from_helper(self):
        """``Mothership.ratified_alarms_from`` agrees with a direct tally."""
        m = _full_run()
        needed = quorum_for(m.boundary.size())
        from_helper = m.ratified_alarms_from("a")
        from_direct = tally_alarms(
            m.agents["a"].graph,
            quorum_threshold=needed,
        )
        assert from_helper == from_direct
|