autoresearch-quantum/src/autoresearch_quantum/execution/hardware.py
saymrwulf f9b8f3457f Initial commit: autoresearch-quantum — automated magic-state preparation ratchet
Karpathy-style autoresearch engine for encoded magic-state preparation
on the [[4,2,2]] quantum error-detecting code using Qiskit Aer simulation.

Five-rung progressive search: baseline -> stability -> transfer -> factory -> Rosenfeld.
Smart challenger generation (neighbor walk + random combo + lesson-guided).
Machine-readable lesson feedback with per-dimension effects, interaction detection,
and cross-rung propagation. Factory throughput scoring. Resumable execution.
21 tests, all passing.
2026-04-05 12:37:39 +02:00

121 lines
5.6 KiB
Python

from __future__ import annotations
from statistics import fmean
from ..codes.four_two_two import MEASUREMENT_OPERATORS
from ..experiments.encoded_magic_state import build_circuit_bundle
from ..models import EvaluationMetrics, ExperimentSpec, RungConfig, TierResult
from ..scoring.score import score_metrics
from .analysis import logical_magic_witness, sampler_memory_records, stability_score, summarize_context
from .backends import backend_metadata, resolve_backend
from .transpile import circuit_metadata, count_two_qubit_gates, runtime_estimate, transpile_circuits
# Optional dependency guard: when qiskit_ibm_runtime cannot be imported,
# SamplerV2 is bound to None so this module still imports cleanly.
# IBMHardwareExecutor.evaluate checks for None and raises RuntimeError.
try:
from qiskit_ibm_runtime import SamplerV2
except ImportError: # pragma: no cover - exercised only when hardware extras missing
SamplerV2 = None
class IBMHardwareExecutor:
    """Evaluate an experiment on an IBM backend through ``SamplerV2``.

    The executor builds the acceptance circuit and the witness circuits for a
    spec, transpiles them for the resolved backend, samples them for the
    configured number of repeats, and folds the per-repeat summaries into an
    ``EvaluationMetrics`` / ``TierResult`` pair for the ``"expensive"`` tier.
    """

    # Stability scores strictly below this value are reported as drift-sensitive.
    DRIFT_STABILITY_THRESHOLD = 0.75

    @classmethod
    def _dominant_failure_mode(cls, stability: float | None) -> str:
        """Return the failure-mode label for a stability score.

        ``None`` (no score available) is treated as stable.  The check is an
        explicit ``is not None`` test rather than ``stability or 1.0`` because
        a score of exactly ``0.0`` is falsy and would otherwise be replaced by
        ``1.0`` and reported as stable.
        """
        if stability is not None and stability < cls.DRIFT_STABILITY_THRESHOLD:
            return "hardware drift sensitivity"
        return "hardware confirmation run"

    def evaluate(self, spec: ExperimentSpec, rung_config: RungConfig) -> TierResult:
        """Run the hardware tier for ``spec`` and return its scored result.

        Args:
            spec: Experiment to evaluate; also supplies the fallback backend
                name (``spec.target_backend``) when the rung does not set one.
            rung_config: Provides the hardware settings, the shot and repeat
                counts (``tier_policy.expensive_shots`` /
                ``tier_policy.expensive_repeats``) and the scoring config.

        Returns:
            A ``TierResult`` for tier ``"expensive"`` containing the score,
            quality estimate, aggregated metrics, a per-context summary and a
            note describing the run.

        Raises:
            RuntimeError: If ``qiskit-ibm-runtime`` is not installed.
            KeyError: If the circuit bundle has no ``logical_x``,
                ``logical_y`` or ``spectator_z`` witness circuit.
        """
        if SamplerV2 is None:
            raise RuntimeError(
                "qiskit-ibm-runtime is not installed. Install the hardware extra to enable IBM execution."
            )

        backend_name = rung_config.hardware.backend_name or spec.target_backend
        backend = resolve_backend(backend_name, rung_config.hardware)

        # The acceptance circuit always comes first, followed by the witnesses.
        bundle = build_circuit_bundle(spec)
        context_names = ["acceptance", *bundle.witness_circuits.keys()]
        raw_circuits = [bundle.acceptance, *bundle.witness_circuits.values()]
        transpiled_contexts = transpile_circuits(raw_circuits, spec, backend)
        circuits_by_name = dict(zip(context_names, transpiled_contexts, strict=True))
        # Loop-invariant: the same circuits are submitted on every repeat.
        circuits = list(circuits_by_name.values())

        shots = rung_config.tier_policy.expensive_shots
        repeats = rung_config.tier_policy.expensive_repeats
        sampler = SamplerV2(mode=backend)

        aggregate: dict[str, list[dict[str, object]]] = {name: [] for name in context_names}
        repeat_scores: list[float] = []

        for _ in range(repeats):
            result = sampler.run(circuits, shots=shots).result()
            for context_name, pub_result, circuit in zip(
                context_names,
                result,
                circuits,
                strict=True,
            ):
                records = sampler_memory_records(
                    {
                        name: bit_array.get_bitstrings()
                        for name, bit_array in pub_result.data.items()
                    }
                )
                summary = summarize_context(
                    records,
                    syndrome_labels=list(circuit.metadata.get("syndrome_labels", [])),
                    postselection=str(circuit.metadata.get("postselection", "none")),
                    operator=MEASUREMENT_OPERATORS.get(context_name),
                )
                aggregate[context_name].append(summary)

            # Per-repeat score: witness of this repeat weighted by its
            # acceptance rate; the spread of these feeds the stability score.
            x_value = float(aggregate["logical_x"][-1]["expectation"])
            y_value = float(aggregate["logical_y"][-1]["expectation"])
            spectator = float(aggregate["spectator_z"][-1]["expectation"])
            acceptance = float(aggregate["acceptance"][-1]["acceptance_rate"])
            repeat_scores.append(logical_magic_witness(x_value, y_value, spectator) * acceptance)

        acceptance_rate = fmean(float(item["acceptance_rate"]) for item in aggregate["acceptance"])
        logical_x = fmean(float(item["expectation"]) for item in aggregate["logical_x"])
        logical_y = fmean(float(item["expectation"]) for item in aggregate["logical_y"])
        spectator_z = fmean(float(item["expectation"]) for item in aggregate["spectator_z"])

        metrics = EvaluationMetrics(
            logical_magic_witness=logical_magic_witness(logical_x, logical_y, spectator_z),
            acceptance_rate=acceptance_rate,
            # Mean acceptance rate over every context and every repeat.
            codespace_rate=fmean(
                float(item["acceptance_rate"])
                for summaries in aggregate.values()
                for item in summaries
            ),
            spectator_logical_z=spectator_z,
            logical_x=logical_x,
            logical_y=logical_y,
            stability_score=stability_score(repeat_scores),
            two_qubit_count=sum(count_two_qubit_gates(circuit) for circuit in circuits),
            depth=max(circuit.depth() for circuit in circuits),
            shot_count=shots * repeats * len(circuits_by_name),
            runtime_estimate=sum(runtime_estimate(circuit) for circuit in circuits),
            queue_cost_proxy=1.0,
            transpile_metadata={
                name: circuit_metadata(circuit, spec) for name, circuit in circuits_by_name.items()
            },
            backend_metadata={"target_backend": backend_metadata(backend)},
        )
        metrics.dominant_failure_mode = self._dominant_failure_mode(metrics.stability_score)

        score, quality, _ = score_metrics(metrics, "expensive", rung_config.score)
        notes = [
            f"Hardware-tier confirmation used backend {backend.name} with {shots} shots x {repeats} repeats."
        ]

        return TierResult(
            tier="expensive",
            score=score,
            quality_estimate=quality,
            metrics=metrics,
            counts_summary={
                name: {
                    "mean_acceptance_rate": fmean(float(item["acceptance_rate"]) for item in summaries),
                    "mean_expectation": fmean(float(item["expectation"]) for item in summaries),
                    "latest": summaries[-1],
                }
                for name, summaries in aggregate.items()
            },
            notes=notes,
        )