autoresearch-quantum/scripts/enhance_nb03.py
saymrwulf 29caba3a1a Add professional toolchain: mypy strict, CI pipeline, Playwright UX tests, pedagogy validation
Infrastructure:
- Configure mypy strict mode in pyproject.toml; fix all 53 type errors across 8 source files
- Add .pre-commit-config.yaml (ruff, mypy, nbstripout, trailing whitespace)
- Add .github/workflows/ci.yml: lint + type check, unit tests (Python 3.11/3.12), notebook execution
- Add scripts/app.sh consumer lifecycle manager (bootstrap, start, stop, status, validate, logs, reset)

Testing:
- Add tests/test_browser_ux.py: Playwright end-to-end UX tests covering JupyterLab launch,
  notebook rendering, navigation links, widget rendering, and full consumer walkthrough
- Add tests/test_pedagogy.py: 130 pedagogical structure tests validating prose quality
  (word counts, markdown ratio), section structure, assessment density and variety,
  Bloom's taxonomy coverage, checkpoint presence, tracker integration, key insight
  callouts, and cross-plan concept consistency

Quality:
- Fix ruff E741 (ambiguous variable name) across all builder scripts
- Add Key Insight callouts to plan_a/01_encoded_magic_state.ipynb
- Add pytest 'browser' marker for selective UX test runs
- Expand .gitignore with .logs/ and build artifacts

319 tests pass, 85% coverage, mypy strict clean, ruff clean.
2026-04-15 20:00:19 +02:00

143 lines
6.4 KiB
Python

"""Widget-based teaching cells for Plan A — Notebook 03: The Ratchet."""
import json
from pathlib import Path
# Notebook to enhance. The path is relative, so it is resolved against the
# current working directory (presumably the repository root -- confirm).
NB_PATH = Path("notebooks/plan_a/03_the_ratchet.ipynb")
# .ipynb files are UTF-8 encoded JSON; decode explicitly instead of relying on
# the platform locale encoding that Path.read_text() uses by default.
nb = json.loads(NB_PATH.read_text(encoding="utf-8"))
# Cell count before any insertion, reported in the final summary message.
ORIG = len(nb["cells"])
def _source_lines(s: str) -> list[str]:
    """Split cell text into the line list stored under a cell's ``source`` key.

    Leading and trailing whitespace is stripped first. Every line except the
    last keeps a trailing newline; the last line has none.
    """
    lines = s.strip().split("\n")
    return [ln + "\n" for ln in lines[:-1]] + [lines[-1]]


def md(s: str) -> dict[str, object]:
    """Return a markdown cell dict whose source is the text *s*."""
    return {"cell_type": "markdown", "metadata": {}, "source": _source_lines(s)}


def code(s: str) -> dict[str, object]:
    """Return an unexecuted code cell dict (no outputs) whose source is *s*."""
    return {
        "cell_type": "code",
        "metadata": {},
        "source": _source_lines(s),
        "outputs": [],
        "execution_count": None,
    }
# Insertion plan. Each entry is (index of the existing cell to insert after,
# list of new cells); entries are listed in ascending index order.
ins = [
    # Setup: import the assessment helpers and create the tracker.
    (1, [
        code("""from autoresearch_quantum.teaching import LearningTracker
from autoresearch_quantum.teaching.assess import quiz, predict_choice, reflect, order, checkpoint_summary
tracker = LearningTracker("plan_a_03")
print("Learning tracker active.")"""),
    ]),
    # Section 1: the ratchet guarantee.
    (3, [
        md("""### The ratchet guarantee\n\nThe key property: the incumbent **never gets worse**. A challenger must demonstrably beat the incumbent to replace it."""),
        code("""quiz(tracker, "q1_ratchet_guarantee",
question="What is the ratchet guarantee?",
options=[
"Every step improves the score",
"The incumbent never gets worse \\u2014 challengers must beat it to replace it",
"The search space shrinks every step",
"The ratchet always converges to the global optimum",
],
correct=1, section="1. Incumbent-challenger", bloom="remember",
explanation="The ratchet is monotonic: if no challenger beats the incumbent, the incumbent stays. This does NOT guarantee finding the global optimum.")"""),
    ]),
    # Section 2: how challengers are generated.
    (5, [
        code("""quiz(tracker, "q2_neighborwalk",
question="How does NeighborWalk generate challengers?",
options=[
"Changes all parameters simultaneously to random values",
"Changes exactly one parameter at a time to each of its other possible values",
"Applies gradient descent to continuous parameters",
],
correct=1, section="2. Challengers", bloom="understand",
explanation="NeighborWalk is single-axis: for each dimension, try every alternative value while keeping all other dimensions fixed.")
checkpoint_summary(tracker, "2. Challengers")"""),
    ]),
    # Section 3: reading the evaluation bar chart.
    (9, [
        code("""predict_choice(tracker, "q3_challenger_wins",
question="Looking at the bar chart: did any challenger beat the incumbent?",
options=[
"Yes \\u2014 at least one bar is taller than INCUMBENT",
"No \\u2014 the incumbent bar is the tallest",
"Can't tell from a bar chart",
],
correct=0, section="3. Evaluation", bloom="apply",
explanation="In most runs, at least one challenger finds a better configuration.")"""),
    ]),
    # Section 4: a ratchet step without improvement.
    (11, [
        code("""quiz(tracker, "q4_no_improvement",
question="What happens if ALL challengers score lower than the incumbent?",
options=[
"The harness picks the best challenger anyway",
"The incumbent stays and the step is logged with zero improvement",
"The harness generates more challengers until one wins",
],
correct=1, section="4. Ratchet step", bloom="understand",
explanation="Monotonic guarantee: if no challenger wins, the incumbent stays. Consecutive no-improvement steps trigger patience.")
checkpoint_summary(tracker, "4. Ratchet step")"""),
    ]),
    # Section 5: free-text reflection on the lesson narrative.
    (14, [
        code("""reflect(tracker, "q5_lesson_quality",
question="Read the lesson narrative above. What actionable insight does it give? What would make it better?",
section="5. Lesson", bloom="evaluate",
model_answer="A good lesson names specific parameter values that helped/hurt and explains WHY. The machine-readable rules are often more actionable than the narrative.")"""),
    ]),
    # Section 6: comparing the search strategies.
    (18, [
        md("""### Strategy comparison\n\n- **NeighborWalk**: 1 axis at a time, systematic\n- **RandomCombo**: multiple axes, random\n- **LessonGuided**: rule-biased from previous rungs"""),
        code("""order(tracker, "q6_strategy_breadth",
instruction="Rank strategies from narrowest to broadest exploration:",
items=["NeighborWalk", "RandomCombo", "LessonGuided"],
correct_order=["NeighborWalk", "LessonGuided", "RandomCombo"],
section="6. Search strategies", bloom="analyze",
explanation="NeighborWalk: 1 param (narrowest). LessonGuided: focused by rules (medium). RandomCombo: multiple params randomly (broadest).")"""),
    ]),
    # Section 7: 'fix' versus 'avoid' rules.
    (20, [
        code("""quiz(tracker, "q7_fix_vs_avoid",
question="What is the difference between a 'fix' rule and an 'avoid' rule?",
options=[
"'fix' locks a value permanently; 'avoid' removes a value from the search space",
"'fix' repairs a bug; 'avoid' prevents a crash",
"They are synonyms",
],
correct=0, section="7. Lesson-guided", bloom="remember",
explanation="'fix': this value is clearly best, always use it. 'avoid': this value consistently hurts, remove it.")
checkpoint_summary(tracker, "7. Lesson-guided")"""),
    ]),
    # Section 8: cross-rung propagation.
    (23, [
        code("""quiz(tracker, "q8_propagation",
question="Why does the ratchet propagate the winning spec to the next rung?",
options=[
"To save typing the spec again",
"The winner from rung N is a good starting point for rung N+1, avoiding cold-start",
"Each rung must use the same spec",
],
correct=1, section="8. Cross-rung", bloom="understand",
explanation="Cross-rung propagation transfers knowledge: best settings from one rung become the starting point for the next.")"""),
    ]),
    # Section 9: judging transfer quality.
    (25, [
        code("""quiz(tracker, "q9_transfer_quality",
question="When is a transfer score 'good'?",
options=[
"When it is higher than 0",
"When it is close to the original score on the source backend",
"When it is exactly 1.0",
],
correct=1, section="9. Transfer", bloom="evaluate",
explanation="Good transfer means settings work almost as well on the target backend. A large drop means overfitting to the source noise profile.")
checkpoint_summary(tracker, "9. Transfer")"""),
    ]),
    # Closing section: show the dashboard and save the learner's progress.
    (26, [
        md("---\n## Final Assessment"),
        code("""tracker.dashboard()
path = tracker.save()
print(f"\\nProgress saved to: {path}")"""),
    ]),
]
# The indices in `ins` refer to cell positions in the notebook as loaded and
# are listed in ascending order, so walk the plan back to front: splicing at
# a higher index never shifts the position of a group still to be inserted.
# NOTE(review): nothing here detects an already-enhanced notebook, so running
# the script a second time would insert the same cells again.
for after_idx, cells in reversed(ins):
    # Splice the whole group in directly after cell `after_idx`, keeping the
    # new cells in their listed order.
    insert_at = after_idx + 1
    nb["cells"][insert_at:insert_at] = cells
# With ensure_ascii=False non-ASCII characters are written verbatim, and
# notebook files are UTF-8 JSON, so state the encoding explicitly rather than
# inheriting the platform locale default of Path.write_text().
NB_PATH.write_text(json.dumps(nb, indent=1, ensure_ascii=False), encoding="utf-8")
print(f"Enhanced notebook 03: {ORIG} -> {len(nb['cells'])} cells")