mirror of
https://github.com/saymrwulf/autoresearch-quantum.git
synced 2026-05-14 20:37:51 +00:00
Infrastructure:
- Configure mypy strict mode in pyproject.toml; fix all 53 type errors across 8 source files
- Add .pre-commit-config.yaml (ruff, mypy, nbstripout, trailing whitespace)
- Add .github/workflows/ci.yml: lint + type check, unit tests (Python 3.11/3.12), notebook execution
- Add scripts/app.sh consumer lifecycle manager (bootstrap, start, stop, status, validate, logs, reset)
Testing:
- Add tests/test_browser_ux.py: Playwright end-to-end UX tests covering JupyterLab launch, notebook rendering, navigation links, widget rendering, and full consumer walkthrough
- Add tests/test_pedagogy.py: 130 pedagogical structure tests validating prose quality (word counts, markdown ratio), section structure, assessment density and variety, Bloom's taxonomy coverage, checkpoint presence, tracker integration, key insight callouts, and cross-plan concept consistency
Quality:
- Fix ruff E741 (ambiguous variable name) across all builder scripts
- Add Key Insight callouts to plan_a/01_encoded_magic_state.ipynb
- Add pytest 'browser' marker for selective UX test runs
- Expand .gitignore with .logs/ and build artifacts
319 tests pass, 85% coverage, mypy strict clean, ruff clean.
288 lines
12 KiB
Python
288 lines
12 KiB
Python
"""Pedagogical structure tests — validates educational quality invariants.
|
|
|
|
These tests enforce minimum standards for notebook prose, assessment density,
|
|
section structure, and learning progression. They catch pedagogical regressions
|
|
the same way unit tests catch code regressions.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from pathlib import Path
|
|
|
|
import nbformat
|
|
import pytest
|
|
|
|
# Directory that holds the notebooks, relative to the directory pytest is
# launched from.
NOTEBOOK_DIR = Path("notebooks")

# Every notebook below NOTEBOOK_DIR except 00_START_HERE.ipynb, sorted by path.
# Jupyter keeps autosave copies in ``.ipynb_checkpoints`` directories; those
# are skipped so a stale checkpoint cannot add duplicate or spurious test
# cases on a developer machine.
CONTENT_NOTEBOOKS = sorted(
    p
    for p in NOTEBOOK_DIR.rglob("*.ipynb")
    if p.name != "00_START_HERE.ipynb" and ".ipynb_checkpoints" not in p.parts
)
|
|
|
|
|
|
def _notebook_id(path: Path) -> str:
    """Build a readable pytest ID from a notebook path.

    The path is taken relative to ``NOTEBOOK_DIR``, directory separators are
    replaced with ``__`` and a trailing ``.ipynb`` is dropped, so
    ``notebooks/plan_a/intro.ipynb`` becomes ``plan_a__intro``.

    Args:
        path: Path of a notebook located under ``NOTEBOOK_DIR``.

    Returns:
        The flattened identifier string.

    Raises:
        ValueError: If *path* is not relative to ``NOTEBOOK_DIR``.
    """
    # as_posix() always yields "/" separators, so the ID is the same on
    # Windows, where str(path) would contain backslashes instead.
    relative = path.relative_to(NOTEBOOK_DIR).as_posix()
    return relative.replace("/", "__").removesuffix(".ipynb")
|
|
|
|
|
|
def _read_notebook(path: Path) -> nbformat.NotebookNode:
    """Load the notebook stored at *path*, requesting nbformat version 4."""
    notebook_file = str(path)
    return nbformat.read(notebook_file, as_version=4)
|
|
|
|
|
|
def _markdown_cells(nb: nbformat.NotebookNode) -> list[str]:
|
|
return ["".join(c.source) for c in nb.cells if c.cell_type == "markdown"]
|
|
|
|
|
|
def _code_cells(nb: nbformat.NotebookNode) -> list[str]:
|
|
return ["".join(c.source) for c in nb.cells if c.cell_type == "code"]
|
|
|
|
|
|
def _word_count(text: str) -> int:
|
|
"""Count words in text, stripping markdown/HTML/LaTeX markup."""
|
|
clean = re.sub(r"<[^>]+>", "", text) # strip HTML
|
|
clean = re.sub(r"\$[^$]+\$", "MATH", clean) # replace inline LaTeX
|
|
clean = re.sub(r"\$\$[^$]+\$\$", "MATH", clean) # block LaTeX
|
|
clean = re.sub(r"[#*_`|>~\-=]", "", clean) # strip markdown chars
|
|
clean = re.sub(r"\[([^\]]*)\]\([^)]*\)", r"\1", clean) # links → text
|
|
return len(clean.split())
|
|
|
|
|
|
# ── Fixtures ──────────────────────────────────────────────────────────
|
|
|
|
@pytest.fixture(
    params=CONTENT_NOTEBOOKS,
    ids=[_notebook_id(nb_path) for nb_path in CONTENT_NOTEBOOKS],
)
def notebook(request: pytest.FixtureRequest) -> tuple[Path, nbformat.NotebookNode]:
    """Provide one content notebook per parametrized run as ``(path, notebook)``."""
    nb_path = request.param
    parsed = _read_notebook(nb_path)
    return nb_path, parsed
|
|
|
|
|
|
# ── Prose Quality ─────────────────────────────────────────────────────
|
|
|
|
class TestProseQuality:
    """Guard against notebooks that are thin on explanatory prose."""

    MIN_TOTAL_WORDS = 200  # minimum words across all markdown cells
    MIN_MARKDOWN_RATIO = 0.25  # at least 25% of cells should be markdown

    def test_minimum_word_count(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """The markdown cells together hold at least MIN_TOTAL_WORDS words."""
        path, nb = notebook
        total_words = 0
        for cell_text in _markdown_cells(nb):
            total_words += _word_count(cell_text)
        assert total_words >= self.MIN_TOTAL_WORDS, (
            f"{path}: only {total_words} words of prose "
            f"(minimum {self.MIN_TOTAL_WORDS})"
        )

    def test_markdown_to_code_ratio(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """At least MIN_MARKDOWN_RATIO of all cells are markdown cells."""
        path, nb = notebook
        total = len(nb.cells)
        if total == 0:
            # No cells means no ratio to compute; skip rather than divide by zero.
            pytest.skip("empty notebook")
        md_count = sum(1 for cell in nb.cells if cell.cell_type == "markdown")
        ratio = md_count / total
        assert ratio >= self.MIN_MARKDOWN_RATIO, (
            f"{path}: markdown ratio {ratio:.0%} "
            f"(minimum {self.MIN_MARKDOWN_RATIO:.0%}, "
            f"{md_count} markdown / {total} total cells)"
        )
|
|
|
|
|
|
# ── Section Structure ─────────────────────────────────────────────────
|
|
|
|
class TestSectionStructure:
    """Notebooks must have clear sectional organization."""

    def test_has_title_header(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """First cell is a markdown cell with a level-1 or level-2 heading."""
        path, nb = notebook
        # An empty notebook has no title; fail with a readable message rather
        # than an IndexError from nb.cells[0].
        assert nb.cells, f"{path}: notebook has no cells, expected markdown header"
        first = nb.cells[0]
        assert first.cell_type == "markdown", (
            f"{path}: first cell is {first.cell_type}, expected markdown header"
        )
        src = "".join(first.source)
        # re.match anchors at the very start of the cell, so the heading must
        # be the first thing in it.
        assert re.match(r"^#{1,2}\s", src), (
            f"{path}: first cell doesn't start with # or ## heading"
        )

    def test_has_multiple_sections(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """Notebook has at least 2 section headers (## or ###)."""
        path, nb = notebook
        md_text = "\n".join(_markdown_cells(nb))
        # MULTILINE lets ^ match at the start of every line, not just the
        # start of the combined text.
        sections = re.findall(r"^#{2,3}\s", md_text, re.MULTILINE)
        assert len(sections) >= 2, (
            f"{path}: only {len(sections)} section headers found (minimum 2)"
        )
|
|
|
|
|
|
# ── Assessment Density ────────────────────────────────────────────────
|
|
|
|
# Matches a call to one of the assessment helpers (optional whitespace before
# the opening parenthesis). The single capture group is the helper name, so
# findall() yields one name per call.
ASSESSMENT_PATTERN = re.compile(
    r"(quiz|predict_choice|reflect|order)\s*\("
)
|
|
|
|
|
|
class TestAssessmentDensity:
    """Each notebook needs enough, and varied enough, assessment calls."""

    MIN_ASSESSMENTS = 2  # at least 2 assessment calls per notebook

    @staticmethod
    def _assessment_calls(nb: nbformat.NotebookNode) -> list[str]:
        """Return the matched helper name for every assessment call in *nb*.

        Code lines that mention ``LearningTracker`` (the import/setup line)
        are removed before matching.
        """
        code = "\n".join(_code_cells(nb))
        kept_lines = [
            line for line in code.split("\n") if "LearningTracker" not in line
        ]
        return ASSESSMENT_PATTERN.findall("\n".join(kept_lines))

    def test_minimum_assessment_count(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """The notebook contains at least MIN_ASSESSMENTS assessment calls."""
        path, nb = notebook
        matches = self._assessment_calls(nb)
        assert len(matches) >= self.MIN_ASSESSMENTS, (
            f"{path}: only {len(matches)} assessments "
            f"(minimum {self.MIN_ASSESSMENTS})"
        )

    def test_assessment_variety(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """The notebook uses at least 2 distinct assessment helpers."""
        path, nb = notebook
        types_found = set(self._assessment_calls(nb))
        assert len(types_found) >= 2, (
            f"{path}: only {len(types_found)} assessment type(s) "
            f"({types_found}), minimum 2 for variety"
        )
|
|
|
|
|
|
# ── Bloom's Taxonomy Coverage ─────────────────────────────────────────
|
|
|
|
# Matches a ``bloom="level"`` / ``bloom='level'`` keyword argument (whitespace
# around "=" allowed); the capture group is the quoted level name.
BLOOM_PATTERN = re.compile(
    r'bloom\s*=\s*["\'](\w+)["\']'
)
|
|
|
|
|
|
class TestBloomCoverage:
    """Notebooks that tag Bloom levels should use more than one of them."""

    def test_bloom_levels_used(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """At least 2 distinct ``bloom=`` values appear in the code cells."""
        path, nb = notebook
        all_code = "\n".join(_code_cells(nb))
        blooms = {found.group(1) for found in BLOOM_PATTERN.finditer(all_code)}
        if len(blooms) == 0:
            # Notebooks without any bloom= argument are not checked at all.
            pytest.skip("no bloom= parameters found")
        assert len(blooms) >= 2, (
            f"{path}: only {len(blooms)} Bloom level(s) ({blooms}), "
            f"minimum 2 for cognitive depth"
        )
|
|
|
|
|
|
# ── Checkpoint Coverage ───────────────────────────────────────────────
|
|
|
|
class TestCheckpointCoverage:
    """Assessment-heavy notebooks are expected to call checkpoint_summary."""

    MIN_ASSESSMENTS_FOR_CHECKPOINT = 4

    def test_checkpoint_present_when_needed(
        self, notebook: tuple[Path, nbformat.NotebookNode],
    ) -> None:
        """A notebook at or above the assessment threshold mentions checkpoint_summary."""
        path, nb = notebook
        code = "\n".join(_code_cells(nb))
        assessment_count = sum(1 for _ in ASSESSMENT_PATTERN.finditer(code))
        if assessment_count < self.MIN_ASSESSMENTS_FOR_CHECKPOINT:
            pytest.skip(f"only {assessment_count} assessments (threshold: {self.MIN_ASSESSMENTS_FOR_CHECKPOINT})")
        # A plain substring test: any occurrence of the name in a code cell counts.
        assert "checkpoint_summary" in code, (
            f"{path}: {assessment_count} assessments but no checkpoint_summary call"
        )
|
|
|
|
|
|
# ── Learning Tracker Integration ──────────────────────────────────────
|
|
|
|
class TestTrackerIntegration:
    """Content notebooks set up a LearningTracker and wrap it up at the end."""

    @staticmethod
    def _final_code(nb: nbformat.NotebookNode) -> str:
        """Return the last three code cells joined by newlines.

        Skips the running test when the notebook has no code cells.
        """
        code_cells = _code_cells(nb)
        if len(code_cells) == 0:
            pytest.skip("no code cells")
        return "\n".join(code_cells[-3:])

    def test_tracker_initialization(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """The name ``LearningTracker`` appears somewhere in the code cells."""
        path, nb = notebook
        all_code = "\n".join(_code_cells(nb))
        assert "LearningTracker" in all_code, (
            f"{path}: no LearningTracker initialization found"
        )

    def test_tracker_dashboard_at_end(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """``dashboard()`` appears in one of the last three code cells."""
        path, nb = notebook
        tail = self._final_code(nb)
        assert "dashboard()" in tail, (
            f"{path}: no tracker.dashboard() call in final code cells"
        )

    def test_tracker_save_at_end(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """``save()`` appears in one of the last three code cells."""
        path, nb = notebook
        tail = self._final_code(nb)
        assert "save()" in tail, (
            f"{path}: no tracker.save() call in final code cells"
        )
|
|
|
|
|
|
# ── Key Insight Pattern ───────────────────────────────────────────────
|
|
|
|
class TestKeyInsights:
    """Longer notebooks should call out important takeaways in their prose."""

    # Notebook file names that are never checked for insight callouts.
    EXEMPT = {"00_dashboard.ipynb"}

    def test_has_key_insights(self, notebook: tuple[Path, nbformat.NotebookNode]) -> None:
        """A notebook with 5+ section headers contains at least one callout phrase.

        Besides "key insight", several other phrases are accepted; the match
        is case-insensitive.
        """
        path, nb = notebook
        if path.name in self.EXEMPT:
            pytest.skip("interactive dashboard — exempt from insight callouts")

        md_text = "\n".join(_markdown_cells(nb))
        sections = re.findall(r"^#{2,3}\s", md_text, re.MULTILINE)
        if len(sections) < 5:
            pytest.skip(f"only {len(sections)} sections (threshold: 5)")

        callout = re.search(
            r"key insight|observe:|key fact|result:|proof summary|important|tip:",
            md_text,
            re.IGNORECASE,
        )
        has_insight = callout is not None
        assert has_insight, (
            f"{path}: {len(sections)} sections but no 'Key Insight' callout"
        )
|
|
|
|
|
|
# ── Cross-Plan Consistency ────────────────────────────────────────────
|
|
|
|
class TestCrossPlanConsistency:
    """All four plans should cover core concepts."""

    # Lowercase substrings looked up in each plan's combined notebook text.
    CORE_CONCEPTS = ["stabiliz", "magic", "witness", "ratchet"]

    def test_all_plans_cover_core_concepts(self) -> None:
        """Each plan's notebooks collectively mention all core concepts.

        For every plan directory the markdown and code cells of all its
        notebooks are combined, lowercased and searched for each entry of
        ``CORE_CONCEPTS``.
        """
        for plan_name in ("plan_a", "plan_b", "plan_c", "plan_d"):
            pieces: list[str] = []
            for nb_path in sorted(NOTEBOOK_DIR.glob(f"{plan_name}/*.ipynb")):
                nb = _read_notebook(nb_path)
                pieces.extend(_markdown_cells(nb))
                pieces.extend(_code_cells(nb))
            # One join over all cells keeps a newline between notebooks too,
            # so a concept can never match across a notebook boundary, and it
            # avoids repeated string concatenation.
            all_text_lower = "\n".join(pieces).lower()
            for concept in self.CORE_CONCEPTS:
                assert concept in all_text_lower, (
                    f"{plan_name}: core concept '{concept}' not found in any notebook"
                )
|