mirror of
https://github.com/saymrwulf/autoresearch-quantum.git
synced 2026-05-28 22:25:20 +00:00
Quality fixes: - Add deprecation warnings to 5 silent no-op legacy wrappers in assess.py - Remove dead code in tracker.py score_by_section (unused first loop) - Remove unused variable in assess.py _check_order - Fix .gitignore: add progress JSONs, checkpoints, .coverage, .DS_Store, LaTeX aux - Fix "all three plans" → "all four plans" in learning_objectives.md - Add teaching/ package to README project tree - Add compendium to README paper tree Testing: - Add 43 unit tests for teaching/assess.py and tracker.py (quiz, predict_choice, reflect, order, checkpoint_summary, legacy wrapper deprecation warnings, tracker scoring, persistence, mastery calculation) - Add notebook execution test suite (nbclient): all 11 notebooks execute without errors in a fresh kernel, structural validation (valid JSON, has code cells, has assessments, section parameters, learning objectives document) - Overall test count: 185 passing (was 107), coverage: 85% (was ~25% in tests) Toolchain: - Add pytest-cov, ruff, nbclient, nbformat to dev dependencies - Add ruff config (E, F, W, I, UP, B, SIM rules) - Add coverage config with term-missing output - Fix all ruff lint issues across src/ and tests/ (import sorting, unused imports) - Fix Plan D notebook paths (configs/rungs → ../../configs/rungs) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
53 lines
1.8 KiB
Python
53 lines
1.8 KiB
Python
"""Tests for scoring module — edge cases and registry."""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from autoresearch_quantum.models import EvaluationMetrics, QualityWeights, ScoreConfig
|
|
from autoresearch_quantum.scoring.score import (
|
|
score_metrics,
|
|
weighted_acceptance_cost,
|
|
)
|
|
|
|
|
|
def test_score_all_zero_weights() -> None:
    """A cheap-tier config built from a bare ``QualityWeights()`` scores zero.

    Both the quality component and the final score returned by
    ``weighted_acceptance_cost`` are expected to be exactly ``0.0``.
    """
    sample_metrics = EvaluationMetrics(
        acceptance_rate=0.5,
        two_qubit_count=10,
        depth=20,
    )
    # Default-constructed weights: every quality weight is zero.
    zero_weight_config = ScoreConfig(cheap_quality=QualityWeights())

    result = weighted_acceptance_cost(sample_metrics, "cheap", zero_weight_config)
    # The cost component is unpacked but not asserted on.
    total_score, quality_part, _cost = result

    assert quality_part == 0.0
    assert total_score == 0.0
|
|
|
|
|
|
def test_score_with_none_metrics() -> None:
    """Weighted metrics that were never supplied contribute nothing to quality.

    Only ``acceptance_rate`` is set on the metrics, while the config puts
    weight on the ideal and noisy fidelities; the quality term must be ``0.0``.
    """
    sparse_metrics = EvaluationMetrics(acceptance_rate=0.8)
    fidelity_weights = QualityWeights(ideal_fidelity=1.0, noisy_fidelity=1.0)
    fidelity_config = ScoreConfig(cheap_quality=fidelity_weights)

    # The ideal and noisy fidelities are None on the metrics, so they are skipped.
    _score, quality_part, _cost = weighted_acceptance_cost(
        sparse_metrics,
        "cheap",
        fidelity_config,
    )

    assert quality_part == 0.0
|
|
|
|
|
|
def test_score_expensive_tier_uses_expensive_weights() -> None:
    """The tier argument selects which ``QualityWeights`` set is applied.

    With a zero witness weight on the cheap tier and a full weight on the
    expensive tier, the same metrics must score ``0.0`` for "cheap" and a
    strictly positive value for "expensive".
    """
    witness_metrics = EvaluationMetrics(logical_magic_witness=0.9, acceptance_rate=0.8)

    cheap_weights = QualityWeights(logical_witness=0.0)  # zero weight
    expensive_weights = QualityWeights(logical_witness=1.0)  # full weight
    tiered_config = ScoreConfig(
        cheap_quality=cheap_weights,
        expensive_quality=expensive_weights,
    )

    cheap_score, _, _ = weighted_acceptance_cost(witness_metrics, "cheap", tiered_config)
    expensive_score, _, _ = weighted_acceptance_cost(witness_metrics, "expensive", tiered_config)

    assert cheap_score == 0.0
    assert expensive_score > 0.0
|
|
|
|
|
|
def test_unknown_score_function_raises() -> None:
    """``score_metrics`` raises ``ValueError`` for a config naming ``"nonexistent_scorer"``."""
    empty_metrics = EvaluationMetrics()
    bogus_config = ScoreConfig(name="nonexistent_scorer")

    # The error message must match the "Unknown score function" pattern.
    expect_unknown_scorer = pytest.raises(ValueError, match="Unknown score function")
    with expect_unknown_scorer:
        score_metrics(empty_metrics, "cheap", bogus_config)
|