|
|
__init__.py
|
sway: scaffold standalone subproject (pyproject, LICENSE, README)
|
|
|
|
test_autogen_quality.py
|
tests/autogen: F07 skipped-probes rollup coverage
|
|
|
|
test_backend_api.py
|
tests/unit: backend_api — MockTransport coverage across all three scoring methods, retries, preflight
|
|
|
|
test_backend_dummy.py
|
sway(backends): DummyDifferentialBackend for unit tests
|
|
|
|
test_backend_hf_helpers.py
|
tests/backend_hf_helpers: unit cover _resolve_dtype + _detect_device
|
|
|
|
test_backend_instrumentation.py
|
tests/scoring+instrumentation: new FakeScoring.next_token_dist_batch + cached_batch coverage
|
|
|
|
test_backend_registry.py
|
tests/backend_registry: cover __sway_protocols__ stamp on full + minimal custom backends (B20)
|
|
|
|
test_batched_backend_s23.py
|
tests/batched_backend_s23: probe-level batched-path + results-equivalence + footer coverage
|
|
|
|
test_bootstrap_ci_narrowing.py
|
tests/unit: fix bootstrap-CI-narrowing flake — use hashlib over PYTHONHASHSEED-salted hash()
|
|
|
|
test_cli.py
|
tests/cli: doctor --json schema-shape snapshot (stronger-test #11)
|
|
|
|
test_cli_compare.py
|
tests/unit: sway compare CLI — formats, exit codes, --fail-on-regression gate
|
|
|
|
test_cli_mine.py
|
tests/cli_mine: bump fixture pool size to clear F04 2·top_k floor
|
|
|
|
test_cli_report_html.py
|
tests/unit: sway report --format html --out — file write, missing-plotly, format guards
|
|
|
|
test_cli_serve.py
|
Add sway serve CLI safety tests
|
|
|
|
test_cluster_kl_prove_value.py
|
tests/test_cluster_kl_prove_value: same sklearn-free kmeans stub
|
|
|
|
test_compare.py
|
tests/unit: compare — build_matrix + renderers + regression gate
|
|
|
|
test_compare_prove_value.py
|
tests/unit: prove-the-value — sway compare catches planted regression in committed history
|
|
|
|
test_cross_process_determinism.py
|
tests: subprocess determinism test — pins F08 + stronger-test #12
|
|
|
|
test_cross_verdict_consistency.py
|
tests: cross-verdict consistency — run/gate/junit tally agree (stronger-test #10)
|
|
|
|
test_determinism.py
|
tests/determinism: runner calls seed_everything before probes, populates DeterminismReport
|
|
|
|
test_divergence.py
|
tests/divergence: degenerate uniform TokenDist rejection (stronger-test #9)
|
|
|
|
test_dlm_bridge.py
|
tests: ruff N818/PT018 fixups on the F06 test additions
|
|
|
|
test_dlm_not_imported.py
|
tests/dlm_not_imported: dummy suite + autogen clean-error when dlm absent (C7)
|
|
|
|
test_errors.py
|
sway(core): exception hierarchy
|
|
|
|
test_ext_ppl_vs_calibration_drift.py
|
tests/unit: prove-the-value — diffuse forgetting splits external_perplexity vs calibration_drift verdicts
|
|
|
|
test_golden_comparator.py
|
tests/golden_comparator: update tolerance boundaries for 1e-4 default
|
|
|
|
test_mlx_convert.py
|
tests/mlx_convert: pytest.importorskip safetensors so fast lane (no [hf]) skips cleanly
|
|
|
|
test_model.py
|
tests/model: cover dtype/endpoint/trust_remote_code branches (DC5)
|
|
|
|
test_no_dead_options.py
|
tests/no_dead_options: meta-guard against P14/P15 regression (documented-but-unused)
|
|
|
|
test_null_cache.py
|
probes/null_adapter: on-disk cache keyed by backend identity + calibration params
|
|
|
|
test_null_calibration.py
|
tests/null_calibration: assert runs=1 is flagged degenerate + refused by z_score (F02)
|
|
|
|
test_null_multi_rank.py
|
tests/unit: multi-rank null calibration — rank_scale semantics, z-profile emission, prove-the-value rank saturation
|
|
|
|
test_outlier_miner.py
|
tests/outlier_miner: regress-test small-pool guard + actionable hint (F04)
|
|
|
|
test_pack_unpack.py
|
tests/pack_unpack: 17 unit tests round-tripping spec + dlm_source + golden + every error path (S26 X3-P6)
|
|
|
|
test_paraphrase_miner.py
|
tests: paraphrase_miner — ranker + diversity filter + input validation (S17.5)
|
|
|
|
test_paraphrase_miner_prove_value.py
|
tests: prove-value — mined paraphrases flip memorizing adapter PASS→FAIL (S17.6)
|
|
|
|
test_preflight_check.py
|
backends: PreflightCheckable protocol + finite-check on HF and dummy
|
|
|
|
test_probe_adapter_ablation.py
|
tests/adapter_ablation: probe-level saturation reason coverage (C8, B3 test side)
|
|
|
|
test_probe_adapter_revert.py
|
sway(probes): A2 adapter_revert via sentence embeddings
|
|
|
|
test_probe_base.py
|
tests/probe_base: cover validate_all_probes — multi-error collection + index-label fallback (B7)
|
|
|
|
test_probe_calibration_drift.py
|
tests/calibration_drift: split compound asserts (PT018)
|
|
|
|
test_probe_cluster_kl.py
|
tests/cluster_kl: perturb _dist_broad fixture to clear uniformity guard
|
|
|
|
test_probe_delta_kl.py
|
tests: delta_kl NaN-routes-to-error at both probe and runner levels (B1 regression)
|
|
|
|
test_probe_external_perplexity.py
|
tests/ext_ppl: assert runner threads null_stats even when degenerate (F02)
|
|
|
|
test_probe_gradient_ghost.py
|
probes/gradient_ghost: min-baseline ratio + 17 unit tests covering the verdict ladder (S25 P7)
|
|
|
|
test_probe_leakage.py
|
tests/leakage: cover the 4 new perturbations + expanded fixture canned-responses (B11)
|
|
|
|
test_probe_multi_turn_coherence.py
|
tests/unit: 22 tests for multi_turn_coherence probe + curve-fit math
|
|
|
|
test_probe_paraphrase_invariance.py
|
sway(probes): B2 paraphrase_invariance with intent-aware pass rule
|
|
|
|
test_probe_preference_flip.py
|
tests/preference_flip: cover one-bad-triple and all-fail paths (B14)
|
|
|
|
test_probe_prompt_collapse.py
|
tests/prompt_collapse: cover tokenizer path + legacy fallback + spec opt-out (B13)
|
|
|
|
test_probe_section_internalization.py
|
sway(probes): B1 section_internalization (flagship per-section attribution)
|
|
|
|
test_probe_style_fingerprint.py
|
probes/style_fingerprint: detect zero-fp ft as ERROR; replace cosine with projection (B4)
|
|
|
|
test_probe_tool_use_fidelity.py
|
tests/unit: 40 tests covering tool_use_fidelity probe + parsers + schema check
|
|
|
|
test_probe_training_drift.py
|
tests/unit: 30 tests for training_drift probe + helpers + real-fixture parse
|
|
|
|
test_pytest_plugin.py
|
tests/unit: pytest_plugin via pytester — expansion, verdict routing, gate, error paths, cache reuse
|
|
|
|
test_report_extras_rollup.py
|
tests/report: degenerate null rollup coverage (F02)
|
|
|
|
test_report_formatters.py
|
tests: add D3/D4/D6/D7/D10/D11/D12 coverage — formatters, extras rollup, CLI surfaces
|
|
|
|
test_report_html.py
|
tests/unit: report_html — renderer + panel divs + snapshot + missing-plotly hint
|
|
|
|
test_report_html_offline.py
|
tests/unit: prove-the-value — HTML from committed history fixture loads offline, no external URLs
|
|
|
|
test_report_snapshot.py
|
tests/report_snapshot: fixture probe carries ci_95 — locks F01 path
|
|
|
|
test_result.py
|
sway(core): ProbeResult, SuiteResult, SwayScore, Verdict
|
|
|
|
test_runner_backend_stats.py
|
tests/runner: trace writer ↔ analyzer round-trip regression (F09)
|
|
|
|
test_safe_finalize.py
|
core: safe_finalize helper — non-finite critical fields → Verdict.ERROR
|
|
|
|
test_score_weights_override.py
|
tests/score_weights: spec field validation + CLI parser + composite override
|
|
|
|
test_scoring.py
|
tests/scoring+instrumentation: new FakeScoring.next_token_dist_batch + cached_batch coverage
|
|
|
|
test_sections.py
|
sway(core): Section / SectionProbe / SectionPreference dataclasses
|
|
|
|
test_serve_app.py
|
Apply ruff format
|
|
|
|
test_serve_cache.py
|
Add BackendCache unit tests
|
|
|
|
test_serve_client.py
|
Add ServeClient unit tests
|
|
|
|
test_stats.py
|
tests/unit: fix type-narrowing and unused-import in stats tests
|
|
|
|
test_style_fingerprint_extended.py
|
tests/style_fingerprint: extended fingerprint fallback + extended=on SKIP path
|
|
|
|
test_suite_runner.py
|
probes/null_adapter: per-kind calibration matrix (fixes P02, B2, C9)
|
|
|
|
test_suite_score_report.py
|
rename CLI + source references to sway; keep dlm-sway as the PyPI wheel name
|
|
|
|
test_suite_spec.py
|
tests: cover ModelSpec.adapter normalization (tilde, relative, None) + update yaml-roundtrip assertion (B22)
|
|
|
|
test_trace_analysis.py
|
tests/unit: trace_analysis + trace_cmd CLI coverage (22 tests)
|
|
|
|
test_two_model_differential.py
|
tests/two_model: concurrency flag composition regression (F06)
|
|
|
|
test_visualize.py
|
sway(viz): matplotlib plots for SIS, adapter ablation, KL histogram (viz extra)
|
|
|
|
test_zscore_helpers.py
|
tests/zscore_helpers: degenerate flag rejects valid-floored std (F02)
|
|
|
|
test_zscore_threading.py
|
tests/zscore_threading: reformat
|
|