Block sprint jargon leaks
- SHA: add9a2416eea1fd02b927c78c3ccb234fd34b3e7
- Parents: d8be404
- Tree: 5214717
| Status | File | + | - |
|---|---|---|---|
| M | scripts/pregate.sh | 43 | 0 |
| M | src/dlm/export/preflight.py | 2 | 2 |
| M | src/dlm/export/weighted_merge.py | 1 | 1 |
| M | src/dlm/hardware/refusals.py | 1 | 1 |
| M | src/dlm/store/errors.py | 1 | 1 |
| M | src/dlm/templates/fetcher.py | 2 | 2 |
| M | tests/unit/export/test_preflight.py | 2 | 1 |
| M | tests/unit/hardware/test_refusals.py | 2 | 1 |
| M | tests/unit/store/test_manifest.py | 2 | 0 |
| M | tests/unit/templates/test_cli.py | 2 | 0 |
`scripts/pregate.sh` (modified)

```diff
@@ -72,6 +72,49 @@ if [[ -n "$scatter" ]]; then
   exit 1
 fi
 
+echo "==> new sprint jargon in src/dlm"
+# Sprint 39 M4: planning terms like `Sprint 23` or `audit-08` should
+# not leak into newly added product/runtime strings under src/dlm.
+# Compare the current tree against the upstream merge-base when one
+# exists, so committed fixes in the working tree override older
+# branch-local additions that have not been pushed yet.
+collect_src_dlm_diff() {
+  local upstream
+  upstream=$(git rev-parse --abbrev-ref --symbolic-full-name '@{upstream}' 2>/dev/null || true)
+  if [[ -n "$upstream" ]]; then
+    local merge_base
+    merge_base=$(git merge-base "$upstream" HEAD 2>/dev/null || true)
+    if [[ -n "$merge_base" ]]; then
+      git diff --unified=0 --no-color "$merge_base" -- 'src/dlm/**' 2>/dev/null || true
+      return
+    fi
+  fi
+
+  git diff --unified=0 --no-color HEAD -- 'src/dlm/**' 2>/dev/null || true
+}
+
+jargon_hits=$(
+  collect_src_dlm_diff | awk '
+    /^diff --git / {
+      file = $4
+      sub("^b/", "", file)
+      next
+    }
+    /^\+\+\+ b\// {
+      file = substr($0, 7)
+      next
+    }
+    /^\+[^+]/ && ($0 ~ /Sprint [0-9]+/ || $0 ~ /audit-[0-9]+/) {
+      print file ":" substr($0, 2)
+    }
+  ' | sort -u
+)
+if [[ -n "$jargon_hits" ]]; then
+  echo "$jargon_hits"
+  echo "  new Sprint/audit jargon leaked into src/dlm/ — translate it into product or operator language."
+  exit 1
+fi
+
 echo "==> stale dlm_version pin"
 # Any test that hard-pins a frontmatter version exact-match should use
 # >= so schema bumps don't retroactively break the test. Exact pins are
```
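To see what the scan flags, here is a minimal smoke test of the awk filter in isolation, not part of the commit: it keeps only the `+++` rule from the script above and feeds it a synthetic zero-context diff. The file path `src/dlm/foo.py` and the sample lines are invented for illustration.

```bash
# Hypothetical input: the diff header plus two added lines and one
# removed line. Only the awk logic is taken from pregate.sh.
printf '%s\n' \
  'diff --git a/src/dlm/foo.py b/src/dlm/foo.py' \
  '+++ b/src/dlm/foo.py' \
  '+msg = "deferred to Sprint 23"' \
  '+msg = "retry the upload"' \
  '-old = "audit-07 cleanup"' |
awk '
  # Remember which file the following hunk lines belong to.
  /^\+\+\+ b\// { file = substr($0, 7); next }
  # Report only added lines (one leading "+") containing planning jargon.
  /^\+[^+]/ && ($0 ~ /Sprint [0-9]+/ || $0 ~ /audit-[0-9]+/) {
    print file ":" substr($0, 2)
  }
'
# Expected output (only the added line with planning jargon):
#   src/dlm/foo.py:msg = "deferred to Sprint 23"
```

Note that removed and context lines never trip the gate, which is why translating a leaked string in place clears the failure rather than compounding it.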
`src/dlm/export/preflight.py` (modified)

```diff
@@ -86,8 +86,8 @@ def check_tokenizer_vocab(adapter_dir: Path) -> int:
             probe="tokenizer_vocab",
             detail=(
                 f"adapter dir {adapter_dir} is missing tokenizer_config.json. "
-                "Sprint 07 bringup writes this at training end; a checkpoint "
-                "predating Sprint 07 can't be exported — re-train."
+                "This checkpoint predates tokenizer metadata capture, so "
+                "export cannot verify vocab size safely — re-train."
             ),
         )
     try:
```
`src/dlm/export/weighted_merge.py` (modified)

```diff
@@ -300,7 +300,7 @@ def build_and_stage(  # pragma: no cover - heavy path
     merge_dir = store.cache_dir_for("_export_merged_" + "_".join(e.name for e in entries))
     # Copy tokenizer + training_run.json from a source adapter so the
     # downstream preflight (tokenizer_vocab) + shared precision-safety
-    # gate both work on the composite (audit-07 B2).
+    # gate both work on the composite artifact.
     first_source = resolve_first_source_path(store, entries)
     return save_merged_to_tmp(
         merged,
```
`src/dlm/hardware/refusals.py` (modified)

```diff
@@ -130,7 +130,7 @@ def check_multi_gpu_refusals(caps: Capabilities, world_size: int) -> None:
         )
     if caps.backend == Backend.ROCM:
         raise ResolutionError(
-            "Multi-GPU training on ROCm is out of scope for Sprint 23; "
+            "Multi-GPU training on ROCm is not supported yet; "
            "train single-GPU on ROCm or use a CUDA host for multi-GPU runs.",
        )
     # CUDA path — heterogeneous detection is the caller's responsibility
```
`src/dlm/store/errors.py` (modified)

```diff
@@ -36,7 +36,7 @@ class ManifestVersionError(ManifestCorruptError):
         self.expected_version = expected_version
         reason = (
             f"schema_version {found_version} requires migration to {expected_version} "
-            "(Sprint 12b owns the framework)"
+            "before this store can be used"
         )
         super().__init__(path, reason)
 
```
`src/dlm/templates/fetcher.py` (modified)

```diff
@@ -58,6 +58,6 @@ def fetch_all(
     this always raises `RemoteFetchUnavailable`.
     """
     raise RemoteFetchUnavailable(
-        "remote template gallery fetch is not wired yet — upstream repo "
-        "and signing key are pending (Sprint 27 deferred polish).",
+        "remote template gallery fetch is not available yet — no pinned "
+        "upstream gallery or signing key has been configured.",
     )
```
`tests/unit/export/test_preflight.py` (modified)

```diff
@@ -76,8 +76,9 @@ class TestTokenizerVocab:
         assert check_tokenizer_vocab(tmp_path) == 5000
 
     def test_missing_tokenizer_config_raises(self, tmp_path: Path) -> None:
-        with pytest.raises(PreflightError, match="tokenizer_config.json"):
+        with pytest.raises(PreflightError, match="tokenizer metadata capture") as exc_info:
             check_tokenizer_vocab(tmp_path)
+        assert "Sprint" not in str(exc_info.value)
 
     def test_malformed_config_raises(self, tmp_path: Path) -> None:
         (tmp_path / "tokenizer_config.json").write_text("not json {{{")
```
`tests/unit/hardware/test_refusals.py` (modified)

```diff
@@ -129,8 +129,9 @@ class TestMultiGpuRefusals:
     ) -> None:
         with ctx():
             caps = probe()
-        with pytest.raises(ResolutionError, match=expected):
+        with pytest.raises(ResolutionError, match=expected) as exc_info:
             check_multi_gpu_refusals(caps, world_size=2)
+        assert "Sprint" not in str(exc_info.value)
 
     def test_homogeneous_cuda_accepts_unknown_or_matching_sms(self) -> None:
         assert_homogeneous_cuda([(8, 0), (8, 0), None])
```
`tests/unit/store/test_manifest.py` (modified)

```diff
@@ -195,6 +195,8 @@ class TestCorruptHandling:
             load_manifest(path)
         assert exc_info.value.found_version == 999
         assert exc_info.value.expected_version == CURRENT_MANIFEST_SCHEMA_VERSION
+        assert "requires migration to" in str(exc_info.value)
+        assert "Sprint" not in str(exc_info.value)
         # Still catchable as the parent class:
         assert isinstance(exc_info.value, ManifestCorruptError)
 
```
`tests/unit/templates/test_cli.py` (modified)

```diff
@@ -47,6 +47,8 @@ def test_templates_list_refresh_falls_back_to_bundled() -> None:
     # Refresh is currently a no-op that warns + falls back; the bundled
     # list still renders and the exit code is 0.
     assert result.exit_code == 0, result.output
+    assert "not available yet" in result.output
+    assert "Sprint" not in result.output
     assert "coding-tutor" in result.output
 
 
```
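For local verification, something like the following should exercise both the new gate and the touched tests. This is a sketch, not part of the commit: it assumes a checkout at the repo root with `pytest` installed and the script runnable via `bash` (the module paths are taken from the change table above).

```bash
# Run the updated pre-gate, including the new jargon scan.
bash scripts/pregate.sh

# Re-run just the test modules this commit touches.
pytest -q \
  tests/unit/export/test_preflight.py \
  tests/unit/hardware/test_refusals.py \
  tests/unit/store/test_manifest.py \
  tests/unit/templates/test_cli.py
```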