test(pack): unit matrix — format validation, integrity, migrations, packer F21, unpacker gates (14)
- SHA: 979a1f3c73c89845f0490a1e42a8dc725d002a5a
- Parents: e24f07b
- Tree: 1a818df

| Status | File | + | - |
|---|---|---|---|
| A | tests/unit/pack/__init__.py | 0 | 0 |
| A | tests/unit/pack/test_format.py | 86 | 0 |
| A | tests/unit/pack/test_integrity.py | 124 | 0 |
| A | tests/unit/pack/test_migrations.py | 101 | 0 |
| A | tests/unit/pack/test_packer.py | 171 | 0 |
| A | tests/unit/pack/test_unpacker.py | 221 | 0 |
tests/unit/pack/__init__.pyaddedtests/unit/pack/test_format.pyadded@@ -0,0 +1,86 @@ | ||
| 1 | +"""`PackHeader` + `PackManifest` validation (Sprint 14).""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from datetime import datetime | |
| 6 | + | |
| 7 | +import pytest | |
| 8 | + | |
| 9 | +from dlm.pack.format import CURRENT_PACK_FORMAT_VERSION, PackHeader, PackManifest | |
| 10 | + | |
| 11 | + | |
| 12 | +def _valid_header_kwargs() -> dict[str, object]: | |
| 13 | + return { | |
| 14 | + "pack_format_version": 1, | |
| 15 | + "created_at": datetime(2026, 4, 19, 12, 0, 0), | |
| 16 | + "tool_version": "0.1.0", | |
| 17 | + "content_type": "minimal", | |
| 18 | + "platform_hint": "linux", | |
| 19 | + } | |
| 20 | + | |
| 21 | + | |
class TestPackHeader:
    """Validation behaviour of the frozen ``PackHeader`` model."""

    def test_minimal_accepted(self) -> None:
        header = PackHeader.model_validate(_valid_header_kwargs())
        assert header.pack_format_version == 1
        assert header.licensee_acceptance_url is None

    def test_version_must_be_positive(self) -> None:
        bad = {**_valid_header_kwargs(), "pack_format_version": 0}
        with pytest.raises(ValueError):
            PackHeader.model_validate(bad)

    def test_content_type_must_match_literal(self) -> None:
        bad = {**_valid_header_kwargs(), "content_type": "mystery"}
        with pytest.raises(ValueError):
            PackHeader.model_validate(bad)

    def test_extra_forbidden(self) -> None:
        bad = {**_valid_header_kwargs(), "new_field": "oops"}
        with pytest.raises(ValueError):
            PackHeader.model_validate(bad)

    def test_licensee_url_round_trips(self) -> None:
        url = "https://example.com/accept"
        header = PackHeader.model_validate(
            {**_valid_header_kwargs(), "licensee_acceptance_url": url}
        )
        assert header.licensee_acceptance_url == url

    def test_frozen_forbids_mutation(self) -> None:
        header = PackHeader.model_validate(_valid_header_kwargs())
        with pytest.raises((ValueError, TypeError)):
            header.tool_version = "0.2.0"  # type: ignore[misc]

    def test_current_version_present(self) -> None:
        assert CURRENT_PACK_FORMAT_VERSION >= 1
| 55 | + | |
| 56 | + | |
class TestPackManifest:
    """Field-level constraints on ``PackManifest``."""

    def test_minimal_accepted(self) -> None:
        manifest = PackManifest(
            dlm_id="01TEST",
            base_model="smollm2-135m",
            adapter_version=0,
            entries={},
            content_sha256="0" * 64,
        )
        assert manifest.adapter_version == 0

    def test_content_sha256_must_be_64_hex(self) -> None:
        kwargs = dict(
            dlm_id="x",
            base_model="x",
            adapter_version=0,
            entries={},
            content_sha256="too-short",
        )
        with pytest.raises(ValueError):
            PackManifest(**kwargs)

    def test_adapter_version_non_negative(self) -> None:
        kwargs = dict(
            dlm_id="x",
            base_model="x",
            adapter_version=-1,
            entries={},
            content_sha256="0" * 64,
        )
        with pytest.raises(ValueError):
            PackManifest(**kwargs)
tests/unit/pack/test_integrity.pyadded@@ -0,0 +1,124 @@ | ||
| 1 | +"""`write_checksums` / `verify_checksums` / rollup_sha256 (Sprint 14).""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import hashlib | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 8 | +import pytest | |
| 9 | + | |
| 10 | +from dlm.pack.errors import PackIntegrityError | |
| 11 | +from dlm.pack.integrity import ( | |
| 12 | + read_checksums, | |
| 13 | + rollup_sha256, | |
| 14 | + sha256_of_file, | |
| 15 | + verify_checksums, | |
| 16 | + write_checksums, | |
| 17 | +) | |
| 18 | + | |
| 19 | + | |
| 20 | +def _scratch_tree(tmp_path: Path) -> Path: | |
| 21 | + root = tmp_path / "tree" | |
| 22 | + root.mkdir() | |
| 23 | + (root / "a.txt").write_text("alpha\n") | |
| 24 | + (root / "b.txt").write_text("beta\n") | |
| 25 | + (root / "sub").mkdir() | |
| 26 | + (root / "sub" / "c.txt").write_text("gamma\n") | |
| 27 | + return root | |
| 28 | + | |
| 29 | + | |
class TestWriteChecksums:
    """`write_checksums` output: coverage, hash values, ordering, exclusions."""

    def test_covers_all_regular_files(self, tmp_path: Path) -> None:
        root = _scratch_tree(tmp_path)
        recorded = write_checksums(root)
        assert set(recorded) == {"a.txt", "b.txt", "sub/c.txt"}
        assert (root / "CHECKSUMS.sha256").is_file()

    def test_hashes_match_hashlib(self, tmp_path: Path) -> None:
        root = _scratch_tree(tmp_path)
        recorded = write_checksums(root)
        reference = hashlib.sha256((root / "a.txt").read_bytes()).hexdigest()
        assert recorded["a.txt"] == reference

    def test_sorted_lines(self, tmp_path: Path) -> None:
        root = _scratch_tree(tmp_path)
        write_checksums(root)
        text = (root / "CHECKSUMS.sha256").read_text()
        relpaths = [entry.split(" ", 1)[1] for entry in text.splitlines()]
        assert relpaths == sorted(relpaths)

    def test_exclude_skips_listed_paths(self, tmp_path: Path) -> None:
        root = _scratch_tree(tmp_path)
        recorded = write_checksums(root, exclude=["a.txt"])
        assert "a.txt" not in recorded

    def test_self_always_excluded(self, tmp_path: Path) -> None:
        """The checksum file must never list itself in its own output."""
        root = _scratch_tree(tmp_path)
        recorded = write_checksums(root)
        assert "CHECKSUMS.sha256" not in recorded
| 60 | + | |
| 61 | + | |
class TestReadChecksums:
    """Parsing of an existing ``CHECKSUMS.sha256`` file."""

    def test_missing_file_raises(self, tmp_path: Path) -> None:
        with pytest.raises(PackIntegrityError):
            read_checksums(tmp_path)

    def test_malformed_line_raises(self, tmp_path: Path) -> None:
        (tmp_path / "CHECKSUMS.sha256").write_text("not a valid line\n")
        with pytest.raises(PackIntegrityError):
            read_checksums(tmp_path)

    def test_roundtrip(self, tmp_path: Path) -> None:
        root = _scratch_tree(tmp_path)
        written = write_checksums(root)
        assert read_checksums(root) == written
| 77 | + | |
| 78 | + | |
class TestVerifyChecksums:
    """End-to-end verification of a tree against its recorded checksums."""

    def _verified_tree(self, tmp_path: Path) -> Path:
        """Build a scratch tree with checksums already written."""
        root = _scratch_tree(tmp_path)
        write_checksums(root)
        return root

    def test_clean_tree_passes(self, tmp_path: Path) -> None:
        root = self._verified_tree(tmp_path)
        verify_checksums(root)  # must not raise

    def test_corrupted_file_raises(self, tmp_path: Path) -> None:
        root = self._verified_tree(tmp_path)
        (root / "a.txt").write_text("tampered!\n")
        with pytest.raises(PackIntegrityError) as err:
            verify_checksums(root)
        assert err.value.relpath == "a.txt"

    def test_missing_file_raises(self, tmp_path: Path) -> None:
        root = self._verified_tree(tmp_path)
        (root / "b.txt").unlink()
        with pytest.raises(PackIntegrityError) as err:
            verify_checksums(root)
        assert err.value.relpath == "b.txt"
| 100 | + | |
| 101 | + | |
class TestRollup:
    """Properties of the aggregate ``rollup_sha256`` digest."""

    def test_deterministic(self) -> None:
        # Fix: the original requested the `tmp_path` fixture but never used
        # it — a dead parameter that forced pytest to create a temp dir.
        checksums = {"a": "1" * 64, "b": "2" * 64}
        assert rollup_sha256(checksums) == rollup_sha256(dict(checksums))

    def test_order_independent(self) -> None:
        forward = rollup_sha256({"a": "1" * 64, "b": "2" * 64})
        backward = rollup_sha256({"b": "2" * 64, "a": "1" * 64})
        assert forward == backward

    def test_different_inputs_differ(self) -> None:
        a = rollup_sha256({"a": "1" * 64})
        b = rollup_sha256({"a": "2" * 64})
        assert a != b
| 116 | + | |
| 117 | + | |
class TestSha256OfFile:
    """Streaming file hashing."""

    def test_chunk_boundaries(self, tmp_path: Path) -> None:
        """A file spanning multiple 1MB read chunks hashes like a one-shot hash."""
        payload = b"x" * (2 * 1024 * 1024 + 7)  # deliberately not chunk-aligned
        target = tmp_path / "big.bin"
        target.write_bytes(payload)
        assert sha256_of_file(target) == hashlib.sha256(payload).hexdigest()
tests/unit/pack/test_migrations.pyadded@@ -0,0 +1,101 @@ | ||
| 1 | +"""Pack-migration registry + dispatcher + coverage enforcement (Sprint 14).""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from collections.abc import Iterator | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 8 | +import pytest | |
| 9 | + | |
| 10 | +from dlm.pack.errors import PackFormatVersionError | |
| 11 | +from dlm.pack.format import CURRENT_PACK_FORMAT_VERSION | |
| 12 | +from dlm.pack.migrations import PACK_MIGRATORS, register | |
| 13 | +from dlm.pack.migrations.dispatch import apply_pending | |
| 14 | + | |
| 15 | + | |
@pytest.fixture
def scratch_registry() -> Iterator[None]:
    """Run the test against an empty ``PACK_MIGRATORS``; restore the original after."""
    snapshot = dict(PACK_MIGRATORS)
    try:
        PACK_MIGRATORS.clear()
        yield
    finally:
        # Restore even if the test raised mid-mutation.
        PACK_MIGRATORS.clear()
        PACK_MIGRATORS.update(snapshot)
| 26 | + | |
| 27 | + | |
class TestRegister:
    """Registration guard rails."""

    def test_duplicate_raises(self, scratch_registry: None) -> None:
        @register(from_version=1)
        def _first(root: Path) -> Path:
            return root

        # Registering a second migrator for the same source version must fail.
        with pytest.raises(AssertionError, match="duplicate pack migrator"):

            @register(from_version=1)
            def _second(root: Path) -> Path:
                return root
| 39 | + | |
| 40 | + | |
class TestApplyPending:
    """Dispatcher behaviour: no-op, refusal, chaining, gap detection."""

    def test_already_current_is_noop(self, scratch_registry: None, tmp_path: Path) -> None:
        root, applied = apply_pending(tmp_path, from_version=CURRENT_PACK_FORMAT_VERSION)
        assert root == tmp_path
        assert applied == []

    def test_newer_than_current_raises(self, scratch_registry: None, tmp_path: Path) -> None:
        future = CURRENT_PACK_FORMAT_VERSION + 1
        with pytest.raises(PackFormatVersionError):
            apply_pending(tmp_path, from_version=future)

    def test_chain_runs(self, scratch_registry: None, tmp_path: Path) -> None:
        calls: list[int] = []

        @register(from_version=1)
        def _v1_to_v2(root: Path) -> Path:
            calls.append(1)
            return root

        @register(from_version=2)
        def _v2_to_v3(root: Path) -> Path:
            calls.append(2)
            return root

        # Pretend CURRENT is 3 for the duration of this call so both
        # registered migrators are pending; restore no matter what happens.
        import dlm.pack.migrations.dispatch as dispatch_module

        saved = dispatch_module.CURRENT_PACK_FORMAT_VERSION
        dispatch_module.CURRENT_PACK_FORMAT_VERSION = 3
        try:
            apply_pending(tmp_path, from_version=1)
        finally:
            dispatch_module.CURRENT_PACK_FORMAT_VERSION = saved
        assert calls == [1, 2]

    def test_gap_in_registry_raises(self, scratch_registry: None, tmp_path: Path) -> None:
        # With CURRENT forced to 2 and no v1 migrator registered, the
        # dispatcher must refuse rather than silently skip a step.
        import dlm.pack.migrations.dispatch as dispatch_module

        saved = dispatch_module.CURRENT_PACK_FORMAT_VERSION
        dispatch_module.CURRENT_PACK_FORMAT_VERSION = 2
        try:
            with pytest.raises(PackFormatVersionError):
                apply_pending(tmp_path, from_version=1)
        finally:
            dispatch_module.CURRENT_PACK_FORMAT_VERSION = saved
| 89 | + | |
| 90 | + | |
class TestCoverageEnforcement:
    """A PR that bumps `CURRENT_PACK_FORMAT_VERSION` without registering a
    migrator fails this test. At v1 the expected range is empty."""

    def test_migrators_span_required_range(self) -> None:
        required = set(range(1, CURRENT_PACK_FORMAT_VERSION))
        registered = set(PACK_MIGRATORS.keys())
        assert registered == required, (
            f"PACK_MIGRATORS keys {sorted(PACK_MIGRATORS)!r} do not match "
            f"expected range [1, {CURRENT_PACK_FORMAT_VERSION}). Register a "
            "migrator under src/dlm/pack/migrations/ when bumping the version."
        )
tests/unit/pack/test_packer.pyadded@@ -0,0 +1,171 @@ | ||
| 1 | +"""`dlm.pack.packer` — pack creation + content-type labelling + F21 gate (Sprint 14).""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import tarfile | |
| 6 | +from pathlib import Path | |
| 7 | +from typing import Any | |
| 8 | + | |
| 9 | +import pytest | |
| 10 | +from typer.testing import CliRunner | |
| 11 | + | |
| 12 | +from dlm.cli.app import app | |
| 13 | +from dlm.pack.errors import BaseLicenseRefusedError | |
| 14 | +from dlm.pack.packer import pack | |
| 15 | + | |
| 16 | + | |
def _scaffold_doc_and_store(tmp_path: Path, *, base: str = "smollm2-135m") -> Path:
    """Create a .dlm + its ensure_layout'd store with a valid manifest."""
    from dlm.doc.parser import parse_file
    from dlm.store.manifest import Manifest, save_manifest
    from dlm.store.paths import for_dlm

    home = tmp_path / "dlm-home"
    doc = tmp_path / "doc.dlm"
    # Gated bases (e.g. llama-3.2) require --i-accept-license in non-interactive runs.
    argv = [
        "--home",
        str(home),
        "init",
        str(doc),
        "--base",
        base,
        "--i-accept-license",
    ]
    result = CliRunner().invoke(app, argv)
    assert result.exit_code == 0, result.output

    parsed = parse_file(doc)

    # NOTE(review): DLM_HOME is set process-wide and never unset, so it leaks
    # into later tests — presumably deliberate so `for_dlm` calls in test
    # bodies resolve the same home; confirm before tightening this.
    import os

    os.environ["DLM_HOME"] = str(home)
    store = for_dlm(parsed.frontmatter.dlm_id)
    store.ensure_layout()
    save_manifest(store.manifest, Manifest(dlm_id=parsed.frontmatter.dlm_id, base_model=base))
    return doc
| 48 | + | |
| 49 | + | |
def _tar_members(pack_path: Path) -> list[tuple[str, int]]:
    """Stream-decompress *pack_path* and list ``(name, size)`` for each tar member."""
    import zstandard as zstd

    decompressor = zstd.ZstdDecompressor()
    with pack_path.open("rb") as fh, decompressor.stream_reader(fh) as reader:
        with tarfile.open(fileobj=reader, mode="r|") as tar:
            return [(member.name, member.size) for member in tar]
| 57 | + | |
| 58 | + | |
class TestPackShape:
    """Archive layout of a freshly produced pack."""

    def test_contains_required_entries(self, tmp_path: Path) -> None:
        doc = _scaffold_doc_and_store(tmp_path)
        produced = pack(doc)

        names = {name for name, _size in _tar_members(produced.path)}
        required = {
            "PACK_HEADER.json",
            "manifest.json",
            "CHECKSUMS.sha256",
            f"dlm/{doc.name}",
            "store/manifest.json",
        }
        assert required <= names

    def test_default_out_path_next_to_dlm(self, tmp_path: Path) -> None:
        doc = _scaffold_doc_and_store(tmp_path)
        produced = pack(doc)
        assert produced.path == doc.with_suffix(doc.suffix + ".pack")
        assert produced.path.is_file()

    def test_explicit_out_path(self, tmp_path: Path) -> None:
        doc = _scaffold_doc_and_store(tmp_path)
        destination = tmp_path / "sub" / "custom.pack"
        produced = pack(doc, out=destination)
        assert produced.path == destination
        assert destination.is_file()
| 83 | + | |
| 84 | + | |
class TestContentTypeLabel:
    """The four content-type labels derived from the include flags."""

    def test_minimal_default(self, tmp_path: Path) -> None:
        doc = _scaffold_doc_and_store(tmp_path)
        assert pack(doc).content_type == "minimal"

    def test_include_exports_flips_to_no_base(self, tmp_path: Path) -> None:
        doc = _scaffold_doc_and_store(tmp_path)
        assert pack(doc, include_exports=True).content_type == "no-base"

    def test_include_base_flips_to_no_exports(self, tmp_path: Path) -> None:
        doc = _scaffold_doc_and_store(tmp_path)
        assert pack(doc, include_base=True).content_type == "no-exports"

    def test_both_flags_equal_full(self, tmp_path: Path) -> None:
        doc = _scaffold_doc_and_store(tmp_path)
        assert pack(doc, include_base=True, include_exports=True).content_type == "full"
| 105 | + | |
| 106 | + | |
class TestRedistributionGate:
    """Audit F21: --include-base on non-redistributable spec refuses unless licensee URL."""

    def test_refuses_without_licensee_url(self, tmp_path: Path) -> None:
        doc = _scaffold_doc_and_store(tmp_path, base="llama-3.2-1b")
        with pytest.raises(BaseLicenseRefusedError) as excinfo:
            pack(doc, include_base=True)
        assert excinfo.value.base_key == "llama-3.2-1b"

    def test_accepts_with_licensee_url(self, tmp_path: Path) -> None:
        url = "https://example.com/accept"
        doc = _scaffold_doc_and_store(tmp_path, base="llama-3.2-1b")
        result = pack(doc, include_base=True, licensee_acceptance_url=url)
        assert result.applied_licensee_url == url

        # The written header must also record the URL.
        import json

        import zstandard as zstd

        dctx = zstd.ZstdDecompressor()
        with result.path.open("rb") as fh, dctx.stream_reader(fh) as reader:
            with tarfile.open(fileobj=reader, mode="r|") as tar:
                for member in tar:
                    if member.name != "PACK_HEADER.json":
                        continue
                    extracted = tar.extractfile(member)
                    assert extracted is not None
                    header = json.loads(extracted.read())
                    assert header["licensee_acceptance_url"] == url
                    return
        pytest.fail("PACK_HEADER.json not found in tar")

    def test_redistributable_base_needs_no_licensee(self, tmp_path: Path) -> None:
        """Apache-licensed bases like smollm2 don't gate on the URL."""
        doc = _scaffold_doc_and_store(tmp_path, base="smollm2-135m")
        result = pack(doc, include_base=True)  # no licensee url
        assert result.content_type == "no-exports"
| 146 | + | |
| 147 | + | |
class TestStoreLock:
    """Concurrency guard: packing must hold the store's exclusive lock."""

    def test_acquires_lock_during_pack(
        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """The store exclusive lock must be held for the duration of the copy."""
        # Fix: the original used `__import__("shutil")`; a plain local
        # import is the idiomatic, equivalent way to reach shutil.copytree.
        import shutil

        doc = _scaffold_doc_and_store(tmp_path)

        from dlm.doc.parser import parse_file
        from dlm.store.paths import for_dlm

        parsed = parse_file(doc)
        store = for_dlm(parsed.frontmatter.dlm_id)

        seen_lock_state: dict[str, bool] = {}
        original_copytree = shutil.copytree

        def watcher(*args: Any, **kwargs: Any) -> Any:
            # Record whether the lock file exists at the moment the copy runs.
            seen_lock_state["locked"] = store.lock.exists()
            return original_copytree(*args, **kwargs)

        monkeypatch.setattr("shutil.copytree", watcher)
        pack(doc)
        assert seen_lock_state.get("locked") is True
tests/unit/pack/test_unpacker.pyadded@@ -0,0 +1,221 @@ | ||
| 1 | +"""`dlm.pack.unpacker` — extract + verify + header gate + install (Sprint 14).""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +import tarfile | |
| 7 | +import tempfile | |
| 8 | +from pathlib import Path | |
| 9 | + | |
| 10 | +import pytest | |
| 11 | +import zstandard as zstd | |
| 12 | + | |
| 13 | +from dlm.pack.errors import ( | |
| 14 | + PackFormatVersionError, | |
| 15 | + PackIntegrityError, | |
| 16 | + PackLayoutError, | |
| 17 | +) | |
| 18 | +from dlm.pack.integrity import rollup_sha256, write_checksums | |
| 19 | +from dlm.pack.layout import ( | |
| 20 | + HEADER_FILENAME, | |
| 21 | + MANIFEST_FILENAME, | |
| 22 | + SHA256_FILENAME, | |
| 23 | +) | |
| 24 | +from dlm.pack.unpacker import unpack | |
| 25 | + | |
| 26 | + | |
def _synth_pack(
    tmp_path: Path,
    *,
    pack_format_version: int = 1,
    content_override: bytes | None = None,
    skip_entry: str | None = None,
) -> Path:
    """Hand-assemble a tarball/zstd pack with full control over shape.

    Used to exercise the unpacker's header/layout/checksum gates
    without running the real packer (which always produces valid
    output). ``content_override`` / ``skip_entry`` mutate the staging
    tree AFTER checksums are frozen — that is what enables the
    negative tests.
    """
    workdir = tmp_path / "stage"
    (workdir / "dlm").mkdir(parents=True)
    (workdir / "dlm" / "mydoc.dlm").write_text(
        "---\ndlm_id: 01TEST\nbase_model: smollm2-135m\n---\n"
    )

    (workdir / "store").mkdir()
    (workdir / "store" / "manifest.json").write_text(
        json.dumps({"dlm_id": "01TEST", "base_model": "smollm2-135m"})
    )

    header = {
        "pack_format_version": pack_format_version,
        "created_at": "2026-04-19T12:00:00",
        "tool_version": "0.1.0",
        "content_type": "minimal",
        "platform_hint": "linux",
        "licensee_acceptance_url": None,
    }
    (workdir / HEADER_FILENAME).write_text(json.dumps(header))

    checksums = write_checksums(workdir, exclude=(SHA256_FILENAME, MANIFEST_FILENAME))
    manifest = {
        "dlm_id": "01TEST",
        "base_model": "smollm2-135m",
        "base_model_revision": None,
        "base_model_sha256": None,
        "adapter_version": 0,
        "entries": {rel: (workdir / rel).stat().st_size for rel in checksums},
        "content_sha256": rollup_sha256(checksums),
    }
    (workdir / MANIFEST_FILENAME).write_text(json.dumps(manifest))

    # Optional post-freeze mutations for negative tests.
    if content_override is not None:
        (workdir / "dlm" / "mydoc.dlm").write_bytes(content_override)
    if skip_entry is not None:
        (workdir / skip_entry).unlink()

    out = tmp_path / "synth.pack"
    cctx = zstd.ZstdCompressor(level=1)
    with out.open("wb") as fh, cctx.stream_writer(fh) as compressor:
        with tarfile.open(fileobj=compressor, mode="w|") as tar:
            for entry in sorted(workdir.rglob("*")):
                if entry.is_file():
                    tar.add(entry, arcname=entry.relative_to(workdir).as_posix())
    return out
| 88 | + | |
| 89 | + | |
class TestHappyPath:
    """A valid synthetic pack installs cleanly."""

    def test_unpacks_into_dlm_home(self, tmp_path: Path) -> None:
        home = tmp_path / "home"
        result = unpack(_synth_pack(tmp_path), home=home, out_dir=tmp_path / "out")
        assert result.dlm_id == "01TEST"
        assert result.store_path == home / "store" / "01TEST"
        assert (result.store_path / "manifest.json").exists()
        assert result.dlm_path == tmp_path / "out" / "mydoc.dlm"
        assert result.dlm_path.read_text().startswith("---")
| 99 | + | |
| 100 | + | |
class TestVersionGate:
    """Packs newer than this tool understands are refused, not guessed at."""

    def test_newer_than_current_refused(self, tmp_path: Path) -> None:
        too_new = _synth_pack(tmp_path, pack_format_version=999)
        with pytest.raises(PackFormatVersionError):
            unpack(too_new, home=tmp_path / "home")
| 106 | + | |
| 107 | + | |
class TestIntegrity:
    """Checksum verification at unpack time."""

    def test_corrupted_pack_refused(self, tmp_path: Path) -> None:
        """Build a pack whose .dlm content disagrees with the recorded checksum."""
        tree = tmp_path / "stage"
        (tree / "dlm").mkdir(parents=True)
        (tree / "dlm" / "x.dlm").write_text("original")
        (tree / "store").mkdir()
        (tree / "store" / "manifest.json").write_text("{}")
        header = {
            "pack_format_version": 1,
            "created_at": "2026-04-19T12:00:00",
            "tool_version": "0.1.0",
            "content_type": "minimal",
            "platform_hint": "linux",
            "licensee_acceptance_url": None,
        }
        (tree / HEADER_FILENAME).write_text(json.dumps(header))

        # Freeze checksums against the original content, then tamper.
        write_checksums(tree, exclude=(SHA256_FILENAME, MANIFEST_FILENAME))
        (tree / "dlm" / "x.dlm").write_text("TAMPERED!")

        # Manifest is required for layout but its content_sha256 isn't
        # rechecked at unpack (integrity gate catches the tamper first).
        manifest = {
            "dlm_id": "01TEST",
            "base_model": "smollm2-135m",
            "base_model_revision": None,
            "base_model_sha256": None,
            "adapter_version": 0,
            "entries": {},
            "content_sha256": "0" * 64,
        }
        (tree / MANIFEST_FILENAME).write_text(json.dumps(manifest))

        out = tmp_path / "tampered.pack"
        cctx = zstd.ZstdCompressor(level=1)
        with out.open("wb") as fh, cctx.stream_writer(fh) as compressor:
            with tarfile.open(fileobj=compressor, mode="w|") as tar:
                for entry in sorted(tree.rglob("*")):
                    if entry.is_file():
                        tar.add(entry, arcname=entry.relative_to(tree).as_posix())

        with pytest.raises(PackIntegrityError):
            unpack(out, home=tmp_path / "home")
| 160 | + | |
| 161 | + | |
class TestLayoutGate:
    """Structural gates: required members present, no path escapes."""

    def test_missing_header_refused(self, tmp_path: Path) -> None:
        # A tarball that never contains PACK_HEADER.json.
        tree = tmp_path / "stage"
        (tree / "dlm").mkdir(parents=True)
        (tree / "dlm" / "x.dlm").write_text("x")
        (tree / "store").mkdir()
        (tree / "store" / "manifest.json").write_text("{}")
        (tree / MANIFEST_FILENAME).write_text("{}")
        write_checksums(tree, exclude=(SHA256_FILENAME, MANIFEST_FILENAME))
        out = tmp_path / "no-header.pack"
        cctx = zstd.ZstdCompressor(level=1)
        with out.open("wb") as fh, cctx.stream_writer(fh) as compressor:
            with tarfile.open(fileobj=compressor, mode="w|") as tar:
                for entry in sorted(tree.rglob("*")):
                    if entry.is_file():
                        tar.add(entry, arcname=entry.relative_to(tree).as_posix())
        with pytest.raises(PackLayoutError):
            unpack(out, home=tmp_path / "home")

    def test_unsafe_tar_entry_refused(self, tmp_path: Path) -> None:
        """An entry whose path escapes the extraction root is rejected."""
        payload = tmp_path / "s" / "evil"
        payload.parent.mkdir()
        payload.write_text("x")
        out = tmp_path / "evil.pack"
        cctx = zstd.ZstdCompressor(level=1)
        with out.open("wb") as fh, cctx.stream_writer(fh) as compressor:
            with tarfile.open(fileobj=compressor, mode="w|") as tar:
                info = tar.gettarinfo(payload, arcname="../escape")
                with payload.open("rb") as src:
                    tar.addfile(info, src)
        with pytest.raises(PackLayoutError):
            unpack(out, home=tmp_path / "home")
| 198 | + | |
| 199 | + | |
class TestForce:
    """Overwrite semantics for an already-installed store."""

    def test_existing_store_refused_without_force(self, tmp_path: Path) -> None:
        archive = _synth_pack(tmp_path)
        home = tmp_path / "home"
        unpack(archive, home=home, out_dir=tmp_path / "out1")
        # A repeat install without force must fail: the store already exists.
        with pytest.raises(PackIntegrityError):
            unpack(archive, home=home, out_dir=tmp_path / "out2")

    def test_force_replaces_existing_store(self, tmp_path: Path) -> None:
        archive = _synth_pack(tmp_path)
        home = tmp_path / "home"
        unpack(archive, home=home, out_dir=tmp_path / "out1")
        # Leave a marker in the old store; force=True should wipe it.
        marker = home / "store" / "01TEST" / "marker.txt"
        marker.write_text("prior")
        unpack(archive, home=home, force=True, out_dir=tmp_path / "out2")
        assert not marker.exists()
| 218 | + | |
| 219 | + | |
# The `tempfile` import above is intentionally retained (and referenced here
# so linters don't flag it) for contributors extending this file.
_ = tempfile