@@ -0,0 +1,89 @@ |
| | 1 | +"""B12.1 regression: `dlm train` on a hand-authored `.dlm` (no prior `dlm init`). |
| | 2 | + |
| | 3 | +The original bug surfaced via Audit 12 E2E-1: an authored `.dlm` with a |
| | 4 | +fresh ULID frontmatter passed straight to `dlm train` crashes with |
| | 5 | +`manifest is corrupt: read failed: No such file or directory` after the |
| | 6 | +trainer creates `<store>/{adapter,logs}/` but before any code writes |
| | 7 | +the manifest. |
| | 8 | + |
| | 9 | +The fix in `src/dlm/cli/commands.py:train_cmd` bootstraps a manifest |
| | 10 | +whenever the store layout exists but `manifest.json` does not (covers |
| | 11 | +both the auto-scaffold path and this hand-authored path). |
| | 12 | + |
| | 13 | +This test reproduces the original failure mode end-to-end via |
| | 14 | +`CliRunner` so the bootstrap can't silently regress. |
| | 15 | +""" |
| | 16 | + |
| | 17 | +from __future__ import annotations |
| | 18 | + |
| | 19 | +import os |
| | 20 | +from pathlib import Path |
| | 21 | + |
| | 22 | +import pytest |
| | 23 | +from typer.testing import CliRunner |
| | 24 | + |
# Module-level pytest marks: every test collected from this file carries both
# the `slow` and the `online` marker. Marker semantics are project-defined
# (presumably "long-running" and "needs network access" — confirm in the
# project's pytest configuration).
pytestmark = [
    pytest.mark.slow,
    pytest.mark.online,
]
| | 26 | + |
| | 27 | + |
def test_fresh_train_without_init_writes_manifest_and_advances(
    tmp_path: Path,
    tiny_model_dir: Path,  # noqa: ARG001 — session-cached download
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `dlm train` on a hand-written `.dlm` and check the store it leaves.

    The document is written straight into ``tmp_path`` and handed to the
    ``train`` command through ``CliRunner``; this test never invokes
    ``dlm init``. After a single ``--fresh`` step the test expects:

    * exit code 0,
    * the store's manifest path to exist, and
    * a ``v0001`` directory under ``<store.adapter>/versions``.

    Args:
        tmp_path: Per-test scratch directory; holds both the document and
            the ``dlm-home`` directory that ``DLM_HOME`` points at.
        tiny_model_dir: Not referenced in the body; requested only so the
            fixture runs (presumably it makes the base model available
            locally — confirm against the fixture definition).
        monkeypatch: Used to set ``DLM_HOME`` in this process.
    """
    dlm_id = "01KQB000FRESHB12B12B12B12B"
    dlm_home = str(tmp_path / "dlm-home")

    # Set in-process as well as in the CLI env below: `for_dlm` is called
    # from this process after the run (presumably it resolves the store
    # from DLM_HOME — confirm in dlm.store.paths).
    monkeypatch.setenv("DLM_HOME", dlm_home)

    # One entry per line of the document; each gets a trailing newline when
    # the file is written.
    document_lines = (
        "---",
        f"dlm_id: {dlm_id}",
        "dlm_version: 14",
        "base_model: smollm2-135m",
        "training:",
        " adapter: lora",
        " lora_r: 4",
        " sequence_len: 256",
        " micro_batch_size: 1",
        " grad_accum: 1",
        " num_epochs: 1",
        "---",
        "# Fresh",
        "",
        "::instruction::",
        "### Q",
        "What is two plus two?",
        "",
        "### A",
        "Four.",
        "",
        "::instruction::",
        "### Q",
        "What is the capital of France?",
        "",
        "### A",
        "Paris.",
    )
    doc = tmp_path / "fresh.dlm"
    doc.write_text(
        "".join(f"{line}\n" for line in document_lines),
        encoding="utf-8",
    )

    # Project imports stay inside the test so they happen after DLM_HOME
    # has been set.
    from dlm.cli.app import app
    from dlm.store.paths import for_dlm

    cli_env = dict(os.environ)
    cli_env["DLM_HOME"] = dlm_home

    # catch_exceptions=False lets an unexpected exception propagate with its
    # traceback instead of being folded into the result object.
    result = CliRunner().invoke(
        app,
        ["train", str(doc), "--max-steps", "1", "--fresh"],
        env=cli_env,
        catch_exceptions=False,
    )

    assert result.exit_code == 0, f"train failed:\n{result.output}"

    store = for_dlm(dlm_id)
    assert store.manifest.exists(), (
        "B12.1 regression: manifest.json was not bootstrapped on first train"
    )

    versions_dir = store.adapter / "versions"
    assert versions_dir.exists(), "adapter/versions dir missing"

    version_names = sorted(
        entry.name for entry in versions_dir.iterdir() if entry.is_dir()
    )
    assert "v0001" in version_names, (
        f"expected v0001 adapter after first train, got {version_names}"
    )