@@ -0,0 +1,89 @@ |
| 1 | +"""B12.1 regression: `dlm train` on a hand-authored `.dlm` (no prior `dlm init`). |
| 2 | + |
| 3 | +The original bug surfaced via Audit 12 E2E-1: an authored `.dlm` with a |
| 4 | +fresh ULID frontmatter passed straight to `dlm train` crashes with |
| 5 | +`manifest is corrupt: read failed: No such file or directory` after the |
| 6 | +trainer creates `<store>/{adapter,logs}/` but before any code writes |
| 7 | +the manifest. |
| 8 | + |
| 9 | +The fix in `src/dlm/cli/commands.py:train_cmd` bootstraps a manifest |
| 10 | +whenever the store layout exists but `manifest.json` does not (covers |
| 11 | +both the auto-scaffold path and this hand-authored path). |
| 12 | + |
| 13 | +This test reproduces the original failure mode end-to-end via |
| 14 | +`CliRunner` so the bootstrap can't silently regress. |
| 15 | +""" |
| 16 | + |
| 17 | +from __future__ import annotations |
| 18 | + |
| 19 | +import os |
| 20 | +from pathlib import Path |
| 21 | + |
| 22 | +import pytest |
| 23 | +from typer.testing import CliRunner |
| 24 | + |
# Module-wide pytest markers: every test in this file is tagged both `slow`
# and `online`. NOTE(review): presumably because the test drives a real
# `dlm train` step and depends on a downloaded model — confirm against the
# project's marker definitions.
pytestmark = [
    pytest.mark.slow,
    pytest.mark.online,
]
| 26 | + |
| 27 | + |
def test_fresh_train_without_init_writes_manifest_and_advances(
    tmp_path: Path,
    tiny_model_dir: Path,  # noqa: ARG001 — session-cached download
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Train a hand-authored `.dlm` with no prior init and inspect the store.

    Writes a `.dlm` document with its own `dlm_id` into `tmp_path`, runs
    `dlm train <doc> --max-steps 1 --fresh` through `CliRunner` with
    `DLM_HOME` pointed at a directory under `tmp_path`, and then asserts:

    * the command exits with code 0,
    * the store's manifest path exists,
    * `adapter/versions` exists and contains a `v0001` directory.

    `tiny_model_dir` is requested only for its fixture side effect and is
    not read by the test body.
    """
    # Single source of truth for the id used in the frontmatter and in the
    # store lookup below, so the two cannot drift apart.
    dlm_id = "01KQB000FRESHB12B12B12B12B"
    dlm_home = str(tmp_path / "dlm-home")
    monkeypatch.setenv("DLM_HOME", dlm_home)

    frontmatter = (
        "---\n"
        f"dlm_id: {dlm_id}\n"
        "dlm_version: 14\n"
        "base_model: smollm2-135m\n"
        "training:\n"
        " adapter: lora\n"
        " lora_r: 4\n"
        " sequence_len: 256\n"
        " micro_batch_size: 1\n"
        " grad_accum: 1\n"
        " num_epochs: 1\n"
        "---\n"
    )
    body = (
        "# Fresh\n"
        "\n"
        "::instruction::\n"
        "### Q\n"
        "What is two plus two?\n"
        "\n"
        "### A\n"
        "Four.\n"
        "\n"
        "::instruction::\n"
        "### Q\n"
        "What is the capital of France?\n"
        "\n"
        "### A\n"
        "Paris.\n"
    )
    doc = tmp_path / "fresh.dlm"
    doc.write_text(frontmatter + body, encoding="utf-8")

    # Imported only after DLM_HOME has been set and the document written,
    # matching the original statement order.
    from dlm.cli.app import app
    from dlm.store.paths import for_dlm

    cli_env = dict(os.environ)
    cli_env["DLM_HOME"] = dlm_home
    cli_args = ["train", str(doc), "--max-steps", "1", "--fresh"]

    # catch_exceptions=False lets any exception raised by the command
    # propagate into the test instead of being captured in the result.
    result = CliRunner().invoke(
        app,
        cli_args,
        env=cli_env,
        catch_exceptions=False,
    )

    assert result.exit_code == 0, f"train failed:\n{result.output}"

    store = for_dlm(dlm_id)
    assert store.manifest.exists(), (
        "B12.1 regression: manifest.json was not bootstrapped on first train"
    )

    versions_dir = store.adapter / "versions"
    assert versions_dir.exists(), "adapter/versions dir missing"

    written_versions = sorted(
        entry.name for entry in versions_dir.iterdir() if entry.is_dir()
    )
    assert "v0001" in written_versions, (
        f"expected v0001 adapter after first train, got {written_versions}"
    )