`a19a302`

Slow integration test: dlm train succeeds without prior dlm init

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 2 weeks ago

SHA: a19a302defe916efbea9d8d426ca5710ca8fd722
Parents: f6aba6c
Tree: f09a5e5

1 changed file

Status	File	+	-
A	`tests/integration/train/test_fresh_train_without_init.py`	89	0

tests/integration/train/test_fresh_train_without_init.py added

++"""B12.1 regression: `dlm train` on a hand-authored `.dlm` (no prior `dlm init`).
++
++The original bug surfaced via Audit 12 E2E-1: an authored `.dlm` with a
++fresh ULID frontmatter passed straight to `dlm train` crashes with
++`manifest is corrupt: read failed: No such file or directory` after the
++trainer creates `<store>/{adapter,logs}/` but before any code writes
++the manifest.
++
++The fix in `src/dlm/cli/commands.py:train_cmd` bootstraps a manifest
++whenever the store layout exists but `manifest.json` does not (covers
++both the auto-scaffold path and this hand-authored path).
++
++This test reproduces the original failure mode end-to-end via
++`CliRunner` so the bootstrap can't silently regress.
++"""
++
++from __future__ import annotations
++
++import os
++from pathlib import Path
++
++import pytest
++from typer.testing import CliRunner
++
++pytestmark = [pytest.mark.slow, pytest.mark.online]  # module-level marks: applied to every test in this file
++
++
++def test_fresh_train_without_init_writes_manifest_and_advances(
++    tmp_path: Path,
++    tiny_model_dir: Path,  # noqa: ARG001 — session-cached download
++    monkeypatch: pytest.MonkeyPatch,
++) -> None:
++    monkeypatch.setenv("DLM_HOME", str(tmp_path / "dlm-home"))
++
++    doc = tmp_path / "fresh.dlm"
++    doc.write_text(
++        "---\n"
++        "dlm_id: 01KQB000FRESHB12B12B12B12B\n"
++        "dlm_version: 14\n"
++        "base_model: smollm2-135m\n"
++        "training:\n"
++        "  adapter: lora\n"
++        "  lora_r: 4\n"
++        "  sequence_len: 256\n"
++        "  micro_batch_size: 1\n"
++        "  grad_accum: 1\n"
++        "  num_epochs: 1\n"
++        "---\n"
++        "# Fresh\n"
++        "\n"
++        "::instruction::\n"
++        "### Q\n"
++        "What is two plus two?\n"
++        "\n"
++        "### A\n"
++        "Four.\n"
++        "\n"
++        "::instruction::\n"
++        "### Q\n"
++        "What is the capital of France?\n"
++        "\n"
++        "### A\n"
++        "Paris.\n",
++        encoding="utf-8",
++    )
++
++    from dlm.cli.app import app
++    from dlm.store.paths import for_dlm
++
++    runner = CliRunner()
++    result = runner.invoke(
++        app,
++        ["train", str(doc), "--max-steps", "1", "--fresh"],
++        env={**os.environ, "DLM_HOME": str(tmp_path / "dlm-home")},
++        catch_exceptions=False,
++    )
++
++    assert result.exit_code == 0, f"train failed:\n{result.output}"
++
++    store = for_dlm("01KQB000FRESHB12B12B12B12B")
++    assert store.manifest.exists(), (
++        "B12.1 regression: manifest.json was not bootstrapped on first train"
++    )
++    versions_dir = store.adapter / "versions"
++    assert versions_dir.exists(), "adapter/versions dir missing"
++    written_versions = sorted(p.name for p in versions_dir.iterdir() if p.is_dir())
++    assert "v0001" in written_versions, (
++        f"expected v0001 adapter after first train, got {written_versions}"
++    )