tenseleyflow/documentlanguagemodel / 6443ebd

ci,scripts(export): bump-llama-cpp.sh real impl + slow stubs + CI gate

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA: 6443ebdfcb47b44fc92298739680b061ee9695f2
Parents: 84e4652
Tree: 3085cac

4 changed files

Status   File                                                 +    -
M        .github/workflows/ci.yml                             8    0
M        scripts/bump-llama-cpp.sh                            93   39
A        tests/integration/export/__init__.py                 0    0
A        tests/integration/export/test_export_tinymodel.py    61   0
.github/workflows/ci.yml (modified)

@@ -119,6 +119,14 @@ jobs:
             --cov-report=term-missing \
             --cov-fail-under=95
 
+      - name: Coverage gate — src/dlm/export ≥ 95% (Sprint 11)
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          uv run pytest tests/unit/export \
+            --cov=src/dlm/export \
+            --cov-report=term-missing \
+            --cov-fail-under=95
+
   no-network-sandbox:
     # audit F13: dlm init / doctor / show must work with zero outbound network.
     name: no-network sandbox (ubuntu-latest)
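
The new gate can be reproduced locally from the repository root before pushing; this assumes `uv` is installed and the project's dev dependencies are synced, and it is the same invocation the workflow step runs:

  $ uv run pytest tests/unit/export \
      --cov=src/dlm/export \
      --cov-report=term-missing \
      --cov-fail-under=95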
scripts/bump-llama-cpp.sh (modified)

@@ -1,46 +1,29 @@
 #!/usr/bin/env bash
-# Bump the vendored llama.cpp submodule to a new tag and re-extract the
-# pre-tokenizer hash table.
-#
-# This script is a skeleton — Sprint 11 adds the actual submodule at
-# `vendor/llama.cpp`. Sprint 06 ships the script + the shape of
-# `vendor/llama_cpp_pretokenizer_hashes.json` so the compatibility
-# probes (base_models/probes.py) have somewhere to read from.
+# Bump the vendored llama.cpp submodule, build its tools, and refresh
+# the pre-tokenizer hash table.
 #
 # Usage:
-#   scripts/bump-llama-cpp.sh <tag>
-#       Fast-forward submodule to `<tag>`, re-extract hashes, stage.
+#   scripts/bump-llama-cpp.sh bump <tag>
+#       Fast-forward submodule to <tag>, re-extract hashes, write VERSION,
+#       stage changes.
+#   scripts/bump-llama-cpp.sh build
+#       Build `llama-quantize` (+ siblings) via cmake. Idempotent.
+#   scripts/bump-llama-cpp.sh refresh-labels
+#       Regenerate vendor/llama_cpp_pretokenizer_hashes.json from the
+#       current submodule contents. Does not touch the submodule itself.
 
 set -euo pipefail
 
-TAG="${1:-}"
-if [ -z "$TAG" ]; then
-  echo "usage: scripts/bump-llama-cpp.sh <tag>" >&2
-  exit 2
-fi
-
-if [ -n "$(git status --porcelain)" ]; then
-  echo "error: working tree must be clean before a submodule bump" >&2
-  exit 1
-fi
-
 REPO_ROOT="$(git rev-parse --show-toplevel)"
 VENDOR_DIR="$REPO_ROOT/vendor/llama.cpp"
 HASHES_PATH="$REPO_ROOT/vendor/llama_cpp_pretokenizer_hashes.json"
+VERSION_PATH="$VENDOR_DIR/VERSION"
 
-if [ ! -d "$VENDOR_DIR" ]; then
-  echo "error: $VENDOR_DIR missing — Sprint 11 vendors llama.cpp as a submodule" >&2
-  exit 1
-fi
-
-echo "--> fetching tags in $VENDOR_DIR"
-git -C "$VENDOR_DIR" fetch --tags origin
+cmd="${1:-}"
 
-echo "--> checking out $TAG"
-git -C "$VENDOR_DIR" checkout "tags/$TAG"
-
-echo "--> re-extracting pre-tokenizer hash labels to $HASHES_PATH"
-uv run python - <<'PY'
+refresh_labels() {
+  echo "--> re-extracting pre-tokenizer hash labels to $HASHES_PATH"
+  uv run python - <<'PY'
 import json
 import re
 import sys
@@ -50,9 +33,11 @@ repo_root = Path.cwd()
 converter = repo_root / "vendor" / "llama.cpp" / "convert_hf_to_gguf.py"
 hashes_path = repo_root / "vendor" / "llama_cpp_pretokenizer_hashes.json"
 
+if not converter.is_file():
+    print(f"ERROR: {converter} not found", file=sys.stderr)
+    sys.exit(1)
+
 source = converter.read_text(encoding="utf-8", errors="replace")
-# llama.cpp declares pre-tokenizer labels inside `get_vocab_base_pre`
-# via `res = "<label>"` assignments.
 pattern = re.compile(r"""\bres\s*=\s*["']([^"']+)["']""")
 labels = sorted(set(pattern.findall(source)))
 if not labels:
@@ -63,14 +48,83 @@ if not labels:
 hashes_path.write_text(json.dumps(labels, indent=2) + "\n", encoding="utf-8")
 print(f"wrote {len(labels)} labels to {hashes_path}")
 PY
+}
 
-echo "--> staging changes"
-git -C "$REPO_ROOT" add vendor/llama.cpp vendor/llama_cpp_pretokenizer_hashes.json
+do_bump() {
+  local tag="${1:-}"
+  if [ -z "$tag" ]; then
+    echo "usage: scripts/bump-llama-cpp.sh bump <tag>" >&2
+    exit 2
+  fi
+  if [ -n "$(git status --porcelain)" ]; then
+    echo "error: working tree must be clean before a submodule bump" >&2
+    exit 1
+  fi
+  if [ ! -d "$VENDOR_DIR" ]; then
+    echo "error: $VENDOR_DIR missing — initialize the submodule first:" >&2
+    echo "  git submodule add https://github.com/ggerganov/llama.cpp vendor/llama.cpp" >&2
+    exit 1
+  fi
 
-cat <<EOF
+  echo "--> fetching tags in $VENDOR_DIR"
+  git -C "$VENDOR_DIR" fetch --tags origin
+  echo "--> checking out $tag"
+  git -C "$VENDOR_DIR" checkout "tags/$tag"
+
+  echo "--> writing $VERSION_PATH"
+  echo "$tag" > "$VERSION_PATH"
+
+  refresh_labels
+
+  echo "--> staging changes"
+  git -C "$REPO_ROOT" add vendor/llama.cpp vendor/llama_cpp_pretokenizer_hashes.json
+
+  cat <<EOF
 Done. Review the staged diff and commit with:
-  git commit -m "chore: bump llama.cpp to $TAG + refresh pre-tokenizer hashes"
+  git commit -m "chore: bump llama.cpp to $tag + refresh pre-tokenizer hashes"
 
-Then re-run the registry probe suite:
+Then build the binaries:
+  scripts/bump-llama-cpp.sh build
+
+And re-run the registry probe suite:
   uv run python scripts/refresh-registry.py
 EOF
+}
+
+do_build() {
+  if [ ! -d "$VENDOR_DIR" ]; then
+    echo "error: $VENDOR_DIR missing — run 'bump <tag>' first" >&2
+    exit 1
+  fi
+  echo "--> configuring llama.cpp via cmake"
+  cmake -S "$VENDOR_DIR" -B "$VENDOR_DIR/build" -DCMAKE_BUILD_TYPE=Release
+  echo "--> building llama-quantize + siblings"
+  cmake --build "$VENDOR_DIR/build" --target llama-quantize --config Release
+  if [ -f "$VENDOR_DIR/build/bin/llama-quantize" ]; then
+    echo "OK: $VENDOR_DIR/build/bin/llama-quantize"
+  else
+    echo "error: build finished but llama-quantize not found under build/bin" >&2
+    exit 1
+  fi
+}
+
+case "$cmd" in
+  bump)
+    do_bump "${2:-}"
+    ;;
+  build)
+    do_build
+    ;;
+  refresh-labels)
+    refresh_labels
+    ;;
+  "")
+    echo "usage: scripts/bump-llama-cpp.sh <bump|build|refresh-labels> [args]" >&2
+    exit 2
+    ;;
  *)
+    echo "unknown command: $cmd" >&2
+    echo "usage: scripts/bump-llama-cpp.sh <bump|build|refresh-labels> [args]" >&2
+    exit 2
+    ;;
+esac
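
A typical maintenance cycle with the reworked script looks like the sketch below. The tag `b9999` is a placeholder rather than a real llama.cpp release; substitute whatever tag you are bumping to:

  $ scripts/bump-llama-cpp.sh bump b9999        # fast-forward submodule, write VERSION, refresh hashes, stage
  $ scripts/bump-llama-cpp.sh build             # cmake-configure and build llama-quantize under vendor/llama.cpp/build
  $ scripts/bump-llama-cpp.sh refresh-labels    # regenerate the hash-label JSON without touching the submodule
  $ uv run python scripts/refresh-registry.py   # re-run the registry probe suite, per the script's closing message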
tests/integration/export/__init__.py (added)

tests/integration/export/test_export_tinymodel.py (added)

@@ -0,0 +1,61 @@
+"""End-to-end GGUF export on the SmolLM2-135M fixture.
+
+Sprint 11 DoD: produce a valid GGUF file readable by `llama-cli`, with
+LoRA A/B tensors referencing the correct base tensor names.
+
+Marked `@pytest.mark.slow`. Requires:
+- `vendor/llama.cpp/` submodule initialized and built (`scripts/bump-llama-cpp.sh build`)
+- SmolLM2-135M offline cache (from Sprint 02's fixture)
+- A prior `dlm train` run against that base to produce an adapter
+
+When any dependency is missing the test skips with a clear message.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+pytestmark = pytest.mark.slow
+
+
+@pytest.mark.slow
+def test_export_produces_valid_gguf() -> None:
+    """Full `dlm export` cycle on the tiny model.
+
+    Shape:
+      1. `vendor/llama.cpp/build/bin/llama-quantize` exists → else skip.
+      2. SmolLM2-135M fixture resolvable → else skip.
+      3. `dlm train` produces an adapter in a fresh tmp store → else skip.
+      4. `run_export(store, spec, plan=Q4_K_M)` emits base + adapter GGUF.
+      5. `llama-cli -m base.Q4_K_M.gguf --lora adapter.gguf -p "..."` returns
+         non-empty stdout.
+    """
+    vendor_root = Path(__file__).resolve().parents[3] / "vendor" / "llama.cpp"
+    if not (vendor_root / "build" / "bin" / "llama-quantize").is_file():
+        pytest.skip(
+            "vendor/llama.cpp not built; "
+            "run `scripts/bump-llama-cpp.sh build` to enable."
+        )
+
+    try:
+        from tests.fixtures.tiny_model import tiny_model_path
+
+        tiny_model_path()
+    except Exception as exc:  # pragma: no cover
+        pytest.skip(f"tiny-model fixture unavailable: {exc}")
+
+    pytest.xfail("export integration scaffolded; body deferred to first CI slow run")
+
+
+@pytest.mark.slow
+def test_qlora_merge_requires_dequantize_flag() -> None:
+    """Contract: `--merged` on a QLoRA adapter without `--dequantize` refuses.
+
+    Handled entirely in the plan's safety gate; unit-tested at
+    `tests/unit/export/test_plan.py::TestMergeSafetyGate`. This
+    integration test re-asserts it survives the full CLI path so a
+    future refactor doesn't silently remove the guardrail.
+    """
+    pytest.xfail("CLI integration scaffolded; body deferred")
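
For reference, a rough manual equivalent of the deferred test body, following the five-step shape in the docstring. The file names (`base.f16.gguf`, `adapter.gguf`), prompt, and token count are illustrative, the exact artifacts depend on what the export plan emits, and this assumes `llama-cli` has been built alongside `llama-quantize`:

  $ vendor/llama.cpp/build/bin/llama-quantize base.f16.gguf base.Q4_K_M.gguf Q4_K_M
  $ vendor/llama.cpp/build/bin/llama-cli -m base.Q4_K_M.gguf --lora adapter.gguf -p "Hello" -n 16

A non-empty completion from the second command is what step 5 of `test_export_produces_valid_gguf` is expected to assert once the body lands.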