tenseleyflow/documentlanguagemodel / 6443ebd

ci,scripts(export): bump-llama-cpp.sh real impl + slow stubs + CI gate

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA: 6443ebdfcb47b44fc92298739680b061ee9695f2
Parents: 84e4652
Tree: 3085cac

4 changed files

Status   File                                                 +    -
M        .github/workflows/ci.yml                             8    0
M        scripts/bump-llama-cpp.sh                            93   39
A        tests/integration/export/__init__.py                 0    0
A        tests/integration/export/test_export_tinymodel.py    61   0
.github/workflows/ci.yml (modified)

@@ -119,6 +119,14 @@ jobs:
             --cov-report=term-missing \
             --cov-fail-under=95
 
+      - name: Coverage gate — src/dlm/export ≥ 95% (Sprint 11)
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          uv run pytest tests/unit/export \
+            --cov=src/dlm/export \
+            --cov-report=term-missing \
+            --cov-fail-under=95
+
   no-network-sandbox:
     # audit F13: dlm init / doctor / show must work with zero outbound network.
     name: no-network sandbox (ubuntu-latest)
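
The new gate can be reproduced locally from the repository root before pushing; this assumes `uv` is installed and the project's dev dependencies are synced, and it is the same invocation the workflow step runs:

  $ uv run pytest tests/unit/export \
      --cov=src/dlm/export \
      --cov-report=term-missing \
      --cov-fail-under=95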
scripts/bump-llama-cpp.sh (modified)

@@ -1,46 +1,29 @@
 #!/usr/bin/env bash
-# Bump the vendored llama.cpp submodule to a new tag and re-extract the
-# pre-tokenizer hash table.
-#
-# This script is a skeleton — Sprint 11 adds the actual submodule at
-# `vendor/llama.cpp`. Sprint 06 ships the script + the shape of
-# `vendor/llama_cpp_pretokenizer_hashes.json` so the compatibility
-# probes (base_models/probes.py) have somewhere to read from.
+# Bump the vendored llama.cpp submodule, build its tools, and refresh
+# the pre-tokenizer hash table.
 #
 # Usage:
-#   scripts/bump-llama-cpp.sh <tag>
-#       Fast-forward submodule to `<tag>`, re-extract hashes, stage.
+#   scripts/bump-llama-cpp.sh bump <tag>
+#       Fast-forward submodule to <tag>, re-extract hashes, write VERSION,
+#       stage changes.
+#   scripts/bump-llama-cpp.sh build
+#       Build `llama-quantize` (+ siblings) via cmake. Idempotent.
+#   scripts/bump-llama-cpp.sh refresh-labels
+#       Regenerate vendor/llama_cpp_pretokenizer_hashes.json from the
+#       current submodule contents. Does not touch the submodule itself.
 
 set -euo pipefail
 
-TAG="${1:-}"
-if [ -z "$TAG" ]; then
-  echo "usage: scripts/bump-llama-cpp.sh <tag>" >&2
-  exit 2
-fi
-
-if [ -n "$(git status --porcelain)" ]; then
-  echo "error: working tree must be clean before a submodule bump" >&2
-  exit 1
-fi
-
 REPO_ROOT="$(git rev-parse --show-toplevel)"
 VENDOR_DIR="$REPO_ROOT/vendor/llama.cpp"
 HASHES_PATH="$REPO_ROOT/vendor/llama_cpp_pretokenizer_hashes.json"
+VERSION_PATH="$VENDOR_DIR/VERSION"
 
-if [ ! -d "$VENDOR_DIR" ]; then
-  echo "error: $VENDOR_DIR missing — Sprint 11 vendors llama.cpp as a submodule" >&2
-  exit 1
-fi
-
-echo "--> fetching tags in $VENDOR_DIR"
-git -C "$VENDOR_DIR" fetch --tags origin
+cmd="${1:-}"
 
-echo "--> checking out $TAG"
-git -C "$VENDOR_DIR" checkout "tags/$TAG"
-
-echo "--> re-extracting pre-tokenizer hash labels to $HASHES_PATH"
-uv run python - <<'PY'
+refresh_labels() {
+  echo "--> re-extracting pre-tokenizer hash labels to $HASHES_PATH"
+  uv run python - <<'PY'
 import json
 import re
 import sys
@@ -50,9 +33,11 @@ repo_root = Path.cwd()
 converter = repo_root / "vendor" / "llama.cpp" / "convert_hf_to_gguf.py"
 hashes_path = repo_root / "vendor" / "llama_cpp_pretokenizer_hashes.json"
 
+if not converter.is_file():
+    print(f"ERROR: {converter} not found", file=sys.stderr)
+    sys.exit(1)
+
 source = converter.read_text(encoding="utf-8", errors="replace")
-# llama.cpp declares pre-tokenizer labels inside `get_vocab_base_pre`
-# via `res = "<label>"` assignments.
 pattern = re.compile(r"""\bres\s*=\s*["']([^"']+)["']""")
 labels = sorted(set(pattern.findall(source)))
 if not labels:
@@ -63,14 +48,83 @@ if not labels:
 hashes_path.write_text(json.dumps(labels, indent=2) + "\n", encoding="utf-8")
 print(f"wrote {len(labels)} labels to {hashes_path}")
 PY
+}
 
-echo "--> staging changes"
-git -C "$REPO_ROOT" add vendor/llama.cpp vendor/llama_cpp_pretokenizer_hashes.json
+do_bump() {
+  local tag="${1:-}"
+  if [ -z "$tag" ]; then
+    echo "usage: scripts/bump-llama-cpp.sh bump <tag>" >&2
+    exit 2
+  fi
+  if [ -n "$(git status --porcelain)" ]; then
+    echo "error: working tree must be clean before a submodule bump" >&2
+    exit 1
+  fi
+  if [ ! -d "$VENDOR_DIR" ]; then
+    echo "error: $VENDOR_DIR missing — initialize the submodule first:" >&2
+    echo "  git submodule add https://github.com/ggerganov/llama.cpp vendor/llama.cpp" >&2
+    exit 1
+  fi
 
-cat <<EOF
+  echo "--> fetching tags in $VENDOR_DIR"
+  git -C "$VENDOR_DIR" fetch --tags origin
+  echo "--> checking out $tag"
+  git -C "$VENDOR_DIR" checkout "tags/$tag"
+
+  echo "--> writing $VERSION_PATH"
+  echo "$tag" > "$VERSION_PATH"
+
+  refresh_labels
+
+  echo "--> staging changes"
+  git -C "$REPO_ROOT" add vendor/llama.cpp vendor/llama_cpp_pretokenizer_hashes.json
+
+  cat <<EOF
 Done. Review the staged diff and commit with:
-  git commit -m "chore: bump llama.cpp to $TAG + refresh pre-tokenizer hashes"
+  git commit -m "chore: bump llama.cpp to $tag + refresh pre-tokenizer hashes"
 
-Then re-run the registry probe suite:
+Then build the binaries:
+  scripts/bump-llama-cpp.sh build
+
+And re-run the registry probe suite:
   uv run python scripts/refresh-registry.py
 EOF
+}
+
+do_build() {
+  if [ ! -d "$VENDOR_DIR" ]; then
+    echo "error: $VENDOR_DIR missing — run 'bump <tag>' first" >&2
+    exit 1
+  fi
+  echo "--> configuring llama.cpp via cmake"
+  cmake -S "$VENDOR_DIR" -B "$VENDOR_DIR/build" -DCMAKE_BUILD_TYPE=Release
+  echo "--> building llama-quantize + siblings"
+  cmake --build "$VENDOR_DIR/build" --target llama-quantize --config Release
+  if [ -f "$VENDOR_DIR/build/bin/llama-quantize" ]; then
+    echo "OK: $VENDOR_DIR/build/bin/llama-quantize"
+  else
+    echo "error: build finished but llama-quantize not found under build/bin" >&2
+    exit 1
+  fi
+}
+
+case "$cmd" in
+  bump)
+    do_bump "${2:-}"
+    ;;
+  build)
+    do_build
+    ;;
+  refresh-labels)
+    refresh_labels
+    ;;
+  "")
+    echo "usage: scripts/bump-llama-cpp.sh <bump|build|refresh-labels> [args]" >&2
+    exit 2
+    ;;
  *)
+    echo "unknown command: $cmd" >&2
+    echo "usage: scripts/bump-llama-cpp.sh <bump|build|refresh-labels> [args]" >&2
+    exit 2
+    ;;
+esac
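
A typical maintenance cycle with the reworked script looks like the sketch below. The tag `b9999` is a placeholder rather than a real llama.cpp release; substitute whatever tag you are bumping to:

  $ scripts/bump-llama-cpp.sh bump b9999        # fast-forward submodule, write VERSION, refresh hashes, stage
  $ scripts/bump-llama-cpp.sh build             # cmake-configure and build llama-quantize under vendor/llama.cpp/build
  $ scripts/bump-llama-cpp.sh refresh-labels    # regenerate the hash-label JSON without touching the submodule
  $ uv run python scripts/refresh-registry.py   # re-run the registry probe suite, per the script's closing message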
tests/integration/export/__init__.py (added)

tests/integration/export/test_export_tinymodel.py (added)

@@ -0,0 +1,61 @@
+"""End-to-end GGUF export on the SmolLM2-135M fixture.
+
+Sprint 11 DoD: produce a valid GGUF file readable by `llama-cli`, with
+LoRA A/B tensors referencing the correct base tensor names.
+
+Marked `@pytest.mark.slow`. Requires:
+- `vendor/llama.cpp/` submodule initialized and built (`scripts/bump-llama-cpp.sh build`)
+- SmolLM2-135M offline cache (from Sprint 02's fixture)
+- A prior `dlm train` run against that base to produce an adapter
+
+When any dependency is missing the test skips with a clear message.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+pytestmark = pytest.mark.slow
+
+
+@pytest.mark.slow
+def test_export_produces_valid_gguf() -> None:
+    """Full `dlm export` cycle on the tiny model.
+
+    Shape:
+      1. `vendor/llama.cpp/build/bin/llama-quantize` exists → else skip.
+      2. SmolLM2-135M fixture resolvable → else skip.
+      3. `dlm train` produces an adapter in a fresh tmp store → else skip.
+      4. `run_export(store, spec, plan=Q4_K_M)` emits base + adapter GGUF.
+      5. `llama-cli -m base.Q4_K_M.gguf --lora adapter.gguf -p "..."` returns
+         non-empty stdout.
+    """
+    vendor_root = Path(__file__).resolve().parents[3] / "vendor" / "llama.cpp"
+    if not (vendor_root / "build" / "bin" / "llama-quantize").is_file():
+        pytest.skip(
+            "vendor/llama.cpp not built; "
+            "run `scripts/bump-llama-cpp.sh build` to enable."
+        )
+
+    try:
+        from tests.fixtures.tiny_model import tiny_model_path
+
+        tiny_model_path()
+    except Exception as exc:  # pragma: no cover
+        pytest.skip(f"tiny-model fixture unavailable: {exc}")
+
+    pytest.xfail("export integration scaffolded; body deferred to first CI slow run")
+
+
+@pytest.mark.slow
+def test_qlora_merge_requires_dequantize_flag() -> None:
+    """Contract: `--merged` on a QLoRA adapter without `--dequantize` refuses.
+
+    Handled entirely in the plan's safety gate; unit-tested at
+    `tests/unit/export/test_plan.py::TestMergeSafetyGate`. This
+    integration test re-asserts it survives the full CLI path so a
+    future refactor doesn't silently remove the guardrail.
+    """
+    pytest.xfail("CLI integration scaffolded; body deferred")
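
For reference, a rough manual equivalent of the deferred test body, following the five-step shape in the docstring. The file names (`base.f16.gguf`, `adapter.gguf`), prompt, and token count are illustrative, the exact artifacts depend on what the export plan emits, and this assumes `llama-cli` has been built alongside `llama-quantize`:

  $ vendor/llama.cpp/build/bin/llama-quantize base.f16.gguf base.Q4_K_M.gguf Q4_K_M
  $ vendor/llama.cpp/build/bin/llama-cli -m base.Q4_K_M.gguf --lora adapter.gguf -p "Hello" -n 16

A non-empty completion from the second command is what step 5 of `test_export_produces_valid_gguf` is expected to assert once the body lands.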