#!/usr/bin/env bash
# Bump the vendored llama.cpp submodule, build its tools, and refresh
# the pre-tokenizer hash table.
#
# Usage:
#   scripts/bump-llama-cpp.sh bump <tag>
#     Check out <tag> in the submodule, re-extract hashes, write VERSION,
#     and stage the changes.
#   scripts/bump-llama-cpp.sh build
#     Build `llama-quantize` (+ siblings) via cmake. Idempotent.
#   scripts/bump-llama-cpp.sh build --portable
#     Build portable CPU binaries (`GGML_NATIVE=OFF`) suitable for CI
#     caches or redistribution across heterogeneous hosts.
#   scripts/bump-llama-cpp.sh build --with-server
#     Also build `llama-server` for Sprint 41's local HTTP target.
#   scripts/bump-llama-cpp.sh build --portable --with-server
#     Portable build plus `llama-server`.
#   scripts/bump-llama-cpp.sh refresh-labels
#     Regenerate vendor/llama_cpp_pretokenizer_hashes.json from the
#     current submodule contents. Does not touch the submodule itself.
#   scripts/bump-llama-cpp.sh probe-vl-arch
#     Re-run the VL arch probe (Sprint 35.4) and cache the verdicts
#     under vendor/llama_cpp_vl_arch_support.json. Fast path for the
#     runtime probe; if the cache is missing or stale, the runtime
#     falls back to a live scan.
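#
# Typical flow after an upstream release (the tag below is only an example):
#   scripts/bump-llama-cpp.sh bump b4600
#   scripts/bump-llama-cpp.sh build --portable --with-server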

set -euo pipefail

REPO_ROOT="$(git rev-parse --show-toplevel)"
# The embedded Python blocks resolve paths via Path.cwd(), so run from the
# repo root no matter where the script was invoked.
cd "$REPO_ROOT"
VENDOR_DIR="$REPO_ROOT/vendor/llama.cpp"
HASHES_PATH="$REPO_ROOT/vendor/llama_cpp_pretokenizer_hashes.json"
VL_ARCH_PATH="$REPO_ROOT/vendor/llama_cpp_vl_arch_support.json"
VERSION_PATH="$VENDOR_DIR/VERSION"

cmd="${1:-}"

refresh_labels() {
  echo "--> re-extracting pre-tokenizer hash labels to $HASHES_PATH"
  uv run python - <<'PY'
import json
import re
import sys
from pathlib import Path

repo_root = Path.cwd()
converter = repo_root / "vendor" / "llama.cpp" / "convert_hf_to_gguf.py"
hashes_path = repo_root / "vendor" / "llama_cpp_pretokenizer_hashes.json"

if not converter.is_file():
    print(f"ERROR: {converter} not found", file=sys.stderr)
    sys.exit(1)

source = converter.read_text(encoding="utf-8", errors="replace")
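# convert_hf_to_gguf.py tags each known pre-tokenizer with a line of the
# form `res = "<label>"`; harvest every such assignment.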
pattern = re.compile(r"""\bres\s*=\s*["']([^"']+)["']""")
labels = sorted(set(pattern.findall(source)))
if not labels:
    print("ERROR: no pre-tokenizer labels found in convert_hf_to_gguf.py",
          file=sys.stderr)
    sys.exit(1)
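
# The resulting file is a flat, sorted JSON array of label strings.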
hashes_path.write_text(json.dumps(labels, indent=2) + "\n", encoding="utf-8")
print(f"wrote {len(labels)} labels to {hashes_path}")
PY
}

probe_vl_arch() {
  echo "--> probing VL arch support in vendored llama.cpp"
  uv run python - <<'PY'
import json
import sys
from pathlib import Path

# Import dlm's probe directly: the wrapper cd'd to the repo root, and dlm is
# importable via `uv run`. Failing imports abort with a readable error; no
# silent half-cache files.
sys.path.insert(0, str(Path.cwd() / "src"))
from dlm.base_models import BASE_MODELS
from dlm.export.arch_probe import clear_cache, probe_gguf_arch

# Fresh probe: the cache may carry a stale verdict from an earlier
# run in the same process; clear before enumerating.
clear_cache()

out_path = Path.cwd() / "vendor" / "llama_cpp_vl_arch_support.json"
entries: dict[str, dict[str, str | None]] = {}
for key, spec in BASE_MODELS.items():
    if spec.modality != "vision-language":
        continue
    result = probe_gguf_arch(spec.architecture)
    entries[key] = {
        "architecture": spec.architecture,
        "support": result.support.value,
        "llama_cpp_tag": result.llama_cpp_tag,
        "reason": result.reason,
    }

payload = {
    "schema": 1,
    "bases": entries,
}
out_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
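# Cache file shape (schema 1):
#   {"schema": 1,
#    "bases": {"<base-key>": {"architecture": ..., "support": ...,
#                             "llama_cpp_tag": ..., "reason": ...}}}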

# Print a summary table for operators.
print(f"wrote {len(entries)} VL verdicts to {out_path}")
for key, entry in sorted(entries.items()):
    print(f"  {entry['support']:<12} {key} ({entry['architecture']})")
PY
}

do_bump() {
  local tag="${1:-}"
  if [ -z "$tag" ]; then
    echo "usage: scripts/bump-llama-cpp.sh bump <tag>" >&2
    exit 2
  fi
  if [ -n "$(git status --porcelain)" ]; then
    echo "error: working tree must be clean before a submodule bump" >&2
    exit 1
  fi
  if [ ! -d "$VENDOR_DIR" ]; then
    echo "error: $VENDOR_DIR missing; initialize the submodule first:" >&2
    echo "  git submodule add https://github.com/ggerganov/llama.cpp vendor/llama.cpp" >&2
    exit 1
  fi

  echo "--> fetching tags in $VENDOR_DIR"
  git -C "$VENDOR_DIR" fetch --tags origin
  echo "--> checking out $tag"
  git -C "$VENDOR_DIR" checkout "tags/$tag"

  echo "--> writing $VERSION_PATH"
  echo "$tag" > "$VERSION_PATH"

  refresh_labels
  probe_vl_arch

  echo "--> staging changes"
  git -C "$REPO_ROOT" add \
    vendor/llama.cpp \
    vendor/llama_cpp_pretokenizer_hashes.json \
    vendor/llama_cpp_vl_arch_support.json

  cat <<EOF
Done. Review the staged diff and commit with:
  git commit -m "chore: bump llama.cpp to $tag + refresh pre-tokenizer hashes"

Then build the binaries:
  scripts/bump-llama-cpp.sh build

And re-run the registry probe suite:
  uv run python scripts/refresh-registry.py
EOF
}

do_build() {
  local with_server=0
  local portable=0
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --with-server)
        with_server=1
        ;;
      --portable)
        portable=1
        ;;
      *)
        echo "usage: scripts/bump-llama-cpp.sh build [--portable] [--with-server]" >&2
        exit 2
        ;;
    esac
    shift
  done
  if [ ! -d "$VENDOR_DIR" ]; then
    echo "error: $VENDOR_DIR missing; run 'bump <tag>' first" >&2
    exit 1
  fi
  echo "--> configuring llama.cpp via cmake"
  local cmake_args=(
    -S "$VENDOR_DIR"
    -B "$VENDOR_DIR/build"
    -DCMAKE_BUILD_TYPE=Release
  )
  if [ "$portable" -eq 1 ]; then
    echo "--> portable build: forcing GGML_NATIVE=OFF for cross-runner compatibility"
    cmake_args+=(-DGGML_NATIVE=OFF)
  fi
  cmake "${cmake_args[@]}"
  # `llama-quantize` does the actual per-tensor quantization; `llama-imatrix`
  # produces the importance-matrix file we feed to quantize for k-quant
  # calibration (Sprint 11.6). Both are required for the full export
  # pipeline; building them separately means a missing target fails the
  # build loudly rather than silently shipping a half-built toolchain.
  local targets=(llama-quantize llama-imatrix)
  if [ "$with_server" -eq 1 ]; then
    targets+=(llama-server)
  fi
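  # `--config Release` is a no-op for single-config generators (Makefiles,
  # Ninja) but keeps multi-config generators honest.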
  for target in "${targets[@]}"; do
    echo "--> building $target"
    cmake --build "$VENDOR_DIR/build" --target "$target" --config Release
    if [ ! -f "$VENDOR_DIR/build/bin/$target" ]; then
      echo "error: build finished but $target not found under build/bin" >&2
      exit 1
    fi
    echo "OK: $VENDOR_DIR/build/bin/$target"
  done
}

case "$cmd" in
  bump)
    do_bump "${2:-}"
    ;;
  build)
    # Forward every remaining argument so `--portable --with-server` works;
    # "${@:2}" expands to nothing when no flags were given.
    do_build "${@:2}"
    ;;
  refresh-labels)
    refresh_labels
    ;;
  probe-vl-arch)
    probe_vl_arch
    ;;
  "")
    echo "usage: scripts/bump-llama-cpp.sh <bump|build|refresh-labels|probe-vl-arch> [args]" >&2
    exit 2
    ;;
  *)
    echo "unknown command: $cmd" >&2
    echo "usage: scripts/bump-llama-cpp.sh <bump|build|refresh-labels|probe-vl-arch> [args]" >&2
    exit 2
    ;;
esac