@@ -8,8 +8,13 @@ |
| 8 | 8 | # stage changes. |
| 9 | 9 | # scripts/bump-llama-cpp.sh build |
| 10 | 10 | # Build `llama-quantize` (+ siblings) via cmake. Idempotent. |
| 11 | +# scripts/bump-llama-cpp.sh build --portable |
| 12 | +# Build portable CPU binaries (`GGML_NATIVE=OFF`) suitable for CI |
| 13 | +# caches or redistribution across heterogeneous hosts. |
| 11 | 14 | # scripts/bump-llama-cpp.sh build --with-server |
| 12 | 15 | # Also build `llama-server` for Sprint 41's local HTTP target. |
| 16 | +# scripts/bump-llama-cpp.sh build --portable --with-server |
| 17 | +# Portable build plus `llama-server`; the two flags may be given in either order. |
| 13 | 18 | # scripts/bump-llama-cpp.sh refresh-labels |
| 14 | 19 | # Regenerate vendor/llama_cpp_pretokenizer_hashes.json from the |
| 15 | 20 | # current submodule contents. Does not touch the submodule itself. |
@@ -149,23 +154,37 @@ EOF |
| 149 | 154 | |
| 150 | 155 | do_build() { |
| 151 | 156 | local with_server=0 |
| 152 | | - case "${1:-}" in |
| 153 | | - "") |
| 154 | | - ;; |
| 155 | | - --with-server) |
| 156 | | - with_server=1 |
| 157 | | - ;; |
| 158 | | - *) |
| 159 | | - echo "usage: scripts/bump-llama-cpp.sh build [--with-server]" >&2 |
| 160 | | - exit 2 |
| 161 | | - ;; |
| 162 | | - esac |
| 157 | + local portable=0 |
| 158 | + while [ "$#" -gt 0 ]; do |
| 159 | + case "$1" in |
| 160 | + --with-server) |
| 161 | + with_server=1 |
| 162 | + ;; |
| 163 | + --portable) |
| 164 | + portable=1 |
| 165 | + ;; |
| 166 | + *) |
| 167 | + echo "usage: scripts/bump-llama-cpp.sh build [--portable] [--with-server]" >&2 |
| 168 | + exit 2 |
| 169 | + ;; |
| 170 | + esac |
| 171 | + shift |
| 172 | + done |
| 163 | 173 | if [ ! -d "$VENDOR_DIR" ]; then |
| 164 | 174 | echo "error: $VENDOR_DIR missing — run 'bump <tag>' first" >&2 |
| 165 | 175 | exit 1 |
| 166 | 176 | fi |
| 167 | 177 | echo "--> configuring llama.cpp via cmake" |
| 168 | | - cmake -S "$VENDOR_DIR" -B "$VENDOR_DIR/build" -DCMAKE_BUILD_TYPE=Release |
| 178 | + local cmake_args=( |
| 179 | + -S "$VENDOR_DIR" |
| 180 | + -B "$VENDOR_DIR/build" |
| 181 | + -DCMAKE_BUILD_TYPE=Release |
| 182 | + ) |
| 183 | + if [ "$portable" -eq 1 ]; then |
| 184 | + echo "--> portable build: forcing GGML_NATIVE=OFF for cross-runner compatibility" |
| 185 | + cmake_args+=(-DGGML_NATIVE=OFF) |
| 186 | + fi |
| 187 | + cmake "${cmake_args[@]}" |
| 169 | 188 | # `llama-quantize` does the actual per-tensor quantization; `llama-imatrix` |
| 170 | 189 | # produces the importance-matrix file we feed to quantize for k-quant |
| 171 | 190 | # calibration (Sprint 11.6). Both are required for the full export |