tenseleyflow/documentlanguagemodel / ae134b7

Browse files

Build portable llama.cpp tools in CI

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
ae134b787ea8fe0195f89b2cf2621aad6e2595ee
Parents
5f73c65
Tree
4371257

2 changed files

Status | File | + | -
M .github/workflows/ci.yml 5 3
M scripts/bump-llama-cpp.sh 31 12
.github/workflows/ci.ymlmodified
@@ -286,8 +286,10 @@ jobs:
286286
         uses: actions/cache@v4
287287
         with:
288288
           path: vendor/llama.cpp/build
289
-          # Cache key: submodule HEAD sha. Bumping llama.cpp invalidates the cache.
290
-          key: llama-cpp-build-${{ hashFiles('.gitmodules', 'vendor/llama.cpp/VERSION') }}
289
+          # Cache key: submodule HEAD sha + build profile. CI uses a
290
+          # portable CPU build so cached binaries stay runnable across
291
+          # heterogeneous ubuntu runner hosts.
292
+          key: llama-cpp-build-portable-v1-${{ runner.os }}-${{ hashFiles('.gitmodules', 'vendor/llama.cpp/VERSION') }}
291293
 
292294
       - name: Build llama-quantize (if not cached)
293295
         if: steps.llama-cpp-cache.outputs.cache-hit != 'true'
@@ -295,7 +297,7 @@ jobs:
295297
           set -euxo pipefail
296298
           # ubuntu-latest ships cmake; `sudo apt-get install -y cmake` is a no-op fallback.
297299
           command -v cmake >/dev/null 2>&1 || sudo apt-get install -y cmake
298
-          scripts/bump-llama-cpp.sh build
300
+          scripts/bump-llama-cpp.sh build --portable
299301
 
300302
       - name: Run slow tests
301303
         env:
scripts/bump-llama-cpp.shmodified
@@ -8,8 +8,13 @@
88
 #       stage changes.
99
 #   scripts/bump-llama-cpp.sh build
1010
 #       Build `llama-quantize` (+ siblings) via cmake. Idempotent.
11
+#   scripts/bump-llama-cpp.sh build --portable
12
+#       Build portable CPU binaries (`GGML_NATIVE=OFF`) suitable for CI
13
+#       caches or redistribution across heterogeneous hosts.
1114
 #   scripts/bump-llama-cpp.sh build --with-server
1215
 #       Also build `llama-server` for Sprint 41's local HTTP target.
16
+#   scripts/bump-llama-cpp.sh build --portable --with-server
17
+#       Portable build plus `llama-server`.
1318
 #   scripts/bump-llama-cpp.sh refresh-labels
1419
 #       Regenerate vendor/llama_cpp_pretokenizer_hashes.json from the
1520
 #       current submodule contents. Does not touch the submodule itself.
@@ -149,23 +154,37 @@ EOF
149154
 
150155
 do_build() {
151156
   local with_server=0
152
-  case "${1:-}" in
153
-    "")
154
-      ;;
155
-    --with-server)
156
-      with_server=1
157
-      ;;
158
-    *)
159
-      echo "usage: scripts/bump-llama-cpp.sh build [--with-server]" >&2
160
-      exit 2
161
-      ;;
162
-  esac
157
+  local portable=0
158
+  while [ "$#" -gt 0 ]; do
159
+    case "$1" in
160
+      --with-server)
161
+        with_server=1
162
+        ;;
163
+      --portable)
164
+        portable=1
165
+        ;;
166
+      *)
167
+        echo "usage: scripts/bump-llama-cpp.sh build [--portable] [--with-server]" >&2
168
+        exit 2
169
+        ;;
170
+    esac
171
+    shift
172
+  done
163173
   if [ ! -d "$VENDOR_DIR" ]; then
164174
     echo "error: $VENDOR_DIR missing — run 'bump <tag>' first" >&2
165175
     exit 1
166176
   fi
167177
   echo "--> configuring llama.cpp via cmake"
168
-  cmake -S "$VENDOR_DIR" -B "$VENDOR_DIR/build" -DCMAKE_BUILD_TYPE=Release
178
+  local cmake_args=(
179
+    -S "$VENDOR_DIR"
180
+    -B "$VENDOR_DIR/build"
181
+    -DCMAKE_BUILD_TYPE=Release
182
+  )
183
+  if [ "$portable" -eq 1 ]; then
184
+    echo "--> portable build: forcing GGML_NATIVE=OFF for cross-runner compatibility"
185
+    cmake_args+=(-DGGML_NATIVE=OFF)
186
+  fi
187
+  cmake "${cmake_args[@]}"
169188
   # `llama-quantize` does the actual per-tensor quantization; `llama-imatrix`
170189
   # produces the importance-matrix file we feed to quantize for k-quant
171190
   # calibration (Sprint 11.6). Both are required for the full export