tenseleyflow/documentlanguagemodel / 6f8a2d8

Browse files

build(llama.cpp): scripts/build-llama-cpp-rocm.sh — HIPBLAS cmake wrapper

Authored by espadonne
SHA
6f8a2d8f68fb42543f6eff277cdb9b525f0aadda
Parents
24d1fbd
Tree
9001053

1 changed file

Status | File | + | -
A scripts/build-llama-cpp-rocm.sh 61 0
scripts/build-llama-cpp-rocm.sh (added)
@@ -0,0 +1,61 @@
1
#!/usr/bin/env bash
# Build the vendored llama.cpp with ROCm (HIP) acceleration for
# `dlm export` on AMD hosts. Default `scripts/bump-llama-cpp.sh build`
# produces a CPU-only build; ROCm users rerun this script to replace
# the `llama-quantize` / `llama-imatrix` binaries with ROCm-accelerated
# ones.
#
# Toolchain prerequisites (sprint 22 docs/hardware/rocm.md):
#   - ROCm >= 5.7 (6.0+ preferred; we test against 6.0 / 6.2)
#   - hipcc on PATH
#   - cmake >= 3.22
#   - AMDGPU_TARGETS env var set to the arch(es) you want to build for.
#     Common values: gfx90a (MI200), gfx942 (MI300), gfx1100 (RDNA3).
#     Example: `export AMDGPU_TARGETS="gfx1100"`
#
# Usage:
#   scripts/build-llama-cpp-rocm.sh
#
# Idempotent. Re-running rebuilds the same targets in
# `vendor/llama.cpp/build-rocm/` without touching the CPU build dir.

set -euo pipefail

REPO_ROOT="$(git rev-parse --show-toplevel)"
VENDOR_DIR="$REPO_ROOT/vendor/llama.cpp"
BUILD_DIR="$VENDOR_DIR/build-rocm"
readonly REPO_ROOT VENDOR_DIR BUILD_DIR

# Guard: the submodule must be checked out before we can configure it.
if [[ ! -d "$VENDOR_DIR" ]]; then
  echo "ERROR: vendored llama.cpp not found at $VENDOR_DIR." >&2
  echo "Run 'git submodule update --init --recursive' first." >&2
  exit 1
fi

# Guard: hipcc is the ROCm compiler driver; without it the configure
# step below would fail with a far less obvious cmake error.
if ! command -v hipcc >/dev/null 2>&1; then
  echo "ERROR: hipcc not on PATH. Install ROCm and re-try." >&2
  exit 1
fi

# Guard: there is no safe default GPU arch — building for the wrong
# gfx target produces binaries that abort at runtime, so force the
# caller to choose explicitly. Exit code 2 distinguishes "misconfigured
# environment" from the missing-toolchain failures above.
if [[ -z "${AMDGPU_TARGETS:-}" ]]; then
  echo "ERROR: AMDGPU_TARGETS is empty. Set it to your GPU arch, e.g.:" >&2
  echo "  export AMDGPU_TARGETS=\"gfx1100\"   # RDNA3" >&2
  echo "  export AMDGPU_TARGETS=\"gfx90a\"    # MI200" >&2
  echo "  export AMDGPU_TARGETS=\"gfx942\"    # MI300" >&2
  exit 2
fi

echo "--> configuring ROCm build for AMDGPU_TARGETS=$AMDGPU_TARGETS"
# NOTE(review): recent llama.cpp renamed GGML_HIPBLAS to GGML_HIP — if
# configure succeeds but produces a CPU-only build, check which flag the
# vendored revision expects. hipcc is used for both C and C++ so ggml's
# HIP kernels and plain-C sources go through the same ROCm clang driver.
cmake -S "$VENDOR_DIR" -B "$BUILD_DIR" \
  -DCMAKE_BUILD_TYPE=Release \
  -DGGML_HIPBLAS=ON \
  -DAMDGPU_TARGETS="$AMDGPU_TARGETS" \
  -DCMAKE_C_COMPILER="$(command -v hipcc)" \
  -DCMAKE_CXX_COMPILER="$(command -v hipcc)"

echo "--> building llama-quantize + llama-imatrix (ROCm)"
# Fix: the previous `-- -j` handed GNU make a bare -j, i.e. UNLIMITED
# parallel jobs — HIP translation units are memory-hungry and this can
# OOM the host. Use cmake's native -j capped at the CPU count instead
# (nproc is coreutils; ROCm hosts are Linux).
cmake --build "$BUILD_DIR" --target llama-quantize llama-imatrix -j "$(nproc)"

echo
echo "ROCm-accelerated binaries in: $BUILD_DIR/bin/"
echo "Point DLM_LLAMA_CPP_BUILD=$BUILD_DIR before running \`dlm export\`"
echo "to prefer this build over the default CPU build."