feat(export): honor DLM_LLAMA_CPP_BUILD env var for ROCm binaries (audit-08 M6)
- SHA: e34ec1cb9c06a4b778b0c467bae8afaecc234ee9 (short: e34ec1c)
- Parents: 5aeb871
- Tree: 599ab1b

| Status | File | + | - |
|---|---|---|---|
| M | docs/hardware/rocm.md | 4 | 3 |
| M | src/dlm/export/vendoring.py | 18 | 0 |
| M | tests/unit/export/test_vendoring.py | 41 | 0 |
docs/hardware/rocm.md (modified)
@@ -79,9 +79,10 @@ The script writes to `vendor/llama.cpp/build-rocm/`. To make | ||
| 79 | 79 | export DLM_LLAMA_CPP_BUILD=vendor/llama.cpp/build-rocm |
| 80 | 80 | ``` |
| 81 | 81 | |
| 82 | -(Environment-variable plumbing in `dlm.export.vendoring` lands as | |
| 83 | -part of the next ROCm polish pass — for now, manually invoke the | |
| 84 | -ROCm binaries if you need them.) | |
| 82 | +`DLM_LLAMA_CPP_BUILD` is honored by `dlm.export.vendoring` — when | |
| 83 | +set, it's checked before the default vendor dir for each binary, so | |
| 84 | +the ROCm-accelerated `llama-quantize` / `llama-imatrix` win over any | |
| 85 | +CPU-only build left behind from `scripts/bump-llama-cpp.sh build`. | |
| 85 | 86 | |
| 86 | 87 | ## CI / testing |
| 87 | 88 | |
src/dlm/export/vendoring.py (modified)
@@ -42,6 +42,11 @@ from dlm.export.errors import VendoringError | ||
| 42 | 42 | _REPO_ROOT: Final[Path] = Path(__file__).resolve().parents[3] |
| 43 | 43 | VENDOR_LLAMA_CPP: Final[Path] = _REPO_ROOT / "vendor" / "llama.cpp" |
| 44 | 44 | _ENV_VAR: Final[str] = "DLM_LLAMA_CPP_ROOT" |
| 45 | +_BUILD_ENV_VAR: Final[str] = "DLM_LLAMA_CPP_BUILD" | |
| 46 | +"""Sprint 22 / audit-08 M6: when set, `_resolve_binary` checks | |
| 47 | +`<DLM_LLAMA_CPP_BUILD>/bin/<name>` before the default vendor layout. | |
| 48 | +Lets users point `dlm export` at a HIP-built llama.cpp without | |
| 49 | +rebuilding the vendor dir itself (see docs/hardware/rocm.md).""" | |
| 45 | 50 | |
| 46 | 51 | CONVERT_HF_TO_GGUF: Final[str] = "convert_hf_to_gguf.py" |
| 47 | 52 | CONVERT_LORA_TO_GGUF: Final[str] = "convert_lora_to_gguf.py" |
@@ -131,7 +136,20 @@ def _resolve_binary( | ||
| 131 | 136 | binary, fall back to `shutil.which(name)` — covers the common |
| 132 | 137 | `brew install llama.cpp` case where the binary lives under |
| 133 | 138 | `/opt/homebrew/bin/`. |
| 139 | + | |
| 140 | + Audit-08 M6: `$DLM_LLAMA_CPP_BUILD`, when set, is checked BEFORE | |
| 141 | + the default vendor tree. Lets ROCm users point at the HIP build | |
| 142 | + dir produced by `scripts/build-llama-cpp-rocm.sh` without | |
| 143 | + clobbering the CPU build. | |
| 134 | 144 | """ |
| 145 | + if override is None: | |
| 146 | + build_env = os.environ.get(_BUILD_ENV_VAR) | |
| 147 | + if build_env: | |
| 148 | + build_root = Path(build_env) | |
| 149 | + for candidate in candidates: | |
| 150 | + path = build_root / candidate | |
| 151 | + if path.is_file(): | |
| 152 | + return path | |
| 135 | 153 | root = llama_cpp_root(override) |
| 136 | 154 | for candidate in candidates: |
| 137 | 155 | path = root / candidate |
tests/unit/export/test_vendoring.py (modified)
@@ -83,6 +83,47 @@ class TestLlamaQuantizeBin: | ||
| 83 | 83 | with pytest.raises(VendoringError, match="llama-quantize"): |
| 84 | 84 | llama_quantize_bin(override=root) |
| 85 | 85 | |
| 86 | + def test_dlm_llama_cpp_build_env_preferred( | |
| 87 | + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch | |
| 88 | + ) -> None: | |
| 89 | + """Audit-08 M6: `DLM_LLAMA_CPP_BUILD` overrides the default vendor dir. | |
| 90 | + | |
| 91 | + The env var points at a build-only dir (e.g. the ROCm | |
| 92 | + `vendor/llama.cpp/build-rocm`) that contains only binaries. | |
| 93 | + `_resolve_binary` must find `bin/llama-quantize` there before | |
| 94 | + falling through to the vendor tree. | |
| 95 | + | |
| 96 | + The production path has `override=None`; we mirror that here | |
| 97 | + by driving vendor resolution through `DLM_LLAMA_CPP_ROOT` so | |
| 98 | + both env vars coexist (ROCm users set both). | |
| 99 | + """ | |
| 100 | + rocm_build = tmp_path / "build-rocm" | |
| 101 | + (rocm_build / "bin").mkdir(parents=True) | |
| 102 | + rocm_bin = rocm_build / "bin" / "llama-quantize" | |
| 103 | + rocm_bin.write_text("#!/bin/sh\necho rocm\n") | |
| 104 | + rocm_bin.chmod(0o755) | |
| 105 | + | |
| 106 | + vendor_root = _populate_vendor(tmp_path / "llama.cpp") | |
| 107 | + | |
| 108 | + monkeypatch.setenv("DLM_LLAMA_CPP_BUILD", str(rocm_build)) | |
| 109 | + monkeypatch.setenv("DLM_LLAMA_CPP_ROOT", str(vendor_root)) | |
| 110 | + path = llama_quantize_bin() | |
| 111 | + # The ROCm build binary wins over the vendored CPU build. | |
| 112 | + assert path == rocm_bin | |
| 113 | + | |
| 114 | + def test_dlm_llama_cpp_build_env_missing_binary_falls_through( | |
| 115 | + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch | |
| 116 | + ) -> None: | |
| 117 | + """Env var pointing at an incomplete dir falls through to vendor.""" | |
| 118 | + empty_build = tmp_path / "build-rocm" | |
| 119 | + empty_build.mkdir() | |
| 120 | + vendor_root = _populate_vendor(tmp_path / "llama.cpp") | |
| 121 | + monkeypatch.setenv("DLM_LLAMA_CPP_BUILD", str(empty_build)) | |
| 122 | + monkeypatch.setenv("DLM_LLAMA_CPP_ROOT", str(vendor_root)) | |
| 123 | + path = llama_quantize_bin() | |
| 124 | + assert path.is_file() | |
| 125 | + assert str(vendor_root) in str(path) | |
| 126 | + | |
| 86 | 127 | def test_legacy_quantize_name_found(self, tmp_path: Path) -> None: |
| 87 | 128 | """Pre-rename builds shipped `quantize` rather than `llama-quantize`.""" |
| 88 | 129 | root = _populate_vendor(tmp_path / "llama.cpp", with_binary=False) |