tenseleyflow/documentlanguagemodel / e34ec1c

Browse files

feat(export): honor DLM_LLAMA_CPP_BUILD env var for ROCm binaries (audit-08 M6)

Authored by espadonne
SHA
e34ec1cb9c06a4b778b0c467bae8afaecc234ee9
Parents
5aeb871
Tree
599ab1b

3 changed files

Status File + -
M docs/hardware/rocm.md 4 3
M src/dlm/export/vendoring.py 18 0
M tests/unit/export/test_vendoring.py 41 0
docs/hardware/rocm.md (modified)
@@ -79,9 +79,10 @@ The script writes to `vendor/llama.cpp/build-rocm/`. To make
7979
 export DLM_LLAMA_CPP_BUILD=vendor/llama.cpp/build-rocm
8080
 ```
8181
 
82
-(Environment-variable plumbing in `dlm.export.vendoring` lands as
83
-part of the next ROCm polish pass — for now, manually invoke the
84
-ROCm binaries if you need them.)
82
+`DLM_LLAMA_CPP_BUILD` is honored by `dlm.export.vendoring` — when
83
+set, it's checked before the default vendor dir for each binary, so
84
+the ROCm-accelerated `llama-quantize` / `llama-imatrix` win over any
85
+CPU-only build left behind from `scripts/bump-llama-cpp.sh build`.
8586
 
8687
 ## CI / testing
8788
 
src/dlm/export/vendoring.py (modified)
@@ -42,6 +42,11 @@ from dlm.export.errors import VendoringError
4242
 _REPO_ROOT: Final[Path] = Path(__file__).resolve().parents[3]
4343
 VENDOR_LLAMA_CPP: Final[Path] = _REPO_ROOT / "vendor" / "llama.cpp"
4444
 _ENV_VAR: Final[str] = "DLM_LLAMA_CPP_ROOT"
45
+_BUILD_ENV_VAR: Final[str] = "DLM_LLAMA_CPP_BUILD"
46
+"""Sprint 22 / audit-08 M6: when set, `_resolve_binary` checks
47
+`<DLM_LLAMA_CPP_BUILD>/bin/<name>` before the default vendor layout.
48
+Lets users point `dlm export` at a HIP-built llama.cpp without
49
+rebuilding the vendor dir itself (see docs/hardware/rocm.md)."""
4550
 
4651
 CONVERT_HF_TO_GGUF: Final[str] = "convert_hf_to_gguf.py"
4752
 CONVERT_LORA_TO_GGUF: Final[str] = "convert_lora_to_gguf.py"
@@ -131,7 +136,20 @@ def _resolve_binary(
131136
     binary, fall back to `shutil.which(name)` — covers the common
132137
     `brew install llama.cpp` case where the binary lives under
133138
     `/opt/homebrew/bin/`.
139
+
140
+    Audit-08 M6: `$DLM_LLAMA_CPP_BUILD`, when set, is checked BEFORE
141
+    the default vendor tree. Lets ROCm users point at the HIP build
142
+    dir produced by `scripts/build-llama-cpp-rocm.sh` without
143
+    clobbering the CPU build. Skipped when an explicit `override` is given.
134144
     """
145
+    if override is None:
146
+        build_env = os.environ.get(_BUILD_ENV_VAR)
147
+        if build_env:
148
+            build_root = Path(build_env)
149
+            for candidate in candidates:
150
+                path = build_root / candidate
151
+                if path.is_file():
152
+                    return path
135153
     root = llama_cpp_root(override)
136154
     for candidate in candidates:
137155
         path = root / candidate
tests/unit/export/test_vendoring.py (modified)
@@ -83,6 +83,47 @@ class TestLlamaQuantizeBin:
8383
         with pytest.raises(VendoringError, match="llama-quantize"):
8484
             llama_quantize_bin(override=root)
8585
 
86
+    def test_dlm_llama_cpp_build_env_preferred(
87
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
88
+    ) -> None:
89
+        """Audit-08 M6: `DLM_LLAMA_CPP_BUILD` overrides the default vendor dir.
90
+
91
+        The env var points at a build-only dir (e.g. the ROCm
92
+        `vendor/llama.cpp/build-rocm`) that contains only binaries.
93
+        `_resolve_binary` must find `bin/llama-quantize` there before
94
+        falling through to the vendor tree.
95
+
96
+        The production path has `override=None`; we mirror that here
97
+        by driving vendor resolution through `DLM_LLAMA_CPP_ROOT` so
98
+        both env vars coexist (ROCm users set both).
99
+        """
100
+        rocm_build = tmp_path / "build-rocm"
101
+        (rocm_build / "bin").mkdir(parents=True)
102
+        rocm_bin = rocm_build / "bin" / "llama-quantize"
103
+        rocm_bin.write_text("#!/bin/sh\necho rocm\n")
104
+        rocm_bin.chmod(0o755)
105
+
106
+        vendor_root = _populate_vendor(tmp_path / "llama.cpp")
107
+
108
+        monkeypatch.setenv("DLM_LLAMA_CPP_BUILD", str(rocm_build))
109
+        monkeypatch.setenv("DLM_LLAMA_CPP_ROOT", str(vendor_root))
110
+        path = llama_quantize_bin()
111
+        # The ROCm build binary wins over the vendored CPU build.
112
+        assert path == rocm_bin
113
+
114
+    def test_dlm_llama_cpp_build_env_missing_binary_falls_through(
115
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
116
+    ) -> None:
117
+        """Env var pointing at an incomplete dir falls through to vendor."""
118
+        empty_build = tmp_path / "build-rocm"
119
+        empty_build.mkdir()
120
+        vendor_root = _populate_vendor(tmp_path / "llama.cpp")
121
+        monkeypatch.setenv("DLM_LLAMA_CPP_BUILD", str(empty_build))
122
+        monkeypatch.setenv("DLM_LLAMA_CPP_ROOT", str(vendor_root))
123
+        path = llama_quantize_bin()
124
+        assert path.is_file()
125
+        assert str(vendor_root) in str(path)
126
+
86127
     def test_legacy_quantize_name_found(self, tmp_path: Path) -> None:
87128
         """Pre-rename builds shipped `quantize` rather than `llama-quantize`."""
88129
         root = _populate_vendor(tmp_path / "llama.cpp", with_binary=False)