tenseleyflow/documentlanguagemodel / 82b797f


Add llama-server HTTP smoke

Authored by espadonne
SHA: 82b797f4401c59ab54a21527c6cbc6229852205b
Parent: 135311c
Tree: 580c358

9 changed files

Status  File                                                    +    -
M       README.md                                               1    2
M       docs/cli/reference.md                                   1    1
M       src/dlm/cli/commands.py                                 9    8
M       src/dlm/export/errors.py                                4    0
A       src/dlm/export/smoke.py                                 228  0
M       src/dlm/export/targets/llama_server.py                  51   23
M       tests/unit/cli/test_export_target_flag.py               7    2
A       tests/unit/export/targets/test_llama_server_smoke.py    90   0
A       tests/unit/export/test_smoke.py                         128  0
README.md (modified)

@@ -20,8 +20,7 @@ Ollama and `llama-server`.
 **Status:** pre-v1.0, but far beyond the original MVP framing. The core
 author/train/prompt/export/pack/share loop is real, and newer runtime-target
 work is landing incrementally. Current export targets are `ollama` and
-`llama-server` (`llama-server` currently requires `--no-smoke` while the HTTP
-smoke harness lands).
+`llama-server`.
 
 ## What A `.dlm` Actually Is
 
docs/cli/reference.md (modified)

@@ -203,7 +203,7 @@ dlm export <path> [--target NAME] [--quant Q] [--merged [--dequantize]]
 
 | Option | Default | Notes |
 |---|---|---|
-| `--target NAME` | `ollama` | Export destination. Sprint 41 currently supports `ollama` and `llama-server`. The `llama-server` path writes launch artifacts against the existing GGUF export and currently requires `--no-smoke` while the HTTP smoke harness lands. |
+| `--target NAME` | `ollama` | Export destination. Sprint 41 currently supports `ollama` and `llama-server`. The `llama-server` path writes launch artifacts against the existing GGUF export and uses the shared OpenAI-compatible HTTP smoke harness when `--no-smoke` is not passed. |
 | `--quant Q` | frontmatter.export.default_quant | `Q4_K_M` / `Q5_K_M` / `Q6_K` / `Q8_0` / `F16`. |
 | `--merged` | false | Merge LoRA into base before quantizing. |
 | `--dequantize` | false | Required with `--merged` on a QLoRA adapter (pitfall #3). |
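
For reviewers who want to poke at the new default, a minimal sketch of both invocations in the style of the repo's own CLI tests; the `export` subcommand name follows the usage line above, and the `dlm.cli.commands` import path for the Typer `app` is an assumption, not something this diff shows:

    from typer.testing import CliRunner

    from dlm.cli.commands import app  # assumed import path; the diff only shows `app` in use

    runner = CliRunner()

    # Default: the OpenAI-compatible HTTP smoke runs after launch artifacts land.
    smoked = runner.invoke(app, ["export", "model.dlm", "--target", "llama-server"])

    # Opt out: --no-smoke skips the harness entirely (otherwise a smoke
    # failure exits with code 1, per the commands.py hunk below).
    skipped = runner.invoke(app, ["export", "model.dlm", "--target", "llama-server", "--no-smoke"])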
src/dlm/cli/commands.py (modified)

@@ -1696,14 +1696,6 @@ def export_cmd(
     except UnknownExportTargetError as exc:
         console.print(f"[red]export:[/red] {exc}")
         raise typer.Exit(code=2) from exc
-    if resolved_target.name == "llama-server" and not no_smoke:
-        console.print(
-            "[red]export:[/red] --target llama-server currently requires "
-            "`--no-smoke`; the HTTP smoke harness lands in a follow-up "
-            "Sprint 41 slice."
-        )
-        raise typer.Exit(code=2)
-
     parsed = parse_file(path)
     adapters_declared = parsed.frontmatter.training.adapters
     if adapter is not None:
@@ -1970,6 +1962,13 @@ def export_cmd(
         except ExportError as exc:
             console.print(f"[red]export:[/red] {exc}")
             raise typer.Exit(code=1) from exc
+        llama_server_smoke = None if no_smoke else resolved_target.smoke_test(llama_server_result)
+        if llama_server_smoke is not None and not llama_server_smoke.ok:
+            console.print(
+                f"[red]smoke:[/red] {llama_server_smoke.detail}\n"
+                "  re-run with `--no-smoke` to skip the smoke test."
+            )
+            raise typer.Exit(code=1)
 
     cached_tag = " [dim](cached base)[/dim]" if result.cached else ""
     console.print(f"[green]exported:[/green] {result.export_dir}{cached_tag}")
@@ -1981,6 +1980,8 @@
         console.print(f"target:  {result.target}")
         console.print(f"launch:  {llama_server_result.launch_script_path.name}")
         console.print(f"template: {llama_server_result.config_path.name}")
+        if llama_server_smoke is not None and llama_server_smoke.detail:
+            console.print(f"smoke:   {llama_server_smoke.detail}")
         return
     if result.ollama_name:
         console.print(f"ollama:  {result.ollama_name} (v{result.ollama_version})")
src/dlm/export/errors.py (modified)

@@ -81,6 +81,10 @@ class UnknownExportTargetError(ExportError):
         self.available = available
 
 
+class TargetSmokeError(ExportError):
+    """A runtime-target smoke check failed to start or answer correctly."""
+
+
 class ProcessorLoadError(ExportError):
     """HF-snapshot export couldn't load the processor for a VL/audio base.
 
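Because `TargetSmokeError` subclasses `ExportError`, existing broad handlers keep working without new except-clauses; a tiny sketch of that contract (the message string here is illustrative, echoing the harness wording below):

    from dlm.export.errors import ExportError, TargetSmokeError

    try:
        raise TargetSmokeError("server exited before readiness (exit 3)")
    except ExportError as exc:
        # Call sites that already catch ExportError see smoke failures too.
        print(f"export error: {exc}")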
src/dlm/export/smoke.py (added)

@@ -0,0 +1,228 @@
+"""Shared HTTP smoke helpers for OpenAI-compatible local runtimes."""
+
+from __future__ import annotations
+
+import json
+import socket
+import subprocess  # nosec B404
+import tempfile
+import time
+import urllib.error
+import urllib.request
+from collections.abc import Sequence
+from typing import TextIO
+
+from dlm.export.errors import TargetSmokeError
+
+_DEFAULT_HOST = "127.0.0.1"
+_DEFAULT_STARTUP_TIMEOUT_SECONDS = 30.0
+_DEFAULT_REQUEST_TIMEOUT_SECONDS = 5.0
+_DEFAULT_POLL_INTERVAL_SECONDS = 0.1
+_DEFAULT_PROMPT = "Hello."
+
+
+def reserve_local_port(host: str = _DEFAULT_HOST) -> int:
+    """Ask the OS for a free loopback TCP port."""
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+        sock.bind((host, 0))
+        return int(sock.getsockname()[1])
+
+
+def smoke_openai_compat_server(
+    command: Sequence[str],
+    *,
+    host: str = _DEFAULT_HOST,
+    port: int | None = None,
+    startup_timeout: float = _DEFAULT_STARTUP_TIMEOUT_SECONDS,
+    request_timeout: float = _DEFAULT_REQUEST_TIMEOUT_SECONDS,
+    poll_interval: float = _DEFAULT_POLL_INTERVAL_SECONDS,
+    prompt: str = _DEFAULT_PROMPT,
+) -> str:
+    """Start a local OpenAI-compatible server, wait for readiness, then chat."""
+
+    real_port = port if port is not None else reserve_local_port(host)
+    argv = _replace_or_append_flag(list(command), "--host", host)
+    argv = _replace_or_append_flag(argv, "--port", str(real_port))
+
+    with tempfile.TemporaryFile(mode="w+t", encoding="utf-8") as log:
+        proc = subprocess.Popen(  # nosec B603
+            argv,
+            stdout=log,
+            stderr=subprocess.STDOUT,
+            text=True,
+        )
+        try:
+            model_id = _wait_for_models(
+                proc,
+                log,
+                host=host,
+                port=real_port,
+                startup_timeout=startup_timeout,
+                request_timeout=request_timeout,
+                poll_interval=poll_interval,
+            )
+            content = _chat_completion(
+                host=host,
+                port=real_port,
+                model_id=model_id,
+                prompt=prompt,
+                request_timeout=request_timeout,
+            )
+            first = _first_non_empty_line(content)
+            if not first:
+                raise TargetSmokeError("openai-compatible smoke returned empty assistant content")
+            return first
+        finally:
+            _stop_process(proc)
+
+
+def _wait_for_models(
+    proc: subprocess.Popen[str],
+    log: TextIO,
+    *,
+    host: str,
+    port: int,
+    startup_timeout: float,
+    request_timeout: float,
+    poll_interval: float,
+) -> str | None:
+    deadline = time.monotonic() + startup_timeout
+    last_error: str | None = None
+    while time.monotonic() < deadline:
+        if proc.poll() is not None:
+            raise TargetSmokeError(
+                f"server exited before readiness (exit {proc.returncode}){_log_tail(log)}"
+            )
+        try:
+            return _fetch_model_id(host=host, port=port, request_timeout=request_timeout)
+        except (
+            OSError,
+            TimeoutError,
+            ValueError,
+            urllib.error.HTTPError,
+            urllib.error.URLError,
+        ) as exc:
+            last_error = f"{type(exc).__name__}: {exc}"
+            time.sleep(poll_interval)
+    suffix = f" last error: {last_error}." if last_error else "."
+    raise TargetSmokeError(
+        f"server did not become ready on http://{host}:{port}/v1/models within "
+        f"{startup_timeout:.1f}s.{suffix}{_log_tail(log)}"
+    )
+
+
+def _fetch_model_id(*, host: str, port: int, request_timeout: float) -> str | None:
+    req = urllib.request.Request(
+        f"http://{host}:{port}/v1/models",
+        headers={"Accept": "application/json"},
+        method="GET",
+    )
+    with urllib.request.urlopen(req, timeout=request_timeout) as resp:  # noqa: S310
+        payload = json.loads(resp.read())
+    data = payload.get("data")
+    if not isinstance(data, list) or not data:
+        return None
+    first = data[0]
+    if not isinstance(first, dict):
+        return None
+    model_id = first.get("id")
+    return model_id if isinstance(model_id, str) and model_id.strip() else None
+
+
+def _chat_completion(
+    *,
+    host: str,
+    port: int,
+    model_id: str | None,
+    prompt: str,
+    request_timeout: float,
+) -> str:
+    payload = {
+        "model": model_id or "dlm-smoke",
+        "messages": [{"role": "user", "content": prompt}],
+    }
+    req = urllib.request.Request(
+        f"http://{host}:{port}/v1/chat/completions",
+        data=json.dumps(payload).encode("utf-8"),
+        headers={
+            "Accept": "application/json",
+            "Content-Type": "application/json",
+        },
+        method="POST",
+    )
+    with urllib.request.urlopen(req, timeout=request_timeout) as resp:  # noqa: S310
+        body = json.loads(resp.read())
+    choices = body.get("choices")
+    if not isinstance(choices, list) or not choices:
+        raise TargetSmokeError("chat completion response missing choices")
+    first_choice = choices[0]
+    if not isinstance(first_choice, dict):
+        raise TargetSmokeError("chat completion response has non-object choices[0]")
+    message = first_choice.get("message")
+    if not isinstance(message, dict):
+        raise TargetSmokeError("chat completion response missing choices[0].message")
+    content = _normalize_message_content(message.get("content"))
+    if content is None:
+        raise TargetSmokeError(
+            "chat completion response missing non-empty choices[0].message.content"
+        )
+    return content
+
+
+def _normalize_message_content(content: object) -> str | None:
+    if isinstance(content, str):
+        stripped = content.strip()
+        return stripped if stripped else None
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if not isinstance(item, dict):
+                continue
+            text = item.get("text")
+            if isinstance(text, str) and text.strip():
+                parts.append(text.strip())
+        merged = "\n".join(parts).strip()
+        return merged if merged else None
+    return None
+
+
+def _replace_or_append_flag(argv: list[str], flag: str, value: str) -> list[str]:
+    updated = list(argv)
+    try:
+        idx = updated.index(flag)
+    except ValueError:
+        updated.extend([flag, value])
+        return updated
+    if idx + 1 >= len(updated):
+        updated.append(value)
+        return updated
+    updated[idx + 1] = value
+    return updated
+
+
+def _first_non_empty_line(text: str) -> str:
+    for line in text.splitlines():
+        stripped = line.strip()
+        if stripped:
+            return stripped
+    return ""
+
+
+def _stop_process(proc: subprocess.Popen[str]) -> None:
+    if proc.poll() is not None:
+        return
+    proc.terminate()
+    try:
+        proc.wait(timeout=5.0)
+    except subprocess.TimeoutExpired:
+        proc.kill()
+        proc.wait(timeout=5.0)
+
+
+def _log_tail(log: TextIO, *, lines: int = 20) -> str:
+    log.seek(0)
+    text = log.read().strip()
+    if not text:
+        return ""
+    tail = "\n".join(text.splitlines()[-lines:])
+    return f"\n--- server log tail ---\n{tail}"
src/dlm/export/targets/llama_server.py (modified)

@@ -5,20 +5,17 @@ from __future__ import annotations
 import json
 import shlex
 from pathlib import Path
-from typing import TYPE_CHECKING
 
+from dlm.base_models import BaseModelSpec
 from dlm.export.dispatch import DispatchResult
-from dlm.export.errors import ExportError
+from dlm.export.errors import ExportError, TargetSmokeError
 from dlm.export.manifest import build_artifact, load_export_manifest, save_export_manifest
 from dlm.export.ollama.modelfile_shared import resolve_num_ctx
+from dlm.export.smoke import smoke_openai_compat_server
 from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult
 from dlm.export.vendoring import llama_server_bin
 from dlm.io.atomic import write_text
 
-if TYPE_CHECKING:
-    from dlm.base_models import BaseModelSpec
-
-
 CHAT_TEMPLATE_FILENAME = "chat-template.jinja"
 LAUNCH_SCRIPT_FILENAME = "llama-server_launch.sh"
@@ -29,11 +26,13 @@ class LlamaServerTarget:
     name = "llama-server"
 
     def prepare(self, ctx: DispatchResult) -> TargetResult:
-        model_path = _require_path_extra(ctx, "model_path")
         adapter_dir = _require_path_extra(ctx, "adapter_dir")
-        context_length = _require_int_extra(ctx, "context_length")
-        adapter_gguf_path = _optional_path_extra(ctx, "adapter_gguf_path")
+        training_sequence_len = _optional_int_extra(ctx, "training_sequence_len")
+        spec = _require_spec_extra(ctx, "spec")
         vendor_override = _optional_path_extra(ctx, "vendor_override")
+        model_path = _find_artifact(ctx.artifacts, prefix="base.")
+        adapter_gguf_path = _find_optional_artifact(ctx.artifacts, exact_name="adapter.gguf")
+        context_length = resolve_num_ctx(training_sequence_len, spec.context_length)
 
         template_path = ctx.export_dir / CHAT_TEMPLATE_FILENAME
         write_text(template_path, _read_chat_template(adapter_dir))
@@ -84,12 +83,36 @@
         return command
 
     def smoke_test(self, prepared: TargetResult) -> SmokeResult:
-        _ = prepared
-        return SmokeResult(
-            attempted=False,
-            ok=True,
-            detail="llama-server HTTP smoke lands in a follow-up Sprint 41 slice",
-        )
+        try:
+            first_line = smoke_openai_compat_server(self._runtime_command(prepared))
+        except (OSError, TargetSmokeError, ExportError) as exc:
+            return SmokeResult(attempted=True, ok=False, detail=str(exc))
+        return SmokeResult(attempted=True, ok=True, detail=first_line)
+
+    def _runtime_command(self, prepared: TargetResult) -> list[str]:
+        model_path = _require_prepared_path(prepared, "model_path")
+        adapter_gguf_path = _optional_prepared_path(prepared, "adapter_gguf_path")
+        context_length = _require_prepared_int(prepared, "context_length")
+        vendor_override = _optional_prepared_path(prepared, "vendor_override")
+
+        command = [
+            str(llama_server_bin(vendor_override)),
+            "--model",
+            str(model_path),
+            "--api-key",
+            "disabled",
+            "--ctx-size",
+            str(context_length),
+            "--chat-template-file",
+            str(prepared.config_path),
+            "--host",
+            "127.0.0.1",
+            "--port",
+            "8000",
+        ]
+        if adapter_gguf_path is not None:
+            command.extend(["--lora", str(adapter_gguf_path)])
+        return command
 
 
 def prepare_llama_server_export(
@@ -104,19 +127,15 @@
 ) -> TargetResult:
     """Build launch artifacts for a text GGUF export."""
 
-    model_path = _find_artifact(artifacts, prefix="base.")
-    adapter_gguf_path = _find_optional_artifact(artifacts, exact_name="adapter.gguf")
-    context_length = resolve_num_ctx(training_sequence_len, spec.context_length)
    ctx = DispatchResult(
         export_dir=export_dir,
         manifest_path=manifest_path,
         artifacts=list(artifacts),
         banner_lines=[],
         extras={
-            "model_path": model_path,
             "adapter_dir": adapter_dir,
-            "adapter_gguf_path": adapter_gguf_path,
-            "context_length": context_length,
+            "training_sequence_len": training_sequence_len,
+            "spec": spec,
             "vendor_override": vendor_override,
         },
     )
@@ -203,10 +222,19 @@ def _optional_path_extra(ctx: DispatchResult, key: str) -> Path | None:
     return value
 
 
-def _require_int_extra(ctx: DispatchResult, key: str) -> int:
+def _optional_int_extra(ctx: DispatchResult, key: str) -> int | None:
     value = ctx.extras.get(key)
+    if value is None:
+        return None
     if not isinstance(value, int):
-        raise ExportError(f"llama-server target missing int extra {key!r}")
+        raise ExportError(f"llama-server target extra {key!r} must be an int")
+    return value
+
+
+def _require_spec_extra(ctx: DispatchResult, key: str) -> BaseModelSpec:
+    value = ctx.extras.get(key)
+    if not isinstance(value, BaseModelSpec):
+        raise ExportError(f"llama-server target missing BaseModelSpec extra {key!r}")
     return value
 
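For orientation, the argv that `_runtime_command` assembles for a typical prepared export looks roughly like the sketch below (paths are illustrative, not from this diff); the hard-coded `--host`/`--port` pair is a placeholder that the smoke harness then overrides with 127.0.0.1 and a freshly reserved loopback port:

    # Illustrative argv only; real values come from the prepared TargetResult.
    expected_argv = [
        "/vendor/llama.cpp/build/bin/llama-server",
        "--model", "exports/Q4_K_M/base.Q4_K_M.gguf",
        "--api-key", "disabled",
        "--ctx-size", "4096",
        "--chat-template-file", "exports/Q4_K_M/chat-template.jinja",
        "--host", "127.0.0.1",
        "--port", "8000",
        "--lora", "exports/Q4_K_M/adapter.gguf",  # appended only when adapter.gguf exists
    ]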
tests/unit/cli/test_export_target_flag.py (modified)

@@ -72,7 +72,7 @@ class TestExportTargetFlag:
         assert result.exit_code == 2
         assert "mutually exclusive" in _joined(result)
 
-    def test_llama_server_requires_no_smoke_for_now(self, tmp_path: Path) -> None:
+    def test_llama_server_target_reaches_existing_mutex_validation(self, tmp_path: Path) -> None:
         runner = CliRunner()
         result = runner.invoke(
             app,
@@ -83,7 +83,12 @@
                 str(tmp_path / "ghost.dlm"),
                 "--target",
                 "llama-server",
+                "--draft",
+                "qwen2.5:0.5b",
+                "--no-draft",
             ],
         )
         assert result.exit_code == 2
-        assert "--no-smoke" in _joined(result)
+        text = _joined(result)
+        assert "mutually exclusive" in text
+        assert "--no-smoke" not in text
tests/unit/export/targets/test_llama_server_smoke.py (added)

@@ -0,0 +1,90 @@
+"""llama-server smoke wiring."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from dlm.export.errors import TargetSmokeError
+from dlm.export.targets.base import TargetResult
+from dlm.export.targets.llama_server import LLAMA_SERVER_TARGET
+
+
+def _vendor_tree(tmp_path: Path) -> Path:
+    vendor = tmp_path / "vendor" / "llama.cpp"
+    (vendor / "build" / "bin").mkdir(parents=True)
+    server = vendor / "build" / "bin" / "llama-server"
+    server.write_text("#!/bin/sh\n", encoding="utf-8")
+    server.chmod(0o755)
+    return vendor
+
+
+def _prepared_target(tmp_path: Path) -> TargetResult:
+    export_dir = tmp_path / "exports" / "Q4_K_M"
+    export_dir.mkdir(parents=True)
+    manifest_path = export_dir / "export_manifest.json"
+    manifest_path.write_text("{}\n", encoding="utf-8")
+    model = export_dir / "base.Q4_K_M.gguf"
+    model.write_bytes(b"base")
+    adapter = export_dir / "adapter.gguf"
+    adapter.write_bytes(b"adapter")
+    template = export_dir / "chat-template.jinja"
+    template.write_text("{{ .Prompt }}\n", encoding="utf-8")
+    return TargetResult(
+        name="llama-server",
+        export_dir=export_dir,
+        manifest_path=manifest_path,
+        artifacts=(model, adapter, template),
+        config_path=template,
+        extras={
+            "model_path": model,
+            "adapter_gguf_path": adapter,
+            "context_length": 4096,
+            "vendor_override": _vendor_tree(tmp_path),
+        },
+    )
+
+
+class TestLlamaServerSmoke:
+    def test_smoke_uses_absolute_runtime_argv(self, tmp_path: Path, monkeypatch: object) -> None:
+        prepared = _prepared_target(tmp_path)
+        seen: list[list[str]] = []
+
+        def _fake_smoke(argv: list[str], **_: object) -> str:
+            seen.append(list(argv))
+            return "server replied"
+
+        monkeypatch.setattr(
+            "dlm.export.targets.llama_server.smoke_openai_compat_server", _fake_smoke
+        )
+
+        result = LLAMA_SERVER_TARGET.smoke_test(prepared)
+
+        assert result.attempted is True
+        assert result.ok is True
+        assert result.detail == "server replied"
+        assert len(seen) == 1
+        argv = seen[0]
+        assert argv[0].endswith("llama-server")
+        assert "$SCRIPT_DIR" not in " ".join(argv)
+        assert str(prepared.extras["model_path"]) in argv
+        assert str(prepared.config_path) in argv
+        assert str(prepared.extras["adapter_gguf_path"]) in argv
+        assert "--host" in argv
+        assert "--port" in argv
+
+    def test_smoke_failure_returns_failed_result(self, tmp_path: Path, monkeypatch: object) -> None:
+        prepared = _prepared_target(tmp_path)
+
+        def _fake_smoke(argv: list[str], **_: object) -> str:
+            _ = argv
+            raise TargetSmokeError("boom")
+
+        monkeypatch.setattr(
+            "dlm.export.targets.llama_server.smoke_openai_compat_server", _fake_smoke
+        )
+
+        result = LLAMA_SERVER_TARGET.smoke_test(prepared)
+
+        assert result.attempted is True
+        assert result.ok is False
+        assert result.detail == "boom"
tests/unit/export/test_smoke.py (added)

@@ -0,0 +1,128 @@
+"""Shared OpenAI-compatible smoke harness."""
+
+from __future__ import annotations
+
+import socket
+import sys
+from pathlib import Path
+
+import pytest
+
+from dlm.export.errors import TargetSmokeError
+from dlm.export.smoke import smoke_openai_compat_server
+
+
+def _require_loopback_bind() -> None:
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.bind(("127.0.0.1", 0))
+    except PermissionError as exc:
+        pytest.skip(f"loopback bind blocked on this host: {exc}")
+
+
+def _write_server_script(tmp_path: Path, *, mode: str) -> Path:
+    script = tmp_path / f"fake_server_{mode}.py"
+    script.write_text(
+        (
+            "from __future__ import annotations\n"
+            "import argparse\n"
+            "import json\n"
+            "from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer\n"
+            "\n"
+            "parser = argparse.ArgumentParser()\n"
+            "parser.add_argument('--host', required=True)\n"
+            "parser.add_argument('--port', required=True, type=int)\n"
+            "parser.add_argument('--mode', required=True)\n"
+            "args = parser.parse_args()\n"
+            "\n"
+            "if args.mode == 'exit':\n"
+            "    raise SystemExit(3)\n"
+            "\n"
+            "class Handler(BaseHTTPRequestHandler):\n"
+            "    def do_GET(self) -> None:\n"
+            "        if self.path != '/v1/models':\n"
+            "            self.send_response(404)\n"
+            "            self.end_headers()\n"
+            "            return\n"
+            "        body = json.dumps({'data': [{'id': 'fake-model'}]}).encode('utf-8')\n"
+            "        self.send_response(200)\n"
+            "        self.send_header('Content-Type', 'application/json')\n"
+            "        self.send_header('Content-Length', str(len(body)))\n"
+            "        self.end_headers()\n"
+            "        self.wfile.write(body)\n"
+            "\n"
+            "    def do_POST(self) -> None:\n"
+            "        if self.path != '/v1/chat/completions':\n"
+            "            self.send_response(404)\n"
+            "            self.end_headers()\n"
+            "            return\n"
+            "        _ = self.rfile.read(int(self.headers.get('Content-Length', '0')))\n"
+            "        if args.mode == 'empty':\n"
+            "            payload = {'choices': [{'message': {'content': ''}}]}\n"
+            "        else:\n"
+            "            payload = {'choices': [{'message': {'content': 'hello from fake server'}}]}\n"
+            "        body = json.dumps(payload).encode('utf-8')\n"
+            "        self.send_response(200)\n"
+            "        self.send_header('Content-Type', 'application/json')\n"
+            "        self.send_header('Content-Length', str(len(body)))\n"
+            "        self.end_headers()\n"
+            "        self.wfile.write(body)\n"
+            "\n"
+            "    def log_message(self, format: str, *args: object) -> None:\n"
+            "        return\n"
+            "\n"
+            "server = ThreadingHTTPServer((args.host, args.port), Handler)\n"
+            "server.serve_forever()\n"
+        ),
+        encoding="utf-8",
+    )
+    return script
+
+
+class TestSmokeOpenAiCompatServer:
+    def test_returns_first_response_line(self, tmp_path: Path) -> None:
+        _require_loopback_bind()
+        script = _write_server_script(tmp_path, mode="ok")
+
+        first_line = smoke_openai_compat_server(
+            [sys.executable, str(script), "--mode", "ok", "--host", "127.0.0.1", "--port", "8000"]
+        )
+
+        assert first_line == "hello from fake server"
+
+    def test_empty_content_raises(self, tmp_path: Path) -> None:
+        _require_loopback_bind()
+        script = _write_server_script(tmp_path, mode="empty")
+
+        with pytest.raises(TargetSmokeError, match="non-empty"):
+            smoke_openai_compat_server(
+                [
+                    sys.executable,
+                    str(script),
+                    "--mode",
+                    "empty",
+                    "--host",
+                    "127.0.0.1",
+                    "--port",
+                    "8000",
+                ]
+            )
+
+    def test_early_exit_raises_with_readiness_message(self, tmp_path: Path) -> None:
+        _require_loopback_bind()
+        script = _write_server_script(tmp_path, mode="exit")
+
+        with pytest.raises(TargetSmokeError, match="exited before readiness"):
+            smoke_openai_compat_server(
+                [
+                    sys.executable,
+                    str(script),
+                    "--mode",
+                    "exit",
+                    "--host",
+                    "127.0.0.1",
+                    "--port",
+                    "8000",
+                ],
+                startup_timeout=1.0,
+            )