documentlanguagemodel Public

Watch 0 Fork 0 Star 0

YAML · 11051 bytes Raw Blame History

  
        1
        # CI workflow: runs for pushes to trunk and for pull requests targeting trunk.
        name: CI

        on:
          push:
            branches: [trunk]
          pull_request:
            branches: [trunk]

        # Least privilege for GITHUB_TOKEN: the jobs below only check out the
        # repository. Without this block the token scope falls back to the
        # repository/organization default, which may include write access.
        permissions:
          contents: read

        # One active run per workflow + ref; a newer push cancels the older run.
        concurrency:
          group: ${{ github.workflow }}-${{ github.ref }}
          cancel-in-progress: true

        env:
          # uv release passed to astral-sh/setup-uv in every job.
          UV_VERSION: "0.11.6"
          # NOTE(review): no step in this workflow references env.PYTHON_VERSION;
          # it is still exported to every step's environment. Confirm that
          # something consumes it.
          PYTHON_VERSION: "3.11"
          # Pinned to BASE_MODELS["smollm2-135m"].revision (Sprint 06 registry).
          TINY_MODEL_REVISION: "12fd25f77366fa6b3b4b768ec3050bf629380bac"
      
        18
        
        19
        jobs:
          # Lint, type-check and run the default test suite once per matrix OS.
          lint-type-test:
            name: lint / typecheck / test (${{ matrix.os }})
            runs-on: ${{ matrix.os }}
            strategy:
              # A failure on one OS does not cancel the other matrix leg.
              fail-fast: false
              matrix:
                os:
                  - ubuntu-latest
                  - macos-latest
            steps:
              - uses: actions/checkout@v4

              # uv is pinned through the workflow-level UV_VERSION value.
              - name: Install uv
                uses: astral-sh/setup-uv@v4
                with:
                  version: ${{ env.UV_VERSION }}

              # Installs the project with every extra plus the dev dependencies.
              - name: Sync dependencies
                run: uv sync --all-extras --dev
      
        37
        
        38
              - name: Install minisign (for share/signing coverage)
                # The signing code path probes `shutil.which("minisign")` and
                # refuses with a typed error when absent. CI installs it so the
                # "available → sign/verify" branch runs alongside the "absent"
                # refusal branch that's exercised on developer machines without
                # it. Best-effort: if the install fails (e.g. Homebrew rate
                # limit), tests still pass via the refusal path.
                #
                # Every package-manager call is guarded with `|| true`, including
                # the apt index refresh: the default `run` shell is `bash -e`, so
                # an unguarded failure there would abort the step and break the
                # best-effort contract described above.
                run: |
                  if [ "${{ matrix.os }}" = "ubuntu-latest" ]; then
                    sudo apt-get update -qq || true
                    sudo apt-get install -y minisign || true
                  elif [ "${{ matrix.os }}" = "macos-latest" ]; then
                    brew install minisign || true
                  fi
                  # Report which branch the test run will exercise.
                  command -v minisign && minisign -v || echo "minisign not available; tests use the refusal path"
      
        53
        
        54
              # Static checks run on every matrix OS, followed by the default
              # pytest invocation.
              - name: Ruff lint
                run: uv run ruff check .

              # Check-only mode: reports formatting drift without rewriting files.
              - name: Ruff format check
                run: uv run ruff format --check .

              # Type-checks the package sources under src/dlm.
              - name: Mypy
                run: uv run mypy src/dlm

              # Plain `pytest` with no marker expression on the command line.
              - name: Pytest (unit + integration, non-slow)
                run: uv run pytest
      
        65
        
        66
              # Per-package coverage gates. Each step runs only on the
              # ubuntu-latest matrix leg, re-runs the listed tests with coverage
              # measured for a single target, and fails below 100%.
              # The folded scalars (`>-`) join their lines with single spaces, so
              # each step executes one pytest command line.
              - name: Coverage gate — src/dlm/doc = 100% (audit 02 M4)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/doc
                  --cov=src/dlm/doc
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/store = 100% (Sprint 04)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/store
                  --cov=src/dlm/store
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/hardware = 100% (Sprint 05)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/hardware
                  --cov=src/dlm/hardware
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/base_models = 100% (Sprint 06)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/base_models
                  --cov=src/dlm/base_models
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/data = 100% (Sprint 07)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/data
                  --cov=src/dlm/data
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/replay = 100% (Sprint 08)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/replay
                  --cov=src/dlm/replay
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/train = 100% (Sprint 09)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/train
                  --cov=src/dlm/train
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/train/preference = 100%
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/train/preference
                  --cov=src/dlm/train/preference
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/eval = 100% (Sprint 10)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/eval
                  --cov=src/dlm/eval
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/inference = 100% (Sprint 10)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/inference
                  --cov=src/dlm/inference
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/export = 100% (Sprint 11)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/export
                  --cov=src/dlm/export
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/export/ollama = 100% (Sprint 12)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/export/ollama
                  --cov=src/dlm/export/ollama
                  --cov-report=term-missing
                  --cov-fail-under=100

              # The next two gates name the coverage target as a dotted module
              # rather than a source path.
              - name: Coverage gate — src/dlm/cli/reporter = 100% (Sprint 13)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/cli
                  --cov=dlm.cli.reporter
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/io/ulid = 100% (Sprint 13)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/test_io_ulid.py
                  --cov=dlm.io.ulid
                  --cov-report=term-missing
                  --cov-fail-under=100

              # This gate draws on both the unit and the integration pack tests.
              - name: Coverage gate — src/dlm/pack = 100% (Sprint 14)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/pack tests/integration/pack
                  --cov=src/dlm/pack
                  --cov-report=term-missing
                  --cov-fail-under=100

              - name: Coverage gate — src/dlm/lock = 100% (Sprint 15)
                if: matrix.os == 'ubuntu-latest'
                run: >-
                  uv run pytest tests/unit/lock
                  --cov=src/dlm/lock
                  --cov-report=term-missing
                  --cov-fail-under=100
      
        193
        
        194
          no-network-sandbox:
            # audit F13: dlm init / doctor / show must work with zero outbound network.
            name: no-network sandbox (ubuntu-latest)
            runs-on: ubuntu-latest
            steps:
              - uses: actions/checkout@v4

              # uv is pinned through the workflow-level UV_VERSION value.
              - name: Install uv
                uses: astral-sh/setup-uv@v4
                with:
                  version: ${{ env.UV_VERSION }}

              # Runs while the network is still reachable; egress is cut off
              # only in the following step.
              - name: Sync dependencies (before blocking network)
                run: uv sync --all-extras --dev
      
        208
        
        209
              - name: Block egress then exercise local-only CLI commands
                env:
                  # Second line of defence next to the firewall rules below: put
                  # the HF hub / transformers / datasets libraries in offline mode.
                  HF_HUB_OFFLINE: "1"
                  TRANSFORMERS_OFFLINE: "1"
                  HF_DATASETS_OFFLINE: "1"
                run: |
                  set -euxo pipefail
                  # Register the cleanup before any rule is added: the OUTPUT
                  # chain is flushed on every exit path. Otherwise the post-step
                  # hooks (cache upload, artifact collection) lose the runner's
                  # heartbeat to GitHub Actions and the job fails with
                  # "hosted runner lost communication with the server".
                  trap 'sudo iptables -F OUTPUT || true' EXIT
                  # Allow loopback traffic, reject everything else. Any
                  # currently-"local-only" command that reaches for the network
                  # fails, and with `set -e` so does the job.
                  # NOTE(review): only iptables (IPv4) rules are installed here;
                  # confirm IPv6 egress is not a concern on the runner.
                  sudo iptables -A OUTPUT -o lo -j ACCEPT
                  sudo iptables -A OUTPUT -d 127.0.0.0/8 -j ACCEPT
                  sudo iptables -A OUTPUT -j REJECT
                  # Sanity check: the request must fail, otherwise the sandbox
                  # is not actually in effect.
                  (! curl --max-time 3 -sS https://example.com -o /dev/null) || (echo "egress not blocked" && exit 1)
                  # CLI surfaces that must be local-only at this sprint.
                  uv run dlm --version
                  uv run dlm --help
                  # Sprint 05 landed: `dlm doctor` probes torch + psutil only
                  # and emits JSON with no outbound traffic. A network attempt
                  # inside the iptables-blocked sandbox fails this job loudly
                  # (audit-03 M4).
                  uv run dlm doctor --json >/dev/null
                  uv run dlm doctor >/dev/null
                  # `dlm show` lands in Sprint 13 (CLI finalization); add here
                  # when it's wired.
                  # NOTE(review): other steps in this workflow already cite
                  # Sprints 13-15; check whether `dlm show` can be added now.
      
        241
        
        242
          slow-tests:
            # Sprint 02: marker-gated tests that touch HF. Cache-keyed on
            # (pyproject.toml hash, tiny-model revision) per audit guidance.
            # Sprint 11: also initializes + builds `vendor/llama.cpp` so export
            # integration tests can exercise real GGUF conversion.
            name: slow tests (hf-cache + llama.cpp)
            runs-on: ubuntu-latest
            steps:
              # Submodules are fetched recursively as part of the checkout.
              - name: Checkout with llama.cpp submodule
                uses: actions/checkout@v4
                with:
                  submodules: recursive

              # uv is pinned through the workflow-level UV_VERSION value.
              - name: Install uv
                uses: astral-sh/setup-uv@v4
                with:
                  version: ${{ env.UV_VERSION }}

              # Installs the project with every extra plus the dev dependencies.
              - name: Sync dependencies
                run: uv sync --all-extras --dev
      
        262
        
        263
              # Cache directory lives inside the workspace. The primary key
              # combines the pinned tiny-model revision with the pyproject.toml
              # hash; the restore key accepts any cache for the same revision.
              - name: Restore HF cache
                id: hf-cache
                uses: actions/cache@v4
                with:
                  path: ${{ github.workspace }}/.hf-cache
                  key: hf-tiny-${{ env.TINY_MODEL_REVISION }}-${{ hashFiles('pyproject.toml') }}
                  restore-keys: |
                    hf-tiny-${{ env.TINY_MODEL_REVISION }}-

              # HF_HOME points at the cached directory restored above. The
              # script logs the cache-hit flag and then calls the test
              # fixture's `tiny_model_path()`, printing the path it returns.
              - name: Pre-warm tiny model
                env:
                  HF_HOME: ${{ github.workspace }}/.hf-cache
                  DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
                run: |
                  set -euxo pipefail
                  echo "Cache hit: ${{ steps.hf-cache.outputs.cache-hit }}"
                  uv run python - <<'PY'
                  from tests.fixtures.tiny_model import tiny_model_path
                  print("tiny model at:", tiny_model_path())
                  PY
      
        283
        
        284
              # Resolve the revision the superproject records for
              # vendor/llama.cpp so that it can take part in the cache key.
              # `.gitmodules` holds only the submodule path/URL, so hashing it
              # does not notice a submodule pointer bump.
              - name: Resolve llama.cpp submodule revision
                id: llama-cpp-rev
                run: |
                  set -euo pipefail
                  # Assign first so that a failing rev-parse aborts the step
                  # instead of silently writing an empty value.
                  sha="$(git rev-parse HEAD:vendor/llama.cpp)"
                  echo "sha=${sha}" >> "$GITHUB_OUTPUT"

              - name: Restore llama.cpp build cache
                id: llama-cpp-cache
                uses: actions/cache@v4
                with:
                  path: vendor/llama.cpp/build
                  # Cache key: build profile + runner OS + hashed metadata files
                  # + the recorded submodule revision. CI uses a portable CPU
                  # build so cached binaries stay runnable across heterogeneous
                  # ubuntu runner hosts. The trailing revision makes every
                  # submodule bump a cache miss, which forces a rebuild.
                  key: llama-cpp-build-portable-v1-${{ runner.os }}-${{ hashFiles('.gitmodules', 'vendor/llama.cpp/VERSION') }}-${{ steps.llama-cpp-rev.outputs.sha }}
      
        293
        
        294
              # Runs only when the build cache step reported no exact hit.
              - name: Build llama.cpp tools (if not cached)
                if: steps.llama-cpp-cache.outputs.cache-hit != 'true'
                run: |
                  set -euxo pipefail
                  # ubuntu-latest ships cmake, so this branch is a fallback.
                  # When it is taken, refresh the package index first: installing
                  # against a stale index can fail to fetch the package.
                  if ! command -v cmake >/dev/null 2>&1; then
                    sudo apt-get update -qq
                    sudo apt-get install -y cmake
                  fi
                  scripts/bump-llama-cpp.sh build --portable --with-server
      
        301
        
        302
              # Selects the tests marked `slow`, with the same HF cache location
              # and pinned model revision as the pre-warm step.
              - name: Run slow tests
                env:
                  HF_HOME: ${{ github.workspace }}/.hf-cache
                  DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
                  # NOTE(review): presumably an opt-in switch read by the slow
                  # integration tests; confirm against the test suite.
                  DLM_ENABLE_SLOW_INTEGRATION: "1"
                run: uv run pytest -m "slow" -v
      
        308

1	name: CI
2
3	on:
4	push:
5	branches: [trunk]
6	pull_request:
7	branches: [trunk]
8
9	concurrency:
10	group: ${{ github.workflow }}-${{ github.ref }}
11	cancel-in-progress: true
12
13	env:
14	UV_VERSION: "0.11.6"
15	PYTHON_VERSION: "3.11"
16	# Pinned to BASE_MODELS["smollm2-135m"].revision (Sprint 06 registry).
17	TINY_MODEL_REVISION: "12fd25f77366fa6b3b4b768ec3050bf629380bac"
18
19	jobs:
20	lint-type-test:
21	name: lint / typecheck / test (${{ matrix.os }})
22	runs-on: ${{ matrix.os }}
23	strategy:
24	fail-fast: false
25	matrix:
26	os: [ubuntu-latest, macos-latest]
27	steps:
28	- uses: actions/checkout@v4
29
30	- name: Install uv
31	uses: astral-sh/setup-uv@v4
32	with:
33	version: ${{ env.UV_VERSION }}
34
35	- name: Sync dependencies
36	run: uv sync --all-extras --dev
37
38	- name: Install minisign (for share/signing coverage)
39	# The signing code path probes `shutil.which("minisign")` and
40	# refuses with a typed error when absent. CI installs it so the
41	# "available → sign/verify" branch runs alongside the "absent"
42	# refusal branch that's exercised on developer machines without
43	# it. Best-effort: if the install fails (e.g. Homebrew rate
44	# limit), tests still pass via the refusal path.
45	run: |
46	if [ "${{ matrix.os }}" = "ubuntu-latest" ]; then
47	sudo apt-get update -qq
48	sudo apt-get install -y minisign || true
49	elif [ "${{ matrix.os }}" = "macos-latest" ]; then
50	brew install minisign || true
51	fi
52	command -v minisign && minisign -v || echo "minisign not available; tests use the refusal path"
53
54	- name: Ruff lint
55	run: uv run ruff check .
56
57	- name: Ruff format check
58	run: uv run ruff format --check .
59
60	- name: Mypy
61	run: uv run mypy src/dlm
62
63	- name: Pytest (unit + integration, non-slow)
64	run: uv run pytest
65
66	- name: Coverage gate — src/dlm/doc = 100% (audit 02 M4)
67	if: matrix.os == 'ubuntu-latest'
68	run: |
69	uv run pytest tests/unit/doc \
70	--cov=src/dlm/doc \
71	--cov-report=term-missing \
72	--cov-fail-under=100
73
74	- name: Coverage gate — src/dlm/store = 100% (Sprint 04)
75	if: matrix.os == 'ubuntu-latest'
76	run: |
77	uv run pytest tests/unit/store \
78	--cov=src/dlm/store \
79	--cov-report=term-missing \
80	--cov-fail-under=100
81
82	- name: Coverage gate — src/dlm/hardware = 100% (Sprint 05)
83	if: matrix.os == 'ubuntu-latest'
84	run: |
85	uv run pytest tests/unit/hardware \
86	--cov=src/dlm/hardware \
87	--cov-report=term-missing \
88	--cov-fail-under=100
89
90	- name: Coverage gate — src/dlm/base_models = 100% (Sprint 06)
91	if: matrix.os == 'ubuntu-latest'
92	run: |
93	uv run pytest tests/unit/base_models \
94	--cov=src/dlm/base_models \
95	--cov-report=term-missing \
96	--cov-fail-under=100
97
98	- name: Coverage gate — src/dlm/data = 100% (Sprint 07)
99	if: matrix.os == 'ubuntu-latest'
100	run: |
101	uv run pytest tests/unit/data \
102	--cov=src/dlm/data \
103	--cov-report=term-missing \
104	--cov-fail-under=100
105
106	- name: Coverage gate — src/dlm/replay = 100% (Sprint 08)
107	if: matrix.os == 'ubuntu-latest'
108	run: |
109	uv run pytest tests/unit/replay \
110	--cov=src/dlm/replay \
111	--cov-report=term-missing \
112	--cov-fail-under=100
113
114	- name: Coverage gate — src/dlm/train = 100% (Sprint 09)
115	if: matrix.os == 'ubuntu-latest'
116	run: |
117	uv run pytest tests/unit/train \
118	--cov=src/dlm/train \
119	--cov-report=term-missing \
120	--cov-fail-under=100
121
122	- name: Coverage gate — src/dlm/train/preference = 100%
123	if: matrix.os == 'ubuntu-latest'
124	run: |
125	uv run pytest tests/unit/train/preference \
126	--cov=src/dlm/train/preference \
127	--cov-report=term-missing \
128	--cov-fail-under=100
129
130	- name: Coverage gate — src/dlm/eval = 100% (Sprint 10)
131	if: matrix.os == 'ubuntu-latest'
132	run: |
133	uv run pytest tests/unit/eval \
134	--cov=src/dlm/eval \
135	--cov-report=term-missing \
136	--cov-fail-under=100
137
138	- name: Coverage gate — src/dlm/inference = 100% (Sprint 10)
139	if: matrix.os == 'ubuntu-latest'
140	run: |
141	uv run pytest tests/unit/inference \
142	--cov=src/dlm/inference \
143	--cov-report=term-missing \
144	--cov-fail-under=100
145
146	- name: Coverage gate — src/dlm/export = 100% (Sprint 11)
147	if: matrix.os == 'ubuntu-latest'
148	run: |
149	uv run pytest tests/unit/export \
150	--cov=src/dlm/export \
151	--cov-report=term-missing \
152	--cov-fail-under=100
153
154	- name: Coverage gate — src/dlm/export/ollama = 100% (Sprint 12)
155	if: matrix.os == 'ubuntu-latest'
156	run: |
157	uv run pytest tests/unit/export/ollama \
158	--cov=src/dlm/export/ollama \
159	--cov-report=term-missing \
160	--cov-fail-under=100
161
162	- name: Coverage gate — src/dlm/cli/reporter = 100% (Sprint 13)
163	if: matrix.os == 'ubuntu-latest'
164	run: |
165	uv run pytest tests/unit/cli \
166	--cov=dlm.cli.reporter \
167	--cov-report=term-missing \
168	--cov-fail-under=100
169
170	- name: Coverage gate — src/dlm/io/ulid = 100% (Sprint 13)
171	if: matrix.os == 'ubuntu-latest'
172	run: |
173	uv run pytest tests/unit/test_io_ulid.py \
174	--cov=dlm.io.ulid \
175	--cov-report=term-missing \
176	--cov-fail-under=100
177
178	- name: Coverage gate — src/dlm/pack = 100% (Sprint 14)
179	if: matrix.os == 'ubuntu-latest'
180	run: |
181	uv run pytest tests/unit/pack tests/integration/pack \
182	--cov=src/dlm/pack \
183	--cov-report=term-missing \
184	--cov-fail-under=100
185
186	- name: Coverage gate — src/dlm/lock = 100% (Sprint 15)
187	if: matrix.os == 'ubuntu-latest'
188	run: |
189	uv run pytest tests/unit/lock \
190	--cov=src/dlm/lock \
191	--cov-report=term-missing \
192	--cov-fail-under=100
193
194	no-network-sandbox:
195	# audit F13: dlm init / doctor / show must work with zero outbound network.
196	name: no-network sandbox (ubuntu-latest)
197	runs-on: ubuntu-latest
198	steps:
199	- uses: actions/checkout@v4
200
201	- name: Install uv
202	uses: astral-sh/setup-uv@v4
203	with:
204	version: ${{ env.UV_VERSION }}
205
206	- name: Sync dependencies (before blocking network)
207	run: uv sync --all-extras --dev
208
209	- name: Block egress then exercise local-only CLI commands
210	env:
211	# Belt-and-braces: force HF / transformers offline posture.
212	HF_HUB_OFFLINE: "1"
213	TRANSFORMERS_OFFLINE: "1"
214	HF_DATASETS_OFFLINE: "1"
215	run: |
216	set -euxo pipefail
217	# ALWAYS flush OUTPUT on exit — otherwise the post-step hooks
218	# (cache upload, artifact collection) lose the runner's
219	# heartbeat to GitHub Actions and the job fails with
220	# "hosted runner lost communication with the server".
221	trap 'sudo iptables -F OUTPUT || true' EXIT
222	# Drop all non-loopback egress. Commands that try to reach out
223	# will fail — CI fails if any currently-"local-only" command
224	# attempts network.
225	sudo iptables -A OUTPUT -o lo -j ACCEPT
226	sudo iptables -A OUTPUT -d 127.0.0.0/8 -j ACCEPT
227	sudo iptables -A OUTPUT -j REJECT
228	# Sanity check: confirm egress is blocked.
229	(! curl --max-time 3 -sS https://example.com -o /dev/null) || (echo "egress not blocked" && exit 1)
230	# Exercise CLI surfaces that must be local-only at this sprint.
231	uv run dlm --version
232	uv run dlm --help
233	# Sprint 05 landed: `dlm doctor` probes torch + psutil only
234	# and emits JSON with no outbound traffic. If it ever reaches
235	# for the network under the iptables-blocked sandbox, this job
236	# fails loudly (audit-03 M4).
237	uv run dlm doctor --json >/dev/null
238	uv run dlm doctor >/dev/null
239	# `dlm show` lands in Sprint 13 (CLI finalization); add here
240	# when it's wired.
241
242	slow-tests:
243	# Sprint 02: marker-gated tests that touch HF. Cache-keyed on
244	# (pyproject.toml hash, tiny-model revision) per audit guidance.
245	# Sprint 11: also initializes + builds `vendor/llama.cpp` so export
246	# integration tests can exercise real GGUF conversion.
247	name: slow tests (hf-cache + llama.cpp)
248	runs-on: ubuntu-latest
249	steps:
250	- name: Checkout with llama.cpp submodule
251	uses: actions/checkout@v4
252	with:
253	submodules: recursive
254
255	- name: Install uv
256	uses: astral-sh/setup-uv@v4
257	with:
258	version: ${{ env.UV_VERSION }}
259
260	- name: Sync dependencies
261	run: uv sync --all-extras --dev
262
263	- name: Restore HF cache
264	id: hf-cache
265	uses: actions/cache@v4
266	with:
267	path: ${{ github.workspace }}/.hf-cache
268	key: hf-tiny-${{ env.TINY_MODEL_REVISION }}-${{ hashFiles('pyproject.toml') }}
269	restore-keys: |
270	hf-tiny-${{ env.TINY_MODEL_REVISION }}-
271
272	- name: Pre-warm tiny model
273	env:
274	HF_HOME: ${{ github.workspace }}/.hf-cache
275	DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
276	run: |
277	set -euxo pipefail
278	echo "Cache hit: ${{ steps.hf-cache.outputs.cache-hit }}"
279	uv run python - <<'PY'
280	from tests.fixtures.tiny_model import tiny_model_path
281	print("tiny model at:", tiny_model_path())
282	PY
283
284	- name: Restore llama.cpp build cache
285	id: llama-cpp-cache
286	uses: actions/cache@v4
287	with:
288	path: vendor/llama.cpp/build
289	# Cache key: submodule HEAD sha + build profile. CI uses a
290	# portable CPU build so cached binaries stay runnable across
291	# heterogeneous ubuntu runner hosts.
292	key: llama-cpp-build-portable-v1-${{ runner.os }}-${{ hashFiles('.gitmodules', 'vendor/llama.cpp/VERSION') }}
293
294	- name: Build llama.cpp tools (if not cached)
295	if: steps.llama-cpp-cache.outputs.cache-hit != 'true'
296	run: |
297	set -euxo pipefail
298	# ubuntu-latest ships cmake; `sudo apt-get install -y cmake` is a no-op fallback.
299	command -v cmake >/dev/null 2>&1 || sudo apt-get install -y cmake
300	scripts/bump-llama-cpp.sh build --portable --with-server
301
302	- name: Run slow tests
303	env:
304	HF_HOME: ${{ github.workspace }}/.hf-cache
305	DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
306	DLM_ENABLE_SLOW_INTEGRATION: "1"
307	run: uv run pytest -m "slow" -v
308