name: CI

on:
  push:
    branches: [trunk]
  pull_request:
    branches: [trunk]

# Least-privilege GITHUB_TOKEN: every job in this workflow only checks out
# the repository and restores/saves caches, so read access is sufficient.
permissions:
  contents: read

# One in-flight run per workflow + ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  UV_VERSION: "0.11.6"
  PYTHON_VERSION: "3.11"
  # Pinned to BASE_MODELS["smollm2-135m"].revision (Sprint 06 registry).
  TINY_MODEL_REVISION: "12fd25f77366fa6b3b4b768ec3050bf629380bac"

jobs:
  # Lint, type-check and run the default pytest selection on every matrix OS.
  # The per-package 100% coverage gates run on ubuntu-latest only.
  lint-type-test:
    name: lint / typecheck / test (${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: ${{ env.UV_VERSION }}

      - name: Sync dependencies
        run: uv sync --all-extras --dev

      - name: Install minisign (for share/signing coverage)
        # The signing code path probes `shutil.which("minisign")` and
        # refuses with a typed error when absent. CI installs it so the
        # "available → sign/verify" branch runs alongside the "absent"
        # refusal branch that's exercised on developer machines without
        # it. Best-effort: if the install fails (e.g. Homebrew rate
        # limit), tests still pass via the refusal path.
        run: |
          if [ "${{ matrix.os }}" = "ubuntu-latest" ]; then
            # Every apt call is guarded: the step shell runs with `-e`, so an
            # unguarded index refresh failure would defeat the best-effort
            # contract described above.
            sudo apt-get update -qq || true
            sudo apt-get install -y minisign || true
          elif [ "${{ matrix.os }}" = "macos-latest" ]; then
            brew install minisign || true
          fi
          if command -v minisign; then
            minisign -v || true
          else
            echo "minisign not available; tests use the refusal path"
          fi

      - name: Ruff lint
        run: uv run ruff check .

      - name: Ruff format check
        run: uv run ruff format --check .

      - name: Mypy
        run: uv run mypy src/dlm

      - name: Pytest (unit + integration, non-slow)
        run: uv run pytest

      - name: Coverage gate — src/dlm/doc = 100% (audit 02 M4)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/doc \
            --cov=src/dlm/doc \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/store = 100% (Sprint 04)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/store \
            --cov=src/dlm/store \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/hardware = 100% (Sprint 05)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/hardware \
            --cov=src/dlm/hardware \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/base_models = 100% (Sprint 06)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/base_models \
            --cov=src/dlm/base_models \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/data = 100% (Sprint 07)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/data \
            --cov=src/dlm/data \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/replay = 100% (Sprint 08)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/replay \
            --cov=src/dlm/replay \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/train = 100% (Sprint 09)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/train \
            --cov=src/dlm/train \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/train/preference = 100%
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/train/preference \
            --cov=src/dlm/train/preference \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/eval = 100% (Sprint 10)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/eval \
            --cov=src/dlm/eval \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/inference = 100% (Sprint 10)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/inference \
            --cov=src/dlm/inference \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/export = 100% (Sprint 11)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/export \
            --cov=src/dlm/export \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/export/ollama = 100% (Sprint 12)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/export/ollama \
            --cov=src/dlm/export/ollama \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/cli/reporter = 100% (Sprint 13)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/cli \
            --cov=dlm.cli.reporter \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/io/ulid = 100% (Sprint 13)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/test_io_ulid.py \
            --cov=dlm.io.ulid \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/pack = 100% (Sprint 14)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/pack tests/integration/pack \
            --cov=src/dlm/pack \
            --cov-report=term-missing \
            --cov-fail-under=100

      - name: Coverage gate — src/dlm/lock = 100% (Sprint 15)
        if: matrix.os == 'ubuntu-latest'
        run: |
          uv run pytest tests/unit/lock \
            --cov=src/dlm/lock \
            --cov-report=term-missing \
            --cov-fail-under=100

  no-network-sandbox:
    # audit F13: dlm init / doctor / show must work with zero outbound network.
    name: no-network sandbox (ubuntu-latest)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: ${{ env.UV_VERSION }}

      - name: Sync dependencies (before blocking network)
        run: uv sync --all-extras --dev

      - name: Block egress then exercise local-only CLI commands
        env:
          # Belt-and-braces: force HF / transformers offline posture.
          HF_HUB_OFFLINE: "1"
          TRANSFORMERS_OFFLINE: "1"
          HF_DATASETS_OFFLINE: "1"
        run: |
          set -euxo pipefail
          # ALWAYS flush OUTPUT on exit — otherwise the post-step hooks
          # (cache upload, artifact collection) lose the runner's
          # heartbeat to GitHub Actions and the job fails with
          # "hosted runner lost communication with the server".
          trap 'sudo iptables -F OUTPUT || true' EXIT
          # Drop all non-loopback egress. Commands that try to reach out
          # will fail — CI fails if any currently-"local-only" command
          # attempts network.
          sudo iptables -A OUTPUT -o lo -j ACCEPT
          sudo iptables -A OUTPUT -d 127.0.0.0/8 -j ACCEPT
          sudo iptables -A OUTPUT -j REJECT
          # Sanity check: confirm egress is blocked.
          (! curl --max-time 3 -sS https://example.com -o /dev/null) || (echo "egress not blocked" && exit 1)
          # Exercise CLI surfaces that must be local-only at this sprint.
          uv run dlm --version
          uv run dlm --help
          # Sprint 05 landed: `dlm doctor` probes torch + psutil only
          # and emits JSON with no outbound traffic. If it ever reaches
          # for the network under the iptables-blocked sandbox, this job
          # fails loudly (audit-03 M4).
          uv run dlm doctor --json >/dev/null
          uv run dlm doctor >/dev/null
          # `dlm show` lands in Sprint 13 (CLI finalization); add here
          # when it's wired.

  slow-tests:
    # Sprint 02: marker-gated tests that touch HF. Cache-keyed on
    # (pyproject.toml hash, tiny-model revision) per audit guidance.
    # Sprint 11: also initializes + builds `vendor/llama.cpp` so export
    # integration tests can exercise real GGUF conversion.
    name: slow tests (hf-cache + llama.cpp)
    runs-on: ubuntu-latest
    steps:
      - name: Checkout with llama.cpp submodule
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: ${{ env.UV_VERSION }}

      - name: Sync dependencies
        run: uv sync --all-extras --dev

      - name: Restore HF cache
        id: hf-cache
        uses: actions/cache@v4
        with:
          path: ${{ github.workspace }}/.hf-cache
          key: hf-tiny-${{ env.TINY_MODEL_REVISION }}-${{ hashFiles('pyproject.toml') }}
          restore-keys: |
            hf-tiny-${{ env.TINY_MODEL_REVISION }}-

      - name: Pre-warm tiny model
        env:
          HF_HOME: ${{ github.workspace }}/.hf-cache
          DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
        run: |
          set -euxo pipefail
          echo "Cache hit: ${{ steps.hf-cache.outputs.cache-hit }}"
          uv run python - <<'PY'
          from tests.fixtures.tiny_model import tiny_model_path
          print("tiny model at:", tiny_model_path())
          PY

      - name: Resolve llama.cpp submodule revision
        id: llama-cpp-rev
        # Reads the commit recorded for the submodule in the checked-out
        # tree, so the build cache key below changes exactly when the
        # submodule pointer is bumped.
        run: |
          set -euo pipefail
          echo "sha=$(git rev-parse HEAD:vendor/llama.cpp)" >> "$GITHUB_OUTPUT"

      - name: Restore llama.cpp build cache
        id: llama-cpp-cache
        uses: actions/cache@v4
        with:
          path: vendor/llama.cpp/build
          # Cache key: submodule HEAD sha + build profile. CI uses a
          # portable CPU build so cached binaries stay runnable across
          # heterogeneous ubuntu runner hosts.
          key: llama-cpp-build-portable-v1-${{ runner.os }}-${{ steps.llama-cpp-rev.outputs.sha }}

      - name: Build llama.cpp tools (if not cached)
        if: steps.llama-cpp-cache.outputs.cache-hit != 'true'
        run: |
          set -euxo pipefail
          # ubuntu-latest ships cmake; `sudo apt-get install -y cmake` is a no-op fallback.
          command -v cmake >/dev/null 2>&1 || sudo apt-get install -y cmake
          scripts/bump-llama-cpp.sh build --portable --with-server

      - name: Run slow tests
        env:
          HF_HOME: ${{ github.workspace }}/.hf-cache
          DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
          DLM_ENABLE_SLOW_INTEGRATION: "1"
        run: uv run pytest -m "slow" -v