name: Weekly chat-template drift (Sprint 12.6)

# Runs the closed-loop Go↔Jinja check every Sunday:
#   1. HF side — refresh-chat-template-goldens.py --check asserts that no
#      golden has drifted since the last checked-in matrix. Fails fast if an
#      upstream tokenizer revision silently changed its chat_template.
#   2. Go side — install Ollama, build a tiny-model GGUF via vendored
#      llama.cpp, register it with `ollama create`, then run
#      tests/integration/export/test_template_closed_loop.py with
#      OLLAMA_NAME pointing at the registered model. Ollama's
#      prompt_eval_count must equal the HF golden for every scenario.
#
# Trigger: weekly cron + workflow_dispatch for operators bumping bases.
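#
# "HF golden" concretely means the token count of the fully rendered prompt.
# A sketch of how the refresh script derives it (field names illustrative;
# the real schema lives with the script):
#
#   from transformers import AutoTokenizer
#   tok = AutoTokenizer.from_pretrained(model_id, revision=rev)
#   ids = tok.apply_chat_template(scenario.messages, tokenize=True)
#   golden["token_count"] = len(ids)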

on:
  schedule:
    # Sundays at 06:00 UTC — during quieter traffic windows for HF / Ollama pulls.
    - cron: "0 6 * * 0"
  workflow_dispatch: {}
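  # Manual runs, e.g. after bumping a base revision:
  #   gh workflow run "Weekly chat-template drift (Sprint 12.6)"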

concurrency:
  group: weekly-template-drift
  cancel-in-progress: false

env:
  UV_VERSION: "0.11.6"
  PYTHON_VERSION: "3.11"
  # Pinned to BASE_MODELS["smollm2-135m"].revision (Sprint 06 registry).
  # Same SHA as ci.yml — keep in sync when bumping.
  TINY_MODEL_REVISION: "12fd25f77366fa6b3b4b768ec3050bf629380bac"
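  # The SHA reaches the tiny-model fixture through the per-step
  # DLM_TINY_MODEL_REVISION env var (see the pre-warm and export steps below).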

jobs:
  check-hf-side:
    # Cheap half: no Ollama, no llama.cpp, no quant. If this fails, the
    # whole workflow is done — nothing to verify against.
    name: HF goldens unchanged
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: ${{ env.UV_VERSION }}

      - name: Sync dependencies
        run: uv sync --all-extras --dev

      - name: Restore HF cache
        uses: actions/cache@v4
        with:
          path: ${{ github.workspace }}/.hf-cache
          key: hf-tiny-${{ env.TINY_MODEL_REVISION }}-${{ hashFiles('pyproject.toml') }}
          restore-keys: |
            hf-tiny-${{ env.TINY_MODEL_REVISION }}-

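      # Populates the cached HF_HOME on a cache miss (a no-op on a hit), so
      # the golden check below never has to download mid-run.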
      - name: Pre-warm tiny model
        env:
          HF_HOME: ${{ github.workspace }}/.hf-cache
          DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
        run: |
          uv run python - <<'PY'
          from tests.fixtures.tiny_model import tiny_model_path
          print("tiny model at:", tiny_model_path())
          PY

      - name: Refresh goldens in --check mode (chatml only — the only cached dialect)
        env:
          HF_HOME: ${{ github.workspace }}/.hf-cache
        run: uv run python scripts/refresh-chat-template-goldens.py --check --dialect chatml
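      # If the drift is intentional, regenerate the goldens locally and commit
      # the updated matrix (presumably by rerunning the script without --check).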

  closed-loop:
    # Expensive half: install Ollama, build a base GGUF, register it,
    # and assert prompt_eval_count == HF golden.
    name: Go↔Jinja closed loop (chatml)
    needs: check-hf-side
    runs-on: ubuntu-latest
    steps:
      - name: Checkout with llama.cpp submodule
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: ${{ env.UV_VERSION }}

      - name: Sync dependencies
        run: uv sync --all-extras --dev

      - name: Restore HF cache
        uses: actions/cache@v4
        with:
          path: ${{ github.workspace }}/.hf-cache
          key: hf-tiny-${{ env.TINY_MODEL_REVISION }}-${{ hashFiles('pyproject.toml') }}
          restore-keys: |
            hf-tiny-${{ env.TINY_MODEL_REVISION }}-

      - name: Pre-warm tiny model
        env:
          HF_HOME: ${{ github.workspace }}/.hf-cache
          DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
        run: |
          uv run python - <<'PY'
          from tests.fixtures.tiny_model import tiny_model_path
          print("tiny model at:", tiny_model_path())
          PY

      - name: Restore llama.cpp build cache
        id: llama-cpp-cache
        uses: actions/cache@v4
        with:
          path: vendor/llama.cpp/build
          key: llama-cpp-build-${{ hashFiles('.gitmodules', 'vendor/llama.cpp/VERSION') }}

      - name: Build llama-quantize (if not cached)
        if: steps.llama-cpp-cache.outputs.cache-hit != 'true'
        run: |
          set -euxo pipefail
          command -v cmake >/dev/null 2>&1 || { sudo apt-get update && sudo apt-get install -y cmake; }
          scripts/bump-llama-cpp.sh build

      - name: Install Ollama
        run: |
          set -euxo pipefail
          curl -fsSL https://ollama.com/install.sh | sh
          # Start the ollama server in the background so `ollama create` /
          # `ollama run` have something to talk to.
          ollama serve >/tmp/ollama.log 2>&1 &
          # Poll for readiness instead of a blind sleep, and fail the job if
          # the server never comes up.
          ready=0
          for i in $(seq 1 30); do
            if ollama list >/dev/null 2>&1; then
              echo "ollama ready after ${i}s"
              ready=1
              break
            fi
            sleep 1
          done
          if [ "$ready" -ne 1 ]; then
            echo "ollama did not become ready; tail of /tmp/ollama.log:" >&2
            tail -n 50 /tmp/ollama.log >&2
            exit 1
          fi
          ollama --version

      - name: Export tiny model to Ollama (registers under dlm-test-chatml)
        env:
          HF_HOME: ${{ github.workspace }}/.hf-cache
          DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
        run: |
          set -euxo pipefail
          # Placeholder: the tiny-model export pipeline lands in Sprint 14.5.
          # Until then this step is a no-op, so the closed-loop job exits 0
          # after the build steps above — the scaffold is in place for the
          # runner to fill.
          echo "export pipeline TBD — see Sprint 14.5"
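          # Expected shape once it lands, for orientation only (paths, file
          # names, and the quant type here are hypothetical):
          #   python vendor/llama.cpp/convert_hf_to_gguf.py "$TINY_DIR" --outfile tiny-f16.gguf
          #   vendor/llama.cpp/build/bin/llama-quantize tiny-f16.gguf tiny-q8.gguf Q8_0
          #   printf 'FROM ./tiny-q8.gguf\n' > Modelfile
          #   ollama create dlm-test-chatml -f Modelfile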

      - name: Run closed-loop integration test
        if: false # enabled once the export step above registers OLLAMA_NAME
        env:
          OLLAMA_NAME: "dlm-test-chatml:latest"
          HF_HOME: ${{ github.workspace }}/.hf-cache
          DLM_TINY_MODEL_REVISION: ${{ env.TINY_MODEL_REVISION }}
        run: uv run pytest -m slow -v tests/integration/export/test_template_closed_loop.py
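
# For reference, the Go-side half of the invariant as the (currently disabled)
# test is expected to measure it. A sketch against Ollama's /api/chat; the
# golden lookup is illustrative:
#
#   import os, requests
#   r = requests.post("http://localhost:11434/api/chat", json={
#       "model": os.environ["OLLAMA_NAME"],
#       "messages": scenario.messages,
#       "stream": False,
#   })
#   assert r.json()["prompt_eval_count"] == golden["token_count"]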