Python · 12152 bytes Raw Blame History
1 """Tests for transcript compaction and summary compression."""
2
3 from __future__ import annotations
4
5 from loader.llm.base import Message, Role, ToolCall
6 from loader.runtime.compaction import (
7 SummaryCompressionBudget,
8 build_session_summary,
9 compact_session_messages,
10 compress_summary,
11 infer_preferred_next_step,
12 resolve_auto_compaction_input_tokens_threshold,
13 summarize_confirmed_facts,
14 )
15
16
def test_compress_summary_dedupes_lines_and_collapses_whitespace() -> None:
    """Check that ``compress_summary`` drops repeated lines and squeezes space runs.

    The input repeats two bullet lines, one of which contains runs of
    several spaces. The test expects two duplicates to be reported, the
    single-spaced form of the scope line to be present, and the padded form
    to be absent from the compressed summary.
    """
    # The scope line carries runs of spaces so the whitespace assertions are
    # meaningful. Both copies are padded identically, so the duplicate count
    # does not depend on whether whitespace is normalised before or after
    # de-duplication.
    padded_scope = "- Scope:   compact   earlier messages."
    current_work = "- Current work: finish session persistence."
    summary = "\n".join(
        [
            "Conversation summary:",
            padded_scope,
            padded_scope,
            current_work,
            current_work,
        ]
    )

    result = compress_summary(summary, budget=SummaryCompressionBudget())

    assert result.removed_duplicate_lines == 2
    assert "- Scope: compact earlier messages." in result.summary
    # The needle must contain a multi-space run; a single-spaced needle would
    # be a substring of the line asserted present just above.
    assert "   compact   earlier" not in result.summary
33
34
def test_compact_session_messages_preserves_recent_messages() -> None:
    """Compact six alternating turns while keeping the last four.

    Expects two messages to be reported as removed, the last four message
    contents to match the input tail, and the first resulting message to
    carry the compacted-context marker plus the continuation instructions.
    """
    turns = (
        (Role.USER, "First task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Focus on runtime quality"),
        (Role.ASSISTANT, "Tracked updated files"),
        (Role.USER, "Verify the result"),
        (Role.ASSISTANT, "Verification passed"),
    )
    messages = [Message(role=role, content=text) for role, text in turns]

    result = compact_session_messages(
        messages,
        keep_last_messages=4,
        current_task="Improve Loader runtime continuity",
    )

    assert result is not None
    assert result.removed_message_count == 2

    kept_tail = [message.content for message in result.messages[-4:]]
    expected_tail = [message.content for message in messages[-4:]]
    assert kept_tail == expected_tail

    header = result.messages[0].content
    assert header.startswith("[COMPACTED CONTEXT]")
    assert "Continuation instructions:" in header
    assert "authoritative over older summaries or durable memory notes" in header
62
63
def test_compact_session_messages_includes_active_dod_summary() -> None:
    """Pass an ``active_dod_summary`` and expect it to appear in the result summary.

    The summary is expected to contain an "- Active DoD:" bullet beginning
    with the supplied status text, along with the failed verifier detail.
    """
    dod_summary = (
        "status=fixing; last verification=failed; "
        "latest failed verifier=/tmp/guide/chapter.html: thin content"
    )
    tool_observation = Message(
        role=Role.TOOL,
        content="Observation [notepad_read]: Result: guide complete",
    )
    messages = [
        Message(role=Role.USER, content="Create the generated guide."),
        Message(role=Role.ASSISTANT, content="Wrote the guide files."),
        tool_observation,
        Message(role=Role.ASSISTANT, content="I will finish."),
        Message(role=Role.USER, content="Continue repairing."),
    ]

    result = compact_session_messages(
        messages,
        keep_last_messages=2,
        current_task="Create the generated guide.",
        active_dod_summary=dod_summary,
    )

    assert result is not None
    summary_text = result.summary
    assert "- Active DoD: status=fixing; last verification=failed;" in summary_text
    assert "/tmp/guide/chapter.html: thin content" in summary_text
89
90
def test_build_session_summary_skips_nested_compacted_context_content() -> None:
    """Feed an already-compacted message back into ``build_session_summary``.

    The first message and ``previous_summary`` both start with the
    ``[COMPACTED CONTEXT]`` marker. The summary is expected not to quote that
    marker under "Recent user requests" or "Pending work", and to include the
    "Previously compacted context retained." bullet.
    """
    compacted_turn = Message(
        role=Role.USER,
        content=(
            "[COMPACTED CONTEXT]\nConversation summary:\n"
            "- Scope: older work\n- Current work: old state"
        ),
    )
    messages = [
        compacted_turn,
        Message(role=Role.ASSISTANT, content="Read the chapter index."),
        Message(role=Role.USER, content="Update the chapter links."),
    ]
    earlier_summary = "[COMPACTED CONTEXT]\nConversation summary:\n- Scope: older work"

    summary = build_session_summary(
        messages,
        previous_summary=earlier_summary,
        current_task="Repair the table of contents links",
    )

    assert "Recent user requests: [COMPACTED CONTEXT]" not in summary
    assert "Pending work: [COMPACTED CONTEXT]" not in summary
    assert "- Previously compacted context retained." in summary
113
114
def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None:
    """Summarise a transcript holding a recorded file mapping and chapter reads.

    Expects the summary to contain a "Confirmed facts:" section with the
    recorded ``02-basic-syntax.html -> 02-setup.html`` mapping, a
    "Preferred next step:" section, and the back-quoted index path. It must
    not contain a fact derived from the chapter heading that was read.
    """
    index_path = "~/Loader/guides/fortran/index.html"
    setup_chapter_path = "~/Loader/guides/fortran/chapters/02-setup.html"

    # The same numbered listing is used for both the displayed and raw result.
    setup_chapter_listing = (
        " 1\t<!DOCTYPE html>\n"
        " 2\t<html>\n"
        " 61\t<h1>Chapter 2: Setting Up Fortran</h1>\n"
        " 62\t</html>\n"
    )

    recorded_mapping = Message(
        role=Role.TOOL,
        content=(
            "Observation [notepad_write_working]: Result: "
            "02-basic-syntax.html -> 02-setup.html\n"
            "03-variables-data-types.html -> 03-basics.html"
        ),
    )
    read_index = Message(
        role=Role.ASSISTANT,
        content="Checking the index before editing it.",
        tool_calls=[
            ToolCall(id="read-1", name="read", arguments={"file_path": index_path}),
        ],
    )
    read_setup_chapter = Message(
        role=Role.ASSISTANT,
        content="Inspecting the setup chapter title.",
        tool_calls=[
            ToolCall(
                id="read-2",
                name="read",
                arguments={"file_path": setup_chapter_path},
            ),
        ],
    )
    setup_chapter_result = Message.tool_result_message(
        tool_call_id="read-2",
        display_content=setup_chapter_listing,
        result_content=setup_chapter_listing,
    )
    chapter_glob = Message(
        role=Role.TOOL,
        content=(
            "Observation [glob]: Result: "
            "/Users/mfwolffe/Loader/guides/fortran/chapters/01-introduction.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/02-setup.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/03-basics.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/04-variables.html"
        ),
    )
    messages = [
        recorded_mapping,
        read_index,
        read_setup_chapter,
        setup_chapter_result,
        chapter_glob,
    ]

    summary = build_session_summary(
        messages,
        current_task=(
            "Update ~/Loader/guides/fortran/index.html with the correct chapter links."
        ),
    )

    assert "Confirmed facts:" in summary
    assert "02-basic-syntax.html -> 02-setup.html" in summary
    assert "02-setup.html = Chapter 2: Setting Up Fortran" not in summary
    assert "Preferred next step:" in summary
    assert "`~/Loader/guides/fortran/index.html`" in summary
186
187
def test_summarize_confirmed_facts_ignores_reference_chapter_title_reads() -> None:
    """Expect no confirmed facts when the transcript only reads chapter titles.

    Two chapter files are read; one result is an ``<h1>`` heading and the
    other a ``<title>`` element. ``summarize_confirmed_facts`` is expected to
    return ``None`` for this transcript.
    """
    intro_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    setup_title = "<title>Chapter 2: Setting Up Fortran</title>\n"

    chapter_reads = [
        ToolCall(id=call_id, name="read", arguments={"file_path": path})
        for call_id, path in (
            ("read-1", "/tmp/fortran/chapters/01-introduction.html"),
            ("read-2", "/tmp/fortran/chapters/02-setup.html"),
        )
    ]
    messages = [
        Message(
            role=Role.ASSISTANT,
            content="I will inspect the chapter files.",
            tool_calls=chapter_reads,
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=intro_heading,
            result_content=intro_heading,
        ),
        Message.tool_result_message(
            tool_call_id="read-2",
            display_content=setup_title,
            result_content=setup_title,
        ),
    ]

    confirmed_facts = summarize_confirmed_facts(messages, max_items=2)

    assert confirmed_facts is None
221
222
def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None:
    """Expect no preferred next step from index and chapter-title reads alone.

    The assistant requests reads of the index and one chapter, and only the
    chapter read has a result (an ``<h1>`` heading). For this transcript
    ``infer_preferred_next_step`` is expected to return ``None``.
    """
    intro_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    requested_reads = [
        ToolCall(
            id="read-index",
            name="read",
            arguments={"file_path": "/tmp/fortran/index.html"},
        ),
        ToolCall(
            id="read-1",
            name="read",
            arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
        ),
    ]
    assistant_turn = Message(
        role=Role.ASSISTANT,
        content="I should inspect the chapter and then update the index.",
        tool_calls=requested_reads,
    )
    chapter_result = Message.tool_result_message(
        tool_call_id="read-1",
        display_content=intro_heading,
        result_content=intro_heading,
    )
    messages = [assistant_turn, chapter_result]
    task = "Update /tmp/fortran/index.html so the chapter list matches the real files."

    next_step = infer_preferred_next_step(messages, current_task=task)

    assert next_step is None
254
255
def test_infer_preferred_next_step_uses_latest_verification_gap() -> None:
    """Expect neither confirmed facts nor a next step after a failed verifier run.

    The assistant requests two reads and a ``bash`` verification. The chapter
    read returns an ``<h1>`` heading and the verification result, flagged
    ``is_error=True``, lists two missing chapter links. Both
    ``summarize_confirmed_facts`` and ``infer_preferred_next_step`` are
    expected to return ``None``.
    """
    intro_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    verifier_output = (
        "Missing links:\n"
        "chapters/05-control-structures.html -> missing\n"
        "chapters/06-input-output.html -> missing\n"
    )
    requested_calls = [
        ToolCall(
            id="read-index",
            name="read",
            arguments={"file_path": "/tmp/fortran/index.html"},
        ),
        ToolCall(
            id="read-1",
            name="read",
            arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
        ),
        ToolCall(
            id="verify-1",
            name="bash",
            arguments={"command": "python3 - <<'PY'\n...\nPY"},
        ),
    ]
    messages = [
        Message(
            role=Role.ASSISTANT,
            content="I should inspect the chapter and then update the index.",
            tool_calls=requested_calls,
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=intro_heading,
            result_content=intro_heading,
        ),
        Message.tool_result_message(
            tool_call_id="verify-1",
            display_content=verifier_output,
            result_content=verifier_output,
            is_error=True,
        ),
    ]
    task = "Update /tmp/fortran/index.html so the chapter list matches the real files."

    confirmed_facts = summarize_confirmed_facts(messages, max_items=2)
    next_step = infer_preferred_next_step(messages, current_task=task)

    assert confirmed_facts is None
    assert next_step is None
308
309
def test_compact_session_messages_uses_single_continuation_instruction_block() -> None:
    """Expect "Continuation instructions:" exactly once in the first message.

    Five alternating turns are compacted with ``keep_last_messages=2``; the
    first message of the result is then checked for a single occurrence of
    the heading.
    """
    turns = (
        (Role.USER, "Task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Keep going"),
        (Role.ASSISTANT, "Still working"),
        (Role.USER, "Use the known mapping"),
    )
    messages = [Message(role=role, content=text) for role, text in turns]

    result = compact_session_messages(
        messages,
        keep_last_messages=2,
        current_task="Repair the table of contents links",
    )

    assert result is not None
    heading_count = result.messages[0].content.count("Continuation instructions:")
    assert heading_count == 1
327
328
def test_resolve_auto_compaction_threshold_uses_context_window_as_upper_bound() -> None:
    """Check the resolved threshold for several context window sizes.

    The requested threshold is always 100_000. In every case listed, the
    expected value equals the smaller of that request and three quarters of
    the context window.
    """
    requested_threshold = 100_000
    # (context_window, expected resolved threshold)
    cases = (
        (131_072, 98_304),
        (262_144, 100_000),
        (8_192, 6_144),
        (16_000, 12_000),
    )

    for window, expected in cases:
        resolved = resolve_auto_compaction_input_tokens_threshold(
            requested_threshold,
            context_window=window,
        )
        assert resolved == expected