Python · 10901 bytes Raw Blame History
1 """Tests for transcript compaction and summary compression."""
2
3 from __future__ import annotations
4
5 from loader.llm.base import Message, Role, ToolCall
6 from loader.runtime.compaction import (
7 SummaryCompressionBudget,
8 build_session_summary,
9 compact_session_messages,
10 compress_summary,
11 infer_preferred_next_step,
12 resolve_auto_compaction_input_tokens_threshold,
13 summarize_confirmed_facts,
14 )
15
16
def test_compress_summary_dedupes_lines_and_collapses_whitespace() -> None:
    """Check that compression drops duplicate lines and collapses whitespace.

    The first "Scope" line is written with irregular runs of spaces so that it
    only duplicates the second "Scope" line once whitespace is collapsed.
    Together with the repeated "Current work" line this gives the two
    duplicates the test expects to be removed.
    """
    summary = "\n".join(
        [
            "Conversation summary:",
            # Irregular spacing: exercises the whitespace-collapsing path.
            "- Scope:   compact   earlier messages.",
            "- Scope: compact earlier messages.",
            "- Current work: finish session persistence.",
            "- Current work: finish session persistence.",
        ]
    )

    result = compress_summary(summary, budget=SummaryCompressionBudget())

    assert result.removed_duplicate_lines == 2
    assert "- Scope: compact earlier messages." in result.summary
    # The un-collapsed run from the input must be gone. This substring is NOT
    # contained in the normalized line asserted above, so both checks can hold.
    assert "   compact   earlier" not in result.summary
33
34
def test_compact_session_messages_preserves_recent_messages() -> None:
    """Check that compaction keeps the newest messages unchanged.

    Six messages are compacted with ``keep_last_messages=4``. The test expects
    two messages to be reported as removed, the last four contents to match
    the input, and the first output message to start with the compacted-context
    marker and contain continuation instructions.
    """
    keep_count = 4
    exchanges = (
        (Role.USER, "First task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Focus on runtime quality"),
        (Role.ASSISTANT, "Tracked updated files"),
        (Role.USER, "Verify the result"),
        (Role.ASSISTANT, "Verification passed"),
    )
    transcript = [Message(role=role, content=text) for role, text in exchanges]

    compacted = compact_session_messages(
        transcript,
        keep_last_messages=keep_count,
        current_task="Improve Loader runtime continuity",
    )

    assert compacted is not None
    assert compacted.removed_message_count == 2

    kept_tail = [entry.content for entry in compacted.messages[-keep_count:]]
    original_tail = [entry.content for entry in transcript[-keep_count:]]
    assert kept_tail == original_tail

    header = compacted.messages[0].content
    assert header.startswith("[COMPACTED CONTEXT]")
    assert "Continuation instructions:" in header
58
59
def test_build_session_summary_skips_nested_compacted_context_content() -> None:
    """Check that an earlier compacted-context message is not echoed back.

    The transcript opens with a user message that is itself a compacted
    context. The new summary must not quote it under "Recent user requests"
    or "Pending work", and must carry the "previously compacted" marker line.
    """
    nested_context = (
        "[COMPACTED CONTEXT]\nConversation summary:\n"
        "- Scope: older work\n- Current work: old state"
    )
    transcript = [
        Message(role=Role.USER, content=nested_context),
        Message(role=Role.ASSISTANT, content="Read the chapter index."),
        Message(role=Role.USER, content="Update the chapter links."),
    ]

    rendered = build_session_summary(
        transcript,
        previous_summary="[COMPACTED CONTEXT]\nConversation summary:\n- Scope: older work",
        current_task="Repair the table of contents links",
    )

    leaked_fragments = (
        "Recent user requests: [COMPACTED CONTEXT]",
        "Pending work: [COMPACTED CONTEXT]",
    )
    for fragment in leaked_fragments:
        assert fragment not in rendered
    assert "- Previously compacted context retained." in rendered
82
83
def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None:
    """Check that the summary carries confirmed facts and a preferred next step.

    The transcript mixes a notepad observation holding filename mappings, two
    ``read`` tool calls, the result of the second read, and a glob listing.
    The summary is expected to include the mapping, to omit a fact derived
    from the chapter heading, and to reference the index file in backticks.
    """
    # The same listing is used as both the display and the result content.
    setup_chapter_listing = (
        " 1\t<!DOCTYPE html>\n"
        " 2\t<html>\n"
        " 61\t<h1>Chapter 2: Setting Up Fortran</h1>\n"
        " 62\t</html>\n"
    )

    notepad_observation = Message(
        role=Role.TOOL,
        content=(
            "Observation [notepad_write_working]: Result: "
            "02-basic-syntax.html -> 02-setup.html\n"
            "03-variables-data-types.html -> 03-basics.html"
        ),
    )
    index_read = Message(
        role=Role.ASSISTANT,
        content="Checking the index before editing it.",
        tool_calls=[
            ToolCall(
                id="read-1",
                name="read",
                arguments={"file_path": "~/Loader/guides/fortran/index.html"},
            )
        ],
    )
    setup_read = Message(
        role=Role.ASSISTANT,
        content="Inspecting the setup chapter title.",
        tool_calls=[
            ToolCall(
                id="read-2",
                name="read",
                arguments={"file_path": "~/Loader/guides/fortran/chapters/02-setup.html"},
            )
        ],
    )
    setup_result = Message.tool_result_message(
        tool_call_id="read-2",
        display_content=setup_chapter_listing,
        result_content=setup_chapter_listing,
    )
    glob_observation = Message(
        role=Role.TOOL,
        content=(
            "Observation [glob]: Result: "
            "/Users/mfwolffe/Loader/guides/fortran/chapters/01-introduction.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/02-setup.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/03-basics.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/04-variables.html"
        ),
    )

    rendered = build_session_summary(
        [notepad_observation, index_read, setup_read, setup_result, glob_observation],
        current_task=(
            "Update ~/Loader/guides/fortran/index.html with the correct chapter links."
        ),
    )

    assert "Confirmed facts:" in rendered
    assert "02-basic-syntax.html -> 02-setup.html" in rendered
    assert "02-setup.html = Chapter 2: Setting Up Fortran" not in rendered
    assert "Preferred next step:" in rendered
    assert "`~/Loader/guides/fortran/index.html`" in rendered
155
156
def test_summarize_confirmed_facts_ignores_reference_chapter_title_reads() -> None:
    """Check that plain chapter-title reads yield no confirmed facts.

    Two chapter files are read and each result contains only a heading or
    title tag. ``summarize_confirmed_facts`` is expected to return ``None``.
    """
    intro_markup = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    setup_markup = "<title>Chapter 2: Setting Up Fortran</title>\n"

    chapter_reads = [
        ToolCall(id=call_id, name="read", arguments={"file_path": chapter_path})
        for call_id, chapter_path in (
            ("read-1", "/tmp/fortran/chapters/01-introduction.html"),
            ("read-2", "/tmp/fortran/chapters/02-setup.html"),
        )
    ]
    transcript = [
        Message(
            role=Role.ASSISTANT,
            content="I will inspect the chapter files.",
            tool_calls=chapter_reads,
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=intro_markup,
            result_content=intro_markup,
        ),
        Message.tool_result_message(
            tool_call_id="read-2",
            display_content=setup_markup,
            result_content=setup_markup,
        ),
    ]

    facts = summarize_confirmed_facts(transcript, max_items=2)

    assert facts is None
190
191
def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None:
    """Check the next-step inference after an index read and one chapter read.

    Only the chapter read has a tool result (a single heading line). With that
    transcript the test expects ``infer_preferred_next_step`` to return
    ``None``.
    """
    intro_markup = "<h1>Chapter 1: Introduction to Fortran</h1>\n"

    planned_reads = [
        ToolCall(id=call_id, name="read", arguments={"file_path": target})
        for call_id, target in (
            ("read-index", "/tmp/fortran/index.html"),
            ("read-1", "/tmp/fortran/chapters/01-introduction.html"),
        )
    ]
    transcript = [
        Message(
            role=Role.ASSISTANT,
            content="I should inspect the chapter and then update the index.",
            tool_calls=planned_reads,
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=intro_markup,
            result_content=intro_markup,
        ),
    ]

    suggestion = infer_preferred_next_step(
        transcript,
        current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.",
    )

    assert suggestion is None
223
224
def test_infer_preferred_next_step_uses_latest_verification_gap() -> None:
    """Check fact and next-step inference when the verification call errors.

    The assistant issues two reads and a ``bash`` verification. The
    verification result is flagged ``is_error=True`` and lists missing links.
    The test expects both ``summarize_confirmed_facts`` and
    ``infer_preferred_next_step`` to return ``None`` for this transcript.
    """
    intro_markup = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    gap_report = (
        "Missing links:\n"
        "chapters/05-control-structures.html -> missing\n"
        "chapters/06-input-output.html -> missing\n"
    )
    task = "Update /tmp/fortran/index.html so the chapter list matches the real files."

    index_read = ToolCall(
        id="read-index",
        name="read",
        arguments={"file_path": "/tmp/fortran/index.html"},
    )
    intro_read = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
    )
    verification = ToolCall(
        id="verify-1",
        name="bash",
        arguments={"command": "python3 - <<'PY'\n...\nPY"},
    )

    transcript = [
        Message(
            role=Role.ASSISTANT,
            content="I should inspect the chapter and then update the index.",
            tool_calls=[index_read, intro_read, verification],
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=intro_markup,
            result_content=intro_markup,
        ),
        Message.tool_result_message(
            tool_call_id="verify-1",
            display_content=gap_report,
            result_content=gap_report,
            is_error=True,
        ),
    ]

    facts = summarize_confirmed_facts(transcript, max_items=2)
    suggestion = infer_preferred_next_step(transcript, current_task=task)

    assert facts is None
    assert suggestion is None
277
278
def test_compact_session_messages_uses_single_continuation_instruction_block() -> None:
    """Check that the compacted header has exactly one instructions block.

    Five messages are compacted with ``keep_last_messages=2``. The first
    output message must contain "Continuation instructions:" exactly once.
    """
    exchanges = (
        (Role.USER, "Task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Keep going"),
        (Role.ASSISTANT, "Still working"),
        (Role.USER, "Use the known mapping"),
    )
    transcript = [Message(role=role, content=text) for role, text in exchanges]

    compacted = compact_session_messages(
        transcript,
        keep_last_messages=2,
        current_task="Repair the table of contents links",
    )

    assert compacted is not None
    header = compacted.messages[0].content
    assert header.count("Continuation instructions:") == 1
296
297
def test_resolve_auto_compaction_threshold_uses_context_window_as_upper_bound() -> None:
    """Check the resolved threshold for several context-window sizes.

    The requested threshold is always 100_000. Each row pairs a context
    window with the threshold the resolver is expected to return.
    """
    requested_threshold = 100_000
    expectations = (
        # 98_304 is exactly three quarters of 131_072.
        (131_072, 98_304),
        # Requested value is returned unchanged for the larger window.
        (262_144, 100_000),
        # NOTE(review): expected value exceeds the window itself; presumably
        # a minimum floor in the resolver. Confirm against the implementation.
        (8_192, 12_000),
    )

    for window, expected in expectations:
        resolved = resolve_auto_compaction_input_tokens_threshold(
            requested_threshold,
            context_window=window,
        )
        assert resolved == expected