loader Public

Watch 0 Fork 0 Star 0

Python · 10901 bytes Raw Blame History

  
        1
        """Tests for transcript compaction and summary compression."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        from loader.llm.base import Message, Role, ToolCall
      
        6
        from loader.runtime.compaction import (
      
        7
            SummaryCompressionBudget,
      
        8
            build_session_summary,
      
        9
            compact_session_messages,
      
        10
            compress_summary,
      
        11
            infer_preferred_next_step,
      
        12
            resolve_auto_compaction_input_tokens_threshold,
      
        13
            summarize_confirmed_facts,
      
        14
        )
      
        15
        
        16
        
        17
        def test_compress_summary_dedupes_lines_and_collapses_whitespace() -> None:
            """Check duplicate-line removal and whitespace squeezing in compress_summary.

            Each bullet appears twice in the input; the first "Scope" bullet is
            padded with runs of spaces. The test expects two lines to be reported
            as removed duplicates and the padded spelling to be absent from the
            output.
            """
            raw_lines = (
                "Conversation summary:",
                "- Scope:   compact   earlier   messages.",
                "- Scope: compact earlier messages.",
                "- Current work: finish session persistence.",
                "- Current work: finish session persistence.",
            )
            budget = SummaryCompressionBudget()

            compressed = compress_summary("\n".join(raw_lines), budget=budget)

            assert compressed.removed_duplicate_lines == 2
            # The single-spaced bullet must survive; the padded spelling must not.
            assert "- Scope: compact earlier messages." in compressed.summary
            assert "  compact   earlier" not in compressed.summary
      
        33
        
        34
        
        35
        def test_compact_session_messages_preserves_recent_messages() -> None:
            """Compacting six messages with keep_last_messages=4 keeps the tail intact.

            Expects two messages to be counted as removed, the contents of the
            last four messages to be unchanged, and the first returned message to
            start with "[COMPACTED CONTEXT]" and contain continuation instructions.
            """
            keep = 4
            turns = (
                (Role.USER, "First task framing"),
                (Role.ASSISTANT, "Initial plan"),
                (Role.USER, "Focus on runtime quality"),
                (Role.ASSISTANT, "Tracked updated files"),
                (Role.USER, "Verify the result"),
                (Role.ASSISTANT, "Verification passed"),
            )
            transcript = [Message(role=role, content=text) for role, text in turns]

            outcome = compact_session_messages(
                transcript,
                keep_last_messages=keep,
                current_task="Improve Loader runtime continuity",
            )

            assert outcome is not None
            assert outcome.removed_message_count == 2

            kept_contents = [entry.content for entry in outcome.messages[-keep:]]
            expected_contents = [entry.content for entry in transcript[-keep:]]
            assert kept_contents == expected_contents

            head = outcome.messages[0].content
            assert head.startswith("[COMPACTED CONTEXT]")
            assert "Continuation instructions:" in head
      
        58
        
        59
        
        60
        def test_build_session_summary_skips_nested_compacted_context_content() -> None:
            """An earlier "[COMPACTED CONTEXT]" message must not be echoed as new content.

            The transcript starts with a user message that is itself a compacted
            summary. The test expects that text not to show up under "Recent user
            requests" or "Pending work", and expects the summary to carry the
            "Previously compacted context retained." bullet instead.
            """
            earlier_compaction = (
                "[COMPACTED CONTEXT]\nConversation summary:\n"
                "- Scope: older work\n- Current work: old state"
            )
            transcript = [
                Message(role=Role.USER, content=earlier_compaction),
                Message(role=Role.ASSISTANT, content="Read the chapter index."),
                Message(role=Role.USER, content="Update the chapter links."),
            ]

            digest = build_session_summary(
                transcript,
                previous_summary="[COMPACTED CONTEXT]\nConversation summary:\n- Scope: older work",
                current_task="Repair the table of contents links",
            )

            leaked_sections = (
                "Recent user requests: [COMPACTED CONTEXT]",
                "Pending work: [COMPACTED CONTEXT]",
            )
            for leaked in leaked_sections:
                assert leaked not in digest
            assert "- Previously compacted context retained." in digest
      
        82
        
        83
        
        84
        def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None:
            """The summary keeps recorded rename pairs and names a preferred next step.

            The transcript holds a notepad observation with "old -> new" file
            pairs, two read calls (index and setup chapter), the setup chapter
            listing, and a glob listing. The test expects the rename pair to be
            reported under "Confirmed facts:", the chapter title read NOT to be
            turned into a "file = title" fact, and the index path to appear in
            backticks in the summary.
            """
            # The same listing is supplied as both display and result content.
            setup_listing = (
                "   1\t<!DOCTYPE html>\n"
                "   2\t<html>\n"
                "  61\t<h1>Chapter 2: Setting Up Fortran</h1>\n"
                "  62\t</html>\n"
            )
            rename_note = Message(
                role=Role.TOOL,
                content=(
                    "Observation [notepad_write_working]: Result: "
                    "02-basic-syntax.html -> 02-setup.html\n"
                    "03-variables-data-types.html -> 03-basics.html"
                ),
            )
            index_read = Message(
                role=Role.ASSISTANT,
                content="Checking the index before editing it.",
                tool_calls=[
                    ToolCall(
                        id="read-1",
                        name="read",
                        arguments={"file_path": "~/Loader/guides/fortran/index.html"},
                    )
                ],
            )
            setup_read = Message(
                role=Role.ASSISTANT,
                content="Inspecting the setup chapter title.",
                tool_calls=[
                    ToolCall(
                        id="read-2",
                        name="read",
                        arguments={"file_path": "~/Loader/guides/fortran/chapters/02-setup.html"},
                    )
                ],
            )
            setup_result = Message.tool_result_message(
                tool_call_id="read-2",
                display_content=setup_listing,
                result_content=setup_listing,
            )
            chapter_glob = Message(
                role=Role.TOOL,
                content=(
                    "Observation [glob]: Result: "
                    "/Users/mfwolffe/Loader/guides/fortran/chapters/01-introduction.html\n"
                    "/Users/mfwolffe/Loader/guides/fortran/chapters/02-setup.html\n"
                    "/Users/mfwolffe/Loader/guides/fortran/chapters/03-basics.html\n"
                    "/Users/mfwolffe/Loader/guides/fortran/chapters/04-variables.html"
                ),
            )
            transcript = [rename_note, index_read, setup_read, setup_result, chapter_glob]

            digest = build_session_summary(
                transcript,
                current_task="Update ~/Loader/guides/fortran/index.html with the correct chapter links.",
            )

            assert "Confirmed facts:" in digest
            assert "02-basic-syntax.html -> 02-setup.html" in digest
            assert "02-setup.html = Chapter 2: Setting Up Fortran" not in digest
            assert "Preferred next step:" in digest
            assert "`~/Loader/guides/fortran/index.html`" in digest
      
        155
        
        156
        
        157
        def test_summarize_confirmed_facts_ignores_reference_chapter_title_reads() -> None:
            """Reading chapter headings alone must not produce confirmed facts.

            The transcript contains two read calls on chapter files and their
            results (an <h1> line and a <title> line). The test expects
            summarize_confirmed_facts to return None for it.
            """
            first_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
            second_heading = "<title>Chapter 2: Setting Up Fortran</title>\n"
            chapter_reads = [
                ToolCall(id=call_id, name="read", arguments={"file_path": chapter_path})
                for call_id, chapter_path in (
                    ("read-1", "/tmp/fortran/chapters/01-introduction.html"),
                    ("read-2", "/tmp/fortran/chapters/02-setup.html"),
                )
            ]
            transcript = [
                Message(
                    role=Role.ASSISTANT,
                    content="I will inspect the chapter files.",
                    tool_calls=chapter_reads,
                ),
                Message.tool_result_message(
                    tool_call_id="read-1",
                    display_content=first_heading,
                    result_content=first_heading,
                ),
                Message.tool_result_message(
                    tool_call_id="read-2",
                    display_content=second_heading,
                    result_content=second_heading,
                ),
            ]

            facts = summarize_confirmed_facts(transcript, max_items=2)

            assert facts is None
      
        190
        
        191
        
        192
        def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None:
            """No next step is inferred from an index read plus one chapter heading read.

            The transcript has read calls for the index and one chapter, and a
            result only for the chapter. The test expects
            infer_preferred_next_step to return None.

            NOTE(review): the test name says "uses_confirmed_chapter_pairs", yet
            the transcript contains no "old -> new" pairs and the assertion is for
            None - confirm the name still reflects the intended behaviour.
            """
            task = "Update /tmp/fortran/index.html so the chapter list matches the real files."
            chapter_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
            read_targets = (
                ("read-index", "/tmp/fortran/index.html"),
                ("read-1", "/tmp/fortran/chapters/01-introduction.html"),
            )
            transcript = [
                Message(
                    role=Role.ASSISTANT,
                    content="I should inspect the chapter and then update the index.",
                    tool_calls=[
                        ToolCall(id=call_id, name="read", arguments={"file_path": target})
                        for call_id, target in read_targets
                    ],
                ),
                Message.tool_result_message(
                    tool_call_id="read-1",
                    display_content=chapter_heading,
                    result_content=chapter_heading,
                ),
            ]

            suggestion = infer_preferred_next_step(transcript, current_task=task)

            assert suggestion is None
      
        223
        
        224
        
        225
        def test_infer_preferred_next_step_uses_latest_verification_gap() -> None:
            """A failed verification listing missing links yields no facts and no next step.

            The transcript has two read calls and a bash call; the bash result is
            flagged is_error=True and lists two "-> missing" chapter links. The
            test expects both summarize_confirmed_facts and
            infer_preferred_next_step to return None.

            NOTE(review): the test name says "uses_latest_verification_gap" while
            the assertion is for None - confirm the name still reflects the
            intended behaviour.
            """
            chapter_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
            verification_report = (
                "Missing links:\n"
                "chapters/05-control-structures.html -> missing\n"
                "chapters/06-input-output.html -> missing\n"
            )
            planned_calls = [
                ToolCall(
                    id="read-index",
                    name="read",
                    arguments={"file_path": "/tmp/fortran/index.html"},
                ),
                ToolCall(
                    id="read-1",
                    name="read",
                    arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
                ),
                ToolCall(
                    id="verify-1",
                    name="bash",
                    arguments={"command": "python3 - <<'PY'\n...\nPY"},
                ),
            ]
            transcript = [
                Message(
                    role=Role.ASSISTANT,
                    content="I should inspect the chapter and then update the index.",
                    tool_calls=planned_calls,
                ),
                Message.tool_result_message(
                    tool_call_id="read-1",
                    display_content=chapter_heading,
                    result_content=chapter_heading,
                ),
                Message.tool_result_message(
                    tool_call_id="verify-1",
                    display_content=verification_report,
                    result_content=verification_report,
                    is_error=True,
                ),
            ]

            facts = summarize_confirmed_facts(transcript, max_items=2)
            suggestion = infer_preferred_next_step(
                transcript,
                current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.",
            )

            assert facts is None
            assert suggestion is None
      
        277
        
        278
        
        279
        def test_compact_session_messages_uses_single_continuation_instruction_block() -> None:
            """The compacted head message carries exactly one continuation block.

            Five messages are compacted with keep_last_messages=2; the test
            expects "Continuation instructions:" to occur once in the first
            returned message.
            """
            turns = (
                (Role.USER, "Task framing"),
                (Role.ASSISTANT, "Initial plan"),
                (Role.USER, "Keep going"),
                (Role.ASSISTANT, "Still working"),
                (Role.USER, "Use the known mapping"),
            )
            transcript = [Message(role=role, content=text) for role, text in turns]

            outcome = compact_session_messages(
                transcript,
                keep_last_messages=2,
                current_task="Repair the table of contents links",
            )

            assert outcome is not None
            head = outcome.messages[0].content
            assert head.count("Continuation instructions:") == 1
      
        296
        
        297
        
        298
        def test_resolve_auto_compaction_threshold_uses_context_window_as_upper_bound() -> None:
            """Pin the resolved threshold for a 100_000 request across three window sizes.

            Expected values: 98_304 for a 131_072 window, 100_000 for a 262_144
            window, and 12_000 for an 8_192 window.

            NOTE(review): 98_304 is 0.75 * 131_072, and the 8_192 case resolves
            to a value larger than the window itself - presumably a fraction cap
            plus a minimum threshold; confirm against the implementation.
            """
            requested = 100_000
            expectations = (
                (131_072, 98_304),
                (262_144, 100_000),
                (8_192, 12_000),
            )
            for window, expected in expectations:
                resolved = resolve_auto_compaction_input_tokens_threshold(
                    requested,
                    context_window=window,
                )
                assert resolved == expected

1	"""Tests for transcript compaction and summary compression."""
2
3	from __future__ import annotations
4
5	from loader.llm.base import Message, Role, ToolCall
6	from loader.runtime.compaction import (
7	SummaryCompressionBudget,
8	build_session_summary,
9	compact_session_messages,
10	compress_summary,
11	infer_preferred_next_step,
12	resolve_auto_compaction_input_tokens_threshold,
13	summarize_confirmed_facts,
14	)
15
16
def test_compress_summary_dedupes_lines_and_collapses_whitespace() -> None:
    """Check duplicate-line removal and whitespace squeezing in compress_summary.

    Each bullet appears twice in the input. The first "Scope" bullet is
    deliberately padded with runs of spaces so that the test exercises
    whitespace collapsing: it only counts as a duplicate of the single-spaced
    bullet once the padding is squeezed. Two removed duplicates are expected.
    """
    summary = "\n".join(
        [
            "Conversation summary:",
            # Padded on purpose; with single spaces this line would be a
            # trivial duplicate and whitespace collapsing would go untested.
            "- Scope:   compact   earlier   messages.",
            "- Scope: compact earlier messages.",
            "- Current work: finish session persistence.",
            "- Current work: finish session persistence.",
        ]
    )

    result = compress_summary(summary, budget=SummaryCompressionBudget())

    assert result.removed_duplicate_lines == 2
    assert "- Scope: compact earlier messages." in result.summary
    # Must be the multi-space spelling: a single-space " compact earlier" is a
    # substring of the bullet asserted above and could never be absent.
    assert "  compact   earlier" not in result.summary
33
34
def test_compact_session_messages_preserves_recent_messages() -> None:
    """Compacting six messages with keep_last_messages=4 keeps the tail intact.

    Expects two messages to be counted as removed, the contents of the last
    four messages to be unchanged, and the first returned message to start
    with "[COMPACTED CONTEXT]" and contain continuation instructions.
    """
    keep = 4
    turns = (
        (Role.USER, "First task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Focus on runtime quality"),
        (Role.ASSISTANT, "Tracked updated files"),
        (Role.USER, "Verify the result"),
        (Role.ASSISTANT, "Verification passed"),
    )
    transcript = [Message(role=role, content=text) for role, text in turns]

    outcome = compact_session_messages(
        transcript,
        keep_last_messages=keep,
        current_task="Improve Loader runtime continuity",
    )

    assert outcome is not None
    assert outcome.removed_message_count == 2

    kept_contents = [entry.content for entry in outcome.messages[-keep:]]
    expected_contents = [entry.content for entry in transcript[-keep:]]
    assert kept_contents == expected_contents

    head = outcome.messages[0].content
    assert head.startswith("[COMPACTED CONTEXT]")
    assert "Continuation instructions:" in head
58
59
def test_build_session_summary_skips_nested_compacted_context_content() -> None:
    """An earlier "[COMPACTED CONTEXT]" message must not be echoed as new content.

    The transcript starts with a user message that is itself a compacted
    summary. The test expects that text not to show up under "Recent user
    requests" or "Pending work", and expects the summary to carry the
    "Previously compacted context retained." bullet instead.
    """
    earlier_compaction = (
        "[COMPACTED CONTEXT]\nConversation summary:\n"
        "- Scope: older work\n- Current work: old state"
    )
    transcript = [
        Message(role=Role.USER, content=earlier_compaction),
        Message(role=Role.ASSISTANT, content="Read the chapter index."),
        Message(role=Role.USER, content="Update the chapter links."),
    ]

    digest = build_session_summary(
        transcript,
        previous_summary="[COMPACTED CONTEXT]\nConversation summary:\n- Scope: older work",
        current_task="Repair the table of contents links",
    )

    leaked_sections = (
        "Recent user requests: [COMPACTED CONTEXT]",
        "Pending work: [COMPACTED CONTEXT]",
    )
    for leaked in leaked_sections:
        assert leaked not in digest
    assert "- Previously compacted context retained." in digest
82
83
def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None:
    """The summary keeps recorded rename pairs and names a preferred next step.

    The transcript holds a notepad observation with "old -> new" file pairs,
    two read calls (index and setup chapter), the setup chapter listing, and a
    glob listing. The test expects the rename pair to be reported under
    "Confirmed facts:", the chapter title read NOT to be turned into a
    "file = title" fact, and the index path to appear in backticks.
    """
    # Line numbers are left-padded and tab-separated; presumably this mimics
    # the read tool's numbered output, so the padding is kept exact. The same
    # text is supplied as both display and result content.
    setup_listing = (
        "   1\t<!DOCTYPE html>\n"
        "   2\t<html>\n"
        "  61\t<h1>Chapter 2: Setting Up Fortran</h1>\n"
        "  62\t</html>\n"
    )
    rename_note = Message(
        role=Role.TOOL,
        content=(
            "Observation [notepad_write_working]: Result: "
            "02-basic-syntax.html -> 02-setup.html\n"
            "03-variables-data-types.html -> 03-basics.html"
        ),
    )
    index_read = Message(
        role=Role.ASSISTANT,
        content="Checking the index before editing it.",
        tool_calls=[
            ToolCall(
                id="read-1",
                name="read",
                arguments={"file_path": "~/Loader/guides/fortran/index.html"},
            )
        ],
    )
    setup_read = Message(
        role=Role.ASSISTANT,
        content="Inspecting the setup chapter title.",
        tool_calls=[
            ToolCall(
                id="read-2",
                name="read",
                arguments={"file_path": "~/Loader/guides/fortran/chapters/02-setup.html"},
            )
        ],
    )
    setup_result = Message.tool_result_message(
        tool_call_id="read-2",
        display_content=setup_listing,
        result_content=setup_listing,
    )
    chapter_glob = Message(
        role=Role.TOOL,
        content=(
            "Observation [glob]: Result: "
            "/Users/mfwolffe/Loader/guides/fortran/chapters/01-introduction.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/02-setup.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/03-basics.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/04-variables.html"
        ),
    )
    transcript = [rename_note, index_read, setup_read, setup_result, chapter_glob]

    digest = build_session_summary(
        transcript,
        current_task="Update ~/Loader/guides/fortran/index.html with the correct chapter links.",
    )

    assert "Confirmed facts:" in digest
    assert "02-basic-syntax.html -> 02-setup.html" in digest
    assert "02-setup.html = Chapter 2: Setting Up Fortran" not in digest
    assert "Preferred next step:" in digest
    assert "`~/Loader/guides/fortran/index.html`" in digest
155
156
def test_summarize_confirmed_facts_ignores_reference_chapter_title_reads() -> None:
    """Reading chapter headings alone must not produce confirmed facts.

    The transcript contains two read calls on chapter files and their results
    (an <h1> line and a <title> line). The test expects
    summarize_confirmed_facts to return None for it.
    """
    first_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    second_heading = "<title>Chapter 2: Setting Up Fortran</title>\n"
    chapter_reads = [
        ToolCall(id=call_id, name="read", arguments={"file_path": chapter_path})
        for call_id, chapter_path in (
            ("read-1", "/tmp/fortran/chapters/01-introduction.html"),
            ("read-2", "/tmp/fortran/chapters/02-setup.html"),
        )
    ]
    transcript = [
        Message(
            role=Role.ASSISTANT,
            content="I will inspect the chapter files.",
            tool_calls=chapter_reads,
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=first_heading,
            result_content=first_heading,
        ),
        Message.tool_result_message(
            tool_call_id="read-2",
            display_content=second_heading,
            result_content=second_heading,
        ),
    ]

    facts = summarize_confirmed_facts(transcript, max_items=2)

    assert facts is None
190
191
def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None:
    """No next step is inferred from an index read plus one chapter heading read.

    The transcript has read calls for the index and one chapter, and a result
    only for the chapter. The test expects infer_preferred_next_step to
    return None.

    NOTE(review): the test name says "uses_confirmed_chapter_pairs", yet the
    transcript contains no "old -> new" pairs and the assertion is for None -
    confirm the name still reflects the intended behaviour.
    """
    task = "Update /tmp/fortran/index.html so the chapter list matches the real files."
    chapter_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    read_targets = (
        ("read-index", "/tmp/fortran/index.html"),
        ("read-1", "/tmp/fortran/chapters/01-introduction.html"),
    )
    transcript = [
        Message(
            role=Role.ASSISTANT,
            content="I should inspect the chapter and then update the index.",
            tool_calls=[
                ToolCall(id=call_id, name="read", arguments={"file_path": target})
                for call_id, target in read_targets
            ],
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=chapter_heading,
            result_content=chapter_heading,
        ),
    ]

    suggestion = infer_preferred_next_step(transcript, current_task=task)

    assert suggestion is None
223
224
def test_infer_preferred_next_step_uses_latest_verification_gap() -> None:
    """A failed verification listing missing links yields no facts and no next step.

    The transcript has two read calls and a bash call; the bash result is
    flagged is_error=True and lists two "-> missing" chapter links. The test
    expects both summarize_confirmed_facts and infer_preferred_next_step to
    return None.

    NOTE(review): the test name says "uses_latest_verification_gap" while the
    assertion is for None - confirm the name still reflects the intended
    behaviour.
    """
    chapter_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    verification_report = (
        "Missing links:\n"
        "chapters/05-control-structures.html -> missing\n"
        "chapters/06-input-output.html -> missing\n"
    )
    planned_calls = [
        ToolCall(
            id="read-index",
            name="read",
            arguments={"file_path": "/tmp/fortran/index.html"},
        ),
        ToolCall(
            id="read-1",
            name="read",
            arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
        ),
        ToolCall(
            id="verify-1",
            name="bash",
            arguments={"command": "python3 - <<'PY'\n...\nPY"},
        ),
    ]
    transcript = [
        Message(
            role=Role.ASSISTANT,
            content="I should inspect the chapter and then update the index.",
            tool_calls=planned_calls,
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=chapter_heading,
            result_content=chapter_heading,
        ),
        Message.tool_result_message(
            tool_call_id="verify-1",
            display_content=verification_report,
            result_content=verification_report,
            is_error=True,
        ),
    ]

    facts = summarize_confirmed_facts(transcript, max_items=2)
    suggestion = infer_preferred_next_step(
        transcript,
        current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.",
    )

    assert facts is None
    assert suggestion is None
277
278
def test_compact_session_messages_uses_single_continuation_instruction_block() -> None:
    """The compacted head message carries exactly one continuation block.

    Five messages are compacted with keep_last_messages=2; the test expects
    "Continuation instructions:" to occur once in the first returned message.
    """
    turns = (
        (Role.USER, "Task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Keep going"),
        (Role.ASSISTANT, "Still working"),
        (Role.USER, "Use the known mapping"),
    )
    transcript = [Message(role=role, content=text) for role, text in turns]

    outcome = compact_session_messages(
        transcript,
        keep_last_messages=2,
        current_task="Repair the table of contents links",
    )

    assert outcome is not None
    head = outcome.messages[0].content
    assert head.count("Continuation instructions:") == 1
296
297
def test_resolve_auto_compaction_threshold_uses_context_window_as_upper_bound() -> None:
    """Pin the resolved threshold for a 100_000 request across three window sizes.

    Expected values: 98_304 for a 131_072 window, 100_000 for a 262_144
    window, and 12_000 for an 8_192 window.

    NOTE(review): 98_304 is 0.75 * 131_072, and the 8_192 case resolves to a
    value larger than the window itself - presumably a fraction cap plus a
    minimum threshold; confirm against the implementation.
    """
    requested = 100_000
    expectations = (
        (131_072, 98_304),
        (262_144, 100_000),
        (8_192, 12_000),
    )
    for window, expected in expectations:
        resolved = resolve_auto_compaction_input_tokens_threshold(
            requested,
            context_window=window,
        )
        assert resolved == expected