| 1 | """Tests for transcript compaction and summary compression.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from loader.llm.base import Message, Role, ToolCall |
| 6 | from loader.runtime.compaction import ( |
| 7 | SummaryCompressionBudget, |
| 8 | build_session_summary, |
| 9 | compact_session_messages, |
| 10 | compress_summary, |
| 11 | infer_preferred_next_step, |
| 12 | resolve_auto_compaction_input_tokens_threshold, |
| 13 | summarize_confirmed_facts, |
| 14 | ) |
| 15 | |
| 16 | |
def test_compress_summary_dedupes_lines_and_collapses_whitespace() -> None:
    """Check that compress_summary drops repeated lines and normalizes spacing.

    The scope line is written with runs of several spaces so the
    whitespace-collapsing half of the test is actually exercised. Both copies
    of each duplicated line are byte-identical, so the expected duplicate
    count does not depend on whether deduplication happens before or after
    whitespace normalization.
    """
    scope_line = "- Scope:   compact   earlier messages."
    work_line = "- Current work: finish session persistence."
    summary = "\n".join(
        [
            "Conversation summary:",
            scope_line,
            scope_line,
            work_line,
            work_line,
        ]
    )

    result = compress_summary(summary, budget=SummaryCompressionBudget())

    # One repeat of the scope line plus one repeat of the current-work line.
    assert result.removed_duplicate_lines == 2
    # The surviving scope line must appear in its single-spaced form ...
    assert "- Scope: compact earlier messages." in result.summary
    # ... and no multi-space run from the input may leak through. The fragment
    # starts with two spaces so it cannot match the normalized line above.
    assert "  compact" not in result.summary
| 33 | |
| 34 | |
def test_compact_session_messages_preserves_recent_messages() -> None:
    """Compacting six turns with ``keep_last_messages=4`` keeps the last four verbatim.

    Also checks that the first message of the compacted transcript is the
    compacted-context header carrying the continuation instructions.
    """
    keep = 4
    turns = (
        (Role.USER, "First task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Focus on runtime quality"),
        (Role.ASSISTANT, "Tracked updated files"),
        (Role.USER, "Verify the result"),
        (Role.ASSISTANT, "Verification passed"),
    )
    transcript = [Message(role=role, content=text) for role, text in turns]

    compacted = compact_session_messages(
        transcript,
        keep_last_messages=keep,
        current_task="Improve Loader runtime continuity",
    )

    assert compacted is not None
    assert compacted.removed_message_count == 2

    # The tail of the compacted transcript must match the original tail.
    expected_tail = [message.content for message in transcript[-keep:]]
    actual_tail = [message.content for message in compacted.messages[-keep:]]
    assert actual_tail == expected_tail

    header = compacted.messages[0].content
    assert header.startswith("[COMPACTED CONTEXT]")
    assert "Continuation instructions:" in header
    assert "authoritative over older summaries or durable memory notes" in header
| 62 | |
| 63 | |
def test_compact_session_messages_includes_active_dod_summary() -> None:
    """The ``active_dod_summary`` text is surfaced as an "Active DoD" summary line."""
    dod_summary = (
        "status=fixing; last verification=failed; "
        "latest failed verifier=/tmp/guide/chapter.html: thin content"
    )
    transcript = [
        Message(role=Role.USER, content="Create the generated guide."),
        Message(role=Role.ASSISTANT, content="Wrote the guide files."),
        Message(
            role=Role.TOOL,
            content="Observation [notepad_read]: Result: guide complete",
        ),
        Message(role=Role.ASSISTANT, content="I will finish."),
        Message(role=Role.USER, content="Continue repairing."),
    ]

    compacted = compact_session_messages(
        transcript,
        keep_last_messages=2,
        current_task="Create the generated guide.",
        active_dod_summary=dod_summary,
    )

    assert compacted is not None
    summary_text = compacted.summary
    assert "- Active DoD: status=fixing; last verification=failed;" in summary_text
    assert "/tmp/guide/chapter.html: thin content" in summary_text
| 89 | |
| 90 | |
def test_build_session_summary_skips_nested_compacted_context_content() -> None:
    """An earlier compacted-context message is not echoed into the new summary.

    The summary should not list the old compacted block under recent requests
    or pending work, and should carry the "previously compacted" marker line.
    """
    earlier_compaction = Message(
        role=Role.USER,
        content=(
            "[COMPACTED CONTEXT]\nConversation summary:\n"
            "- Scope: older work\n- Current work: old state"
        ),
    )
    transcript = [
        earlier_compaction,
        Message(role=Role.ASSISTANT, content="Read the chapter index."),
        Message(role=Role.USER, content="Update the chapter links."),
    ]

    summary = build_session_summary(
        transcript,
        previous_summary="[COMPACTED CONTEXT]\nConversation summary:\n- Scope: older work",
        current_task="Repair the table of contents links",
    )

    # Neither section may start with the nested compacted block.
    for leaked_fragment in (
        "Recent user requests: [COMPACTED CONTEXT]",
        "Pending work: [COMPACTED CONTEXT]",
    ):
        assert leaked_fragment not in summary
    assert "- Previously compacted context retained." in summary
| 113 | |
| 114 | |
def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None:
    """The summary keeps the recorded filename mapping and names a next step.

    The transcript holds a working-notepad observation with old -> new chapter
    filenames, two ``read`` calls, a tool result for the second read, and a
    ``glob`` observation. The chapter title seen in the read result must not
    be turned into a ``file = title`` fact.
    """
    index_path = "~/Loader/guides/fortran/index.html"
    setup_chapter_path = "~/Loader/guides/fortran/chapters/02-setup.html"
    # The same text is passed as both display and result content of the read.
    setup_chapter_listing = (
        " 1\t<!DOCTYPE html>\n"
        " 2\t<html>\n"
        " 61\t<h1>Chapter 2: Setting Up Fortran</h1>\n"
        " 62\t</html>\n"
    )

    notepad_observation = Message(
        role=Role.TOOL,
        content=(
            "Observation [notepad_write_working]: Result: "
            "02-basic-syntax.html -> 02-setup.html\n"
            "03-variables-data-types.html -> 03-basics.html"
        ),
    )
    read_index_turn = Message(
        role=Role.ASSISTANT,
        content="Checking the index before editing it.",
        tool_calls=[
            ToolCall(id="read-1", name="read", arguments={"file_path": index_path}),
        ],
    )
    read_setup_turn = Message(
        role=Role.ASSISTANT,
        content="Inspecting the setup chapter title.",
        tool_calls=[
            ToolCall(
                id="read-2",
                name="read",
                arguments={"file_path": setup_chapter_path},
            ),
        ],
    )
    read_setup_result = Message.tool_result_message(
        tool_call_id="read-2",
        display_content=setup_chapter_listing,
        result_content=setup_chapter_listing,
    )
    glob_observation = Message(
        role=Role.TOOL,
        content=(
            "Observation [glob]: Result: "
            "/Users/mfwolffe/Loader/guides/fortran/chapters/01-introduction.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/02-setup.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/03-basics.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/04-variables.html"
        ),
    )
    transcript = [
        notepad_observation,
        read_index_turn,
        read_setup_turn,
        read_setup_result,
        glob_observation,
    ]

    summary = build_session_summary(
        transcript,
        current_task=(
            "Update ~/Loader/guides/fortran/index.html with the correct chapter links."
        ),
    )

    assert "Confirmed facts:" in summary
    assert "02-basic-syntax.html -> 02-setup.html" in summary
    assert "02-setup.html = Chapter 2: Setting Up Fortran" not in summary
    assert "Preferred next step:" in summary
    assert "`~/Loader/guides/fortran/index.html`" in summary
| 186 | |
| 187 | |
def test_summarize_confirmed_facts_ignores_reference_chapter_title_reads() -> None:
    """Chapter titles seen in plain ``read`` results yield no confirmed facts."""
    intro_markup = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    setup_markup = "<title>Chapter 2: Setting Up Fortran</title>\n"

    inspect_turn = Message(
        role=Role.ASSISTANT,
        content="I will inspect the chapter files.",
        tool_calls=[
            ToolCall(
                id="read-1",
                name="read",
                arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
            ),
            ToolCall(
                id="read-2",
                name="read",
                arguments={"file_path": "/tmp/fortran/chapters/02-setup.html"},
            ),
        ],
    )
    transcript = [
        inspect_turn,
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=intro_markup,
            result_content=intro_markup,
        ),
        Message.tool_result_message(
            tool_call_id="read-2",
            display_content=setup_markup,
            result_content=setup_markup,
        ),
    ]

    facts = summarize_confirmed_facts(transcript, max_items=2)

    assert facts is None
| 221 | |
| 222 | |
def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None:
    """With only an index read and one chapter read result, no next step is inferred.

    NOTE(review): the test name says "uses", but the assertion expects ``None``;
    confirm this is the intended contract.
    """
    chapter_markup = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    planning_turn = Message(
        role=Role.ASSISTANT,
        content="I should inspect the chapter and then update the index.",
        tool_calls=[
            ToolCall(
                id="read-index",
                name="read",
                arguments={"file_path": "/tmp/fortran/index.html"},
            ),
            ToolCall(
                id="read-1",
                name="read",
                arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
            ),
        ],
    )
    chapter_result = Message.tool_result_message(
        tool_call_id="read-1",
        display_content=chapter_markup,
        result_content=chapter_markup,
    )

    suggestion = infer_preferred_next_step(
        [planning_turn, chapter_result],
        current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.",
    )

    assert suggestion is None
| 254 | |
| 255 | |
def test_infer_preferred_next_step_uses_latest_verification_gap() -> None:
    """A failed verification result yields neither confirmed facts nor a next step.

    The transcript has one assistant turn issuing two reads and a bash
    verification, followed by a read result and an error-flagged verification
    result listing missing links.

    NOTE(review): the test name says "uses", but both assertions expect
    ``None``; confirm this is the intended contract.
    """
    task = "Update /tmp/fortran/index.html so the chapter list matches the real files."
    chapter_markup = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    verification_report = (
        "Missing links:\n"
        "chapters/05-control-structures.html -> missing\n"
        "chapters/06-input-output.html -> missing\n"
    )

    planning_turn = Message(
        role=Role.ASSISTANT,
        content="I should inspect the chapter and then update the index.",
        tool_calls=[
            ToolCall(
                id="read-index",
                name="read",
                arguments={"file_path": "/tmp/fortran/index.html"},
            ),
            ToolCall(
                id="read-1",
                name="read",
                arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
            ),
            ToolCall(
                id="verify-1",
                name="bash",
                arguments={"command": "python3 - <<'PY'\n...\nPY"},
            ),
        ],
    )
    transcript = [
        planning_turn,
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=chapter_markup,
            result_content=chapter_markup,
        ),
        Message.tool_result_message(
            tool_call_id="verify-1",
            display_content=verification_report,
            result_content=verification_report,
            is_error=True,
        ),
    ]

    # Both helpers are called before any assertion, as in the original.
    facts = summarize_confirmed_facts(transcript, max_items=2)
    suggestion = infer_preferred_next_step(transcript, current_task=task)

    assert facts is None
    assert suggestion is None
| 308 | |
| 309 | |
def test_compact_session_messages_uses_single_continuation_instruction_block() -> None:
    """The compacted header contains the continuation instructions exactly once."""
    turns = (
        (Role.USER, "Task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Keep going"),
        (Role.ASSISTANT, "Still working"),
        (Role.USER, "Use the known mapping"),
    )
    transcript = [Message(role=role, content=text) for role, text in turns]

    compacted = compact_session_messages(
        transcript,
        keep_last_messages=2,
        current_task="Repair the table of contents links",
    )

    assert compacted is not None
    header = compacted.messages[0].content
    assert header.count("Continuation instructions:") == 1
| 327 | |
| 328 | |
def test_resolve_auto_compaction_threshold_uses_context_window_as_upper_bound() -> None:
    """The configured threshold is capped according to the context window.

    In the three capped cases below the expected value equals three quarters
    of the context window; with the 262_144-token window the configured
    100_000 is returned unchanged.
    """
    configured_threshold = 100_000
    # (context_window, expected resolved threshold), in the original order.
    cases = (
        (131_072, 98_304),
        (262_144, 100_000),
        (8_192, 6_144),
        (16_000, 12_000),
    )

    for context_window, expected in cases:
        resolved = resolve_auto_compaction_input_tokens_threshold(
            configured_threshold,
            context_window=context_window,
        )
        assert resolved == expected