| 1 | """Tests for transcript compaction and summary compression.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from loader.llm.base import Message, Role, ToolCall |
| 6 | from loader.runtime.compaction import ( |
| 7 | SummaryCompressionBudget, |
| 8 | build_session_summary, |
| 9 | compact_session_messages, |
| 10 | compress_summary, |
| 11 | infer_preferred_next_step, |
| 12 | resolve_auto_compaction_input_tokens_threshold, |
| 13 | summarize_confirmed_facts, |
| 14 | ) |
| 15 | |
| 16 | |
def test_compress_summary_dedupes_lines_and_collapses_whitespace() -> None:
    """Check that ``compress_summary`` drops repeated lines and squeezes space runs.

    The input repeats two bullet lines; the "Scope" pair also carries runs of
    several spaces between words. The test expects two duplicate lines to be
    reported as removed, the single-spaced "Scope" line to be present in the
    output, and the multi-space phrase to be gone.
    """
    summary = "\n".join(
        [
            "Conversation summary:",
            # Both copies are byte-identical so the duplicate count does not
            # depend on whether whitespace is normalised before or after
            # duplicate detection.
            "- Scope:   compact   earlier   messages.",
            "- Scope:   compact   earlier   messages.",
            "- Current work: finish session persistence.",
            "- Current work: finish session persistence.",
        ]
    )

    result = compress_summary(summary, budget=SummaryCompressionBudget())

    assert result.removed_duplicate_lines == 2
    assert "- Scope: compact earlier messages." in result.summary
    # The needle must contain a multi-space run: the single-spaced phrase is a
    # substring of the line asserted present above, so checking for its absence
    # would contradict that assertion.
    assert "compact   earlier" not in result.summary
| 33 | |
| 34 | |
def test_compact_session_messages_preserves_recent_messages() -> None:
    """Compacting six messages while keeping four must leave the tail intact.

    Expects two messages to be reported as removed, the last four message
    contents to match the input's last four, and the first resulting message
    to be a compacted-context header that includes continuation instructions.
    """
    keep_last = 4
    turns = (
        (Role.USER, "First task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Focus on runtime quality"),
        (Role.ASSISTANT, "Tracked updated files"),
        (Role.USER, "Verify the result"),
        (Role.ASSISTANT, "Verification passed"),
    )
    transcript = [Message(role=role, content=text) for role, text in turns]

    compacted = compact_session_messages(
        transcript,
        keep_last_messages=keep_last,
        current_task="Improve Loader runtime continuity",
    )

    assert compacted is not None
    assert compacted.removed_message_count == 2

    # Compare by content only, as the original assertion does.
    kept_tail = [entry.content for entry in compacted.messages[-keep_last:]]
    original_tail = [entry.content for entry in transcript[-keep_last:]]
    assert kept_tail == original_tail

    header_text = compacted.messages[0].content
    assert header_text.startswith("[COMPACTED CONTEXT]")
    assert "Continuation instructions:" in header_text
| 58 | |
| 59 | |
def test_build_session_summary_skips_nested_compacted_context_content() -> None:
    """An earlier compacted-context message must not leak into the new summary.

    The transcript starts with a user message that is itself a compacted
    context block. The summary must not quote that marker under "Recent user
    requests" or "Pending work", and must carry the retained-context line.
    """
    earlier_compaction = Message(
        role=Role.USER,
        content=(
            "[COMPACTED CONTEXT]\nConversation summary:\n"
            "- Scope: older work\n- Current work: old state"
        ),
    )
    transcript = [
        earlier_compaction,
        Message(role=Role.ASSISTANT, content="Read the chapter index."),
        Message(role=Role.USER, content="Update the chapter links."),
    ]

    summary = build_session_summary(
        transcript,
        previous_summary="[COMPACTED CONTEXT]\nConversation summary:\n- Scope: older work",
        current_task="Repair the table of contents links",
    )

    leaked_fragments = (
        "Recent user requests: [COMPACTED CONTEXT]",
        "Pending work: [COMPACTED CONTEXT]",
    )
    for fragment in leaked_fragments:
        assert fragment not in summary
    assert "- Previously compacted context retained." in summary
| 82 | |
| 83 | |
def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None:
    """The summary keeps recorded file mappings and names a preferred next step.

    The transcript holds a notepad observation with two filename mappings, two
    ``read`` tool calls (index and setup chapter), the setup chapter's read
    result, and a glob observation. The summary must contain the "Confirmed
    facts:" and "Preferred next step:" sections, the first mapping, and the
    back-quoted index path. It must not contain a chapter-title fact derived
    from the setup chapter read.
    """
    index_path = "~/Loader/guides/fortran/index.html"
    setup_chapter_path = "~/Loader/guides/fortran/chapters/02-setup.html"

    # The same text is used for both the display and the result payload.
    setup_chapter_listing = (
        " 1\t<!DOCTYPE html>\n"
        " 2\t<html>\n"
        " 61\t<h1>Chapter 2: Setting Up Fortran</h1>\n"
        " 62\t</html>\n"
    )

    notepad_observation = Message(
        role=Role.TOOL,
        content=(
            "Observation [notepad_write_working]: Result: "
            "02-basic-syntax.html -> 02-setup.html\n"
            "03-variables-data-types.html -> 03-basics.html"
        ),
    )
    read_index_turn = Message(
        role=Role.ASSISTANT,
        content="Checking the index before editing it.",
        tool_calls=[
            ToolCall(id="read-1", name="read", arguments={"file_path": index_path}),
        ],
    )
    read_setup_turn = Message(
        role=Role.ASSISTANT,
        content="Inspecting the setup chapter title.",
        tool_calls=[
            ToolCall(
                id="read-2",
                name="read",
                arguments={"file_path": setup_chapter_path},
            ),
        ],
    )
    setup_read_result = Message.tool_result_message(
        tool_call_id="read-2",
        display_content=setup_chapter_listing,
        result_content=setup_chapter_listing,
    )
    glob_observation = Message(
        role=Role.TOOL,
        content=(
            "Observation [glob]: Result: "
            "/Users/mfwolffe/Loader/guides/fortran/chapters/01-introduction.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/02-setup.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/03-basics.html\n"
            "/Users/mfwolffe/Loader/guides/fortran/chapters/04-variables.html"
        ),
    )

    summary = build_session_summary(
        [
            notepad_observation,
            read_index_turn,
            read_setup_turn,
            setup_read_result,
            glob_observation,
        ],
        current_task=(
            "Update ~/Loader/guides/fortran/index.html with the correct chapter links."
        ),
    )

    expected_present = (
        "Confirmed facts:",
        "02-basic-syntax.html -> 02-setup.html",
        "Preferred next step:",
        "`~/Loader/guides/fortran/index.html`",
    )
    for fragment in expected_present:
        assert fragment in summary
    assert "02-setup.html = Chapter 2: Setting Up Fortran" not in summary
| 155 | |
| 156 | |
def test_summarize_confirmed_facts_ignores_reference_chapter_title_reads() -> None:
    """Reading chapter files for their titles must not yield confirmed facts.

    One assistant turn issues two ``read`` calls on chapter files; the results
    contain only an ``<h1>`` and a ``<title>`` heading. With ``max_items=2``
    the function is expected to return ``None``.
    """
    intro_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    setup_heading = "<title>Chapter 2: Setting Up Fortran</title>\n"

    chapter_reads = [
        ToolCall(
            id="read-1",
            name="read",
            arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
        ),
        ToolCall(
            id="read-2",
            name="read",
            arguments={"file_path": "/tmp/fortran/chapters/02-setup.html"},
        ),
    ]
    transcript = [
        Message(
            role=Role.ASSISTANT,
            content="I will inspect the chapter files.",
            tool_calls=chapter_reads,
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=intro_heading,
            result_content=intro_heading,
        ),
        Message.tool_result_message(
            tool_call_id="read-2",
            display_content=setup_heading,
            result_content=setup_heading,
        ),
    ]

    assert summarize_confirmed_facts(transcript, max_items=2) is None
| 190 | |
| 191 | |
def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None:
    """Pin the next-step inference for an index read plus one chapter read.

    The assistant turn reads the index and one chapter; only the chapter read
    has a result message. The test expects no preferred next step (``None``).

    NOTE(review): the test name suggests a step derived from chapter pairs,
    but the assertion pins ``None`` -- confirm which one reflects the intended
    behaviour.
    """
    chapter_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"

    read_index = ToolCall(
        id="read-index",
        name="read",
        arguments={"file_path": "/tmp/fortran/index.html"},
    )
    read_chapter = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
    )
    transcript = [
        Message(
            role=Role.ASSISTANT,
            content="I should inspect the chapter and then update the index.",
            tool_calls=[read_index, read_chapter],
        ),
        Message.tool_result_message(
            tool_call_id="read-1",
            display_content=chapter_heading,
            result_content=chapter_heading,
        ),
    ]

    inferred = infer_preferred_next_step(
        transcript,
        current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.",
    )

    assert inferred is None
| 223 | |
| 224 | |
def test_infer_preferred_next_step_uses_latest_verification_gap() -> None:
    """Pin fact and next-step inference when a verification command fails.

    The assistant turn issues two ``read`` calls and one ``bash`` verification
    call. The chapter read returns a heading; the verification result is
    flagged as an error and lists two missing chapter links. Both
    ``summarize_confirmed_facts`` and ``infer_preferred_next_step`` are
    expected to return ``None``.

    NOTE(review): the test name suggests the verification gap drives the next
    step, but the assertions pin ``None`` -- confirm which one reflects the
    intended behaviour.
    """
    task = "Update /tmp/fortran/index.html so the chapter list matches the real files."
    chapter_heading = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    # The same report is used for both the display and the result payload.
    missing_links_report = (
        "Missing links:\n"
        "chapters/05-control-structures.html -> missing\n"
        "chapters/06-input-output.html -> missing\n"
    )

    assistant_turn = Message(
        role=Role.ASSISTANT,
        content="I should inspect the chapter and then update the index.",
        tool_calls=[
            ToolCall(
                id="read-index",
                name="read",
                arguments={"file_path": "/tmp/fortran/index.html"},
            ),
            ToolCall(
                id="read-1",
                name="read",
                arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"},
            ),
            ToolCall(
                id="verify-1",
                name="bash",
                arguments={"command": "python3 - <<'PY'\n...\nPY"},
            ),
        ],
    )
    chapter_result = Message.tool_result_message(
        tool_call_id="read-1",
        display_content=chapter_heading,
        result_content=chapter_heading,
    )
    failed_verification = Message.tool_result_message(
        tool_call_id="verify-1",
        display_content=missing_links_report,
        result_content=missing_links_report,
        is_error=True,
    )
    transcript = [assistant_turn, chapter_result, failed_verification]

    facts = summarize_confirmed_facts(transcript, max_items=2)
    inferred = infer_preferred_next_step(transcript, current_task=task)

    assert facts is None
    assert inferred is None
| 277 | |
| 278 | |
def test_compact_session_messages_uses_single_continuation_instruction_block() -> None:
    """The compacted header must contain the continuation heading exactly once.

    Five alternating user/assistant messages are compacted while keeping the
    last two; the first resulting message is then checked for a single
    occurrence of "Continuation instructions:".
    """
    turns = (
        (Role.USER, "Task framing"),
        (Role.ASSISTANT, "Initial plan"),
        (Role.USER, "Keep going"),
        (Role.ASSISTANT, "Still working"),
        (Role.USER, "Use the known mapping"),
    )
    transcript = [Message(role=role, content=text) for role, text in turns]

    compacted = compact_session_messages(
        transcript,
        keep_last_messages=2,
        current_task="Repair the table of contents links",
    )

    assert compacted is not None
    header_text = compacted.messages[0].content
    occurrences = header_text.count("Continuation instructions:")
    assert occurrences == 1
| 296 | |
| 297 | |
def test_resolve_auto_compaction_threshold_uses_context_window_as_upper_bound() -> None:
    """Pin the resolved auto-compaction threshold for three context windows.

    A configured threshold of 100_000 is resolved against each window size and
    compared with the expected value listed alongside it.
    """
    configured_threshold = 100_000
    # (context_window, expected resolved threshold)
    cases = (
        (131_072, 98_304),
        (262_144, 100_000),
        # NOTE(review): 12_000 is larger than the 8_192 window itself --
        # presumably a minimum floor applies; confirm against the implementation.
        (8_192, 12_000),
    )

    for window, expected in cases:
        resolved = resolve_auto_compaction_input_tokens_threshold(
            configured_threshold,
            context_window=window,
        )
        assert resolved == expected