Preserve active DoD in compaction
Authored by mfwolffe <wolffemf@dukes.jmu.edu>

- SHA: 7f616beb7a05d0e125379de2ed98e8d437247e59
- Parents: cce89ea
- Tree: 382889a

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/compaction.py | 8 | 0 |
| M | src/loader/runtime/session.py | 115 | 0 |
| M | tests/test_compaction.py | 31 | 0 |
| M | tests/test_session_state.py | 53 | 0 |
src/loader/runtime/compaction.py (modified)
@@ -167,6 +167,7 @@ def compact_session_messages( | ||
| 167 | 167 | budget: SummaryCompressionBudget | None = None, |
| 168 | 168 | previous_summary: str | None = None, |
| 169 | 169 | current_task: str | None = None, |
| 170 | + active_dod_summary: str | None = None, | |
| 170 | 171 | original_input_tokens: int | None = None, |
| 171 | 172 | ) -> SessionCompactionResult | None: |
| 172 | 173 | """Compact older messages into one continuation summary message.""" |
@@ -180,6 +181,7 @@ def compact_session_messages( | ||
| 180 | 181 | removed_messages, |
| 181 | 182 | previous_summary=previous_summary, |
| 182 | 183 | current_task=current_task, |
| 184 | + active_dod_summary=active_dod_summary, | |
| 183 | 185 | ) |
| 184 | 186 | compression = compress_summary(summary_text, budget=budget) |
| 185 | 187 | summary_message = Message( |
@@ -190,6 +192,8 @@ def compact_session_messages( | ||
| 190 | 192 | "Continuation instructions:\n" |
| 191 | 193 | "- Continue from the preserved recent messages.\n" |
| 192 | 194 | "- Honor the active DoD, workflow mode, and permission mode.\n" |
| 195 | + "- Treat active DoD/check-failing messages as authoritative over " | |
| 196 | + "older summaries or durable memory notes.\n" | |
| 193 | 197 | "- Do not ask the user to repeat already-captured context unless essential." |
| 194 | 198 | ), |
| 195 | 199 | ) |
@@ -212,6 +216,7 @@ def build_session_summary( | ||
| 212 | 216 | *, |
| 213 | 217 | previous_summary: str | None = None, |
| 214 | 218 | current_task: str | None = None, |
| 219 | + active_dod_summary: str | None = None, | |
| 215 | 220 | ) -> str: |
| 216 | 221 | """Build a structured session summary before compression.""" |
| 217 | 222 | |
@@ -266,6 +271,8 @@ def build_session_summary( | ||
| 266 | 271 | lines.append(f"- Confirmed facts: {confirmed_facts}") |
| 267 | 272 | if preferred_next_step: |
| 268 | 273 | lines.append(f"- Preferred next step: {preferred_next_step}") |
| 274 | + if active_dod_summary: | |
| 275 | + lines.append(f"- Active DoD: {active_dod_summary}") | |
| 269 | 276 | if previous_summary: |
| 270 | 277 | lines.append("- Previously compacted context retained.") |
| 271 | 278 | lines.append(f"- Newly compacted context: {len(messages)} earlier message(s) summarized.") |
@@ -386,6 +393,7 @@ def _is_core_detail(line: str) -> bool: | ||
| 386 | 393 | "- Recent user requests:", |
| 387 | 394 | "- Confirmed facts:", |
| 388 | 395 | "- Preferred next step:", |
| 396 | + "- Active DoD:", | |
| 389 | 397 | "- Previously compacted context:", |
| 390 | 398 | "- Newly compacted context:", |
| 391 | 399 | ) |
src/loader/runtime/session.py (modified)
@@ -3,6 +3,7 @@ | ||
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | 5 | import json |
| 6 | +import re | |
| 6 | 7 | import secrets |
| 7 | 8 | from collections.abc import Callable |
| 8 | 9 | from dataclasses import dataclass, field |
@@ -37,6 +38,9 @@ DEFAULT_ROTATE_AFTER_BYTES = 256 * 1024 | ||
| 37 | 38 | MAX_ROTATED_FILES = 3 |
| 38 | 39 | _UNSET = object() |
| 39 | 40 | _REQUEST_TOOL_PAYLOAD_SUMMARY_THRESHOLD = 240 |
# Matches absolute filesystem paths rooted at a handful of well-known
# top-level directories so they can be shortened inside summaries.
# The negative lookbehind requires the leading "/" to start a path token:
# without it the pattern also fired inside URLs ("https://host/home/x"),
# relative paths ("src/home/x") and home-relative paths ("~/tmp/x").
# A match stops at whitespace or ":" so "path:line" suffixes stay intact.
_ABSOLUTE_PATH_SUMMARY_PATTERN = re.compile(
    r"(?<![\w.~/-])(?P<path>/(?:Users|home|tmp|var|private)/[^\s:]+)"
)
| 40 | 44 | |
| 41 | 45 | |
| 42 | 46 | def _project_request_tool_call(tool_call: ToolCall) -> ToolCall | None: |
@@ -87,6 +91,102 @@ def _utc_now() -> str: | ||
| 87 | 91 | return datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ") |
| 88 | 92 | |
| 89 | 93 | |
def _compact_inline_text(value: Any, *, max_chars: int = 220) -> str:
    """Collapse *value* to a single line of at most *max_chars* characters.

    Falsy values (``None``, ``""``, ``0``, ...) become the empty string.
    Every run of whitespace, including newlines, is replaced by one space.
    Text longer than *max_chars* is cut and terminated with ``"..."``.

    Args:
        value: Any object; it is rendered with ``str()``.
        max_chars: Upper bound on the length of the returned string.

    Returns:
        The normalized, possibly truncated text.
    """
    text = " ".join(str(value or "").split())
    if len(text) <= max_chars:
        return text
    if max_chars < 3:
        # No room for the ellipsis: a negative slice index would count from
        # the end of the string and return more than max_chars characters.
        return text[: max(max_chars, 0)]
    return f"{text[: max_chars - 3]}..."
| 99 | + | |
| 100 | + | |
def _shorten_summary_paths(value: str) -> str:
    """Abbreviate absolute paths in *value* to ``.../<parent>/<name>``.

    Every match of ``_ABSOLUTE_PATH_SUMMARY_PATTERN`` is replaced; when the
    matched path has no named parent directory only ``.../<name>`` is kept.
    Text outside the matches is returned unchanged.
    """

    def _tail(found: re.Match[str]) -> str:
        full = Path(found.group("path"))
        folder = full.parent.name
        kept = f"{folder}/{full.name}" if folder else full.name
        return f".../{kept}"

    return _ABSOLUTE_PATH_SUMMARY_PATTERN.sub(_tail, value)
| 110 | + | |
| 111 | + | |
def _preview_sequence(values: Any, *, limit: int = 3) -> str:
    """Render the first *limit* non-blank entries of a list as one string.

    Anything that is not a ``list`` yields ``""``. Entries that are falsy or
    whitespace-only are dropped; the rest are compacted to 140 characters
    each and joined with ``", "``. A trailing ``", ..."`` marks that more
    entries existed than were shown.
    """
    if not isinstance(values, list):
        return ""

    rendered: list[str] = []
    for entry in values:
        if not str(entry or "").strip():
            continue
        rendered.append(_compact_inline_text(entry, max_chars=140))

    if not rendered:
        return ""

    overflow = ", ..." if len(rendered) > limit else ""
    return ", ".join(rendered[:limit]) + overflow
| 126 | + | |
| 127 | + | |
def _latest_failed_evidence_preview(
    evidence_items: Any,
    *,
    limit: int = 3,
) -> str:
    """Return a short preview of the most recent failing evidence entry.

    Entries are scanned newest-first. Non-dict entries and entries whose
    ``passed`` or ``skipped`` field is exactly ``True`` are ignored. For the
    remaining entries the ``output``, ``stdout`` and ``stderr`` fields are
    concatenated, each line is path-shortened and compacted to 180
    characters, and boilerplate header lines are discarded. The first entry
    that yields at least one line wins; up to *limit* lines are joined with
    ``"; "``. ``""`` is returned when nothing qualifies.

    NOTE(review): the rendered diff lost its indentation; this rewrite
    assumes the final ``return ""`` sits after the loop - confirm against
    the repository.
    """
    if not isinstance(evidence_items, list):
        return ""

    # Lower-cased prefixes of header lines that carry no diagnostic detail.
    boilerplate = (
        "exit code ",
        "html guide content quality issues:",
        "missing local html links:",
        "verification:",
    )

    for record in reversed(evidence_items):
        if not isinstance(record, dict):
            continue
        if record.get("passed") is True or record.get("skipped") is True:
            continue

        streams = [record.get(name) for name in ("output", "stdout", "stderr")]
        combined = "\n".join(str(chunk) for chunk in streams if chunk)

        kept: list[str] = []
        for raw in combined.splitlines():
            condensed = _compact_inline_text(
                _shorten_summary_paths(str(raw)),
                max_chars=180,
            )
            if not condensed or condensed.lower().startswith(boilerplate):
                continue
            kept.append(condensed)
            if len(kept) >= limit:
                break

        if kept:
            return "; ".join(kept)

    return ""
| 169 | + | |
| 170 | + | |
def _summarize_active_dod_snapshot(data: dict[str, Any]) -> str | None:
    """Condense a DoD snapshot dict into one ``key=value; ...`` line.

    Reads ``status``, ``last_verification_result``, ``evidence`` and
    ``pending_items`` from *data*; fields that render empty are omitted.
    Returns ``None`` when every field is empty, otherwise the joined text
    compacted to at most 700 characters.
    """
    labelled = (
        ("status", _compact_inline_text(data.get("status"))),
        (
            "last verification",
            _compact_inline_text(data.get("last_verification_result")),
        ),
        (
            "latest failed verifier",
            _latest_failed_evidence_preview(data.get("evidence")),
        ),
        ("pending", _preview_sequence(data.get("pending_items"))),
    )
    fragments = [f"{label}={text}" for label, text in labelled if text]
    if not fragments:
        return None
    return _compact_inline_text("; ".join(fragments), max_chars=700)
| 188 | + | |
| 189 | + | |
| 90 | 190 | def _generate_session_id() -> str: |
| 91 | 191 | timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ") |
| 92 | 192 | return f"{timestamp}-{secrets.token_hex(4)}" |
@@ -826,6 +926,7 @@ class ConversationSession: | ||
| 826 | 926 | keep_last_messages=self.compaction_keep_last_messages, |
| 827 | 927 | previous_summary=self.compaction.summary if self.compaction else None, |
| 828 | 928 | current_task=self.current_task, |
| 929 | + active_dod_summary=self._active_dod_compaction_summary(), | |
| 829 | 930 | original_input_tokens=estimated_input_tokens, |
| 830 | 931 | ) |
| 831 | 932 | if result is None: |
@@ -843,6 +944,20 @@ class ConversationSession: | ||
| 843 | 944 | self.persist() |
| 844 | 945 | return result |
| 845 | 946 | |
| 947 | + def _active_dod_compaction_summary(self) -> str | None: | |
| 948 | + if not self.active_dod_path: | |
| 949 | + return None | |
| 950 | + path = Path(self.active_dod_path) | |
| 951 | + if not path.exists(): | |
| 952 | + return None | |
| 953 | + try: | |
| 954 | + data = json.loads(path.read_text()) | |
| 955 | + except (OSError, json.JSONDecodeError): | |
| 956 | + return None | |
| 957 | + if not isinstance(data, dict): | |
| 958 | + return None | |
| 959 | + return _summarize_active_dod_snapshot(data) | |
| 960 | + | |
| 846 | 961 | def record_turn_usage( |
| 847 | 962 | self, |
| 848 | 963 | usage: dict[str, int], |
tests/test_compaction.py (modified)
@@ -55,6 +55,37 @@ def test_compact_session_messages_preserves_recent_messages() -> None: | ||
| 55 | 55 | ] |
| 56 | 56 | assert result.messages[0].content.startswith("[COMPACTED CONTEXT]") |
| 57 | 57 | assert "Continuation instructions:" in result.messages[0].content |
| 58 | + assert ( | |
| 59 | + "authoritative over older summaries or durable memory notes" | |
| 60 | + in result.messages[0].content | |
| 61 | + ) | |
| 62 | + | |
| 63 | + | |
def test_compact_session_messages_includes_active_dod_summary() -> None:
    """A caller-supplied active DoD summary must appear in the compacted summary."""
    dod_summary = (
        "status=fixing; last verification=failed; "
        "latest failed verifier=/tmp/guide/chapter.html: thin content"
    )
    transcript = [
        Message(role=Role.USER, content="Create the generated guide."),
        Message(role=Role.ASSISTANT, content="Wrote the guide files."),
        Message(
            role=Role.TOOL,
            content="Observation [notepad_read]: Result: guide complete",
        ),
        Message(role=Role.ASSISTANT, content="I will finish."),
        Message(role=Role.USER, content="Continue repairing."),
    ]

    compacted = compact_session_messages(
        transcript,
        keep_last_messages=2,
        current_task="Create the generated guide.",
        active_dod_summary=dod_summary,
    )

    assert compacted is not None
    summary = compacted.summary
    assert "- Active DoD: status=fixing; last verification=failed;" in summary
    assert "/tmp/guide/chapter.html: thin content" in summary
| 58 | 89 | |
| 59 | 90 | |
| 60 | 91 | def test_build_session_summary_skips_nested_compacted_context_content() -> None: |
tests/test_session_state.py (modified)
@@ -10,6 +10,11 @@ import pytest | ||
| 10 | 10 | from loader.agent.loop import Agent, AgentConfig, ReasoningConfig |
| 11 | 11 | from loader.llm.base import CompletionResponse, Message, Role, ToolCall |
| 12 | 12 | from loader.runtime.completion_trace import CompletionTraceEntry |
| 13 | +from loader.runtime.dod import ( | |
| 14 | + DefinitionOfDoneStore, | |
| 15 | + VerificationEvidence, | |
| 16 | + create_definition_of_done, | |
| 17 | +) | |
| 13 | 18 | from loader.runtime.evidence_provenance import EvidenceProvenance |
| 14 | 19 | from loader.runtime.prompt_history import PromptSnapshot |
| 15 | 20 | from loader.runtime.runtime_handle import RuntimeHandle |
@@ -159,6 +164,54 @@ def test_session_compaction_persists_summary_and_recent_messages(temp_dir: Path) | ||
| 159 | 164 | ] |
| 160 | 165 | |
| 161 | 166 | |
def test_session_compaction_summarizes_active_dod_failure(temp_dir: Path) -> None:
    """Compacting a session with a failing active DoD surfaces that failure."""
    # Persist a DoD that is mid-repair with one failed verifier run.
    chapter_path = temp_dir / "guide" / "chapters" / "05-load-balancing.html"
    verifier_output = (
        "Exit code 1\n"
        "HTML guide content quality issues:\n"
        f"{chapter_path}: "
        "thin content (1500 text chars, expected at least 1758)\n"
    )
    definition = create_definition_of_done("Create a generated guide.")
    definition.status = "fixing"
    definition.last_verification_result = "failed"
    definition.pending_items = [
        "Expand generated chapters to satisfy quality verification"
    ]
    definition.evidence.append(
        VerificationEvidence(
            command="python3 verify_html_quality.py",
            passed=False,
            output=verifier_output,
        )
    )
    saved_path = DefinitionOfDoneStore(temp_dir).save(definition)

    # Five messages with keep_last=2 and a 1-token threshold force compaction.
    history = [
        Message(role=Role.USER, content="Create the guide."),
        Message(role=Role.ASSISTANT, content="Created draft files."),
        Message(
            role=Role.TOOL,
            content="Observation [notepad_read]: Result: guide complete",
        ),
        Message(role=Role.ASSISTANT, content="Trying to finish."),
        Message(role=Role.USER, content="Continue repairing."),
    ]
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        messages=history,
        active_dod_path=str(saved_path),
        auto_compaction_input_tokens_threshold=1,
        compaction_keep_last_messages=2,
    )

    outcome = session.maybe_compact()

    assert outcome is not None
    assert session.messages[0].content.startswith("[COMPACTED CONTEXT]")
    summary = outcome.summary
    assert "- Active DoD: status=fixing; last verification=failed" in summary
    assert "05-load-balancing.html" in summary
    assert "thin content" in summary
    assert "authoritative over older summaries or durable memory notes" in summary
| 213 | + | |
| 214 | + | |
| 162 | 215 | def test_build_request_messages_omits_large_mutation_tool_calls_from_history( |
| 163 | 216 | temp_dir: Path, |
| 164 | 217 | ) -> None: |