tenseleyflow/loader / 7f616be

Browse files

Preserve active DoD in compaction

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
7f616beb7a05d0e125379de2ed98e8d437247e59
Parents
cce89ea
Tree
382889a

4 changed files

Status File + -
M src/loader/runtime/compaction.py 8 0
M src/loader/runtime/session.py 115 0
M tests/test_compaction.py 31 0
M tests/test_session_state.py 53 0
src/loader/runtime/compaction.py (modified)
@@ -167,6 +167,7 @@ def compact_session_messages(
167167
     budget: SummaryCompressionBudget | None = None,
168168
     previous_summary: str | None = None,
169169
     current_task: str | None = None,
170
+    active_dod_summary: str | None = None,
170171
     original_input_tokens: int | None = None,
171172
 ) -> SessionCompactionResult | None:
172173
     """Compact older messages into one continuation summary message."""
@@ -180,6 +181,7 @@ def compact_session_messages(
180181
         removed_messages,
181182
         previous_summary=previous_summary,
182183
         current_task=current_task,
184
+        active_dod_summary=active_dod_summary,
183185
     )
184186
     compression = compress_summary(summary_text, budget=budget)
185187
     summary_message = Message(
@@ -190,6 +192,8 @@ def compact_session_messages(
190192
             "Continuation instructions:\n"
191193
             "- Continue from the preserved recent messages.\n"
192194
             "- Honor the active DoD, workflow mode, and permission mode.\n"
195
+            "- Treat active DoD/check-failing messages as authoritative over "
196
+            "older summaries or durable memory notes.\n"
193197
             "- Do not ask the user to repeat already-captured context unless essential."
194198
         ),
195199
     )
@@ -212,6 +216,7 @@ def build_session_summary(
212216
     *,
213217
     previous_summary: str | None = None,
214218
     current_task: str | None = None,
219
+    active_dod_summary: str | None = None,
215220
 ) -> str:
216221
     """Build a structured session summary before compression."""
217222
 
@@ -266,6 +271,8 @@ def build_session_summary(
266271
         lines.append(f"- Confirmed facts: {confirmed_facts}")
267272
     if preferred_next_step:
268273
         lines.append(f"- Preferred next step: {preferred_next_step}")
274
+    if active_dod_summary:
275
+        lines.append(f"- Active DoD: {active_dod_summary}")
269276
     if previous_summary:
270277
         lines.append("- Previously compacted context retained.")
271278
     lines.append(f"- Newly compacted context: {len(messages)} earlier message(s) summarized.")
@@ -386,6 +393,7 @@ def _is_core_detail(line: str) -> bool:
386393
             "- Recent user requests:",
387394
             "- Confirmed facts:",
388395
             "- Preferred next step:",
396
+            "- Active DoD:",
389397
             "- Previously compacted context:",
390398
             "- Newly compacted context:",
391399
         )
src/loader/runtime/session.py (modified)
@@ -3,6 +3,7 @@
33
 from __future__ import annotations
44
 
55
 import json
6
+import re
67
 import secrets
78
 from collections.abc import Callable
89
 from dataclasses import dataclass, field
@@ -37,6 +38,9 @@ DEFAULT_ROTATE_AFTER_BYTES = 256 * 1024
3738
 MAX_ROTATED_FILES = 3
3839
 _UNSET = object()
3940
 _REQUEST_TOOL_PAYLOAD_SUMMARY_THRESHOLD = 240
41
+_ABSOLUTE_PATH_SUMMARY_PATTERN = re.compile(
42
+    r"(?P<path>/(?:Users|home|tmp|var|private)/[^\s:]+)"
43
+)
4044
 
4145
 
4246
 def _project_request_tool_call(tool_call: ToolCall) -> ToolCall | None:
@@ -87,6 +91,102 @@ def _utc_now() -> str:
8791
     return datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
8892
 
8993
 
94
+def _compact_inline_text(value: Any, *, max_chars: int = 220) -> str:
95
+    text = " ".join(str(value or "").split())
96
+    if len(text) <= max_chars:
97
+        return text
98
+    return f"{text[: max_chars - 3]}..."
99
+
100
+
101
+def _shorten_summary_paths(value: str) -> str:
102
+    def replace(match: re.Match[str]) -> str:
103
+        path = Path(match.group("path"))
104
+        parent = path.parent.name
105
+        if parent:
106
+            return f".../{parent}/{path.name}"
107
+        return f".../{path.name}"
108
+
109
+    return _ABSOLUTE_PATH_SUMMARY_PATTERN.sub(replace, value)
110
+
111
+
112
+def _preview_sequence(values: Any, *, limit: int = 3) -> str:
113
+    if not isinstance(values, list):
114
+        return ""
115
+    items = [
116
+        _compact_inline_text(value, max_chars=140)
117
+        for value in values
118
+        if str(value or "").strip()
119
+    ]
120
+    if not items:
121
+        return ""
122
+    preview = ", ".join(items[:limit])
123
+    if len(items) > limit:
124
+        preview += ", ..."
125
+    return preview
126
+
127
+
128
+def _latest_failed_evidence_preview(
129
+    evidence_items: Any,
130
+    *,
131
+    limit: int = 3,
132
+) -> str:
133
+    if not isinstance(evidence_items, list):
134
+        return ""
135
+    for item in reversed(evidence_items):
136
+        if not isinstance(item, dict):
137
+            continue
138
+        if item.get("passed") is True or item.get("skipped") is True:
139
+            continue
140
+        text = "\n".join(
141
+            str(item.get(key) or "")
142
+            for key in ("output", "stdout", "stderr")
143
+            if item.get(key)
144
+        )
145
+        issue_lines: list[str] = []
146
+        for raw_line in text.splitlines():
147
+            line = _compact_inline_text(
148
+                _shorten_summary_paths(str(raw_line)),
149
+                max_chars=180,
150
+            )
151
+            if not line:
152
+                continue
153
+            lowered = line.lower()
154
+            if lowered.startswith(
155
+                (
156
+                    "exit code ",
157
+                    "html guide content quality issues:",
158
+                    "missing local html links:",
159
+                    "verification:",
160
+                )
161
+            ):
162
+                continue
163
+            issue_lines.append(line)
164
+            if len(issue_lines) >= limit:
165
+                break
166
+        if issue_lines:
167
+            return "; ".join(issue_lines)
168
+    return ""
169
+
170
+
171
+def _summarize_active_dod_snapshot(data: dict[str, Any]) -> str | None:
172
+    parts: list[str] = []
173
+    status = _compact_inline_text(data.get("status"))
174
+    if status:
175
+        parts.append(f"status={status}")
176
+    verification = _compact_inline_text(data.get("last_verification_result"))
177
+    if verification:
178
+        parts.append(f"last verification={verification}")
179
+    failed = _latest_failed_evidence_preview(data.get("evidence"))
180
+    if failed:
181
+        parts.append(f"latest failed verifier={failed}")
182
+    pending = _preview_sequence(data.get("pending_items"))
183
+    if pending:
184
+        parts.append(f"pending={pending}")
185
+    if not parts:
186
+        return None
187
+    return _compact_inline_text("; ".join(parts), max_chars=700)
188
+
189
+
90190
 def _generate_session_id() -> str:
91191
     timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ")
92192
     return f"{timestamp}-{secrets.token_hex(4)}"
@@ -826,6 +926,7 @@ class ConversationSession:
826926
             keep_last_messages=self.compaction_keep_last_messages,
827927
             previous_summary=self.compaction.summary if self.compaction else None,
828928
             current_task=self.current_task,
929
+            active_dod_summary=self._active_dod_compaction_summary(),
829930
             original_input_tokens=estimated_input_tokens,
830931
         )
831932
         if result is None:
@@ -843,6 +944,20 @@ class ConversationSession:
843944
         self.persist()
844945
         return result
845946
 
947
+    def _active_dod_compaction_summary(self) -> str | None:
948
+        if not self.active_dod_path:
949
+            return None
950
+        path = Path(self.active_dod_path)
951
+        if not path.exists():
952
+            return None
953
+        try:
954
+            data = json.loads(path.read_text())
955
+        except (OSError, json.JSONDecodeError):
956
+            return None
957
+        if not isinstance(data, dict):
958
+            return None
959
+        return _summarize_active_dod_snapshot(data)
960
+
846961
     def record_turn_usage(
847962
         self,
848963
         usage: dict[str, int],
tests/test_compaction.py (modified)
@@ -55,6 +55,37 @@ def test_compact_session_messages_preserves_recent_messages() -> None:
5555
     ]
5656
     assert result.messages[0].content.startswith("[COMPACTED CONTEXT]")
5757
     assert "Continuation instructions:" in result.messages[0].content
58
+    assert (
59
+        "authoritative over older summaries or durable memory notes"
60
+        in result.messages[0].content
61
+    )
62
+
63
+
64
+def test_compact_session_messages_includes_active_dod_summary() -> None:
65
+    messages = [
66
+        Message(role=Role.USER, content="Create the generated guide."),
67
+        Message(role=Role.ASSISTANT, content="Wrote the guide files."),
68
+        Message(
69
+            role=Role.TOOL,
70
+            content="Observation [notepad_read]: Result: guide complete",
71
+        ),
72
+        Message(role=Role.ASSISTANT, content="I will finish."),
73
+        Message(role=Role.USER, content="Continue repairing."),
74
+    ]
75
+
76
+    result = compact_session_messages(
77
+        messages,
78
+        keep_last_messages=2,
79
+        current_task="Create the generated guide.",
80
+        active_dod_summary=(
81
+            "status=fixing; last verification=failed; "
82
+            "latest failed verifier=/tmp/guide/chapter.html: thin content"
83
+        ),
84
+    )
85
+
86
+    assert result is not None
87
+    assert "- Active DoD: status=fixing; last verification=failed;" in result.summary
88
+    assert "/tmp/guide/chapter.html: thin content" in result.summary
5889
 
5990
 
6091
 def test_build_session_summary_skips_nested_compacted_context_content() -> None:
tests/test_session_state.py (modified)
@@ -10,6 +10,11 @@ import pytest
1010
 from loader.agent.loop import Agent, AgentConfig, ReasoningConfig
1111
 from loader.llm.base import CompletionResponse, Message, Role, ToolCall
1212
 from loader.runtime.completion_trace import CompletionTraceEntry
13
+from loader.runtime.dod import (
14
+    DefinitionOfDoneStore,
15
+    VerificationEvidence,
16
+    create_definition_of_done,
17
+)
1318
 from loader.runtime.evidence_provenance import EvidenceProvenance
1419
 from loader.runtime.prompt_history import PromptSnapshot
1520
 from loader.runtime.runtime_handle import RuntimeHandle
@@ -159,6 +164,54 @@ def test_session_compaction_persists_summary_and_recent_messages(temp_dir: Path)
159164
     ]
160165
 
161166
 
167
+def test_session_compaction_summarizes_active_dod_failure(temp_dir: Path) -> None:
168
+    dod_store = DefinitionOfDoneStore(temp_dir)
169
+    dod = create_definition_of_done("Create a generated guide.")
170
+    dod.status = "fixing"
171
+    dod.last_verification_result = "failed"
172
+    dod.pending_items = ["Expand generated chapters to satisfy quality verification"]
173
+    dod.evidence.append(
174
+        VerificationEvidence(
175
+            command="python3 verify_html_quality.py",
176
+            passed=False,
177
+            output=(
178
+                "Exit code 1\n"
179
+                "HTML guide content quality issues:\n"
180
+                f"{temp_dir / 'guide' / 'chapters' / '05-load-balancing.html'}: "
181
+                "thin content (1500 text chars, expected at least 1758)\n"
182
+            ),
183
+        )
184
+    )
185
+    dod_path = dod_store.save(dod)
186
+    session = ConversationSession(
187
+        system_message_factory=_dummy_system,
188
+        few_shot_factory=_dummy_few_shots,
189
+        project_root=temp_dir,
190
+        messages=[
191
+            Message(role=Role.USER, content="Create the guide."),
192
+            Message(role=Role.ASSISTANT, content="Created draft files."),
193
+            Message(
194
+                role=Role.TOOL,
195
+                content="Observation [notepad_read]: Result: guide complete",
196
+            ),
197
+            Message(role=Role.ASSISTANT, content="Trying to finish."),
198
+            Message(role=Role.USER, content="Continue repairing."),
199
+        ],
200
+        active_dod_path=str(dod_path),
201
+        auto_compaction_input_tokens_threshold=1,
202
+        compaction_keep_last_messages=2,
203
+    )
204
+
205
+    result = session.maybe_compact()
206
+
207
+    assert result is not None
208
+    assert session.messages[0].content.startswith("[COMPACTED CONTEXT]")
209
+    assert "- Active DoD: status=fixing; last verification=failed" in result.summary
210
+    assert "05-load-balancing.html" in result.summary
211
+    assert "thin content" in result.summary
212
+    assert "authoritative over older summaries or durable memory notes" in result.summary
213
+
214
+
162215
 def test_build_request_messages_omits_large_mutation_tool_calls_from_history(
163216
     temp_dir: Path,
164217
 ) -> None: