tenseleyflow/loader / 6ee06b1

Browse files

Soften recovered handoffs

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
6ee06b156ec0a550b3976c82b11dc279d956b65d
Parents
1403a6a
Tree
6dfed4b

2 changed files

StatusFile+-
M src/loader/runtime/tool_batches.py 26 6
M tests/test_tool_batches.py 117 0
src/loader/runtime/tool_batches.pymodified
@@ -1130,6 +1130,9 @@ class ToolBatchRunner:
11301130
             dod,
11311131
             project_root=self.context.project_root,
11321132
         )
1133
+        session_messages = list(getattr(self.context.session, "messages", []) or [])
1134
+        if use_persistent_handoff and _recent_recovery_prompt(session_messages):
1135
+            use_persistent_handoff = False
11331136
         queue_message = (
11341137
             self.context.queue_steering_message
11351138
             if use_persistent_handoff
@@ -1143,7 +1146,7 @@ class ToolBatchRunner:
11431146
             compact_resume = _compact_missing_artifact_handoff(
11441147
                 (resume_target, False),
11451148
                 project_root=self.context.project_root,
1146
-                messages=list(getattr(self.context.session, "messages", []) or []),
1149
+                messages=session_messages,
11471150
             )
11481151
             if compact_resume:
11491152
                 queue_message(
@@ -1160,7 +1163,7 @@ class ToolBatchRunner:
11601163
             compact_handoff = _compact_missing_artifact_handoff(
11611164
                 missing_artifact,
11621165
                 project_root=self.context.project_root,
1163
-                messages=list(getattr(self.context.session, "messages", []) or []),
1166
+                messages=session_messages,
11641167
             )
11651168
             if compact_handoff:
11661169
                 queue_message(
@@ -1194,10 +1197,11 @@ class ToolBatchRunner:
11941197
         *,
11951198
         dod: DefinitionOfDone,
11961199
     ) -> None:
1200
+        session_messages = list(getattr(self.context.session, "messages", []) or [])
11971201
         missing_artifact = _next_missing_planned_artifact(
11981202
             dod,
11991203
             project_root=self.context.project_root,
1200
-            messages=list(getattr(self.context.session, "messages", []) or []),
1204
+            messages=session_messages,
12011205
         )
12021206
         next_pending = preferred_pending_todo_item(
12031207
             dod,
@@ -1311,7 +1315,7 @@ class ToolBatchRunner:
13111315
             + _missing_artifact_resume_suffix(
13121316
                 missing_artifact,
13131317
                 project_root=self.context.project_root,
1314
-                messages=list(getattr(self.context.session, "messages", []) or []),
1318
+                messages=session_messages,
13151319
             )
13161320
             + todo_refresh
13171321
             + " Do not spend the next turn on TodoWrite alone, bookkeeping notes, "
@@ -1327,10 +1331,11 @@ class ToolBatchRunner:
13271331
         if tool_call.name not in _BOOKKEEPING_NOTE_TOOL_NAMES:
13281332
             return
13291333
 
1334
+        session_messages = list(getattr(self.context.session, "messages", []) or [])
13301335
         missing_artifact = _next_missing_planned_artifact(
13311336
             dod,
13321337
             project_root=self.context.project_root,
1333
-            messages=list(getattr(self.context.session, "messages", []) or []),
1338
+            messages=session_messages,
13341339
         )
13351340
         if missing_artifact is None:
13361341
             return
@@ -1377,7 +1382,7 @@ class ToolBatchRunner:
13771382
             + _missing_artifact_resume_suffix(
13781383
                 missing_artifact,
13791384
                 project_root=self.context.project_root,
1380
-                messages=list(getattr(self.context.session, "messages", []) or []),
1385
+                messages=session_messages,
13811386
             )
13821387
             + todo_refresh
13831388
             + " Do not spend the next turn on additional notes, rediscovery, "
@@ -2133,6 +2138,21 @@ def _is_pure_directory_creation_tool_call(tool_call: ToolCall) -> bool:
21332138
     return bool(parts) and parts[0] == "mkdir"
21342139
 
21352140
 
2141
+def _recent_recovery_prompt(messages: list[Any]) -> bool:
2142
+    for message in reversed(messages[-4:]):
2143
+        role = getattr(message, "role", None)
2144
+        if getattr(role, "value", role) != "user":
2145
+            continue
2146
+        content = getattr(message, "content", "")
2147
+        if not isinstance(content, str):
2148
+            continue
2149
+        if content.startswith("[EMPTY ASSISTANT RESPONSE]"):
2150
+            return True
2151
+        if content.startswith("[CONTINUE CURRENT STEP]"):
2152
+            return True
2153
+    return False
2154
+
2155
+
21362156
 def _tool_call_label(tool_call: ToolCall) -> str:
21372157
     """Human-readable label for one tool call."""
21382158
     name = tool_call.name
tests/test_tool_batches.pymodified
@@ -2238,6 +2238,123 @@ async def test_tool_batch_runner_first_file_handoff_stays_persistent(
22382238
     assert ephemeral_messages == []
22392239
 
22402240
 
2241
+@pytest.mark.asyncio
2242
+async def test_tool_batch_runner_softens_first_file_handoff_after_recovery_prompt(
2243
+    temp_dir: Path,
2244
+) -> None:
2245
+    async def assess_confidence(
2246
+        tool_name: str,
2247
+        tool_args: dict,
2248
+        context: str,
2249
+    ) -> ConfidenceAssessment:
2250
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
2251
+
2252
+    async def verify_action(
2253
+        tool_name: str,
2254
+        tool_args: dict,
2255
+        result: str,
2256
+        expected: str = "",
2257
+    ) -> ActionVerification:
2258
+        raise AssertionError("Verification should not run for this scenario")
2259
+
2260
+    nginx_root = temp_dir / "guides" / "nginx"
2261
+    chapters = nginx_root / "chapters"
2262
+    chapters.mkdir(parents=True)
2263
+    index_path = nginx_root / "index.html"
2264
+
2265
+    implementation_plan = temp_dir / "implementation.md"
2266
+    implementation_plan.write_text(
2267
+        "\n".join(
2268
+            [
2269
+                "# Implementation Plan",
2270
+                "",
2271
+                "## File Changes",
2272
+                f"- `{chapters}/`",
2273
+                f"- `{index_path}`",
2274
+                f"- `{chapters / '01-introduction.html'}`",
2275
+                "",
2276
+            ]
2277
+        )
2278
+    )
2279
+
2280
+    context = build_context(
2281
+        temp_dir=temp_dir,
2282
+        messages=[
2283
+            Message(
2284
+                role=Role.USER,
2285
+                content=(
2286
+                    "[EMPTY ASSISTANT RESPONSE]\n"
2287
+                    "Respond with that concrete mutation tool call now. Do not return an empty response."
2288
+                ),
2289
+            )
2290
+        ],
2291
+        safeguards=FakeSafeguards(),
2292
+        assess_confidence=assess_confidence,
2293
+        verify_action=verify_action,
2294
+        auto_recover=False,
2295
+    )
2296
+    persistent_messages: list[str] = []
2297
+    ephemeral_messages: list[str] = []
2298
+    context.queue_steering_message_callback = persistent_messages.append
2299
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2300
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2301
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
2302
+    dod.implementation_plan = str(implementation_plan)
2303
+    sync_todos_to_definition_of_done(
2304
+        dod,
2305
+        [
2306
+            {
2307
+                "content": "Create the main index.html file with proper structure",
2308
+                "active_form": "Creating the main index.html file with proper structure",
2309
+                "status": "pending",
2310
+            },
2311
+            {
2312
+                "content": "Create each chapter file with appropriate content",
2313
+                "active_form": "Creating each chapter file with appropriate content",
2314
+                "status": "pending",
2315
+            },
2316
+        ],
2317
+    )
2318
+
2319
+    tool_call = ToolCall(
2320
+        id="write-index-recovered",
2321
+        name="write",
2322
+        arguments={
2323
+            "file_path": str(index_path),
2324
+            "content": "<html></html>\n",
2325
+        },
2326
+    )
2327
+    executor = FakeExecutor(
2328
+        [
2329
+            tool_outcome(
2330
+                tool_call=tool_call,
2331
+                output=f"Successfully wrote 14 bytes to {index_path}",
2332
+                is_error=False,
2333
+            )
2334
+        ]
2335
+    )
2336
+
2337
+    summary = TurnSummary(final_response="")
2338
+    await runner.execute_batch(
2339
+        tool_calls=[tool_call],
2340
+        tool_source="assistant",
2341
+        pending_tool_calls_seen=set(),
2342
+        emit=_noop_emit,
2343
+        summary=summary,
2344
+        dod=dod,
2345
+        executor=executor,  # type: ignore[arg-type]
2346
+        on_confirmation=None,
2347
+        on_user_question=None,
2348
+        emit_confirmation=None,
2349
+        consecutive_errors=0,
2350
+    )
2351
+
2352
+    assert persistent_messages == []
2353
+    assert ephemeral_messages
2354
+    message = ephemeral_messages[-1]
2355
+    assert "Resume by creating `01-introduction.html` now." in message
2356
+
2357
+
22412358
 @pytest.mark.asyncio
22422359
 async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_review(
22432360
     temp_dir: Path,