tenseleyflow/loader / 720cf3c

Browse files

Steer past self-audit rereads

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
720cf3cec4f12f4abcdf50ba60804431e0900f27
Parents
e590b51
Tree
c09ef54

2 changed files

Status | File | + | -
M src/loader/runtime/tool_batches.py 52 0
M tests/test_tool_batches.py 108 0
src/loader/runtime/tool_batches.py (modified)
@@ -310,6 +310,8 @@ class ToolBatchRunner:
310310
                 )
311311
                 self._queue_blocked_shell_rewrite_nudge(tool_call)
312312
                 self._queue_blocked_html_edit_nudge(tool_call, outcome.event_content)
313
+            else:
314
+                self._queue_post_mutation_self_audit_nudge(tool_call, dod=dod)
313315
 
314316
             should_continue = await self.verification_gate.should_continue(
315317
                 tool_call=tool_call,
@@ -491,6 +493,45 @@ class ToolBatchRunner:
491493
             "Choose a different next step that makes progress."
492494
         )
493495
 
496
+    def _queue_post_mutation_self_audit_nudge(
497
+        self,
498
+        tool_call: ToolCall,
499
+        *,
500
+        dod: DefinitionOfDone,
501
+    ) -> None:
502
+        """Steer out of rereading the file that was just written when the next output is known."""
503
+
504
+        if tool_call.name != "read":
505
+            return
506
+
507
+        file_path = str(tool_call.arguments.get("file_path", "")).strip()
508
+        if not file_path:
509
+            return
510
+
511
+        missing_artifact = _next_missing_planned_artifact(
512
+            dod,
513
+            project_root=self.context.project_root,
514
+            messages=list(getattr(self.context.session, "messages", []) or []),
515
+        )
516
+        if missing_artifact is None:
517
+            return
518
+
519
+        read_target = Path(file_path).expanduser().resolve(strict=False)
520
+        last_touched = _last_touched_file_path(dod)
521
+        if last_touched is None or read_target != last_touched:
522
+            return
523
+
524
+        self.context.queue_steering_message(
525
+            f"You already have the current contents of `{read_target.name}` from the successful write. "
526
+            "A declared output artifact is still missing."
527
+            + _missing_artifact_resume_suffix(
528
+                missing_artifact,
529
+                project_root=self.context.project_root,
530
+                messages=list(getattr(self.context.session, "messages", []) or []),
531
+            )
532
+            + " Do not spend another turn rereading the file you just wrote or on TodoWrite alone."
533
+        )
534
+
494535
     def _queue_blocked_shell_rewrite_nudge(self, tool_call: ToolCall) -> None:
495536
         """Steer the model back to file tools after a blocked shell text rewrite."""
496537
 
@@ -1615,6 +1656,17 @@ def _has_confirmed_file_artifact_progress(
16151656
     return _confirmed_file_artifact_count(dod, project_root=project_root) > 0
16161657
 
16171658
 
1659
+def _last_touched_file_path(dod: DefinitionOfDone) -> Path | None:
1660
+    for raw_path in reversed(dod.touched_files):
1661
+        path_text = str(raw_path or "").strip()
1662
+        if not path_text:
1663
+            continue
1664
+        candidate = Path(path_text).expanduser().resolve(strict=False)
1665
+        if candidate.suffix:
1666
+            return candidate
1667
+    return None
1668
+
1669
+
16181670
 def _confirmed_file_artifact_count(
16191671
     dod: DefinitionOfDone,
16201672
     *,
tests/test_tool_batches.py (modified)
@@ -2363,6 +2363,114 @@ async def test_tool_batch_runner_first_chapter_handoff_becomes_ephemeral_after_f
23632363
     assert "Do not reread reference material or spend the next turn on bookkeeping." in message
23642364
 
23652365
 
2366
+@pytest.mark.asyncio
2367
+async def test_tool_batch_runner_redirects_post_write_self_audit_to_next_missing_artifact(
2368
+    temp_dir: Path,
2369
+) -> None:
2370
+    async def assess_confidence(
2371
+        tool_name: str,
2372
+        tool_args: dict,
2373
+        context: str,
2374
+    ) -> ConfidenceAssessment:
2375
+        raise AssertionError("Confidence scoring should not run in this scenario")
2376
+
2377
+    async def verify_action(
2378
+        tool_name: str,
2379
+        tool_args: dict,
2380
+        result: str,
2381
+        expected: str = "",
2382
+    ) -> ActionVerification:
2383
+        raise AssertionError("Verification should not run in this scenario")
2384
+
2385
+    nginx_root = temp_dir / "guides" / "nginx"
2386
+    chapters = nginx_root / "chapters"
2387
+    chapters.mkdir(parents=True)
2388
+    index_path = nginx_root / "index.html"
2389
+    index_path.write_text(
2390
+        "\n".join(
2391
+            [
2392
+                "<html>",
2393
+                '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
2394
+                '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
2395
+                "</html>",
2396
+            ]
2397
+        )
2398
+        + "\n"
2399
+    )
2400
+
2401
+    implementation_plan = temp_dir / "implementation.md"
2402
+    implementation_plan.write_text(
2403
+        "\n".join(
2404
+            [
2405
+                "# Implementation Plan",
2406
+                "",
2407
+                "## File Changes",
2408
+                f"- `{nginx_root}/`",
2409
+                f"- `{chapters}/`",
2410
+                f"- `{index_path}`",
2411
+                f"- `{chapters / '01-introduction.html'}`",
2412
+                "",
2413
+            ]
2414
+        )
2415
+    )
2416
+
2417
+    context = build_context(
2418
+        temp_dir=temp_dir,
2419
+        messages=[],
2420
+        safeguards=FakeSafeguards(),
2421
+        assess_confidence=assess_confidence,
2422
+        verify_action=verify_action,
2423
+        auto_recover=False,
2424
+    )
2425
+    persistent_messages: list[str] = []
2426
+    ephemeral_messages: list[str] = []
2427
+    context.queue_steering_message_callback = persistent_messages.append
2428
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2429
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2430
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
2431
+    dod.implementation_plan = str(implementation_plan)
2432
+    dod.touched_files.append(str(index_path))
2433
+    dod.completed_items.append("Develop the main index.html file for the nginx guide")
2434
+    dod.pending_items.append("Create chapter files for the nginx guide")
2435
+
2436
+    tool_call = ToolCall(
2437
+        id="read-index-self-audit",
2438
+        name="read",
2439
+        arguments={"file_path": str(index_path)},
2440
+    )
2441
+    executor = FakeExecutor(
2442
+        [
2443
+            tool_outcome(
2444
+                tool_call=tool_call,
2445
+                output="1\t<html>\n",
2446
+                is_error=False,
2447
+            )
2448
+        ]
2449
+    )
2450
+
2451
+    summary = TurnSummary(final_response="")
2452
+    await runner.execute_batch(
2453
+        tool_calls=[tool_call],
2454
+        tool_source="assistant",
2455
+        pending_tool_calls_seen=set(),
2456
+        emit=_noop_emit,
2457
+        summary=summary,
2458
+        dod=dod,
2459
+        executor=executor,  # type: ignore[arg-type]
2460
+        on_confirmation=None,
2461
+        on_user_question=None,
2462
+        emit_confirmation=None,
2463
+        consecutive_errors=0,
2464
+    )
2465
+
2466
+    assert persistent_messages
2467
+    message = persistent_messages[-1]
2468
+    assert "You already have the current contents of `index.html` from the successful write." in message
2469
+    assert "Resume by creating `01-introduction.html` now." in message
2470
+    assert "Do not spend another turn rereading the file you just wrote or on TodoWrite alone." in message
2471
+    assert ephemeral_messages == []
2472
+
2473
+
23662474
 @pytest.mark.asyncio
23672475
 async def test_tool_batch_runner_softens_first_file_handoff_after_recovery_prompt(
23682476
     temp_dir: Path,