tenseleyflow/loader / 39d15a6

Browse files

Delay setup-only verification planning

Authored by espadonne
SHA
39d15a655a7fa1ee038c8f6e5ca8be9c0bcf4d1f
Parents
36515f8
Tree
bf5d20b

2 changed files

Status | File | + | -
M src/loader/runtime/tool_batches.py 37 1
M tests/test_tool_batches.py 80 0
src/loader/runtime/tool_batches.py (modified)
@@ -698,7 +698,11 @@ class ToolBatchRunner:
698698
                 dod=dod,
699699
                 tool_call=tool_call,
700700
             )
701
-        elif is_mutating:
701
+        elif is_mutating and _should_plan_verification_for_tool_call(
702
+            dod,
703
+            tool_call=tool_call,
704
+            project_root=self.context.project_root,
705
+        ):
702706
             _mark_verification_planned(
703707
                 context=self.context,
704708
                 summary=summary,
@@ -1509,6 +1513,38 @@ def _todo_is_mutation_step(label: str) -> bool:
15091513
     return any(token in lowered for token in _MUTATION_TODO_HINTS)
15101514
 
15111515
 
1516
def _should_plan_verification_for_tool_call(
    dod: DefinitionOfDone,
    *,
    tool_call: ToolCall,
    project_root: Path,
) -> bool:
    """Return whether a mutating tool call should trigger verification planning.

    Decision order:

    1. ``write``, ``edit`` and ``patch`` calls always qualify.
    2. Any other tool that is not ``bash`` never qualifies.
    3. A ``bash`` call qualifies when at least one non-blank entry in
       ``dod.touched_files`` resolves to a path that has a suffix, or when
       at least one planned artifact target that is not expected to be a
       directory is reported satisfied.

    Args:
        dod: Definition of done whose touched files and planned artifact
            targets are inspected.
        tool_call: The tool call being classified; only ``name`` is read.
        project_root: Root passed through to the planned-artifact helpers.

    Returns:
        ``True`` when verification should be planned for this call.
    """
    name = tool_call.name
    if name in {"write", "edit", "patch"}:
        return True
    if name != "bash":
        return False

    # NOTE(review): a non-empty suffix is presumably used as a proxy for
    # "this is a file rather than a directory" - confirm against callers.
    for touched in dod.touched_files:
        if not str(touched).strip():
            continue
        if Path(touched).expanduser().resolve(strict=False).suffix:
            return True

    # Fall back to the plan: only file targets count, directories are skipped.
    planned_targets = collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    )
    for target, expect_directory in planned_targets:
        if expect_directory:
            continue
        if planned_artifact_target_satisfied(
            dod,
            target=target,
            expect_directory=False,
            project_root=project_root,
        ):
            return True
    return False
1546
+
1547
+
15121548
 def _mark_verification_planned(
15131549
     *,
15141550
     context: RuntimeContext,
tests/test_tool_batches.py (modified)
@@ -4029,6 +4029,86 @@ async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
40294029
     )
40304030
 
40314031
 
4032
@pytest.mark.asyncio
async def test_tool_batch_runner_does_not_mark_verification_planned_after_setup_only_mkdir(
    temp_dir: Path,
) -> None:
    """A bash ``mkdir -p`` of a planned directory must not plan verification.

    The implementation plan lists a directory and a file under it; the
    batch only creates the directory. Afterwards the definition of done
    must carry no verification result, no "Collect verification evidence"
    pending item, and the timeline must hold no ``verification_planned``
    entry.
    """

    # Both hooks fail loudly: neither is expected to be invoked here.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    runtime_context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

    # Planned layout: a chapters directory plus an index file beside it.
    guide_root = temp_dir / "Loader" / "guides" / "nginx"
    chapters_dir = guide_root / "chapters"
    index_page = guide_root / "index.html"
    plan_path = temp_dir / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{chapters_dir}/`",
        f"- `{index_page}`",
        "",
    ]
    plan_path.write_text("\n".join(plan_lines))

    # The only action in the batch is creating the directory.
    mkdir_call = ToolCall(
        id="mkdir-1",
        name="bash",
        arguments={"command": f"mkdir -p {chapters_dir}"},
    )
    mkdir_outcome = tool_outcome(tool_call=mkdir_call, output="", is_error=False)
    executor = FakeExecutor([mkdir_outcome])
    summary = TurnSummary(final_response="")
    dod = create_definition_of_done("Create an equally thorough nginx guide with chapters.")
    dod.implementation_plan = str(plan_path)
    captured_events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        captured_events.append(event)

    await runner.execute_batch(
        tool_calls=[mkdir_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert dod.last_verification_result is None
    assert "Collect verification evidence" not in dod.pending_items
    assert all(
        entry.reason_code != "verification_planned"
        for entry in summary.workflow_timeline
    )
4110
+
4111
+
40324112
 @pytest.mark.asyncio
40334113
 async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutation(
40344114
     temp_dir: Path,