tenseleyflow/loader / 78735b6

Browse files

Stop reinferring complete guide children

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
78735b613d47105f5fc5659047dc80a70e581563
Parents
0bd46e0
Tree
82cb2c8

2 changed files

Status File + -
M src/loader/runtime/tool_batches.py 18 0
M tests/test_tool_batches.py 127 0
src/loader/runtime/tool_batches.py modified
@@ -2015,6 +2015,11 @@ def _next_missing_planned_artifact(
20152015
             project_root=project_root,
20162016
         ):
20172017
             return target, expect_directory
2018
+    if not _pending_mutation_work_requires_inferred_children(
2019
+        dod,
2020
+        project_root=project_root,
2021
+    ):
2022
+        return None
20182023
     for target, expect_directory in collect_planned_artifact_targets(
20192024
         dod,
20202025
         project_root=project_root,
@@ -2032,6 +2037,19 @@ def _next_missing_planned_artifact(
20322037
     return None
20332038
 
20342039
 
2040
def _pending_mutation_work_requires_inferred_children(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> bool:
    """Report whether any actionable pending todo item is a mutation step.

    The effective pending todo items for ``dod`` are collected first, and
    entries listed in ``_TODO_NUDGE_EXCLUDED_ITEMS`` are dropped. The
    remaining items are then checked with ``_todo_is_mutation_step``.

    Args:
        dod: Definition of done whose pending todo items are inspected.
        project_root: Project root forwarded to
            ``effective_pending_todo_items``.

    Returns:
        ``True`` if at least one non-excluded pending item is a mutation
        step, otherwise ``False`` (including when nothing is pending).
    """
    pending_items = effective_pending_todo_items(dod, project_root=project_root)
    # Finish filtering before classifying so every pending item is examined
    # against the exclusion set prior to the first mutation check.
    actionable = [
        todo for todo in pending_items if todo not in _TODO_NUDGE_EXCLUDED_ITEMS
    ]
    for todo in actionable:
        if _todo_is_mutation_step(todo):
            return True
    return False
2051
+
2052
+
20352053
 def _prefer_missing_artifact_for_pending_item(
20362054
     dod: DefinitionOfDone,
20372055
     *,
tests/test_tool_batches.py modified
@@ -4280,6 +4280,133 @@ async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verif
42804280
     assert "Verification should run next." in queued_messages[-1]
42814281
 
42824282
 
4283
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_complete_directory_plan_does_not_reinfer_first_child(
    temp_dir: Path,
) -> None:
    """An empty TodoWrite on a fully populated guide should hand off to verify.

    The implementation plan lists the guide's ``index.html`` and its
    ``chapters/`` directory, and every chapter linked from the index already
    exists on disk. After a TodoWrite with no todos, the queued steering
    message must announce verification and must not mention the first chapter
    file or "chapter files".
    """

    # Neither scoring hook is expected to be reached; fail loudly if one is.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    # Reference guide chapter that the assistant "read" earlier in the session.
    reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
    reference.parent.mkdir(parents=True)
    reference.write_text("<h1>Introduction</h1>\n")

    # Target guide: an index linking three chapters, all already on disk.
    guide_root = temp_dir / "Loader" / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    index_links = (
        '<a href="chapters/01-introduction.html">Introduction</a>',
        '<a href="chapters/02-installation.html">Installation</a>',
        '<a href="chapters/03-basic-configuration.html">Configuration</a>',
    )
    index_path.write_text("\n".join(index_links) + "\n")
    for chapter_name in (
        "01-introduction.html",
        "02-installation.html",
        "03-basic-configuration.html",
    ):
        (chapters / chapter_name).write_text("<html></html>\n")

    # Plan names the index file plus the chapters directory (trailing slash).
    implementation_plan = temp_dir / "implementation.md"
    plan_lines = (
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root / 'index.html'}`",
        f"- `{chapters}/`",
    )
    implementation_plan.write_text("\n".join(plan_lines) + "\n")

    read_reference_call = ToolCall(
        id="read-reference-child",
        name="read",
        arguments={"file_path": str(reference)},
    )
    history = [
        Message(
            role=Role.ASSISTANT,
            content="I examined the reference guide structure.",
            tool_calls=[read_reference_call],
        )
    ]
    batch_context = build_context(
        temp_dir=temp_dir,
        messages=history,
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    steering_log: list[str] = []
    batch_context.queue_steering_message_callback = steering_log.append
    runner = ToolBatchRunner(batch_context, DefinitionOfDoneStore(temp_dir))

    dod = create_definition_of_done("Create an equally thorough nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide_root}"]

    todo_call = ToolCall(
        id="todo-complete-directory-plan",
        name="TodoWrite",
        arguments={"todos": []},
    )
    todo_outcome = tool_outcome(
        tool_call=todo_call,
        output="Todos updated",
        is_error=False,
        metadata={"new_todos": []},
    )
    executor = FakeExecutor([todo_outcome])

    batch_result = await runner.execute_batch(
        tool_calls=[todo_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert batch_result.continue_after_batch is True
    assert steering_log
    last_message = steering_log[-1]
    assert "Verification should run next." in last_message
    assert "01-introduction.html" not in last_message
    assert "chapter files" not in last_message.lower()
    assert batch_context.workflow_mode == "verify"
4408
+
4409
+
42834410
 @pytest.mark.asyncio
42844411
 async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outputs_exist(
42854412
     temp_dir: Path,