tenseleyflow/loader / 6d1c5ec

Browse files

Name concrete TodoWrite targets

Authored by espadonne
SHA
6d1c5ec23dbc5fc901c34bff3b791ad0f8cfb258
Parents
67175cc
Tree
dc0e0f5

5 changed files

Status | File | + | -
M src/loader/runtime/repair.py 3 158
M src/loader/runtime/tool_batches.py 28 0
M src/loader/runtime/workflow.py 172 0
M tests/test_runtime_repair_flows.py 102 0
M tests/test_tool_batches.py 132 0
src/loader/runtime/repair.py (modified)
@@ -16,6 +16,7 @@ from .dod import (
1616
 )
1717
 from .parsing import parse_tool_calls
1818
 from .workflow import (
19
+    infer_pending_todo_output_target,
1920
     preferred_pending_todo_item,
2021
     reconcile_aggregate_completion_steps,
2122
     todo_file_candidates,
@@ -680,112 +681,11 @@ class ResponseRepairer:
680681
         dod: DefinitionOfDone,
681682
         item: str,
682683
     ) -> Path | None:
683
-        candidates = todo_file_candidates(item)
684
-        if not candidates:
685
-            return self._infer_pending_item_target_from_html_graph(dod, item)
686
-
687
-        planned_targets = collect_planned_artifact_targets(
688
-            dod,
689
-            project_root=self.context.project_root,
690
-            max_paths=12,
691
-        )
692
-        planned_files = {
693
-            target.name.lower(): target
694
-            for target, expect_directory in planned_targets
695
-            if not expect_directory
696
-        }
697
-        planned_directories = [
698
-            target
699
-            for target, expect_directory in planned_targets
700
-            if expect_directory
701
-        ]
702
-        touched_paths = [
703
-            Path(path)
704
-            for path in dod.touched_files
705
-            if str(path).strip()
706
-        ]
707
-
708
-        for candidate in candidates:
709
-            candidate_str = str(candidate)
710
-            if candidate.is_absolute() or candidate_str.startswith("~"):
711
-                return Path(candidate_str).expanduser()
712
-
713
-            planned_match = planned_files.get(candidate.name.lower())
714
-            if planned_match is not None:
715
-                return planned_match
716
-
717
-            for touched in reversed(touched_paths):
718
-                if touched.name.lower() == candidate.name.lower():
719
-                    continue
720
-                if candidate.suffix and touched.suffix.lower() != candidate.suffix.lower():
721
-                    continue
722
-                return touched.parent / candidate.name
723
-
724
-            for directory in planned_directories:
725
-                return directory / candidate.name
726
-
727
-        return None
728
-
729
-    def _infer_pending_item_target_from_html_graph(
730
-        self,
731
-        dod: DefinitionOfDone,
732
-        item: str,
733
-    ) -> Path | None:
734
-        target_label = _normalize_pending_output_label(item)
735
-        if not target_label:
736
-            return None
737
-
738
-        html_files = self._pending_item_html_sources(dod)
739
-        matches: list[tuple[int, bool, Path]] = []
740
-        for html_file in html_files:
741
-            try:
742
-                content = html_file.read_text()
743
-            except OSError:
744
-                continue
745
-            for href, link_text in _iter_local_html_links(html_file, content):
746
-                resolved = (html_file.parent / href).resolve(strict=False)
747
-                score = _pending_output_link_match_score(
748
-                    target_label,
749
-                    _normalize_pending_output_label(link_text),
750
-                )
751
-                if score <= 0:
752
-                    continue
753
-                matches.append((score, not resolved.exists(), resolved))
754
-
755
-        if not matches:
756
-            return None
757
-        matches.sort(key=lambda item: (item[0], item[1], str(item[2])), reverse=True)
758
-        return matches[0][2]
759
-
760
-    def _pending_item_html_sources(self, dod: DefinitionOfDone) -> list[Path]:
761
-        planned_targets = collect_planned_artifact_targets(
684
+        return infer_pending_todo_output_target(
762685
             dod,
686
+            item,
763687
             project_root=self.context.project_root,
764
-            max_paths=12,
765688
         )
766
-        html_sources: list[Path] = []
767
-        seen: set[str] = set()
768
-
769
-        for raw_path in dod.touched_files:
770
-            path = Path(raw_path).expanduser().resolve(strict=False)
771
-            if path.suffix.lower() not in {".html", ".htm"}:
772
-                continue
773
-            key = str(path)
774
-            if key in seen:
775
-                continue
776
-            seen.add(key)
777
-            html_sources.append(path)
778
-
779
-        for target, expect_directory in planned_targets:
780
-            if expect_directory or target.suffix.lower() not in {".html", ".htm"}:
781
-                continue
782
-            key = str(target)
783
-            if key in seen:
784
-                continue
785
-            seen.add(key)
786
-            html_sources.append(target)
787
-
788
-        return html_sources
789689
 
790690
     def _preferred_resume_pending_item(
791691
         self,
@@ -855,58 +755,3 @@ def _todo_is_mutation_step(label: str) -> bool:
855755
 def _todo_is_consistency_review_step(label: str) -> bool:
856756
     lowered = label.lower()
857757
     return any(token in lowered for token in _CONSISTENCY_REVIEW_HINTS)
858
-
859
-
860
-def _normalize_pending_output_label(value: str) -> str:
861
-    text = " ".join(str(value).strip().split()).lower()
862
-    if not text:
863
-        return ""
864
-    text = re.sub(
865
-        r"^(?:working on:\s*)?(?:create|creating|write|writing|build|building|develop|developing)\s+",
866
-        "",
867
-        text,
868
-    )
869
-    text = re.sub(r"\bfor nginx guide\b", "", text)
870
-    text = re.sub(r"[^a-z0-9]+", " ", text)
871
-    return " ".join(text.split())
872
-
873
-
874
-def _pending_output_link_match_score(todo_label: str, link_label: str) -> int:
875
-    if not todo_label or not link_label:
876
-        return 0
877
-    if todo_label == link_label:
878
-        return 3
879
-    if todo_label in link_label or link_label in todo_label:
880
-        return 2
881
-    todo_tokens = {token for token in todo_label.split() if len(token) > 2}
882
-    link_tokens = {token for token in link_label.split() if len(token) > 2}
883
-    if not todo_tokens or not link_tokens:
884
-        return 0
885
-    overlap = todo_tokens & link_tokens
886
-    if len(overlap) >= min(3, len(todo_tokens), len(link_tokens)):
887
-        return 1
888
-    return 0
889
-
890
-
891
-def _iter_local_html_links(file_path: Path, content: str) -> list[tuple[str, str]]:
892
-    pattern = re.compile(
893
-        r"<a\b[^>]*href\s*=\s*[\"']([^\"']+)[\"'][^>]*>(.*?)</a>",
894
-        re.IGNORECASE | re.DOTALL,
895
-    )
896
-    links: list[tuple[str, str]] = []
897
-    seen: set[tuple[str, str]] = set()
898
-    for href, inner_html in pattern.findall(content):
899
-        target = href.strip()
900
-        if not target or target.startswith(("#", "http://", "https://", "mailto:")):
901
-            continue
902
-        trimmed_target = target.split("?", 1)[0].split("#", 1)[0]
903
-        if Path(trimmed_target).suffix.lower() not in {".html", ".htm"}:
904
-            continue
905
-        label = re.sub(r"<[^>]+>", " ", inner_html)
906
-        label = " ".join(label.split())
907
-        key = (trimmed_target, label)
908
-        if key in seen:
909
-            continue
910
-        seen.add(key)
911
-        links.append((trimmed_target, label))
912
-    return links
src/loader/runtime/tool_batches.py (modified)
@@ -40,6 +40,7 @@ from .verification_observations import (
4040
 from .workflow import (
4141
     advance_todos_from_tool_call,
4242
     effective_pending_todo_items,
43
+    infer_pending_todo_output_target,
4344
     preferred_pending_todo_item,
4445
     reconcile_aggregate_completion_steps,
4546
     sync_todos_to_definition_of_done,
@@ -937,6 +938,33 @@ class ToolBatchRunner:
937938
         )
938939
         if missing_artifact is None:
939940
             if next_pending and _todo_is_mutation_step(next_pending):
941
+                pending_target = infer_pending_todo_output_target(
942
+                    dod,
943
+                    next_pending,
944
+                    project_root=self.context.project_root,
945
+                )
946
+                if pending_target is not None:
947
+                    concrete_message = (
948
+                        "Todo tracking is updated. Continue with the next pending item: "
949
+                        f"`{next_pending}`. Resume by creating `{pending_target.name}` now. "
950
+                        f"Prefer one `write` call for `{pending_target}` instead of more rereads. "
951
+                    )
952
+                    if not pending_target.parent.exists():
953
+                        concrete_message += (
954
+                            "The `write` tool can create that file's parent directories "
955
+                            "automatically, so do the write in one step instead of stopping "
956
+                            "for a separate mkdir. "
957
+                        )
958
+                    concrete_message += (
959
+                        "Use the current output files as the source of truth, and do not "
960
+                        "reopen reference materials unless one specific fact required for "
961
+                        "that step is still unknown. Make your next response the concrete "
962
+                        "mutation tool call itself, not another bookkeeping-only turn. "
963
+                        "Perform the mutation now instead of spending another turn on "
964
+                        "planning, rereads, or verification."
965
+                    )
966
+                    self.context.queue_steering_message(concrete_message)
967
+                    return
940968
                 self.context.queue_steering_message(
941969
                     "Todo tracking is updated. Continue with the next pending item: "
942970
                     f"`{next_pending}`. Use the current output files as the source of "
src/loader/runtime/workflow.py (modified)
@@ -54,6 +54,7 @@ __all__ = [
5454
     "effective_pending_todo_items",
5555
     "enrich_clarify_brief_with_grounding",
5656
     "extract_verification_commands_from_markdown",
57
+    "infer_pending_todo_output_target",
5758
     "load_brief",
5859
     "load_planning_artifacts",
5960
     "merge_refreshed_todos_with_existing_scope",
@@ -887,6 +888,87 @@ def todo_file_candidates(item: str) -> list[Path]:
887888
     return candidates
888889
 
889890
 
891
+def infer_pending_todo_output_target(
892
+    dod,
893
+    item: str,
894
+    *,
895
+    project_root: Path | None = None,
896
+) -> Path | None:
897
+    """Infer the concrete file path a pending todo is asking the model to mutate."""
898
+
899
+    root = project_root or Path.cwd()
900
+    candidates = todo_file_candidates(item)
901
+    planned_targets = collect_planned_artifact_targets(
902
+        dod,
903
+        project_root=root,
904
+        max_paths=12,
905
+    )
906
+
907
+    if candidates:
908
+        planned_files = {
909
+            target.name.lower(): target
910
+            for target, expect_directory in planned_targets
911
+            if not expect_directory
912
+        }
913
+        planned_directories = [
914
+            target
915
+            for target, expect_directory in planned_targets
916
+            if expect_directory
917
+        ]
918
+        touched_paths = [
919
+            Path(path)
920
+            for path in dod.touched_files
921
+            if str(path).strip()
922
+        ]
923
+
924
+        for candidate in candidates:
925
+            candidate_str = str(candidate)
926
+            if candidate.is_absolute() or candidate_str.startswith("~"):
927
+                return Path(candidate_str).expanduser()
928
+
929
+            planned_match = planned_files.get(candidate.name.lower())
930
+            if planned_match is not None:
931
+                return planned_match
932
+
933
+            for touched in reversed(touched_paths):
934
+                if touched.name.lower() == candidate.name.lower():
935
+                    continue
936
+                if candidate.suffix and touched.suffix.lower() != candidate.suffix.lower():
937
+                    continue
938
+                return touched.parent / candidate.name
939
+
940
+            for directory in planned_directories:
941
+                return directory / candidate.name
942
+
943
+    target_label = _normalize_pending_output_label(item)
944
+    if not target_label:
945
+        return None
946
+
947
+    matches: list[tuple[int, bool, Path]] = []
948
+    for html_file in _pending_item_html_sources(
949
+        dod,
950
+        project_root=root,
951
+    ):
952
+        try:
953
+            content = html_file.read_text()
954
+        except OSError:
955
+            continue
956
+        for href, link_text in _iter_local_html_links(content):
957
+            resolved = (html_file.parent / href).resolve(strict=False)
958
+            score = _pending_output_link_match_score(
959
+                target_label,
960
+                _normalize_pending_output_label(link_text),
961
+            )
962
+            if score <= 0:
963
+                continue
964
+            matches.append((score, not resolved.exists(), resolved))
965
+
966
+    if not matches:
967
+        return None
968
+    matches.sort(key=lambda item: (item[0], item[1], str(item[2])), reverse=True)
969
+    return matches[0][2]
970
+
971
+
890972
 def preserve_task_grounded_acceptance_criteria(
891973
     task_statement: str,
892974
     *,
@@ -905,6 +987,96 @@ def preserve_task_grounded_acceptance_criteria(
905987
     return list(dict.fromkeys([*grounded_existing, *refreshed_acceptance_criteria]))
906988
 
907989
 
990
+def _pending_item_html_sources(
991
+    dod,
992
+    *,
993
+    project_root: Path,
994
+) -> list[Path]:
995
+    planned_targets = collect_planned_artifact_targets(
996
+        dod,
997
+        project_root=project_root,
998
+        max_paths=12,
999
+    )
1000
+    html_sources: list[Path] = []
1001
+    seen: set[str] = set()
1002
+
1003
+    for raw_path in dod.touched_files:
1004
+        path = Path(raw_path).expanduser().resolve(strict=False)
1005
+        if path.suffix.lower() not in {".html", ".htm"}:
1006
+            continue
1007
+        key = str(path)
1008
+        if key in seen:
1009
+            continue
1010
+        seen.add(key)
1011
+        html_sources.append(path)
1012
+
1013
+    for target, expect_directory in planned_targets:
1014
+        if expect_directory or target.suffix.lower() not in {".html", ".htm"}:
1015
+            continue
1016
+        key = str(target)
1017
+        if key in seen:
1018
+            continue
1019
+        seen.add(key)
1020
+        html_sources.append(target)
1021
+
1022
+    return html_sources
1023
+
1024
+
1025
+def _normalize_pending_output_label(value: str) -> str:
1026
+    text = " ".join(str(value).strip().split()).lower()
1027
+    if not text:
1028
+        return ""
1029
+    text = re.sub(
1030
+        r"^(?:working on:\s*)?(?:create|creating|write|writing|build|building|develop|developing)\s+",
1031
+        "",
1032
+        text,
1033
+    )
1034
+    text = re.sub(r"\bfor nginx guide\b", "", text)
1035
+    text = re.sub(r"[^a-z0-9]+", " ", text)
1036
+    return " ".join(text.split())
1037
+
1038
+
1039
+def _pending_output_link_match_score(todo_label: str, link_label: str) -> int:
1040
+    if not todo_label or not link_label:
1041
+        return 0
1042
+    if todo_label == link_label:
1043
+        return 3
1044
+    if todo_label in link_label or link_label in todo_label:
1045
+        return 2
1046
+    todo_tokens = {token for token in todo_label.split() if len(token) > 2}
1047
+    link_tokens = {token for token in link_label.split() if len(token) > 2}
1048
+    if not todo_tokens or not link_tokens:
1049
+        return 0
1050
+    overlap = todo_tokens & link_tokens
1051
+    if len(overlap) >= min(3, len(todo_tokens), len(link_tokens)):
1052
+        return 1
1053
+    return 0
1054
+
1055
+
1056
+def _iter_local_html_links(content: str) -> list[tuple[str, str]]:
1057
+    pattern = re.compile(
1058
+        r"<a\b[^>]*href\s*=\s*[\"']([^\"']+)[\"'][^>]*>(.*?)</a>",
1059
+        re.IGNORECASE | re.DOTALL,
1060
+    )
1061
+    links: list[tuple[str, str]] = []
1062
+    seen: set[tuple[str, str]] = set()
1063
+    for href, inner_html in pattern.findall(content):
1064
+        target = href.strip()
1065
+        if not target or target.startswith(("#", "http://", "https://", "mailto:")):
1066
+            continue
1067
+        trimmed_target = target.split("?", 1)[0].split("#", 1)[0]
1068
+        if Path(trimmed_target).suffix.lower() not in {".html", ".htm"}:
1069
+            continue
1070
+        label = re.sub(r"<[^>]+>", " ", inner_html)
1071
+        label = " ".join(label.split())
1072
+        key = (trimmed_target, label)
1073
+        if key in seen:
1074
+            continue
1075
+        seen.add(key)
1076
+        links.append((trimmed_target, label))
1077
+    return links
1078
+
1079
+
9081080
 def merge_refreshed_todos_with_existing_scope(
9091081
     task_statement: str,
9101082
     *,
tests/test_runtime_repair_flows.py (modified)
@@ -204,6 +204,108 @@ async def test_empty_response_retry_budget_resets_after_successful_turn(
204204
     assert sum("retry 1/2" in message for message in retry_messages) >= 2
205205
 
206206
 
207
+@pytest.mark.asyncio
208
+async def test_empty_response_retry_budget_resets_after_todowrite_turn(
209
+    temp_dir: Path,
210
+) -> None:
211
+    first = temp_dir / "index.html"
212
+    second = temp_dir / "chapters" / "01-introduction.html"
213
+    backend = ScriptedBackend(
214
+        completions=[
215
+            CompletionResponse(content=""),
216
+            CompletionResponse(
217
+                content="I'll create the guide index now.",
218
+                tool_calls=[
219
+                    ToolCall(
220
+                        id="write-1",
221
+                        name="write",
222
+                        arguments={
223
+                            "file_path": str(first),
224
+                            "content": "<html></html>\n",
225
+                        },
226
+                    )
227
+                ],
228
+            ),
229
+            CompletionResponse(
230
+                content="I'll create the first chapter now.",
231
+                tool_calls=[
232
+                    ToolCall(
233
+                        id="write-2",
234
+                        name="write",
235
+                        arguments={
236
+                            "file_path": str(second),
237
+                            "content": "<html></html>\n",
238
+                        },
239
+                    )
240
+                ],
241
+            ),
242
+            CompletionResponse(
243
+                content="I'll update the task list now.",
244
+                tool_calls=[
245
+                    ToolCall(
246
+                        id="todo-1",
247
+                        name="TodoWrite",
248
+                        arguments={
249
+                            "todos": [
250
+                                {
251
+                                    "content": "Create index.html",
252
+                                    "status": "completed",
253
+                                    "active_form": "Creating index.html",
254
+                                },
255
+                                {
256
+                                    "content": "Create 01-introduction.html",
257
+                                    "status": "completed",
258
+                                    "active_form": "Creating 01-introduction.html",
259
+                                },
260
+                                {
261
+                                    "content": "Create 02-installation.html",
262
+                                    "status": "pending",
263
+                                    "active_form": "Creating 02-installation.html",
264
+                                },
265
+                            ]
266
+                        },
267
+                    )
268
+                ],
269
+            ),
270
+            CompletionResponse(content=""),
271
+            CompletionResponse(
272
+                content="I'll create the second chapter now.",
273
+                tool_calls=[
274
+                    ToolCall(
275
+                        id="write-3",
276
+                        name="write",
277
+                        arguments={
278
+                            "file_path": str(temp_dir / "chapters" / "02-installation.html"),
279
+                            "content": "<html></html>\n",
280
+                        },
281
+                    )
282
+                ],
283
+            ),
284
+            CompletionResponse(content="The guide files are created."),
285
+        ]
286
+    )
287
+
288
+    run = await run_scenario(
289
+        "Create a small nginx guide.",
290
+        backend,
291
+        config=non_streaming_config(),
292
+        project_root=temp_dir,
293
+    )
294
+
295
+    assert run.response.startswith("The guide files are created.")
296
+    retry_messages: list[str] = []
297
+    for invocation in backend.invocations:
298
+        for message in invocation.messages:
299
+            if message.role != Role.USER or "[EMPTY ASSISTANT RESPONSE]" not in message.content:
300
+                continue
301
+            if retry_messages and retry_messages[-1] == message.content:
302
+                continue
303
+            retry_messages.append(message.content)
304
+    assert len(retry_messages) >= 2
305
+    assert all("retry 2/2" not in message for message in retry_messages)
306
+    assert sum("retry 1/2" in message for message in retry_messages) >= 2
307
+
308
+
207309
 @pytest.mark.asyncio
208310
 async def test_repeated_empty_responses_fail_honestly_after_one_retry(
209311
     temp_dir: Path,
tests/test_tool_batches.py (modified)
@@ -2884,6 +2884,138 @@ async def test_tool_batch_runner_todowrite_with_declared_child_targets_names_nex
28842884
     assert "Do not spend the next turn on TodoWrite alone" in message
28852885
 
28862886
 
2887
+@pytest.mark.asyncio
2888
+async def test_tool_batch_runner_todowrite_names_concrete_pending_file_after_artifacts_exist(
2889
+    temp_dir: Path,
2890
+) -> None:
2891
+    async def assess_confidence(
2892
+        tool_name: str,
2893
+        tool_args: dict,
2894
+        context: str,
2895
+    ) -> ConfidenceAssessment:
2896
+        raise AssertionError("Confidence scoring should not run in this scenario")
2897
+
2898
+    async def verify_action(
2899
+        tool_name: str,
2900
+        tool_args: dict,
2901
+        result: str,
2902
+        expected: str = "",
2903
+    ) -> ActionVerification:
2904
+        raise AssertionError("Verification should not run in this scenario")
2905
+
2906
+    guide_root = temp_dir / "guides" / "nginx"
2907
+    chapters = guide_root / "chapters"
2908
+    guide_root.mkdir(parents=True)
2909
+    chapters.mkdir()
2910
+    index_path = guide_root / "index.html"
2911
+    chapter_one = chapters / "01-introduction.html"
2912
+    index_path.write_text(
2913
+        "\n".join(
2914
+            [
2915
+                "<html>",
2916
+                '<a href="chapters/01-introduction.html">Chapter 1: Introduction to NGINX Tool</a>',
2917
+                '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
2918
+                "</html>",
2919
+            ]
2920
+        )
2921
+        + "\n"
2922
+    )
2923
+    chapter_one.write_text("<html></html>\n")
2924
+
2925
+    implementation_plan = temp_dir / "implementation.md"
2926
+    implementation_plan.write_text(
2927
+        "\n".join(
2928
+            [
2929
+                "# Implementation Plan",
2930
+                "",
2931
+                "## File Changes",
2932
+                f"- `{guide_root}/`",
2933
+                f"- `{chapters}/`",
2934
+                f"- `{index_path}`",
2935
+                "",
2936
+            ]
2937
+        )
2938
+    )
2939
+
2940
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
2941
+    dod.implementation_plan = str(implementation_plan)
2942
+    dod.pending_items = [
2943
+        "Creating Chapter 2: Installation and Setup",
2944
+        "Complete the requested work",
2945
+    ]
2946
+    dod.touched_files.extend([str(index_path), str(chapter_one)])
2947
+
2948
+    queued_messages: list[str] = []
2949
+    context = build_context(
2950
+        temp_dir=temp_dir,
2951
+        messages=[],
2952
+        safeguards=FakeSafeguards(),
2953
+        assess_confidence=assess_confidence,
2954
+        verify_action=verify_action,
2955
+        auto_recover=False,
2956
+    )
2957
+    context.queue_steering_message_callback = queued_messages.append
2958
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2959
+
2960
+    tool_call = ToolCall(
2961
+        id="todo-1",
2962
+        name="TodoWrite",
2963
+        arguments={
2964
+            "todos": [
2965
+                {
2966
+                    "content": "Creating Chapter 2: Installation and Setup",
2967
+                    "activeForm": "Creating Chapter 2: Installation and Setup",
2968
+                    "status": "pending",
2969
+                }
2970
+            ]
2971
+        },
2972
+    )
2973
+    executor = FakeExecutor(
2974
+        [
2975
+            tool_outcome(
2976
+                tool_call=tool_call,
2977
+                output="Todos updated",
2978
+                is_error=False,
2979
+                metadata={
2980
+                    "new_todos": [
2981
+                        {
2982
+                            "content": "Creating Chapter 2: Installation and Setup",
2983
+                            "active_form": "Creating Chapter 2: Installation and Setup",
2984
+                            "status": "pending",
2985
+                        }
2986
+                    ]
2987
+                },
2988
+            )
2989
+        ]
2990
+    )
2991
+
2992
+    summary = TurnSummary(final_response="")
2993
+    await runner.execute_batch(
2994
+        tool_calls=[tool_call],
2995
+        tool_source="assistant",
2996
+        pending_tool_calls_seen=set(),
2997
+        emit=_noop_emit,
2998
+        summary=summary,
2999
+        dod=dod,
3000
+        executor=executor,  # type: ignore[arg-type]
3001
+        on_confirmation=None,
3002
+        on_user_question=None,
3003
+        emit_confirmation=None,
3004
+        consecutive_errors=0,
3005
+    )
3006
+
3007
+    assert queued_messages
3008
+    message = queued_messages[-1]
3009
+    assert "Todo tracking is updated. Continue with the next pending item: `Creating Chapter 2: Installation and Setup`." in message
3010
+    assert "Resume by creating `02-installation.html` now." in message
3011
+    assert (
3012
+        f"Prefer one `write` call for `{(chapters / '02-installation.html').resolve(strict=False)}` "
3013
+        "instead of more rereads."
3014
+        in message
3015
+    )
3016
+    assert "Make your next response the concrete mutation tool call itself" in message
3017
+
3018
+
28873019
 @pytest.mark.asyncio
28883020
 async def test_tool_batch_runner_todowrite_uses_observed_sibling_pattern_for_next_file(
28893021
     temp_dir: Path,