tenseleyflow/loader / 18ae40f

Browse files

Name missing files from observed shapes

Authored by espadonne
SHA
18ae40ffbc2319aac5500fca07bd6adefd7c3b5a
Parents
39c0fab
Tree
50f77f0

5 changed files

StatusFile+-
M src/loader/runtime/dod.py 73 1
M src/loader/runtime/repair.py 17 5
M src/loader/runtime/tool_batches.py 24 4
M tests/test_repair.py 77 1
M tests/test_tool_batches.py 137 0
src/loader/runtime/dod.pymodified
@@ -10,7 +10,7 @@ from datetime import UTC, datetime
1010
 from pathlib import Path
1111
 from typing import Any, Literal
1212
 
13
-from ..llm.base import ToolCall
13
+from ..llm.base import Message, ToolCall
1414
 from ..tools.shell_tools import BashTool
1515
 from .verification_observations import VerificationAttempt, verification_attempt_id
1616
 
@@ -742,6 +742,35 @@ def infer_next_declared_html_output_file(
742742
     return missing_targets[0] if missing_targets else None
743743
 
744744
 
745
def infer_next_output_file(
    *,
    target: Path,
    project_root: Path,
    messages: list[Message] | None = None,
) -> tuple[Path | None, str | None]:
    """Pick the next concrete file to create inside a planned output directory.

    The declared inference is consulted first; the observed inference is only
    attempted when the declared one yields nothing.

    Returns:
        A ``(path, source)`` pair. ``source`` is ``"declared"`` when the path
        came from the current artifact graph, ``"observed"`` when it mirrors a
        file from an already-inspected sibling directory, and both elements
        are ``None`` when no file could be inferred.
    """
    declared = infer_next_declared_html_output_file(
        target=target,
        project_root=project_root,
    )
    if declared is not None:
        return declared, "declared"

    # Fall back to filenames seen in earlier tool calls.
    observed = _infer_next_observed_output_file(
        target=target,
        messages=messages or [],
    )
    if observed is None:
        return None, None
    return observed, "observed"
772
+
773
+
745774
 def collect_missing_declared_html_output_files(
746775
     *,
747776
     target: Path,
@@ -786,6 +815,49 @@ def collect_missing_declared_html_output_files(
786815
     return tuple(missing_targets)
787816
 
788817
 
818
def _infer_next_observed_output_file(
    *,
    target: Path,
    messages: list[Message],
) -> Path | None:
    """Suggest an output file for ``target`` mirrored from a previously read sibling.

    Scans the ``read`` tool calls recorded on ``messages`` for HTML files that
    live outside ``target`` in a directory sharing ``target``'s name, and
    proposes the alphabetically first filename that ``target`` does not
    already contain.

    Args:
        target: Planned output directory. A path with a suffix is treated as
            a file and yields no suggestion.
        messages: Conversation messages; entries without ``tool_calls`` are
            ignored.

    Returns:
        ``target / <observed filename>`` for the first missing candidate, or
        ``None`` when nothing suitable was observed.
    """
    html_suffixes = {".html", ".htm"}
    index_names = {"index.html", "index.htm"}

    normalized_target = target.resolve(strict=False)
    if normalized_target.suffix:
        # A suffix means the target looks like a file, not a directory.
        return None

    # Collect both .html and .htm files so the "already exists" check agrees
    # with the suffixes accepted for candidates below.
    existing_names = {
        path.name
        for path in normalized_target.glob("*")
        if path.suffix.lower() in html_suffixes and path.is_file()
    }

    candidate_names: set[str] = set()
    for message in messages:
        for tool_call in getattr(message, "tool_calls", None) or []:
            if tool_call.name != "read":
                continue
            arguments = tool_call.arguments or {}
            raw_path = str(arguments.get("file_path", "")).strip()
            if not raw_path:
                continue
            observed_path = Path(raw_path).expanduser().resolve(strict=False)
            if observed_path.suffix.lower() not in html_suffixes:
                continue
            # Index pages are not mirrored as per-directory output files.
            if observed_path.name.lower() in index_names:
                continue
            # Only directories sharing the target's name count as siblings.
            if observed_path.parent.name != normalized_target.name:
                continue
            # Reads from inside the target itself are not sibling observations.
            if observed_path.is_relative_to(normalized_target):
                continue
            if observed_path.name in existing_names:
                continue
            candidate_names.add(observed_path.name)

    if not candidate_names:
        return None
    # min() keeps the choice deterministic (lexicographically first name).
    return normalized_target / min(candidate_names)
859
+
860
+
789861
 def _build_planned_artifact_verification_commands(
790862
     targets: list[tuple[Path, bool]],
791863
 ) -> list[str]:
src/loader/runtime/repair.pymodified
@@ -11,7 +11,7 @@ from .context import RuntimeContext
1111
 from .dod import (
1212
     DefinitionOfDone,
1313
     collect_planned_artifact_targets,
14
-    infer_next_declared_html_output_file,
14
+    infer_next_output_file,
1515
     planned_artifact_target_satisfied,
1616
 )
1717
 from .parsing import parse_tool_calls
@@ -430,14 +430,20 @@ class ResponseRepairer:
430430
             (None, False),
431431
         )
432432
         if first_missing_target is not None and first_missing_is_directory:
433
-            next_output_file = infer_next_declared_html_output_file(
433
+            next_output_file, next_output_source = infer_next_output_file(
434434
                 target=first_missing_target,
435435
                 project_root=self.context.project_root,
436
+                messages=list(getattr(self.context.session, "messages", []) or []),
436437
             )
437438
             if next_output_file is not None:
438
-                lines.append(
439
+                next_output_detail = (
439440
                     "Next declared output under "
440
-                    f"{self._format_artifact_label(first_missing_target, expect_directory=True)}: "
441
+                    if next_output_source == "declared"
442
+                    else "Next observed output pattern under "
443
+                )
444
+                lines.append(
445
+                    next_output_detail
446
+                    + f"{self._format_artifact_label(first_missing_target, expect_directory=True)}: "
441447
                     f"{self._format_artifact_label(next_output_file, expect_directory=False)}"
442448
                 )
443449
         if len(missing_labels) > 1:
@@ -509,9 +515,10 @@ class ResponseRepairer:
509515
                 expect_directory=expect_directory,
510516
             )
511517
             if expect_directory:
512
-                next_output_file = infer_next_declared_html_output_file(
518
+                next_output_file, next_output_source = infer_next_output_file(
513519
                     target=target,
514520
                     project_root=self.context.project_root,
521
+                    messages=list(getattr(self.context.session, "messages", []) or []),
515522
                 )
516523
                 if next_output_file is not None:
517524
                     next_output_label = self._format_artifact_label(
@@ -530,6 +537,11 @@ class ResponseRepairer:
530537
                         ]
531538
                     lines.append(
532539
                         f"It is the next missing declared output under {label}."
540
+                        if next_output_source == "declared"
541
+                        else (
542
+                            "It mirrors the observed filename pattern from another "
543
+                            f"{label} directory you already inspected."
544
+                        )
533545
                     )
534546
                     lines.append(
535547
                         f"Prefer one `write` call for `{next_output_file}` before more research."
src/loader/runtime/tool_batches.pymodified
@@ -18,7 +18,7 @@ from .dod import (
1818
     collect_planned_artifact_targets,
1919
     derive_verification_commands,
2020
     ensure_active_verification_attempt,
21
-    infer_next_declared_html_output_file,
21
+    infer_next_output_file,
2222
     is_state_mutating_tool_call,
2323
     planned_artifact_target_satisfied,
2424
     record_successful_tool_call,
@@ -365,6 +365,7 @@ class ToolBatchRunner:
365365
                 + _missing_artifact_resume_suffix(
366366
                     missing_artifact,
367367
                     project_root=self.context.project_root,
368
+                    messages=list(getattr(self.context.session, "messages", []) or []),
368369
                 )
369370
                 + " Do not switch into review or consistency-check mode until the missing artifact exists."
370371
             )
@@ -375,6 +376,7 @@ class ToolBatchRunner:
375376
                 mutation_suffix = _missing_artifact_resume_suffix(
376377
                     missing_artifact,
377378
                     project_root=self.context.project_root,
379
+                    messages=list(getattr(self.context.session, "messages", []) or []),
378380
                 )
379381
                 if not mutation_suffix:
380382
                     mutation_suffix = (
@@ -404,6 +406,7 @@ class ToolBatchRunner:
404406
                 + _missing_artifact_resume_suffix(
405407
                     missing_artifact,
406408
                     project_root=self.context.project_root,
409
+                    messages=list(getattr(self.context.session, "messages", []) or []),
407410
                 ).strip()
408411
             )
409412
             return
@@ -594,6 +597,7 @@ class ToolBatchRunner:
594597
             + _missing_artifact_resume_suffix(
595598
                 missing_artifact,
596599
                 project_root=self.context.project_root,
600
+                messages=list(getattr(self.context.session, "messages", []) or []),
597601
             )
598602
             + f" Stay within the current output roots under {roots_preview}"
599603
             + " and finish that artifact before reopening older reference materials."
@@ -818,6 +822,7 @@ class ToolBatchRunner:
818822
                 + _missing_artifact_resume_suffix(
819823
                     missing_artifact,
820824
                     project_root=self.context.project_root,
825
+                    messages=list(getattr(self.context.session, "messages", []) or []),
821826
                 )
822827
                 + " Do not switch into review or consistency-check mode until the missing artifact exists."
823828
             )
@@ -828,6 +833,7 @@ class ToolBatchRunner:
828833
             mutation_suffix = _missing_artifact_resume_suffix(
829834
                 missing_artifact,
830835
                 project_root=self.context.project_root,
836
+                messages=list(getattr(self.context.session, "messages", []) or []),
831837
             )
832838
             if not mutation_suffix:
833839
                 mutation_suffix = (
@@ -922,6 +928,7 @@ class ToolBatchRunner:
922928
                 + _missing_artifact_resume_suffix(
923929
                     missing_artifact,
924930
                     project_root=self.context.project_root,
931
+                    messages=list(getattr(self.context.session, "messages", []) or []),
925932
                 )
926933
                 + " No TodoWrite, no verification, no rereads until that artifact exists."
927934
             )
@@ -932,6 +939,7 @@ class ToolBatchRunner:
932939
             + _missing_artifact_resume_suffix(
933940
                 missing_artifact,
934941
                 project_root=self.context.project_root,
942
+                messages=list(getattr(self.context.session, "messages", []) or []),
935943
             )
936944
             + todo_refresh
937945
             + " Do not move to verification, final confirmation, or TodoWrite-only "
@@ -1039,6 +1047,7 @@ class ToolBatchRunner:
10391047
             + _missing_artifact_resume_suffix(
10401048
                 missing_artifact,
10411049
                 project_root=self.context.project_root,
1050
+                messages=list(getattr(self.context.session, "messages", []) or []),
10421051
             )
10431052
             + todo_refresh
10441053
             + " Do not spend the next turn on TodoWrite alone, bookkeeping notes, "
@@ -1096,6 +1105,7 @@ class ToolBatchRunner:
10961105
             + _missing_artifact_resume_suffix(
10971106
                 missing_artifact,
10981107
                 project_root=self.context.project_root,
1108
+                messages=list(getattr(self.context.session, "messages", []) or []),
10991109
             )
11001110
             + todo_refresh
11011111
             + " Do not spend the next turn on additional notes, rediscovery, "
@@ -1170,6 +1180,7 @@ def _missing_artifact_resume_suffix(
11701180
     missing_artifact: tuple[Path, bool] | None,
11711181
     *,
11721182
     project_root: Path,
1183
+    messages: list[Any] | None = None,
11731184
 ) -> str:
11741185
     if missing_artifact is None:
11751186
         return ""
@@ -1179,14 +1190,23 @@ def _missing_artifact_resume_suffix(
11791190
     if expect_directory and not label.endswith("/"):
11801191
         label += "/"
11811192
     if expect_directory:
1182
-        next_output_file = infer_next_declared_html_output_file(
1193
+        next_output_file, next_output_source = infer_next_output_file(
11831194
             target=target,
11841195
             project_root=project_root,
1196
+            messages=list(messages or []),
11851197
         )
11861198
         if next_output_file is not None:
1199
+            guidance_origin = (
1200
+                f"It is the next missing declared output under `{label}`."
1201
+                if next_output_source == "declared"
1202
+                else (
1203
+                    "It mirrors the observed filename pattern from another "
1204
+                    f"`{label}` directory you already inspected."
1205
+                )
1206
+            )
11871207
             guidance = (
1188
-                f" Resume by creating `{next_output_file.name}` now. It is the next missing "
1189
-                f"declared output under `{label}`. Prefer one `write` call for "
1208
+                f" Resume by creating `{next_output_file.name}` now. {guidance_origin} "
1209
+                f"Prefer one `write` call for "
11901210
                 f"`{next_output_file}` instead of more rereads."
11911211
             )
11921212
             if not next_output_file.parent.exists():
tests/test_repair.pymodified
@@ -6,7 +6,7 @@ import json
66
 from pathlib import Path
77
 from types import SimpleNamespace
88
 
9
-from loader.llm.base import ToolCall
9
+from loader.llm.base import Message, Role, ToolCall
1010
 from loader.runtime.context import RuntimeContext
1111
 from loader.runtime.dod import create_definition_of_done
1212
 from loader.runtime.permissions import (
@@ -909,3 +909,79 @@ def test_empty_response_retry_omits_stale_aggregate_completed_work_when_artifact
909909
     assert decision.retry_message is not None
910910
     assert "Link all chapters together properly" not in decision.retry_message
911911
     assert "Create the main index.html file with proper structure" in decision.retry_message
912
+
913
+
914
def test_empty_response_retry_names_next_file_from_observed_sibling_directory(
    temp_dir: Path,
) -> None:
    """Empty-response retry names the next chapter file from a read sibling directory."""
    context = build_context(temp_dir=temp_dir, use_react=False)
    repairer = ResponseRepairer(context)

    # Reference guide whose chapter file was already read by the assistant.
    sibling_dir = temp_dir / "fortran" / "chapters"
    sibling_dir.mkdir(parents=True)
    sibling_chapter = sibling_dir / "01-introduction.html"
    sibling_chapter.write_text("<h1>Introduction</h1>\n")

    # Guide under construction: index exists, chapters/ is still empty.
    guide_root = temp_dir / "guides" / "nginx"
    guide_root.mkdir(parents=True)
    chapters = guide_root / "chapters"
    chapters.mkdir()
    index_path = guide_root / "index.html"
    index_path.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.pending_items.append("Write the introduction chapter")

    read_call = ToolCall(
        id="read-ref-1",
        name="read",
        arguments={"file_path": str(sibling_chapter)},
    )
    context.session.append(
        Message(role=Role.ASSISTANT, content="", tool_calls=[read_call])
    )

    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert decision.should_continue is True
    assert decision.retry_message is not None

    expected_fragments = (
        "Next missing planned artifact: `chapters/`",
        "Next observed output pattern under `chapters/`: `01-introduction.html`",
        (
            "Resume with this exact next step: continue `Write the introduction chapter` "
            "by creating `01-introduction.html`."
        ),
        (
            "It mirrors the observed filename pattern from another `chapters/` directory "
            "you already inspected."
        ),
    )
    for fragment in expected_fragments:
        assert fragment in decision.retry_message
tests/test_tool_batches.pymodified
@@ -2884,6 +2884,143 @@ async def test_tool_batch_runner_todowrite_with_declared_child_targets_names_nex
28842884
     assert "Do not spend the next turn on TodoWrite alone" in message
28852885
 
28862886
 
2887
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_uses_observed_sibling_pattern_for_next_file(
    temp_dir: Path,
) -> None:
    """A TodoWrite-only batch queues a resume nudge naming the observed sibling file."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # Reference guide whose chapter file appears in an earlier `read` call.
    sibling_dir = temp_dir / "fortran" / "chapters"
    sibling_dir.mkdir(parents=True)
    sibling_chapter = sibling_dir / "01-introduction.html"
    sibling_chapter.write_text("<h1>Introduction</h1>\n")

    # Guide under construction: index exists, chapters/ is still empty.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    index_path.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items = [
        "Write the introduction chapter",
        "Complete the requested work",
    ]
    dod.touched_files.append(str(index_path))

    prior_read = Message(
        role=Role.ASSISTANT,
        content="",
        tool_calls=[
            ToolCall(
                id="read-ref-1",
                name="read",
                arguments={"file_path": str(sibling_chapter)},
            )
        ],
    )
    queued_messages: list[str] = []
    context = build_context(
        temp_dir=temp_dir,
        messages=[prior_read],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

    requested_todo = {
        "content": "Write the introduction chapter",
        "activeForm": "Writing the introduction chapter",
        "status": "pending",
    }
    recorded_todo = {
        "content": "Write the introduction chapter",
        "active_form": "Writing the introduction chapter",
        "status": "pending",
    }
    tool_call = ToolCall(
        id="todo-observed-1",
        name="TodoWrite",
        arguments={"todos": [requested_todo]},
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": [recorded_todo]},
            )
        ]
    )

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert queued_messages
    message = queued_messages[-1]

    expected_fragments = (
        "Todo tracking is updated. An explicitly planned artifact is still missing.",
        "Continue with the next pending item: `Write the introduction chapter`.",
        "Resume by creating `01-introduction.html` now.",
        (
            "It mirrors the observed filename pattern from another `chapters/` directory "
            "you already inspected."
        ),
        "01-introduction.html` instead of more rereads.",
    )
    for fragment in expected_fragments:
        assert fragment in message
3022
+
3023
+
28873024
 @pytest.mark.asyncio
28883025
 async def test_tool_batch_runner_bookkeeping_note_with_missing_artifact_requeues_resume_step(
28893026
     temp_dir: Path,