tenseleyflow/loader / aa4c3b0

Browse files

Persist first file handoff

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
aa4c3b0ed21070560eebca04bbde26570d8f62fd
Parents
3842320
Tree
ec346af

2 changed files

Status | File | + | -
M src/loader/runtime/tool_batches.py 95 14
M tests/test_tool_batches.py 116 0
src/loader/runtime/tool_batches.py (modified)
@@ -1106,9 +1106,19 @@ class ToolBatchRunner:
11061106
             dod,
11071107
             project_root=self.context.project_root,
11081108
         )
1109
+        resume_suffix = _pending_item_resume_suffix(
1110
+            dod,
1111
+            next_pending=next_pending,
1112
+            missing_artifact=missing_artifact,
1113
+            project_root=self.context.project_root,
1114
+            messages=list(getattr(self.context.session, "messages", []) or []),
1115
+        )
11091116
         queue_message = (
11101117
             self.context.queue_steering_message
1111
-            if not has_file_artifact_progress
1118
+            if _should_use_persistent_missing_artifact_handoff(
1119
+                dod,
1120
+                project_root=self.context.project_root,
1121
+            )
11121122
             else self.context.queue_ephemeral_steering_message
11131123
         )
11141124
         todo_refresh = _todo_refresh_guidance(
@@ -1134,22 +1144,14 @@ class ToolBatchRunner:
11341144
         ):
11351145
             queue_message(
11361146
                 f"Confirmed progress: {current_label} is now recorded."
1137
-                + _missing_artifact_resume_suffix(
1138
-                    missing_artifact,
1139
-                    project_root=self.context.project_root,
1140
-                    messages=list(getattr(self.context.session, "messages", []) or []),
1141
-                )
1147
+                + resume_suffix
11421148
                 + " No TodoWrite, no verification, no rereads until that artifact exists."
11431149
             )
11441150
             return
11451151
         queue_message(
11461152
             f"Confirmed progress: {current_label} is now recorded."
11471153
             " One declared output artifact is still missing."
1148
-            + _missing_artifact_resume_suffix(
1149
-                missing_artifact,
1150
-                project_root=self.context.project_root,
1151
-                messages=list(getattr(self.context.session, "messages", []) or []),
1152
-            )
1154
+            + resume_suffix
11531155
             + todo_refresh
11541156
             + " Do not move to verification, final confirmation, or TodoWrite-only "
11551157
             "bookkeeping until that artifact exists."
@@ -1527,6 +1529,15 @@ def _has_confirmed_file_artifact_progress(
15271529
     *,
15281530
     project_root: Path,
15291531
 ) -> bool:
1532
+    return _confirmed_file_artifact_count(dod, project_root=project_root) > 0
1533
+
1534
+
1535
+def _confirmed_file_artifact_count(
1536
+    dod: DefinitionOfDone,
1537
+    *,
1538
+    project_root: Path,
1539
+) -> int:
1540
+    count = 0
15301541
     for target, expect_directory in collect_planned_artifact_targets(
15311542
         dod,
15321543
         project_root=project_root,
@@ -1540,14 +1551,61 @@ def _has_confirmed_file_artifact_progress(
15401551
             expect_directory=False,
15411552
             project_root=project_root,
15421553
         ):
1543
-            return True
1544
-    return any(
1545
-        Path(path).expanduser().resolve(strict=False).suffix
1554
+            count += 1
1555
+    if count:
1556
+        return count
1557
+    return sum(
1558
+        1
15461559
         for path in dod.touched_files
15471560
         if str(path).strip()
1561
+        and Path(path).expanduser().resolve(strict=False).suffix
15481562
     )
15491563
 
15501564
 
def _should_use_persistent_missing_artifact_handoff(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> bool:
    """Report whether the missing-artifact handoff should be queued persistently.

    The answer is ``True`` while ``_confirmed_file_artifact_count`` reports
    fewer than two confirmed file artifacts for ``dod`` (that is, zero or one),
    and ``False`` from the second confirmed artifact onward.

    Args:
        dod: Definition of done whose confirmed artifacts are counted.
        project_root: Project root forwarded to the artifact counter.

    Returns:
        ``True`` when the confirmed file-artifact count is below two.
    """
    confirmed_files = _confirmed_file_artifact_count(dod, project_root=project_root)
    return confirmed_files < 2
1574
+
1575
+
def _next_missing_planned_file_within_directory(
    dod: DefinitionOfDone,
    *,
    target: Path,
    project_root: Path,
) -> Path | None:
    """Find the first planned file under ``target`` that is not yet satisfied.

    ``target`` is normalized with ``expanduser().resolve(strict=False)``. When
    the normalized path carries a suffix it is treated as a file rather than a
    directory and the search is skipped.

    Args:
        dod: Definition of done supplying the planned artifact targets.
        target: Directory whose planned child files are inspected.
        project_root: Project root forwarded to the planning helpers.

    Returns:
        The normalized path of the first planned, non-directory target that
        lies at or below ``target`` and is not reported as satisfied by
        ``planned_artifact_target_satisfied``; ``None`` when there is none.
    """
    directory = target.expanduser().resolve(strict=False)
    if directory.suffix:
        # A suffixed path looks like a file, so it has no children to offer.
        return None

    planned_targets = collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    )
    for candidate, is_directory in planned_targets:
        if is_directory:
            continue
        candidate_path = candidate.expanduser().resolve(strict=False)
        # Only planned files located inside the requested directory qualify.
        if not candidate_path.is_relative_to(directory):
            continue
        already_satisfied = planned_artifact_target_satisfied(
            dod,
            target=candidate_path,
            expect_directory=False,
            project_root=project_root,
        )
        if not already_satisfied:
            return candidate_path
    return None
1607
+
1608
+
15511609
 def _missing_artifact_resume_suffix(
15521610
     missing_artifact: tuple[Path, bool] | None,
15531611
     *,
@@ -1589,6 +1647,21 @@ def _pending_item_resume_suffix(
15891647
                 messages=messages,
15901648
                 allow_inferred_child=False,
15911649
             )
1650
+    if missing_artifact is not None and missing_artifact[1]:
1651
+        next_planned_file = _next_missing_planned_file_within_directory(
1652
+            dod,
1653
+            target=missing_artifact[0],
1654
+            project_root=project_root,
1655
+        )
1656
+        if next_planned_file is not None:
1657
+            parent_label = missing_artifact[0].name or str(missing_artifact[0])
1658
+            return (
1659
+                f" Resume by creating `{next_planned_file.name}` now."
1660
+                f" It is the next missing declared output under `{parent_label}/`."
1661
+                f" Prefer one `write` call for `{next_planned_file}` instead of more rereads."
1662
+                " Make your next response the concrete mutation tool call itself, not another"
1663
+                " bookkeeping-only turn."
1664
+            )
15921665
     return _missing_artifact_resume_suffix(
15931666
         missing_artifact,
15941667
         project_root=project_root,
@@ -1621,6 +1694,14 @@ def _preferred_resume_target_path(
16211694
     if not expect_directory:
16221695
         return normalized_target
16231696
 
1697
+    next_planned_file = _next_missing_planned_file_within_directory(
1698
+        dod,
1699
+        target=normalized_target,
1700
+        project_root=project_root,
1701
+    )
1702
+    if next_planned_file is not None:
1703
+        return next_planned_file.expanduser().resolve(strict=False)
1704
+
16241705
     next_output_file, _ = infer_next_output_file(
16251706
         target=normalized_target,
16261707
         project_root=project_root,
tests/test_tool_batches.py (modified)
@@ -2132,6 +2132,122 @@ async def test_tool_batch_runner_missing_artifact_nudge_prefers_pending_index_af
21322132
     assert ephemeral_messages == []
21332133
 
21342134
 
@pytest.mark.asyncio
async def test_tool_batch_runner_first_file_handoff_stays_persistent(
    temp_dir: Path,
) -> None:
    """The handoff after the first written file goes to the persistent queue.

    The plan declares a chapters directory, an index file, and one chapter
    file. A single successful ``write`` of the index file is executed, after
    which the test expects the last persistent steering message to point at
    the chapter file and expects no ephemeral steering messages at all.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    # Lay out the guide tree: only the chapters directory exists up front.
    guide_root = temp_dir / "guides" / "nginx"
    chapters_dir = guide_root / "chapters"
    chapters_dir.mkdir(parents=True)
    index_file = guide_root / "index.html"
    intro_chapter = chapters_dir / "01-introduction.html"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{chapters_dir}/`",
        f"- `{index_file}`",
        f"- `{intro_chapter}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    # Capture both steering queues separately so the test can tell them apart.
    persistent_messages: list[str] = []
    ephemeral_messages: list[str] = []
    context.queue_steering_message_callback = persistent_messages.append
    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    pending_todos = [
        {
            "content": "Create the main index.html file with proper structure",
            "active_form": "Creating the main index.html file with proper structure",
            "status": "pending",
        },
        {
            "content": "Create each chapter file with appropriate content",
            "active_form": "Creating each chapter file with appropriate content",
            "status": "pending",
        },
    ]
    sync_todos_to_definition_of_done(dod, pending_todos)

    write_index_call = ToolCall(
        id="write-index",
        name="write",
        arguments={
            "file_path": str(index_file),
            "content": "<html></html>\n",
        },
    )
    write_index_outcome = tool_outcome(
        tool_call=write_index_call,
        output=f"Successfully wrote 14 bytes to {index_file}",
        is_error=False,
    )
    executor = FakeExecutor([write_index_outcome])

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[write_index_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert persistent_messages
    message = persistent_messages[-1]
    expected_write_hint = (
        f"Prefer one `write` call for `{intro_chapter.resolve(strict=False)}` "
        "instead of more rereads."
    )
    assert "Confirmed progress:" in message
    assert "Resume by creating `01-introduction.html` now." in message
    assert expected_write_hint in message
    assert "Do not move to verification, final confirmation, or TodoWrite-only bookkeeping" in message
    assert ephemeral_messages == []
2249
+
2250
+
21352251
 @pytest.mark.asyncio
21362252
 async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_review(
21372253
     temp_dir: Path,