tenseleyflow/loader / 8331de4

Browse files

Tighten late-stage build nudges

Authored by espadonne
SHA
8331de4e0a4e71229c4b667413521822169c53c8
Parents
d913493
Tree
8b87952

4 changed files

StatusFile+-
M src/loader/runtime/repair.py 27 0
M src/loader/runtime/tool_batches.py 37 0
M tests/test_repair.py 81 0
M tests/test_tool_batches.py 120 0
src/loader/runtime/repair.pymodified
@@ -239,6 +239,29 @@ class ResponseRepairer:
239239
         retry_number: int,
240240
         max_empty_retries: int,
241241
     ) -> str:
242
+        if dod is not None and self._should_compact_empty_retry_message(dod):
243
+            compact_lines: list[str] = []
244
+            compact_lines.extend(self._planned_artifact_progress_lines(dod)[:2])
245
+            compact_lines.extend(
246
+                self._next_step_resume_lines(
247
+                    dod,
248
+                    retry_number=retry_number,
249
+                )
250
+            )
251
+            return "\n".join(
252
+                [
253
+                    "[EMPTY ASSISTANT RESPONSE]",
254
+                    (
255
+                        "Your last response was empty "
256
+                        f"(retry {retry_number}/{max_empty_retries}). Continue from the "
257
+                        "exact next step below."
258
+                    ),
259
+                    *[f"- {line}" for line in compact_lines],
260
+                    "",
261
+                    "Respond with that concrete mutation tool call now. Do not return an empty response.",
262
+                ]
263
+            )
264
+
242265
         progress_lines: list[str] = []
243266
         if dod is not None:
244267
             reconcile_aggregate_completion_steps(
@@ -343,6 +366,10 @@ class ResponseRepairer:
343366
             return base_max_empty_retries
344367
         return base_max_empty_retries + _LATE_STAGE_EMPTY_RETRY_EXTRA
345368
 
369
+    def _should_compact_empty_retry_message(self, dod: DefinitionOfDone) -> bool:
370
+        completed_artifacts, missing_artifacts = self._planned_artifact_counts(dod)
371
+        return completed_artifacts >= 7 and missing_artifacts > 0
372
+
346373
     def _planned_artifact_counts(self, dod: DefinitionOfDone) -> tuple[int, int]:
347374
         completed = 0
348375
         missing = 0
src/loader/runtime/tool_batches.pymodified
@@ -913,6 +913,19 @@ class ToolBatchRunner:
913913
             dod,
914914
             project_root=self.context.project_root,
915915
         )
916
+        if _late_stage_missing_artifact_build(
917
+            dod,
918
+            project_root=self.context.project_root,
919
+        ):
920
+            self.context.queue_steering_message(
921
+                f"Confirmed progress: {current_label} is now recorded."
922
+                + _missing_artifact_resume_suffix(
923
+                    missing_artifact,
924
+                    project_root=self.context.project_root,
925
+                )
926
+                + " No TodoWrite, no verification, no rereads until that artifact exists."
927
+            )
928
+            return
916929
         self.context.queue_steering_message(
917930
             f"Confirmed progress: {current_label} is now recorded."
918931
             " One explicitly planned artifact is still missing."
@@ -1129,6 +1142,30 @@ def _next_missing_planned_artifact(
11291142
     return None
11301143
 
11311144
 
1145
+def _late_stage_missing_artifact_build(
1146
+    dod: DefinitionOfDone,
1147
+    *,
1148
+    project_root: Path,
1149
+) -> bool:
1150
+    completed = 0
1151
+    missing = 0
1152
+    for target, expect_directory in collect_planned_artifact_targets(
1153
+        dod,
1154
+        project_root=project_root,
1155
+        max_paths=12,
1156
+    ):
1157
+        if planned_artifact_target_satisfied(
1158
+            dod,
1159
+            target=target,
1160
+            expect_directory=expect_directory,
1161
+            project_root=project_root,
1162
+        ):
1163
+            completed += 1
1164
+        else:
1165
+            missing += 1
1166
+    return completed >= 7 and missing > 0
1167
+
1168
+
11321169
 def _missing_artifact_resume_suffix(
11331170
     missing_artifact: tuple[Path, bool] | None,
11341171
     *,
tests/test_repair.pymodified
@@ -459,6 +459,87 @@ def test_empty_response_retry_budget_extends_for_late_stage_multi_artifact_progr
459459
     assert "Follow the same one-file-at-a-time mutation pattern" in decision.retry_message
460460
 
461461
 
462
+def test_empty_response_retry_uses_compact_prompt_after_substantial_progress(
463
+    temp_dir: Path,
464
+) -> None:
465
+    context = build_context(
466
+        temp_dir=temp_dir,
467
+        use_react=False,
468
+    )
469
+    context.session.messages.append(
470
+        SimpleNamespace(
471
+            content=(
472
+                "Observation [notepad_write_working]: Result: "
473
+                "- [2026-04-23T19:00:00Z] Creating fifth chapter file: Advanced features"
474
+            )
475
+        )
476
+    )
477
+    repairer = ResponseRepairer(context)
478
+
479
+    guide_root = temp_dir / "guides" / "nginx"
480
+    chapters = guide_root / "chapters"
481
+    chapters.mkdir(parents=True)
482
+    index_path = guide_root / "index.html"
483
+    chapter_one = chapters / "01-getting-started.html"
484
+    chapter_two = chapters / "02-installation.html"
485
+    chapter_three = chapters / "03-first-website.html"
486
+    chapter_four = chapters / "04-configuration-basics.html"
487
+    chapter_five = chapters / "05-advanced-features.html"
488
+    index_path.write_text("<html></html>\n")
489
+    chapter_one.write_text("<h1>One</h1>\n")
490
+    chapter_two.write_text("<h1>Two</h1>\n")
491
+    chapter_three.write_text("<h1>Three</h1>\n")
492
+    chapter_four.write_text("<h1>Four</h1>\n")
493
+
494
+    implementation_plan = temp_dir / "implementation.md"
495
+    implementation_plan.write_text(
496
+        "\n".join(
497
+            [
498
+                "# Implementation Plan",
499
+                "",
500
+                "## File Changes",
501
+                f"- `{guide_root}/`",
502
+                f"- `{chapters}/`",
503
+                f"- `{index_path}`",
504
+                f"- `{chapter_one}`",
505
+                f"- `{chapter_two}`",
506
+                f"- `{chapter_three}`",
507
+                f"- `{chapter_four}`",
508
+                f"- `{chapter_five}`",
509
+                "",
510
+            ]
511
+        )
512
+    )
513
+
514
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
515
+    dod.implementation_plan = str(implementation_plan)
516
+    dod.touched_files.extend(
517
+        [str(index_path), str(chapter_one), str(chapter_two), str(chapter_three)]
518
+    )
519
+    dod.completed_items.extend(
520
+        [
521
+            "Create the directory structure for the new nginx guide",
522
+            "Create the main index.html file with proper structure",
523
+        ]
524
+    )
525
+    dod.pending_items.append("Create each chapter file in sequence")
526
+
527
+    decision = repairer.handle_empty_response(
528
+        task="Create a multi-file nginx guide.",
529
+        original_task=None,
530
+        empty_retry_count=3,
531
+        max_empty_retries=2,
532
+        dod=dod,
533
+    )
534
+
535
+    assert decision.should_continue is True
536
+    assert decision.retry_message is not None
537
+    assert "Continue from the exact next step below." in decision.retry_message
538
+    assert "Latest working note:" not in decision.retry_message
539
+    assert "Confirmed completed work:" not in decision.retry_message
540
+    assert "Next pending item:" not in decision.retry_message
541
+
542
+
462543
 def test_empty_response_retry_points_at_next_output_file_when_planned_directory_is_empty(
463544
     temp_dir: Path,
464545
 ) -> None:
tests/test_tool_batches.pymodified
@@ -2183,6 +2183,126 @@ async def test_tool_batch_runner_large_plan_does_not_claim_completion_early(
21832183
     )
21842184
 
21852185
 
2186
+@pytest.mark.asyncio
2187
+async def test_tool_batch_runner_uses_compact_missing_artifact_nudge_after_substantial_progress(
2188
+    temp_dir: Path,
2189
+) -> None:
2190
+    async def assess_confidence(
2191
+        tool_name: str,
2192
+        tool_args: dict,
2193
+        context: str,
2194
+    ) -> ConfidenceAssessment:
2195
+        raise AssertionError("Confidence scoring should not run in this scenario")
2196
+
2197
+    async def verify_action(
2198
+        tool_name: str,
2199
+        tool_args: dict,
2200
+        result: str,
2201
+        expected: str = "",
2202
+    ) -> ActionVerification:
2203
+        raise AssertionError("Verification should not run in this scenario")
2204
+
2205
+    guide_root = temp_dir / "guides" / "nginx"
2206
+    chapters = guide_root / "chapters"
2207
+    guide_root.mkdir(parents=True)
2208
+    chapters.mkdir()
2209
+    index_path = guide_root / "index.html"
2210
+    chapter_paths = [
2211
+        chapters / "01-introduction.html",
2212
+        chapters / "02-installation.html",
2213
+        chapters / "03-configuration.html",
2214
+        chapters / "04-basic-usage.html",
2215
+        chapters / "05-advanced-features.html",
2216
+    ]
2217
+    for path in (index_path, *chapter_paths[:4]):
2218
+        path.write_text("<html></html>\n")
2219
+
2220
+    implementation_plan = temp_dir / "implementation.md"
2221
+    implementation_plan.write_text(
2222
+        "\n".join(
2223
+            [
2224
+                "# Implementation Plan",
2225
+                "",
2226
+                "## File Changes",
2227
+                f"- `{guide_root}/`",
2228
+                f"- `{chapters}/`",
2229
+                f"- `{index_path}`",
2230
+                *[f"- `{path}`" for path in chapter_paths],
2231
+                "",
2232
+            ]
2233
+        )
2234
+    )
2235
+
2236
+    context = build_context(
2237
+        temp_dir=temp_dir,
2238
+        messages=[],
2239
+        safeguards=FakeSafeguards(),
2240
+        assess_confidence=assess_confidence,
2241
+        verify_action=verify_action,
2242
+        auto_recover=False,
2243
+    )
2244
+    queued_messages: list[str] = []
2245
+    context.queue_steering_message_callback = queued_messages.append
2246
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2247
+    dod = create_definition_of_done("Create a thorough nginx guide.")
2248
+    dod.implementation_plan = str(implementation_plan)
2249
+    dod.touched_files.extend(str(path) for path in (index_path, *chapter_paths[:4]))
2250
+    dod.completed_items.extend(
2251
+        [
2252
+            "Create the nginx directory structure",
2253
+            "Create the main index.html file with proper structure",
2254
+        ]
2255
+    )
2256
+    sync_todos_to_definition_of_done(
2257
+        dod,
2258
+        [
2259
+            {
2260
+                "content": "Create each chapter file with appropriate content",
2261
+                "active_form": "Creating each chapter file with appropriate content",
2262
+                "status": "pending",
2263
+            }
2264
+        ],
2265
+    )
2266
+    tool_call = ToolCall(
2267
+        id="write-chapter-04",
2268
+        name="write",
2269
+        arguments={
2270
+            "file_path": str(chapter_paths[3]),
2271
+            "content": "<html>updated</html>\n",
2272
+        },
2273
+    )
2274
+    executor = FakeExecutor(
2275
+        [
2276
+            tool_outcome(
2277
+                tool_call=tool_call,
2278
+                output=f"Successfully wrote {chapter_paths[3]}",
2279
+                is_error=False,
2280
+            )
2281
+        ]
2282
+    )
2283
+
2284
+    summary = TurnSummary(final_response="")
2285
+    await runner.execute_batch(
2286
+        tool_calls=[tool_call],
2287
+        tool_source="assistant",
2288
+        pending_tool_calls_seen=set(),
2289
+        emit=_noop_emit,
2290
+        summary=summary,
2291
+        dod=dod,
2292
+        executor=executor,  # type: ignore[arg-type]
2293
+        on_confirmation=None,
2294
+        on_user_question=None,
2295
+        emit_confirmation=None,
2296
+        consecutive_errors=0,
2297
+    )
2298
+
2299
+    assert queued_messages
2300
+    message = queued_messages[-1]
2301
+    assert "Resume by creating `05-advanced-features.html` now." in message
2302
+    assert "No TodoWrite, no verification, no rereads until that artifact exists." in message
2303
+    assert "refresh `TodoWrite`" not in message
2304
+
2305
+
21862306
 @pytest.mark.asyncio
21872307
 async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_resume_step(
21882308
     temp_dir: Path,