tenseleyflow/loader / 7944195

Browse files

Keep quality repairs active

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
7944195c93f4dab6bbe17927eb588042628c9438
Parents
9097b6f
Tree
eac3f59

2 changed files

Status File + -
M src/loader/runtime/tool_batches.py 78 0
M tests/test_tool_batches.py 237 0
src/loader/runtime/tool_batches.py modified
@@ -534,6 +534,9 @@ class ToolBatchRunner:
534
     ) -> str | None:
534
     ) -> str | None:
535
         if tool_call.name != "TodoWrite" or not single_tool_batch:
535
         if tool_call.name != "TodoWrite" or not single_tool_batch:
536
             return None
536
             return None
537
+        repair = extract_active_repair_context(self.context.session.messages)
538
+        if repair is not None and _repair_context_is_html_quality(repair):
539
+            return None
537
         if not all_planned_artifact_outputs_exist(
540
         if not all_planned_artifact_outputs_exist(
538
             dod,
541
             dod,
539
             project_root=self.context.project_root,
542
             project_root=self.context.project_root,
@@ -1164,6 +1167,27 @@ class ToolBatchRunner:
1164
         if not target:
1167
         if not target:
1165
             return
1168
             return
1166
 
1169
 
1170
+        if _repair_context_is_html_quality(repair):
1171
+            repair_issue = _quality_repair_issue_for_target(repair, target)
1172
+            issue_line = (
1173
+                f"- {repair_issue}\n"
1174
+                if repair_issue
1175
+                else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n"
1176
+            )
1177
+            self.context.queue_steering_message(
1178
+                "That edit would make no on-disk change. "
1179
+                f"`{target}` already matches the change you attempted, but the active "
1180
+                "content-quality repair is not complete until a verifier-targeted "
1181
+                "mutation actually changes the file. Do not mark the task complete, "
1182
+                "do not use TodoWrite as a substitute for repair, and do not finish yet.\n\n"
1183
+                "Repair focus:\n"
1184
+                f"{issue_line}"
1185
+                f"- Immediate next step: edit `{target}`.\n"
1186
+                "- Submit one `edit`, `patch`, or `write` call that adds substantial "
1187
+                "new content or structure to the current generated file."
1188
+            )
1189
+            return
1190
+
1167
         verification_commands = dod.verification_commands or derive_verification_commands(
1191
         verification_commands = dod.verification_commands or derive_verification_commands(
1168
             dod,
1192
             dod,
1169
             project_root=self.context.project_root,
1193
             project_root=self.context.project_root,
@@ -1973,6 +1997,33 @@ class ToolBatchRunner:
1973
         *,
1997
         *,
1974
         dod: DefinitionOfDone,
1998
         dod: DefinitionOfDone,
1975
     ) -> None:
1999
     ) -> None:
2000
+        repair = extract_active_repair_context(self.context.session.messages)
2001
+        if repair is not None and _repair_context_is_html_quality(repair):
2002
+            target = repair.artifact_path or (
2003
+                repair.allowed_paths[0] if repair.allowed_paths else ""
2004
+            )
2005
+            if not target:
2006
+                return
2007
+            repair_issue = _quality_repair_issue_for_target(repair, target)
2008
+            issue_line = (
2009
+                f"- {repair_issue}\n"
2010
+                if repair_issue
2011
+                else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n"
2012
+            )
2013
+            self.context.set_workflow_mode("execute")
2014
+            self.context.queue_steering_message(
2015
+                "Todo tracking is updated, but verification still has an active "
2016
+                "HTML content-quality repair. TodoWrite cannot satisfy that verifier "
2017
+                "or close the repair by itself. Do not mark the task complete and do "
2018
+                "not finish yet.\n\n"
2019
+                "Repair focus:\n"
2020
+                f"{issue_line}"
2021
+                f"- Immediate next step: edit `{target}`.\n"
2022
+                "- Continue with one concrete `edit`, `patch`, or `write` call that "
2023
+                "actually changes the current generated file."
2024
+            )
2025
+            return
2026
+
1976
         session_messages = list(getattr(self.context.session, "messages", []) or [])
2027
         session_messages = list(getattr(self.context.session, "messages", []) or [])
1977
         missing_artifact = _next_missing_planned_artifact(
2028
         missing_artifact = _next_missing_planned_artifact(
1978
             dod,
2029
             dod,
@@ -3384,6 +3435,33 @@ def _active_repair_focus_preview(repair_lines: list[str], *, max_lines: int = 4)
3384
     return "; ".join(preview)
3435
     return "; ".join(preview)
3385
 
3436
 
3386
 
3437
 
3438
def _quality_repair_issue_for_target(repair: Any, target: str) -> str:
    """Return the most relevant content-quality repair line for a target path.

    Args:
        repair: Active repair context. Only its ``repair_lines`` attribute is
            read; a missing or empty attribute is treated as "no lines".
        target: Path of the file under repair, as it appears in steering text.

    Returns:
        The first HTML-quality repair line (leading ``"- "`` bullet removed)
        that mentions ``target`` verbatim or in its resolved form. If no line
        mentions the target, the first HTML-quality line is returned instead,
        and ``""`` when there is no HTML-quality line at all.
    """

    target_text = str(target or "").strip()
    normalized_target = ""
    if target_text:
        # Only normalize a non-empty target: ``Path("")`` resolves to the
        # current working directory, which would make every line mentioning a
        # path under the cwd look like a match for the (absent) target.
        try:
            normalized_target = str(Path(target_text).expanduser().resolve(strict=False))
        except (OSError, RuntimeError, ValueError):
            # ``expanduser()`` is itself a source of RuntimeError (unknown
            # ``~user``), so fall back to the raw text instead of calling it
            # a second time outside any handler.
            normalized_target = target_text

    first_quality_line = ""
    for raw_line in getattr(repair, "repair_lines", ()) or ():
        line = str(raw_line or "").strip()
        if not line:
            continue
        clean_line = line.removeprefix("- ").strip()
        if not _repair_line_is_html_quality(clean_line):
            continue
        if not first_quality_line:
            # Remembered as the fallback when nothing names the target.
            first_quality_line = clean_line
        if target_text and target_text in clean_line:
            return clean_line
        if normalized_target and normalized_target in clean_line:
            return clean_line

    return first_quality_line
3463
+
3464
+
3387
 def _tool_call_label(tool_call: ToolCall) -> str:
3465
 def _tool_call_label(tool_call: ToolCall) -> str:
3388
     """Human-readable label for one tool call."""
3466
     """Human-readable label for one tool call."""
3389
     name = tool_call.name
3467
     name = tool_call.name
tests/test_tool_batches.py modified
@@ -4276,6 +4276,151 @@ async def test_tool_batch_runner_todowrite_after_outputs_exist_but_links_missing
4276
     assert context.workflow_mode == "verify"
4276
     assert context.workflow_mode == "verify"
4277
 
4277
 
4278
 
4278
 
4279
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_during_quality_repair_requires_mutation(
    temp_dir: Path,
) -> None:
    """A lone TodoWrite must not close an active HTML content-quality repair.

    With a quality repair in the session messages, marking the todo completed
    should queue a steering message that points back at the file, switch the
    workflow to ``execute``, and neither halt nor emit a final response.
    """

    async def forbid_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def forbid_verification(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    def todo_entry(status: str) -> dict:
        # A fresh dict per call so no two consumers share one object.
        return {
            "content": "Expand generated chapters to satisfy quality verification",
            "active_form": "Expanding generated chapters",
            "status": status,
        }

    # Every planned artifact already exists on disk.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    for artifact in (index_path, chapter_one):
        artifact.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        f"- `{chapter_one}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    repair_prompt = (
        "Repair focus:\n"
        f"- Improve `{chapter_one}`: thin content (409 text chars, expected at least 1758).\n"
        f"- Improve `{chapter_one}`: insufficient structured content (6 blocks, expected at least 18).\n"
        f"- Immediate next step: edit `{chapter_one}`.\n"
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[Message(role=Role.USER, content=repair_prompt)],
        safeguards=FakeSafeguards(),
        assess_confidence=forbid_confidence,
        verify_action=forbid_verification,
        auto_recover=False,
    )
    context.set_workflow_mode("verify")
    queued_messages: list[str] = []
    emitted_responses: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide_root}"]
    sync_todos_to_definition_of_done(
        dod,
        [todo_entry("in_progress")],
        project_root=temp_dir,
    )

    tool_call = ToolCall(
        id="todo-quality",
        name="TodoWrite",
        arguments={"todos": [todo_entry("completed")]},
    )
    todo_outcome = tool_outcome(
        tool_call=tool_call,
        output="Todos updated",
        is_error=False,
        metadata={"new_todos": [todo_entry("completed")]},
    )
    executor = FakeExecutor([todo_outcome])

    async def emit(event: AgentEvent) -> None:
        if event.type == "response":
            emitted_responses.append(str(event.content))

    summary = TurnSummary(final_response="")
    result = await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert queued_messages
    message = queued_messages[-1]
    required_fragments = (
        "verification still has an active HTML content-quality repair",
        "TodoWrite cannot satisfy that verifier",
        f"Immediate next step: edit `{chapter_one.resolve(strict=False)}`",
        "thin content",
    )
    for fragment in required_fragments:
        assert fragment in message
    assert "Finish with a final response now" not in message
    assert context.workflow_mode == "execute"
    assert result.halted is False
    assert summary.final_response == ""
    assert not emitted_responses
4422
+
4423
+
4279
 @pytest.mark.asyncio
4424
 @pytest.mark.asyncio
4280
 async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff(
4425
 async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff(
4281
     temp_dir: Path,
4426
     temp_dir: Path,
@@ -6491,6 +6636,98 @@ def test_tool_batch_runner_blocked_noop_edit_after_full_build_prefers_verificati
6491
     assert "replace the surrounding block" not in queued[0]
6636
     assert "replace the surrounding block" not in queued[0]
6492
 
6637
 
6493
 
6638
 
6639
def test_tool_batch_runner_blocked_noop_edit_keeps_quality_repair_active_after_full_build(
    temp_dir: Path,
) -> None:
    """A blocked no-op edit during a quality repair must keep the repair open.

    All planned files exist, yet the queued nudge should restate the repair
    focus for the target file rather than suggest finishing.
    """

    async def forbid_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def forbid_verification(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # The full build is already on disk: index plus two chapters.
    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    chapter_two = chapters / "02-installation.html"
    built_files = (index_path, chapter_one, chapter_two)
    for built in built_files:
        built.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_path}`",
        f"- `{chapter_one}`",
        f"- `{chapter_two}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    repair_prompt = (
        "Repair focus:\n"
        f"- Improve `{chapter_two}`: thin content (504 text chars, expected at least 1758).\n"
        f"- Improve `{chapter_two}`: insufficient structured content (6 blocks, expected at least 18).\n"
        f"- Immediate next step: edit `{chapter_two}`.\n"
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[Message(role=Role.USER, content=repair_prompt)],
        safeguards=FakeSafeguards(),
        assess_confidence=forbid_confidence,
        verify_action=forbid_verification,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

    dod = create_definition_of_done("Create a multi-file guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend([str(path) for path in built_files])
    dod.verification_commands = [f"ls -la {guide_root}"]

    noop_edit = ToolCall(
        id="edit-1",
        name="edit",
        arguments={
            "file_path": str(chapter_two),
            "old_string": "same",
            "new_string": "same",
        },
    )
    runner._queue_blocked_html_edit_nudge(
        noop_edit,
        "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
        dod=dod,
    )

    assert queued
    nudge = queued[0]
    required_fragments = (
        "active content-quality repair is not complete",
        "Repair focus:",
        f"Immediate next step: edit `{chapter_two}`",
        "thin content",
    )
    for fragment in required_fragments:
        assert fragment in nudge
    for forbidden in ("TodoWrite cannot satisfy", "Finish with a final response now"):
        assert forbidden not in nudge
+
6730
+
6494
 async def _noop_emit(event: AgentEvent) -> None:
6731
 async def _noop_emit(event: AgentEvent) -> None:
6495
     return None
6732
     return None
6496
 
6733