`7944195`

Keep quality repairs active

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: 7944195c93f4dab6bbe17927eb588042628c9438
Parents: 9097b6f
Tree: eac3f59

2 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	78	0
M	`tests/test_tool_batches.py`	237	0

src/loader/runtime/tool_batches.py (modified)

      ) -> str | None:
          if tool_call.name != "TodoWrite" or not single_tool_batch:
              return None
++        repair = extract_active_repair_context(self.context.session.messages)
++        if repair is not None and _repair_context_is_html_quality(repair):
++            return None
          if not all_planned_artifact_outputs_exist(
              dod,
              project_root=self.context.project_root,
          if not target:
              return
++        if _repair_context_is_html_quality(repair):
++            repair_issue = _quality_repair_issue_for_target(repair, target)
++            issue_line = (
++                f"- {repair_issue}\n"
++                if repair_issue
++                else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n"
++            )
++            self.context.queue_steering_message(
++                "That edit would make no on-disk change. "
++                f"`{target}` already matches the change you attempted, but the active "
++                "content-quality repair is not complete until a verifier-targeted "
++                "mutation actually changes the file. Do not mark the task complete, "
++                "do not use TodoWrite as a substitute for repair, and do not finish yet.\n\n"
++                "Repair focus:\n"
++                f"{issue_line}"
++                f"- Immediate next step: edit `{target}`.\n"
++                "- Submit one `edit`, `patch`, or `write` call that adds substantial "
++                "new content or structure to the current generated file."
++            )
++            return
++
          verification_commands = dod.verification_commands or derive_verification_commands(
              dod,
              project_root=self.context.project_root,
          *,
          dod: DefinitionOfDone,
      ) -> None:
++        repair = extract_active_repair_context(self.context.session.messages)
++        if repair is not None and _repair_context_is_html_quality(repair):
++            target = repair.artifact_path or (
++                repair.allowed_paths[0] if repair.allowed_paths else ""
++            )
++            if not target:
++                return
++            repair_issue = _quality_repair_issue_for_target(repair, target)
++            issue_line = (
++                f"- {repair_issue}\n"
++                if repair_issue
++                else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n"
++            )
++            self.context.set_workflow_mode("execute")
++            self.context.queue_steering_message(
++                "Todo tracking is updated, but verification still has an active "
++                "HTML content-quality repair. TodoWrite cannot satisfy that verifier "
++                "or close the repair by itself. Do not mark the task complete and do "
++                "not finish yet.\n\n"
++                "Repair focus:\n"
++                f"{issue_line}"
++                f"- Immediate next step: edit `{target}`.\n"
++                "- Continue with one concrete `edit`, `patch`, or `write` call that "
++                "actually changes the current generated file."
++            )
++            return
++
          session_messages = list(getattr(self.context.session, "messages", []) or [])
          missing_artifact = _next_missing_planned_artifact(
              dod,
      return "; ".join(preview)
++def _quality_repair_issue_for_target(repair: Any, target: str) -> str:
++    """Return the most relevant content-quality repair line for a target path."""
++
++    target_text = str(target or "").strip()
++    try:
++        normalized_target = str(Path(target_text).expanduser().resolve(strict=False))
++    except (OSError, RuntimeError, ValueError):
++        normalized_target = str(Path(target_text).expanduser()) if target_text else ""
++
++    first_quality_line = ""
++    for raw_line in getattr(repair, "repair_lines", ()) or ():
++        line = str(raw_line or "").strip()
++        if not line:
++            continue
++        clean_line = line[2:].strip() if line.startswith("- ") else line
++        if not _repair_line_is_html_quality(clean_line):
++            continue
++        if not first_quality_line:
++            first_quality_line = clean_line
++        if target_text and target_text in clean_line:
++            return clean_line
++        if normalized_target and normalized_target in clean_line:
++            return clean_line
++
++    return first_quality_line
++
++
  def _tool_call_label(tool_call: ToolCall) -> str:
      """Human-readable label for one tool call."""
      name = tool_call.name

tests/test_tool_batches.py (modified)

      assert context.workflow_mode == "verify"
++@pytest.mark.asyncio
++async def test_tool_batch_runner_todowrite_during_quality_repair_requires_mutation(
++    temp_dir: Path,
++) -> None:
++    async def assess_confidence(
++        tool_name: str,
++        tool_args: dict,
++        context: str,
++    ) -> ConfidenceAssessment:
++        raise AssertionError("Confidence scoring should not run for this scenario")
++
++    async def verify_action(
++        tool_name: str,
++        tool_args: dict,
++        result: str,
++        expected: str = "",
++    ) -> ActionVerification:
++        raise AssertionError("Verification should not run for this scenario")
++
++    guide_root = temp_dir / "guides" / "nginx"
++    chapters = guide_root / "chapters"
++    chapters.mkdir(parents=True)
++    index_path = guide_root / "index.html"
++    chapter_one = chapters / "01-introduction.html"
++    index_path.write_text("<html></html>\n")
++    chapter_one.write_text("<html></html>\n")
++
++    implementation_plan = temp_dir / "implementation.md"
++    implementation_plan.write_text(
++        "\n".join(
++            [
++                "# Implementation Plan",
++                "",
++                "## File Changes",
++                f"- `{guide_root}/`",
++                f"- `{chapters}/`",
++                f"- `{index_path}`",
++                f"- `{chapter_one}`",
++                "",
++            ]
++        )
++    )
++
++    context = build_context(
++        temp_dir=temp_dir,
++        messages=[
++            Message(
++                role=Role.USER,
++                content=(
++                    "Repair focus:\n"
++                    f"- Improve `{chapter_one}`: thin content (409 text chars, expected at least 1758).\n"
++                    f"- Improve `{chapter_one}`: insufficient structured content (6 blocks, expected at least 18).\n"
++                    f"- Immediate next step: edit `{chapter_one}`.\n"
++                ),
++            )
++        ],
++        safeguards=FakeSafeguards(),
++        assess_confidence=assess_confidence,
++        verify_action=verify_action,
++        auto_recover=False,
++    )
++    context.set_workflow_mode("verify")
++    queued_messages: list[str] = []
++    emitted_responses: list[str] = []
++    context.queue_steering_message_callback = queued_messages.append
++    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
++    dod = create_definition_of_done("Create a multi-file nginx guide.")
++    dod.implementation_plan = str(implementation_plan)
++    dod.verification_commands = [f"ls -la {guide_root}"]
++    sync_todos_to_definition_of_done(
++        dod,
++        [
++            {
++                "content": "Expand generated chapters to satisfy quality verification",
++                "active_form": "Expanding generated chapters",
++                "status": "in_progress",
++            }
++        ],
++        project_root=temp_dir,
++    )
++
++    tool_call = ToolCall(
++        id="todo-quality",
++        name="TodoWrite",
++        arguments={
++            "todos": [
++                {
++                    "content": "Expand generated chapters to satisfy quality verification",
++                    "active_form": "Expanding generated chapters",
++                    "status": "completed",
++                }
++            ]
++        },
++    )
++    executor = FakeExecutor(
++        [
++            tool_outcome(
++                tool_call=tool_call,
++                output="Todos updated",
++                is_error=False,
++                metadata={
++                    "new_todos": [
++                        {
++                            "content": "Expand generated chapters to satisfy quality verification",
++                            "active_form": "Expanding generated chapters",
++                            "status": "completed",
++                        }
++                    ]
++                },
++            )
++        ]
++    )
++
++    async def emit(event: AgentEvent) -> None:
++        if event.type == "response":
++            emitted_responses.append(str(event.content))
++
++    summary = TurnSummary(final_response="")
++    result = await runner.execute_batch(
++        tool_calls=[tool_call],
++        tool_source="assistant",
++        pending_tool_calls_seen=set(),
++        emit=emit,
++        summary=summary,
++        dod=dod,
++        executor=executor,  # type: ignore[arg-type]
++        on_confirmation=None,
++        on_user_question=None,
++        emit_confirmation=None,
++        consecutive_errors=0,
++    )
++
++    assert queued_messages
++    message = queued_messages[-1]
++    assert "verification still has an active HTML content-quality repair" in message
++    assert "TodoWrite cannot satisfy that verifier" in message
++    assert f"Immediate next step: edit `{chapter_one.resolve(strict=False)}`" in message
++    assert "thin content" in message
++    assert "Finish with a final response now" not in message
++    assert context.workflow_mode == "execute"
++    assert result.halted is False
++    assert summary.final_response == ""
++    assert not emitted_responses
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff(
      temp_dir: Path,
      assert "replace the surrounding block" not in queued[0]
++def test_tool_batch_runner_blocked_noop_edit_keeps_quality_repair_active_after_full_build(
++    temp_dir: Path,
++) -> None:
++    async def assess_confidence(
++        tool_name: str,
++        tool_args: dict,
++        context: str,
++    ) -> ConfidenceAssessment:
++        raise AssertionError("Confidence scoring should be disabled in this scenario")
++
++    async def verify_action(
++        tool_name: str,
++        tool_args: dict,
++        result: str,
++        expected: str = "",
++    ) -> ActionVerification:
++        raise AssertionError("Verification should not run in this scenario")
++
++    guide_root = temp_dir / "guide"
++    chapters = guide_root / "chapters"
++    chapters.mkdir(parents=True)
++    index_path = guide_root / "index.html"
++    chapter_one = chapters / "01-introduction.html"
++    chapter_two = chapters / "02-installation.html"
++    index_path.write_text("<html></html>\n")
++    chapter_one.write_text("<html></html>\n")
++    chapter_two.write_text("<html></html>\n")
++
++    implementation_plan = temp_dir / "implementation.md"
++    implementation_plan.write_text(
++        "\n".join(
++            [
++                "# Implementation Plan",
++                "",
++                "## File Changes",
++                f"- `{index_path}`",
++                f"- `{chapter_one}`",
++                f"- `{chapter_two}`",
++                "",
++            ]
++        )
++    )
++
++    context = build_context(
++        temp_dir=temp_dir,
++        messages=[
++            Message(
++                role=Role.USER,
++                content=(
++                    "Repair focus:\n"
++                    f"- Improve `{chapter_two}`: thin content (504 text chars, expected at least 1758).\n"
++                    f"- Improve `{chapter_two}`: insufficient structured content (6 blocks, expected at least 18).\n"
++                    f"- Immediate next step: edit `{chapter_two}`.\n"
++                ),
++            )
++        ],
++        safeguards=FakeSafeguards(),
++        assess_confidence=assess_confidence,
++        verify_action=verify_action,
++    )
++    queued: list[str] = []
++    context.queue_steering_message_callback = queued.append
++    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
++
++    dod = create_definition_of_done("Create a multi-file guide.")
++    dod.implementation_plan = str(implementation_plan)
++    dod.touched_files.extend([str(index_path), str(chapter_one), str(chapter_two)])
++    dod.verification_commands = [f"ls -la {guide_root}"]
++
++    runner._queue_blocked_html_edit_nudge(
++        ToolCall(
++            id="edit-1",
++            name="edit",
++            arguments={
++                "file_path": str(chapter_two),
++                "old_string": "same",
++                "new_string": "same",
++            },
++        ),
++        "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
++        dod=dod,
++    )
++
++    assert queued
++    assert "active content-quality repair is not complete" in queued[0]
++    assert "Repair focus:" in queued[0]
++    assert f"Immediate next step: edit `{chapter_two}`" in queued[0]
++    assert "thin content" in queued[0]
++    assert "TodoWrite cannot satisfy" not in queued[0]
++    assert "Finish with a final response now" not in queued[0]
++
++
  async def _noop_emit(event: AgentEvent) -> None:
      return None