`7944195`

Keep quality repairs active

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: 7944195c93f4dab6bbe17927eb588042628c9438
Parents: 9097b6f
Tree: eac3f59

2 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	78	0
M	`tests/test_tool_batches.py`	237	0

src/loader/runtime/tool_batches.py modified

      ) -> str | None:
          if tool_call.name != "TodoWrite" or not single_tool_batch:
              return None
 +        repair = extract_active_repair_context(self.context.session.messages)
 +        if repair is not None and _repair_context_is_html_quality(repair):
 +            return None
          if not all_planned_artifact_outputs_exist(
              dod,
              project_root=self.context.project_root,
          if not target:
              return
 +        if _repair_context_is_html_quality(repair):
 +            repair_issue = _quality_repair_issue_for_target(repair, target)
 +            issue_line = (
 +                f"- {repair_issue}\n"
 +                if repair_issue
 +                else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n"
 +            )
 +            self.context.queue_steering_message(
 +                "That edit would make no on-disk change. "
 +                f"`{target}` already matches the change you attempted, but the active "
 +                "content-quality repair is not complete until a verifier-targeted "
 +                "mutation actually changes the file. Do not mark the task complete, "
 +                "do not use TodoWrite as a substitute for repair, and do not finish yet.\n\n"
 +                "Repair focus:\n"
 +                f"{issue_line}"
 +                f"- Immediate next step: edit `{target}`.\n"
 +                "- Submit one `edit`, `patch`, or `write` call that adds substantial "
 +                "new content or structure to the current generated file."
 +            )
 +            return
++
          verification_commands = dod.verification_commands or derive_verification_commands(
              dod,
              project_root=self.context.project_root,
          *,
          dod: DefinitionOfDone,
      ) -> None:
 +        repair = extract_active_repair_context(self.context.session.messages)
 +        if repair is not None and _repair_context_is_html_quality(repair):
 +            target = repair.artifact_path or (
 +                repair.allowed_paths[0] if repair.allowed_paths else ""
 +            )
 +            if not target:
 +                return
 +            repair_issue = _quality_repair_issue_for_target(repair, target)
 +            issue_line = (
 +                f"- {repair_issue}\n"
 +                if repair_issue
 +                else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n"
 +            )
 +            self.context.set_workflow_mode("execute")
 +            self.context.queue_steering_message(
 +                "Todo tracking is updated, but verification still has an active "
 +                "HTML content-quality repair. TodoWrite cannot satisfy that verifier "
 +                "or close the repair by itself. Do not mark the task complete and do "
 +                "not finish yet.\n\n"
 +                "Repair focus:\n"
 +                f"{issue_line}"
 +                f"- Immediate next step: edit `{target}`.\n"
 +                "- Continue with one concrete `edit`, `patch`, or `write` call that "
 +                "actually changes the current generated file."
 +            )
 +            return
++
          session_messages = list(getattr(self.context.session, "messages", []) or [])
          missing_artifact = _next_missing_planned_artifact(
              dod,
      return "; ".join(preview)
 +def _quality_repair_issue_for_target(repair: Any, target: str) -> str:
 +    """Return the most relevant content-quality repair line for a target path."""
++
 +    target_text = str(target or "").strip()
 +    try:
 +        normalized_target = str(Path(target_text).expanduser().resolve(strict=False))
 +    except (OSError, RuntimeError, ValueError):
 +        normalized_target = str(Path(target_text).expanduser()) if target_text else ""
++
 +    first_quality_line = ""
 +    for raw_line in getattr(repair, "repair_lines", ()) or ():
 +        line = str(raw_line or "").strip()
 +        if not line:
 +            continue
 +        clean_line = line[2:].strip() if line.startswith("- ") else line
 +        if not _repair_line_is_html_quality(clean_line):
 +            continue
 +        if not first_quality_line:
 +            first_quality_line = clean_line
 +        if target_text and target_text in clean_line:
 +            return clean_line
 +        if normalized_target and normalized_target in clean_line:
 +            return clean_line
++
 +    return first_quality_line
++
++
  def _tool_call_label(tool_call: ToolCall) -> str:
      """Human-readable label for one tool call."""
      name = tool_call.name

tests/test_tool_batches.py modified

      assert context.workflow_mode == "verify"
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_todowrite_during_quality_repair_requires_mutation(
 +    temp_dir: Path,
 +) -> None:
 +    """Check that a TodoWrite batch does not close an active HTML quality repair.
++
 +    A single ``TodoWrite`` call marking the only todo completed is executed
 +    while the session holds a "Repair focus" message for ``chapter_one``. The
 +    test expects a queued steering message that restates the repair, the
 +    workflow mode to become ``"execute"``, and no halt or final response.
 +    """
++
 +    # Neither hook should be reached in this scenario; each fails the test if called.
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should not run for this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run for this scenario")
++
 +    # On-disk fixture: an index page plus one chapter, both near-empty HTML.
 +    guide_root = temp_dir / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    chapters.mkdir(parents=True)
 +    index_path = guide_root / "index.html"
 +    chapter_one = chapters / "01-introduction.html"
 +    index_path.write_text("<html></html>\n")
 +    chapter_one.write_text("<html></html>\n")
++
 +    # Implementation plan listing the directories and files above; every listed
 +    # path already exists on disk.
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root}/`",
 +                f"- `{chapters}/`",
 +                f"- `{index_path}`",
 +                f"- `{chapter_one}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    # The only session message is a repair prompt naming chapter_one.
 +    # NOTE(review): presumably this is what the runner's repair-context
 +    # extraction parses; that parser is not visible here - confirm.
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[
 +            Message(
 +                role=Role.USER,
 +                content=(
 +                    "Repair focus:\n"
 +                    f"- Improve `{chapter_one}`: thin content (409 text chars, expected at least 1758).\n"
 +                    f"- Improve `{chapter_one}`: insufficient structured content (6 blocks, expected at least 18).\n"
 +                    f"- Immediate next step: edit `{chapter_one}`.\n"
 +                ),
 +            )
 +        ],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +        auto_recover=False,
 +    )
 +    # Start in "verify" so the final assertion can observe the switch to "execute".
 +    context.set_workflow_mode("verify")
 +    queued_messages: list[str] = []
 +    emitted_responses: list[str] = []
 +    # Capture steering messages instead of delivering them.
 +    context.queue_steering_message_callback = queued_messages.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.verification_commands = [f"ls -la {guide_root}"]
 +    # Seed the definition of done with the todo still in progress.
 +    sync_todos_to_definition_of_done(
 +        dod,
 +        [
 +            {
 +                "content": "Expand generated chapters to satisfy quality verification",
 +                "active_form": "Expanding generated chapters",
 +                "status": "in_progress",
 +            }
 +        ],
 +        project_root=temp_dir,
 +    )
++
 +    # The TodoWrite call under test flips that same todo to "completed".
 +    tool_call = ToolCall(
 +        id="todo-quality",
 +        name="TodoWrite",
 +        arguments={
 +            "todos": [
 +                {
 +                    "content": "Expand generated chapters to satisfy quality verification",
 +                    "active_form": "Expanding generated chapters",
 +                    "status": "completed",
 +                }
 +            ]
 +        },
 +    )
 +    # Canned successful outcome for the call above, handed to the fake executor.
 +    executor = FakeExecutor(
 +        [
 +            tool_outcome(
 +                tool_call=tool_call,
 +                output="Todos updated",
 +                is_error=False,
 +                metadata={
 +                    "new_todos": [
 +                        {
 +                            "content": "Expand generated chapters to satisfy quality verification",
 +                            "active_form": "Expanding generated chapters",
 +                            "status": "completed",
 +                        }
 +                    ]
 +                },
 +            )
 +        ]
 +    )
++
 +    # Record only "response" events so the test can assert none were emitted.
 +    async def emit(event: AgentEvent) -> None:
 +        if event.type == "response":
 +            emitted_responses.append(str(event.content))
++
 +    summary = TurnSummary(final_response="")
 +    result = await runner.execute_batch(
 +        tool_calls=[tool_call],
 +        tool_source="assistant",
 +        pending_tool_calls_seen=set(),
 +        emit=emit,
 +        summary=summary,
 +        dod=dod,
 +        executor=executor,  # type: ignore[arg-type]
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
++
 +    # The last steering message must restate the repair and name the resolved
 +    # chapter path, without any "finish now" wording.
 +    assert queued_messages
 +    message = queued_messages[-1]
 +    assert "verification still has an active HTML content-quality repair" in message
 +    assert "TodoWrite cannot satisfy that verifier" in message
 +    assert f"Immediate next step: edit `{chapter_one.resolve(strict=False)}`" in message
 +    assert "thin content" in message
 +    assert "Finish with a final response now" not in message
 +    # The turn keeps going in execute mode: no halt, no final response emitted.
 +    assert context.workflow_mode == "execute"
 +    assert result.halted is False
 +    assert summary.final_response == ""
 +    assert not emitted_responses
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff(
      temp_dir: Path,
      assert "replace the surrounding block" not in queued[0]
 +def test_tool_batch_runner_blocked_noop_edit_keeps_quality_repair_active_after_full_build(
 +    temp_dir: Path,
 +) -> None:
 +    """Check the nudge queued for a blocked no-op edit during a quality repair.
++
 +    Calls ``_queue_blocked_html_edit_nudge`` directly (synchronously) with an
 +    ``edit`` whose old and new strings are identical, while the session holds a
 +    "Repair focus" message for ``chapter_two``. The first queued steering
 +    message must keep the content-quality repair focus on that file.
 +    """
++
 +    # Neither hook should be reached in this scenario; each fails the test if called.
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should be disabled in this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run in this scenario")
++
 +    # On-disk fixture: an index page plus two chapters, all near-empty HTML.
 +    guide_root = temp_dir / "guide"
 +    chapters = guide_root / "chapters"
 +    chapters.mkdir(parents=True)
 +    index_path = guide_root / "index.html"
 +    chapter_one = chapters / "01-introduction.html"
 +    chapter_two = chapters / "02-installation.html"
 +    index_path.write_text("<html></html>\n")
 +    chapter_one.write_text("<html></html>\n")
 +    chapter_two.write_text("<html></html>\n")
++
 +    # Implementation plan listing the three files; all of them already exist.
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{index_path}`",
 +                f"- `{chapter_one}`",
 +                f"- `{chapter_two}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    # The only session message is a repair prompt naming chapter_two.
 +    # NOTE(review): presumably this is what the runner's repair-context
 +    # extraction parses; that parser is not visible here - confirm.
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[
 +            Message(
 +                role=Role.USER,
 +                content=(
 +                    "Repair focus:\n"
 +                    f"- Improve `{chapter_two}`: thin content (504 text chars, expected at least 1758).\n"
 +                    f"- Improve `{chapter_two}`: insufficient structured content (6 blocks, expected at least 18).\n"
 +                    f"- Immediate next step: edit `{chapter_two}`.\n"
 +                ),
 +            )
 +        ],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +    )
 +    # Capture steering messages instead of delivering them.
 +    queued: list[str] = []
 +    context.queue_steering_message_callback = queued.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
++
 +    # Definition of done that records all three planned files as touched.
 +    dod = create_definition_of_done("Create a multi-file guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.touched_files.extend([str(index_path), str(chapter_one), str(chapter_two)])
 +    dod.verification_commands = [f"ls -la {guide_root}"]
++
 +    # Simulate the blocked result of an edit whose old and new strings are identical.
 +    runner._queue_blocked_html_edit_nudge(
 +        ToolCall(
 +            id="edit-1",
 +            name="edit",
 +            arguments={
 +                "file_path": str(chapter_two),
 +                "old_string": "same",
 +                "new_string": "same",
 +            },
 +        ),
 +        "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
 +        dod=dod,
 +    )
++
 +    # The nudge must use the no-op-edit wording, restate the repair focus for
 +    # chapter_two, and avoid both the TodoWrite and "finish now" wording.
 +    assert queued
 +    assert "active content-quality repair is not complete" in queued[0]
 +    assert "Repair focus:" in queued[0]
 +    assert f"Immediate next step: edit `{chapter_two}`" in queued[0]
 +    assert "thin content" in queued[0]
 +    assert "TodoWrite cannot satisfy" not in queued[0]
 +    assert "Finish with a final response now" not in queued[0]
++
++
  async def _noop_emit(event: AgentEvent) -> None:
      return None