`09f576c`

Tighten payload-free mutation recovery

Authored by

espadonne 2 weeks ago

SHA: 09f576cf02b4803d672efa5307ec1838884b3c16
Parents: dcbd31b
Tree: 05b0f18

6 changed files

Status	File	+	-
M	`src/loader/runtime/recovery.py`	117	0
M	`src/loader/runtime/repair.py`	48	0
M	`src/loader/runtime/tool_batch_recovery.py`	63	1
M	`tests/test_recovery.py`	54	0
M	`tests/test_repair.py`	72	0
M	`tests/test_tool_batch_policies.py`	63	0

src/loader/runtime/recovery.py (modified)

      ):
          return ErrorCategory.INVALID_ARGUMENTS
 +    if any(
 +        token in error_lower
 +        for token in [
 +            "required positional argument",
 +            "missing 1 required",
 +            "missing required positional",
 +            "empty content",
 +        ]
 +    ):
 +        return ErrorCategory.INVALID_ARGUMENTS
++
      if any(token in error_lower for token in ["network", "unreachable", "dns", "getaddrinfo"]):
          return ErrorCategory.NETWORK_ERROR
      return ErrorCategory.UNKNOWN
 +def detect_missing_mutation_payload(
 +    tool_name: str,
 +    args: dict[str, Any] | None,
 +    error: str,
 +) -> dict[str, Any] | None:
 +    """Detect metadata-only mutation calls missing their real text payload.
++
 +    A call is flagged when it carries summary fields (for example
 +    ``content_chars``) but lacks the field(s) that hold the actual text.
++
 +    Args:
 +        tool_name: Tool that was called; only ``"write"``, ``"edit"`` and
 +            ``"patch"`` are inspected.
 +        args: Arguments of the call; ``None`` is treated as an empty dict.
 +        error: Error text of the failed call. A non-empty value must contain
 +            one of the known "missing payload" phrases (matched
 +            case-insensitively), otherwise nothing is flagged. An empty
 +            string skips this filter so only the arguments are inspected.
++
 +    Returns:
 +        ``None`` when nothing is detected; otherwise a dict with
 +        ``required_fields`` (the payload field(s) to supply),
 +        ``invalid_fields`` (summary fields found in the call) and
 +        ``file_path`` (the stripped ``file_path``/``path`` argument, or ``""``).
 +    """
++
 +    # Shallow copy; also normalizes None to an empty dict.
 +    arguments = dict(args or {})
 +    error_lower = error.lower()
 +    # Gate on the error text only when one was supplied; "" bypasses the filter.
 +    if error and not any(
 +        token in error_lower
 +        for token in [
 +            "required positional argument",
 +            "missing 1 required",
 +            "missing required",
 +            "empty content",
 +            "validation warning",
 +        ]
 +    ):
 +        return None
++
 +    # Either argument name may carry the target; falsy values collapse to "".
 +    file_path = str(arguments.get("file_path") or arguments.get("path") or "").strip()
++
 +    # write: a summary counter is present but there is no "content" key at all
 +    # (key presence is tested, not truthiness).
 +    if tool_name == "write":
 +        invalid_fields = [
 +            field for field in ("content_chars", "content_lines") if field in arguments
 +        ]
 +        if "content" not in arguments and invalid_fields:
 +            return {
 +                "required_fields": ["content"],
 +                "invalid_fields": invalid_fields,
 +                "file_path": file_path,
 +            }
++
 +    # edit: at least one of old_string/new_string is absent and at least one
 +    # summary counter is present; only the absent keys are reported as required.
 +    if tool_name == "edit":
 +        missing_fields = [
 +            field for field in ("old_string", "new_string") if field not in arguments
 +        ]
 +        invalid_fields = [
 +            field
 +            for field in (
 +                "old_string_chars",
 +                "old_string_lines",
 +                "new_string_chars",
 +                "new_string_lines",
 +            )
 +            if field in arguments
 +        ]
 +        if missing_fields and invalid_fields:
 +            return {
 +                "required_fields": missing_fields,
 +                "invalid_fields": invalid_fields,
 +                "file_path": file_path,
 +            }
++
 +    # patch: neither "patch" nor "hunks" is present, but "hunk_count" is.
 +    if tool_name == "patch":
 +        invalid_fields = [field for field in ("hunk_count",) if field in arguments]
 +        if "patch" not in arguments and "hunks" not in arguments and invalid_fields:
 +            return {
 +                "required_fields": ["patch or hunks"],
 +                "invalid_fields": invalid_fields,
 +                "file_path": file_path,
 +            }
++
 +    return None
++
++
  def get_recovery_hints(
      category: ErrorCategory,
      tool_name: str,
              "If the exact replacement span is unclear, read just the target file and then edit it",
          ] + category_hints
 +    payload_fix = detect_missing_mutation_payload(tool_name, args, "")
 +    if payload_fix is not None:
 +        required = ", ".join(payload_fix["required_fields"])
 +        invalid = ", ".join(payload_fix["invalid_fields"])
 +        target = payload_fix["file_path"]
 +        if tool_name == "write":
 +            category_hints = [
 +                (
 +                    f"Resend the mutation as `write(file_path=..., content='...')` "
 +                    f"for `{target}` with the real file body"
 +                    if target
 +                    else "Resend the mutation as `write(file_path=..., content='...')` with the real file body"
 +                ),
 +                (
 +                    f"`{invalid}` are summary fields, not valid write inputs; provide `{required}` instead"
 +                ),
 +                "Do not reread reference files first unless one specific fact still blocks the write",
 +            ]
 +        elif tool_name == "edit":
 +            category_hints = [
 +                (
 +                    f"Resend the mutation for `{target}` with the real `{required}` text payload"
 +                    if target
 +                    else f"Resend the mutation with the real `{required}` text payload"
 +                ),
 +                f"`{invalid}` are summary fields, not valid edit inputs; provide `{required}` instead",
 +                "Do not reread reference files first unless one specific exact replacement span is still unknown",
 +            ]
 +        elif tool_name == "patch":
 +            category_hints = [
 +                (
 +                    f"Resend the mutation for `{target}` with real `patch` text or structured `hunks`"
 +                    if target
 +                    else "Resend the mutation with real `patch` text or structured `hunks`"
 +                ),
 +                f"`{invalid}` are summary fields, not valid patch inputs; provide `{required}` instead",
 +                "Do not reread reference files first unless one specific edit span is still unknown",
 +            ]
++
      return "\n".join(f"- {hint}" for hint in category_hints)

src/loader/runtime/repair.py (modified)

      planned_artifact_target_satisfied,
+ )
  from .parsing import parse_tool_calls
 +from .recovery import detect_missing_mutation_payload
  from .workflow import (
      infer_pending_todo_output_target,
      preferred_pending_todo_item,
          if dod is not None and self._should_compact_empty_retry_message(dod):
              compact_lines: list[str] = []
              compact_lines.extend(self._planned_artifact_progress_lines(dod)[:2])
 +            compact_lines.extend(self._payload_retry_lines())
              compact_lines.extend(
                  self._next_step_resume_lines(
                      dod,
              planned_lines = self._planned_artifact_progress_lines(dod)
              progress_lines.extend(planned_lines)
 +            progress_lines.extend(self._payload_retry_lines())
              progress_lines.extend(
                  self._next_step_resume_lines(
                      dod,
+             ]
+         )
 +    def _payload_retry_lines(self) -> list[str]:
 +        """Build retry reminder lines when the latest recovery attempt lacked its payload.
++
 +        Reads ``self.context.recovery_context`` and checks its most recent
 +        attempt with ``detect_missing_mutation_payload``.
++
 +        Returns:
 +            Two reminder lines for a flagged ``write``, ``edit`` or ``patch``
 +            attempt; an empty list when there is no recovery context, no
 +            recorded attempt, nothing flagged, or another tool name.
 +        """
 +        recovery_context = self.context.recovery_context
 +        if recovery_context is None or not recovery_context.attempts:
 +            return []
 +        # Only the most recent attempt is considered.
 +        attempt = recovery_context.attempts[-1]
 +        fix = detect_missing_mutation_payload(
 +            attempt.tool_name,
 +            attempt.arguments,
 +            attempt.error,
 +        )
 +        if fix is None:
 +            return []
++
 +        target = fix["file_path"]
 +        # Each summary field is wrapped in backticks for display in the message.
 +        invalid = ", ".join(f"`{field}`" for field in fix["invalid_fields"])
 +        if attempt.tool_name == "write":
 +            # The target path is mentioned only when one was found in the arguments.
 +            target_line = (
 +                f"Last tool failure: resend `write` for `{target}` with real `content`, not just summary fields."
 +                if target
 +                else "Last tool failure: resend `write` with real `content`, not just summary fields."
 +            )
 +            return [
 +                target_line,
 +                f"Do not use {invalid} in place of the actual file body.",
 +            ]
 +        if attempt.tool_name == "edit":
 +            return [
 +                (
 +                    f"Last tool failure: resend `edit` for `{target}` with the real text payload."
 +                    if target
 +                    else "Last tool failure: resend `edit` with the real text payload."
 +                ),
 +                f"Do not use {invalid} in place of `old_string`/`new_string`.",
 +            ]
 +        if attempt.tool_name == "patch":
 +            return [
 +                (
 +                    f"Last tool failure: resend `patch` for `{target}` with real patch text or structured hunks."
 +                    if target
 +                    else "Last tool failure: resend `patch` with real patch text or structured hunks."
 +                ),
 +                f"Do not use {invalid} in place of the real patch payload.",
 +            ]
 +        return []
++
      def _todo_refresh_retry_line(self, dod: DefinitionOfDone) -> str | None:
          non_special_pending = [
              item for item in dod.pending_items if item not in _SPECIAL_DOD_ITEMS

src/loader/runtime/tool_batch_recovery.py (modified)

  from .context import RuntimeContext
  from .events import AgentEvent
  from .executor import ToolExecutionOutcome
 -from .recovery import RecoveryContext, format_failure_message, format_recovery_prompt
 +from .recovery import (
 +    RecoveryContext,
 +    detect_missing_mutation_payload,
 +    format_failure_message,
 +    format_recovery_prompt,
 +)
  from .repair_focus import ActiveRepairContext, extract_active_repair_context
  EventSink = Callable[[AgentEvent], Awaitable[None]]
          target_excerpt_lines = self._target_excerpt_lines(tool_call)
          if target_excerpt_lines:
              lines.extend(["", "## CURRENT TARGET EXCERPT", *target_excerpt_lines])
 +        payload_fix_lines = self._missing_payload_fix_lines(tool_call, outcome)
 +        if payload_fix_lines:
 +            lines.extend(["", "## PAYLOAD FORMAT FIX", *payload_fix_lines])
          return "\n".join(lines)
 +    def _missing_payload_fix_lines(
 +        self,
 +        tool_call: ToolCall,
 +        outcome: ToolExecutionOutcome,
 +    ) -> list[str]:
 +        """Build bullet lines explaining a metadata-only mutation call.
++
 +        Args:
 +            tool_call: The call that was executed; its ``name`` and
 +                ``arguments`` are inspected.
 +            outcome: Result of that call; ``result_output`` is used as the
 +                error text passed to ``detect_missing_mutation_payload``.
++
 +        Returns:
 +            Three ``- ``-prefixed lines for a flagged ``write``, ``edit`` or
 +            ``patch`` call; an empty list when nothing is flagged or the tool
 +            name is something else.
 +        """
 +        fix = detect_missing_mutation_payload(
 +            tool_call.name,
 +            tool_call.arguments,
 +            outcome.result_output,
 +        )
 +        if fix is None:
 +            return []
++
 +        target = fix["file_path"]
 +        invalid_fields = ", ".join(f"`{field}`" for field in fix["invalid_fields"])
 +        # Joined with "`, `" because the f-strings below supply the outer
 +        # backticks, so each field ends up individually quoted. Used by the
 +        # edit branch only.
 +        required_fields = "`, `".join(fix["required_fields"])
 +        if tool_call.name == "write":
 +            # The target path is mentioned only when one was found in the arguments.
 +            target_line = (
 +                f"- The failed call for `{target}` omitted the required `content` payload."
 +                if target
 +                else "- The failed call omitted the required `content` payload."
 +            )
 +            return [
 +                target_line,
 +                f"- {invalid_fields} are summary counters, not valid write inputs.",
 +                "- Resend one concrete `write(file_path=..., content='...')` call now instead of rereading more files.",
 +            ]
++
 +        if tool_call.name == "edit":
 +            target_line = (
 +                f"- The failed call for `{target}` omitted the required `{required_fields}` payload."
 +                if target
 +                else f"- The failed call omitted the required `{required_fields}` payload."
 +            )
 +            return [
 +                target_line,
 +                f"- {invalid_fields} are summary counters, not valid edit inputs.",
 +                "- Resend one concrete `edit(file_path=..., old_string='...', new_string='...')` call now instead of rereading more files.",
 +            ]
++
 +        if tool_call.name == "patch":
 +            target_line = (
 +                f"- The failed call for `{target}` omitted the required patch body."
 +                if target
 +                else "- The failed call omitted the required patch body."
 +            )
 +            return [
 +                target_line,
 +                f"- {invalid_fields} are summary counters, not valid patch inputs.",
 +                "- Resend one concrete `patch(file_path=..., patch='...')` or `patch(..., hunks=[...])` call now instead of rereading more files.",
 +            ]
++
 +        return []
++
      def _preferred_focus_path(
          self,
          *,

tests/test_recovery.py (modified)

      def test_invalid_arguments(self):
          assert categorize_error("Invalid argument: path") == ErrorCategory.INVALID_ARGUMENTS
          assert categorize_error("Missing required parameter") == ErrorCategory.INVALID_ARGUMENTS
 +        assert (
 +            categorize_error(
 +                "WriteTool.execute() missing 1 required positional argument: 'content'"
 +            )
 +            == ErrorCategory.INVALID_ARGUMENTS
 +        )
      def test_network_error(self):
          assert categorize_error("Network unreachable") == ErrorCategory.NETWORK_ERROR
          assert "edit/patch/write" in hints.lower()
          assert "index.html" in hints
 +    def test_write_metadata_only_hint_requests_real_content_payload(self):
 +        """Hints for a write call carrying only summary counters ask for real ``content``."""
 +        # The arguments hold a path and counters but no "content" key.
 +        hints = get_recovery_hints(
 +            ErrorCategory.INVALID_ARGUMENTS,
 +            "write",
 +            {
 +                "file_path": "~/Loader/guides/nginx/index.html",
 +                "content_chars": 1354,
 +                "content_lines": 30,
 +            },
 +        )
 +        # Expect the corrected call shape, the offending field and the target file.
 +        assert "content='...'" in hints
 +        assert "content_chars" in hints
 +        assert "index.html" in hints
++
  class TestFormatRecoveryPrompt:
      """Tests for recovery prompt formatting."""
          assert "edit/patch/write" in prompt.lower()
          assert "index.html" in prompt
 +    def test_format_recovery_prompt_for_metadata_only_write_requests_real_payload(self):
 +        """The recovery prompt for a metadata-only write asks for the real ``content``."""
 +        ctx = RecoveryContext(
 +            original_tool="write",
 +            original_args={
 +                "file_path": "~/Loader/guides/nginx/index.html",
 +                "content_chars": 1354,
 +                "content_lines": 30,
 +            },
 +        )
 +        # Register one failed attempt with the same counters-only arguments.
 +        ctx.add_attempt(
 +            "write",
 +            {
 +                "file_path": "~/Loader/guides/nginx/index.html",
 +                "content_chars": 1354,
 +                "content_lines": 30,
 +            },
 +            "WriteTool.execute() missing 1 required positional argument: 'content'",
 +        )
++
 +        prompt = format_recovery_prompt(
 +            ctx,
 +            "write",
 +            {
 +                "file_path": "~/Loader/guides/nginx/index.html",
 +                "content_chars": 1354,
 +                "content_lines": 30,
 +            },
 +            "WriteTool.execute() missing 1 required positional argument: 'content'",
 +        )
++
 +        # Expect the corrected call shape, the offending field and the target file.
 +        assert "content='...'" in prompt
 +        assert "content_chars" in prompt
 +        assert "index.html" in prompt
++
  class TestFormatFailureMessage:
      """Tests for failure message formatting."""

tests/test_repair.py (modified)

      build_permission_policy,
      load_permission_rules,
+ )
 +from loader.runtime.recovery import RecoveryContext
  from loader.runtime.repair import ResponseRepairer
  from loader.tools.base import create_default_registry
  from tests.helpers.runtime_harness import ScriptedBackend
+     )
 +def test_empty_response_retry_reminds_model_to_resend_real_write_payload(
 +    temp_dir: Path,
 +) -> None:
 +    """The empty-response retry message reminds the model to resend ``write`` with real content."""
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        use_react=False,
 +    )
 +    repairer = ResponseRepairer(context)
++
 +    # Create one chapter file on disk; index.html is left uncreated.
 +    guide_root = temp_dir / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    chapters.mkdir(parents=True)
 +    chapter_one = chapters / "01-introduction.html"
 +    chapter_one.write_text("<html></html>\n")
++
 +    # The implementation plan lists both directories and both files.
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root}/`",
 +                f"- `{chapters}/`",
 +                f"- `{guide_root / 'index.html'}`",
 +                f"- `{chapters / '01-introduction.html'}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    # Definition of done: first chapter completed, index.html still pending.
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.touched_files.append(str(chapter_one))
 +    dod.completed_items.append("Create first chapter file (01-introduction.html)")
 +    dod.pending_items.append("Develop the main index.html file for the nginx guide")
++
 +    # Register a failed write attempt that carried only summary counters.
 +    recovery_context = RecoveryContext(
 +        original_tool="write",
 +        original_args={
 +            "file_path": "~/Loader/guides/nginx/index.html",
 +            "content_chars": 1354,
 +            "content_lines": 30,
 +        },
 +    )
 +    recovery_context.add_attempt(
 +        "write",
 +        {
 +            "file_path": "~/Loader/guides/nginx/index.html",
 +            "content_chars": 1354,
 +            "content_lines": 30,
 +        },
 +        "WriteTool.execute() missing 1 required positional argument: 'content'",
 +    )
 +    context.recovery_context = recovery_context
++
 +    # The retry count equals the configured maximum here.
 +    decision = repairer.handle_empty_response(
 +        task="Create a multi-file nginx guide.",
 +        original_task=None,
 +        empty_retry_count=2,
 +        max_empty_retries=2,
 +        dod=dod,
 +    )
++
 +    # The retry message must carry the payload reminder, the bad field and the target.
 +    assert decision.should_continue is True
 +    assert decision.retry_message is not None
 +    assert "resend `write`" in decision.retry_message
 +    assert "content_chars" in decision.retry_message
 +    assert "index.html" in decision.retry_message
++
++
  def test_empty_response_retry_uses_compact_prompt_after_early_progress_with_concrete_next_file(
      temp_dir: Path,
  ) -> None:

tests/test_tool_batch_policies.py (modified)

      assert "verify the current result" in error_event.content
 +@pytest.mark.asyncio
 +async def test_tool_batch_recovery_controller_surfaces_missing_write_payload_fix(
 +    temp_dir: Path,
 +) -> None:
 +    """A failed metadata-only ``write`` produces a follow-up with a PAYLOAD FORMAT FIX section."""
++
 +    # Both callbacks fail the test if the controller invokes them.
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence should not run here")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run here")
++
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[
 +            Message(
 +                role=Role.USER,
 +                content="Create ~/Loader/guides/nginx/index.html",
 +            )
 +        ],
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +    )
 +    controller = ToolBatchRecoveryController(context)
 +    # The call carries a path and summary counters but no "content" key.
 +    tool_call = ToolCall(
 +        id="write-metadata-only",
 +        name="write",
 +        arguments={
 +            "file_path": "~/Loader/guides/nginx/index.html",
 +            "content_chars": 1354,
 +            "content_lines": 30,
 +        },
 +    )
 +    # The error output combines a validation warning with the missing-argument error.
 +    outcome = tool_outcome(
 +        tool_call=tool_call,
 +        output=(
 +            "[Validation warning] Writing empty content to file\n"
 +            "Tool execution error: WriteTool.execute() missing 1 required "
 +            "positional argument: 'content'"
 +        ),
 +        is_error=True,
 +    )
++
 +    follow_up = await controller.build_follow_up(
 +        tool_call=tool_call,
 +        outcome=outcome,
 +        emit=lambda event: _noop_emit(event),
 +    )
++
 +    # Expect the fix section, the bad field, the corrected call shape and the target.
 +    assert follow_up is not None
 +    assert "## PAYLOAD FORMAT FIX" in follow_up.content
 +    assert "content_chars" in follow_up.content
 +    assert "write(file_path=..., content='...')" in follow_up.content
 +    assert "index.html" in follow_up.content
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_recovery_controller_resets_context_for_unrelated_failures(
      temp_dir: Path,