Recover blocked mutation paths
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
6eb402b466d0b9fb0f17abe309280aa051d1b933- Parents
-
1fafa49 - Tree
e675a78
6eb402b
6eb402b466d0b9fb0f17abe309280aa051d1b9331fafa49
e675a78| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/recovery.py
|
66 | 2 |
| M |
src/loader/runtime/repair.py
|
65 | 4 |
| M |
src/loader/runtime/tool_batch_recovery.py
|
34 | 0 |
| M |
src/loader/runtime/tool_batches.py
|
160 | 0 |
| M |
tests/test_repair.py
|
65 | 0 |
| M |
tests/test_tool_batches.py
|
107 | 0 |
src/loader/runtime/recovery.pymodified@@ -545,7 +545,7 @@ def detect_missing_mutation_payload( | ||
| 545 | 545 | args: dict[str, Any] | None, |
| 546 | 546 | error: str, |
| 547 | 547 | ) -> dict[str, Any] | None: |
| 548 | - """Detect metadata-only mutation calls missing their real text payload.""" | |
| 548 | + """Detect invalid mutation calls missing their real payload or target path.""" | |
| 549 | 549 | |
| 550 | 550 | arguments = dict(args or {}) |
| 551 | 551 | error_lower = error.lower() |
@@ -557,11 +557,38 @@ def detect_missing_mutation_payload( | ||
| 557 | 557 | "missing required", |
| 558 | 558 | "empty content", |
| 559 | 559 | "validation warning", |
| 560 | + "empty file path", | |
| 561 | + "valid file path", | |
| 562 | + "missing file path", | |
| 560 | 563 | ] |
| 561 | 564 | ): |
| 562 | 565 | return None |
| 563 | 566 | |
| 564 | 567 | file_path = str(arguments.get("file_path") or arguments.get("path") or "").strip() |
| 568 | + missing_target = tool_name in {"write", "edit", "patch"} and ( | |
| 569 | + "empty file path" in error_lower | |
| 570 | + or "valid file path" in error_lower | |
| 571 | + or "missing file path" in error_lower | |
| 572 | + or ( | |
| 573 | + any( | |
| 574 | + token in error_lower | |
| 575 | + for token in [ | |
| 576 | + "required positional argument", | |
| 577 | + "missing 1 required", | |
| 578 | + "missing required", | |
| 579 | + ] | |
| 580 | + ) | |
| 581 | + and "file_path" in error_lower | |
| 582 | + ) | |
| 583 | + ) | |
| 584 | + | |
| 585 | + if missing_target: | |
| 586 | + return { | |
| 587 | + "kind": "missing_target", | |
| 588 | + "required_fields": ["file_path"], | |
| 589 | + "invalid_fields": [], | |
| 590 | + "file_path": file_path, | |
| 591 | + } | |
| 565 | 592 | |
| 566 | 593 | if tool_name == "write": |
| 567 | 594 | invalid_fields = [ |
@@ -569,6 +596,7 @@ def detect_missing_mutation_payload( | ||
| 569 | 596 | ] |
| 570 | 597 | if "content" not in arguments and invalid_fields: |
| 571 | 598 | return { |
| 599 | + "kind": "missing_payload", | |
| 572 | 600 | "required_fields": ["content"], |
| 573 | 601 | "invalid_fields": invalid_fields, |
| 574 | 602 | "file_path": file_path, |
@@ -590,6 +618,7 @@ def detect_missing_mutation_payload( | ||
| 590 | 618 | ] |
| 591 | 619 | if missing_fields and invalid_fields: |
| 592 | 620 | return { |
| 621 | + "kind": "missing_payload", | |
| 593 | 622 | "required_fields": missing_fields, |
| 594 | 623 | "invalid_fields": invalid_fields, |
| 595 | 624 | "file_path": file_path, |
@@ -599,6 +628,7 @@ def detect_missing_mutation_payload( | ||
| 599 | 628 | invalid_fields = [field for field in ("hunk_count",) if field in arguments] |
| 600 | 629 | if "patch" not in arguments and "hunks" not in arguments and invalid_fields: |
| 601 | 630 | return { |
| 631 | + "kind": "missing_payload", | |
| 602 | 632 | "required_fields": ["patch or hunks"], |
| 603 | 633 | "invalid_fields": invalid_fields, |
| 604 | 634 | "file_path": file_path, |
@@ -771,7 +801,41 @@ def get_recovery_hints( | ||
| 771 | 801 | required = ", ".join(payload_fix["required_fields"]) |
| 772 | 802 | invalid = ", ".join(payload_fix["invalid_fields"]) |
| 773 | 803 | target = payload_fix["file_path"] |
| 774 | - if tool_name == "write": | |
| 804 | + if payload_fix.get("kind") == "missing_target": | |
| 805 | + if tool_name == "write": | |
| 806 | + category_hints = [ | |
| 807 | + ( | |
| 808 | + f"Resend the mutation as `write(file_path=..., content='...')` " | |
| 809 | + f"for `{target}` with a real file path" | |
| 810 | + if target | |
| 811 | + else "Resend the mutation as `write(file_path=..., content='...')` with a real file path" | |
| 812 | + ), | |
| 813 | + "Do not leave `file_path` empty or pointed at an unknown target", | |
| 814 | + "Do not reread reference files first unless one specific fact still blocks the write target", | |
| 815 | + ] | |
| 816 | + elif tool_name == "edit": | |
| 817 | + category_hints = [ | |
| 818 | + ( | |
| 819 | + f"Resend the mutation as `edit(file_path=..., old_string='...', new_string='...')` " | |
| 820 | + f"for `{target}` with a real file path" | |
| 821 | + if target | |
| 822 | + else "Resend the mutation as `edit(file_path=..., old_string='...', new_string='...')` with a real file path" | |
| 823 | + ), | |
| 824 | + "Do not leave `file_path` empty or pointed at an unknown target", | |
| 825 | + "Do not reread reference files first unless one specific exact replacement span is still unknown", | |
| 826 | + ] | |
| 827 | + elif tool_name == "patch": | |
| 828 | + category_hints = [ | |
| 829 | + ( | |
| 830 | + f"Resend the mutation as `patch(file_path=..., patch='...')` " | |
| 831 | + f"for `{target}` with a real file path" | |
| 832 | + if target | |
| 833 | + else "Resend the mutation as `patch(file_path=..., patch='...')` with a real file path" | |
| 834 | + ), | |
| 835 | + "Do not leave `file_path` empty or pointed at an unknown target", | |
| 836 | + "Do not reread reference files first unless one specific edit span is still unknown", | |
| 837 | + ] | |
| 838 | + elif tool_name == "write": | |
| 775 | 839 | category_hints = [ |
| 776 | 840 | ( |
| 777 | 841 | f"Resend the mutation as `write(file_path=..., content='...')` " |
src/loader/runtime/repair.pymodified@@ -254,7 +254,7 @@ class ResponseRepairer: | ||
| 254 | 254 | if dod is not None and self._should_compact_empty_retry_message(dod): |
| 255 | 255 | compact_lines: list[str] = [] |
| 256 | 256 | compact_lines.extend(self._planned_artifact_progress_lines(dod)[:2]) |
| 257 | - compact_lines.extend(self._payload_retry_lines()) | |
| 257 | + compact_lines.extend(self._payload_retry_lines(dod)) | |
| 258 | 258 | compact_lines.extend( |
| 259 | 259 | self._next_step_resume_lines( |
| 260 | 260 | dod, |
@@ -289,7 +289,7 @@ class ResponseRepairer: | ||
| 289 | 289 | |
| 290 | 290 | planned_lines = self._planned_artifact_progress_lines(dod) |
| 291 | 291 | progress_lines.extend(planned_lines) |
| 292 | - progress_lines.extend(self._payload_retry_lines()) | |
| 292 | + progress_lines.extend(self._payload_retry_lines(dod)) | |
| 293 | 293 | progress_lines.extend( |
| 294 | 294 | self._next_step_resume_lines( |
| 295 | 295 | dod, |
@@ -350,7 +350,7 @@ class ResponseRepairer: | ||
| 350 | 350 | ] |
| 351 | 351 | ) |
| 352 | 352 | |
| 353 | - def _payload_retry_lines(self) -> list[str]: | |
| 353 | + def _payload_retry_lines(self, dod: DefinitionOfDone | None) -> list[str]: | |
| 354 | 354 | recovery_context = self.context.recovery_context |
| 355 | 355 | if recovery_context is None or not recovery_context.attempts: |
| 356 | 356 | return [] |
@@ -363,8 +363,39 @@ class ResponseRepairer: | ||
| 363 | 363 | if fix is None: |
| 364 | 364 | return [] |
| 365 | 365 | |
| 366 | - target = fix["file_path"] | |
| 366 | + target = fix["file_path"] or self._preferred_retry_target(dod) | |
| 367 | 367 | invalid = ", ".join(f"`{field}`" for field in fix["invalid_fields"]) |
| 368 | + if fix.get("kind") == "missing_target": | |
| 369 | + if attempt.tool_name == "write": | |
| 370 | + target_line = ( | |
| 371 | + f"Last tool failure: resend `write` for `{target}` with a valid `file_path` and real `content`." | |
| 372 | + if target | |
| 373 | + else "Last tool failure: resend `write` with a valid `file_path` and real `content`." | |
| 374 | + ) | |
| 375 | + return [ | |
| 376 | + target_line, | |
| 377 | + "Do not leave `file_path` empty; point it at the concrete next output file.", | |
| 378 | + ] | |
| 379 | + if attempt.tool_name == "edit": | |
| 380 | + target_line = ( | |
| 381 | + f"Last tool failure: resend `edit` for `{target}` with a valid `file_path` plus real `old_string`/`new_string`." | |
| 382 | + if target | |
| 383 | + else "Last tool failure: resend `edit` with a valid `file_path` plus real `old_string`/`new_string`." | |
| 384 | + ) | |
| 385 | + return [ | |
| 386 | + target_line, | |
| 387 | + "Do not leave `file_path` empty; point it at the concrete file you already know needs the edit.", | |
| 388 | + ] | |
| 389 | + if attempt.tool_name == "patch": | |
| 390 | + target_line = ( | |
| 391 | + f"Last tool failure: resend `patch` for `{target}` with a valid `file_path` and real patch text or `hunks`." | |
| 392 | + if target | |
| 393 | + else "Last tool failure: resend `patch` with a valid `file_path` and real patch text or `hunks`." | |
| 394 | + ) | |
| 395 | + return [ | |
| 396 | + target_line, | |
| 397 | + "Do not leave `file_path` empty; point it at the concrete file you already know needs the patch.", | |
| 398 | + ] | |
| 368 | 399 | if attempt.tool_name == "write": |
| 369 | 400 | target_line = ( |
| 370 | 401 | f"Last tool failure: resend `write` for `{target}` with real `content`, not just summary fields." |
@@ -880,6 +911,36 @@ class ResponseRepairer: | ||
| 880 | 911 | return normalized_target, False |
| 881 | 912 | return first_missing |
| 882 | 913 | |
| 914 | + def _preferred_retry_target(self, dod: DefinitionOfDone | None) -> str: | |
| 915 | + if dod is None: | |
| 916 | + return "" | |
| 917 | + | |
| 918 | + missing_artifact = self._preferred_resume_missing_artifact(dod) | |
| 919 | + next_pending = self._preferred_resume_pending_item( | |
| 920 | + dod, | |
| 921 | + missing_artifact=missing_artifact, | |
| 922 | + ) | |
| 923 | + if next_pending: | |
| 924 | + pending_target = self._infer_pending_item_output_target(dod, next_pending) | |
| 925 | + if pending_target is not None and not pending_target.exists(): | |
| 926 | + return str(pending_target) | |
| 927 | + | |
| 928 | + if missing_artifact is None: | |
| 929 | + return "" | |
| 930 | + | |
| 931 | + target, expect_directory = missing_artifact | |
| 932 | + if not expect_directory: | |
| 933 | + return str(target) | |
| 934 | + | |
| 935 | + next_output_file, _ = infer_next_output_file( | |
| 936 | + target=target, | |
| 937 | + project_root=self.context.project_root, | |
| 938 | + messages=list(getattr(self.context.session, "messages", []) or []), | |
| 939 | + ) | |
| 940 | + if next_output_file is not None: | |
| 941 | + return str(next_output_file) | |
| 942 | + return str(target) | |
| 943 | + | |
| 883 | 944 | def _concretize_directory_missing_artifact( |
| 884 | 945 | self, |
| 885 | 946 | dod: DefinitionOfDone, |
src/loader/runtime/tool_batch_recovery.pymodified@@ -259,6 +259,40 @@ class ToolBatchRecoveryController: | ||
| 259 | 259 | target = fix["file_path"] |
| 260 | 260 | invalid_fields = ", ".join(f"`{field}`" for field in fix["invalid_fields"]) |
| 261 | 261 | required_fields = "`, `".join(fix["required_fields"]) |
| 262 | + if fix.get("kind") == "missing_target": | |
| 263 | + if tool_call.name == "write": | |
| 264 | + target_line = ( | |
| 265 | + f"- The failed call for `{target}` omitted a valid `file_path`." | |
| 266 | + if target | |
| 267 | + else "- The failed call omitted a valid `file_path`." | |
| 268 | + ) | |
| 269 | + return [ | |
| 270 | + target_line, | |
| 271 | + "- Resend one concrete `write(file_path=..., content='...')` call now instead of rereading more files.", | |
| 272 | + ] | |
| 273 | + | |
| 274 | + if tool_call.name == "edit": | |
| 275 | + target_line = ( | |
| 276 | + f"- The failed call for `{target}` omitted a valid `file_path`." | |
| 277 | + if target | |
| 278 | + else "- The failed call omitted a valid `file_path`." | |
| 279 | + ) | |
| 280 | + return [ | |
| 281 | + target_line, | |
| 282 | + "- Resend one concrete `edit(file_path=..., old_string='...', new_string='...')` call now instead of rereading more files.", | |
| 283 | + ] | |
| 284 | + | |
| 285 | + if tool_call.name == "patch": | |
| 286 | + target_line = ( | |
| 287 | + f"- The failed call for `{target}` omitted a valid `file_path`." | |
| 288 | + if target | |
| 289 | + else "- The failed call omitted a valid `file_path`." | |
| 290 | + ) | |
| 291 | + return [ | |
| 292 | + target_line, | |
| 293 | + "- Resend one concrete `patch(file_path=..., patch='...')` or `patch(..., hunks=[...])` call now instead of rereading more files.", | |
| 294 | + ] | |
| 295 | + | |
| 262 | 296 | if tool_call.name == "write": |
| 263 | 297 | target_line = ( |
| 264 | 298 | f"- The failed call for `{target}` omitted the required `content` payload." |
src/loader/runtime/tool_batches.pymodified@@ -29,6 +29,7 @@ from .evidence_provenance import EvidenceProvenance, EvidenceProvenanceStatus | ||
| 29 | 29 | from .executor import ToolExecutionState, ToolExecutor |
| 30 | 30 | from .logging import get_runtime_logger |
| 31 | 31 | from .policy_timeline import append_verification_timeline_entry |
| 32 | +from .recovery import RecoveryContext, detect_missing_mutation_payload | |
| 32 | 33 | from .repair_focus import extract_active_repair_context |
| 33 | 34 | from .safeguard_services import extract_shell_text_rewrite_target |
| 34 | 35 | from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate |
@@ -279,6 +280,11 @@ class ToolBatchRunner: | ||
| 279 | 280 | if outcome.state == ToolExecutionState.DUPLICATE: |
| 280 | 281 | self._queue_duplicate_observation_nudge(tool_call, dod=dod) |
| 281 | 282 | elif outcome.state == ToolExecutionState.BLOCKED: |
| 283 | + self._queue_blocked_invalid_mutation_nudge( | |
| 284 | + tool_call, | |
| 285 | + outcome.event_content, | |
| 286 | + dod=dod, | |
| 287 | + ) | |
| 282 | 288 | self._queue_blocked_active_repair_nudge(outcome.event_content) |
| 283 | 289 | self._queue_blocked_active_repair_mutation_nudge(outcome.event_content) |
| 284 | 290 | self._queue_blocked_completed_artifact_scope_nudge( |
@@ -679,6 +685,115 @@ class ToolBatchRunner: | ||
| 679 | 685 | "Do not reopen unrelated reference materials while this concrete repair target is unresolved." |
| 680 | 686 | ) |
| 681 | 687 | |
| 688 | + def _queue_blocked_invalid_mutation_nudge( | |
| 689 | + self, | |
| 690 | + tool_call: ToolCall, | |
| 691 | + event_content: str, | |
| 692 | + *, | |
| 693 | + dod: DefinitionOfDone, | |
| 694 | + ) -> None: | |
| 695 | + """Recover blocked mutations that omitted a real target path or text payload.""" | |
| 696 | + | |
| 697 | + fix = detect_missing_mutation_payload( | |
| 698 | + tool_call.name, | |
| 699 | + tool_call.arguments, | |
| 700 | + event_content, | |
| 701 | + ) | |
| 702 | + if fix is None: | |
| 703 | + return | |
| 704 | + | |
| 705 | + self._record_blocked_invalid_mutation_attempt(tool_call, event_content) | |
| 706 | + | |
| 707 | + messages = list(getattr(self.context.session, "messages", []) or []) | |
| 708 | + missing_artifact = _next_missing_planned_artifact( | |
| 709 | + dod, | |
| 710 | + project_root=self.context.project_root, | |
| 711 | + messages=messages, | |
| 712 | + ) | |
| 713 | + next_pending = preferred_pending_todo_item( | |
| 714 | + dod, | |
| 715 | + project_root=self.context.project_root, | |
| 716 | + missing_artifact=missing_artifact, | |
| 717 | + ) | |
| 718 | + missing_artifact = _prefer_missing_artifact_for_pending_item( | |
| 719 | + dod, | |
| 720 | + missing_artifact=missing_artifact, | |
| 721 | + next_pending=next_pending, | |
| 722 | + project_root=self.context.project_root, | |
| 723 | + ) | |
| 724 | + resume_target = _preferred_resume_target_path( | |
| 725 | + dod, | |
| 726 | + next_pending=next_pending, | |
| 727 | + missing_artifact=missing_artifact, | |
| 728 | + project_root=self.context.project_root, | |
| 729 | + messages=messages, | |
| 730 | + ) | |
| 731 | + resume_suffix = _pending_item_resume_suffix( | |
| 732 | + dod, | |
| 733 | + next_pending=next_pending, | |
| 734 | + missing_artifact=missing_artifact, | |
| 735 | + project_root=self.context.project_root, | |
| 736 | + messages=messages, | |
| 737 | + ) | |
| 738 | + target_label = f"`{resume_target.name or str(resume_target)}`" if resume_target else "" | |
| 739 | + | |
| 740 | + if fix.get("kind") == "missing_target": | |
| 741 | + prefix = f"That `{tool_call.name}` call did not provide a valid `file_path`." | |
| 742 | + if target_label: | |
| 743 | + prefix += f" Stay on {target_label}." | |
| 744 | + self.context.queue_steering_message( | |
| 745 | + prefix | |
| 746 | + + resume_suffix | |
| 747 | + + " Resend one concrete " | |
| 748 | + + _invalid_mutation_call_shape(tool_call.name) | |
| 749 | + + " now instead of another working note, reread, or empty response." | |
| 750 | + ) | |
| 751 | + return | |
| 752 | + | |
| 753 | + invalid_fields = ", ".join(f"`{field}`" for field in fix["invalid_fields"]) | |
| 754 | + prefix = f"That `{tool_call.name}` call omitted the real text payload." | |
| 755 | + if invalid_fields: | |
| 756 | + prefix += f" {invalid_fields} are summary fields, not valid mutation inputs." | |
| 757 | + if target_label: | |
| 758 | + prefix += f" Stay on {target_label}." | |
| 759 | + self.context.queue_steering_message( | |
| 760 | + prefix | |
| 761 | + + resume_suffix | |
| 762 | + + " Resend one concrete " | |
| 763 | + + _invalid_mutation_call_shape(tool_call.name) | |
| 764 | + + " now instead of rereading more files." | |
| 765 | + ) | |
| 766 | + | |
| 767 | + def _record_blocked_invalid_mutation_attempt( | |
| 768 | + self, | |
| 769 | + tool_call: ToolCall, | |
| 770 | + error: str, | |
| 771 | + ) -> None: | |
| 772 | + """Seed recovery state from blocked malformed mutations for later retry guidance.""" | |
| 773 | + | |
| 774 | + recovery_context = self.context.recovery_context | |
| 775 | + if recovery_context is None or not recovery_context.is_related_failure( | |
| 776 | + tool_call.name, | |
| 777 | + tool_call.arguments, | |
| 778 | + error, | |
| 779 | + ): | |
| 780 | + recovery_context = RecoveryContext( | |
| 781 | + original_tool=tool_call.name, | |
| 782 | + original_args=tool_call.arguments, | |
| 783 | + max_retries=self.context.config.max_recovery_attempts, | |
| 784 | + ) | |
| 785 | + self.context.recovery_context = recovery_context | |
| 786 | + | |
| 787 | + if not recovery_context.is_similar_attempt( | |
| 788 | + tool_call.name, | |
| 789 | + tool_call.arguments, | |
| 790 | + ): | |
| 791 | + recovery_context.add_attempt( | |
| 792 | + tool_call.name, | |
| 793 | + tool_call.arguments, | |
| 794 | + error, | |
| 795 | + ) | |
| 796 | + | |
| 682 | 797 | async def _record_successful_execution( |
| 683 | 798 | self, |
| 684 | 799 | *, |
@@ -1427,6 +1542,51 @@ def _pending_item_resume_suffix( | ||
| 1427 | 1542 | ) |
| 1428 | 1543 | |
| 1429 | 1544 | |
| 1545 | +def _preferred_resume_target_path( | |
| 1546 | + dod: DefinitionOfDone, | |
| 1547 | + *, | |
| 1548 | + next_pending: str | None, | |
| 1549 | + missing_artifact: tuple[Path, bool] | None, | |
| 1550 | + project_root: Path, | |
| 1551 | + messages: list[Any] | None = None, | |
| 1552 | +) -> Path | None: | |
| 1553 | + if next_pending: | |
| 1554 | + pending_target = infer_pending_todo_output_target( | |
| 1555 | + dod, | |
| 1556 | + next_pending, | |
| 1557 | + project_root=project_root, | |
| 1558 | + ) | |
| 1559 | + if pending_target is not None and not pending_target.exists(): | |
| 1560 | + return pending_target.expanduser().resolve(strict=False) | |
| 1561 | + | |
| 1562 | + if missing_artifact is None: | |
| 1563 | + return None | |
| 1564 | + | |
| 1565 | + target, expect_directory = missing_artifact | |
| 1566 | + normalized_target = target.expanduser().resolve(strict=False) | |
| 1567 | + if not expect_directory: | |
| 1568 | + return normalized_target | |
| 1569 | + | |
| 1570 | + next_output_file, _ = infer_next_output_file( | |
| 1571 | + target=normalized_target, | |
| 1572 | + project_root=project_root, | |
| 1573 | + messages=list(messages or []), | |
| 1574 | + ) | |
| 1575 | + if next_output_file is not None: | |
| 1576 | + return next_output_file.expanduser().resolve(strict=False) | |
| 1577 | + return normalized_target | |
| 1578 | + | |
| 1579 | + | |
| 1580 | +def _invalid_mutation_call_shape(tool_name: str) -> str: | |
| 1581 | + if tool_name == "write": | |
| 1582 | + return "`write(file_path=..., content=...)`" | |
| 1583 | + if tool_name == "edit": | |
| 1584 | + return "`edit(file_path=..., old_string=..., new_string=...)`" | |
| 1585 | + if tool_name == "patch": | |
| 1586 | + return "`patch(file_path=..., patch='...')` or `patch(..., hunks=[...])`" | |
| 1587 | + return f"`{tool_name}(...)`" | |
| 1588 | + | |
| 1589 | + | |
| 1430 | 1590 | def _resume_suffix_for_target( |
| 1431 | 1591 | target: Path, |
| 1432 | 1592 | *, |
tests/test_repair.pymodified@@ -322,6 +322,71 @@ def test_empty_response_retry_mentions_write_can_create_missing_parent_directori | ||
| 322 | 322 | ) |
| 323 | 323 | |
| 324 | 324 | |
| 325 | +def test_empty_response_retry_recovers_blocked_empty_file_path_to_concrete_target( | |
| 326 | + temp_dir: Path, | |
| 327 | +) -> None: | |
| 328 | + context = build_context( | |
| 329 | + temp_dir=temp_dir, | |
| 330 | + use_react=False, | |
| 331 | + ) | |
| 332 | + repairer = ResponseRepairer(context) | |
| 333 | + | |
| 334 | + guide_root = temp_dir / "guides" / "nginx" | |
| 335 | + chapters = guide_root / "chapters" | |
| 336 | + chapters.mkdir(parents=True) | |
| 337 | + index_path = guide_root / "index.html" | |
| 338 | + first_chapter = chapters / "01-introduction.html" | |
| 339 | + second_chapter = chapters / "02-installation.html" | |
| 340 | + index_path.write_text("<html></html>\n") | |
| 341 | + first_chapter.write_text("<h1>Intro</h1>\n") | |
| 342 | + | |
| 343 | + implementation_plan = temp_dir / "implementation.md" | |
| 344 | + implementation_plan.write_text( | |
| 345 | + "\n".join( | |
| 346 | + [ | |
| 347 | + "# Implementation Plan", | |
| 348 | + "", | |
| 349 | + "## File Changes", | |
| 350 | + f"- `{index_path}`", | |
| 351 | + f"- `{first_chapter}`", | |
| 352 | + f"- `{second_chapter}`", | |
| 353 | + "", | |
| 354 | + ] | |
| 355 | + ) | |
| 356 | + ) | |
| 357 | + | |
| 358 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 359 | + dod.implementation_plan = str(implementation_plan) | |
| 360 | + dod.touched_files.extend([str(index_path), str(first_chapter)]) | |
| 361 | + dod.pending_items.append("Creating Chapter 2: Installation and Setup") | |
| 362 | + | |
| 363 | + context.recovery_context = RecoveryContext( | |
| 364 | + original_tool="write", | |
| 365 | + original_args={"file_path": "", "content": "<html></html>\n"}, | |
| 366 | + ) | |
| 367 | + context.recovery_context.add_attempt( | |
| 368 | + "write", | |
| 369 | + {"file_path": "", "content": "<html></html>\n"}, | |
| 370 | + "Empty file path", | |
| 371 | + ) | |
| 372 | + | |
| 373 | + decision = repairer.handle_empty_response( | |
| 374 | + task="Create a multi-file nginx guide.", | |
| 375 | + original_task=None, | |
| 376 | + empty_retry_count=1, | |
| 377 | + max_empty_retries=2, | |
| 378 | + dod=dod, | |
| 379 | + ) | |
| 380 | + | |
| 381 | + assert decision.should_continue is True | |
| 382 | + assert decision.retry_message is not None | |
| 383 | + assert ( | |
| 384 | + f"Last tool failure: resend `write` for `{second_chapter}` with a valid `file_path` and real `content`." | |
| 385 | + in decision.retry_message | |
| 386 | + ) | |
| 387 | + assert "Do not leave `file_path` empty" in decision.retry_message | |
| 388 | + | |
| 389 | + | |
| 325 | 390 | def test_empty_response_retry_respects_discovery_first_pending_step( |
| 326 | 391 | temp_dir: Path, |
| 327 | 392 | ) -> None: |
tests/test_tool_batches.pymodified@@ -4600,3 +4600,110 @@ def test_tool_batch_runner_blocked_completed_artifact_scope_nudge_prefers_verifi | ||
| 4600 | 4600 | assert "All explicitly planned artifacts already exist." in queued[0] |
| 4601 | 4601 | assert "Verify all guide files are linked and complete" in queued[0] |
| 4602 | 4602 | assert "Do not reopen earlier reference materials." in queued[0] |
| 4603 | + | |
| 4604 | + | |
| 4605 | +@pytest.mark.asyncio | |
| 4606 | +async def test_tool_batch_runner_blocked_empty_file_path_nudges_concrete_next_artifact( | |
| 4607 | + temp_dir: Path, | |
| 4608 | +) -> None: | |
| 4609 | + async def assess_confidence( | |
| 4610 | + tool_name: str, | |
| 4611 | + tool_args: dict, | |
| 4612 | + context: str, | |
| 4613 | + ) -> ConfidenceAssessment: | |
| 4614 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 4615 | + | |
| 4616 | + async def verify_action( | |
| 4617 | + tool_name: str, | |
| 4618 | + tool_args: dict, | |
| 4619 | + result: str, | |
| 4620 | + expected: str = "", | |
| 4621 | + ) -> ActionVerification: | |
| 4622 | + raise AssertionError("Verification should not run in this scenario") | |
| 4623 | + | |
| 4624 | + guide_root = temp_dir / "guides" / "nginx" | |
| 4625 | + chapters = guide_root / "chapters" | |
| 4626 | + chapters.mkdir(parents=True) | |
| 4627 | + index_path = guide_root / "index.html" | |
| 4628 | + chapter_one = chapters / "01-introduction.html" | |
| 4629 | + chapter_two = chapters / "02-installation.html" | |
| 4630 | + index_path.write_text("<html></html>\n") | |
| 4631 | + chapter_one.write_text("<h1>Intro</h1>\n") | |
| 4632 | + | |
| 4633 | + implementation_plan = temp_dir / "implementation.md" | |
| 4634 | + implementation_plan.write_text( | |
| 4635 | + "\n".join( | |
| 4636 | + [ | |
| 4637 | + "# Implementation Plan", | |
| 4638 | + "", | |
| 4639 | + "## File Changes", | |
| 4640 | + f"- `{index_path}`", | |
| 4641 | + f"- `{chapter_one}`", | |
| 4642 | + f"- `{chapter_two}`", | |
| 4643 | + "", | |
| 4644 | + ] | |
| 4645 | + ) | |
| 4646 | + ) | |
| 4647 | + | |
| 4648 | + context = build_context( | |
| 4649 | + temp_dir=temp_dir, | |
| 4650 | + messages=[], | |
| 4651 | + safeguards=FakeSafeguards(), | |
| 4652 | + assess_confidence=assess_confidence, | |
| 4653 | + verify_action=verify_action, | |
| 4654 | + auto_recover=False, | |
| 4655 | + ) | |
| 4656 | + queued: list[str] = [] | |
| 4657 | + context.queue_steering_message_callback = queued.append | |
| 4658 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 4659 | + tool_call = ToolCall( | |
| 4660 | + id="write-2", | |
| 4661 | + name="write", | |
| 4662 | + arguments={"file_path": "", "content": "<html></html>\n"}, | |
| 4663 | + ) | |
| 4664 | + blocked_message = "[Blocked - Empty file path] Suggestion: Provide a valid file path" | |
| 4665 | + executor = FakeExecutor( | |
| 4666 | + [ | |
| 4667 | + ToolExecutionOutcome( | |
| 4668 | + tool_call=tool_call, | |
| 4669 | + state=ToolExecutionState.BLOCKED, | |
| 4670 | + message=Message.tool_result_message( | |
| 4671 | + tool_call_id=tool_call.id, | |
| 4672 | + display_content=blocked_message, | |
| 4673 | + result_content=blocked_message, | |
| 4674 | + is_error=True, | |
| 4675 | + ), | |
| 4676 | + event_content=blocked_message, | |
| 4677 | + is_error=True, | |
| 4678 | + result_output=blocked_message, | |
| 4679 | + ) | |
| 4680 | + ] | |
| 4681 | + ) | |
| 4682 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 4683 | + dod.implementation_plan = str(implementation_plan) | |
| 4684 | + dod.touched_files.extend([str(index_path), str(chapter_one)]) | |
| 4685 | + dod.pending_items.append("Creating Chapter 2: Installation and Setup") | |
| 4686 | + | |
| 4687 | + await runner.execute_batch( | |
| 4688 | + tool_calls=[tool_call], | |
| 4689 | + tool_source="assistant", | |
| 4690 | + pending_tool_calls_seen=set(), | |
| 4691 | + emit=_noop_emit, | |
| 4692 | + summary=TurnSummary(final_response=""), | |
| 4693 | + dod=dod, | |
| 4694 | + executor=executor, # type: ignore[arg-type] | |
| 4695 | + on_confirmation=None, | |
| 4696 | + on_user_question=None, | |
| 4697 | + emit_confirmation=None, | |
| 4698 | + consecutive_errors=0, | |
| 4699 | + ) | |
| 4700 | + | |
| 4701 | + assert queued | |
| 4702 | + assert "did not provide a valid `file_path`" in queued[0] | |
| 4703 | + assert "Resume by creating `02-installation.html` now." in queued[0] | |
| 4704 | + assert ( | |
| 4705 | + f"Prefer one `write` call for `{chapter_two}` instead of more rereads." | |
| 4706 | + in queued[0] | |
| 4707 | + ) | |
| 4708 | + assert context.recovery_context is not None | |
| 4709 | + assert context.recovery_context.attempts[-1].error == blocked_message | |