Steer duplicate repair writes
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
07dbbb9e97edcb0b44a7e8fe558e29bfbc6f327a- Parents
-
fdbe57a - Tree
98b6131
07dbbb9
07dbbb9e97edcb0b44a7e8fe558e29bfbc6f327afdbe57a
98b6131| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/repair_focus.py
|
19 | 5 |
| M |
src/loader/runtime/tool_batches.py
|
57 | 0 |
| M |
tests/test_repair_focus.py
|
26 | 0 |
| M |
tests/test_tool_batches.py
|
60 | 0 |
src/loader/runtime/repair_focus.pymodified@@ -69,11 +69,9 @@ def extract_active_repair_context( | ||
| 69 | 69 | if artifact_path: |
| 70 | 70 | if artifact_path not in absolute_paths: |
| 71 | 71 | absolute_paths.insert(0, artifact_path) |
| 72 | - allowed_paths = tuple( | |
| 73 | - sorted( | |
| 74 | - absolute_paths, | |
| 75 | - key=lambda item: (not Path(item).exists(), item), | |
| 76 | - ) | |
| 72 | + allowed_paths = _ordered_allowed_paths( | |
| 73 | + absolute_paths, | |
| 74 | + primary_path=artifact_path, | |
| 77 | 75 | ) |
| 78 | 76 | allowed_roots = _collapse_roots(_path_roots(set(absolute_paths))) |
| 79 | 77 | return ActiveRepairContext( |
@@ -133,3 +131,19 @@ def _collapse_roots(roots: set[str]) -> tuple[str, ...]: | ||
| 133 | 131 | continue |
| 134 | 132 | collapsed.append(root) |
| 135 | 133 | return tuple(collapsed) |
| 134 | + | |
| 135 | + | |
| 136 | +def _ordered_allowed_paths(paths: list[str], *, primary_path: str) -> tuple[str, ...]: | |
| 137 | + """Preserve repair-focus order with the immediate target first.""" | |
| 138 | + | |
| 139 | + ordered: list[str] = [] | |
| 140 | + | |
| 141 | + def add(path: str) -> None: | |
| 142 | + if not path or path in ordered: | |
| 143 | + return | |
| 144 | + ordered.append(path) | |
| 145 | + | |
| 146 | + add(primary_path) | |
| 147 | + for path in paths: | |
| 148 | + add(path) | |
| 149 | + return tuple(ordered) | |
src/loader/runtime/tool_batches.pymodified@@ -336,6 +336,7 @@ class ToolBatchRunner: | ||
| 336 | 336 | self.context.session.append(outcome.message) |
| 337 | 337 | summary.tool_result_messages.append(outcome.message) |
| 338 | 338 | if outcome.state == ToolExecutionState.DUPLICATE: |
| 339 | + self._queue_duplicate_mutation_nudge(tool_call, dod=dod) | |
| 339 | 340 | self._queue_duplicate_observation_nudge(tool_call, dod=dod) |
| 340 | 341 | elif outcome.state == ToolExecutionState.BLOCKED: |
| 341 | 342 | self._queue_blocked_invalid_mutation_nudge( |
@@ -652,6 +653,44 @@ class ToolBatchRunner: | ||
| 652 | 653 | "Choose a different next step that makes progress." |
| 653 | 654 | ) |
| 654 | 655 | |
| 656 | + def _queue_duplicate_mutation_nudge( | |
| 657 | + self, | |
| 658 | + tool_call: ToolCall, | |
| 659 | + *, | |
| 660 | + dod: DefinitionOfDone, | |
| 661 | + ) -> None: | |
| 662 | + """After a duplicate mutation, restate concrete repair deltas.""" | |
| 663 | + | |
| 664 | + if tool_call.name not in {"write", "edit", "patch"}: | |
| 665 | + return | |
| 666 | + | |
| 667 | + target = str( | |
| 668 | + tool_call.arguments.get("file_path") | |
| 669 | + or tool_call.arguments.get("path") | |
| 670 | + or "" | |
| 671 | + ).strip() | |
| 672 | + repair = extract_active_repair_context(self.context.session.messages) | |
| 673 | + if repair is not None: | |
| 674 | + repair_preview = _active_repair_focus_preview(repair.repair_lines) | |
| 675 | + target_label = f"`{target}`" if target else "that file" | |
| 676 | + self.context.queue_steering_message( | |
| 677 | + f"That {tool_call.name} was skipped because it would not change {target_label}. " | |
| 678 | + "Do not submit the same content again. " | |
| 679 | + f"Verification still requires these concrete repair deltas: {repair_preview} " | |
| 680 | + "Use the current generated file as the source of truth and make one real edit, " | |
| 681 | + "patch, or write that expands or changes the flagged artifact." | |
| 682 | + ) | |
| 683 | + return | |
| 684 | + | |
| 685 | + if all_planned_artifact_outputs_exist(dod, project_root=self.context.project_root): | |
| 686 | + target_label = f"`{target}`" if target else "the target file" | |
| 687 | + self.context.queue_steering_message( | |
| 688 | + f"That {tool_call.name} was skipped because it would not change {target_label}. " | |
| 689 | + "All explicitly planned artifacts already exist, so do not rewrite the same content. " | |
| 690 | + "If verification identified a mismatch, make a different concrete mutation that fixes it; " | |
| 691 | + "otherwise finish so Loader can verify the files already on disk." | |
| 692 | + ) | |
| 693 | + | |
| 655 | 694 | def _queue_post_mutation_self_audit_nudge( |
| 656 | 695 | self, |
| 657 | 696 | tool_call: ToolCall, |
@@ -3068,6 +3107,24 @@ def _is_recoverable_guidance_block(event_content: str) -> bool: | ||
| 3068 | 3107 | ) |
| 3069 | 3108 | |
| 3070 | 3109 | |
| 3110 | +def _active_repair_focus_preview(repair_lines: list[str], *, max_lines: int = 4) -> str: | |
| 3111 | + """Compact repair-focus bullets for steering after no-op mutations.""" | |
| 3112 | + | |
| 3113 | + preview: list[str] = [] | |
| 3114 | + for raw_line in repair_lines: | |
| 3115 | + line = str(raw_line or "").strip() | |
| 3116 | + if not line.startswith("- "): | |
| 3117 | + continue | |
| 3118 | + if line.startswith("- Immediate next step:"): | |
| 3119 | + continue | |
| 3120 | + preview.append(line[2:].strip()) | |
| 3121 | + if len(preview) >= max_lines: | |
| 3122 | + break | |
| 3123 | + if not preview: | |
| 3124 | + return "the active verifier repair focus" | |
| 3125 | + return "; ".join(preview) | |
| 3126 | + | |
| 3127 | + | |
| 3071 | 3128 | def _tool_call_label(tool_call: ToolCall) -> str: |
| 3072 | 3129 | """Human-readable label for one tool call.""" |
| 3073 | 3130 | name = tool_call.name |
tests/test_repair_focus.pymodified@@ -27,3 +27,29 @@ def test_extract_active_repair_context_parses_write_next_step_target( | ||
| 27 | 27 | assert context is not None |
| 28 | 28 | assert context.artifact_path == str(repair_target.resolve(strict=False)) |
| 29 | 29 | assert str(repair_target.resolve(strict=False)) in context.allowed_paths |
| 30 | + | |
| 31 | + | |
| 32 | +def test_extract_active_repair_context_keeps_immediate_target_first( | |
| 33 | + tmp_path: Path, | |
| 34 | +) -> None: | |
| 35 | + index_path = tmp_path / "guides" / "nginx" / "index.html" | |
| 36 | + chapter_path = tmp_path / "guides" / "nginx" / "chapters" / "02-installation.html" | |
| 37 | + | |
| 38 | + context = extract_active_repair_context( | |
| 39 | + [ | |
| 40 | + Message( | |
| 41 | + role=Role.USER, | |
| 42 | + content=( | |
| 43 | + "Repair focus:\n" | |
| 44 | + f"- Improve `{chapter_path}`: thin content (526 text chars, expected at least 1758).\n" | |
| 45 | + f"- Immediate next step: edit `{index_path}`.\n" | |
| 46 | + f"- Improve `{index_path}`: insufficient structured content (9 blocks, expected at least 12).\n" | |
| 47 | + ), | |
| 48 | + ) | |
| 49 | + ] | |
| 50 | + ) | |
| 51 | + | |
| 52 | + assert context is not None | |
| 53 | + assert context.artifact_path == str(index_path.resolve(strict=False)) | |
| 54 | + assert context.allowed_paths[0] == str(index_path.resolve(strict=False)) | |
| 55 | + assert str(chapter_path.resolve(strict=False)) in context.allowed_paths | |
tests/test_tool_batches.pymodified@@ -6822,6 +6822,66 @@ def test_tool_batch_runner_blocked_active_repair_mutation_nudge_uses_allowed_pat | ||
| 6822 | 6822 | assert "before widening the change set" in queued[0] |
| 6823 | 6823 | |
| 6824 | 6824 | |
| 6825 | +def test_tool_batch_runner_duplicate_repair_mutation_restates_verifier_deltas( | |
| 6826 | + temp_dir: Path, | |
| 6827 | +) -> None: | |
| 6828 | + async def assess_confidence( | |
| 6829 | + tool_name: str, | |
| 6830 | + tool_args: dict, | |
| 6831 | + context: str, | |
| 6832 | + ) -> ConfidenceAssessment: | |
| 6833 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 6834 | + | |
| 6835 | + async def verify_action( | |
| 6836 | + tool_name: str, | |
| 6837 | + tool_args: dict, | |
| 6838 | + result: str, | |
| 6839 | + expected: str = "", | |
| 6840 | + ) -> ActionVerification: | |
| 6841 | + raise AssertionError("Verification should not run in this scenario") | |
| 6842 | + | |
| 6843 | + index_path = temp_dir / "guide" / "index.html" | |
| 6844 | + chapter_path = temp_dir / "guide" / "chapters" / "02-installation.html" | |
| 6845 | + context = build_context( | |
| 6846 | + temp_dir=temp_dir, | |
| 6847 | + messages=[ | |
| 6848 | + Message( | |
| 6849 | + role=Role.USER, | |
| 6850 | + content=( | |
| 6851 | + "Repair focus:\n" | |
| 6852 | + f"- Improve `{index_path}`: insufficient structured content (9 blocks, expected at least 12).\n" | |
| 6853 | + f"- Improve `{chapter_path}`: thin content (526 text chars, expected at least 1758).\n" | |
| 6854 | + f"- Immediate next step: edit `{index_path}`.\n" | |
| 6855 | + "- Update the listed generated artifacts directly; do not recreate the artifact set.\n" | |
| 6856 | + ), | |
| 6857 | + ) | |
| 6858 | + ], | |
| 6859 | + safeguards=FakeSafeguards(), | |
| 6860 | + assess_confidence=assess_confidence, | |
| 6861 | + verify_action=verify_action, | |
| 6862 | + ) | |
| 6863 | + queued: list[str] = [] | |
| 6864 | + context.queue_steering_message_callback = queued.append | |
| 6865 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 6866 | + dod = create_definition_of_done("Create a multi-file guide.") | |
| 6867 | + | |
| 6868 | + runner._queue_duplicate_mutation_nudge( # type: ignore[attr-defined] | |
| 6869 | + ToolCall( | |
| 6870 | + id="dup-write", | |
| 6871 | + name="write", | |
| 6872 | + arguments={"file_path": str(index_path), "content": "<h1>same</h1>"}, | |
| 6873 | + ), | |
| 6874 | + dod=dod, | |
| 6875 | + ) | |
| 6876 | + | |
| 6877 | + assert queued | |
| 6878 | + assert "skipped because it would not change" in queued[0] | |
| 6879 | + assert "Do not submit the same content again" in queued[0] | |
| 6880 | + assert "insufficient structured content" in queued[0] | |
| 6881 | + assert "thin content" in queued[0] | |
| 6882 | + assert "make one real edit" in queued[0] | |
| 6883 | + | |
| 6884 | + | |
| 6825 | 6885 | @pytest.mark.asyncio |
| 6826 | 6886 | async def test_tool_batch_runner_hands_off_after_active_repair_support_file_write( |
| 6827 | 6887 | temp_dir: Path, |