Hand off repair support writes
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
- SHA: 0544f0e86a285ea4f924d37c1080c7700da4c2c5 (0544f0e)
- Parents: 8b1407b
- Tree: a713aaf
| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/tool_batches.py | 39 | 0 |
| M | tests/test_tool_batches.py | 81 | 0 |
src/loader/runtime/tool_batches.py (modified)
@@ -374,6 +374,7 @@ class ToolBatchRunner: | ||
| 374 | 374 | dod=dod, |
| 375 | 375 | ) |
| 376 | 376 | else: |
| 377 | + self._queue_active_repair_success_handoff_nudge(tool_call) | |
| 377 | 378 | self._queue_post_mutation_self_audit_nudge(tool_call, dod=dod) |
| 378 | 379 | self._queue_completed_artifact_observation_handoff_nudge( |
| 379 | 380 | tool_call, |
@@ -859,6 +860,44 @@ class ToolBatchRunner: | ||
| 859 | 860 | f"and repair `{repair.artifact_path}` before widening the change set." |
| 860 | 861 | ) |
| 861 | 862 | |
| 863 | + def _queue_active_repair_success_handoff_nudge(self, tool_call: ToolCall) -> None: | |
| 864 | + """After a repair mutation, hand back to finalization instead of ad hoc edits.""" | |
| 865 | + | |
| 866 | + if tool_call.name not in {"write", "edit", "patch"}: | |
| 867 | + return | |
| 868 | + raw_path = str(tool_call.arguments.get("file_path", "")).strip() | |
| 869 | + if not raw_path: | |
| 870 | + return | |
| 871 | + | |
| 872 | + repair = extract_active_repair_context(self.context.session.messages) | |
| 873 | + if repair is None or not repair.allowed_paths: | |
| 874 | + return | |
| 875 | + | |
| 876 | + try: | |
| 877 | + changed_path = str(Path(raw_path).expanduser().resolve(strict=False)) | |
| 878 | + except (OSError, RuntimeError, ValueError): | |
| 879 | + changed_path = str(Path(raw_path).expanduser()) | |
| 880 | + allowed_paths = { | |
| 881 | + str(Path(path).expanduser().resolve(strict=False)) | |
| 882 | + for path in repair.allowed_paths | |
| 883 | + } | |
| 884 | + if changed_path not in allowed_paths: | |
| 885 | + return | |
| 886 | + | |
| 887 | + if changed_path == str(Path(repair.artifact_path).expanduser().resolve(strict=False)): | |
| 888 | + self.context.queue_steering_message( | |
| 889 | + "The active verification repair target was updated. " | |
| 890 | + "Do not keep auditing or retarget nearby links by guesswork. " | |
| 891 | + "Finish with a final response now so Loader can re-run verification." | |
| 892 | + ) | |
| 893 | + return | |
| 894 | + | |
| 895 | + self.context.queue_steering_message( | |
| 896 | + "The support file for the active verification repair now exists. " | |
| 897 | + f"Do not retarget `{repair.artifact_path}` to a different missing path by guesswork. " | |
| 898 | + "Finish with a final response now so Loader can re-run verification." | |
| 899 | + ) | |
| 900 | + | |
| 862 | 901 | def _queue_blocked_late_reference_drift_nudge( |
| 863 | 902 | self, |
| 864 | 903 | event_content: str, |
tests/test_tool_batches.py (modified)
@@ -6822,6 +6822,87 @@ def test_tool_batch_runner_blocked_active_repair_mutation_nudge_uses_allowed_pat | ||
| 6822 | 6822 | assert "before widening the change set" in queued[0] |
| 6823 | 6823 | |
| 6824 | 6824 | |
| 6825 | +@pytest.mark.asyncio | |
| 6826 | +async def test_tool_batch_runner_hands_off_after_active_repair_support_file_write( | |
| 6827 | + temp_dir: Path, | |
| 6828 | +) -> None: | |
| 6829 | + async def assess_confidence( | |
| 6830 | + tool_name: str, | |
| 6831 | + tool_args: dict, | |
| 6832 | + context: str, | |
| 6833 | + ) -> ConfidenceAssessment: | |
| 6834 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 6835 | + | |
| 6836 | + async def verify_action( | |
| 6837 | + tool_name: str, | |
| 6838 | + tool_args: dict, | |
| 6839 | + result: str, | |
| 6840 | + expected: str = "", | |
| 6841 | + ) -> ActionVerification: | |
| 6842 | + raise AssertionError("Verification should not run in this scenario") | |
| 6843 | + | |
| 6844 | + repair_target = temp_dir / "guide" / "index.html" | |
| 6845 | + stylesheet = temp_dir / "guide" / "style.css" | |
| 6846 | + repair_target.parent.mkdir(parents=True) | |
| 6847 | + repair_target.write_text('<link rel="stylesheet" href="style.css">\n') | |
| 6848 | + context = build_context( | |
| 6849 | + temp_dir=temp_dir, | |
| 6850 | + messages=[ | |
| 6851 | + Message( | |
| 6852 | + role=Role.ASSISTANT, | |
| 6853 | + content=( | |
| 6854 | + "Repair focus:\n" | |
| 6855 | + f"- Fix the broken local reference `style.css` in `{repair_target}`.\n" | |
| 6856 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 6857 | + f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `style.css`.\n" | |
| 6858 | + ), | |
| 6859 | + ) | |
| 6860 | + ], | |
| 6861 | + safeguards=FakeSafeguards(), | |
| 6862 | + assess_confidence=assess_confidence, | |
| 6863 | + verify_action=verify_action, | |
| 6864 | + ) | |
| 6865 | + queued: list[str] = [] | |
| 6866 | + context.queue_steering_message_callback = queued.append | |
| 6867 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 6868 | + dod = create_definition_of_done("Repair a guide stylesheet link.") | |
| 6869 | + tool_call = ToolCall( | |
| 6870 | + id="write-style", | |
| 6871 | + name="write", | |
| 6872 | + arguments={ | |
| 6873 | + "file_path": str(stylesheet), | |
| 6874 | + "content": "body { font-family: sans-serif; }\n", | |
| 6875 | + }, | |
| 6876 | + ) | |
| 6877 | + | |
| 6878 | + await runner.execute_batch( | |
| 6879 | + tool_calls=[tool_call], | |
| 6880 | + tool_source="assistant", | |
| 6881 | + pending_tool_calls_seen=set(), | |
| 6882 | + emit=_noop_emit, | |
| 6883 | + summary=TurnSummary(final_response=""), | |
| 6884 | + dod=dod, | |
| 6885 | + executor=FakeExecutor( | |
| 6886 | + [ | |
| 6887 | + tool_outcome( | |
| 6888 | + tool_call=tool_call, | |
| 6889 | + output=f"Successfully wrote {stylesheet}", | |
| 6890 | + is_error=False, | |
| 6891 | + ) | |
| 6892 | + ] | |
| 6893 | + ), # type: ignore[arg-type] | |
| 6894 | + on_confirmation=None, | |
| 6895 | + on_user_question=None, | |
| 6896 | + emit_confirmation=None, | |
| 6897 | + consecutive_errors=0, | |
| 6898 | + ) | |
| 6899 | + | |
| 6900 | + assert queued | |
| 6901 | + assert any("support file for the active verification repair now exists" in message for message in queued) | |
| 6902 | + assert any("Do not retarget" in message for message in queued) | |
| 6903 | + assert any("Loader can re-run verification" in message for message in queued) | |
| 6904 | + | |
| 6905 | + | |
| 6825 | 6906 | def test_tool_batch_runner_blocked_late_reference_drift_nudge_points_to_missing_artifact( |
| 6826 | 6907 | temp_dir: Path, |
| 6827 | 6908 | ) -> None: |