Harden repeated asset retries
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA: d06c6391f590730de197686c869d7bbd6f411c7a
- Parents: e6ba8d6
- Tree: 062e517
d06c639
d06c6391f590730de197686c869d7bbd6f411c7a
e6ba8d6
062e517

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/tool_batches.py | 35 | 0 |
| M | tests/test_tool_batches.py | 54 | 0 |
src/loader/runtime/tool_batches.py (modified)
@@ -1401,6 +1401,24 @@ class ToolBatchRunner: | ||
| 1401 | 1401 | + "." |
| 1402 | 1402 | ) |
| 1403 | 1403 | |
| 1404 | + repeat_count = _count_recent_blocked_html_asset_events( | |
| 1405 | + self.context.session.messages, | |
| 1406 | + missing_assets, | |
| 1407 | + ) | |
| 1408 | + if repeat_count >= 2 and missing_assets: | |
| 1409 | + missing_preview = ", ".join(f"`{asset}`" for asset in missing_assets[:3]) | |
| 1410 | + self.context.queue_steering_message( | |
| 1411 | + f"The same HTML mutation for `{target}` has now been blocked " | |
| 1412 | + f"{repeat_count} times because local asset href(s) {missing_preview} " | |
| 1413 | + "do not exist. Do not resend another `write`/`edit`/`patch` for " | |
| 1414 | + f"`{target}` while it still contains those hrefs. Recommended next " | |
| 1415 | + "action: retry the same file with the entire stylesheet `<link>` " | |
| 1416 | + "line removed and inline any necessary styling. Alternative: create " | |
| 1417 | + "the referenced asset file first, then link to it. Do not claim " | |
| 1418 | + "completion until this blocked file write succeeds." | |
| 1419 | + ) | |
| 1420 | + return | |
| 1421 | + | |
| 1404 | 1422 | self.context.queue_steering_message( |
| 1405 | 1423 | f"The last HTML mutation for `{target}` was blocked, so that file was " |
| 1406 | 1424 | "not created or updated. Retry the same file with one concrete " |
@@ -2821,6 +2839,23 @@ def _extract_blocked_html_target_list(event_content: str, marker: str) -> list[s | ||
| 2821 | 2839 | return [item.strip() for item in target_text.split(",") if item.strip()] |
| 2822 | 2840 | |
| 2823 | 2841 | |
| 2842 | +def _count_recent_blocked_html_asset_events( | |
| 2843 | + messages: list[Any], | |
| 2844 | + missing_assets: list[str], | |
| 2845 | +) -> int: | |
| 2846 | + if not missing_assets: | |
| 2847 | + return 0 | |
| 2848 | + | |
| 2849 | + count = 0 | |
| 2850 | + for message in reversed(messages[-12:]): | |
| 2851 | + content = str(getattr(message, "content", "") or "") | |
| 2852 | + if "HTML local asset references do not exist" not in content: | |
| 2853 | + continue | |
| 2854 | + if any(asset and asset in content for asset in missing_assets): | |
| 2855 | + count += 1 | |
| 2856 | + return count | |
| 2857 | + | |
| 2858 | + | |
| 2824 | 2859 | def _resume_suffix_for_target( |
| 2825 | 2860 | target: Path, |
| 2826 | 2861 | *, |
tests/test_tool_batches.py (modified)
@@ -7918,6 +7918,60 @@ def test_tool_batch_runner_blocked_html_asset_nudge_retries_same_file( | ||
| 7918 | 7918 | assert "do not claim completion" in queued[0] |
| 7919 | 7919 | |
| 7920 | 7920 | |
| 7921 | +def test_tool_batch_runner_repeated_blocked_html_asset_nudge_forces_href_removal( | |
| 7922 | + temp_dir: Path, | |
| 7923 | +) -> None: | |
| 7924 | + async def assess_confidence( | |
| 7925 | + tool_name: str, | |
| 7926 | + tool_args: dict, | |
| 7927 | + context: str, | |
| 7928 | + ) -> ConfidenceAssessment: | |
| 7929 | + raise AssertionError("Confidence scoring should not run in this scenario") | |
| 7930 | + | |
| 7931 | + async def verify_action( | |
| 7932 | + tool_name: str, | |
| 7933 | + tool_args: dict, | |
| 7934 | + result: str, | |
| 7935 | + expected: str = "", | |
| 7936 | + ) -> ActionVerification: | |
| 7937 | + raise AssertionError("Verification should not run in this scenario") | |
| 7938 | + | |
| 7939 | + blocked_event = ( | |
| 7940 | + "[Blocked - HTML local asset references do not exist] Suggestion: " | |
| 7941 | + "Use only existing local assets for non-HTML href values. " | |
| 7942 | + "Missing local asset href(s): ../style.css. Remove the asset link, " | |
| 7943 | + "create the referenced asset first, inline the styling/content, or point " | |
| 7944 | + "the href at an existing local file." | |
| 7945 | + ) | |
| 7946 | + context = build_context( | |
| 7947 | + temp_dir=temp_dir, | |
| 7948 | + messages=[Message(role=Role.TOOL, content=blocked_event)], | |
| 7949 | + safeguards=FakeSafeguards(), | |
| 7950 | + assess_confidence=assess_confidence, | |
| 7951 | + verify_action=verify_action, | |
| 7952 | + ) | |
| 7953 | + context.session.append(Message(role=Role.TOOL, content=blocked_event)) | |
| 7954 | + queued: list[str] = [] | |
| 7955 | + context.queue_steering_message_callback = queued.append | |
| 7956 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 7957 | + target = temp_dir / "guide" / "chapters" / "05-troubleshooting.html" | |
| 7958 | + | |
| 7959 | + runner._queue_blocked_html_asset_nudge( | |
| 7960 | + ToolCall( | |
| 7961 | + id="write-troubleshooting", | |
| 7962 | + name="write", | |
| 7963 | + arguments={"file_path": str(target)}, | |
| 7964 | + ), | |
| 7965 | + blocked_event, | |
| 7966 | + ) | |
| 7967 | + | |
| 7968 | + assert queued | |
| 7969 | + assert "blocked 2 times" in queued[0] | |
| 7970 | + assert "`../style.css`" in queued[0] | |
| 7971 | + assert "line removed" in queued[0] | |
| 7972 | + assert "Do not resend another" in queued[0] | |
| 7973 | + | |
| 7974 | + | |
| 7921 | 7975 | @pytest.mark.asyncio |
| 7922 | 7976 | async def test_tool_batch_runner_blocked_empty_file_path_nudges_concrete_next_artifact( |
| 7923 | 7977 | temp_dir: Path, |