Continue missing artifact repairs
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA: d4b1624366902a4476bd47ef347da90a6e868769
- Parents: b3d784b
- Tree: f40afcf

d4b1624
d4b1624366902a4476bd47ef347da90a6e868769
b3d784b
f40afcf

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/tool_batches.py | 46 | 0 |
| M | tests/test_tool_batches.py | 92 | 0 |
src/loader/runtime/tool_batches.py (modified)

@@ -1052,6 +1052,26 @@ class ToolBatchRunner: | ||
| 1052 | 1052 | ) |
| 1053 | 1053 | return |
| 1054 | 1054 | |
| 1055 | + next_missing_target = _next_missing_repair_path( | |
| 1056 | + repair, | |
| 1057 | + changed_path=changed_path, | |
| 1058 | + ) | |
| 1059 | + if next_missing_target: | |
| 1060 | + self.context.queue_steering_message( | |
| 1061 | + "The active verification repair target was updated, but the " | |
| 1062 | + "declared artifact set is still incomplete. Continue directly with " | |
| 1063 | + f"the next missing declared artifact `{next_missing_target}` using " | |
| 1064 | + "one concrete `write(file_path=..., content=...)` call. Do not " | |
| 1065 | + "run verification, call TodoWrite, reread unrelated references, or " | |
| 1066 | + "finish with a summary yet.\n\n" | |
| 1067 | + "Repair focus:\n" | |
| 1068 | + f"- Continue the declared output set by creating missing planned artifact `{next_missing_target}`.\n" | |
| 1069 | + f"- Immediate next step: write `{next_missing_target}`.\n" | |
| 1070 | + "- Use the current generated guide files as source of truth; do not " | |
| 1071 | + "rewrite existing aggregate files to match the partial artifact set." | |
| 1072 | + ) | |
| 1073 | + return | |
| 1074 | + | |
| 1055 | 1075 | if changed_path == str(Path(repair.artifact_path).expanduser().resolve(strict=False)): |
| 1056 | 1076 | self.context.queue_steering_message( |
| 1057 | 1077 | "The active verification repair target was updated. " |
@@ -3474,6 +3494,32 @@ def _next_quality_repair_path(repair: Any, *, changed_path: str) -> str: | ||
| 3474 | 3494 | return "" |
| 3475 | 3495 | |
| 3476 | 3496 | |
| 3497 | +def _next_missing_repair_path(repair: Any, *, changed_path: str) -> str: | |
| 3498 | + """Return the next concrete missing repair file after one support artifact exists.""" | |
| 3499 | + | |
| 3500 | + try: | |
| 3501 | + normalized_changed = str(Path(changed_path).expanduser().resolve(strict=False)) | |
| 3502 | + except (OSError, RuntimeError, ValueError): | |
| 3503 | + normalized_changed = str(Path(changed_path).expanduser()) | |
| 3504 | + | |
| 3505 | + for raw_path in getattr(repair, "allowed_paths", ()) or (): | |
| 3506 | + path_text = str(raw_path or "").strip() | |
| 3507 | + if not path_text: | |
| 3508 | + continue | |
| 3509 | + try: | |
| 3510 | + normalized = str(Path(path_text).expanduser().resolve(strict=False)) | |
| 3511 | + except (OSError, RuntimeError, ValueError): | |
| 3512 | + normalized = str(Path(path_text).expanduser()) | |
| 3513 | + if normalized == normalized_changed: | |
| 3514 | + continue | |
| 3515 | + try: | |
| 3516 | + if not Path(normalized).exists(): | |
| 3517 | + return normalized | |
| 3518 | + except (OSError, RuntimeError, ValueError): | |
| 3519 | + continue | |
| 3520 | + return "" | |
| 3521 | + | |
| 3522 | + | |
| 3477 | 3523 | def _tool_call_targets_path(tool_call: ToolCall, target: str) -> bool: |
| 3478 | 3524 | if not target: |
| 3479 | 3525 | return False |
tests/test_tool_batches.py (modified)

@@ -7386,6 +7386,98 @@ async def test_tool_batch_runner_quality_repair_success_hands_to_next_target( | ||
| 7386 | 7386 | assert all("All explicitly planned artifacts now exist" not in message for message in queued) |
| 7387 | 7387 | |
| 7388 | 7388 | |
| 7389 | +@pytest.mark.asyncio | |
| 7390 | +async def test_tool_batch_runner_continues_missing_declared_repairs_before_verify( | |
| 7391 | + temp_dir: Path, | |
| 7392 | +) -> None: | |
| 7393 | + async def assess_confidence( | |
| 7394 | + tool_name: str, | |
| 7395 | + tool_args: dict, | |
| 7396 | + context: str, | |
| 7397 | + ) -> ConfidenceAssessment: | |
| 7398 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 7399 | + | |
| 7400 | + async def verify_action( | |
| 7401 | + tool_name: str, | |
| 7402 | + tool_args: dict, | |
| 7403 | + result: str, | |
| 7404 | + expected: str = "", | |
| 7405 | + ) -> ActionVerification: | |
| 7406 | + raise AssertionError("Verification should not run in this scenario") | |
| 7407 | + | |
| 7408 | + guide_root = temp_dir / "guide" | |
| 7409 | + chapters = guide_root / "chapters" | |
| 7410 | + index = guide_root / "index.html" | |
| 7411 | + first_missing = chapters / "02-installation.html" | |
| 7412 | + second_missing = chapters / "03-configuration.html" | |
| 7413 | + chapters.mkdir(parents=True) | |
| 7414 | + index.write_text( | |
| 7415 | + '<a href="chapters/02-installation.html">Install</a>\n' | |
| 7416 | + '<a href="chapters/03-configuration.html">Configure</a>\n' | |
| 7417 | + ) | |
| 7418 | + context = build_context( | |
| 7419 | + temp_dir=temp_dir, | |
| 7420 | + messages=[ | |
| 7421 | + Message( | |
| 7422 | + role=Role.USER, | |
| 7423 | + content=( | |
| 7424 | + "Repair focus:\n" | |
| 7425 | + f"- Continue the declared output set by creating missing planned artifact `{first_missing}`.\n" | |
| 7426 | + f"- Continue the declared output set by creating missing planned artifact `{second_missing}`.\n" | |
| 7427 | + f"- Existing file `{index}` already references `chapters/02-installation.html` -> `{first_missing}`.\n" | |
| 7428 | + f"- Immediate next step: write `{first_missing}`.\n" | |
| 7429 | + "- Continue one missing declared output at a time until the declared set exists.\n" | |
| 7430 | + ), | |
| 7431 | + ) | |
| 7432 | + ], | |
| 7433 | + safeguards=FakeSafeguards(), | |
| 7434 | + assess_confidence=assess_confidence, | |
| 7435 | + verify_action=verify_action, | |
| 7436 | + ) | |
| 7437 | + queued: list[str] = [] | |
| 7438 | + context.queue_steering_message_callback = queued.append | |
| 7439 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 7440 | + dod = create_definition_of_done("Create a multi-file guide.") | |
| 7441 | + tool_call = ToolCall( | |
| 7442 | + id="write-install", | |
| 7443 | + name="write", | |
| 7444 | + arguments={ | |
| 7445 | + "file_path": str(first_missing), | |
| 7446 | + "content": "<html><body><h1>Install</h1></body></html>", | |
| 7447 | + }, | |
| 7448 | + ) | |
| 7449 | + | |
| 7450 | + await runner.execute_batch( | |
| 7451 | + tool_calls=[tool_call], | |
| 7452 | + tool_source="assistant", | |
| 7453 | + pending_tool_calls_seen=set(), | |
| 7454 | + emit=_noop_emit, | |
| 7455 | + summary=TurnSummary(final_response=""), | |
| 7456 | + dod=dod, | |
| 7457 | + executor=FakeExecutor( | |
| 7458 | + [ | |
| 7459 | + tool_outcome( | |
| 7460 | + tool_call=tool_call, | |
| 7461 | + output=f"Successfully wrote {first_missing}", | |
| 7462 | + is_error=False, | |
| 7463 | + ) | |
| 7464 | + ] | |
| 7465 | + ), # type: ignore[arg-type] | |
| 7466 | + on_confirmation=None, | |
| 7467 | + on_user_question=None, | |
| 7468 | + emit_confirmation=None, | |
| 7469 | + consecutive_errors=0, | |
| 7470 | + ) | |
| 7471 | + | |
| 7472 | + assert queued | |
| 7473 | + handoff = queued[-1] | |
| 7474 | + assert "declared artifact set is still incomplete" in handoff | |
| 7475 | + assert str(second_missing.resolve(strict=False)) in handoff | |
| 7476 | + assert f"Immediate next step: write `{second_missing.resolve(strict=False)}`" in handoff | |
| 7477 | + assert "Do not run verification" in handoff | |
| 7478 | + assert "Finish with a final response now" not in handoff | |
| 7479 | + | |
| 7480 | + | |
| 7389 | 7481 | @pytest.mark.asyncio |
| 7390 | 7482 | async def test_tool_batch_runner_hands_off_after_active_repair_support_file_write( |
| 7391 | 7483 | temp_dir: Path, |