Python · 81679 bytes Raw Blame History
1 """Tool-batch execution and recovery bookkeeping for the typed runtime."""
2
3 from __future__ import annotations
4
5 import shlex
6 from collections.abc import Awaitable, Callable
7 from dataclasses import dataclass, field
8 from pathlib import Path
9 from typing import Any
10
11 from ..llm.base import ToolCall
12 from .compaction import infer_preferred_next_step, summarize_confirmed_facts
13 from .context import RuntimeContext
14 from .dod import (
15 DefinitionOfDone,
16 DefinitionOfDoneStore,
17 all_planned_artifacts_exist,
18 begin_new_verification_attempt,
19 collect_planned_artifact_targets,
20 derive_verification_commands,
21 ensure_active_verification_attempt,
22 infer_next_output_file,
23 is_state_mutating_tool_call,
24 planned_artifact_target_satisfied,
25 record_successful_tool_call,
26 synthesize_todo_items,
27 )
28 from .events import AgentEvent, TurnSummary
29 from .evidence_provenance import EvidenceProvenance, EvidenceProvenanceStatus
30 from .executor import ToolExecutionState, ToolExecutor
31 from .logging import get_runtime_logger
32 from .policy_timeline import append_verification_timeline_entry
33 from .recovery import RecoveryContext, detect_missing_mutation_payload
34 from .repair_focus import extract_active_repair_context
35 from .safeguard_services import extract_shell_text_rewrite_target
36 from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate
37 from .tool_batch_recovery import ToolBatchRecoveryController
38 from .verification_observations import (
39 VerificationObservation,
40 VerificationObservationStatus,
41 )
42 from .workflow import (
43 advance_todos_from_tool_call,
44 effective_pending_todo_items,
45 infer_pending_todo_output_target,
46 preferred_pending_todo_item,
47 reconcile_aggregate_completion_steps,
48 sync_todos_to_definition_of_done,
49 )
50
# Async callback that receives every AgentEvent the runner emits.
EventSink = Callable[[AgentEvent], Awaitable[None]]
# Optional async confirmation hook; resolves to True when the action is
# approved. NOTE(review): the three str args and the dict presumably mirror
# the executor's on_confirmation seam — confirm meanings against ToolExecutor.
ConfirmationHandler = (
    Callable[[str, str, str, dict[str, Any] | None], Awaitable[bool]] | None
)
# Optional async hook that asks the user a question (with optional choices)
# and returns the free-text answer.
UserQuestionHandler = Callable[[str, list[str] | None], Awaitable[str]] | None

# Canonical label of the verification-evidence todo item.
_VERIFY_ITEM = "Collect verification evidence"
# Aggregate/bookkeeping todo labels that progress nudges must never cite as a
# concrete "completed" step (see _queue_next_pending_todo_nudge).
_TODO_NUDGE_EXCLUDED_ITEMS = {
    "Complete the requested work",
    _VERIFY_ITEM,
}
# Verb fragments marking a todo item as a state-mutating (file-changing) step.
# Presumably consumed by _todo_is_mutation_step, defined elsewhere in this
# module — TODO confirm.
_MUTATION_TODO_HINTS = (
    "create",
    "creating",
    "update",
    "updating",
    "edit",
    "editing",
    "write",
    "writing",
    "fix",
    "fixing",
    "modify",
    "modifying",
    "change",
    "changing",
    "patch",
    "patching",
    "replace",
    "replacing",
    "correct",
    "correcting",
    "rewrite",
    "rewriting",
)
# Phrase fragments marking a todo item as a cross-file consistency/review
# step. Presumably consumed by _todo_is_consistency_review_step, defined
# elsewhere in this module — TODO confirm.
_CONSISTENCY_REVIEW_HINTS = (
    "consistent",
    "consistently",
    "formatted",
    "link",
    "linked",
    "navigation",
    "work properly",
    "all files",
    "every file",
    "ensure",
)
# Notepad tools whose successful calls are bookkeeping notes rather than
# real task progress.
_BOOKKEEPING_NOTE_TOOL_NAMES = {
    "notepad_write_working",
    "notepad_append",
    "notepad_write_priority",
    "notepad_write_manual",
}
104
105
@dataclass
class ToolBatchResult:
    """Outcome of running one assistant-proposed tool batch."""

    # Audit trail of attempted calls: "<tool>: <args truncated to 100 chars>".
    actions_taken: list[str] = field(default_factory=list)
    # Running count of back-to-back failed tool calls; reset to 0 on success.
    consecutive_errors: int = 0
    # True when the batch decided the agent loop should stop (halt response
    # from bookkeeping, or the consecutive-error limit was hit).
    halted: bool = False
    # User-facing response text accompanying a halt; empty otherwise.
    final_response: str = ""
114
115
116 class ToolBatchRunner:
117 """Owns tool-batch execution, recovery, and post-tool bookkeeping."""
118
119 def __init__(
120 self,
121 context: RuntimeContext,
122 dod_store: DefinitionOfDoneStore,
123 *,
124 confidence_gate: ToolBatchConfidenceGate | None = None,
125 recovery_controller: ToolBatchRecoveryController | None = None,
126 verification_gate: ToolBatchVerificationGate | None = None,
127 ) -> None:
128 self.context = context
129 self.dod_store = dod_store
130 self.confidence_gate = confidence_gate or ToolBatchConfidenceGate(context)
131 self.recovery_controller = recovery_controller or ToolBatchRecoveryController(context)
132 self.verification_gate = verification_gate or ToolBatchVerificationGate(context)
133
    async def execute_batch(
        self,
        *,
        tool_calls: list[ToolCall],
        tool_source: str,
        pending_tool_calls_seen: set[str],
        emit: EventSink,
        summary: TurnSummary,
        dod: DefinitionOfDone,
        executor: ToolExecutor,
        on_confirmation: ConfirmationHandler,
        on_user_question: UserQuestionHandler,
        emit_confirmation,
        consecutive_errors: int,
    ) -> ToolBatchResult:
        """Run one assistant tool batch through the shared executor seam.

        Args:
            tool_calls: Assistant-proposed calls, executed in order.
            tool_source: Source tag forwarded to the executor for every call.
            pending_tool_calls_seen: IDs whose ``tool_call`` event was already
                emitted upstream; such calls are not re-announced here.
            emit: Async sink receiving every AgentEvent produced by the batch.
            summary: Turn summary that accumulates result messages/failures.
            dod: Definition-of-done state, updated as calls succeed.
            executor: Shared tool executor that actually runs each call.
            on_confirmation: Optional confirmation hook, passed through.
            on_user_question: Optional user-question hook, passed through.
            emit_confirmation: Confirmation emitter, passed through untouched.
            consecutive_errors: Error streak carried in from earlier batches.

        Returns:
            ToolBatchResult; ``halted`` is True when the agent loop must stop.
        """

        result = ToolBatchResult(consecutive_errors=consecutive_errors)

        # Pre-populate planned items for the entire batch so the todo
        # widget shows what's coming, not just what's done.
        planned_labels = _batch_planned_labels(tool_calls)
        completed_labels: list[str] = []

        async def _emit_batch_todos() -> None:
            """Emit a todo update combining DoD state with batch progress."""
            items = synthesize_todo_items(dod)
            for label in planned_labels:
                if label in completed_labels:
                    continue
                # Don't duplicate items already in DoD
                if any(item["content"] == label for item in items):
                    continue
                items.append({"content": label, "status": "in_progress", "active_form": label})
            if items:
                await emit(AgentEvent(type="todo_update", todo_items=items))

        await _emit_batch_todos()

        for tool_call in tool_calls:
            cfg = self.context.config.reasoning

            # Optional confidence gate may veto a low-confidence call outright.
            if cfg.confidence_scoring:
                should_skip = await self.confidence_gate.should_skip(
                    tool_call=tool_call,
                    emit=emit,
                )
                if should_skip:
                    continue

            # Announce the call unless the streaming layer already did so.
            if tool_call.id not in pending_tool_calls_seen:
                await emit(
                    AgentEvent(
                        type="tool_call",
                        tool_name=tool_call.name,
                        tool_call_id=tool_call.id,
                        tool_args=tool_call.arguments,
                        phase="assistant",
                    )
                )

            # Keep a truncated audit trail of every attempted action.
            result.actions_taken.append(
                f"{tool_call.name}: {str(tool_call.arguments)[:100]}"
            )

            outcome = await executor.execute_tool_call(
                tool_call,
                on_confirmation=on_confirmation,
                on_user_question=on_user_question,
                emit_confirmation=emit_confirmation,
                source=tool_source,
            )
            # The executor may rewrite the call (e.g. normalized arguments);
            # use its version for all downstream bookkeeping.
            executed_tool_call = outcome.tool_call
            if (
                outcome.rollback_action is not None
                and self.context.config.reasoning.show_rollback_plan
            ):
                await emit(
                    AgentEvent(
                        type="rollback",
                        content=(
                            f"Rollback tracked: {outcome.rollback_action.description}"
                        ),
                        rollback_action=outcome.rollback_action,
                    )
                )

            # Failed executions may yield a synthesized recovery follow-up that
            # replaces normal result handling for this call.
            if (
                outcome.state == ToolExecutionState.EXECUTED
                and outcome.is_error
                and self.context.config.auto_recover
            ):
                recovery_result = await self.recovery_controller.build_follow_up(
                    tool_call=executed_tool_call,
                    outcome=outcome,
                    emit=emit,
                )
                if recovery_result is not None:
                    summary.tool_result_messages.append(recovery_result)
                    self.context.session.append(recovery_result)
                    continue

            if outcome.state == ToolExecutionState.EXECUTED and not outcome.is_error:
                loop_response = await self._record_successful_execution(
                    tool_call=executed_tool_call,
                    outcome=outcome,
                    dod=dod,
                    emit=emit,
                    summary=summary,
                )
                # Mark this tool's label as completed and emit live progress
                label = _tool_call_label(executed_tool_call)
                if label:
                    completed_labels.append(label)
                    await _emit_batch_todos()
                if loop_response is not None:
                    result.halted = True
                    result.final_response = loop_response
                    return result

            # Track the consecutive-error streak; any success resets it.
            if outcome.is_error:
                result.consecutive_errors += 1
            else:
                result.consecutive_errors = 0

            await emit(
                AgentEvent(
                    type="tool_result",
                    content=outcome.event_content,
                    tool_name=executed_tool_call.name,
                    tool_call_id=outcome.tool_call.id,
                    tool_metadata=(
                        outcome.registry_result.metadata
                        if outcome.registry_result is not None
                        else None
                    ),
                    is_error=outcome.is_error,
                    phase="assistant",
                )
            )

            # Always append tool results to the session so the model sees
            # its own output. The verification gate may inject a correction
            # prompt, but the original result must still be in context —
            # otherwise the model operates blind and loops.
            self.context.session.append(outcome.message)
            summary.tool_result_messages.append(outcome.message)
            if outcome.state == ToolExecutionState.DUPLICATE:
                self._queue_duplicate_observation_nudge(tool_call, dod=dod)
            elif outcome.state == ToolExecutionState.BLOCKED:
                # Fan blocked calls out to targeted steering nudges; each
                # helper no-ops unless its specific block marker matches.
                self._queue_blocked_invalid_mutation_nudge(
                    tool_call,
                    outcome.event_content,
                    dod=dod,
                )
                self._queue_blocked_html_declared_target_nudge(
                    tool_call,
                    outcome.event_content,
                )
                self._queue_blocked_active_repair_nudge(outcome.event_content)
                self._queue_blocked_active_repair_mutation_nudge(outcome.event_content)
                self._queue_blocked_completed_artifact_scope_nudge(
                    outcome.event_content,
                    dod=dod,
                )
                self._queue_blocked_late_reference_drift_nudge(
                    outcome.event_content,
                    dod=dod,
                )
                self._queue_blocked_shell_rewrite_nudge(tool_call)
                self._queue_blocked_html_edit_nudge(tool_call, outcome.event_content)

            should_continue = await self.verification_gate.should_continue(
                tool_call=tool_call,
                outcome=outcome,
                emit=emit,
            )

            rlog = get_runtime_logger()
            rlog.tool_exec(
                name=tool_call.name,
                state=outcome.state.value,
                is_error=outcome.is_error,
                result_preview=outcome.event_content,
                appended_to_session=True,
            )
            if should_continue:
                rlog.verification_gate(tool_call.name, should_continue=True)
                continue

        # NOTE(review): the error-limit check runs once after the batch loop,
        # so a long error streak halts only after remaining calls have run —
        # confirm that is the intended placement.
        if result.consecutive_errors >= 3:
            final_response = (
                "I ran into some issues. "
                "Let me know if you'd like me to try a different approach."
            )
            summary.final_response = final_response
            summary.failures.append("three consecutive tool errors")
            await emit(AgentEvent(type="response", content=final_response))
            result.halted = True
            result.final_response = final_response

        return result
336
    def _queue_duplicate_observation_nudge(
        self,
        tool_call: ToolCall,
        *,
        dod: DefinitionOfDone,
    ) -> None:
        """Queue a concrete next-step nudge after duplicate observational actions.

        Fires only for read-style tools. The cascade below picks the single
        most actionable steering message, in priority order: missing planned
        artifact that should be prioritized, next pending todo, any missing
        artifact, all-artifacts-done verification handoff, an inferred next
        step, the duplicated target path, and finally a generic redirect.
        Each branch queues exactly one message and returns.
        """

        if tool_call.name not in {"read", "glob", "grep", "bash"}:
            return

        current_task = getattr(self.context.session, "current_task", None)
        missing_artifact = _next_missing_planned_artifact(
            dod,
            project_root=self.context.project_root,
            messages=list(getattr(self.context.session, "messages", []) or []),
        )
        next_pending = preferred_pending_todo_item(
            dod,
            project_root=self.context.project_root,
            missing_artifact=missing_artifact,
        )
        confirmed_facts = summarize_confirmed_facts(
            self.context.session.messages,
            max_items=2,
        )
        # Highest priority: a declared artifact is missing and outranks the
        # pending todo list.
        if _should_prioritize_missing_artifact(
            dod=dod,
            next_pending=next_pending,
            missing_artifact=missing_artifact,
            project_root=self.context.project_root,
        ):
            prefix = "Reuse the earlier observation instead of repeating it. "
            if confirmed_facts:
                prefix += f"Confirmed facts: {confirmed_facts}. "
            self.context.queue_steering_message(
                prefix
                + "A declared output artifact is still missing."
                + _missing_artifact_resume_suffix(
                    missing_artifact,
                    project_root=self.context.project_root,
                    messages=list(getattr(self.context.session, "messages", []) or []),
                )
                + " Do not switch into review or consistency-check mode until the missing artifact exists."
            )
            return
        # Next: point at the concrete pending todo item, with extra "act now"
        # guidance when that item is a mutation step.
        if next_pending:
            mutation_suffix = ""
            if _todo_is_mutation_step(next_pending):
                mutation_suffix = _pending_item_resume_suffix(
                    dod,
                    next_pending=next_pending,
                    missing_artifact=missing_artifact,
                    project_root=self.context.project_root,
                    messages=list(getattr(self.context.session, "messages", []) or []),
                )
                if not mutation_suffix:
                    mutation_suffix = (
                        " You already have enough evidence for that step, so stop gathering "
                        "more reference material and perform the change now."
                    )
            if confirmed_facts:
                self.context.queue_steering_message(
                    "Reuse the earlier observation instead of repeating it. "
                    f"Confirmed facts: {confirmed_facts}. "
                    f"Continue with the next pending item: `{next_pending}`. "
                    "Only gather more evidence if a specific fact required for that step is still unknown."
                    + mutation_suffix
                )
            else:
                self.context.queue_steering_message(
                    "Reuse the earlier observation instead of repeating it. "
                    f"Continue with the next pending item: `{next_pending}`. "
                    "Only gather more evidence if a specific fact required for that step is still unknown."
                    + mutation_suffix
                )
            return

        # No pending todo: fall back to any missing artifact.
        if missing_artifact is not None:
            self.context.queue_steering_message(
                "Reuse the earlier observation instead of repeating it. "
                + _missing_artifact_resume_suffix(
                    missing_artifact,
                    project_root=self.context.project_root,
                    messages=list(getattr(self.context.session, "messages", []) or []),
                ).strip()
            )
            return

        # Everything planned exists: hand off to verification or final review.
        if all_planned_artifacts_exist(dod, project_root=self.context.project_root):
            verification_commands = dod.verification_commands or derive_verification_commands(
                dod,
                project_root=self.context.project_root,
                task_statement=current_task,
                supplement_existing=True,
            )
            verification_suffix = (
                "Move to verification or final confirmation using the files already on disk."
                if verification_commands
                else "Finish the current review using the files already on disk."
            )
            self.context.queue_steering_message(
                "Reuse the earlier observation instead of repeating it. "
                "All explicitly planned artifacts already exist. "
                "Use the current task artifacts as the source of truth and do not reopen "
                "reference materials unless one specific gap is still unknown. "
                + verification_suffix
            )
            return

        # Otherwise infer a next step from conversation history.
        preferred_next_step = infer_preferred_next_step(
            self.context.session.messages,
            current_task=current_task,
        )
        if preferred_next_step and confirmed_facts:
            self.context.queue_steering_message(
                "Reuse the earlier observation instead of repeating it. "
                f"Confirmed facts: {confirmed_facts}. "
                f"{preferred_next_step} "
                "Only gather more evidence if a specific filename, href, or title is still unknown."
            )
            return

        if preferred_next_step:
            self.context.queue_steering_message(
                "Reuse the earlier observation instead of repeating it. "
                f"{preferred_next_step} "
                "Only gather more evidence if a specific filename, href, or title is still unknown."
            )
            return

        # Last resorts: reference the duplicated path, or a generic redirect.
        target_path = str(
            tool_call.arguments.get("file_path")
            or tool_call.arguments.get("path")
            or ""
        ).strip()
        if target_path:
            self.context.queue_steering_message(
                "Reuse the earlier observation instead of repeating it. "
                f"Use the current contents of `{target_path}` and take a different next step. "
                "Only gather more evidence if a specific filename, href, or title is still unknown."
            )
            return

        self.context.queue_steering_message(
            "Reuse the earlier observation instead of repeating it. "
            "Choose a different next step that makes progress."
        )
485
486 def _queue_blocked_shell_rewrite_nudge(self, tool_call: ToolCall) -> None:
487 """Steer the model back to file tools after a blocked shell text rewrite."""
488
489 if tool_call.name != "bash":
490 return
491
492 target = extract_shell_text_rewrite_target(
493 str(tool_call.arguments.get("command", ""))
494 )
495 if target is None:
496 return
497
498 current_task = getattr(self.context.session, "current_task", None)
499 confirmed_facts = summarize_confirmed_facts(
500 self.context.session.messages,
501 max_items=2,
502 )
503 preferred_next_step = infer_preferred_next_step(
504 self.context.session.messages,
505 current_task=current_task,
506 )
507
508 if preferred_next_step and confirmed_facts:
509 self.context.queue_steering_message(
510 "Use Loader's file tools for this text edit instead of a shell rewrite. "
511 f"Confirmed facts: {confirmed_facts}. "
512 f"{preferred_next_step} "
513 f"Target `{target}` with edit/patch/write rather than `bash`."
514 )
515 return
516
517 self.context.queue_steering_message(
518 "Use Loader's file tools for this text edit instead of a shell rewrite. "
519 f"Apply the change to `{target}` with edit/patch/write."
520 )
521
522 def _queue_blocked_active_repair_nudge(self, event_content: str) -> None:
523 """Reinforce active repair focus after an out-of-scope blocked observation."""
524
525 if "[Blocked - active repair scope:" not in event_content:
526 return
527
528 repair = extract_active_repair_context(self.context.session.messages)
529 if repair is None:
530 return
531
532 if repair.allowed_paths:
533 allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3])
534 if len(repair.allowed_paths) > 3:
535 allowed_preview += ", ..."
536 self.context.queue_steering_message(
537 "Verification already identified the active repair target. "
538 f"Stay on the concrete repair files {allowed_preview} "
539 f"and repair `{repair.artifact_path}` directly. "
540 "Do not reopen unrelated reference materials while this repair target is unresolved."
541 )
542 return
543
544 roots_preview = ", ".join(f"`{root}`" for root in repair.allowed_roots[:2])
545 if len(repair.allowed_roots) > 2:
546 roots_preview += ", ..."
547 self.context.queue_steering_message(
548 "Verification already identified the active repair target. "
549 f"Stay within the current artifact set under {roots_preview} "
550 f"and repair `{repair.artifact_path}` directly. "
551 "Do not reopen unrelated reference materials while this repair target is unresolved."
552 )
553
554 def _queue_blocked_active_repair_mutation_nudge(self, event_content: str) -> None:
555 """Keep repair-phase mutations pinned to the named repair files."""
556
557 if "[Blocked - active repair mutation scope:" not in event_content:
558 return
559
560 repair = extract_active_repair_context(self.context.session.messages)
561 if repair is None or not repair.allowed_paths:
562 return
563
564 allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3])
565 if len(repair.allowed_paths) > 3:
566 allowed_preview += ", ..."
567 self.context.queue_steering_message(
568 "Verification already identified the concrete repair files. "
569 f"Keep mutations pinned to {allowed_preview} "
570 f"and repair `{repair.artifact_path}` before widening the change set."
571 )
572
573 def _queue_blocked_late_reference_drift_nudge(
574 self,
575 event_content: str,
576 *,
577 dod: DefinitionOfDone,
578 ) -> None:
579 """Reinforce missing-artifact progress after late-stage reference drift is blocked."""
580
581 if "[Blocked - late reference drift:" not in event_content:
582 return
583
584 missing_artifact = _next_missing_planned_artifact(
585 dod,
586 project_root=self.context.project_root,
587 messages=list(getattr(self.context.session, "messages", []) or []),
588 )
589 if missing_artifact is None:
590 return
591
592 planned_roots: list[str] = []
593 seen_roots: set[str] = set()
594 for target, expect_directory in collect_planned_artifact_targets(
595 dod,
596 project_root=self.context.project_root,
597 ):
598 root = str(target if expect_directory else target.parent)
599 if root in seen_roots:
600 continue
601 seen_roots.add(root)
602 planned_roots.append(root)
603
604 roots_preview = ", ".join(f"`{root}`" for root in planned_roots[:2])
605 if len(planned_roots) > 2:
606 roots_preview += ", ..."
607 self.context.queue_steering_message(
608 "Late-stage reference rereads are no longer helping. "
609 "One explicitly planned artifact is still missing."
610 + _missing_artifact_resume_suffix(
611 missing_artifact,
612 project_root=self.context.project_root,
613 messages=list(getattr(self.context.session, "messages", []) or []),
614 )
615 + f" Stay within the current output roots under {roots_preview}"
616 + " and finish that artifact before reopening older reference materials."
617 )
618
619 def _queue_blocked_completed_artifact_scope_nudge(
620 self,
621 event_content: str,
622 *,
623 dod: DefinitionOfDone,
624 ) -> None:
625 """Keep post-build review anchored to the generated artifact set."""
626
627 if "[Blocked - completed artifact set scope:" not in event_content:
628 return
629
630 planned_roots: list[str] = []
631 seen_roots: set[str] = set()
632 for target, expect_directory in collect_planned_artifact_targets(
633 dod,
634 project_root=self.context.project_root,
635 ):
636 root = str(target if expect_directory else target.parent)
637 if root in seen_roots:
638 continue
639 seen_roots.add(root)
640 planned_roots.append(root)
641
642 next_pending = preferred_pending_todo_item(
643 dod,
644 project_root=self.context.project_root,
645 )
646 roots_preview = ", ".join(f"`{root}`" for root in planned_roots[:2])
647 if len(planned_roots) > 2:
648 roots_preview += ", ..."
649 if next_pending and _todo_is_consistency_review_step(next_pending):
650 self.context.queue_steering_message(
651 "All explicitly planned artifacts already exist. "
652 f"Stay within the current output roots under {roots_preview} and continue "
653 f"with `{next_pending}` using the generated files as the source of truth. "
654 "Do not reopen earlier reference materials."
655 )
656 return
657
658 self.context.queue_steering_message(
659 "All explicitly planned artifacts already exist. "
660 f"Stay within the current output roots under {roots_preview} "
661 "and move to verification or final confirmation using the generated files. "
662 "Do not reopen earlier reference materials."
663 )
664
665 def _queue_blocked_html_edit_nudge(self, tool_call: ToolCall, event_content: str) -> None:
666 """Keep blocked edit feedback generic; avoid task-class-specific steering."""
667
668 if tool_call.name != "edit":
669 return
670 if "old_string and new_string are identical - no change would occur" not in event_content:
671 return
672
673 repair = extract_active_repair_context(self.context.session.messages)
674 if repair is None:
675 return
676
677 target = (
678 str(tool_call.arguments.get("file_path") or "").strip() or repair.artifact_path
679 )
680 if not target:
681 return
682
683 self.context.queue_steering_message(
684 "That edit would make no on-disk change. "
685 f"Stay on `{target}` and use the current file contents as the source of truth. "
686 "Read the exact current text you need to change, then submit one `edit`, `patch`, "
687 "or `write` call that actually changes the file. "
688 "If a narrow single-line edit keeps bouncing, replace the surrounding block in one "
689 "mutation instead of retrying the same no-op edit. "
690 "Do not reopen unrelated reference materials while this concrete repair target is unresolved."
691 )
692
693 def _queue_blocked_html_declared_target_nudge(
694 self,
695 tool_call: ToolCall,
696 event_content: str,
697 ) -> None:
698 """Steer blocked HTML graph edits back to the root-declared local targets."""
699
700 if tool_call.name not in {"write", "edit", "patch"}:
701 return
702 if "HTML page introduces new local targets outside the current declared artifact set" not in event_content:
703 return
704
705 target = str(
706 tool_call.arguments.get("file_path")
707 or tool_call.arguments.get("path")
708 or ""
709 ).strip()
710 if not target:
711 return
712
713 closest_targets = _extract_blocked_html_target_list(
714 event_content,
715 "Closest declared local targets include:",
716 )
717 declared_targets = _extract_blocked_html_target_list(
718 event_content,
719 "Already-declared local targets include:",
720 )
721
722 guidance = (
723 "That HTML mutation introduced sibling targets outside the current declared local-link set. "
724 f"Stay on `{target}`."
725 )
726 if closest_targets:
727 guidance += (
728 " Replace the invented hrefs with the closest declared target(s): "
729 + ", ".join(f"`{candidate}`" for candidate in closest_targets[:3])
730 + "."
731 )
732 elif declared_targets:
733 guidance += (
734 " Keep local links within the declared target set, for example: "
735 + ", ".join(f"`{candidate}`" for candidate in declared_targets[:3])
736 + "."
737 )
738 guidance += (
739 " Resend one concrete mutation for that same file now instead of rereading the reference guide."
740 )
741 self.context.queue_steering_message(guidance)
742
    def _queue_blocked_invalid_mutation_nudge(
        self,
        tool_call: ToolCall,
        event_content: str,
        *,
        dod: DefinitionOfDone,
    ) -> None:
        """Recover blocked mutations that omitted a real target path or text payload.

        Only acts when detect_missing_mutation_payload recognizes the failure
        shape; records the attempt in recovery state, then queues a steering
        message tailored to either a missing `file_path` or a missing payload.
        """

        fix = detect_missing_mutation_payload(
            tool_call.name,
            tool_call.arguments,
            event_content,
        )
        if fix is None:
            return

        # Seed recovery bookkeeping so repeated malformed attempts escalate.
        self._record_blocked_invalid_mutation_attempt(tool_call, event_content)

        messages = list(getattr(self.context.session, "messages", []) or [])
        missing_artifact = _next_missing_planned_artifact(
            dod,
            project_root=self.context.project_root,
            messages=messages,
        )
        next_pending = preferred_pending_todo_item(
            dod,
            project_root=self.context.project_root,
            missing_artifact=missing_artifact,
        )
        # Re-rank the missing artifact against the pending item it supports.
        missing_artifact = _prefer_missing_artifact_for_pending_item(
            dod,
            missing_artifact=missing_artifact,
            next_pending=next_pending,
            project_root=self.context.project_root,
        )
        resume_target = _preferred_resume_target_path(
            dod,
            next_pending=next_pending,
            missing_artifact=missing_artifact,
            project_root=self.context.project_root,
            messages=messages,
        )
        resume_suffix = _pending_item_resume_suffix(
            dod,
            next_pending=next_pending,
            missing_artifact=missing_artifact,
            project_root=self.context.project_root,
            messages=messages,
        )
        # resume_target presumably is a Path (has .name) — TODO confirm.
        target_label = f"`{resume_target.name or str(resume_target)}`" if resume_target else ""

        if fix.get("kind") == "missing_target":
            prefix = f"That `{tool_call.name}` call did not provide a valid `file_path`."
            if target_label:
                prefix += f" Stay on {target_label}."
            self.context.queue_steering_message(
                prefix
                + resume_suffix
                + " Resend one concrete "
                + _invalid_mutation_call_shape(tool_call.name)
                + " now instead of another working note, reread, or empty response."
            )
            return

        # Otherwise the call named summary fields instead of a real payload.
        invalid_fields = ", ".join(f"`{field}`" for field in fix["invalid_fields"])
        prefix = f"That `{tool_call.name}` call omitted the real text payload."
        if invalid_fields:
            prefix += f" {invalid_fields} are summary fields, not valid mutation inputs."
        if target_label:
            prefix += f" Stay on {target_label}."
        self.context.queue_steering_message(
            prefix
            + resume_suffix
            + " Resend one concrete "
            + _invalid_mutation_call_shape(tool_call.name)
            + " now instead of rereading more files."
        )
821
822 def _record_blocked_invalid_mutation_attempt(
823 self,
824 tool_call: ToolCall,
825 error: str,
826 ) -> None:
827 """Seed recovery state from blocked malformed mutations for later retry guidance."""
828
829 recovery_context = self.context.recovery_context
830 if recovery_context is None or not recovery_context.is_related_failure(
831 tool_call.name,
832 tool_call.arguments,
833 error,
834 ):
835 recovery_context = RecoveryContext(
836 original_tool=tool_call.name,
837 original_args=tool_call.arguments,
838 max_retries=self.context.config.max_recovery_attempts,
839 )
840 self.context.recovery_context = recovery_context
841
842 if not recovery_context.is_similar_attempt(
843 tool_call.name,
844 tool_call.arguments,
845 ):
846 recovery_context.add_attempt(
847 tool_call.name,
848 tool_call.arguments,
849 error,
850 )
851
    async def _record_successful_execution(
        self,
        *,
        tool_call: ToolCall,
        outcome,
        dod: DefinitionOfDone,
        emit: EventSink,
        summary: TurnSummary,
    ) -> str | None:
        """Update DoD bookkeeping after a successful tool execution.

        NOTE(review): this implementation always returns None; the `str` arm
        of the return type (which the caller treats as a halt response) never
        fires here — confirm whether a halt path was intended.
        """

        is_mutating = is_state_mutating_tool_call(tool_call)
        # Capture verification status BEFORE recording, so a mutation after a
        # pass can invalidate the earlier verification.
        previously_verified = dod.last_verification_result == "passed"
        record_successful_tool_call(dod, tool_call)
        if previously_verified and is_mutating:
            # Mutating after a passed verification makes that result stale.
            _mark_verification_stale(
                context=self.context,
                summary=summary,
                dod=dod,
                tool_call=tool_call,
            )
        elif is_mutating and _should_plan_verification_for_tool_call(
            dod,
            tool_call=tool_call,
            project_root=self.context.project_root,
        ):
            _mark_verification_planned(
                context=self.context,
                summary=summary,
                dod=dod,
                tool_call=tool_call,
            )
        if tool_call.name == "TodoWrite" and outcome.registry_result is not None:
            # Explicit todo updates replace inference-based advancement.
            new_todos = outcome.registry_result.metadata.get("new_todos", [])
            if isinstance(new_todos, list):
                sync_todos_to_definition_of_done(
                    dod,
                    new_todos,
                    project_root=self.context.project_root,
                )
                self._queue_todowrite_resume_nudge(dod=dod)
        else:
            # Snapshot pending items so the nudges can see what completed.
            pending_before = list(dod.pending_items)
            if advance_todos_from_tool_call(dod, tool_call):
                reconcile_aggregate_completion_steps(
                    dod,
                    project_root=self.context.project_root,
                )
            self._queue_next_pending_todo_nudge(
                tool_call=tool_call,
                pending_before=pending_before,
                dod=dod,
            )
            self._queue_bookkeeping_resume_nudge(
                tool_call=tool_call,
                dod=dod,
            )
            self._queue_missing_artifact_progress_nudge(
                tool_call=tool_call,
                dod=dod,
            )
            self._queue_planned_artifact_handoff_nudge(
                tool_call=tool_call,
                dod=dod,
            )
        # Persist DoD state after every successful call.
        self.dod_store.save(dod)
        recovery_context = self.context.recovery_context
        if recovery_context is not None:
            recovery_context.note_success(tool_call.name, tool_call.arguments)
            if recovery_context.should_clear_after_success(
                tool_call.name,
                tool_call.arguments,
            ):
                self.context.recovery_context = None
        return None
927
    def _queue_next_pending_todo_nudge(
        self,
        *,
        tool_call: ToolCall,
        pending_before: list[str],
        dod: DefinitionOfDone,
    ) -> None:
        """After an observational call completes a todo, steer toward the next one.

        Fires only for read-style tools (`read`/`glob`/`grep`, or inspection
        style `bash` commands); mutating calls are covered by other nudges.
        """
        if is_state_mutating_tool_call(tool_call):
            return
        if tool_call.name not in {"read", "glob", "grep", "bash"}:
            return
        if tool_call.name == "bash":
            # Only treat read-only inspection commands as observations.
            # NOTE(review): a bare "ls" (no trailing space) matches none of
            # these substrings and is skipped — confirm that is intended.
            command = str(tool_call.arguments.get("command", "")).lower()
            if not any(
                token in command
                for token in (
                    "ls ",
                    " ls",
                    "find ",
                    "grep ",
                    "rg ",
                    "cat ",
                    "sed ",
                    "head ",
                    "tail ",
                )
            ):
                return

        # First concrete item that left the pending list, ignoring aggregate
        # bookkeeping labels.
        completed_label = next(
            (
                item
                for item in pending_before
                if item not in dod.pending_items
                and item not in _TODO_NUDGE_EXCLUDED_ITEMS
            ),
            None,
        )
        missing_artifact = _next_missing_planned_artifact(
            dod,
            project_root=self.context.project_root,
            messages=list(getattr(self.context.session, "messages", []) or []),
        )
        next_pending = preferred_pending_todo_item(
            dod,
            project_root=self.context.project_root,
            missing_artifact=missing_artifact,
        )
        has_file_artifact_progress = _has_confirmed_file_artifact_progress(
            dod,
            project_root=self.context.project_root,
        )
        # Nothing to announce unless a real item completed and a different
        # item is still pending.
        if not completed_label or not next_pending or next_pending == completed_label:
            return
        if _should_prioritize_missing_artifact(
            dod=dod,
            next_pending=next_pending,
            missing_artifact=missing_artifact,
            project_root=self.context.project_root,
        ):
            if not has_file_artifact_progress:
                # Prefer a compact handoff when no file artifact exists yet.
                compact_handoff = _compact_missing_artifact_handoff(
                    missing_artifact,
                    project_root=self.context.project_root,
                    messages=list(getattr(self.context.session, "messages", []) or []),
                )
                if compact_handoff:
                    self.context.queue_steering_message(
                        f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
                        f"`{tool_call.name}` result. {compact_handoff}"
                        " Do not reread reference material or spend the next turn on bookkeeping."
                    )
                    return
            self.context.queue_steering_message(
                f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
                f"`{tool_call.name}` result. One declared output artifact is still missing."
                + _missing_artifact_resume_suffix(
                    missing_artifact,
                    project_root=self.context.project_root,
                    messages=list(getattr(self.context.session, "messages", []) or []),
                )
                + " Do not switch into review or consistency-check mode until the missing artifact exists."
            )
            return

        # For mutation-style next steps, add explicit "act now" guidance.
        mutation_suffix = ""
        if _todo_is_mutation_step(next_pending):
            mutation_suffix = _pending_item_resume_suffix(
                dod,
                next_pending=next_pending,
                missing_artifact=missing_artifact,
                project_root=self.context.project_root,
                messages=list(getattr(self.context.session, "messages", []) or []),
            )
            if not mutation_suffix:
                mutation_suffix = (
                    " You already have enough evidence for that step, so stop gathering "
                    "more reference material and perform the change now."
                )

        self.context.queue_steering_message(
            f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
            f"`{tool_call.name}` result. Continue with the next pending item: "
            f"`{next_pending}` instead of rereading the same evidence.{mutation_suffix}"
        )
1033
    def _queue_planned_artifact_handoff_nudge(
        self,
        *,
        tool_call: ToolCall,
        dod: DefinitionOfDone,
    ) -> None:
        """Steer the agent toward verification once every planned artifact exists.

        Fires only after a state-mutating tool call, and only when all
        explicitly declared artifacts are already on disk.  Queues a persistent
        steering message; no message is queued when there is nothing concrete
        to verify and no consistency-review step is pending.
        """
        if not is_state_mutating_tool_call(tool_call):
            return
        if not all_planned_artifacts_exist(dod, project_root=self.context.project_root):
            return

        next_pending = preferred_pending_todo_item(
            dod,
            project_root=self.context.project_root,
        )
        # Fall back to freshly derived commands when the DoD has none recorded.
        verification_commands = dod.verification_commands or derive_verification_commands(
            dod,
            project_root=self.context.project_root,
            task_statement=getattr(self.context.session, "current_task", "") or "",
            supplement_existing=True,
        )

        if next_pending and _todo_is_consistency_review_step(next_pending):
            # A pending consistency-review step gets a targeted suffix so the
            # agent finishes the pass instead of rereading everything.
            verification_suffix = (
                " Move to verification once no specific mismatch remains."
                if verification_commands
                else " Avoid another full reread unless one specific inconsistency is still unknown."
            )
            self.context.queue_steering_message(
                "All explicitly planned artifacts now exist. "
                f"Continue with the next pending item: `{next_pending}`. "
                "Use the files already on disk as the source of truth instead of restarting "
                "discovery or inventing alternate filenames."
                + verification_suffix
            )
            return

        # No consistency step pending: nudge straight to verification, but only
        # when there is something concrete to run.
        if verification_commands:
            self.context.queue_steering_message(
                "All explicitly planned artifacts now exist. "
                "Do not expand the artifact set or restart discovery unless a specific gap is "
                "still known. Move to verification or final confirmation using the files that "
                "already exist."
            )
1078
    def _queue_missing_artifact_progress_nudge(
        self,
        *,
        tool_call: ToolCall,
        dod: DefinitionOfDone,
    ) -> None:
        """Acknowledge recorded mutating work and point at the next missing artifact.

        Fires only for state-mutating tool calls while at least one declared
        output artifact is still missing.  Early in the task (fewer than two
        confirmed file artifacts) the nudge is persistent; later it becomes
        ephemeral so it does not crowd the context.
        """
        if not is_state_mutating_tool_call(tool_call):
            return
        missing_artifact = _next_missing_planned_artifact(
            dod,
            project_root=self.context.project_root,
        )
        if missing_artifact is None:
            return
        next_pending = preferred_pending_todo_item(
            dod,
            project_root=self.context.project_root,
            missing_artifact=missing_artifact,
        )
        # Re-target the missing artifact to the pending todo's own output when
        # that output is also a declared (or nested) planned target.
        missing_artifact = _prefer_missing_artifact_for_pending_item(
            dod,
            missing_artifact=missing_artifact,
            next_pending=next_pending,
            project_root=self.context.project_root,
        )

        current_label = _current_mutation_label(tool_call)
        has_file_artifact_progress = _has_confirmed_file_artifact_progress(
            dod,
            project_root=self.context.project_root,
        )
        # A bare mkdir before any file exists does not merit a nudge.
        if (
            not has_file_artifact_progress
            and _is_pure_directory_creation_tool_call(tool_call)
        ):
            return
        resume_target = _preferred_resume_target_path(
            dod,
            next_pending=next_pending,
            missing_artifact=missing_artifact,
            project_root=self.context.project_root,
            messages=list(getattr(self.context.session, "messages", []) or []),
        )
        resume_suffix = _pending_item_resume_suffix(
            dod,
            next_pending=next_pending,
            missing_artifact=missing_artifact,
            project_root=self.context.project_root,
            messages=list(getattr(self.context.session, "messages", []) or []),
        )
        use_persistent_handoff = _should_use_persistent_missing_artifact_handoff(
            dod,
            project_root=self.context.project_root,
        )
        session_messages = list(getattr(self.context.session, "messages", []) or [])
        # Skip the persistent form when a recovery prompt just fired, to avoid
        # stacking two strong interventions back to back.
        if use_persistent_handoff and _recent_recovery_prompt(session_messages):
            use_persistent_handoff = False
        queue_message = (
            self.context.queue_steering_message
            if use_persistent_handoff
            else self.context.queue_ephemeral_steering_message
        )
        # Preferred shape: a compact handoff naming one concrete file target
        # (a suffix implies the resume target is a file, not a directory).
        if (
            use_persistent_handoff
            and resume_target is not None
            and resume_target.suffix
        ):
            compact_resume = _compact_missing_artifact_handoff(
                (resume_target, False),
                project_root=self.context.project_root,
                messages=session_messages,
            )
            if compact_resume:
                queue_message(
                    f"Confirmed progress: {current_label} is now recorded. "
                    + compact_resume
                    + " Do not reread reference material or spend the next turn on bookkeeping."
                )
                return
        todo_refresh = _todo_refresh_guidance(
            dod,
            project_root=self.context.project_root,
        )
        # Before the first confirmed file artifact, fall back to a compact
        # handoff aimed at the missing artifact itself.
        if not has_file_artifact_progress:
            compact_handoff = _compact_missing_artifact_handoff(
                missing_artifact,
                project_root=self.context.project_root,
                messages=session_messages,
            )
            if compact_handoff:
                queue_message(
                    f"Confirmed progress: {current_label} is now recorded. "
                    + compact_handoff
                    + " Do not reread reference material or spend the next turn on bookkeeping."
                )
                return
        # Late in the task a terser, stricter reminder is enough.
        if _late_stage_missing_artifact_build(
            dod,
            project_root=self.context.project_root,
        ):
            queue_message(
                f"Confirmed progress: {current_label} is now recorded."
                + resume_suffix
                + " No TodoWrite, no verification, no rereads until that artifact exists."
            )
            return
        queue_message(
            f"Confirmed progress: {current_label} is now recorded."
            " One declared output artifact is still missing."
            + resume_suffix
            + todo_refresh
            + " Do not move to verification, final confirmation, or TodoWrite-only "
            "bookkeeping until that artifact exists."
            + " Do not spend another turn on working notes or rediscovery alone."
        )
1194
1195 def _queue_todowrite_resume_nudge(
1196 self,
1197 *,
1198 dod: DefinitionOfDone,
1199 ) -> None:
1200 session_messages = list(getattr(self.context.session, "messages", []) or [])
1201 missing_artifact = _next_missing_planned_artifact(
1202 dod,
1203 project_root=self.context.project_root,
1204 messages=session_messages,
1205 )
1206 next_pending = preferred_pending_todo_item(
1207 dod,
1208 project_root=self.context.project_root,
1209 missing_artifact=missing_artifact,
1210 )
1211 if missing_artifact is None:
1212 if next_pending and _todo_is_mutation_step(next_pending):
1213 pending_target = infer_pending_todo_output_target(
1214 dod,
1215 next_pending,
1216 project_root=self.context.project_root,
1217 )
1218 if pending_target is not None:
1219 concrete_message = (
1220 "Todo tracking is updated. Continue with the next pending item: "
1221 f"`{next_pending}`. Resume by creating `{pending_target.name}` now. "
1222 f"Prefer one `write` call for `{pending_target}` instead of more rereads. "
1223 )
1224 if not pending_target.parent.exists():
1225 concrete_message += (
1226 "The `write` tool can create that file's parent directories "
1227 "automatically, so do the write in one step instead of stopping "
1228 "for a separate mkdir. "
1229 )
1230 concrete_message += (
1231 "Use the current output files as the source of truth, and do not "
1232 "reopen reference materials unless one specific fact required for "
1233 "that step is still unknown. Make your next response the concrete "
1234 "mutation tool call itself, not another bookkeeping-only turn. "
1235 "Perform the mutation now instead of spending another turn on "
1236 "planning, rereads, or verification."
1237 )
1238 self.context.queue_steering_message(concrete_message)
1239 return
1240 self.context.queue_steering_message(
1241 "Todo tracking is updated. Continue with the next pending item: "
1242 f"`{next_pending}`. Use the current output files as the source of "
1243 "truth, and do not reopen reference materials unless one specific "
1244 "fact required for that step is still unknown. Perform the mutation "
1245 "now instead of spending another turn on planning, rereads, or "
1246 "verification."
1247 )
1248 return
1249
1250 if (
1251 next_pending
1252 and _todo_is_consistency_review_step(next_pending)
1253 and not all_planned_artifacts_exist(
1254 dod,
1255 project_root=self.context.project_root,
1256 )
1257 ):
1258 self.context.queue_ephemeral_steering_message(
1259 "Todo tracking is updated. Continue with the next pending item: "
1260 f"`{next_pending}`. Use the current output files as the source of "
1261 "truth, and do not reopen reference materials unless one specific "
1262 "mismatch is still unknown."
1263 )
1264 return
1265
1266 if not all_planned_artifacts_exist(dod, project_root=self.context.project_root):
1267 return
1268
1269 verification_commands = dod.verification_commands or derive_verification_commands(
1270 dod,
1271 project_root=self.context.project_root,
1272 task_statement=getattr(self.context.session, "current_task", "") or "",
1273 supplement_existing=True,
1274 )
1275 if next_pending and _todo_is_consistency_review_step(next_pending):
1276 verification_suffix = (
1277 " Move to verification once no specific mismatch remains."
1278 if verification_commands
1279 else " Finish the targeted consistency pass without reopening reference materials."
1280 )
1281 self.context.queue_ephemeral_steering_message(
1282 "Todo tracking is updated. All explicitly planned artifacts now exist. "
1283 f"Continue with the next pending item: `{next_pending}`. "
1284 "Use the current output files as the source of truth, and do not restart "
1285 "early discovery or reopen reference materials."
1286 + verification_suffix
1287 )
1288 return
1289
1290 verification_suffix = (
1291 " Move to verification or final confirmation using the files already on disk."
1292 if verification_commands
1293 else " Finish the task using the files already on disk."
1294 )
1295 self.context.queue_ephemeral_steering_message(
1296 "Todo tracking is updated. All explicitly planned artifacts now exist. "
1297 "Do not restart discovery, reopen reference materials, or spend another turn "
1298 "on TodoWrite alone."
1299 + verification_suffix
1300 )
1301 return
1302
1303 todo_refresh = _todo_refresh_guidance(
1304 dod,
1305 project_root=self.context.project_root,
1306 )
1307 next_pending_suffix = (
1308 f" Continue with the next pending item: `{next_pending}`."
1309 if next_pending
1310 else ""
1311 )
1312 self.context.queue_steering_message(
1313 "Todo tracking is updated. A declared output artifact is still missing."
1314 + next_pending_suffix
1315 + _missing_artifact_resume_suffix(
1316 missing_artifact,
1317 project_root=self.context.project_root,
1318 messages=session_messages,
1319 )
1320 + todo_refresh
1321 + " Do not spend the next turn on TodoWrite alone, bookkeeping notes, "
1322 "verification, or final confirmation until that artifact exists."
1323 )
1324
    def _queue_bookkeeping_resume_nudge(
        self,
        *,
        tool_call: ToolCall,
        dod: DefinitionOfDone,
    ) -> None:
        """After a bookkeeping-note tool call, redirect toward the missing artifact.

        Only fires for note-taking tools while a declared output artifact is
        still missing.  Uses ephemeral messages in both branches since a note
        is a low-stakes event.
        """
        if tool_call.name not in _BOOKKEEPING_NOTE_TOOL_NAMES:
            return

        session_messages = list(getattr(self.context.session, "messages", []) or [])
        missing_artifact = _next_missing_planned_artifact(
            dod,
            project_root=self.context.project_root,
            messages=session_messages,
        )
        if missing_artifact is None:
            return

        next_pending = preferred_pending_todo_item(
            dod,
            project_root=self.context.project_root,
            missing_artifact=missing_artifact,
        )
        todo_refresh = _todo_refresh_guidance(
            dod,
            project_root=self.context.project_root,
        )
        # Prefer pointing at an evidence-gathering pending step when it is not
        # a mutation/review step and does not conflict with the missing
        # artifact.  The missing artifact only outranks the pending step once
        # some artifact progress has actually been confirmed.
        if (
            next_pending
            and not _todo_is_mutation_step(next_pending)
            and not _todo_is_consistency_review_step(next_pending)
            and not _should_prioritize_missing_artifact(
                dod=dod,
                next_pending=next_pending,
                missing_artifact=(
                    missing_artifact
                    if _has_confirmed_artifact_progress(
                        dod,
                        project_root=self.context.project_root,
                    )
                    else None
                ),
                project_root=self.context.project_root,
            )
        ):
            self.context.queue_ephemeral_steering_message(
                "Bookkeeping note is recorded. Continue with the next pending item: "
                f"`{next_pending}`. Make your next response one concrete evidence-gathering "
                "tool call that advances that step, not another bookkeeping-only turn."
                + todo_refresh
                + " Do not jump ahead to later artifact creation, verification, or final "
                "confirmation until that step is satisfied."
            )
            return

        self.context.queue_ephemeral_steering_message(
            "Bookkeeping note is recorded. A declared output artifact is still missing."
            + _missing_artifact_resume_suffix(
                missing_artifact,
                project_root=self.context.project_root,
                messages=session_messages,
            )
            + todo_refresh
            + " Do not spend the next turn on additional notes, rediscovery, "
            "verification, or final confirmation until that artifact exists."
        )
1391
1392
def _todo_is_consistency_review_step(item: str) -> bool:
    """Return True when a todo label reads like a consistency-review pass."""
    lowered = item.lower()
    for hint in _CONSISTENCY_REVIEW_HINTS:
        if hint in lowered:
            return True
    return False
1396
1397
def _should_prioritize_missing_artifact(
    *,
    dod: DefinitionOfDone,
    next_pending: str | None,
    missing_artifact: tuple[Path, bool] | None,
    project_root: Path,
) -> bool:
    """Decide whether the missing artifact outranks the next pending todo item.

    With no missing artifact there is nothing to prioritize.  With no pending
    item the artifact wins by default.  Otherwise the artifact wins when the
    pending item conflicts with it, is a consistency-review pass, or is not a
    mutation step at all.
    """
    if missing_artifact is None:
        return False
    if not next_pending:
        return True
    conflicts = _pending_todo_conflicts_with_missing_artifact(
        dod,
        item=next_pending,
        missing_artifact=missing_artifact,
        project_root=project_root,
    )
    if conflicts or _todo_is_consistency_review_step(next_pending):
        return True
    return not _todo_is_mutation_step(next_pending)
1419
1420
def _pending_todo_conflicts_with_missing_artifact(
    dod: DefinitionOfDone,
    *,
    item: str,
    missing_artifact: tuple[Path, bool],
    project_root: Path,
) -> bool:
    """Return True when the pending todo targets a different output than the missing artifact."""
    # Blank labels and special bookkeeping items never conflict.
    if not item.strip().lower() or item in _TODO_NUDGE_EXCLUDED_ITEMS:
        return False

    target, expect_directory = missing_artifact
    inferred = infer_pending_todo_output_target(
        dod,
        item,
        project_root=project_root,
    )
    if inferred is None:
        # No concrete target for the todo: only a file-producing mutation
        # step is treated as pulling away from the missing file artifact.
        return not expect_directory and _todo_is_mutation_step(item)

    resolved_pending = inferred.resolve(strict=False)
    resolved_missing = target.resolve(strict=False)
    if expect_directory:
        # A pending target inside (or equal to) the missing directory agrees.
        return (
            resolved_missing != resolved_pending
            and resolved_missing not in resolved_pending.parents
        )
    return resolved_pending != resolved_missing
1446
1447
def _next_missing_planned_artifact(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
    messages: list[Any] | None = None,
) -> tuple[Path, bool] | None:
    """Return the next unsatisfied planned artifact as ``(path, is_directory)``.

    First pass: any declared target that is not yet satisfied wins outright.
    Second pass: for satisfied directories, look for the next inferred output
    file that does not exist yet.  Returns ``None`` when nothing is missing.
    """
    for target, expect_directory in collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    ):
        satisfied = planned_artifact_target_satisfied(
            dod,
            target=target,
            expect_directory=expect_directory,
            project_root=project_root,
        )
        if not satisfied:
            return target, expect_directory

    for target, expect_directory in collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    ):
        if not (expect_directory and target.is_dir()):
            continue
        candidate, _ = infer_next_output_file(
            target=target,
            project_root=project_root,
            messages=list(messages or []),
        )
        if candidate is not None and not candidate.exists():
            return candidate, False
    return None
1481
1482
def _prefer_missing_artifact_for_pending_item(
    dod: DefinitionOfDone,
    *,
    missing_artifact: tuple[Path, bool] | None,
    next_pending: str | None,
    project_root: Path,
) -> tuple[Path, bool] | None:
    """Swap the missing artifact for the pending todo's own output when it matches.

    The swap happens only when the pending item has a concrete, not-yet-existing
    output target that is itself a planned target or sits inside a planned
    directory; otherwise the original missing artifact is returned unchanged.
    """
    if missing_artifact is None or not next_pending:
        return missing_artifact

    pending_target = infer_pending_todo_output_target(
        dod,
        next_pending,
        project_root=project_root,
    )
    if pending_target is None or pending_target.exists():
        return missing_artifact

    wanted = pending_target.expanduser().resolve(strict=False)
    for planned, expect_directory in collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    ):
        candidate = planned.expanduser().resolve(strict=False)
        if expect_directory:
            # Accept the pending target when it lives under the planned dir.
            try:
                wanted.relative_to(candidate)
            except ValueError:
                continue
            return wanted, False
        if candidate == wanted:
            return wanted, False
    return missing_artifact
1517
1518
def _late_stage_missing_artifact_build(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> bool:
    """Return True late in a build: at least 7 targets satisfied but some still missing."""
    satisfaction = [
        planned_artifact_target_satisfied(
            dod,
            target=target,
            expect_directory=expect_directory,
            project_root=project_root,
        )
        for target, expect_directory in collect_planned_artifact_targets(
            dod,
            project_root=project_root,
            max_paths=12,
        )
    ]
    # "Missing > 0" is equivalent to "not every target satisfied".
    return sum(satisfaction) >= 7 and not all(satisfaction)
1541
1542
def _has_confirmed_artifact_progress(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> bool:
    """Return True when any planned target is satisfied, or any file was touched."""
    if any(
        planned_artifact_target_satisfied(
            dod,
            target=target,
            expect_directory=expect_directory,
            project_root=project_root,
        )
        for target, expect_directory in collect_planned_artifact_targets(
            dod,
            project_root=project_root,
            max_paths=12,
        )
    ):
        return True
    # Fall back to the touched-files record when no planned target matched.
    return bool(dod.touched_files)
1561
1562
def _has_confirmed_file_artifact_progress(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> bool:
    """Return True when at least one file (non-directory) artifact is confirmed."""
    confirmed = _confirmed_file_artifact_count(dod, project_root=project_root)
    return confirmed > 0
1569
1570
def _confirmed_file_artifact_count(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> int:
    """Count confirmed file (non-directory) planned artifacts.

    When no planned file artifact is satisfied yet, falls back to counting
    touched files that have a suffix (i.e. look like real files).

    Bug fix: the ``if count: return count`` early exit previously sat inside
    the loop, so the function returned 1 on the first satisfied file and
    could never report two or more — which made callers comparing against 2
    (e.g. ``_should_use_persistent_missing_artifact_handoff``) always see
    "fewer than two".  The check now runs after the loop completes.
    """
    count = 0
    for target, expect_directory in collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    ):
        if expect_directory:
            continue
        if planned_artifact_target_satisfied(
            dod,
            target=target,
            expect_directory=False,
            project_root=project_root,
        ):
            count += 1
    if count:
        return count
    # Fallback: suffixed touched files stand in for confirmed file artifacts.
    return sum(
        1
        for path in dod.touched_files
        if str(path).strip()
        and Path(path).expanduser().resolve(strict=False).suffix
    )
1599
1600
def _should_use_persistent_missing_artifact_handoff(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> bool:
    """Keep the handoff persistent while fewer than two file artifacts are confirmed."""
    confirmed = _confirmed_file_artifact_count(dod, project_root=project_root)
    return confirmed < 2
1610
1611
def _next_missing_planned_file_within_directory(
    dod: DefinitionOfDone,
    *,
    target: Path,
    project_root: Path,
) -> Path | None:
    """Return the first unsatisfied planned file located under *target*.

    Returns ``None`` when *target* looks like a file (has a suffix) or when
    every planned file under it is already satisfied.
    """
    directory = target.expanduser().resolve(strict=False)
    # A suffixed path is treated as a file, not a directory to search under.
    if directory.suffix:
        return None

    for planned, expect_directory in collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    ):
        if expect_directory:
            continue
        candidate = planned.expanduser().resolve(strict=False)
        try:
            candidate.relative_to(directory)
        except ValueError:
            continue
        if not planned_artifact_target_satisfied(
            dod,
            target=candidate,
            expect_directory=False,
            project_root=project_root,
        ):
            return candidate
    return None
1643
1644
1645 def _missing_artifact_resume_suffix(
1646 missing_artifact: tuple[Path, bool] | None,
1647 *,
1648 project_root: Path,
1649 messages: list[Any] | None = None,
1650 ) -> str:
1651 if missing_artifact is None:
1652 return ""
1653
1654 target, expect_directory = missing_artifact
1655 return _resume_suffix_for_target(
1656 target,
1657 expect_directory=expect_directory,
1658 project_root=project_root,
1659 messages=messages,
1660 )
1661
1662
def _pending_item_resume_suffix(
    dod: DefinitionOfDone,
    *,
    next_pending: str | None,
    missing_artifact: tuple[Path, bool] | None,
    project_root: Path,
    messages: list[Any] | None = None,
) -> str:
    """Build a resume suffix that prefers the pending todo's own output target.

    Preference order: the pending item's not-yet-existing output target, then
    the next missing declared file inside a missing directory artifact, then
    the generic missing-artifact suffix.
    """
    if next_pending:
        pending_target = infer_pending_todo_output_target(
            dod,
            next_pending,
            project_root=project_root,
        )
        if pending_target is not None and not pending_target.exists():
            normalized_target = pending_target.expanduser().resolve(strict=False)
            # A suffix-less path is treated as a directory target.
            return _resume_suffix_for_target(
                normalized_target,
                expect_directory=not bool(normalized_target.suffix),
                project_root=project_root,
                messages=messages,
                allow_inferred_child=False,
            )
    # missing_artifact[1] is the expect_directory flag.
    if missing_artifact is not None and missing_artifact[1]:
        next_planned_file = _next_missing_planned_file_within_directory(
            dod,
            target=missing_artifact[0],
            project_root=project_root,
        )
        if next_planned_file is not None:
            parent_label = missing_artifact[0].name or str(missing_artifact[0])
            return (
                f" Resume by creating `{next_planned_file.name}` now."
                f" It is the next missing declared output under `{parent_label}/`."
                f" Prefer one `write` call for `{next_planned_file}` instead of more rereads."
                " Make your next response the concrete mutation tool call itself, not another"
                " bookkeeping-only turn."
            )
    return _missing_artifact_resume_suffix(
        missing_artifact,
        project_root=project_root,
        messages=messages,
    )
1706
1707
def _preferred_resume_target_path(
    dod: DefinitionOfDone,
    *,
    next_pending: str | None,
    missing_artifact: tuple[Path, bool] | None,
    project_root: Path,
    messages: list[Any] | None = None,
) -> Path | None:
    """Pick the single most actionable path at which to resume artifact creation.

    Preference order: the pending todo's own not-yet-existing output, a
    missing planned file, then (for missing directories) the next declared
    file inside, an inferred output file, and finally the directory itself.
    """
    if next_pending:
        pending_target = infer_pending_todo_output_target(
            dod,
            next_pending,
            project_root=project_root,
        )
        if pending_target is not None and not pending_target.exists():
            return pending_target.expanduser().resolve(strict=False)

    if missing_artifact is None:
        return None

    artifact_path, is_directory = missing_artifact
    resolved = artifact_path.expanduser().resolve(strict=False)
    if not is_directory:
        return resolved

    planned_child = _next_missing_planned_file_within_directory(
        dod,
        target=resolved,
        project_root=project_root,
    )
    if planned_child is not None:
        return planned_child.expanduser().resolve(strict=False)

    inferred_child, _ = infer_next_output_file(
        target=resolved,
        project_root=project_root,
        messages=list(messages or []),
    )
    if inferred_child is not None:
        return inferred_child.expanduser().resolve(strict=False)
    return resolved
1749
1750
1751 def _invalid_mutation_call_shape(tool_name: str) -> str:
1752 if tool_name == "write":
1753 return "`write(file_path=..., content=...)`"
1754 if tool_name == "edit":
1755 return "`edit(file_path=..., old_string=..., new_string=...)`"
1756 if tool_name == "patch":
1757 return "`patch(file_path=..., patch='...')` or `patch(..., hunks=[...])`"
1758 return f"`{tool_name}(...)`"
1759
1760
1761 def _extract_blocked_html_target_list(event_content: str, marker: str) -> list[str]:
1762 if marker not in event_content:
1763 return []
1764 tail = event_content.split(marker, 1)[1].strip()
1765 target_text = tail.split(". ", 1)[0].strip()
1766 if not target_text:
1767 return []
1768 return [item.strip() for item in target_text.split(",") if item.strip()]
1769
1770
def _resume_suffix_for_target(
    target: Path,
    *,
    expect_directory: bool,
    project_root: Path,
    messages: list[Any] | None = None,
    allow_inferred_child: bool = True,
) -> str:
    """Build the 'Resume by creating …' suffix for a missing artifact target.

    For directories, optionally names the next concrete output file to write
    (declared or inferred); for plain files, points one `write` call at the
    file itself.  Returns a message fragment beginning with a leading space.
    """
    label = target.name or str(target)
    # Directories are labelled with a trailing slash for readability.
    if expect_directory and not label.endswith("/"):
        label += "/"
    if expect_directory:
        if allow_inferred_child:
            next_output_file, next_output_source = infer_next_output_file(
                target=target,
                project_root=project_root,
                messages=list(messages or []),
            )
            if next_output_file is not None:
                # Explain where the filename came from: a declared output or a
                # pattern observed in a sibling directory.
                guidance_origin = (
                    f"It is the next missing declared output under `{label}`."
                    if next_output_source == "declared"
                    else (
                        "It mirrors the observed filename pattern from another "
                        f"`{label}` directory you already inspected."
                    )
                )
                guidance = (
                    f" Resume by creating `{next_output_file.name}` now. {guidance_origin} "
                    f"Prefer one `write` call for "
                    f"`{next_output_file}` instead of more rereads."
                )
                if not next_output_file.parent.exists():
                    guidance += (
                        " The `write` tool can create that file's parent directories automatically,"
                        " so do the write in one step instead of stopping for a separate mkdir."
                    )
                guidance += (
                    " Make your next response the concrete mutation tool call itself, not another"
                    " bookkeeping-only turn."
                )
                return guidance
        # No concrete child file known: point at the directory contents, or at
        # creating the directory itself when it does not exist yet.
        if target.is_dir():
            return (
                f" Resume by creating the next output file under `{label}` now. Prefer one "
                f"concrete `write` call for a file inside `{target}` instead of more rereads."
                " Make your next response the concrete mutation tool call itself, not another"
                " bookkeeping-only turn."
            )
        return (
            f" Resume by creating `{label}` now. Prefer one concrete directory-creation "
            f"step for `{target}` instead of more rereads."
        )
    # File target: one `write` call straight at it.
    guidance = (
        f" Resume by creating `{label}` now. Prefer one `write` call for `{target}` "
        "instead of more rereads."
    )
    if not target.parent.exists():
        guidance += (
            " The `write` tool can create that file's parent directories automatically,"
            " so do the write in one step instead of stopping for a separate mkdir."
        )
    guidance += (
        " Make your next response the concrete mutation tool call itself, not another"
        " bookkeeping-only turn."
    )
    return guidance
1838
1839
def _compact_missing_artifact_handoff(
    missing_artifact: tuple[Path, bool] | None,
    *,
    project_root: Path,
    messages: list[Any] | None = None,
) -> str:
    """Build a shorter first-mutation handoff once the next output target is known."""

    if missing_artifact is None:
        return ""

    target, expect_directory = missing_artifact
    label = target.name or str(target)
    # Directories are labelled with a trailing slash for readability.
    if expect_directory and not label.endswith("/"):
        label += "/"
    if expect_directory:
        # Try to name a concrete file to write inside the directory first.
        next_output_file, _ = infer_next_output_file(
            target=target,
            project_root=project_root,
            messages=list(messages or []),
        )
        if next_output_file is None:
            if target.is_dir():
                return (
                    f"Next step: create the next output file under `{label}`. Prefer one "
                    f"concrete `write` call inside `{target}` now."
                )
            return (
                f"Next step: create `{label}`. Prefer one concrete directory-creation step "
                f"for `{target}` now."
            )
        guidance = (
            f"Next step: create `{next_output_file.name}`. Prefer one "
            f"`write(file_path=..., content=...)` call for `{next_output_file}` now."
        )
        if not next_output_file.parent.exists():
            guidance += (
                " The `write` tool can create that file's parent directories automatically."
            )
        guidance += " Make your next response the concrete mutation tool call itself."
        return guidance

    # File target: one `write` call straight at it.
    guidance = (
        f"Next step: create `{label}`. Prefer one "
        f"`write(file_path=..., content=...)` call for `{target}` now."
    )
    if not target.parent.exists():
        guidance += (
            " The `write` tool can create that file's parent directories automatically."
        )
    guidance += " Make your next response the concrete mutation tool call itself."
    return guidance
1892
1893
def _todo_refresh_guidance(
    dod: DefinitionOfDone,
    *,
    project_root: Path | None = None,
) -> str:
    """Return a TodoWrite-refresh reminder, or '' for small/short task states."""
    pending = [
        item
        for item in effective_pending_todo_items(dod, project_root=project_root)
        if item not in _TODO_NUDGE_EXCLUDED_ITEMS
    ]
    completed = [
        item
        for item in dod.completed_items
        if item not in _TODO_NUDGE_EXCLUDED_ITEMS
    ]
    # Tiny efforts (little touched, few real steps) do not need the reminder.
    if len(dod.touched_files) < 2 and len(pending) + len(completed) < 3:
        return ""
    return (
        " If the tracked steps no longer match the confirmed progress, refresh `TodoWrite` "
        "in the same response as the next concrete step instead of spending a full turn on "
        "bookkeeping alone."
    )
1914
1915
def _mark_verification_stale(
    *,
    context: RuntimeContext,
    summary: TurnSummary,
    dod: DefinitionOfDone,
    tool_call: ToolCall,
) -> None:
    """Invalidate prior verification after new mutating work and require a rerun.

    Records a 'verification_stale' timeline entry, resets the DoD's
    verification state, and pushes the verify item back onto the pending list.
    """
    detail = _stale_verification_detail(tool_call)
    # The attempt that just went stale, and the fresh attempt superseding it.
    stale_attempt = ensure_active_verification_attempt(dod)
    next_attempt = begin_new_verification_attempt(
        dod,
        supersedes_attempt_id=stale_attempt.attempt_id,
    )
    append_verification_timeline_entry(
        context,
        summary,
        reason_code="verification_stale",
        reason_summary="previous verification became stale after new mutating work",
        evidence_summary=[f"fresh verification required after {detail}"],
        evidence_provenance=_stale_verification_provenance(dod, detail=detail),
        verification_observations=_stale_verification_observations(
            dod,
            detail=detail,
            stale_attempt_id=stale_attempt.attempt_id,
            stale_attempt_number=stale_attempt.attempt_number,
            superseded_by_attempt_id=next_attempt.attempt_id,
        ),
    )
    dod.last_verification_result = VerificationObservationStatus.STALE.value
    # Drop now-stale evidence and reopen the verify todo item.
    dod.evidence = []
    while _VERIFY_ITEM in dod.completed_items:
        dod.completed_items.remove(_VERIFY_ITEM)
    if _VERIFY_ITEM not in dod.pending_items:
        dod.pending_items.append(_VERIFY_ITEM)
1950
1951
def _todo_is_mutation_step(label: str) -> bool:
    """Return True when a todo label describes state-mutating work."""
    text = label.lower()
    for hint in _MUTATION_TODO_HINTS:
        if hint in text:
            return True
    return False
1955
1956
def _should_plan_verification_for_tool_call(
    dod: DefinitionOfDone,
    *,
    tool_call: ToolCall,
    project_root: Path,
) -> bool:
    """Decide whether this tool call should trigger verification planning.

    Direct mutation tools always qualify.  ``bash`` qualifies only once some
    file output is already confirmed (a suffixed touched file, or a satisfied
    planned file artifact).
    """
    name = tool_call.name
    if name in {"write", "edit", "patch"}:
        return True
    if name != "bash":
        return False
    for path in dod.touched_files:
        if not str(path).strip():
            continue
        if Path(path).expanduser().resolve(strict=False).suffix:
            return True
    for target, expect_directory in collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    ):
        if expect_directory:
            continue
        if planned_artifact_target_satisfied(
            dod,
            target=target,
            expect_directory=False,
            project_root=project_root,
        ):
            return True
    return False
1987
1988
def _mark_verification_planned(
    *,
    context: RuntimeContext,
    summary: TurnSummary,
    dod: DefinitionOfDone,
    tool_call: ToolCall,
) -> None:
    """Record that verification is planned after new mutating work.

    No-op when verification is already tracked (planned/pending/stale) or when
    no verification commands can be derived. Otherwise opens a new attempt,
    appends a timeline entry, and re-queues the verify todo item.
    """
    already_tracked = {
        VerificationObservationStatus.PLANNED.value,
        VerificationObservationStatus.PENDING.value,
        VerificationObservationStatus.STALE.value,
    }
    if dod.last_verification_result in already_tracked:
        return
    if not dod.verification_commands:
        dod.verification_commands = derive_verification_commands(
            dod,
            project_root=context.project_root,
            task_statement=dod.task_statement,
        )
    runnable: list[str] = []
    for command in dod.verification_commands:
        if command:
            runnable.append(command)
    if not runnable:
        return

    attempt = begin_new_verification_attempt(dod)
    detail = _stale_verification_detail(tool_call)
    provenance: list[EvidenceProvenance] = []
    observations: list[VerificationObservation] = []
    for command in runnable:
        planned_text = f"verification planned for `{command}`"
        provenance.append(
            EvidenceProvenance(
                category="verification",
                source="dod.verification_commands",
                summary=planned_text,
                status=EvidenceProvenanceStatus.MISSING.value,
                subject=command,
                detail=detail,
            )
        )
        observations.append(
            VerificationObservation(
                status=VerificationObservationStatus.PLANNED.value,
                summary=planned_text,
                command=command,
                kind="runtime",
                detail=detail,
                attempt_id=attempt.attempt_id,
                attempt_number=attempt.attempt_number,
            )
        )
    append_verification_timeline_entry(
        context,
        summary,
        reason_code="verification_planned",
        reason_summary="verification is planned after new mutating work",
        # Timeline summary shows at most the first two commands.
        evidence_summary=[
            f"verification planned for `{command}`" for command in runnable[:2]
        ],
        evidence_provenance=provenance,
        verification_observations=observations,
    )
    dod.last_verification_result = VerificationObservationStatus.PLANNED.value
    # Move the verify item back to pending (it may appear multiple times).
    while _VERIFY_ITEM in dod.completed_items:
        dod.completed_items.remove(_VERIFY_ITEM)
    if _VERIFY_ITEM not in dod.pending_items:
        dod.pending_items.append(_VERIFY_ITEM)
2049
2050
def _stale_verification_observations(
    dod: DefinitionOfDone,
    *,
    detail: str,
    stale_attempt_id: str,
    stale_attempt_number: int,
    superseded_by_attempt_id: str,
) -> list[VerificationObservation]:
    """Build STALE observations for every verification command now invalidated."""
    observations: list[VerificationObservation] = []
    for command in _stale_verification_commands(dod):
        observations.append(
            VerificationObservation(
                status=VerificationObservationStatus.STALE.value,
                summary=f"verification became stale for `{command}` after new mutating work",
                command=command,
                kind="runtime",
                detail=detail,
                attempt_id=stale_attempt_id,
                attempt_number=stale_attempt_number,
                # NOTE(review): the *superseding* attempt id is stored on the
                # `supersedes_attempt_id` field — confirm the field name
                # matches the intended direction of the relationship.
                supersedes_attempt_id=superseded_by_attempt_id,
            )
        )
    return observations
2072
2073
def _stale_verification_provenance(
    dod: DefinitionOfDone,
    *,
    detail: str,
) -> list[EvidenceProvenance]:
    """Build MISSING-status provenance entries for each stale verification command."""
    entries: list[EvidenceProvenance] = []
    for command in _stale_verification_commands(dod):
        entries.append(
            EvidenceProvenance(
                category="verification",
                source="tool_execution",
                summary=f"fresh verification required for `{command}` after new mutating work",
                status=EvidenceProvenanceStatus.MISSING.value,
                subject=command,
                detail=detail,
            )
        )
    return entries
2090
2091
2092 def _stale_verification_commands(dod: DefinitionOfDone) -> list[str]:
2093 commands = [command for command in dod.verification_commands if command]
2094 if commands:
2095 return commands
2096 observed = [evidence.command for evidence in dod.evidence if evidence.command]
2097 if observed:
2098 return observed
2099 return ["verification"]
2100
2101
2102 def _stale_verification_detail(tool_call: ToolCall) -> str:
2103 if tool_call.name in {"write", "edit", "patch"}:
2104 file_path = str(tool_call.arguments.get("file_path", "")).strip()
2105 if file_path:
2106 return f"{tool_call.name} changed {file_path}"
2107 if tool_call.name == "bash":
2108 command = str(tool_call.arguments.get("command", "")).strip()
2109 if command:
2110 return f"bash ran `{command}`"
2111 return f"{tool_call.name} changed the workspace"
2112
2113
2114 def _current_mutation_label(tool_call: ToolCall) -> str:
2115 if tool_call.name in {"write", "edit", "patch"}:
2116 file_path = str(tool_call.arguments.get("file_path", "")).strip()
2117 if file_path:
2118 return f"`{Path(file_path).name or file_path}`"
2119 if tool_call.name == "bash":
2120 command = str(tool_call.arguments.get("command", "")).strip()
2121 if command:
2122 return f"`{command}`"
2123 return f"the successful `{tool_call.name}` result"
2124
2125
2126 def _is_pure_directory_creation_tool_call(tool_call: ToolCall) -> bool:
2127 if tool_call.name != "bash":
2128 return False
2129 command = str(tool_call.arguments.get("command", "")).strip()
2130 if not command or any(
2131 operator in command for operator in ("&&", "||", ";", "|", "$(", ">", "<")
2132 ):
2133 return False
2134 try:
2135 parts = shlex.split(command)
2136 except ValueError:
2137 return False
2138 return bool(parts) and parts[0] == "mkdir"
2139
2140
2141 def _recent_recovery_prompt(messages: list[Any]) -> bool:
2142 for message in reversed(messages[-4:]):
2143 role = getattr(message, "role", None)
2144 if getattr(role, "value", role) != "user":
2145 continue
2146 content = getattr(message, "content", "")
2147 if not isinstance(content, str):
2148 continue
2149 if content.startswith("[EMPTY ASSISTANT RESPONSE]"):
2150 return True
2151 if content.startswith("[CONTINUE CURRENT STEP]"):
2152 return True
2153 return False
2154
2155
2156 def _tool_call_label(tool_call: ToolCall) -> str:
2157 """Human-readable label for one tool call."""
2158 name = tool_call.name
2159 if name in ("write", "edit", "patch"):
2160 path = str(tool_call.arguments.get("file_path", "")).strip()
2161 if path:
2162 short = Path(path).name
2163 verb = "Write" if name == "write" else "Edit"
2164 return f"{verb} {short}"
2165 if name == "bash":
2166 cmd = str(tool_call.arguments.get("command", "")).strip()
2167 if cmd:
2168 return f"Run {cmd[:40]}"
2169 if name == "read":
2170 path = str(tool_call.arguments.get("file_path", "")).strip()
2171 if path:
2172 return f"Read {Path(path).name}"
2173 if name == "glob":
2174 pattern = str(tool_call.arguments.get("pattern", "")).strip()
2175 if pattern:
2176 return f"Search {pattern[:30]}"
2177 return ""
2178
2179
def _batch_planned_labels(tool_calls: list[ToolCall]) -> list[str]:
    """Build labels for all tool calls in a batch (for upfront planning display)."""
    # Dict keys give order-preserving de-duplication.
    unique: dict[str, None] = {}
    for call in tool_calls:
        label = _tool_call_label(call)
        if label:
            unique.setdefault(label, None)
    return list(unique)