Python · 64001 bytes Raw Blame History
1 """Runtime integration coverage for Sprint 04 workflow routing."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6
7 import pytest
8
9 from loader.agent.loop import AgentConfig
10 from loader.llm.base import CompletionResponse, ToolCall
11 from tests.helpers.runtime_harness import ScriptedBackend, run_scenario
12
13
def non_streaming_config() -> AgentConfig:
    """Build the baseline config shared by the deterministic runtime tests.

    Passes ``auto_context=False``, ``stream=False`` and ``max_iterations=8``
    to ``AgentConfig`` and leaves every other option at its default.
    """

    settings = {"auto_context": False, "stream": False, "max_iterations": 8}
    return AgentConfig(**settings)
18
19
def non_streaming_clarify_config() -> AgentConfig:
    """Build a deterministic config whose workflow mode is forced to clarify.

    Same non-streaming settings as the baseline config, plus
    ``workflow_mode_override="clarify"``.
    """

    settings = {
        "auto_context": False,
        "stream": False,
        "max_iterations": 8,
        "workflow_mode_override": "clarify",
    }
    return AgentConfig(**settings)
29
30
def non_streaming_single_round_clarify_config() -> AgentConfig:
    """Build a clarify-mode config limited to one clarify round.

    Used by the clarify artifact tests that set ``clarify_max_rounds=1``.
    """

    settings = {
        "auto_context": False,
        "stream": False,
        "max_iterations": 8,
        "workflow_mode_override": "clarify",
        "clarify_max_rounds": 1,
    }
    return AgentConfig(**settings)
41
42
def non_streaming_pressure_clarify_config() -> AgentConfig:
    """Build a clarify-mode config that permits a third clarify round.

    Raises ``max_iterations`` to 10 and sets ``clarify_max_rounds=3`` so the
    pressure-pass tests have room for a third question.
    """

    settings = {
        "auto_context": False,
        "stream": False,
        "max_iterations": 10,
        "workflow_mode_override": "clarify",
        "clarify_max_rounds": 3,
    }
    return AgentConfig(**settings)
53
54
def workflow_modes(run) -> list[str]:
    """Collect the non-empty workflow modes from ``run.events`` in emission order.

    Only events whose ``type`` is ``"workflow_mode"`` are inspected; events
    with a falsy ``workflow_mode`` are dropped.
    """

    modes = []
    for evt in run.events:
        if evt.type != "workflow_mode":
            continue
        if evt.workflow_mode:
            modes.append(evt.workflow_mode)
    return modes
63
64
def artifact_kinds(run) -> list[str]:
    """Collect the non-empty artifact kinds from ``run.events`` in emission order.

    Only events whose ``type`` is ``"artifact"`` are inspected; events with a
    falsy ``artifact_kind`` are dropped.
    """

    kinds = []
    for evt in run.events:
        if evt.type != "artifact":
            continue
        if evt.artifact_kind:
            kinds.append(evt.artifact_kind)
    return kinds
73
74
def verification_commands(run) -> list[str]:
    """Return the ``command`` argument of every verification-phase tool call.

    Each ``tool_call`` event whose ``phase`` is ``"verification"`` contributes
    one entry: the stringified ``command`` value from ``tool_args``, or an
    empty string when ``tool_args`` is missing or has no ``command`` key.
    """

    commands = []
    for evt in run.events:
        if evt.type == "tool_call" and evt.phase == "verification":
            args = evt.tool_args or {}
            commands.append(str(args.get("command", "")))
    return commands
83
84
def workflow_timeline_kinds(run) -> list[str]:
    """Return the ``kind`` of each workflow timeline entry, in order.

    Asserts that ``run.agent.last_turn_summary`` is present before reading
    its ``workflow_timeline``.
    """

    summary = run.agent.last_turn_summary
    assert summary is not None
    kinds = []
    for entry in summary.workflow_timeline:
        kinds.append(entry.kind)
    return kinds
88
89
def seed_runtime_workspace(root: Path) -> None:
    """Create a small brownfield runtime workspace for clarify tests.

    Writes ``pyproject.toml``, two modules under ``src/loader/runtime`` and
    one file under ``tests``. Directories are created with ``exist_ok=True``
    so seeding an already-seeded root rewrites the files instead of raising
    ``FileExistsError``, and every file is written as UTF-8 rather than in
    the platform's default encoding.

    Args:
        root: Existing directory that receives the seeded files.
    """

    runtime_dir = root / "src" / "loader" / "runtime"
    tests_dir = root / "tests"

    # Written before any mkdir so a missing ``root`` still fails here rather
    # than being silently created by ``parents=True`` below.
    (root / "pyproject.toml").write_text(
        "[project]\nname='loader'\n", encoding="utf-8"
    )
    runtime_dir.mkdir(parents=True, exist_ok=True)
    (runtime_dir / "workflow_lanes.py").write_text(
        '"""Runtime lane orchestration for Loader."""\n\n'
        "class WorkflowLaneRunner:\n"
        "    pass\n",
        encoding="utf-8",
    )
    (runtime_dir / "clarify_strategy.py").write_text(
        '"""Intent-aware clarify strategy for runtime follow-up."""\n',
        encoding="utf-8",
    )
    tests_dir.mkdir(exist_ok=True)
    (tests_dir / "test_workflow_runtime.py").write_text("pass\n", encoding="utf-8")
105
106
@pytest.mark.asyncio
async def test_ambiguous_prompt_routes_to_clarify_and_persists_brief(
    temp_dir: Path,
) -> None:
    """An ambiguous prompt goes through clarify, persists a brief, then executes.

    The scripted backend asks one ``AskUserQuestion``, returns a sectioned
    brief, and finishes with a plain closing message. The assertions cover
    the emitted workflow modes, the persisted clarify brief, the session's
    artifact bookkeeping, and the final turn summary.
    """
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="I need one clarification before I proceed.",
                tool_calls=[
                    ToolCall(
                        id="ask-1",
                        name="AskUserQuestion",
                        arguments={
                            "question": (
                                "What should stay out of scope for this Loader "
                                "improvement?"
                            ),
                        },
                    )
                ],
            ),
            CompletionResponse(
                content="\n".join(
                    [
                        "## Task Statement",
                        "Improve Loader so it feels more like claw-code.",
                        "",
                        "## Desired Outcome",
                        "- Make Loader more reliable without broad redesign.",
                        "",
                        "## In Scope",
                        "- Tighten the runtime workflow around the user-facing goal.",
                        "",
                        "## Non Goals",
                        "- Rebuild unrelated subsystems.",
                        "",
                        "## Decision Boundaries",
                        "- Escalate before changing unrelated UX patterns.",
                        "",
                        "## Constraints",
                        "- Stay within the current repository.",
                        "",
                        "## Likely Touchpoints",
                        "- Runtime entry points and prompt behavior.",
                        "",
                        "## Assumptions",
                        "- The user wants a narrow runtime-quality improvement.",
                        "",
                        "## Acceptance Criteria",
                        "- The improvement stays focused on runtime behavior.",
                    ]
                )
            ),
            CompletionResponse(content="I have the brief and can move forward."),
        ]
    )

    async def answer(question: str, options: list[str] | None) -> str:
        # The scripted question is free-form, so no options are expected.
        assert "out of scope" in question.lower()
        assert options is None
        return "Do not redesign the whole interface."

    run = await run_scenario(
        "Improve Loader so it feels more like claw-code.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    # Check the summary exists before the first dereference so a missing
    # summary fails as a clear assertion instead of an AttributeError.
    summary = run.agent.last_turn_summary
    assert summary is not None

    dod = summary.definition_of_done
    assert dod is not None
    assert workflow_modes(run)[:2] == ["clarify", "execute"]
    assert artifact_kinds(run) == ["clarify_brief"]
    assert dod.clarify_brief is not None
    assert Path(dod.clarify_brief).exists()
    brief_markdown = Path(dod.clarify_brief).read_text()
    assert "single-question clarify brief" in brief_markdown
    assert "return control to `execute` mode" in brief_markdown
    assert run.agent.session.workflow_artifact_status == "active"
    assert run.agent.session.workflow_artifact_sources == ["clarify_brief"]
    assert "runtime behavior" in dod.acceptance_criteria[0].lower()
    assert "## Clarify Mode" in backend.invocations[0].messages[0].content
    assert summary.workflow_mode == "execute"
    assert summary.workflow_reason_code == "post_clarify_task_is_concrete"
    assert summary.workflow_decision_kind == "handoff"
    assert summary.workflow_timeline[0].mode == "clarify"
    assert summary.workflow_timeline[-1].mode == "execute"
196
197
@pytest.mark.asyncio
async def test_clarify_prompt_and_brief_include_workspace_evidence(
    temp_dir: Path,
) -> None:
    """Clarify and brief prompts carry evidence from the seeded workspace.

    After seeding the workspace, the last message of the first backend
    invocation must contain the evidence and repo-fact markers, and the last
    message of the second must contain the grounded brief hints.
    """
    seed_runtime_workspace(temp_dir)

    task = "Tighten clarify behavior around src/loader/runtime/workflow_lanes.py."
    clarifying_question = CompletionResponse(
        content="I need one clarification before I proceed.",
        tool_calls=[
            ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": (
                        "Should I keep the work inside "
                        "src/loader/runtime/workflow_lanes.py?"
                    ),
                },
            )
        ],
    )
    brief_lines = [
        "## Task Statement",
        task,
        "",
        "## Desired Outcome",
        "- Keep clarify behavior tighter around one runtime seam.",
        "",
        "## In Scope",
        "- Narrow the change to workflow lane handling.",
        "",
        "## Non Goals",
        "- Do not broaden into unrelated CLI changes.",
        "",
        "## Decision Boundaries",
        "- Escalate before changing other runtime modules.",
        "",
        "## Constraints",
        "- Stay within the existing workspace.",
        "",
        "## Likely Touchpoints",
        "- src/loader/runtime/workflow_lanes.py",
        "",
        "## Assumptions",
        "- The user wants a narrow brownfield change.",
        "",
        "## Acceptance Criteria",
        "- Clarify stays scoped to workflow_lanes.py.",
    ]
    scripted = ScriptedBackend(
        completions=[
            clarifying_question,
            CompletionResponse(content="\n".join(brief_lines)),
            CompletionResponse(content="I can move forward now."),
            CompletionResponse(content="Done."),
            CompletionResponse(content="Done."),
        ]
    )

    async def answer(_: str, __: list[str] | None) -> str:
        return "Yes, keep it there and avoid CLI churn."

    outcome = await run_scenario(
        task,
        scripted,
        config=non_streaming_clarify_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    clarify_prompt = scripted.invocations[0].messages[-1].content
    for marker in (
        "Relevant workspace evidence:",
        "Relevant repo facts:",
        "class WorkflowLaneRunner:",
    ):
        assert marker in clarify_prompt

    brief_prompt = scripted.invocations[1].messages[-1].content
    for marker in (
        "Grounded brief hints:",
        "Seed likely touchpoints:",
        "Scope acceptance criteria:",
    ):
        assert marker in brief_prompt

    criteria = outcome.agent.last_turn_summary.definition_of_done.acceptance_criteria
    assert "workflow_lanes.py" in criteria[0]
279
280
@pytest.mark.asyncio
async def test_fallback_clarify_brief_inherits_grounded_workspace_hints(
    temp_dir: Path,
) -> None:
    """A brief produced after an empty completion still cites workspace files.

    The backend answers the brief request with empty content, so the persisted
    brief is presumably synthesized by the runtime's fallback path (per the
    test name). It must still mention both seeded runtime modules and the
    scoped acceptance criterion.
    """
    seed_runtime_workspace(temp_dir)

    clarifying_question = CompletionResponse(
        content="I need one clarification before I proceed.",
        tool_calls=[
            ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": (
                        "Should I keep the work inside "
                        "src/loader/runtime/workflow_lanes.py?"
                    ),
                },
            )
        ],
    )
    scripted = ScriptedBackend(
        completions=[
            clarifying_question,
            CompletionResponse(content=""),
            CompletionResponse(content="I can move forward now."),
            CompletionResponse(content="Done."),
            CompletionResponse(content="Done."),
        ]
    )

    async def answer(_: str, __: list[str] | None) -> str:
        reply = (
            "Keep it scoped to src/loader/runtime/workflow_lanes.py and leave "
            "clarify_strategy.py unchanged."
        )
        return reply

    outcome = await run_scenario(
        "Tighten clarify behavior around src/loader/runtime/workflow_lanes.py.",
        scripted,
        config=non_streaming_single_round_clarify_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    brief_prompt = scripted.invocations[1].messages[-1].content
    for marker in (
        "Grounded brief hints:",
        "Seed likely touchpoints:",
        "Scope acceptance criteria:",
    ):
        assert marker in brief_prompt

    dod = outcome.agent.last_turn_summary.definition_of_done
    assert dod is not None
    assert dod.clarify_brief is not None

    persisted_brief = Path(dod.clarify_brief).read_text()
    for fragment in (
        "src/loader/runtime/workflow_lanes.py",
        "src/loader/runtime/clarify_strategy.py",
        "WorkflowLaneRunner",
    ):
        assert fragment in persisted_brief

    expected_criterion = (
        "Primary work stays scoped to `src/loader/runtime/workflow_lanes.py`."
    )
    assert any(
        expected_criterion == criterion for criterion in dod.acceptance_criteria
    )
341
342
@pytest.mark.asyncio
async def test_clarify_can_continue_for_a_second_round_when_scope_stays_ambiguous(
    temp_dir: Path,
) -> None:
    """A vague first answer produces exactly one ``clarify_continue`` entry.

    The user first replies "Make it nicer." and only names a file in the
    second reply. The workflow timeline must record one continuation at the
    ``readiness`` stage and then a clarify exit.
    """
    task = "Improve Loader so it feels more like claw-code."

    first_question = CompletionResponse(
        content="I need one clarification before I proceed.",
        tool_calls=[
            ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={"question": "What part should change most?"},
            )
        ],
    )
    second_question = CompletionResponse(
        content="I need one more focused detail before moving on.",
        tool_calls=[
            ToolCall(
                id="ask-2",
                name="AskUserQuestion",
                arguments={
                    "question": "Which file should change, and what should stay unchanged?",
                },
            )
        ],
    )
    brief_lines = [
        "## Task Statement",
        task,
        "",
        "## Desired Outcome",
        "- Make the runtime feel more disciplined.",
        "",
        "## In Scope",
        "- Update src/loader/runtime/conversation.py only.",
        "",
        "## Non Goals",
        "- Do not change the CLI surface.",
        "",
        "## Decision Boundaries",
        "- Escalate before touching unrelated modules.",
        "",
        "## Constraints",
        "- Stay within the repository.",
        "",
        "## Likely Touchpoints",
        "- src/loader/runtime/conversation.py",
        "",
        "## Assumptions",
        "- The user wants a narrow runtime change.",
        "",
        "## Acceptance Criteria",
        "- Only conversation.py changes.",
    ]
    scripted = ScriptedBackend(
        completions=[
            first_question,
            CompletionResponse(content=""),
            second_question,
            CompletionResponse(content="\n".join(brief_lines)),
            CompletionResponse(content="I have enough detail now and can move forward."),
        ]
    )

    replies = iter(
        [
            "Make it nicer.",
            "Only update src/loader/runtime/conversation.py and keep the CLI unchanged.",
        ]
    )

    async def answer(_: str, __: list[str] | None) -> str:
        return next(replies)

    outcome = await run_scenario(
        task,
        scripted,
        config=non_streaming_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    dod = outcome.agent.last_turn_summary.definition_of_done
    assert dod is not None
    assert dod.clarify_brief is not None
    assert Path(dod.clarify_brief).exists()
    assert workflow_modes(outcome)[:2] == ["clarify", "execute"]

    timeline_kinds = workflow_timeline_kinds(outcome)
    assert timeline_kinds.count("clarify_continue") == 1
    assert "clarify_exit" in timeline_kinds

    round_two_prompt = scripted.invocations[2].messages[-1].content
    assert "Focus slot: likely touchpoints" in round_two_prompt

    timeline = outcome.agent.last_turn_summary.workflow_timeline
    reason_codes = [entry.reason_code for entry in timeline]
    assert "clarify_follow_up_needed" in reason_codes

    continuation = next(
        entry for entry in timeline if entry.kind == "clarify_continue"
    )
    assert continuation.clarify_stage == "readiness"
444
445
@pytest.mark.asyncio
async def test_second_round_fallback_question_uses_workspace_grounding(
    temp_dir: Path,
) -> None:
    """The second clarify question is still asked when the backend goes silent.

    After the first scripted question the backend returns two empty
    completions, yet the user must be asked a second question that mentions a
    "concrete outcome". The round-two prompt must target the desired-outcome
    slot.
    """
    seed_runtime_workspace(temp_dir)

    first_question = CompletionResponse(
        content="I need one clarification before I proceed.",
        tool_calls=[
            ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={"question": "What part should change most?"},
            )
        ],
    )
    brief_lines = [
        "## Task Statement",
        "Tighten Loader runtime clarify behavior.",
        "",
        "## Desired Outcome",
        "- Keep the clarify workflow more grounded.",
        "",
        "## In Scope",
        "- Stay inside workflow lane handling.",
        "",
        "## Non Goals",
        "- Do not broaden into the CLI surface.",
        "",
        "## Decision Boundaries",
        "- Escalate before changing unrelated modules.",
        "",
        "## Constraints",
        "- Stay within the repository.",
        "",
        "## Likely Touchpoints",
        "- src/loader/runtime/workflow_lanes.py",
        "",
        "## Assumptions",
        "- The user wants a narrow runtime behavior fix.",
        "",
        "## Acceptance Criteria",
        "- workflow_lanes.py stays the main touchpoint.",
    ]
    scripted = ScriptedBackend(
        completions=[
            first_question,
            CompletionResponse(content=""),
            CompletionResponse(content=""),
            CompletionResponse(content="\n".join(brief_lines)),
            CompletionResponse(content="I can move forward now."),
            CompletionResponse(content="Done."),
            CompletionResponse(content="Done."),
        ]
    )

    asked_questions: list[str] = []
    replies = iter(
        [
            "Make it nicer.",
            "Keep it scoped to src/loader/runtime/workflow_lanes.py and leave the CLI alone.",
        ]
    )

    async def answer(question: str, _: list[str] | None) -> str:
        asked_questions.append(question)
        return next(replies)

    outcome = await run_scenario(
        "Tighten Loader runtime clarify behavior.",
        scripted,
        config=non_streaming_clarify_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    round_two_prompt = scripted.invocations[2].messages[-1].content
    assert len(asked_questions) == 2
    assert "concrete outcome" in asked_questions[1].lower()
    assert "Focus slot: desired outcome" in round_two_prompt

    tool_names = [
        evt.tool_name
        for evt in outcome.events
        if evt.type == "tool_call" and evt.tool_name
    ]
    assert tool_names[:2] == ["AskUserQuestion", "AskUserQuestion"]
532
533
@pytest.mark.asyncio
async def test_second_round_non_goal_prompt_uses_slot_aware_repo_facts(
    temp_dir: Path,
) -> None:
    """A brief without non-goals leads to a round-two prompt about non-goals.

    The first scripted brief omits the Non Goals and Decision Boundaries
    sections. The round-two prompt must focus on non-goals and cite both
    seeded runtime modules, and the second question must mention
    ``clarify_strategy.py`` staying unchanged.
    """
    seed_runtime_workspace(temp_dir)

    # Sections shared by both scripted briefs; they differ only in the
    # optional middle sections and the final acceptance criterion.
    opening = [
        "## Task Statement",
        "Tighten Loader runtime clarify behavior.",
        "",
        "## Desired Outcome",
        "- Keep clarify behavior grounded in brownfield repo facts.",
        "",
        "## In Scope",
        "- Focus on runtime lane handling first.",
        "",
    ]
    closing = [
        "## Constraints",
        "- Stay within the current repository.",
        "",
        "## Likely Touchpoints",
        "- src/loader/runtime/workflow_lanes.py",
        "",
        "## Acceptance Criteria",
    ]
    boundaries = [
        "## Non Goals",
        "- Leave clarify strategy behavior unchanged for now.",
        "",
        "## Decision Boundaries",
        "- Stop and confirm before broadening beyond runtime lanes.",
        "",
    ]

    def brief(criterion: str, middle: tuple[str, ...] = ()) -> CompletionResponse:
        return CompletionResponse(
            content="\n".join([*opening, *middle, *closing, criterion])
        )

    first_question = CompletionResponse(
        content="I need one clarification before I proceed.",
        tool_calls=[
            ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": "Which runtime file should I focus on first?",
                },
            )
        ],
    )
    scripted = ScriptedBackend(
        completions=[
            first_question,
            brief("- The next round clarifies what stays unchanged."),
            CompletionResponse(content=""),
            brief(
                "- workflow_lanes.py remains the primary touchpoint.",
                middle=tuple(boundaries),
            ),
            CompletionResponse(content="I can move forward now."),
            CompletionResponse(content="Done."),
            CompletionResponse(content="Done."),
        ]
    )

    asked_questions: list[str] = []
    replies = iter(
        [
            "Start with src/loader/runtime/workflow_lanes.py.",
            "Keep clarify_strategy.py unchanged while we tighten the workflow lanes.",
        ]
    )

    async def answer(question: str, _: list[str] | None) -> str:
        asked_questions.append(question)
        return next(replies)

    await run_scenario(
        "Tighten Loader runtime clarify behavior.",
        scripted,
        config=non_streaming_clarify_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    round_two_prompt = scripted.invocations[2].messages[-1].content
    for marker in (
        "Focus slot: non-goals",
        "Relevant workspace evidence:",
        "workflow_lanes.py",
        "clarify_strategy.py",
        "Relevant repo facts:",
    ):
        assert marker in round_two_prompt

    assert len(asked_questions) == 2
    second_question = asked_questions[1]
    assert "clarify_strategy.py" in second_question
    assert "unchanged" in second_question.lower()
641
642
@pytest.mark.asyncio
async def test_third_round_example_pressure_question_grounds_non_goals_with_repo_facts(
    temp_dir: Path,
) -> None:
    """A third clarify round runs an "example" pressure pass on non-goals.

    The first two scripted briefs never state non-goals and the second user
    reply stays vague. The round-three prompt must carry the example pressure
    marker and both seeded file names, and the third question must mention
    both files and "out of scope".
    """
    seed_runtime_workspace(temp_dir)

    # Sections shared by all three scripted briefs; only the optional
    # Non Goals section and the final acceptance criterion vary.
    opening = [
        "## Task Statement",
        "Tighten Loader runtime clarify behavior.",
        "",
        "## Desired Outcome",
        "- Keep clarify follow-up grounded in brownfield repo evidence.",
        "",
        "## In Scope",
        "- Focus on runtime lane handling first.",
        "",
    ]
    closing = [
        "## Decision Boundaries",
        "- Stop and confirm before broadening beyond runtime lanes.",
        "",
        "## Constraints",
        "- Stay within the current repository.",
        "",
        "## Likely Touchpoints",
        "- src/loader/runtime/workflow_lanes.py",
        "",
        "## Acceptance Criteria",
    ]
    non_goals = [
        "## Non Goals",
        "- Leave clarify_strategy.py unchanged while tightening workflow_lanes.py.",
        "",
    ]

    def brief(criterion: str, middle: tuple[str, ...] = ()) -> CompletionResponse:
        return CompletionResponse(
            content="\n".join([*opening, *middle, *closing, criterion])
        )

    first_question = CompletionResponse(
        content="I need one clarification before I proceed.",
        tool_calls=[
            ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": "Which runtime file should I focus on first?",
                },
            )
        ],
    )
    scripted = ScriptedBackend(
        completions=[
            first_question,
            brief("- The next clarify round locks down what stays out of scope."),
            CompletionResponse(content=""),
            brief(
                "- The next clarify round still needs a concrete out-of-scope boundary."
            ),
            CompletionResponse(content=""),
            brief(
                "- workflow_lanes.py stays in scope and "
                "clarify_strategy.py stays out of scope.",
                middle=tuple(non_goals),
            ),
            CompletionResponse(content="I can move forward now."),
            CompletionResponse(content="Done."),
            CompletionResponse(content="Done."),
        ]
    )

    asked_questions: list[str] = []
    replies = iter(
        [
            "Start with src/loader/runtime/workflow_lanes.py.",
            "Maybe something around the workflow lane area.",
            (
                "Changing workflow_lanes.py is in scope, but clarify_strategy.py "
                "should stay out of scope."
            ),
        ]
    )

    async def answer(question: str, _: list[str] | None) -> str:
        asked_questions.append(question)
        return next(replies)

    await run_scenario(
        "Tighten Loader runtime clarify behavior.",
        scripted,
        config=non_streaming_pressure_clarify_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    round_three_prompt = scripted.invocations[4].messages[-1].content
    for marker in (
        "Focus slot: non-goals",
        "Pressure pass: example",
        "Relevant repo facts:",
        "workflow_lanes.py",
        "clarify_strategy.py",
    ):
        assert marker in round_three_prompt

    assert len(asked_questions) == 3
    third_question = asked_questions[2]
    assert "out of scope" in third_question.lower()
    assert "workflow_lanes.py" in third_question
    assert "clarify_strategy.py" in third_question
788
789
@pytest.mark.asyncio
async def test_third_round_tradeoff_pressure_question_uses_nearby_repo_fact(
    temp_dir: Path,
) -> None:
    """A third clarify round runs a "tradeoff" pressure pass on non-goals.

    The first two scripted briefs omit Non Goals and Decision Boundaries. The
    round-three prompt must carry the tradeoff pressure marker and mention
    ``clarify_strategy.py``, and the third question must include the
    "broader edits would be easier" wording.
    """
    seed_runtime_workspace(temp_dir)

    # Sections shared by all three scripted briefs; only the optional middle
    # sections and the final acceptance criterion vary.
    opening = [
        "## Task Statement",
        "Tighten Loader runtime clarify behavior.",
        "",
        "## Desired Outcome",
        "- Keep clarify behavior grounded in brownfield repo facts.",
        "",
        "## In Scope",
        "- Focus on runtime lane handling first.",
        "",
    ]
    closing = [
        "## Constraints",
        "- Stay within the current repository.",
        "",
        "## Likely Touchpoints",
        "- src/loader/runtime/workflow_lanes.py",
        "",
        "## Acceptance Criteria",
    ]
    boundaries = [
        "## Non Goals",
        "- Leave clarify strategy behavior unchanged for now.",
        "",
        "## Decision Boundaries",
        "- Stop and confirm before broadening beyond runtime lanes.",
        "",
    ]

    def brief(criterion: str, middle: tuple[str, ...] = ()) -> CompletionResponse:
        return CompletionResponse(
            content="\n".join([*opening, *middle, *closing, criterion])
        )

    first_question = CompletionResponse(
        content="I need one clarification before I proceed.",
        tool_calls=[
            ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": "Which runtime file should I focus on first?",
                },
            )
        ],
    )
    scripted = ScriptedBackend(
        completions=[
            first_question,
            brief("- The next round clarifies what stays unchanged."),
            CompletionResponse(content=""),
            brief("- The next round still needs a clearer stop boundary."),
            CompletionResponse(content=""),
            brief(
                "- workflow_lanes.py remains the primary touchpoint.",
                middle=tuple(boundaries),
            ),
            CompletionResponse(content="I can move forward now."),
            CompletionResponse(content="Done."),
            CompletionResponse(content="Done."),
        ]
    )

    asked_questions: list[str] = []
    replies = iter(
        [
            "Start with src/loader/runtime/workflow_lanes.py.",
            "Scope it to the runtime lane code.",
            "Keep clarify_strategy.py unchanged while we tighten the workflow lanes.",
        ]
    )

    async def answer(question: str, _: list[str] | None) -> str:
        asked_questions.append(question)
        return next(replies)

    await run_scenario(
        "Tighten Loader runtime clarify behavior.",
        scripted,
        config=non_streaming_pressure_clarify_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    round_three_prompt = scripted.invocations[4].messages[-1].content
    for marker in (
        "Focus slot: non-goals",
        "Pressure pass: tradeoff",
        "Relevant repo facts:",
        "clarify_strategy.py",
    ):
        assert marker in round_three_prompt

    assert len(asked_questions) == 3
    third_question = asked_questions[2]
    assert "broader edits would be easier" in third_question
    assert "clarify_strategy.py" in third_question
921
922
@pytest.mark.asyncio
async def test_third_round_assumption_question_challenges_desired_outcome_assumptions(
    temp_dir: Path,
) -> None:
    """A third clarify round runs an "assumption" pressure pass on the outcome.

    The first two scripted briefs have no Desired Outcome section. The
    round-three prompt must focus on the desired-outcome slot with the
    assumption pressure marker, and the third question must mention an
    "assumption" and "get wrong".
    """
    seed_runtime_workspace(temp_dir)

    # Sections shared by all three scripted briefs; only the optional
    # Desired Outcome section and the final acceptance criterion vary.
    heading = [
        "## Task Statement",
        "Tighten Loader runtime clarify behavior.",
        "",
    ]
    remainder = [
        "## In Scope",
        "- Focus on runtime lane handling first.",
        "",
        "## Non Goals",
        "- Do not broaden into unrelated CLI changes.",
        "",
        "## Decision Boundaries",
        "- Stop and confirm before broadening beyond runtime lanes.",
        "",
        "## Constraints",
        "- Stay within the current repository.",
        "",
        "## Likely Touchpoints",
        "- src/loader/runtime/workflow_lanes.py",
        "",
        "## Acceptance Criteria",
    ]
    desired_outcome = [
        "## Desired Outcome",
        "- Make clarify follow-up cite repo evidence before planning.",
        "",
    ]

    def brief(criterion: str, middle: tuple[str, ...] = ()) -> CompletionResponse:
        return CompletionResponse(
            content="\n".join([*heading, *middle, *remainder, criterion])
        )

    first_question = CompletionResponse(
        content="I need one clarification before I proceed.",
        tool_calls=[
            ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": "Which runtime file should I focus on first?",
                },
            )
        ],
    )
    scripted = ScriptedBackend(
        completions=[
            first_question,
            brief("- The next clarify round makes the intended outcome explicit."),
            CompletionResponse(content=""),
            brief(
                "- The next clarify round still needs a more explicit finished outcome."
            ),
            CompletionResponse(content=""),
            brief(
                "- The clarify outcome is explicit before execution begins.",
                middle=tuple(desired_outcome),
            ),
            CompletionResponse(content="I can move forward now."),
            CompletionResponse(content="Done."),
            CompletionResponse(content="Done."),
        ]
    )

    asked_questions: list[str] = []
    replies = iter(
        [
            "Start with src/loader/runtime/workflow_lanes.py.",
            "Make clarify follow-up cite repo evidence before planning.",
            (
                "The risky assumption would be broader runtime cleanup instead of "
                "just grounded clarify follow-up."
            ),
        ]
    )

    async def answer(question: str, _: list[str] | None) -> str:
        asked_questions.append(question)
        return next(replies)

    await run_scenario(
        "Tighten Loader runtime clarify behavior.",
        scripted,
        config=non_streaming_pressure_clarify_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    round_three_prompt = scripted.invocations[4].messages[-1].content
    for marker in (
        "Focus slot: desired outcome",
        "Pressure pass: assumption",
        "Relevant repo facts:",
        "workflow_lanes.py",
    ):
        assert marker in round_three_prompt

    assert len(asked_questions) == 3
    third_question = asked_questions[2].lower()
    assert "assumption" in third_question
    assert "get wrong" in third_question
1063
1064
@pytest.mark.asyncio
async def test_complex_prompt_routes_to_plan_and_uses_verification_artifact(
    temp_dir: Path,
) -> None:
    """Check that a complex prompt is routed plan -> execute -> verify.

    The scripted backend supplies a combined implementation/verification
    plan, one ``write`` tool call, and a closing message. The assertions
    cover the emitted workflow modes and artifact kinds, the plan files
    recorded on the definition of done, the turn summary's workflow
    fields and timeline, and the command run in the verification phase.
    """

    target = temp_dir / "planned.txt"
    expected_command = f"test -f {target}"
    backend = ScriptedBackend(
        completions=[
            # Plan response: implementation plan and verification plan
            # separated by the <<<VERIFICATION>>> marker.
            CompletionResponse(
                content="\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Create {target.name} in the workspace root.",
                        "",
                        "## Execution Order",
                        f"1. Write {target.name}.",
                        "2. Confirm the file exists.",
                        "",
                        "## Risks",
                        "- Writing the wrong file path.",
                        "",
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        f"- {target.name} exists in the workspace root.",
                        "",
                        "## Verification Commands",
                        f"- `test -f {target}`",
                        "",
                        "## Notes",
                        "- Use a deterministic file existence check.",
                    ]
                )
            ),
            # Execute response: write the planned file.
            CompletionResponse(
                content="I'll create the file now.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "planned output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="The file is in place."),
        ]
    )

    run = await run_scenario(
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    # Assert the summary exists before dereferencing it, so a missing
    # summary fails as an assertion instead of an AttributeError.
    summary = run.agent.last_turn_summary
    assert summary is not None
    dod = summary.definition_of_done
    assert dod is not None

    # Mode routing and emitted artifacts.
    assert workflow_modes(run)[:3] == ["plan", "execute", "verify"]
    assert artifact_kinds(run) == ["implementation_plan", "verification_plan"]

    # Both plan artifacts are recorded as paths to existing files.
    assert dod.implementation_plan is not None
    assert dod.verification_plan is not None
    assert Path(dod.implementation_plan).exists()
    assert Path(dod.verification_plan).exists()
    implementation_markdown = Path(dod.implementation_plan).read_text()
    verification_markdown = Path(dod.verification_plan).read_text()
    assert "single-pass planning artifact generation" in implementation_markdown
    assert "planner/critic consensus loop" in implementation_markdown
    assert "single-pass planning artifact generation" in verification_markdown

    # Session-level artifact bookkeeping.
    assert run.agent.session.workflow_artifact_status == "active"
    assert run.agent.session.workflow_artifact_sources == [
        "implementation_plan",
        "verification_plan",
    ]

    # No decomposition or subtask events are expected on this path.
    assert not any(event.type == "decomposition" for event in run.events)
    assert not any(event.type == "subtask" for event in run.events)

    assert dod.verification_commands == [expected_command]
    assert "## Plan Mode" in backend.invocations[0].messages[0].content

    # Final workflow decision recorded on the turn summary.
    assert summary.workflow_mode == "verify"
    assert summary.workflow_reason_code == (
        "definition_of_done_requires_verification"
    )
    assert summary.workflow_decision_kind == "handoff"

    timeline = summary.workflow_timeline
    assert any(
        entry.mode == "execute"
        and entry.reason_code == "verification_planned"
        and entry.policy_outcome == "planned"
        for entry in timeline
    )
    assert any(
        entry.mode == "verify"
        and entry.reason_code == "verification_pending"
        and entry.policy_outcome == "pending"
        for entry in timeline
    )

    # The verification phase runs exactly the planned command.
    verify_calls = [
        event
        for event in run.events
        if event.type == "tool_call" and event.phase == "verification"
    ]
    assert [event.tool_args["command"] for event in verify_calls] == [expected_command]
1174
1175
@pytest.mark.asyncio
async def test_verify_failure_returns_to_execute_without_retriggering_plan(
    temp_dir: Path,
) -> None:
    """Expect a single plan pass with repeated execute/verify modes.

    The scripted plan verifies with ``grep -q fixed``. The first scripted
    write stores ``draft output`` and the second stores ``fixed output``.
    The test expects exactly one ``plan`` mode, no ``clarify`` mode, at
    least two ``execute`` and two ``verify`` modes, and the final file to
    contain ``fixed output``.
    """

    target = temp_dir / "retry.txt"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- Create {target.name}.",
        "",
        "## Execution Order",
        f"1. Write {target.name}.",
        "2. Fix it if verification fails.",
        "",
        "## Risks",
        "- Initial content may be wrong.",
        "",
        "<<<VERIFICATION>>>",
        "",
        "# Verification Plan",
        "",
        "## Acceptance Criteria",
        "- The file contains the word fixed.",
        "",
        "## Verification Commands",
        f"- `grep -q fixed {target}`",
        "",
        "## Notes",
        "- Retry if the first write misses the target string.",
    ]

    def scripted_write(note: str, call_id: str, text: str) -> CompletionResponse:
        """Build a response that writes ``text`` to the target file."""

        write_call = ToolCall(
            id=call_id,
            name="write",
            arguments={
                "file_path": str(target),
                "content": text,
            },
        )
        return CompletionResponse(content=note, tool_calls=[write_call])

    script = [
        CompletionResponse(content="\n".join(plan_lines)),
        scripted_write("I'll write the first draft.", "write-1", "draft output\n"),
        CompletionResponse(content="First draft is written."),
        scripted_write("I'll correct the file.", "write-2", "fixed output\n"),
        CompletionResponse(content="The file now contains the fixed output."),
    ]
    backend = ScriptedBackend(completions=script)

    prompt = (
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime."
    )
    run = await run_scenario(
        prompt,
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    seen_modes = workflow_modes(run)
    # Planning happens once and clarify never appears.
    assert seen_modes.count("plan") == 1
    assert "clarify" not in seen_modes
    # Execute and verify each show up at least twice.
    assert seen_modes.count("execute") >= 2
    assert seen_modes.count("verify") >= 2
    assert "fixed output" in target.read_text()
1258
1259
@pytest.mark.asyncio
async def test_plan_mode_recovers_verification_commands_from_legacy_separator(
    temp_dir: Path,
) -> None:
    """Verification commands must survive a trailing ``<<VERIFICATION>>`` marker.

    The scripted plan places the verification section directly after the
    implementation plan and ends with the two-bracket separator. The test
    expects the command to appear on the definition of done, to be the only
    verification command run, and the stored verification plan to contain
    the "## Verification Commands" heading exactly once.
    """

    target = temp_dir / "planned.txt"
    expected_commands = [f"test -f {target}"]

    plan_text = "\n".join(
        [
            "# Implementation Plan",
            "",
            "## File Changes",
            f"- Create {target.name} in the workspace root.",
            "",
            "## Execution Order",
            f"1. Write {target.name}.",
            "2. Verify the file exists.",
            "",
            "## Risks",
            "- Losing the verification commands during parsing.",
            "",
            "# Verification Plan",
            "",
            "## Acceptance Criteria",
            f"- {target.name} exists in the workspace root.",
            "",
            "## Verification Commands",
            f"- `test -f {target}`",
            "",
            "## Notes",
            "- This simulates a legacy separator emitted after the plan body.",
            "",
            "<<VERIFICATION>>",
        ]
    )
    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(target),
            "content": "planned output\n",
        },
    )
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(content=plan_text),
            CompletionResponse(
                content="I'll create the planned artifact.",
                tool_calls=[write_call],
            ),
            CompletionResponse(content="The planned artifact is in place."),
        ]
    )

    prompt = (
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime."
    )
    run = await run_scenario(
        prompt,
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    dod = run.agent.last_turn_summary.definition_of_done
    assert dod is not None
    assert dod.verification_commands == expected_commands
    assert verification_commands(run) == expected_commands

    # The heading must not be duplicated in the persisted plan.
    stored_plan = Path(dod.verification_plan).read_text()
    assert stored_plan.count("## Verification Commands") == 1
1327
1328
@pytest.mark.asyncio
async def test_stale_plan_artifacts_trigger_targeted_plan_refresh(
    temp_dir: Path,
) -> None:
    """Expect a second plan pass when execution diverges from the plan.

    The first scripted plan targets ``planned.txt`` while the scripted
    write creates ``notes.txt``; a second scripted plan is built around
    ``notes.txt``. The test expects two ``plan`` modes, both artifact kinds
    emitted twice, ``stale_plan_artifacts`` and ``plan_refresh_completed``
    timeline entries, and a refresh prompt that lists the touched file.
    """

    target = temp_dir / "notes.txt"
    planned_path = temp_dir / "planned.txt"

    stale_plan = "\n".join(
        [
            "# Implementation Plan",
            "",
            "## File Changes",
            "- Create planned.txt in the workspace root.",
            "",
            "## Execution Order",
            "1. Write planned.txt.",
            "",
            "## Risks",
            "- Choosing the wrong file path.",
            "",
            "<<<VERIFICATION>>>",
            "",
            "# Verification Plan",
            "",
            "## Acceptance Criteria",
            "- planned.txt exists.",
            "",
            "## Verification Commands",
            f"- `test -f {planned_path}`",
            "",
            "## Notes",
            "- Verify the originally planned file.",
        ]
    )
    refreshed_plan = "\n".join(
        [
            "# Implementation Plan",
            "",
            "## File Changes",
            f"- Keep {target.name} as the runtime audit artifact.",
            "",
            "## Execution Order",
            f"1. Confirm {target.name} is the intended output.",
            "",
            "## Risks",
            "- Accidentally verifying the stale plan output.",
            "",
            "<<<VERIFICATION>>>",
            "",
            "# Verification Plan",
            "",
            "## Acceptance Criteria",
            f"- {target.name} exists in the workspace root.",
            "",
            "## Verification Commands",
            f"- `test -f {target}`",
            "",
            "## Notes",
            "- Refresh the plan around the actual artifact.",
        ]
    )
    notes_write = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(target),
            "content": "runtime notes\n",
        },
    )
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(content=stale_plan),
            CompletionResponse(
                content="I'll create the audit notes file first.",
                tool_calls=[notes_write],
            ),
            CompletionResponse(content=refreshed_plan),
            CompletionResponse(content="The refreshed plan matches the notes artifact."),
        ]
    )

    prompt = (
        "Implement a persistent workflow artifact with planning artifacts, "
        "verification commands, and plan refresh discipline so Loader can refresh stale plans."
    )
    run = await run_scenario(
        prompt,
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    # Mode sequence: two plan passes, repeated execute, verify last.
    seen_modes = workflow_modes(run)
    assert seen_modes.count("plan") == 2
    assert seen_modes.count("execute") >= 2
    assert seen_modes[-1] == "verify"

    # Each plan pass emits both artifact kinds.
    kinds = artifact_kinds(run)
    assert kinds.count("implementation_plan") == 2
    assert kinds.count("verification_plan") == 2
    assert target.read_text() == "runtime notes\n"

    # The timeline records the stale detection with its evidence.
    timeline = run.agent.last_turn_summary.workflow_timeline
    stale_entries = [
        step for step in timeline if step.reason_code == "stale_plan_artifacts"
    ]
    assert stale_entries
    stale_entry = stale_entries[0]
    assert any("confirmed touchpoint" in note for note in stale_entry.evidence_summary)
    assert any("acceptance anchor" in note for note in stale_entry.evidence_summary)
    assert any(step.reason_code == "plan_refresh_completed" for step in timeline)

    # Locate the first invocation whose last message is the refresh prompt.
    refresh_marker = (
        "Refresh the existing planning artifacts instead of creating a fresh plan from scratch."
    )
    last_messages = (call.messages[-1].content for call in backend.invocations)
    refresh_prompt = next(text for text in last_messages if refresh_marker in text)
    assert "Current execution progress:" in refresh_prompt
    assert "Already touched during execution:" in refresh_prompt
    assert f"- {target}" in refresh_prompt

    # Some message sent to the backend carries the progress-preservation note.
    assert any(
        "Plan refresh preserved the progress already made." in msg.content
        and "Do not restart from initial discovery" in msg.content
        for call in backend.invocations
        for msg in call.messages
    )
1456
1457
@pytest.mark.asyncio
async def test_full_replan_can_reenter_clarify_before_rebuilding_plan(
    temp_dir: Path,
) -> None:
    """Expect clarify to run again before the second plan is built.

    The scripted backend asks a question, returns a brief and plan aimed at
    ``planned.txt``, writes ``notes.txt`` instead, then asks a second
    question and returns a brief and plan aimed at ``notes.txt``. The
    assertions require at least two ``clarify`` modes, exactly two ``plan``
    modes, ``full_replan_requires_clarify`` and ``full_replan_required``
    timeline entries, and ledger items marked contradicted/changed.
    """

    task = (
        "Don't assume the scope: improve Loader so it feels more like claw-code "
        "while tightening workflow artifacts."
    )
    target = temp_dir / "notes.txt"
    backend = ScriptedBackend(
        completions=[
            # First question to the user.
            CompletionResponse(
                content="I need one clarification before planning.",
                tool_calls=[
                    ToolCall(
                        id="ask-1",
                        name="AskUserQuestion",
                        arguments={
                            "question": "What outcome matters most for this Loader improvement?",
                        },
                    )
                ],
            ),
            # First brief: touchpoint and acceptance criteria name planned.txt.
            CompletionResponse(
                content="\n".join(
                    [
                        "## Task Statement",
                        task,
                        "",
                        "## Desired Outcome",
                        "- Improve the runtime workflow around the planned artifact.",
                        "",
                        "## Non Goals",
                        "- Do not redesign the CLI surface.",
                        "",
                        "## Decision Boundaries",
                        "- Escalate before broad UX changes.",
                        "",
                        "## Constraints",
                        "- Stay within the current repository conventions.",
                        "",
                        "## Likely Touchpoints",
                        "- planned.txt",
                        "",
                        "## Acceptance Criteria",
                        "- planned.txt exists in the workspace root.",
                    ]
                )
            ),
            # First plan: create and verify planned.txt.
            CompletionResponse(
                content="\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        "- Create planned.txt in the workspace root.",
                        "",
                        "## Execution Order",
                        "1. Write planned.txt.",
                        "",
                        "## Risks",
                        "- Choosing the wrong output artifact.",
                        "",
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        "- planned.txt exists.",
                        "",
                        "## Verification Commands",
                        f"- `test -f {temp_dir / 'planned.txt'}`",
                        "",
                        "## Notes",
                        "- Verify the originally planned artifact.",
                    ]
                )
            ),
            # Write goes to notes.txt rather than the planned file.
            CompletionResponse(
                content="I'll create the notes artifact first.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "runtime notes\n",
                        },
                    )
                ],
            ),
            # Second question to the user.
            CompletionResponse(
                content="I need one more clarification before rebuilding the plan.",
                tool_calls=[
                    ToolCall(
                        id="ask-2",
                        name="AskUserQuestion",
                        arguments={
                            "question": (
                                "Which file should I actually focus on, "
                                "and what should stay unchanged?"
                            ),
                        },
                    )
                ],
            ),
            # Second brief: touchpoint and acceptance criteria name notes.txt.
            CompletionResponse(
                content="\n".join(
                    [
                        "## Task Statement",
                        task,
                        "",
                        "## Desired Outcome",
                        "- Keep the runtime artifact aligned with the actual work.",
                        "",
                        "## Non Goals",
                        "- Do not change the CLI surface.",
                        "",
                        "## Decision Boundaries",
                        "- Escalate before touching unrelated modules.",
                        "",
                        "## Constraints",
                        "- Stay within the repository.",
                        "",
                        "## Likely Touchpoints",
                        f"- {target.name}",
                        "",
                        "## Acceptance Criteria",
                        f"- {target.name} exists in the workspace root.",
                    ]
                )
            ),
            # Second plan: keep and verify notes.txt.
            CompletionResponse(
                content="\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Keep {target.name} as the runtime artifact.",
                        "",
                        "## Execution Order",
                        f"1. Confirm {target.name} remains the intended output.",
                        "",
                        "## Risks",
                        "- Accidentally verifying the stale artifact name.",
                        "",
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        f"- {target.name} exists in the workspace root.",
                        "",
                        "## Verification Commands",
                        f"- `test -f {target}`",
                        "",
                        "## Notes",
                        "- Rebuild the plan around the actual runtime artifact.",
                    ]
                )
            ),
            # NOTE(review): the closing message is scripted twice; presumably
            # the runtime requests one extra completion on this path - confirm.
            CompletionResponse(
                content="The refreshed brief and plan now match the notes artifact."
            ),
            CompletionResponse(
                content="The refreshed brief and plan now match the notes artifact."
            ),
        ]
    )

    # Canned user replies, handed out in order; only two are scripted.
    answers = iter(
        [
            (
                "Focus on the planned runtime artifact, keep the CLI unchanged, "
                "and stop before broad UX changes."
            ),
            "Focus on notes.txt and keep the CLI unchanged.",
        ]
    )

    # Question callback: ignores both arguments and returns the next reply.
    async def answer(_: str, __: list[str] | None) -> str:
        return next(answers)

    run = await run_scenario(
        task,
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    # Mode sequence: clarify at least twice, plan exactly twice, verify last.
    modes = workflow_modes(run)
    assert modes.count("clarify") >= 2
    assert modes.count("plan") == 2
    assert modes.count("execute") >= 2
    assert modes[-1] == "verify"
    assert target.read_text() == "runtime notes\n"
    # The timeline must record both replan reason codes.
    assert any(
        entry.reason_code == "full_replan_requires_clarify"
        for entry in run.agent.last_turn_summary.workflow_timeline
    )
    assert any(
        entry.reason_code == "full_replan_required"
        for entry in run.agent.last_turn_summary.workflow_timeline
    )
    # The session ledger must hold a contradicted assumption and a changed
    # acceptance anchor.
    assert any(
        item.status == "contradicted"
        for item in run.agent.session.workflow_ledger.assumptions
    )
    assert any(
        item.status == "changed"
        for item in run.agent.session.workflow_ledger.acceptance_anchors
    )