Python · 45616 bytes Raw Blame History
1 """Tests for no-tool text completion orchestration."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6
7 import pytest
8
9 from loader.agent.loop import Agent, AgentConfig
10 from loader.llm.base import Message, Role
11 from loader.runtime.conversation import ConversationRuntime
12 from loader.runtime.dod import VerificationEvidence
13 from loader.runtime.phases import TurnPhase
14 from loader.runtime.turn_completion import TurnCompletionAction
15 from loader.runtime.verification_observations import VerificationObservationStatus
16 from tests.helpers.runtime_harness import ScriptedBackend
17
18
def non_streaming_config() -> AgentConfig:
    """Build the AgentConfig reused by the direct turn-completion tests.

    Auto-context and streaming are switched off and the loop is capped at
    eight iterations.
    """

    settings = {"auto_context": False, "stream": False, "max_iterations": 8}
    return AgentConfig(**settings)
23
24
@pytest.mark.asyncio
async def test_turn_completion_requests_continuation_for_premature_text_response(
    temp_dir: Path,
) -> None:
    """A terse text reply to a fix request, with no actions taken, gets one nudge.

    Checks that the ``premature_completion_nudge`` decision is recorded on the
    summary, the session, the completion trace and the workflow timeline, and
    that the last session message is a user-role continuation prompt.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "I looked into it."
    outcome = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    summary = turn.summary
    nudge_code = "premature_completion_nudge"

    # Decision outcome and bookkeeping on summary/session.
    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert summary.completion_decision_code == nudge_code
    assert summary.completion_decision_summary == (
        "requested one continuation because the non-mutating response looked incomplete"
    )
    assert agent.session.last_completion_decision_code == nudge_code

    # Completion trace and workflow timeline.
    trace_codes = [entry.decision_code for entry in summary.completion_trace]
    assert trace_codes == [nudge_code]
    assert summary.completion_trace[0].stage == "continuation_check"
    tail_kinds = [entry.kind for entry in summary.workflow_timeline[-1:]]
    assert tail_kinds == ["completion_continue"]
    latest_entry = summary.workflow_timeline[-1]
    assert latest_entry.policy_stage == "continuation_check"
    assert latest_entry.policy_outcome == "continue"

    # The nudge itself is appended to the session as a user message.
    nudge = agent.session.messages[-1]
    assert nudge.role.value == "user"
    assert "concrete evidence" in nudge.content
    assert "Carry out the requested change or command now" in nudge.content
    assert any(event.type == "completion_check" for event in emitted)
91
92
@pytest.mark.asyncio
async def test_turn_completion_marks_non_mutating_response_done(
    temp_dir: Path,
) -> None:
    """An explanatory answer with no actions taken is accepted as complete.

    Checks the ``non_mutating_response_accepted`` decision, the two-entry
    completion trace, the skipped verification observations, and that the
    definition of done ends in the ``done`` state with a ``response`` event
    and a ``dod_status`` event emitted.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    answer = "Loader uses a bounded clarify loop before execution."
    outcome = await runtime.turn_completion.handle_text_response(
        content=answer,
        response_content=answer,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    summary = turn.summary
    accepted_code = "non_mutating_response_accepted"

    # Decision outcome and bookkeeping on summary/session.
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == answer
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_decision_summary == (
        "accepted the response because no mutating work required verification"
    )
    assert agent.session.last_completion_decision_code == accepted_code

    # Completion trace and the completion-related timeline entries.
    trace_codes = [entry.decision_code for entry in summary.completion_trace]
    assert trace_codes == ["completion_response_accepted", accepted_code]
    completion_entries = [
        entry
        for entry in summary.workflow_timeline
        if entry.kind.startswith("completion_")
    ]
    entry_kinds = [entry.kind for entry in completion_entries]
    assert entry_kinds == ["completion_check", "completion_complete"]
    assert completion_entries[0].policy_stage == "continuation_check"
    assert completion_entries[-1].policy_stage == "definition_of_done"

    # Verification was skipped, and that is reflected in provenance/observations.
    skipped_summary = "verification was skipped because no mutating work required checks"
    assert [
        item.summary for item in summary.completion_trace[-1].evidence_provenance
    ] == [skipped_summary]
    assert [
        item.status for item in summary.completion_trace[-1].verification_observations
    ] == [VerificationObservationStatus.SKIPPED.value]
    assert [
        item.summary for item in summary.completion_trace[-1].verification_observations
    ] == [skipped_summary]
    assert [
        item.status for item in completion_entries[-1].verification_observations
    ] == [VerificationObservationStatus.SKIPPED.value]

    # Definition of done and emitted events.
    assert turn.definition_of_done.status == "done"
    assert turn.definition_of_done.last_verification_result == "skipped"
    assert any(event.type == "response" for event in emitted)
    assert any(
        event.type == "dod_status" and event.dod_status == "done"
        for event in emitted
    )
188
189
@pytest.mark.asyncio
async def test_turn_completion_blocks_false_completion_without_preserving_it(
    temp_dir: Path,
) -> None:
    """A completion claim is rejected while a planned chapter file is still missing.

    The plan lists ``06-troubleshooting.html`` but only the index and first
    chapter are written. Checks that the claim is not stored as an assistant
    message, that a ``[PLANNED ARTIFACTS STILL MISSING]`` user message and a
    queued steering message name the missing file, and that no ``response``
    event is emitted.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Write a plan naming three files, then create only two of them on disk.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        " - `index.html`\n\n"
        "2. Create chapter files:\n"
        " - `chapters/01-getting-started.html`\n"
        " - `chapters/06-troubleshooting.html`\n"
    )
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(plan_text)
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    first_chapter = chapters_dir / "01-getting-started.html"
    first_chapter.write_text("<h1>Getting Started</h1>\n")
    index_page = temp_dir / "index.html"
    index_page.write_text("<h1>NGINX Guide</h1>\n")

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_page), str(first_chapter)])

    # Capture steering messages queued through the runtime context.
    queued_messages: list[str] = []
    runtime.context.queue_steering_message_callback = queued_messages.append

    completion_claim = (
        "I've successfully completed the NGINX guide with all planned files "
        "and verified everything is done."
    )
    outcome = await runtime.turn_completion.handle_text_response(
        content=completion_claim,
        response_content=completion_claim,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE

    # The false claim must not be preserved anywhere.
    assert turn.summary.assistant_messages == []
    assert all(
        message.role.value != "assistant" or message.content != completion_claim
        for message in agent.session.messages
    )

    # The session ends with a user message naming the missing artifact.
    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
    assert "`06-troubleshooting.html`" in follow_up.content

    # The queued steering message names it too.
    assert queued_messages
    latest_steer = queued_messages[-1]
    assert "06-troubleshooting.html" in latest_steer
    assert "Do not summarize, mark completion, or write bookkeeping notes yet" in latest_steer
    assert all(event.type != "response" for event in emitted)
285
286
@pytest.mark.asyncio
async def test_turn_completion_interrupts_progress_intent_once_output_files_exist(
    temp_dir: Path,
) -> None:
    """Narrating the next step, with some planned files already written, is interrupted.

    Checks for the ``in_progress_transition_continue`` decision, a
    ``[CONTINUE CURRENT STEP]`` user message naming ``02-installation.html``,
    and the absence of any ``[PLANNED ARTIFACTS STILL MISSING]`` user message.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan names three files; only the index and first chapter get written.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{temp_dir / 'index.html'}`\n\n"
        "2. Create chapter files:\n"
        f" - `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
        f" - `{temp_dir / 'chapters' / '02-installation.html'}`\n"
    )
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(plan_text)
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    index_page = temp_dir / "index.html"
    index_page.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter = chapters_dir / "01-introduction.html"
    intro_chapter.write_text("<h1>Intro</h1>\n")

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_page), str(intro_chapter)])
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the second chapter file for the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert turn.summary.completion_decision_code == "in_progress_transition_continue"
    assert turn.summary.assistant_messages[-1].content == narration

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content

    # No user message may use the missing-artifacts banner.
    assert all(
        message.role.value != "user"
        or not message.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
        for message in agent.session.messages
    )
374
375
@pytest.mark.asyncio
async def test_turn_completion_uses_quality_repair_prompt_for_rewrite_narration(
    temp_dir: Path,
) -> None:
    """Rewrite narration after a "Repair focus" message gets the quality-repair prompt.

    Checks that the follow-up user message starts with
    ``[CONTINUE QUALITY REPAIR]``, names the resolved chapter path, and asks
    for a single concrete tool call rather than a rewrite from memory.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Create an equally thorough HTML guide.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Create the chapter on disk and register it as touched by a write.
    chapter = temp_dir / "guides" / "nginx" / "chapters" / "01-introduction.html"
    chapter.parent.mkdir(parents=True)
    chapter.write_text("<html><body><h1>Intro</h1></body></html>\n")
    dod = turn.definition_of_done
    dod.touched_files.append(str(chapter))
    dod.mutating_actions.append("write")

    # Seed the session with a repair-focus user message targeting that chapter.
    repair_note = (
        "Repair focus:\n"
        f"- Improve `{chapter}`: insufficient structured content "
        "(12 blocks, expected at least 18).\n"
        f"- Immediate next step: edit `{chapter}` with a substantial "
        "expansion or replacement that satisfies its listed quality issue.\n"
    )
    agent.session.append(Message(role=Role.USER, content=repair_note))

    narration = (
        "Let me try a different approach by rewriting the entire file with more "
        "comprehensive content:"
    )
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert turn.summary.completion_decision_code == "in_progress_transition_continue"

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE QUALITY REPAIR]")
    assert str(chapter.resolve(strict=False)) in follow_up.content
    assert "one concrete `patch`, `edit`, or `write` tool call" in follow_up.content
    assert "Do not rewrite the whole file from memory" in follow_up.content
456
457
@pytest.mark.asyncio
async def test_turn_completion_allows_first_progress_narration_before_any_output_exists(
    temp_dir: Path,
) -> None:
    """First progress narration, with no planned file on disk yet, is kept as-is.

    Checks that the turn continues with the count bumped to one and that the
    narration stays the last session message in the assistant role.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Only the plan file is written; none of the planned outputs exist.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{temp_dir / 'index.html'}`\n"
        f"- `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
    )
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(plan_text)

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.pending_items.append("Develop the main index.html file for nginx guide")

    narration = "Now I'll create the main index.html file for the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert turn.summary.assistant_messages[-1].content == narration
    newest_message = agent.session.messages[-1]
    assert newest_message.role.value == "assistant"
527
528
@pytest.mark.asyncio
async def test_turn_completion_interrupts_repeated_concrete_progress_narration(
    temp_dir: Path,
) -> None:
    """Progress narration arriving with one prior continuation is interrupted.

    Starts from ``continuation_count=1`` and checks that the count rises to
    two and that a ``[CONTINUE CURRENT STEP]`` user message names
    ``02-installation.html``.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan names three files; only the index and first chapter get written.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{temp_dir / 'index.html'}`\n\n"
        "2. Create chapter files:\n"
        f" - `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
        f" - `{temp_dir / 'chapters' / '02-installation.html'}`\n"
    )
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(plan_text)
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    index_page = temp_dir / "index.html"
    index_page.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter = chapters_dir / "01-introduction.html"
    intro_chapter.write_text("<h1>Intro</h1>\n")

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_page), str(intro_chapter)])
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the second chapter file for the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=1,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 2

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content
611
612
@pytest.mark.asyncio
async def test_turn_completion_prioritizes_missing_artifact_continuation_over_text_loop(
    temp_dir: Path,
) -> None:
    """A repeated narration still gets a continue-step prompt, not a text-loop error.

    The same response is recorded twice with the safeguards before it is
    handled. Checks for the ``in_progress_transition_continue`` decision, a
    ``[CONTINUE CURRENT STEP]`` message naming ``02-installation.html``, an
    empty final response, and no ``error`` event mentioning
    "Text loop detected".
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan names three files; only the index and first chapter get written.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{temp_dir / 'index.html'}`\n\n"
        "2. Create chapter files:\n"
        f" - `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
        f" - `{temp_dir / 'chapters' / '02-installation.html'}`\n"
    )
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(plan_text)
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    index_page = temp_dir / "index.html"
    index_page.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter = chapters_dir / "01-introduction.html"
    intro_chapter.write_text("<h1>Intro</h1>\n")

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_page), str(intro_chapter)])
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Let me continue creating the remaining chapter files for the nginx guide:"
    # Record the identical response twice before handling it a third time.
    for _ in range(2):
        runtime.context.safeguards.record_response(narration)

    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=2,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert turn.summary.completion_decision_code == "in_progress_transition_continue"

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content

    assert not turn.summary.final_response
    assert all(
        event.type != "error" or "Text loop detected" not in event.content
        for event in emitted
    )
700
701
@pytest.mark.asyncio
async def test_turn_completion_interrupts_first_narration_after_concrete_target_prompt(
    temp_dir: Path,
) -> None:
    """Narration right after a user interruption naming a target file is interrupted.

    The session is seeded with a ``[USER INTERRUPTION]`` message pointing at
    ``index.html``. Checks that the first narration is answered with a
    ``[CONTINUE CURRENT STEP]`` user message mentioning ``index.html``.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Plan and empty chapters directory exist; no planned output file does.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{temp_dir / 'index.html'}`\n"
        f"- `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
    )
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(plan_text)
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.pending_items.append("Develop the main index.html file for nginx guide")

    # Seed the session with the interruption that names the concrete target.
    interruption = (
        "[USER INTERRUPTION]: Directory setup is complete. Continue with the next pending item: "
        "`Develop the main index.html file for nginx guide`. Resume by creating `index.html` now. "
        f"Prefer one `write` call for `{(temp_dir / 'index.html').resolve(strict=False)}` instead of more rereads. "
        "Make your next response the concrete mutation tool call itself, not another bookkeeping-only turn."
    )
    agent.session.append(Message(role=Role.USER, content=interruption))

    narration = "Now I'll create the main index.html file for the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert turn.summary.assistant_messages[-1].content == narration

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "index.html" in follow_up.content
787
788
@pytest.mark.asyncio
async def test_turn_completion_first_chapter_continuation_allows_compact_initial_version(
    temp_dir: Path,
) -> None:
    """The continue-step prompt for the first chapter asks for a compact initial version.

    The index exists and is marked touched; the planned first chapter does
    not exist. Checks that the ``[CONTINUE CURRENT STEP]`` message names
    ``01-introduction.html`` and contains the compact-initial-version wording
    (compared case-insensitively).
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Index page exists on disk; the chapters directory is still empty.
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    index_path = temp_dir / "index.html"
    index_path.write_text("<html></html>\n")

    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_path}`\n"
        f"- `{chapters_dir / '01-introduction.html'}`\n"
    )
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(plan_text)

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.touched_files.append(str(index_path))
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the first chapter of the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=1,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 2

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "01-introduction.html" in follow_up.content
    assert (
        "write a compact but real initial version of that file now"
        in follow_up.content.lower()
    )
864
865
@pytest.mark.asyncio
async def test_turn_completion_interrupts_first_chapter_narration_from_declared_index_graph(
    temp_dir: Path,
) -> None:
    """Chapter narration is interrupted when the index links to chapters not yet written.

    The plan lists only the index file and the chapters directory, while the
    index HTML links to two chapter files. Checks that the
    ``[CONTINUE CURRENT STEP]`` message names ``01-introduction.html``.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Write an index page whose anchors point at two chapter files.
    guide_root = temp_dir / "Loader" / "guides" / "nginx"
    chapters_dir = guide_root / "chapters"
    chapters_dir.mkdir(parents=True)
    index_path = guide_root / "index.html"
    index_lines = [
        "<!DOCTYPE html>",
        '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
        '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
        "",
    ]
    index_path.write_text("\n".join(index_lines))

    # The plan lists the index and the chapters directory, not individual chapters.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_path}`\n"
        f"- `{chapters_dir}/`\n"
    )
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(plan_text)

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.touched_files.append(str(index_path))
    dod.mutating_actions.append("write")
    dod.pending_items.append(
        "Develop the nginx guide content following the same structure and cadence as the fortran guide"
    )

    narration = "Now I'll create the first chapter of the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert turn.summary.completion_decision_code == "in_progress_transition_continue"

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "01-introduction.html" in follow_up.content
954
955
@pytest.mark.asyncio
async def test_turn_completion_handles_fake_tool_narration_without_reroute(
    temp_dir: Path,
) -> None:
    """Expect narrated tool usage to be accepted rather than rerouted.

    With ``completion_check`` switched off, a text reply that only claims a
    tool was run should complete the turn, be stored as the final response,
    and leave no "PRETENDING to use tools" message in the session.
    """

    scripted = ScriptedBackend()
    settings = non_streaming_config()
    settings.reasoning.completion_check = False
    agent = Agent(backend=scripted, config=settings, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    recorded = []

    async def record(event) -> None:
        recorded.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Summarize the current test status.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    narrated = "Used bash tool with command `pytest -q` and everything passed."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narrated,
        response_content=narrated,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    summary = prepared.summary
    accepted_code = "non_mutating_response_accepted"
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == narrated
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code

    # The session must not contain a message flagging the narration.
    assert all(
        "PRETENDING to use tools" not in message.content
        for message in agent.session.messages
    )

    # The narrated text itself must have been emitted as a response event.
    response_bodies = [
        event.content for event in recorded if event.type == "response"
    ]
    assert narrated in response_bodies
1016
1017
@pytest.mark.asyncio
async def test_turn_completion_handles_deflection_text_without_repair_prompt(
    temp_dir: Path,
) -> None:
    """Expect a "you can run X" reply to be accepted without a repair prompt.

    With ``completion_check`` switched off, the deflecting text should
    complete the turn and become the final response, and no
    "Please use your tools to execute the task" message should be added to
    the session.
    """

    scripted = ScriptedBackend()
    settings = non_streaming_config()
    settings.reasoning.completion_check = False
    agent = Agent(backend=scripted, config=settings, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    recorded = []

    async def record(event) -> None:
        recorded.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="What should I verify next?",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    deflection = "You can run pytest -q to verify the current state."
    outcome = await runtime.turn_completion.handle_text_response(
        content=deflection,
        response_content=deflection,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    summary = prepared.summary
    accepted_code = "non_mutating_response_accepted"
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == deflection
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code

    # The session must not contain a message pushing the model back to tools.
    assert all(
        "Please use your tools to execute the task" not in message.content
        for message in agent.session.messages
    )

    # The deflecting text itself must have been emitted as a response event.
    response_bodies = [
        event.content for event in recorded if event.type == "response"
    ]
    assert deflection in response_bodies
1078
1079
@pytest.mark.asyncio
async def test_turn_completion_skips_self_critique_reroute(
    temp_dir: Path,
) -> None:
    """Expect a hedged explanation to complete without a self-critique pass.

    ``self_critique`` is enabled and ``completion_check`` disabled; the test
    expects the reply to be accepted as-is, with no "[SELF-CRITIQUE]" message
    in the session and no ``critique`` event emitted.
    """

    scripted = ScriptedBackend()
    settings = non_streaming_config()
    settings.reasoning.completion_check = False
    settings.reasoning.self_critique = True
    agent = Agent(backend=scripted, config=settings, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    recorded = []

    async def record(event) -> None:
        recorded.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Deliberately hedged wording ("might", "perhaps") in the reply text.
    detailed = (
        "Loader might begin with a bounded clarify pass, perhaps asking follow-up "
        "questions when the task leaves touchpoints or decision boundaries unclear. "
        "It then shifts into execution once the workflow policy is satisfied."
    )
    outcome = await runtime.turn_completion.handle_text_response(
        content=detailed,
        response_content=detailed,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    summary = prepared.summary
    accepted_code = "non_mutating_response_accepted"
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == detailed
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code

    # Neither a critique message nor a critique event may have been produced.
    assert all(
        "[SELF-CRITIQUE]" not in message.content
        for message in agent.session.messages
    )
    assert "critique" not in [event.type for event in recorded]
1142
1143
@pytest.mark.asyncio
async def test_turn_completion_finalizes_when_follow_through_budget_is_exhausted(
    temp_dir: Path,
) -> None:
    """Expect finalization once the continuation budget is already used up.

    The text reply is handled with ``continuation_count`` equal to the
    configured ``max_continuation_prompts``; the test expects a FINALIZE
    decision with the ``continuation_budget_exhausted`` code, a recorded
    failure, missing-evidence provenance, and a trailing
    ``completion_check`` / ``error`` / ``response`` event sequence.
    """

    scripted = ScriptedBackend()
    agent = Agent(
        backend=scripted,
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    recorded = []

    async def record(event) -> None:
        recorded.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "I looked into it."
    outcome = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        # Start with the whole continuation budget already spent.
        continuation_count=agent.config.reasoning.max_continuation_prompts,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    summary = prepared.summary
    exhausted_code = "continuation_budget_exhausted"
    missing_evidence = ["showing the requested work was actually carried out"]

    assert outcome.action == TurnCompletionAction.FINALIZE
    assert outcome.finalize_reason_code == exhausted_code
    assert summary.final_response.startswith(
        "I stopped because I still could not show enough evidence"
    )
    assert summary.completion_decision_code == exhausted_code
    assert summary.failures == [
        "missing follow-through evidence after continuation budget exhaustion"
    ]

    last_trace = summary.completion_trace[-1]
    assert last_trace.outcome == "finalize"
    assert last_trace.decision_code == exhausted_code
    assert last_trace.evidence_summary == missing_evidence
    provenance_statuses = [item.status for item in last_trace.evidence_provenance]
    assert provenance_statuses == ["missing"]

    last_step = summary.workflow_timeline[-1]
    assert last_step.kind == "completion_finalize"
    assert last_step.evidence_summary == missing_evidence

    trailing_types = [event.type for event in recorded[-3:]]
    assert trailing_types == ["completion_check", "error", "response"]
1218
1219
@pytest.mark.asyncio
async def test_turn_completion_uses_observed_verification_for_budget_exhaustion(
    temp_dir: Path,
) -> None:
    """Expect the budget-exhaustion response to cite observed verification.

    The definition of done is seeded with a failed ``pytest -q`` evidence
    record before the reply is handled with the continuation budget already
    spent; the test expects the final response and the recorded verification
    observations to reflect that failure.
    """

    backend = ScriptedBackend()
    agent = Agent(
        backend=backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    events = []

    async def capture(event) -> None:
        events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Run pytest -q and make sure it works.",
        emit=capture,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )

    # Seed a failed verification run so there is observed evidence to report.
    failed_run = VerificationEvidence(
        command="pytest -q",
        passed=False,
        stderr="1 failed",
        kind="test",
    )
    prepared.definition_of_done.verification_commands = ["pytest -q"]
    prepared.definition_of_done.evidence = [failed_run]
    prepared.definition_of_done.last_verification_result = "failed"

    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        capture,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "The tests are done."
    decision = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        # Start with the whole continuation budget already spent.
        continuation_count=agent.config.reasoning.max_continuation_prompts,
        dod=prepared.definition_of_done,
        emit=capture,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    failed_status = [VerificationObservationStatus.FAILED.value]

    assert decision.action == TurnCompletionAction.FINALIZE
    assert decision.finalize_reason_code == "continuation_budget_exhausted"
    assert prepared.summary.final_response == (
        "I stopped because the continuation budget was exhausted and observed "
        "verification still showed: verification failed for `pytest -q` [1 failed]."
    )
    assert prepared.summary.completion_trace[-1].decision_code == (
        "continuation_budget_exhausted"
    )

    trace_observations = prepared.summary.completion_trace[
        -1
    ].verification_observations
    assert [item.status for item in trace_observations] == failed_status
    assert [item.summary for item in trace_observations] == [
        "verification failed for `pytest -q`"
    ]

    timeline_observations = prepared.summary.workflow_timeline[
        -1
    ].verification_observations
    assert [item.status for item in timeline_observations] == failed_status