Python · 42687 bytes Raw Blame History
1 """Tests for no-tool text completion orchestration."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6
7 import pytest
8
9 from loader.agent.loop import Agent, AgentConfig
10 from loader.llm.base import Message, Role
11 from loader.runtime.conversation import ConversationRuntime
12 from loader.runtime.dod import VerificationEvidence
13 from loader.runtime.phases import TurnPhase
14 from loader.runtime.turn_completion import TurnCompletionAction
15 from loader.runtime.verification_observations import VerificationObservationStatus
16 from tests.helpers.runtime_harness import ScriptedBackend
17
18
def non_streaming_config() -> AgentConfig:
    """Build the agent configuration shared by the direct turn-completion tests.

    Streaming and auto-context are switched off and the iteration cap is 8.
    """
    settings = AgentConfig(
        auto_context=False,
        stream=False,
        max_iterations=8,
    )
    return settings
23
24
@pytest.mark.asyncio
async def test_turn_completion_requests_continuation_for_premature_text_response(
    temp_dir: Path,
) -> None:
    """A short text reply with no actions taken yields a CONTINUE decision.

    The decision is expected to be recorded as ``premature_completion_nudge``
    and a user-role nudge message appended to the session.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event so the test can inspect them afterwards.
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "I looked into it."
    decision = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    # Decision returned to the caller.
    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1

    # Decision bookkeeping on the summary and the session.
    summary = prepared.summary
    assert summary.completion_decision_code == "premature_completion_nudge"
    assert summary.completion_decision_summary == (
        "requested one continuation because the non-mutating response looked incomplete"
    )
    assert agent.session.last_completion_decision_code == "premature_completion_nudge"

    trace_codes = [step.decision_code for step in summary.completion_trace]
    assert trace_codes == ["premature_completion_nudge"]
    assert summary.completion_trace[0].stage == "continuation_check"

    # The newest timeline entry describes the continuation.
    newest_kinds = [item.kind for item in summary.workflow_timeline[-1:]]
    assert newest_kinds == ["completion_continue"]
    newest_entry = summary.workflow_timeline[-1]
    assert newest_entry.policy_stage == "continuation_check"
    assert newest_entry.policy_outcome == "continue"

    # The nudge is appended to the session as a user message.
    nudge = agent.session.messages[-1]
    assert nudge.role.value == "user"
    assert "concrete evidence" in nudge.content
    assert "Carry out the requested change or command now" in nudge.content
    assert any(item.type == "completion_check" for item in emitted)
91
92
@pytest.mark.asyncio
async def test_turn_completion_marks_non_mutating_response_done(
    temp_dir: Path,
) -> None:
    """An explanatory answer with no mutating work is accepted as complete.

    The test expects a COMPLETE decision, a two-step completion trace ending in
    ``non_mutating_response_accepted``, a single skipped verification
    observation, and a definition of done marked ``done``.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event so the test can inspect them afterwards.
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    answer = "Loader uses a bounded clarify loop before execution."
    decision = await runtime.turn_completion.handle_text_response(
        content=answer,
        response_content=answer,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.COMPLETE

    # Decision bookkeeping on the summary and the session.
    summary = prepared.summary
    assert summary.final_response == (
        "Loader uses a bounded clarify loop before execution."
    )
    assert summary.completion_decision_code == "non_mutating_response_accepted"
    assert summary.completion_decision_summary == (
        "accepted the response because no mutating work required verification"
    )
    assert agent.session.last_completion_decision_code == (
        "non_mutating_response_accepted"
    )
    trace_codes = [step.decision_code for step in summary.completion_trace]
    assert trace_codes == [
        "completion_response_accepted",
        "non_mutating_response_accepted",
    ]

    # Only the completion-policy entries of the timeline are relevant here.
    policy_entries = [
        item
        for item in summary.workflow_timeline
        if item.kind.startswith("completion_")
    ]
    policy_kinds = [item.kind for item in policy_entries]
    assert policy_kinds == [
        "completion_check",
        "completion_complete",
    ]
    assert policy_entries[0].policy_stage == "continuation_check"
    assert policy_entries[-1].policy_stage == "definition_of_done"

    # The final trace step records that verification was skipped.
    skipped_note = "verification was skipped because no mutating work required checks"
    skipped_status = VerificationObservationStatus.SKIPPED.value
    final_step = summary.completion_trace[-1]
    provenance_notes = [item.summary for item in final_step.evidence_provenance]
    assert provenance_notes == [skipped_note]
    assert [
        item.status for item in final_step.verification_observations
    ] == [skipped_status]
    assert [
        item.summary for item in final_step.verification_observations
    ] == [skipped_note]
    assert [
        item.status for item in policy_entries[-1].verification_observations
    ] == [skipped_status]

    # Definition-of-done state and the emitted events.
    assert prepared.definition_of_done.status == "done"
    assert prepared.definition_of_done.last_verification_result == "skipped"
    assert any(item.type == "response" for item in emitted)
    assert any(
        item.type == "dod_status" and item.dod_status == "done"
        for item in emitted
    )
188
189
@pytest.mark.asyncio
async def test_turn_completion_blocks_false_completion_without_preserving_it(
    temp_dir: Path,
) -> None:
    """A completion claim is rejected while a planned file is still missing.

    The plan lists three files but only two exist on disk. The test expects a
    CONTINUE decision, no stored assistant message and no ``response`` event
    for the claim, and a steering message that names the missing chapter.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event so the test can inspect them afterwards.
        emitted.append(event)

    task_text = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=task_text,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Write a plan naming three artifacts, then create only two of them.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        " - `index.html`\n\n"
        "2. Create chapter files:\n"
        " - `chapters/01-getting-started.html`\n"
        " - `chapters/06-troubleshooting.html`\n"
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(plan_text)
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    first_chapter = chapters_dir / "01-getting-started.html"
    first_chapter.write_text("<h1>Getting Started</h1>\n")
    index_file = temp_dir / "index.html"
    index_file.write_text("<h1>NGINX Guide</h1>\n")

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_file), str(first_chapter)])

    # Capture steering messages queued through the runtime context callback.
    queued_messages: list[str] = []
    runtime.context.queue_steering_message_callback = queued_messages.append

    completion_claim = (
        "I've successfully completed the NGINX guide with all planned files "
        "and verified everything is done."
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=completion_claim,
        response_content=completion_claim,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE

    # The false claim must not be kept anywhere as an assistant message.
    assert prepared.summary.assistant_messages == []
    assert all(
        entry.role.value != "assistant" or entry.content != completion_claim
        for entry in agent.session.messages
    )

    # The session ends with a user-role notice about the missing artifact.
    notice = agent.session.messages[-1]
    assert notice.role.value == "user"
    assert notice.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
    assert "`06-troubleshooting.html`" in notice.content

    # The same gap is pushed through the steering callback.
    assert queued_messages
    steering = queued_messages[-1]
    assert "06-troubleshooting.html" in steering
    assert "Do not summarize, mark completion, or write bookkeeping notes yet" in steering
    assert not any(item.type == "response" for item in emitted)
285
286
@pytest.mark.asyncio
async def test_turn_completion_interrupts_progress_intent_once_output_files_exist(
    temp_dir: Path,
) -> None:
    """Progress narration is interrupted once some planned files already exist.

    Two of three planned files are on disk. The test expects a CONTINUE
    decision coded ``in_progress_transition_continue`` and a
    ``[CONTINUE CURRENT STEP]`` user message naming the missing chapter,
    with no ``[PLANNED ARTIFACTS STILL MISSING]`` message.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event.
        emitted.append(event)

    task_text = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=task_text,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Plan three files with absolute paths; create the index and first chapter.
    index_file = temp_dir / "index.html"
    chapters_dir = temp_dir / "chapters"
    intro_file = chapters_dir / "01-introduction.html"
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{index_file}`\n\n"
        "2. Create chapter files:\n"
        f" - `{intro_file}`\n"
        f" - `{chapters_dir / '02-installation.html'}`\n"
    )
    chapters_dir.mkdir()
    index_file.write_text("<h1>NGINX Guide</h1>\n")
    intro_file.write_text("<h1>Intro</h1>\n")

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_file), str(intro_file)])
    dod.pending_items.append("Create chapter files for nginx guide")

    content = "Now I'll create the second chapter file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    assert prepared.summary.completion_decision_code == "in_progress_transition_continue"
    assert prepared.summary.assistant_messages[-1].content == content

    # The follow-up prompt names the chapter that is still missing.
    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content

    # No missing-artifact notice should have been issued for narration.
    assert all(
        entry.role.value != "user"
        or not entry.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
        for entry in agent.session.messages
    )
374
375
@pytest.mark.asyncio
async def test_turn_completion_allows_first_progress_narration_before_any_output_exists(
    temp_dir: Path,
) -> None:
    """First narration with no planned file on disk continues without a prompt.

    The test expects a CONTINUE decision and the narration itself to be the
    last session message (role ``assistant``).
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event.
        emitted.append(event)

    task_text = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=task_text,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Only the plan file is written; none of the planned outputs exist yet.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{temp_dir / 'index.html'}`\n"
        f"- `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(plan_text)

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items.append("Develop the main index.html file for nginx guide")

    content = "Now I'll create the main index.html file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    assert prepared.summary.assistant_messages[-1].content == content
    # No user-role prompt was appended after the narration.
    newest_message = agent.session.messages[-1]
    assert newest_message.role.value == "assistant"
445
446
@pytest.mark.asyncio
async def test_turn_completion_interrupts_repeated_concrete_progress_narration(
    temp_dir: Path,
) -> None:
    """Narration after one prior continuation is answered with a step prompt.

    With ``continuation_count=1`` passed in, the test expects the count to
    become 2 and a ``[CONTINUE CURRENT STEP]`` user message naming the
    chapter that is not yet on disk.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event.
        emitted.append(event)

    task_text = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=task_text,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Plan three files with absolute paths; create the index and first chapter.
    index_file = temp_dir / "index.html"
    chapters_dir = temp_dir / "chapters"
    intro_file = chapters_dir / "01-introduction.html"
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{index_file}`\n\n"
        "2. Create chapter files:\n"
        f" - `{intro_file}`\n"
        f" - `{chapters_dir / '02-installation.html'}`\n"
    )
    chapters_dir.mkdir()
    index_file.write_text("<h1>NGINX Guide</h1>\n")
    intro_file.write_text("<h1>Intro</h1>\n")

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_file), str(intro_file)])
    dod.pending_items.append("Create chapter files for nginx guide")

    content = "Now I'll create the second chapter file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=1,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 2

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content
529
530
@pytest.mark.asyncio
async def test_turn_completion_prioritizes_missing_artifact_continuation_over_text_loop(
    temp_dir: Path,
) -> None:
    """A repeated narration still yields a step prompt, not a text-loop error.

    The same text is recorded twice with the safeguards before being handled.
    The test expects ``in_progress_transition_continue``, a
    ``[CONTINUE CURRENT STEP]`` prompt, no final response, and no error event
    mentioning "Text loop detected".
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event so the test can inspect them afterwards.
        emitted.append(event)

    task_text = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=task_text,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Plan three files with absolute paths; create the index and first chapter.
    index_file = temp_dir / "index.html"
    chapters_dir = temp_dir / "chapters"
    intro_file = chapters_dir / "01-introduction.html"
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{index_file}`\n\n"
        "2. Create chapter files:\n"
        f" - `{intro_file}`\n"
        f" - `{chapters_dir / '02-installation.html'}`\n"
    )
    chapters_dir.mkdir()
    index_file.write_text("<h1>NGINX Guide</h1>\n")
    intro_file.write_text("<h1>Intro</h1>\n")

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_file), str(intro_file)])
    dod.pending_items.append("Create chapter files for nginx guide")

    content = "Let me continue creating the remaining chapter files for the nginx guide:"
    # Record the identical response twice before it is handled.
    for _ in range(2):
        runtime.context.safeguards.record_response(content)

    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=2,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert prepared.summary.completion_decision_code == "in_progress_transition_continue"

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content

    assert not prepared.summary.final_response
    assert all(
        item.type != "error" or "Text loop detected" not in item.content
        for item in emitted
    )
618
619
@pytest.mark.asyncio
async def test_turn_completion_interrupts_first_narration_after_concrete_target_prompt(
    temp_dir: Path,
) -> None:
    """Narration right after a concrete interruption prompt gets a step prompt.

    The session already holds a ``[USER INTERRUPTION]`` message pointing at
    ``index.html``. The test expects a CONTINUE decision and a
    ``[CONTINUE CURRENT STEP]`` user message mentioning ``index.html``.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event.
        emitted.append(event)

    task_text = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=task_text,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan and the chapters directory exist; no planned file does.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{temp_dir / 'index.html'}`\n"
        f"- `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(plan_text)
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items.append("Develop the main index.html file for nginx guide")

    # Seed the session with an interruption that names the concrete target.
    resolved_index = (temp_dir / "index.html").resolve(strict=False)
    interruption = (
        "[USER INTERRUPTION]: Directory setup is complete. Continue with the next pending item: "
        "`Develop the main index.html file for nginx guide`. Resume by creating `index.html` now. "
        f"Prefer one `write` call for `{resolved_index}` instead of more rereads. "
        "Make your next response the concrete mutation tool call itself, not another bookkeeping-only turn."
    )
    agent.session.append(Message(role=Role.USER, content=interruption))

    content = "Now I'll create the main index.html file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    assert prepared.summary.assistant_messages[-1].content == content

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "index.html" in follow_up.content
705
706
@pytest.mark.asyncio
async def test_turn_completion_first_chapter_continuation_allows_compact_initial_version(
    temp_dir: Path,
) -> None:
    """The first-chapter step prompt asks for a compact but real initial file.

    The index exists and the first chapter does not. The test expects a
    ``[CONTINUE CURRENT STEP]`` user message that names the chapter and
    contains the "compact but real initial version" wording
    (case-insensitively).
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event.
        emitted.append(event)

    task_text = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=task_text,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Create the chapters directory and the index before writing the plan.
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    index_path = temp_dir / "index.html"
    index_path.write_text("<html></html>\n")

    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_path}`\n"
        f"- `{chapters_dir / '01-introduction.html'}`\n"
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(plan_text)

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.pending_items.append("Create chapter files for nginx guide")

    content = "Now I'll create the first chapter of the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=1,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 2

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "01-introduction.html" in follow_up.content
    expected_wording = "write a compact but real initial version of that file now"
    assert expected_wording in follow_up.content.lower()
782
783
@pytest.mark.asyncio
async def test_turn_completion_interrupts_first_chapter_narration_from_declared_index_graph(
    temp_dir: Path,
) -> None:
    """Chapter targets can come from links in the already-written index file.

    The plan lists only the index and the chapters directory, while the index
    HTML links to two chapter files. The test expects a
    ``[CONTINUE CURRENT STEP]`` user message naming ``01-introduction.html``.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event.
        emitted.append(event)

    task_text = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=task_text,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Build the guide tree and an index that links to two chapters.
    guide_root = temp_dir / "Loader" / "guides" / "nginx"
    chapters_dir = guide_root / "chapters"
    chapters_dir.mkdir(parents=True)
    index_lines = [
        "<!DOCTYPE html>",
        '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
        '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
        "",
    ]
    index_path = guide_root / "index.html"
    index_path.write_text("\n".join(index_lines))

    # The plan names the index and the chapters directory, not the chapters.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_path}`\n"
        f"- `{chapters_dir}/`\n"
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(plan_text)

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.mutating_actions.append("write")
    dod.pending_items.append(
        "Develop the nginx guide content following the same structure and cadence as the fortran guide"
    )

    content = "Now I'll create the first chapter of the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    assert prepared.summary.completion_decision_code == "in_progress_transition_continue"

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "01-introduction.html" in follow_up.content
872
873
@pytest.mark.asyncio
async def test_turn_completion_handles_fake_tool_narration_without_reroute(
    temp_dir: Path,
) -> None:
    """Text that narrates a tool call is accepted as the final response.

    The test expects a COMPLETE decision coded
    ``non_mutating_response_accepted``, no session message containing
    "PRETENDING to use tools", and a ``response`` event carrying the text.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event so the test can inspect them afterwards.
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Summarize the current test status.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    narrated = "Used bash tool with command `pytest -q` and everything passed."
    decision = await runtime.turn_completion.handle_text_response(
        content=narrated,
        response_content=narrated,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.COMPLETE

    summary = prepared.summary
    assert summary.final_response == narrated
    assert summary.completion_decision_code == "non_mutating_response_accepted"
    final_step = summary.completion_trace[-1]
    assert final_step.decision_code == "non_mutating_response_accepted"

    # No message containing the "PRETENDING to use tools" text was added.
    assert all(
        "PRETENDING to use tools" not in entry.content
        for entry in agent.session.messages
    )
    assert any(
        item.type == "response" and item.content == narrated
        for item in emitted
    )
934
935
@pytest.mark.asyncio
async def test_turn_completion_handles_deflection_text_without_repair_prompt(
    temp_dir: Path,
) -> None:
    """A "you can run ..." suggestion is accepted as the final response.

    The test expects a COMPLETE decision coded
    ``non_mutating_response_accepted``, no session message containing
    "Please use your tools to execute the task", and a ``response`` event
    carrying the text.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        # Collect every emitted event so the test can inspect them afterwards.
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="What should I verify next?",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    deflection = "You can run pytest -q to verify the current state."
    decision = await runtime.turn_completion.handle_text_response(
        content=deflection,
        response_content=deflection,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.COMPLETE

    summary = prepared.summary
    assert summary.final_response == deflection
    assert summary.completion_decision_code == "non_mutating_response_accepted"
    final_step = summary.completion_trace[-1]
    assert final_step.decision_code == "non_mutating_response_accepted"

    # No message containing the tool-use repair text was added.
    assert all(
        "Please use your tools to execute the task" not in entry.content
        for entry in agent.session.messages
    )
    assert any(
        item.type == "response" and item.content == deflection
        for item in emitted
    )
996
997
@pytest.mark.asyncio
async def test_turn_completion_skips_self_critique_reroute(
    temp_dir: Path,
) -> None:
    """Complete on a hedged explanatory reply even though self-critique is enabled.

    The config has ``reasoning.self_critique`` on and ``reasoning.completion_check``
    off. The assertions pin that the reply is accepted as-is with the
    ``non_mutating_response_accepted`` decision code, that no session message carries
    the ``[SELF-CRITIQUE]`` marker, and that no ``critique`` event is emitted.
    """
    settings = non_streaming_config()
    settings.reasoning.completion_check = False
    settings.reasoning.self_critique = True
    agent = Agent(backend=ScriptedBackend(), config=settings, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    explanation = (
        "Loader might begin with a bounded clarify pass, perhaps asking follow-up "
        "questions when the task leaves touchpoints or decision boundaries unclear. "
        "It then shifts into execution once the workflow policy is satisfied."
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=explanation,
        response_content=explanation,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    summary = prepared.summary

    assert decision.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == explanation
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code
    # Neither a critique message nor a critique event may appear for this reply.
    assert all("[SELF-CRITIQUE]" not in message.content for message in agent.session.messages)
    assert all(event.type != "critique" for event in emitted)
1060
1061
@pytest.mark.asyncio
async def test_turn_completion_finalizes_when_follow_through_budget_is_exhausted(
    temp_dir: Path,
) -> None:
    """Finalize with a failure summary when the continuation count is already at its cap.

    ``handle_text_response`` is called with ``continuation_count`` equal to
    ``reasoning.max_continuation_prompts`` and an empty action list. The assertions pin
    the ``continuation_budget_exhausted`` finalization: the response prefix, the
    recorded failure, the evidence carried on the last completion-trace and
    workflow-timeline entries, and the types of the final three emitted events.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "I looked into it."
    decision = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        # Start with the continuation counter already at the configured maximum.
        continuation_count=agent.config.reasoning.max_continuation_prompts,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    exhausted_code = "continuation_budget_exhausted"
    missing_evidence = ["showing the requested work was actually carried out"]
    summary = prepared.summary

    assert decision.action == TurnCompletionAction.FINALIZE
    assert decision.finalize_reason_code == exhausted_code
    assert summary.final_response.startswith(
        "I stopped because I still could not show enough evidence"
    )
    assert summary.completion_decision_code == exhausted_code
    assert summary.failures == [
        "missing follow-through evidence after continuation budget exhaustion"
    ]

    final_trace = summary.completion_trace[-1]
    assert final_trace.outcome == "finalize"
    assert final_trace.decision_code == exhausted_code
    assert final_trace.evidence_summary == missing_evidence
    assert [item.status for item in final_trace.evidence_provenance] == ["missing"]

    final_timeline_entry = summary.workflow_timeline[-1]
    assert final_timeline_entry.kind == "completion_finalize"
    assert final_timeline_entry.evidence_summary == missing_evidence

    trailing_event_types = [event.type for event in emitted[-3:]]
    assert trailing_event_types == ["completion_check", "error", "response"]
1136
1137
@pytest.mark.asyncio
async def test_turn_completion_uses_observed_verification_for_budget_exhaustion(
    temp_dir: Path,
) -> None:
    """Report the observed failing verification when the continuation budget runs out.

    The definition of done is seeded with a failed ``pytest -q`` evidence record before
    ``handle_text_response`` is called with the continuation counter at its configured
    maximum. The assertions pin the exact final response text and the failed
    verification observations on the last completion-trace and workflow-timeline
    entries.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Run pytest -q and make sure it works.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )

    # Seed the definition of done with one failed verification run.
    dod = prepared.definition_of_done
    dod.verification_commands = ["pytest -q"]
    failed_run = VerificationEvidence(
        command="pytest -q",
        passed=False,
        stderr="1 failed",
        kind="test",
    )
    dod.evidence = [failed_run]
    dod.last_verification_result = "failed"

    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "The tests are done."
    decision = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        # Start with the continuation counter already at the configured maximum.
        continuation_count=agent.config.reasoning.max_continuation_prompts,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    exhausted_code = "continuation_budget_exhausted"
    failed_status = VerificationObservationStatus.FAILED.value
    summary = prepared.summary

    assert decision.action == TurnCompletionAction.FINALIZE
    assert decision.finalize_reason_code == exhausted_code
    assert summary.final_response == (
        "I stopped because the continuation budget was exhausted and observed "
        "verification still showed: verification failed for `pytest -q` [1 failed]."
    )

    final_trace = summary.completion_trace[-1]
    assert final_trace.decision_code == exhausted_code

    trace_observations = final_trace.verification_observations
    assert [item.status for item in trace_observations] == [failed_status]
    assert [item.summary for item in trace_observations] == [
        "verification failed for `pytest -q`"
    ]

    timeline_observations = summary.workflow_timeline[-1].verification_observations
    assert [item.status for item in timeline_observations] == [failed_status]