Python · 33587 bytes Raw Blame History
1 """Tests for no-tool text completion orchestration."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6
7 import pytest
8
9 from loader.agent.loop import Agent, AgentConfig
10 from loader.llm.base import Message, Role
11 from loader.runtime.conversation import ConversationRuntime
12 from loader.runtime.dod import VerificationEvidence
13 from loader.runtime.phases import TurnPhase
14 from loader.runtime.turn_completion import TurnCompletionAction
15 from loader.runtime.verification_observations import VerificationObservationStatus
16 from tests.helpers.runtime_harness import ScriptedBackend
17
18
def non_streaming_config() -> AgentConfig:
    """Build the AgentConfig reused by the direct turn-completion tests.

    Streaming and automatic context are both switched off, and the iteration
    cap is fixed at 8.
    """
    config = AgentConfig(max_iterations=8, stream=False, auto_context=False)
    return config
23
24
@pytest.mark.asyncio
async def test_turn_completion_requests_continuation_for_premature_text_response(
    temp_dir: Path,
) -> None:
    """A terse text reply with no actions taken yields a continuation nudge.

    The decision must be ``CONTINUE`` with the continuation counter bumped to
    one, the ``premature_completion_nudge`` code recorded on the summary,
    session and completion trace, and a follow-up user message appended.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "I looked into it."
    outcome = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    nudge_code = "premature_completion_nudge"
    summary = turn.summary

    # Decision and bookkeeping recorded on the summary and the session.
    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert summary.completion_decision_code == nudge_code
    assert summary.completion_decision_summary == (
        "requested one continuation because the non-mutating response looked incomplete"
    )
    assert agent.session.last_completion_decision_code == nudge_code

    # Exactly one trace entry, produced by the continuation check.
    trace_codes = [entry.decision_code for entry in summary.completion_trace]
    assert trace_codes == [nudge_code]
    assert summary.completion_trace[0].stage == "continuation_check"

    # The newest workflow timeline entry reflects the "continue" policy outcome.
    newest_kinds = [entry.kind for entry in summary.workflow_timeline[-1:]]
    assert newest_kinds == ["completion_continue"]
    newest_entry = summary.workflow_timeline[-1]
    assert newest_entry.policy_stage == "continuation_check"
    assert newest_entry.policy_outcome == "continue"

    # The nudge itself is the last message in the session.
    nudge = agent.session.messages[-1]
    assert nudge.role.value == "user"
    assert "concrete evidence" in nudge.content
    assert "Carry out the requested change or command now" in nudge.content
    assert any(item.type == "completion_check" for item in emitted)
91
92
@pytest.mark.asyncio
async def test_turn_completion_marks_non_mutating_response_done(
    temp_dir: Path,
) -> None:
    """An explanatory answer to an explain-style task is accepted as complete.

    The decision must be ``COMPLETE``, the definition of done must end up
    ``done`` with verification recorded as skipped, and both a ``response``
    and a ``dod_status`` event must have been emitted.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    answer = "Loader uses a bounded clarify loop before execution."
    outcome = await runtime.turn_completion.handle_text_response(
        content=answer,
        response_content=answer,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    skipped_note = "verification was skipped because no mutating work required checks"
    summary = turn.summary

    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == answer
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_decision_summary == (
        "accepted the response because no mutating work required verification"
    )
    assert agent.session.last_completion_decision_code == accepted_code

    # Two trace entries: the response acceptance, then the final decision.
    trace_codes = [entry.decision_code for entry in summary.completion_trace]
    assert trace_codes == ["completion_response_accepted", accepted_code]

    # Only the completion-policy entries of the workflow timeline matter here.
    policy_entries = [
        item
        for item in summary.workflow_timeline
        if item.kind.startswith("completion_")
    ]
    policy_kinds = [item.kind for item in policy_entries]
    assert policy_kinds == ["completion_check", "completion_complete"]
    assert policy_entries[0].policy_stage == "continuation_check"
    assert policy_entries[-1].policy_stage == "definition_of_done"

    # Verification is reported as skipped on the trace and on the timeline.
    provenance_notes = [
        item.summary for item in summary.completion_trace[-1].evidence_provenance
    ]
    assert provenance_notes == [skipped_note]
    observed_statuses = [
        item.status
        for item in summary.completion_trace[-1].verification_observations
    ]
    assert observed_statuses == [VerificationObservationStatus.SKIPPED.value]
    observed_notes = [
        item.summary
        for item in summary.completion_trace[-1].verification_observations
    ]
    assert observed_notes == [skipped_note]
    timeline_statuses = [
        item.status for item in policy_entries[-1].verification_observations
    ]
    assert timeline_statuses == [VerificationObservationStatus.SKIPPED.value]

    assert turn.definition_of_done.status == "done"
    assert turn.definition_of_done.last_verification_result == "skipped"
    assert any(item.type == "response" for item in emitted)
    assert any(
        item.type == "dod_status" and item.dod_status == "done"
        for item in emitted
    )
188
189
@pytest.mark.asyncio
async def test_turn_completion_blocks_false_completion_without_preserving_it(
    temp_dir: Path,
) -> None:
    """A completion claim is rejected while a planned file is still absent.

    The implementation plan lists ``chapters/06-troubleshooting.html`` but the
    test never creates it. The claim must not be stored as an assistant
    message or emitted as a response; instead a "[PLANNED ARTIFACTS STILL
    MISSING]" user message and a queued steering message name the file.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Plan three files but materialize only two of them on disk.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        " - `index.html`\n\n"
        "2. Create chapter files:\n"
        " - `chapters/01-getting-started.html`\n"
        " - `chapters/06-troubleshooting.html`\n"
    )
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(plan_text)
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    first_chapter = chapters_dir / "01-getting-started.html"
    first_chapter.write_text("<h1>Getting Started</h1>\n")
    index_file = temp_dir / "index.html"
    index_file.write_text("<h1>NGINX Guide</h1>\n")

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_file), str(first_chapter)])

    # Collect whatever the runtime queues as steering messages.
    steering: list[str] = []
    runtime.context.queue_steering_message_callback = steering.append

    claim = (
        "I've successfully completed the NGINX guide with all planned files "
        "and verified everything is done."
    )
    outcome = await runtime.turn_completion.handle_text_response(
        content=claim,
        response_content=claim,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE

    # The false claim must not be preserved anywhere.
    assert turn.summary.assistant_messages == []
    assert not any(
        entry.role.value == "assistant" and entry.content == claim
        for entry in agent.session.messages
    )

    # The session ends with a reminder that names the missing artifact.
    reminder = agent.session.messages[-1]
    assert reminder.role.value == "user"
    assert reminder.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
    assert "`06-troubleshooting.html`" in reminder.content

    assert steering
    latest_steering = steering[-1]
    assert "06-troubleshooting.html" in latest_steering
    assert "Do not summarize, mark completion, or write bookkeeping notes yet" in latest_steering
    assert not any(item.type == "response" for item in emitted)
285
286
@pytest.mark.asyncio
async def test_turn_completion_interrupts_progress_intent_once_output_files_exist(
    temp_dir: Path,
) -> None:
    """Progress narration is interrupted once some planned files already exist.

    Two of three planned files are on disk and one pending item remains. The
    narration is kept as an assistant message, the decision code is
    ``in_progress_transition_continue``, and a "[CONTINUE CURRENT STEP]" user
    message names the remaining ``02-installation.html`` file.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Plan uses absolute paths; only the index and first chapter get created.
    index_file = temp_dir / "index.html"
    chapters_dir = temp_dir / "chapters"
    intro_chapter = chapters_dir / "01-introduction.html"
    install_chapter = chapters_dir / "02-installation.html"
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{index_file}`\n\n"
        "2. Create chapter files:\n"
        f" - `{intro_chapter}`\n"
        f" - `{install_chapter}`\n"
    )
    chapters_dir.mkdir()
    index_file.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter.write_text("<h1>Intro</h1>\n")

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_file), str(intro_chapter)])
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the second chapter file for the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert turn.summary.completion_decision_code == "in_progress_transition_continue"
    assert turn.summary.assistant_messages[-1].content == narration

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content

    # The missing-artifacts reminder must not be used for in-progress narration.
    assert not any(
        entry.role.value == "user"
        and entry.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
        for entry in agent.session.messages
    )
374
375
@pytest.mark.asyncio
async def test_turn_completion_allows_first_progress_narration_before_any_output_exists(
    temp_dir: Path,
) -> None:
    """First progress narration passes through when no planned file exists yet.

    With ``completion_check`` disabled and nothing written to disk, the
    decision is ``CONTINUE`` and the narration stays the last session message
    (role ``assistant``), i.e. no user message is appended after it.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan names two files; neither is created in this test.
    index_file = temp_dir / "index.html"
    intro_chapter = temp_dir / "chapters" / "01-introduction.html"
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_file}`\n"
        f"- `{intro_chapter}`\n"
    )

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.pending_items.append("Develop the main index.html file for nginx guide")

    narration = "Now I'll create the main index.html file for the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert turn.summary.assistant_messages[-1].content == narration
    newest_message = agent.session.messages[-1]
    assert newest_message.role.value == "assistant"
445
446
@pytest.mark.asyncio
async def test_turn_completion_interrupts_repeated_concrete_progress_narration(
    temp_dir: Path,
) -> None:
    """Narration on a second continuation still gets a concrete interrupt.

    The call starts with ``continuation_count=1`` and ``completion_check``
    disabled. The counter must advance to two and the session must end with a
    "[CONTINUE CURRENT STEP]" user message naming ``02-installation.html``.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Three planned files, of which the index and first chapter already exist.
    index_file = temp_dir / "index.html"
    chapters_dir = temp_dir / "chapters"
    intro_chapter = chapters_dir / "01-introduction.html"
    install_chapter = chapters_dir / "02-installation.html"
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{index_file}`\n\n"
        "2. Create chapter files:\n"
        f" - `{intro_chapter}`\n"
        f" - `{install_chapter}`\n"
    )
    chapters_dir.mkdir()
    index_file.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter.write_text("<h1>Intro</h1>\n")

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_file), str(intro_chapter)])
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the second chapter file for the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=1,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 2
    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content
529
530
@pytest.mark.asyncio
async def test_turn_completion_interrupts_first_narration_after_concrete_target_prompt(
    temp_dir: Path,
) -> None:
    """Narration right after a concrete-target prompt is interrupted at once.

    A "[USER INTERRUPTION]" message naming ``index.html`` is appended to the
    session before the call. Even on the first continuation the narration must
    be answered with a "[CONTINUE CURRENT STEP]" user message that mentions
    ``index.html``.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Only the chapters directory exists; no planned file has been written.
    index_file = temp_dir / "index.html"
    chapters_dir = temp_dir / "chapters"
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_file}`\n"
        f"- `{chapters_dir / '01-introduction.html'}`\n"
    )
    chapters_dir.mkdir()

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.pending_items.append("Develop the main index.html file for nginx guide")

    # Seed the session with a prompt that already names the concrete target.
    resolved_index = index_file.resolve(strict=False)
    interruption = (
        "[USER INTERRUPTION]: Directory setup is complete. Continue with the next pending item: "
        "`Develop the main index.html file for nginx guide`. Resume by creating `index.html` now. "
        f"Prefer one `write` call for `{resolved_index}` instead of more rereads. "
        "Make your next response the concrete mutation tool call itself, not another bookkeeping-only turn."
    )
    agent.session.append(Message(role=Role.USER, content=interruption))

    narration = "Now I'll create the main index.html file for the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert turn.summary.assistant_messages[-1].content == narration
    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "index.html" in follow_up.content
616
617
@pytest.mark.asyncio
async def test_turn_completion_handles_fake_tool_narration_without_reroute(
    temp_dir: Path,
) -> None:
    """Text that narrates a tool call is accepted rather than rerouted.

    With ``completion_check`` disabled the response is completed as a
    non-mutating answer, no "PRETENDING to use tools" message is added to the
    session, and the text is emitted unchanged as a ``response`` event.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Summarize the current test status.",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    narrated = "Used bash tool with command `pytest -q` and everything passed."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narrated,
        response_content=narrated,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    summary = turn.summary
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == narrated
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code
    assert not any(
        "PRETENDING to use tools" in entry.content
        for entry in agent.session.messages
    )
    assert any(
        item.type == "response" and item.content == narrated for item in emitted
    )
678
679
@pytest.mark.asyncio
async def test_turn_completion_handles_deflection_text_without_repair_prompt(
    temp_dir: Path,
) -> None:
    """A "you can run ..." style reply is accepted without a repair prompt.

    With ``completion_check`` disabled the response is completed as a
    non-mutating answer, no "Please use your tools to execute the task"
    message is added to the session, and the text is emitted as a
    ``response`` event.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="What should I verify next?",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    deflection = "You can run pytest -q to verify the current state."
    outcome = await runtime.turn_completion.handle_text_response(
        content=deflection,
        response_content=deflection,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    summary = turn.summary
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == deflection
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code
    assert not any(
        "Please use your tools to execute the task" in entry.content
        for entry in agent.session.messages
    )
    assert any(
        item.type == "response" and item.content == deflection for item in emitted
    )
740
741
@pytest.mark.asyncio
async def test_turn_completion_skips_self_critique_reroute(
    temp_dir: Path,
) -> None:
    """Enabling ``self_critique`` does not inject a critique round here.

    With ``self_critique`` on and ``completion_check`` off, a hedged
    explanation is still completed as a non-mutating answer: no
    "[SELF-CRITIQUE]" message reaches the session and no ``critique`` event
    is emitted.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    config.reasoning.self_critique = True
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    detailed = (
        "Loader might begin with a bounded clarify pass, perhaps asking follow-up "
        "questions when the task leaves touchpoints or decision boundaries unclear. "
        "It then shifts into execution once the workflow policy is satisfied."
    )
    outcome = await runtime.turn_completion.handle_text_response(
        content=detailed,
        response_content=detailed,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    summary = turn.summary
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == detailed
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code
    assert not any(
        "[SELF-CRITIQUE]" in entry.content for entry in agent.session.messages
    )
    assert not any(item.type == "critique" for item in emitted)
804
805
@pytest.mark.asyncio
async def test_turn_completion_finalizes_when_follow_through_budget_is_exhausted(
    temp_dir: Path,
) -> None:
    """The turn is finalized once the continuation budget is already spent.

    ``continuation_count`` is passed in at the configured
    ``max_continuation_prompts``. The decision must be ``FINALIZE`` with
    reason ``continuation_budget_exhausted``, a recorded failure, missing
    evidence provenance, and the last three events being
    ``completion_check``, ``error``, ``response``.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "I looked into it."
    outcome = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=agent.config.reasoning.max_continuation_prompts,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    exhausted_code = "continuation_budget_exhausted"
    missing_evidence = ["showing the requested work was actually carried out"]
    summary = turn.summary

    assert outcome.action == TurnCompletionAction.FINALIZE
    assert outcome.finalize_reason_code == exhausted_code
    assert summary.final_response.startswith(
        "I stopped because I still could not show enough evidence"
    )
    assert summary.completion_decision_code == exhausted_code
    assert summary.failures == [
        "missing follow-through evidence after continuation budget exhaustion"
    ]

    # The newest trace entry carries the finalize outcome and missing evidence.
    assert summary.completion_trace[-1].outcome == "finalize"
    assert summary.completion_trace[-1].decision_code == exhausted_code
    assert summary.completion_trace[-1].evidence_summary == missing_evidence
    provenance_statuses = [
        item.status for item in summary.completion_trace[-1].evidence_provenance
    ]
    assert provenance_statuses == ["missing"]

    # The workflow timeline mirrors the same finalize decision.
    assert summary.workflow_timeline[-1].kind == "completion_finalize"
    assert summary.workflow_timeline[-1].evidence_summary == missing_evidence

    closing_event_types = [item.type for item in emitted[-3:]]
    assert closing_event_types == ["completion_check", "error", "response"]
880
881
@pytest.mark.asyncio
async def test_turn_completion_uses_observed_verification_for_budget_exhaustion(
    temp_dir: Path,
) -> None:
    """Budget-exhaustion wording is derived from recorded verification evidence.

    The definition of done is seeded with a failed ``pytest -q`` evidence
    entry before the call. The final response must quote that failure and
    both the completion trace and the workflow timeline must report a single
    failed verification observation.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Run pytest -q and make sure it works.",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )

    # Seed a failed verification run before the assistant phase starts.
    failed_run = VerificationEvidence(
        command="pytest -q",
        passed=False,
        stderr="1 failed",
        kind="test",
    )
    turn.definition_of_done.verification_commands = ["pytest -q"]
    turn.definition_of_done.evidence = [failed_run]
    turn.definition_of_done.last_verification_result = "failed"

    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "The tests are done."
    outcome = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=agent.config.reasoning.max_continuation_prompts,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    exhausted_code = "continuation_budget_exhausted"
    summary = turn.summary

    assert outcome.action == TurnCompletionAction.FINALIZE
    assert outcome.finalize_reason_code == exhausted_code
    assert summary.final_response == (
        "I stopped because the continuation budget was exhausted and observed "
        "verification still showed: verification failed for `pytest -q` [1 failed]."
    )
    assert summary.completion_trace[-1].decision_code == exhausted_code

    trace_statuses = [
        item.status
        for item in summary.completion_trace[-1].verification_observations
    ]
    assert trace_statuses == [VerificationObservationStatus.FAILED.value]
    trace_notes = [
        item.summary
        for item in summary.completion_trace[-1].verification_observations
    ]
    assert trace_notes == ["verification failed for `pytest -q`"]
    timeline_statuses = [
        item.status
        for item in summary.workflow_timeline[-1].verification_observations
    ]
    assert timeline_statuses == [VerificationObservationStatus.FAILED.value]