Python · 54409 bytes Raw Blame History
1 """Tests for no-tool text completion orchestration."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6
7 import pytest
8
9 from loader.agent.loop import Agent, AgentConfig
10 from loader.llm.base import Message, Role
11 from loader.runtime.conversation import ConversationRuntime
12 from loader.runtime.dod import VerificationEvidence
13 from loader.runtime.phases import TurnPhase
14 from loader.runtime.turn_completion import TurnCompletionAction
15 from loader.runtime.verification_observations import VerificationObservationStatus
16 from tests.helpers.runtime_harness import ScriptedBackend
17
18
def non_streaming_config() -> AgentConfig:
    """Build the AgentConfig shared by the direct turn-completion tests.

    Auto-context and streaming are both switched off, and the iteration
    budget is fixed at eight.
    """
    settings = {"auto_context": False, "stream": False, "max_iterations": 8}
    return AgentConfig(**settings)
23
24
@pytest.mark.asyncio
async def test_turn_completion_requests_continuation_for_premature_text_response(
    temp_dir: Path,
) -> None:
    """Check that a bare "I looked into it." reply gets one continuation nudge.

    The turn is prepared in execute mode for "Fix the README heading." and the
    reply is handled with no recorded actions. The expected outcome is a
    CONTINUE decision coded ``premature_completion_nudge`` plus a trailing
    user message in the session.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "I looked into it."
    decision = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    # The decision asks for exactly one more pass.
    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1

    # The summary, the session and the completion trace all carry the nudge code.
    summary = prepared.summary
    nudge_code = "premature_completion_nudge"
    assert summary.completion_decision_code == nudge_code
    assert summary.completion_decision_summary == (
        "requested one continuation because the non-mutating response looked incomplete"
    )
    assert agent.session.last_completion_decision_code == nudge_code
    trace_codes = [entry.decision_code for entry in summary.completion_trace]
    assert trace_codes == [nudge_code]
    assert summary.completion_trace[0].stage == "continuation_check"

    # The newest workflow timeline entry is the continuation itself.
    newest_kinds = [entry.kind for entry in summary.workflow_timeline[-1:]]
    assert newest_kinds == ["completion_continue"]
    newest_entry = summary.workflow_timeline[-1]
    assert newest_entry.policy_stage == "continuation_check"
    assert newest_entry.policy_outcome == "continue"

    # The session ends with the follow-up prompt, and a check event was emitted.
    nudge = agent.session.messages[-1]
    assert nudge.role.value == "user"
    assert "concrete evidence" in nudge.content
    assert "Carry out the requested change or command now" in nudge.content
    assert any(event.type == "completion_check" for event in seen_events)
91
92
@pytest.mark.asyncio
async def test_turn_completion_marks_non_mutating_response_done(
    temp_dir: Path,
) -> None:
    """Check that an explanatory answer with no mutating work completes the turn.

    The task is "Explain Loader's clarify loop." and no actions are recorded.
    The expected outcome is a COMPLETE decision, a definition of done marked
    "done" with a "skipped" verification result, and matching trace, timeline
    and event records.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    answer = "Loader uses a bounded clarify loop before execution."
    decision = await runtime.turn_completion.handle_text_response(
        content=answer,
        response_content=answer,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    # Decision and recorded outcome.
    assert decision.action == TurnCompletionAction.COMPLETE
    summary = prepared.summary
    accepted_code = "non_mutating_response_accepted"
    assert summary.final_response == answer
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_decision_summary == (
        "accepted the response because no mutating work required verification"
    )
    assert agent.session.last_completion_decision_code == accepted_code
    trace_codes = [entry.decision_code for entry in summary.completion_trace]
    assert trace_codes == ["completion_response_accepted", accepted_code]

    # Only the completion_* entries of the workflow timeline matter here.
    policy_entries = [
        entry
        for entry in summary.workflow_timeline
        if entry.kind.startswith("completion_")
    ]
    policy_kinds = [entry.kind for entry in policy_entries]
    assert policy_kinds == ["completion_check", "completion_complete"]
    assert policy_entries[0].policy_stage == "continuation_check"
    assert policy_entries[-1].policy_stage == "definition_of_done"

    # The last trace entry records the skipped verification.
    skipped_summary = "verification was skipped because no mutating work required checks"
    skipped_status = VerificationObservationStatus.SKIPPED.value
    last_trace = summary.completion_trace[-1]
    assert [item.summary for item in last_trace.evidence_provenance] == [
        skipped_summary
    ]
    assert [item.status for item in last_trace.verification_observations] == [
        skipped_status
    ]
    assert [item.summary for item in last_trace.verification_observations] == [
        skipped_summary
    ]
    assert [
        item.status for item in policy_entries[-1].verification_observations
    ] == [skipped_status]

    # Definition-of-done state and the emitted events.
    dod = prepared.definition_of_done
    assert dod.status == "done"
    assert dod.last_verification_result == "skipped"
    assert any(event.type == "response" for event in seen_events)
    assert any(
        event.type == "dod_status" and event.dod_status == "done"
        for event in seen_events
    )
188
189
@pytest.mark.asyncio
async def test_turn_completion_blocks_false_completion_without_preserving_it(
    temp_dir: Path,
) -> None:
    """Check that a completion claim is rejected while a planned file is missing.

    The implementation plan lists ``chapters/06-troubleshooting.html`` but only
    the index and the first chapter exist on disk. The expected outcome is a
    CONTINUE decision, no stored copy of the assistant's claim, a
    "[PLANNED ARTIFACTS STILL MISSING]" user message, a queued steering
    message, and no "response" event.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Write the plan, then create two of the three planned files.
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        " - `index.html`\n\n"
        "2. Create chapter files:\n"
        " - `chapters/01-getting-started.html`\n"
        " - `chapters/06-troubleshooting.html`\n"
    )
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    first_chapter = chapters_dir / "01-getting-started.html"
    index_page = temp_dir / "index.html"
    first_chapter.write_text("<h1>Getting Started</h1>\n")
    index_page.write_text("<h1>NGINX Guide</h1>\n")

    dod = prepared.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_page), str(first_chapter)])

    queued_messages: list[str] = []
    runtime.context.queue_steering_message_callback = queued_messages.append

    completion_claim = (
        "I've successfully completed the NGINX guide with all planned files "
        "and verified everything is done."
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=completion_claim,
        response_content=completion_claim,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE

    # The claim is kept out of both the summary and the session history.
    assert prepared.summary.assistant_messages == []
    assert all(
        message.role.value != "assistant" or message.content != completion_claim
        for message in agent.session.messages
    )

    # The session ends with the missing-artifact prompt instead.
    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
    assert "`06-troubleshooting.html`" in follow_up.content

    # The steering callback received a message naming the missing file.
    assert queued_messages
    latest_steering = queued_messages[-1]
    assert "06-troubleshooting.html" in latest_steering
    assert (
        "Do not summarize, mark completion, or write bookkeeping notes yet"
        in latest_steering
    )
    assert not any(event.type == "response" for event in seen_events)
285
286
@pytest.mark.asyncio
async def test_turn_completion_interrupts_progress_intent_once_output_files_exist(
    temp_dir: Path,
) -> None:
    """Check the prompt sent for "I'll create ..." narration after files exist.

    The index and first chapter are already written and the plan still lists
    ``02-installation.html``. The expected outcome is a CONTINUE decision coded
    ``in_progress_transition_continue``, the narration stored as an assistant
    message, and a "[CONTINUE CURRENT STEP]" user message naming the missing
    chapter, with no "[PLANNED ARTIFACTS STILL MISSING]" message.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan lists three files; only the first two are created below.
    chapters_dir = temp_dir / "chapters"
    index_page = temp_dir / "index.html"
    intro_chapter = chapters_dir / "01-introduction.html"
    install_chapter = chapters_dir / "02-installation.html"
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{index_page}`\n\n"
        "2. Create chapter files:\n"
        f" - `{intro_chapter}`\n"
        f" - `{install_chapter}`\n"
    )
    chapters_dir.mkdir()
    index_page.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter.write_text("<h1>Intro</h1>\n")

    dod = prepared.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_page), str(intro_chapter)])
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the second chapter file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    summary = prepared.summary
    assert summary.completion_decision_code == "in_progress_transition_continue"
    assert summary.assistant_messages[-1].content == narration

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content

    # The missing-artifact prompt must not appear anywhere in the session.
    assert not any(
        message.role.value == "user"
        and message.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
        for message in agent.session.messages
    )
374
375
@pytest.mark.asyncio
async def test_turn_completion_uses_quality_repair_prompt_for_rewrite_narration(
    temp_dir: Path,
) -> None:
    """Check the prompt sent when the reply only narrates a planned rewrite.

    A "Repair focus" user message targeting one chapter is already in the
    session and ``completion_check`` is disabled. The expected outcome is a
    CONTINUE decision coded ``in_progress_transition_continue`` and a
    "[CONTINUE QUALITY REPAIR]" user message that names the resolved chapter
    path.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Create an equally thorough HTML guide.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Create the chapter under repair and register it as touched.
    chapter = temp_dir / "guides" / "nginx" / "chapters" / "01-introduction.html"
    chapter.parent.mkdir(parents=True)
    chapter.write_text("<html><body><h1>Intro</h1></body></html>\n")
    dod = prepared.definition_of_done
    dod.touched_files.append(str(chapter))
    dod.mutating_actions.append("write")

    repair_focus = (
        "Repair focus:\n"
        f"- Improve `{chapter}`: insufficient structured content "
        "(12 blocks, expected at least 18).\n"
        f"- Immediate next step: edit `{chapter}` with a substantial "
        "expansion or replacement that satisfies its listed quality issue.\n"
    )
    agent.session.append(Message(role=Role.USER, content=repair_focus))

    narration = (
        "Let me try a different approach by rewriting the entire file with more "
        "comprehensive content:"
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert (
        prepared.summary.completion_decision_code
        == "in_progress_transition_continue"
    )

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE QUALITY REPAIR]")
    assert str(chapter.resolve(strict=False)) in follow_up.content
    assert "one concrete `patch`, `edit`, or `write` tool call" in follow_up.content
    assert "Do not rewrite the whole file from memory" in follow_up.content
456
457
@pytest.mark.asyncio
async def test_turn_completion_uses_exact_anchor_after_stale_quality_repair_context(
    temp_dir: Path,
) -> None:
    """Check that the repair prompt supplies an exact ``old_string`` anchor.

    The session holds a "Repair focus" message for one chapter followed by a
    tool observation reporting that an edit failed because ``old_string`` was
    not found. The expected "[CONTINUE QUALITY REPAIR]" prompt asks for a
    single ``edit`` call and quotes ``</body></html>`` from the current file.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Create an equally thorough HTML guide.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    chapter = temp_dir / "guides" / "nginx" / "chapters" / "05-load-balancing.html"
    chapter.parent.mkdir(parents=True)
    chapter.write_text("<html><body><h1>Load Balancing</h1></body></html>\n")
    dod = prepared.definition_of_done
    dod.touched_files.append(str(chapter))
    dod.mutating_actions.append("edit")

    # Session history: the repair request first, then the failed edit attempt.
    repair_focus = (
        "Repair focus:\n"
        f"- Improve `{chapter}`: thin content "
        "(846 text chars, expected at least 1758).\n"
        f"- Immediate next step: edit `{chapter}`.\n"
    )
    agent.session.append(Message(role=Role.USER, content=repair_focus))
    failed_edit = (
        "Observation [edit]: Error: Failed to complete the operation after "
        f"2 attempts for {chapter}. old_string not found in file."
    )
    agent.session.append(Message(role=Role.TOOL, content=failed_edit))

    narration = "I'll rewrite the load balancing chapter with comprehensive content."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    prompt = agent.session.messages[-1].content
    assert prompt.startswith("[CONTINUE QUALITY REPAIR]")
    assert (
        "exactly one `edit(file_path=..., old_string=..., new_string=...)`" in prompt
    )
    assert "Use this exact `old_string` value from the current file" in prompt
    assert "```html\n</body></html>\n```" in prompt
    assert "Do not call `read`, `patch`, `write`, `TodoWrite`, or summarize." in prompt
540
541
@pytest.mark.asyncio
async def test_turn_completion_forces_write_for_structural_html_repair(
    temp_dir: Path,
) -> None:
    """Check that a structural HTML issue yields a prompt demanding a ``write``.

    The chapter on disk has content after its closing ``</html>`` tag, and the
    "Repair focus" message reports two closing ``</html>`` tags. The expected
    "[CONTINUE QUALITY REPAIR]" prompt describes the malformed structure and
    asks for exactly one ``write`` call.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Create an equally thorough HTML guide.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    chapter = temp_dir / "guides" / "nginx" / "chapters" / "08-troubleshooting.html"
    chapter.parent.mkdir(parents=True)
    chapter.write_text(
        "<!DOCTYPE html><html><body><h1>Troubleshooting</h1></body></html>\n"
        "<p>Trailing content.</p>\n"
    )
    dod = prepared.definition_of_done
    dod.touched_files.append(str(chapter))

    repair_focus = (
        "Repair focus:\n"
        f"- Improve `{chapter}`: expected exactly one closing </html> tag (found 2).\n"
        f"- Immediate next step: replace `{chapter}` with one complete valid HTML document.\n"
    )
    agent.session.append(Message(role=Role.USER, content=repair_focus))

    narration = "I will fix the malformed troubleshooting HTML structure."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    prompt = agent.session.messages[-1].content
    assert prompt.startswith("[CONTINUE QUALITY REPAIR]")
    assert "malformed HTML document structure" in prompt
    assert "expected exactly one closing </html>" in prompt
    assert "exactly one closing `</body>` tag" in prompt
    assert "exactly one `write(file_path=..., content=...)`" in prompt
616
617
@pytest.mark.asyncio
async def test_turn_completion_continues_queued_quality_repair_after_summary(
    temp_dir: Path,
) -> None:
    """Check that a summary reply does not end a still-queued quality repair.

    The latest user message points the repair at the installation chapter,
    while the assistant reply only summarises work on the introduction
    chapter. The expected outcome is a CONTINUE decision coded
    ``pending_quality_repair_continue`` and a "[CONTINUE QUALITY REPAIR]"
    prompt naming the resolved installation chapter path.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Repair generated HTML guide quality.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Both chapters exist on disk and are registered as touched.
    chapters_dir = temp_dir / "guides" / "nginx" / "chapters"
    intro_chapter = chapters_dir / "01-introduction.html"
    install_chapter = chapters_dir / "02-installation.html"
    chapters_dir.mkdir(parents=True)
    intro_chapter.write_text("<html><body><h1>Intro</h1></body></html>\n")
    install_chapter.write_text("<html><body><h1>Install</h1></body></html>\n")
    dod = prepared.definition_of_done
    dod.touched_files.extend([str(intro_chapter), str(install_chapter)])
    dod.mutating_actions.append("edit")

    retarget_notice = (
        "The active HTML content-quality repair target was updated. "
        f"Continue directly with the next listed quality target `{install_chapter}` "
        "using one substantial write/edit/patch anchored to current content.\n\n"
        "Repair focus:\n"
        f"- Improve `{install_chapter}`: thin content (513 text chars, expected at least 1758).\n"
        f"- Immediate next step: edit `{install_chapter}`.\n"
        "- Continue with one concrete `edit`, `patch`, or `write` call that actually changes the current generated file."
    )
    agent.session.append(Message(role=Role.USER, content=retarget_notice))

    reply = (
        "I've expanded the introduction chapter, so it should now meet the "
        "minimum quality threshold."
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert (
        prepared.summary.completion_decision_code
        == "pending_quality_repair_continue"
    )

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE QUALITY REPAIR]")
    assert str(install_chapter.resolve(strict=False)) in follow_up.content
    assert "one concrete `patch`, `edit`, or `write` tool call" in follow_up.content
703
704
@pytest.mark.asyncio
async def test_turn_completion_allows_first_progress_narration_before_any_output_exists(
    temp_dir: Path,
) -> None:
    """Check that a first "I'll create ..." reply passes while no output exists.

    Only the implementation plan is written; none of the planned files exist
    and no mutating action is recorded. The expected outcome is a CONTINUE
    decision with the narration stored, and the session still ending on an
    assistant message.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan names two files, neither of which is created in this test.
    index_page = temp_dir / "index.html"
    intro_chapter = temp_dir / "chapters" / "01-introduction.html"
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_page}`\n"
        f"- `{intro_chapter}`\n"
    )

    dod = prepared.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.pending_items.append("Develop the main index.html file for nginx guide")

    narration = "Now I'll create the main index.html file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    assert prepared.summary.assistant_messages[-1].content == narration
    # No user prompt was appended after the narration.
    assert agent.session.messages[-1].role.value == "assistant"
774
775
@pytest.mark.asyncio
async def test_turn_completion_interrupts_repeated_concrete_progress_narration(
    temp_dir: Path,
) -> None:
    """Check the prompt sent for progress narration on a second continuation.

    The call starts with ``continuation_count=1`` while the plan still lists
    ``02-installation.html``. The expected outcome is a CONTINUE decision with
    the count raised to 2 and a "[CONTINUE CURRENT STEP]" user message naming
    the missing chapter.
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan lists three files; only the first two are created below.
    chapters_dir = temp_dir / "chapters"
    index_page = temp_dir / "index.html"
    intro_chapter = chapters_dir / "01-introduction.html"
    install_chapter = chapters_dir / "02-installation.html"
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{index_page}`\n\n"
        "2. Create chapter files:\n"
        f" - `{intro_chapter}`\n"
        f" - `{install_chapter}`\n"
    )
    chapters_dir.mkdir()
    index_page.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter.write_text("<h1>Intro</h1>\n")

    dod = prepared.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_page), str(intro_chapter)])
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the second chapter file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=1,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 2

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content
858
859
@pytest.mark.asyncio
async def test_turn_completion_prioritizes_missing_artifact_continuation_over_text_loop(
    temp_dir: Path,
) -> None:
    """Check that a repeated reply yields a continuation, not a text-loop error.

    The same reply text is recorded twice with the safeguards before it is
    handled, and the plan still lists ``02-installation.html``. The expected
    outcome is a CONTINUE decision coded ``in_progress_transition_continue``,
    a "[CONTINUE CURRENT STEP]" prompt, no final response, and no error event
    mentioning "Text loop detected".
    """
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    seen_events = []

    async def record(event) -> None:
        seen_events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan lists three files; only the first two are created below.
    chapters_dir = temp_dir / "chapters"
    index_page = temp_dir / "index.html"
    intro_chapter = chapters_dir / "01-introduction.html"
    install_chapter = chapters_dir / "02-installation.html"
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{index_page}`\n\n"
        "2. Create chapter files:\n"
        f" - `{intro_chapter}`\n"
        f" - `{install_chapter}`\n"
    )
    chapters_dir.mkdir()
    index_page.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter.write_text("<h1>Intro</h1>\n")

    dod = prepared.definition_of_done
    dod.implementation_plan = str(plan_path)
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_page), str(intro_chapter)])
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Let me continue creating the remaining chapter files for the nginx guide:"
    # Record the same reply twice before it is handled.
    for _ in range(2):
        runtime.context.safeguards.record_response(narration)

    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=2,
        dod=dod,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert (
        prepared.summary.completion_decision_code
        == "in_progress_transition_continue"
    )

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content

    assert not prepared.summary.final_response
    assert not any(
        event.type == "error" and "Text loop detected" in event.content
        for event in seen_events
    )
947
948
@pytest.mark.asyncio
async def test_turn_completion_interrupts_first_narration_after_concrete_target_prompt(
    temp_dir: Path,
) -> None:
    """Narration right after a concrete-target prompt must be answered with a continue.

    The session already ends with a user interruption that names ``index.html``
    as the file to write next. A text-only "Now I'll create ..." reply is then
    expected to come back as ``CONTINUE`` with the continuation counter at 1,
    the narration recorded on the summary, and a ``[CONTINUE CURRENT STEP]``
    user message mentioning ``index.html`` appended to the session.
    """
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=ScriptedBackend(), config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    emitted: list = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Plan on disk lists the index and the first chapter; neither file exists yet.
    index_file = temp_dir / "index.html"
    first_chapter = temp_dir / "chapters" / "01-introduction.html"
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_file}`\n"
        f"- `{first_chapter}`\n"
    )
    (temp_dir / "chapters").mkdir()

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_file)
    dod.pending_items.append("Develop the main index.html file for nginx guide")

    # Seed the session with the interruption that points at the concrete target.
    resolved_index = index_file.resolve(strict=False)
    interruption = (
        "[USER INTERRUPTION]: Directory setup is complete. Continue with the next pending item: "
        "`Develop the main index.html file for nginx guide`. Resume by creating `index.html` now. "
        f"Prefer one `write` call for `{resolved_index}` instead of more rereads. "
        "Make your next response the concrete mutation tool call itself, not another bookkeeping-only turn."
    )
    agent.session.append(Message(role=Role.USER, content=interruption))

    narration = "Now I'll create the main index.html file for the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=dod,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert turn.summary.assistant_messages[-1].content == narration

    nudge = agent.session.messages[-1]
    assert nudge.role.value == "user"
    assert nudge.content.startswith("[CONTINUE CURRENT STEP]")
    assert "index.html" in nudge.content
1034
1035
@pytest.mark.asyncio
async def test_turn_completion_first_chapter_continuation_allows_compact_initial_version(
    temp_dir: Path,
) -> None:
    """The first-chapter nudge should ask for a compact but real initial file.

    ``index.html`` is already written and recorded as touched, while the
    planned ``01-introduction.html`` is not on disk. A narration-only reply at
    ``continuation_count=1`` is expected to yield ``CONTINUE`` with the counter
    at 2 and a ``[CONTINUE CURRENT STEP]`` message that names the chapter file
    and contains the "compact but real initial version" wording.
    """
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=ScriptedBackend(), config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    emitted: list = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Workspace state: chapters directory exists, index is written, chapter is not.
    chapter_root = temp_dir / "chapters"
    chapter_root.mkdir()
    index_file = temp_dir / "index.html"
    index_file.write_text("<html></html>\n")

    first_chapter = chapter_root / "01-introduction.html"
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_file}`\n"
        f"- `{first_chapter}`\n"
    )

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_file)
    dod.touched_files.append(str(index_file))
    dod.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the first chapter of the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=1,
        dod=dod,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 2

    nudge = agent.session.messages[-1]
    assert nudge.role.value == "user"
    assert nudge.content.startswith("[CONTINUE CURRENT STEP]")
    assert "01-introduction.html" in nudge.content
    assert "write a compact but real initial version of that file now" in nudge.content.lower()
1111
1112
@pytest.mark.asyncio
async def test_turn_completion_interrupts_first_chapter_narration_from_declared_index_graph(
    temp_dir: Path,
) -> None:
    """A chapter linked from the written index should be named in the continue nudge.

    The implementation plan lists only ``index.html`` and the ``chapters/``
    directory, but the index on disk links to two chapter files that do not
    exist. After a narration-only reply, the test expects ``CONTINUE`` with
    decision code ``in_progress_transition_continue`` and a
    ``[CONTINUE CURRENT STEP]`` message mentioning ``01-introduction.html``.
    """
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=ScriptedBackend(), config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    emitted: list = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Guide tree: the index exists and links to chapters that are still missing.
    guide_dir = temp_dir / "Loader" / "guides" / "nginx"
    chapter_root = guide_dir / "chapters"
    chapter_root.mkdir(parents=True)
    index_file = guide_dir / "index.html"
    index_markup = "\n".join(
        (
            "<!DOCTYPE html>",
            '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
            '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
            "",
        )
    )
    index_file.write_text(index_markup)

    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_file}`\n"
        f"- `{chapter_root}/`\n"
    )

    dod = turn.definition_of_done
    dod.implementation_plan = str(plan_file)
    dod.touched_files.append(str(index_file))
    dod.mutating_actions.append("write")
    dod.pending_items.append(
        "Develop the nginx guide content following the same structure and cadence as the fortran guide"
    )

    narration = "Now I'll create the first chapter of the nginx guide."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=dod,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    assert outcome.action == TurnCompletionAction.CONTINUE
    assert outcome.continuation_count == 1
    assert turn.summary.completion_decision_code == "in_progress_transition_continue"

    nudge = agent.session.messages[-1]
    assert nudge.role.value == "user"
    assert nudge.content.startswith("[CONTINUE CURRENT STEP]")
    assert "01-introduction.html" in nudge.content
1201
1202
@pytest.mark.asyncio
async def test_turn_completion_handles_fake_tool_narration_without_reroute(
    temp_dir: Path,
) -> None:
    """Text that narrates a tool run is accepted as-is for a summarize-style task.

    Expects ``COMPLETE`` with decision code ``non_mutating_response_accepted``,
    the narrated text as the final response and as a ``response`` event, and
    no session message containing the "PRETENDING to use tools" wording.
    """
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=ScriptedBackend(), config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    emitted: list = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Summarize the current test status.",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    narrated = "Used bash tool with command `pytest -q` and everything passed."
    outcome = await runtime.turn_completion.handle_text_response(
        content=narrated,
        response_content=narrated,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    summary = turn.summary
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == narrated
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code

    # No repair prompt about pretended tool use may have been queued.
    assert all(
        "PRETENDING to use tools" not in message.content
        for message in agent.session.messages
    )
    assert any(
        event.type == "response" and event.content == narrated
        for event in emitted
    )
1263
1264
@pytest.mark.asyncio
async def test_turn_completion_handles_deflection_text_without_repair_prompt(
    temp_dir: Path,
) -> None:
    """A "you can run ..." style answer to a question is accepted without a repair prompt.

    Expects ``COMPLETE`` with decision code ``non_mutating_response_accepted``,
    the text as the final response and as a ``response`` event, and no session
    message containing "Please use your tools to execute the task".
    """
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=ScriptedBackend(), config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    emitted: list = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="What should I verify next?",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    deflection = "You can run pytest -q to verify the current state."
    outcome = await runtime.turn_completion.handle_text_response(
        content=deflection,
        response_content=deflection,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    summary = turn.summary
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == deflection
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code

    # The "use your tools" repair prompt must not appear anywhere in the session.
    assert all(
        "Please use your tools to execute the task" not in message.content
        for message in agent.session.messages
    )
    assert any(
        event.type == "response" and event.content == deflection
        for event in emitted
    )
1325
1326
@pytest.mark.asyncio
async def test_turn_completion_skips_self_critique_reroute(
    temp_dir: Path,
) -> None:
    """With ``self_critique`` enabled, a hedged explanation is still accepted directly.

    Expects ``COMPLETE`` with decision code ``non_mutating_response_accepted``,
    no ``[SELF-CRITIQUE]`` message in the session, and no ``critique`` event.
    """
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    cfg.reasoning.self_critique = True
    agent = Agent(backend=ScriptedBackend(), config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    emitted: list = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Deliberately hedged wording ("might", "perhaps") in an explanatory answer.
    detailed = (
        "Loader might begin with a bounded clarify pass, perhaps asking follow-up "
        "questions when the task leaves touchpoints or decision boundaries unclear. "
        "It then shifts into execution once the workflow policy is satisfied."
    )
    outcome = await runtime.turn_completion.handle_text_response(
        content=detailed,
        response_content=detailed,
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    summary = turn.summary
    assert outcome.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == detailed
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_trace[-1].decision_code == accepted_code
    assert all("[SELF-CRITIQUE]" not in message.content for message in agent.session.messages)
    assert all(event.type != "critique" for event in emitted)
1389
1390
@pytest.mark.asyncio
async def test_turn_completion_finalizes_when_follow_through_budget_is_exhausted(
    temp_dir: Path,
) -> None:
    """A vague reply with the continuation budget already spent must finalize.

    The call passes ``continuation_count`` equal to
    ``reasoning.max_continuation_prompts``. Expects ``FINALIZE`` with reason
    ``continuation_budget_exhausted``, a single recorded failure, matching
    trace and timeline entries with "missing" evidence provenance, and the
    last three events to be ``completion_check``, ``error``, ``response``.
    """
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted: list = []

    async def record_event(event) -> None:
        emitted.append(event)

    turn = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record_event,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record_event,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    outcome = await runtime.turn_completion.handle_text_response(
        content="I looked into it.",
        response_content="I looked into it.",
        task=turn.task,
        effective_task=turn.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=agent.config.reasoning.max_continuation_prompts,
        dod=turn.definition_of_done,
        emit=record_event,
        summary=turn.summary,
        executor=turn.executor,
        rollback_plan=turn.rollback_plan,
    )

    summary = turn.summary
    assert outcome.action == TurnCompletionAction.FINALIZE
    assert outcome.finalize_reason_code == "continuation_budget_exhausted"
    assert summary.final_response.startswith(
        "I stopped because I still could not show enough evidence"
    )
    assert summary.completion_decision_code == "continuation_budget_exhausted"
    assert summary.failures == [
        "missing follow-through evidence after continuation budget exhaustion"
    ]

    # Latest completion-trace entry describes the finalize decision.
    trace_entry = summary.completion_trace[-1]
    assert trace_entry.outcome == "finalize"
    assert trace_entry.decision_code == "continuation_budget_exhausted"
    assert trace_entry.evidence_summary == [
        "showing the requested work was actually carried out"
    ]
    provenance_statuses = [item.status for item in trace_entry.evidence_provenance]
    assert provenance_statuses == ["missing"]

    # Latest workflow-timeline entry carries the same evidence summary.
    timeline_entry = summary.workflow_timeline[-1]
    assert timeline_entry.kind == "completion_finalize"
    assert timeline_entry.evidence_summary == [
        "showing the requested work was actually carried out"
    ]

    trailing_event_types = [event.type for event in emitted[-3:]]
    assert trailing_event_types == ["completion_check", "error", "response"]
1465
1466
@pytest.mark.asyncio
async def test_turn_completion_uses_observed_verification_for_budget_exhaustion(
    temp_dir: Path,
) -> None:
    """Budget exhaustion should report the observed failed verification.

    The definition of done is seeded with a failed ``pytest -q`` evidence
    record before the text response is handled with the continuation budget
    already spent. Expects ``FINALIZE`` with a final response quoting the
    failed verification, and failed verification observations on both the
    last completion-trace entry and the last workflow-timeline entry.
    """
    backend = ScriptedBackend()
    agent = Agent(
        backend=backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    events: list = []

    async def capture(event) -> None:
        events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Run pytest -q and make sure it works.",
        emit=capture,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )

    # Seed a failed verification run before entering the assistant phase.
    failed_run = VerificationEvidence(
        command="pytest -q",
        passed=False,
        stderr="1 failed",
        kind="test",
    )
    prepared.definition_of_done.verification_commands = ["pytest -q"]
    prepared.definition_of_done.evidence = [failed_run]
    prepared.definition_of_done.last_verification_result = "failed"
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        capture,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "The tests are done."
    decision = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=agent.config.reasoning.max_continuation_prompts,
        dod=prepared.definition_of_done,
        emit=capture,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.FINALIZE
    assert decision.finalize_reason_code == "continuation_budget_exhausted"
    assert prepared.summary.final_response == (
        "I stopped because the continuation budget was exhausted and observed "
        "verification still showed: verification failed for `pytest -q` [1 failed]."
    )

    trace_entry = prepared.summary.completion_trace[-1]
    assert trace_entry.decision_code == "continuation_budget_exhausted"

    failed_status = VerificationObservationStatus.FAILED.value
    trace_observations = trace_entry.verification_observations
    assert [item.status for item in trace_observations] == [failed_status]
    assert [item.summary for item in trace_observations] == [
        "verification failed for `pytest -q`"
    ]

    timeline_observations = prepared.summary.workflow_timeline[-1].verification_observations
    assert [item.status for item in timeline_observations] == [failed_status]