Python · 54252 bytes Raw Blame History
1 """Tests for no-tool text completion orchestration."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6
7 import pytest
8
9 from loader.agent.loop import Agent, AgentConfig
10 from loader.llm.base import Message, Role
11 from loader.runtime.conversation import ConversationRuntime
12 from loader.runtime.dod import VerificationEvidence
13 from loader.runtime.phases import TurnPhase
14 from loader.runtime.turn_completion import TurnCompletionAction
15 from loader.runtime.verification_observations import VerificationObservationStatus
16 from tests.helpers.runtime_harness import ScriptedBackend
17
18
def non_streaming_config() -> AgentConfig:
    """Build the AgentConfig shared by the direct turn-completion tests.

    Auto-context and streaming are disabled and the iteration budget is 8.
    """

    settings = {"auto_context": False, "stream": False, "max_iterations": 8}
    return AgentConfig(**settings)
23
24
@pytest.mark.asyncio
async def test_turn_completion_requests_continuation_for_premature_text_response(
    temp_dir: Path,
) -> None:
    """A vague text-only reply to a fix request yields one continuation nudge."""
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    reply = "I looked into it."
    decision = await runtime.turn_completion.handle_text_response(
        content=reply,
        response_content=reply,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    # The decision itself: continue, with the counter bumped once.
    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1

    # The nudge code must be recorded on the summary, the session and the trace.
    summary = prepared.summary
    nudge_code = "premature_completion_nudge"
    assert summary.completion_decision_code == nudge_code
    assert summary.completion_decision_summary == (
        "requested one continuation because the non-mutating response looked incomplete"
    )
    assert agent.session.last_completion_decision_code == nudge_code
    trace_codes = [entry.decision_code for entry in summary.completion_trace]
    assert trace_codes == [nudge_code]
    assert summary.completion_trace[0].stage == "continuation_check"

    # The newest workflow timeline entry describes the continuation.
    newest_kinds = [entry.kind for entry in summary.workflow_timeline[-1:]]
    assert newest_kinds == ["completion_continue"]
    newest_entry = summary.workflow_timeline[-1]
    assert newest_entry.policy_stage == "continuation_check"
    assert newest_entry.policy_outcome == "continue"

    # The session ends with a user-role nudge asking for concrete action.
    nudge = agent.session.messages[-1]
    assert nudge.role.value == "user"
    assert "concrete evidence" in nudge.content
    assert "Carry out the requested change or command now" in nudge.content
    assert any(event.type == "completion_check" for event in emitted)
91
92
@pytest.mark.asyncio
async def test_turn_completion_marks_non_mutating_response_done(
    temp_dir: Path,
) -> None:
    """An explanatory answer with no mutating work is accepted and marked done."""
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    explanation = "Loader uses a bounded clarify loop before execution."
    decision = await runtime.turn_completion.handle_text_response(
        content=explanation,
        response_content=explanation,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    summary = prepared.summary
    accepted_code = "non_mutating_response_accepted"
    skipped_note = "verification was skipped because no mutating work required checks"
    skipped_status = VerificationObservationStatus.SKIPPED.value

    # Outcome and recorded decision metadata.
    assert decision.action == TurnCompletionAction.COMPLETE
    assert summary.final_response == explanation
    assert summary.completion_decision_code == accepted_code
    assert summary.completion_decision_summary == (
        "accepted the response because no mutating work required verification"
    )
    assert agent.session.last_completion_decision_code == accepted_code
    trace_codes = [entry.decision_code for entry in summary.completion_trace]
    assert trace_codes == ["completion_response_accepted", accepted_code]

    # Only the completion_* timeline entries are relevant to the policy checks.
    policy_entries = []
    for entry in summary.workflow_timeline:
        if entry.kind.startswith("completion_"):
            policy_entries.append(entry)
    assert [entry.kind for entry in policy_entries] == [
        "completion_check",
        "completion_complete",
    ]
    assert policy_entries[0].policy_stage == "continuation_check"
    assert policy_entries[-1].policy_stage == "definition_of_done"

    # Verification is reported as skipped on both the trace and the timeline.
    final_trace = summary.completion_trace[-1]
    assert [item.summary for item in final_trace.evidence_provenance] == [skipped_note]
    assert [item.status for item in final_trace.verification_observations] == [
        skipped_status
    ]
    assert [item.summary for item in final_trace.verification_observations] == [
        skipped_note
    ]
    assert [item.status for item in policy_entries[-1].verification_observations] == [
        skipped_status
    ]

    # Definition-of-done state and emitted events.
    assert prepared.definition_of_done.status == "done"
    assert prepared.definition_of_done.last_verification_result == "skipped"
    assert any(event.type == "response" for event in emitted)
    assert any(
        event.type == "dod_status" and event.dod_status == "done"
        for event in emitted
    )
188
189
@pytest.mark.asyncio
async def test_turn_completion_blocks_false_completion_without_preserving_it(
    temp_dir: Path,
) -> None:
    """A completion claim is rejected and not stored while a planned file is missing."""
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    guide_task = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=guide_task,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The plan lists three files; only two of them are created on disk below.
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        " - `index.html`\n\n"
        "2. Create chapter files:\n"
        " - `chapters/01-getting-started.html`\n"
        " - `chapters/06-troubleshooting.html`\n"
    )
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    first_chapter = chapters_dir / "01-getting-started.html"
    index_page = temp_dir / "index.html"
    first_chapter.write_text("<h1>Getting Started</h1>\n")
    index_page.write_text("<h1>NGINX Guide</h1>\n")

    done_criteria = prepared.definition_of_done
    done_criteria.implementation_plan = str(plan_path)
    done_criteria.mutating_actions.append("write")
    done_criteria.touched_files.extend([str(index_page), str(first_chapter)])

    # Collect steering messages instead of delivering them anywhere.
    queued_messages: list[str] = []
    runtime.context.queue_steering_message_callback = queued_messages.append

    completion_claim = (
        "I've successfully completed the NGINX guide with all planned files "
        "and verified everything is done."
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=completion_claim,
        response_content=completion_claim,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE

    # The false claim must not be kept in the summary or the session history.
    assert prepared.summary.assistant_messages == []
    preserved_claims = [
        message
        for message in agent.session.messages
        if message.role.value == "assistant" and message.content == completion_claim
    ]
    assert not preserved_claims

    # A user-role follow-up names the missing artifact.
    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
    assert "`06-troubleshooting.html`" in follow_up.content

    # The same guidance is queued as a steering message.
    assert queued_messages
    steering = queued_messages[-1]
    assert "06-troubleshooting.html" in steering
    assert "Do not summarize, mark completion, or write bookkeeping notes yet" in steering
    assert not any(event.type == "response" for event in emitted)
285
286
@pytest.mark.asyncio
async def test_turn_completion_interrupts_progress_intent_once_output_files_exist(
    temp_dir: Path,
) -> None:
    """Progress narration after some outputs exist gets a continue-current-step prompt."""
    agent = Agent(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    guide_task = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=guide_task,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Three planned files; the second chapter is deliberately left uncreated.
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{temp_dir / 'index.html'}`\n\n"
        "2. Create chapter files:\n"
        f" - `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
        f" - `{temp_dir / 'chapters' / '02-installation.html'}`\n"
    )
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    index_page = temp_dir / "index.html"
    intro_chapter = chapters_dir / "01-introduction.html"
    index_page.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter.write_text("<h1>Intro</h1>\n")

    done_criteria = prepared.definition_of_done
    done_criteria.implementation_plan = str(plan_path)
    done_criteria.mutating_actions.append("write")
    done_criteria.touched_files.extend([str(index_page), str(intro_chapter)])
    done_criteria.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the second chapter file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    assert prepared.summary.completion_decision_code == "in_progress_transition_continue"
    assert prepared.summary.assistant_messages[-1].content == narration

    # The follow-up points at the missing chapter.
    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content

    # No missing-artifacts message was sent.
    missing_artifact_prompts = [
        message
        for message in agent.session.messages
        if message.role.value == "user"
        and message.content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
    ]
    assert not missing_artifact_prompts
374
375
@pytest.mark.asyncio
async def test_turn_completion_uses_quality_repair_prompt_for_rewrite_narration(
    temp_dir: Path,
) -> None:
    """Rewrite narration during a quality repair gets the quality-repair prompt."""
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Create an equally thorough HTML guide.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # One generated chapter exists and is recorded as written.
    chapter = temp_dir / "guides" / "nginx" / "chapters" / "01-introduction.html"
    chapter.parent.mkdir(parents=True)
    chapter.write_text("<html><body><h1>Intro</h1></body></html>\n")
    prepared.definition_of_done.touched_files.append(str(chapter))
    prepared.definition_of_done.mutating_actions.append("write")

    # Seed the session with a prior repair-focus message for that chapter.
    repair_focus = Message(
        role=Role.USER,
        content=(
            "Repair focus:\n"
            f"- Improve `{chapter}`: insufficient structured content "
            "(12 blocks, expected at least 18).\n"
            f"- Immediate next step: edit `{chapter}` with a substantial "
            "expansion or replacement that satisfies its listed quality issue.\n"
        ),
    )
    agent.session.append(repair_focus)

    narration = (
        "Let me try a different approach by rewriting the entire file with more "
        "comprehensive content:"
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert prepared.summary.completion_decision_code == "in_progress_transition_continue"

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE QUALITY REPAIR]")
    assert str(chapter.resolve(strict=False)) in follow_up.content
    assert "one concrete `patch`, `edit`, or `write` tool call" in follow_up.content
    assert "Do not rewrite the whole file from memory" in follow_up.content
456
457
@pytest.mark.asyncio
async def test_turn_completion_forces_write_after_stale_quality_repair_context(
    temp_dir: Path,
) -> None:
    """After a failed edit observation, the repair prompt demands a single write call."""
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Create an equally thorough HTML guide.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    chapter = temp_dir / "guides" / "nginx" / "chapters" / "05-load-balancing.html"
    chapter.parent.mkdir(parents=True)
    chapter.write_text("<html><body><h1>Load Balancing</h1></body></html>\n")
    prepared.definition_of_done.touched_files.append(str(chapter))
    prepared.definition_of_done.mutating_actions.append("edit")

    # Session history: a repair-focus request followed by a failed edit observation.
    repair_focus = Message(
        role=Role.USER,
        content=(
            "Repair focus:\n"
            f"- Improve `{chapter}`: thin content "
            "(846 text chars, expected at least 1758).\n"
            f"- Immediate next step: edit `{chapter}`.\n"
        ),
    )
    agent.session.append(repair_focus)
    failed_edit = Message(
        role=Role.TOOL,
        content=(
            "Observation [edit]: Error: Failed to complete the operation after "
            f"2 attempts for {chapter}. old_string not found in file."
        ),
    )
    agent.session.append(failed_edit)

    narration = "I'll rewrite the load balancing chapter with comprehensive content."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    repair_prompt = agent.session.messages[-1].content
    assert repair_prompt.startswith("[CONTINUE QUALITY REPAIR]")
    assert "exactly one `write(file_path=..., content=...)`" in repair_prompt
    assert (
        "Do not call `read`, `edit`, `patch`, `TodoWrite`, or summarize."
        in repair_prompt
    )
538
539
@pytest.mark.asyncio
async def test_turn_completion_forces_write_for_structural_html_repair(
    temp_dir: Path,
) -> None:
    """A structural HTML repair focus leads to a prompt demanding one full write."""
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Create an equally thorough HTML guide.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # The chapter on disk has content trailing after the closing </html> tag.
    chapter = temp_dir / "guides" / "nginx" / "chapters" / "08-troubleshooting.html"
    chapter.parent.mkdir(parents=True)
    chapter.write_text(
        "<!DOCTYPE html><html><body><h1>Troubleshooting</h1></body></html>\n"
        "<p>Trailing content.</p>\n"
    )
    prepared.definition_of_done.touched_files.append(str(chapter))

    repair_focus = Message(
        role=Role.USER,
        content=(
            "Repair focus:\n"
            f"- Improve `{chapter}`: expected exactly one closing </html> tag (found 2).\n"
            f"- Immediate next step: replace `{chapter}` with one complete valid HTML document.\n"
        ),
    )
    agent.session.append(repair_focus)

    narration = "I will fix the malformed troubleshooting HTML structure."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    repair_prompt = agent.session.messages[-1].content
    assert repair_prompt.startswith("[CONTINUE QUALITY REPAIR]")
    # Each fragment must appear in the prompt; checked in the original order.
    required_fragments = (
        "malformed HTML document structure",
        "expected exactly one closing </html>",
        "exactly one closing `</body>` tag",
        "exactly one `write(file_path=..., content=...)`",
    )
    for fragment in required_fragments:
        assert fragment in repair_prompt
614
615
@pytest.mark.asyncio
async def test_turn_completion_continues_queued_quality_repair_after_summary(
    temp_dir: Path,
) -> None:
    """A summary of the previous target does not end a queued quality repair."""
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Repair generated HTML guide quality.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Two generated chapters; the session history already targets the second one.
    first = temp_dir / "guides" / "nginx" / "chapters" / "01-introduction.html"
    second = temp_dir / "guides" / "nginx" / "chapters" / "02-installation.html"
    second.parent.mkdir(parents=True)
    first.write_text("<html><body><h1>Intro</h1></body></html>\n")
    second.write_text("<html><body><h1>Install</h1></body></html>\n")

    done_criteria = prepared.definition_of_done
    done_criteria.touched_files.extend([str(first), str(second)])
    done_criteria.mutating_actions.append("edit")

    next_target_notice = Message(
        role=Role.USER,
        content=(
            "The active HTML content-quality repair target was updated. "
            f"Continue directly with the next listed quality target `{second}` "
            "using one substantial write/edit/patch anchored to current content.\n\n"
            "Repair focus:\n"
            f"- Improve `{second}`: thin content (513 text chars, expected at least 1758).\n"
            f"- Immediate next step: edit `{second}`.\n"
            "- Continue with one concrete `edit`, `patch`, or `write` call that actually changes the current generated file."
        ),
    )
    agent.session.append(next_target_notice)

    recap = (
        "I've expanded the introduction chapter, so it should now meet the "
        "minimum quality threshold."
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=recap,
        response_content=recap,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert prepared.summary.completion_decision_code == "pending_quality_repair_continue"

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE QUALITY REPAIR]")
    assert str(second.resolve(strict=False)) in follow_up.content
    assert "one concrete `patch`, `edit`, or `write` tool call" in follow_up.content
701
702
@pytest.mark.asyncio
async def test_turn_completion_allows_first_progress_narration_before_any_output_exists(
    temp_dir: Path,
) -> None:
    """First narration with no outputs on disk is kept as the last session message."""
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    guide_task = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=guide_task,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Only the plan file is written; none of the planned outputs exist yet.
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{temp_dir / 'index.html'}`\n"
        f"- `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
    )

    done_criteria = prepared.definition_of_done
    done_criteria.implementation_plan = str(plan_path)
    done_criteria.pending_items.append(
        "Develop the main index.html file for nginx guide"
    )

    narration = "Now I'll create the main index.html file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    assert prepared.summary.assistant_messages[-1].content == narration
    # The last session message is still the assistant's, not a user-role prompt.
    last_message = agent.session.messages[-1]
    assert last_message.role.value == "assistant"
772
773
@pytest.mark.asyncio
async def test_turn_completion_interrupts_repeated_concrete_progress_narration(
    temp_dir: Path,
) -> None:
    """Narration arriving with continuation_count=1 gets a continue-current-step prompt."""
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    guide_task = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=guide_task,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Three planned files; the second chapter is not created on disk.
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{temp_dir / 'index.html'}`\n\n"
        "2. Create chapter files:\n"
        f" - `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
        f" - `{temp_dir / 'chapters' / '02-installation.html'}`\n"
    )
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    index_page = temp_dir / "index.html"
    intro_chapter = chapters_dir / "01-introduction.html"
    index_page.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter.write_text("<h1>Intro</h1>\n")

    done_criteria = prepared.definition_of_done
    done_criteria.implementation_plan = str(plan_path)
    done_criteria.mutating_actions.append("write")
    done_criteria.touched_files.extend([str(index_page), str(intro_chapter)])
    done_criteria.pending_items.append("Create chapter files for nginx guide")

    narration = "Now I'll create the second chapter file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=1,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 2

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content
856
857
@pytest.mark.asyncio
async def test_turn_completion_prioritizes_missing_artifact_continuation_over_text_loop(
    temp_dir: Path,
) -> None:
    """A repeated narration still continues the current step; no text-loop error is emitted."""
    config = non_streaming_config()
    config.reasoning.completion_check = False
    agent = Agent(
        backend=ScriptedBackend(),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    guide_task = (
        "Create a multi-file nginx guide under ~/Loader/guides/nginx "
        "with an index and chapter files."
    )
    prepared = await runtime.turn_preparation.prepare(
        task=guide_task,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Three planned files; the second chapter is not created on disk.
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        f" - `{temp_dir / 'index.html'}`\n\n"
        "2. Create chapter files:\n"
        f" - `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
        f" - `{temp_dir / 'chapters' / '02-installation.html'}`\n"
    )
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    index_page = temp_dir / "index.html"
    intro_chapter = chapters_dir / "01-introduction.html"
    index_page.write_text("<h1>NGINX Guide</h1>\n")
    intro_chapter.write_text("<h1>Intro</h1>\n")

    done_criteria = prepared.definition_of_done
    done_criteria.implementation_plan = str(plan_path)
    done_criteria.mutating_actions.append("write")
    done_criteria.touched_files.extend([str(index_page), str(intro_chapter)])
    done_criteria.pending_items.append("Create chapter files for nginx guide")

    narration = "Let me continue creating the remaining chapter files for the nginx guide:"
    # Record the identical response twice with the safeguards before handling it.
    for _ in range(2):
        runtime.context.safeguards.record_response(narration)

    decision = await runtime.turn_completion.handle_text_response(
        content=narration,
        response_content=narration,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=2,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert prepared.summary.completion_decision_code == "in_progress_transition_continue"

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "02-installation.html" in follow_up.content

    # The turn was not finalised and no text-loop error event was emitted.
    assert not prepared.summary.final_response
    assert not any(
        event.type == "error" and "Text loop detected" in event.content
        for event in emitted
    )
945
946
@pytest.mark.asyncio
async def test_turn_completion_interrupts_first_narration_after_concrete_target_prompt(
    temp_dir: Path,
) -> None:
    """A narration-only reply after a concrete-target prompt yields a continuation.

    The session already holds a user message asking for a `write` call on
    `index.html`. The assistant then answers with plain narration and no
    actions, starting from a continuation count of zero. The test expects a
    CONTINUE decision, a continuation count of one, the narration recorded in
    the summary, and a trailing `[CONTINUE CURRENT STEP]` user message that
    names `index.html`.
    """
    scripted = ScriptedBackend()
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=scripted, config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    captured: list = []

    async def record(event) -> None:
        captured.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Plan on disk declares two targets; neither file exists yet.
    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{temp_dir / 'index.html'}`\n"
        f"- `{temp_dir / 'chapters' / '01-introduction.html'}`\n"
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(plan_text)
    (temp_dir / "chapters").mkdir()

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items.append("Develop the main index.html file for nginx guide")

    # Seed the prompt that already told the model to emit the write call.
    nudge = Message(
        role=Role.USER,
        content=(
            "[USER INTERRUPTION]: Directory setup is complete. Continue with the next pending item: "
            "`Develop the main index.html file for nginx guide`. Resume by creating `index.html` now. "
            f"Prefer one `write` call for `{(temp_dir / 'index.html').resolve(strict=False)}` instead of more rereads. "
            "Make your next response the concrete mutation tool call itself, not another bookkeeping-only turn."
        ),
    )
    agent.session.append(nudge)

    content = "Now I'll create the main index.html file for the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    assert prepared.summary.assistant_messages[-1].content == content

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "index.html" in follow_up.content
1032
1033
@pytest.mark.asyncio
async def test_turn_completion_first_chapter_continuation_allows_compact_initial_version(
    temp_dir: Path,
) -> None:
    """The first-chapter continuation prompt asks for a compact initial version.

    The index file exists and is recorded as touched; the planned first
    chapter does not exist. A narration-only reply at continuation count one
    must produce a CONTINUE decision with count two. The injected user message
    must name `01-introduction.html` and contain the "compact but real
    initial version" wording.
    """
    scripted = ScriptedBackend()
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=scripted, config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    captured: list = []

    async def record(event) -> None:
        captured.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Only the index is on disk; the chapter directory is empty.
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    index_path = temp_dir / "index.html"
    index_path.write_text("<html></html>\n")

    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_path}`\n"
        f"- `{chapters_dir / '01-introduction.html'}`\n"
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(plan_text)

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.pending_items.append("Create chapter files for nginx guide")

    content = "Now I'll create the first chapter of the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=1,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 2

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "01-introduction.html" in follow_up.content
    assert (
        "write a compact but real initial version of that file now"
        in follow_up.content.lower()
    )
1109
1110
@pytest.mark.asyncio
async def test_turn_completion_interrupts_first_chapter_narration_from_declared_index_graph(
    temp_dir: Path,
) -> None:
    """Chapter links in the written index drive the continuation target.

    The implementation plan only names the index file and the chapters
    directory, while the index on disk links to two chapter files. After a
    narration-only reply the test expects a CONTINUE decision with code
    `in_progress_transition_continue` and a `[CONTINUE CURRENT STEP]` user
    message that mentions `01-introduction.html`.
    """
    scripted = ScriptedBackend()
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=scripted, config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    captured: list = []

    async def record(event) -> None:
        captured.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    guide_root = temp_dir / "Loader" / "guides" / "nginx"
    chapters_dir = guide_root / "chapters"
    chapters_dir.mkdir(parents=True)

    # The index references chapter files that have not been created yet.
    index_markup = "\n".join(
        [
            "<!DOCTYPE html>",
            '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
            '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
            "",
        ]
    )
    index_path = guide_root / "index.html"
    index_path.write_text(index_markup)

    plan_text = (
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        f"- `{index_path}`\n"
        f"- `{chapters_dir}/`\n"
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(plan_text)

    dod = prepared.definition_of_done
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.mutating_actions.append("write")
    dod.pending_items.append(
        "Develop the nginx guide content following the same structure and cadence as the fortran guide"
    )

    content = "Now I'll create the first chapter of the nginx guide."
    decision = await runtime.turn_completion.handle_text_response(
        content=content,
        response_content=content,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    assert decision.action == TurnCompletionAction.CONTINUE
    assert decision.continuation_count == 1
    assert prepared.summary.completion_decision_code == "in_progress_transition_continue"

    follow_up = agent.session.messages[-1]
    assert follow_up.role.value == "user"
    assert follow_up.content.startswith("[CONTINUE CURRENT STEP]")
    assert "01-introduction.html" in follow_up.content
1199
1200
@pytest.mark.asyncio
async def test_turn_completion_handles_fake_tool_narration_without_reroute(
    temp_dir: Path,
) -> None:
    """Text that narrates a tool call is accepted as the final answer.

    With the completion check disabled and no actions taken, the test expects
    a COMPLETE decision coded `non_mutating_response_accepted`, no
    "PRETENDING to use tools" message in the session, and a `response` event
    carrying the narrated text.
    """
    scripted = ScriptedBackend()
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=scripted, config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    captured: list = []

    async def record(event) -> None:
        captured.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Summarize the current test status.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    narrated = "Used bash tool with command `pytest -q` and everything passed."
    decision = await runtime.turn_completion.handle_text_response(
        content=narrated,
        response_content=narrated,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    assert decision.action == TurnCompletionAction.COMPLETE
    assert prepared.summary.final_response == narrated
    assert prepared.summary.completion_decision_code == accepted_code
    assert prepared.summary.completion_trace[-1].decision_code == accepted_code

    # No corrective prompt may have been injected into the session.
    assert all(
        "PRETENDING to use tools" not in message.content
        for message in agent.session.messages
    )
    assert any(
        event.type == "response" and event.content == narrated
        for event in captured
    )
1261
1262
@pytest.mark.asyncio
async def test_turn_completion_handles_deflection_text_without_repair_prompt(
    temp_dir: Path,
) -> None:
    """A reply that suggests a command to the user is accepted unchanged.

    With the completion check disabled and no actions taken, the test expects
    a COMPLETE decision coded `non_mutating_response_accepted`, no
    "Please use your tools to execute the task" message in the session, and a
    `response` event carrying the original text.
    """
    scripted = ScriptedBackend()
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    agent = Agent(backend=scripted, config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    captured: list = []

    async def record(event) -> None:
        captured.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="What should I verify next?",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    deflection = "You can run pytest -q to verify the current state."
    decision = await runtime.turn_completion.handle_text_response(
        content=deflection,
        response_content=deflection,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    assert decision.action == TurnCompletionAction.COMPLETE
    assert prepared.summary.final_response == deflection
    assert prepared.summary.completion_decision_code == accepted_code
    assert prepared.summary.completion_trace[-1].decision_code == accepted_code

    # No repair prompt may have been injected into the session.
    assert all(
        "Please use your tools to execute the task" not in message.content
        for message in agent.session.messages
    )
    assert any(
        event.type == "response" and event.content == deflection
        for event in captured
    )
1323
1324
@pytest.mark.asyncio
async def test_turn_completion_skips_self_critique_reroute(
    temp_dir: Path,
) -> None:
    """Enabling self-critique does not reroute an explanatory text answer.

    Self-critique is on and the completion check is off. A hedged,
    multi-sentence explanation must be accepted as-is: COMPLETE decision,
    `non_mutating_response_accepted` code, no `[SELF-CRITIQUE]` message in the
    session, and no `critique` event emitted.
    """
    scripted = ScriptedBackend()
    cfg = non_streaming_config()
    cfg.reasoning.completion_check = False
    cfg.reasoning.self_critique = True
    agent = Agent(backend=scripted, config=cfg, project_root=temp_dir)
    runtime = ConversationRuntime(agent)
    captured: list = []

    async def record(event) -> None:
        captured.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Explain Loader's clarify loop.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    detailed = (
        "Loader might begin with a bounded clarify pass, perhaps asking follow-up "
        "questions when the task leaves touchpoints or decision boundaries unclear. "
        "It then shifts into execution once the workflow policy is satisfied."
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=detailed,
        response_content=detailed,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    accepted_code = "non_mutating_response_accepted"
    assert decision.action == TurnCompletionAction.COMPLETE
    assert prepared.summary.final_response == detailed
    assert prepared.summary.completion_decision_code == accepted_code
    assert prepared.summary.completion_trace[-1].decision_code == accepted_code

    # Neither a critique prompt nor a critique event may appear.
    assert all(
        "[SELF-CRITIQUE]" not in message.content
        for message in agent.session.messages
    )
    assert all(event.type != "critique" for event in captured)
1387
1388
@pytest.mark.asyncio
async def test_turn_completion_finalizes_when_follow_through_budget_is_exhausted(
    temp_dir: Path,
) -> None:
    """An evidence-free reply at the continuation limit finalizes the turn.

    The continuation count passed in equals the configured
    `max_continuation_prompts`. The test expects a FINALIZE decision coded
    `continuation_budget_exhausted`, a stop message as the final response, one
    recorded failure, matching trace and timeline entries, and the last three
    events to be `completion_check`, `error`, `response`.
    """
    scripted = ScriptedBackend()
    agent = Agent(
        backend=scripted,
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    captured: list = []

    async def record(event) -> None:
        captured.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task="Fix the README heading.",
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    decision = await runtime.turn_completion.handle_text_response(
        content="I looked into it.",
        response_content="I looked into it.",
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        # Start at the limit so no further continuation prompt is available.
        continuation_count=agent.config.reasoning.max_continuation_prompts,
        dod=prepared.definition_of_done,
        emit=record,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    exhausted_code = "continuation_budget_exhausted"
    missing_evidence = ["showing the requested work was actually carried out"]

    assert decision.action == TurnCompletionAction.FINALIZE
    assert decision.finalize_reason_code == exhausted_code
    assert prepared.summary.final_response.startswith(
        "I stopped because I still could not show enough evidence"
    )
    assert prepared.summary.completion_decision_code == exhausted_code
    assert prepared.summary.failures == [
        "missing follow-through evidence after continuation budget exhaustion"
    ]

    # Latest completion-trace entry.
    assert prepared.summary.completion_trace[-1].outcome == "finalize"
    assert prepared.summary.completion_trace[-1].decision_code == exhausted_code
    assert prepared.summary.completion_trace[-1].evidence_summary == missing_evidence
    provenance_statuses = [
        item.status
        for item in prepared.summary.completion_trace[-1].evidence_provenance
    ]
    assert provenance_statuses == ["missing"]

    # Latest workflow-timeline entry.
    assert prepared.summary.workflow_timeline[-1].kind == "completion_finalize"
    assert prepared.summary.workflow_timeline[-1].evidence_summary == missing_evidence

    trailing_event_types = [event.type for event in captured[-3:]]
    assert trailing_event_types == ["completion_check", "error", "response"]
1463
1464
1465 @pytest.mark.asyncio
1466 async def test_turn_completion_uses_observed_verification_for_budget_exhaustion(
1467 temp_dir: Path,
1468 ) -> None:
1469 backend = ScriptedBackend()
1470 agent = Agent(
1471 backend=backend,
1472 config=non_streaming_config(),
1473 project_root=temp_dir,
1474 )
1475 runtime = ConversationRuntime(agent)
1476 events = []
1477
1478 async def capture(event) -> None:
1479 events.append(event)
1480
1481 prepared = await runtime.turn_preparation.prepare(
1482 task="Run pytest -q and make sure it works.",
1483 emit=capture,
1484 requested_mode="execute",
1485 original_task=None,
1486 on_user_question=None,
1487 )
1488 prepared.definition_of_done.verification_commands = ["pytest -q"]
1489 prepared.definition_of_done.evidence = [
1490 VerificationEvidence(
1491 command="pytest -q",
1492 passed=False,
1493 stderr="1 failed",
1494 kind="test",
1495 )
1496 ]
1497 prepared.definition_of_done.last_verification_result = "failed"
1498 await runtime.phase_tracker.enter(
1499 TurnPhase.ASSISTANT,
1500 capture,
1501 detail="Requesting assistant response",
1502 reason_code="request_assistant_response",
1503 )
1504
1505 decision = await runtime.turn_completion.handle_text_response(
1506 content="The tests are done.",
1507 response_content="The tests are done.",
1508 task=prepared.task,
1509 effective_task=prepared.effective_task,
1510 iterations=1,
1511 max_iterations=agent.config.max_iterations,
1512 actions_taken=[],
1513 continuation_count=agent.config.reasoning.max_continuation_prompts,
1514 dod=prepared.definition_of_done,
1515 emit=capture,
1516 summary=prepared.summary,
1517 executor=prepared.executor,
1518 rollback_plan=prepared.rollback_plan,
1519 )
1520
1521 assert decision.action == TurnCompletionAction.FINALIZE
1522 assert decision.finalize_reason_code == "continuation_budget_exhausted"
1523 assert prepared.summary.final_response == (
1524 "I stopped because the continuation budget was exhausted and observed "
1525 "verification still showed: verification failed for `pytest -q` [1 failed]."
1526 )
1527 assert prepared.summary.completion_trace[-1].decision_code == (
1528 "continuation_budget_exhausted"
1529 )
1530 assert [
1531 item.status
1532 for item in prepared.summary.completion_trace[-1].verification_observations
1533 ] == [VerificationObservationStatus.FAILED.value]
1534 assert [
1535 item.summary
1536 for item in prepared.summary.completion_trace[-1].verification_observations
1537 ] == ["verification failed for `pytest -q`"]
1538 assert [
1539 item.status
1540 for item in prepared.summary.workflow_timeline[-1].verification_observations
1541 ] == [VerificationObservationStatus.FAILED.value]