# Python · 42106 bytes (file-viewer header captured with the listing; not part of the module)
1 """Tests for response-repair helpers on RuntimeContext."""
2
3 from __future__ import annotations
4
5 import json
6 from pathlib import Path
7 from types import SimpleNamespace
8
9 from loader.llm.base import Message, Role, ToolCall
10 from loader.runtime.context import RuntimeContext
11 from loader.runtime.dod import create_definition_of_done
12 from loader.runtime.permissions import (
13 PermissionMode,
14 build_permission_policy,
15 load_permission_rules,
16 )
17 from loader.runtime.repair import ResponseRepairer
18 from loader.tools.base import create_default_registry
19 from tests.helpers.runtime_harness import ScriptedBackend
20
21
class FakeSession:
    """In-memory session double that records appended messages in order."""

    def __init__(self) -> None:
        # Starts empty; tests read or extend this list directly.
        self.messages: list = []

    def append(self, message) -> None:
        """Add ``message`` to the end of ``messages``."""
        self.messages += [message]
28
29
class FakeCodeFilter:
    """Code-filter double whose only method, ``reset``, does nothing."""

    def reset(self) -> None:
        """Do nothing and return ``None``; the fake holds no state."""
33
34
class FakeSafeguards:
    """Pass-through safeguards double.

    Content filters hand their input back unchanged, steering is never
    requested, and both loop detectors answer ``(False, "")``.
    """

    def __init__(self) -> None:
        self.code_filter = FakeCodeFilter()
        # Opaque placeholders: bare objects with no behaviour of their own.
        self.validator = object()
        self.action_tracker = object()

    # -- content filtering: identity -------------------------------------

    def filter_stream_chunk(self, content: str) -> str:
        """Return the streamed chunk exactly as received."""
        return content

    def filter_complete_content(self, content: str) -> str:
        """Return the full content exactly as received."""
        return content

    # -- steering: never active ------------------------------------------

    def should_steer(self) -> bool:
        """Always report that no steering is needed."""
        return False

    def get_steering_message(self) -> str | None:
        """Always return ``None``: the fake has no steering message."""

    def record_response(self, content: str) -> None:
        """Accept a response and discard it."""

    # -- loop detection: never triggers ----------------------------------

    def detect_text_loop(self, content: str) -> tuple[bool, str]:
        """Return ``(False, "")`` regardless of ``content``."""
        return (False, "")

    def detect_loop(self) -> tuple[bool, str]:
        """Return ``(False, "")``."""
        return (False, "")
61
62
def build_context(
    *,
    temp_dir: Path,
    use_react: bool,
) -> RuntimeContext:
    """Assemble a ``RuntimeContext`` rooted at ``temp_dir`` from test doubles.

    The tool registry and permission policy are built against ``temp_dir``;
    the session and safeguards are the fakes defined in this module and the
    backend is a ``ScriptedBackend``.

    ``use_react`` is stored as ``config.force_react`` and, inverted, as
    ``capability_profile.supports_native_tools``.
    """
    tool_registry = create_default_registry(temp_dir)
    tool_registry.configure_workspace_root(temp_dir)

    permission_rules = load_permission_rules(temp_dir)
    workspace_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
        rules=permission_rules.rules,
    )

    fake_session = FakeSession()
    runtime_config = SimpleNamespace(force_react=use_react)
    capabilities = SimpleNamespace(supports_native_tools=not use_react)

    return RuntimeContext(
        project_root=temp_dir,
        backend=ScriptedBackend(),
        registry=tool_registry,
        session=fake_session,  # type: ignore[arg-type]
        config=runtime_config,
        capability_profile=capabilities,  # type: ignore[arg-type]
        project_context=None,
        permission_policy=workspace_policy,
        permission_config_status=permission_rules,
        workflow_mode="execute",
        safeguards=FakeSafeguards(),
    )
91
92
def test_response_repairer_uses_runtime_parser_for_bracket_tool_fallback(
    temp_dir: Path,
) -> None:
    """A bracketed pseudo tool call in the raw response yields one ``ToolCall``.

    With no native tool calls supplied, the analysis is expected to report a
    single ``AskUserQuestion`` call sourced from ``raw_text`` and to request
    that the stream be cleared.
    """
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))
    bracket_text = '[calls askuserquestion tool with: question="Which path?"]'

    result = repairer.analyze_response(
        content="I need clarification.",
        response_content=bracket_text,
        tool_calls=[],
        extracted_iterations=0,
        max_extracted_iterations=3,
    )

    recovered_call = ToolCall(
        id="call_0",
        name="AskUserQuestion",
        arguments={"question": "Which path?"},
    )
    assert result.tool_calls == [recovered_call]
    assert result.tool_source == "raw_text"
    assert result.clear_stream is True
119
120
def test_response_repairer_recovers_todowrite_from_runtime_registry(
    temp_dir: Path,
) -> None:
    """A JSON-encoded ``TodoWrite`` call in the raw response is recovered intact.

    The arguments of the recovered ``ToolCall`` must equal the arguments that
    were serialised into the response text.
    """
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    todo_arguments = {
        "todos": [
            {
                "content": "Run tests",
                "active_form": "Running tests",
                "status": "in_progress",
            }
        ]
    }
    serialized_call = json.dumps({"name": "TodoWrite", "arguments": todo_arguments})

    result = repairer.analyze_response(
        content="I'll track the work first.",
        response_content=serialized_call,
        tool_calls=[],
        extracted_iterations=0,
        max_extracted_iterations=3,
    )

    assert result.tool_source == "raw_text"
    assert result.clear_stream is True
    assert result.tool_calls == [
        ToolCall(id="call_0", name="TodoWrite", arguments=todo_arguments)
    ]
168
169
def test_response_repairer_fails_honestly_when_raw_tool_budget_is_exhausted(
    temp_dir: Path,
) -> None:
    """At ``extracted_iterations == max_extracted_iterations`` the repairer stops.

    The final response must be the fixed failure text, the failure reason must
    name the exhausted budget, and no "continue" offer may appear.
    """
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    # The same raw-text tool call is supplied as both content and response.
    raw_read_call = json.dumps(
        {
            "name": "read",
            "arguments": {"file_path": "README.md"},
        }
    )

    result = repairer.analyze_response(
        content=raw_read_call,
        response_content=raw_read_call,
        tool_calls=[],
        extracted_iterations=3,
        max_extracted_iterations=3,
    )

    expected_final_response = (
        "I couldn't safely continue because the model kept emitting raw-text "
        "tool calls instead of proper tool invocations. Please try again or "
        "switch to a different backend/model."
    )
    assert result.should_stop is True
    assert result.final_response == expected_final_response
    assert result.failure == "raw-text tool recovery budget exhausted"
    assert "Let me know if you'd like me to continue" not in result.final_response
205
206
def test_empty_response_retry_message_surfaces_missing_planned_artifacts_and_working_note(
    temp_dir: Path,
) -> None:
    """The retry message echoes the working note and the DoD progress.

    Setup: the plan lists ``index.html`` and ``chapters``; only ``index.html``
    exists and is recorded as touched. A tool observation carrying a working
    note is present in the session.
    """
    task = "Create a multi-file nginx guide."
    context = build_context(temp_dir=temp_dir, use_react=False)
    repairer = ResponseRepairer(context)

    nginx_dir = temp_dir / "guides" / "nginx"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{nginx_dir / 'index.html'}`",
        f"- `{nginx_dir / 'chapters'}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    first_artifact = nginx_dir / "index.html"
    first_artifact.parent.mkdir(parents=True)
    first_artifact.write_text("<html></html>\n")

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(first_artifact))
    dod.completed_items.append("Create the main index.html file")
    dod.pending_items.append("Create each chapter file in sequence")

    working_note = SimpleNamespace(
        role="tool",
        content=(
            "Observation [notepad_write_working]: Result: "
            "- [2026-04-21T19:17:34Z] Creating fifth chapter file: Advanced configurations"
        ),
    )
    context.session.append(working_note)

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    for expected in (
        "Latest working note: Creating fifth chapter file: Advanced configurations",
        "Confirmed touched files: `index.html`",
        "Confirmed completed work: Create the main index.html file",
        "Next pending item: Create each chapter file in sequence",
        "Continue from the confirmed progress below instead of restarting.",
    ):
        assert expected in retry_message
263
264
def test_empty_response_retry_mentions_write_can_create_missing_parent_directories(
    temp_dir: Path,
) -> None:
    """With nothing created yet, the retry points straight at writing ``index.html``.

    Setup: the plan lists only ``index.html``; neither the file nor its parent
    directories exist. Two items are pending.
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    index_path = temp_dir / "guides" / "nginx" / "index.html"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_path}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items.append("Create nginx guide directory structure")
    dod.pending_items.append("Write main index.html for nginx guide")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    expected_fragments = (
        "Resume with this exact next step: continue `Write main index.html for nginx guide` "
        "by creating `index.html`.",
        f"Prefer one `write(content=...)` call for `{index_path}` before more research.",
        "Do not restart discovery unless one specific missing fact blocks that file write.",
    )
    for expected in expected_fragments:
        assert expected in retry_message
322
323
def test_empty_response_retry_respects_discovery_first_pending_step(
    temp_dir: Path,
) -> None:
    """When the first pending item is discovery work, the retry advances that item.

    The message must ask for an evidence-gathering tool call and must not jump
    ahead to creating ``index.html``.
    """
    task = "Create a multi-file nginx guide."
    context = build_context(temp_dir=temp_dir, use_react=False)
    repairer = ResponseRepairer(context)

    nginx_dir = temp_dir / "guides" / "nginx"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{nginx_dir / 'index.html'}`",
        f"- `{nginx_dir / 'chapters'}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    discovery_step = (
        "First, examine the existing fortran guide structure and content to understand the format"
    )
    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items.extend(
        [
            discovery_step,
            "Create the nginx directory structure",
            "Develop the main index.html file for the nginx guide",
        ]
    )

    working_note = SimpleNamespace(
        role="tool",
        content=(
            "Observation [notepad_write_working]: Result: "
            "- [2026-04-22T22:42:18Z] Analyzing the fortran guide structure before creating nginx guide"
        ),
    )
    context.session.append(working_note)

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    assert (
        f"Resume with this exact next step: advance `{discovery_step}`."
        in retry_message
    )
    assert "one concrete evidence-gathering tool call" in retry_message
    assert "Resume with this exact next step: create `index.html`." not in retry_message
383
384
def test_empty_response_retry_budget_extends_for_late_stage_multi_artifact_progress(
    temp_dir: Path,
) -> None:
    """Retry 3 against a nominal budget of 2 still continues, shown as ``retry 3/4``.

    Setup: the index and three of the four planned chapters exist on disk and
    are recorded as touched; the fourth chapter is missing.
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_paths = [
        chapters / "01-getting-started.html",
        chapters / "02-installation.html",
        chapters / "03-first-website.html",
        chapters / "04-configuration-basics.html",
    ]

    # Everything except the last chapter is already written.
    written_paths = [index_path, *chapter_paths[:3]]
    written_bodies = [
        "<html></html>\n",
        "<h1>One</h1>\n",
        "<h1>Two</h1>\n",
        "<h1>Three</h1>\n",
    ]
    for path, body in zip(written_paths, written_bodies):
        path.write_text(body)

    plan_lines = ["# Implementation Plan", "", "## File Changes"]
    plan_lines += [f"- `{guide_root}/`", f"- `{chapters}/`"]
    plan_lines += [f"- `{path}`" for path in (index_path, *chapter_paths)]
    plan_lines.append("")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend([str(path) for path in written_paths])
    dod.completed_items.extend(
        [
            "Create the directory structure for the new nginx guide",
            "Create the main index.html file with proper structure",
        ]
    )
    dod.pending_items.append("Create each chapter file in sequence")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=3,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    assert "retry 3/4" in retry_message
    assert "Follow the same one-file-at-a-time mutation pattern" in retry_message
451
452
def test_empty_response_retry_budget_extends_when_concrete_next_output_is_known(
    temp_dir: Path,
) -> None:
    """Retry 3 against a nominal budget of 2 continues when the next file is known.

    Setup: nothing exists on disk; the plan lists ``index.html`` and
    ``chapters`` and the only pending item is developing ``index.html``.
    """
    task = "Create a multi-file nginx guide."
    pending_step = "Develop the main index.html file for the nginx guide"
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    nginx_dir = temp_dir / "guides" / "nginx"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{nginx_dir / 'index.html'}`",
        f"- `{nginx_dir / 'chapters'}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items.append(pending_step)

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=3,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    assert "retry 3/4" in retry_message
    assert "Next missing planned artifact: `index.html`" in retry_message
    assert (
        f"Resume with this exact next step: continue `{pending_step}` "
        "by creating `index.html`."
        in retry_message
    )
497
498
def test_empty_response_retry_uses_compact_prompt_after_substantial_progress(
    temp_dir: Path,
) -> None:
    """After substantial progress the retry uses the compact prompt.

    Setup: the index and four of the five planned chapters exist on disk; the
    index and the first three chapters are recorded as touched. A working-note
    tool observation is present in the session before the repairer is built.

    The compact prompt must contain the "exact next step" lead-in and must
    omit the working note, the completed-work summary and the pending-item
    summary.
    """
    task = "Create a multi-file nginx guide."
    context = build_context(temp_dir=temp_dir, use_react=False)

    # Use the same fixture shape as the sibling working-note tests in this
    # module (``role="tool"``, appended via ``session.append``). This makes the
    # note look like a tool observation, so the "Latest working note:" absence
    # check below cannot pass merely because the fixture lacked a role.
    context.session.append(
        SimpleNamespace(
            role="tool",
            content=(
                "Observation [notepad_write_working]: Result: "
                "- [2026-04-23T19:00:00Z] Creating fifth chapter file: Advanced features"
            ),
        )
    )
    repairer = ResponseRepairer(context)

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_paths = [
        chapters / "01-getting-started.html",
        chapters / "02-installation.html",
        chapters / "03-first-website.html",
        chapters / "04-configuration-basics.html",
        chapters / "05-advanced-features.html",
    ]

    # The fifth chapter is the only planned file that is not on disk.
    written_paths = [index_path, *chapter_paths[:4]]
    written_bodies = [
        "<html></html>\n",
        "<h1>One</h1>\n",
        "<h1>Two</h1>\n",
        "<h1>Three</h1>\n",
        "<h1>Four</h1>\n",
    ]
    for path, body in zip(written_paths, written_bodies):
        path.write_text(body)

    plan_lines = ["# Implementation Plan", "", "## File Changes"]
    plan_lines += [f"- `{guide_root}/`", f"- `{chapters}/`"]
    plan_lines += [f"- `{path}`" for path in (index_path, *chapter_paths)]
    plan_lines.append("")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    # Chapter four exists on disk but is left out of touched_files, exactly
    # as in the original fixture.
    dod.touched_files.extend([str(path) for path in (index_path, *chapter_paths[:3])])
    dod.completed_items.extend(
        [
            "Create the directory structure for the new nginx guide",
            "Create the main index.html file with proper structure",
        ]
    )
    dod.pending_items.append("Create each chapter file in sequence")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=3,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    assert "Continue from the exact next step below." in retry_message
    for omitted in (
        "Latest working note:",
        "Confirmed completed work:",
        "Next pending item:",
    ):
        assert omitted not in retry_message
578
579
def test_empty_response_retry_points_at_next_output_file_when_planned_directory_is_empty(
    temp_dir: Path,
) -> None:
    """An empty planned ``chapters/`` directory is named as the next artifact.

    Setup: ``index.html`` exists (with no links) and is touched; ``chapters/``
    exists but holds no files. The retry should ask for a file to be written
    inside ``chapters``.
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    index_path.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        f"- `{chapters / '02-installation.html'}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.pending_items.append("Write the introduction chapter")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    expected_fragments = (
        "Next missing planned artifact: `chapters/`",
        "Resume with this exact next step: continue `Write the introduction chapter` "
        "by creating the next output file under `chapters/`.",
        f"Prefer one concrete `write` call for a file inside `{chapters}` before more research.",
    )
    for expected in expected_fragments:
        assert expected in retry_message
636
637
def test_empty_response_retry_treats_develop_index_step_as_mutation_work(
    temp_dir: Path,
) -> None:
    """A pending "Develop ... index.html" item gets a write prompt, not discovery.

    Setup: the guide and chapters directories exist but no planned file has
    been written; discovery and directory creation are already completed.
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    # Creates guide_root and chapters in one call; both start out empty.
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{index_path}`",
        f"- `{chapters}/`",
        f"- `{chapter_one}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.completed_items.append(
        "First, examine the existing Fortran guide structure to understand the format and depth"
    )
    dod.completed_items.append("Create the new nginx guide directory structure")
    dod.pending_items.append("Develop the main index.html file with proper structure")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=2,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    assert (
        "Resume with this exact next step: continue `Develop the main index.html file with proper structure`"
        in retry_message
    )
    assert "Prefer one `write(content=...)` call" in retry_message
    assert (
        "Make the next response one concrete evidence-gathering tool call"
        not in retry_message
    )
696
697
def test_empty_response_retry_prefers_output_index_over_reference_index_with_same_name(
    temp_dir: Path,
) -> None:
    """The write prompt targets the nginx ``index.html``, not the fortran one.

    Setup: a fortran ``index.html`` exists and is recorded as touched; the
    nginx ``index.html`` is planned but missing. Both appear in the plan.
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guides_dir = temp_dir / "Loader" / "guides"
    nginx_root = guides_dir / "nginx"
    fortran_root = guides_dir / "fortran"
    for directory in (nginx_root, fortran_root):
        directory.mkdir(parents=True)

    reference_index = fortran_root / "index.html"
    reference_index.write_text("<html>fortran</html>\n")
    output_index = nginx_root / "index.html"

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{output_index}`",
        f"- `{nginx_root / 'chapters'}/`",
        f"- `{reference_index}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(reference_index))
    dod.completed_items.append(
        "First, examine the existing Fortran guide structure and content"
    )
    dod.pending_items.append("Develop the nginx index.html file")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=2,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    assert (
        f"Prefer one `write(content=...)` call for `{output_index}` before more research."
        in retry_message
    )
    assert str(reference_index) not in retry_message
753
754
def test_empty_response_retry_points_at_declared_child_file_within_incomplete_output_directory(
    temp_dir: Path,
) -> None:
    """A chapter linked from ``index.html`` becomes the concrete next file.

    Setup: ``index.html`` links to ``chapters/introduction.html`` and
    ``chapters/installation.html``; ``chapters/`` exists but is empty. The
    retry is expected to name ``introduction.html`` and its resolved path.
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    index_lines = (
        "<html>",
        '<a href="chapters/introduction.html">Introduction</a>',
        '<a href="chapters/installation.html">Installation</a>',
        "</html>",
    )
    index_path.write_text("".join(f"{line}\n" for line in index_lines))

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        f"- `{chapters / '02-installation.html'}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.pending_items.append("Write the introduction chapter")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    introduction_target = (chapters / "introduction.html").resolve(strict=False)
    expected_fragments = (
        "Next missing planned artifact: `chapters/`",
        "Next declared output under `chapters/`: `introduction.html`",
        "Resume with this exact next step: continue `Write the introduction chapter` "
        "by creating `introduction.html`.",
        f"Prefer one `write(content=...)` call for `{introduction_target}` "
        "before more research.",
    )
    for expected in expected_fragments:
        assert expected in retry_message
823
824
def test_empty_response_retry_infers_concrete_file_from_pending_todo_after_broad_artifacts_exist(
    temp_dir: Path,
) -> None:
    """A filename quoted in the pending todo becomes the concrete write target.

    Setup: ``index.html`` (no links) and ``01-introduction.html`` exist and
    are touched; the pending item names ``02-installation.html``.
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    for existing in (index_path, chapter_one):
        existing.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        f"- `{chapters / '02-installation.html'}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend([str(index_path), str(chapter_one)])
    dod.completed_items.append("Create index.html for nginx guide")
    dod.completed_items.append("Create first chapter file (01-introduction.html)")
    dod.pending_items.append("Create second chapter file (02-installation.html)")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=2,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    expected_fragments = (
        "Resume with this exact next step: continue `Create second chapter file "
        "(02-installation.html)` by creating `02-installation.html`.",
        f"Prefer one `write(content=...)` call for `{chapters / '02-installation.html'}` "
        "before more research.",
        "Do not return another working note or empty response",
    )
    for expected in expected_fragments:
        assert expected in retry_message
890
891
def test_empty_response_retry_maps_title_style_todo_to_html_graph_target(
    temp_dir: Path,
) -> None:
    """A todo phrased as a chapter title maps to the file linked under that title.

    Setup: ``index.html`` links "Chapter 2: Installation and Setup" to
    ``chapters/02-installation.html``; chapter one exists, chapter two does
    not. The pending item repeats the chapter-two title, not its filename.
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    index_lines = (
        "<html>",
        '<a href="chapters/01-introduction.html">Chapter 1: Introduction to NGINX Tool</a>',
        '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
        "</html>",
    )
    index_path.write_text("".join(f"{line}\n" for line in index_lines))
    chapter_one.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        f"- `{chapters / '02-installation.html'}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend([str(index_path), str(chapter_one)])
    dod.completed_items.append("Create index.html for nginx guide")
    dod.completed_items.append("Create Chapter 1: Introduction to NGINX Tool")
    dod.pending_items.append("Creating Chapter 2: Installation and Setup")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=2,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    installation_target = (chapters / "02-installation.html").resolve(strict=False)
    assert (
        "Resume with this exact next step: continue `Creating Chapter 2: Installation and Setup` "
        "by creating `02-installation.html`."
        in retry_message
    )
    assert (
        f"Prefer one `write(content=...)` call for `{installation_target}` "
        "before more research."
        in retry_message
    )
966
967
def test_empty_response_retry_uses_compact_prompt_after_early_progress_with_concrete_next_file(
    temp_dir: Path,
) -> None:
    """With early progress and a concrete next file, the compact prompt is used.

    The message must carry the "exact next step" lead-in and the resume line
    for ``02-installation.html``, and must omit the completed-work and
    pending-item summaries.
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    index_lines = (
        "<html>",
        '<a href="chapters/01-introduction.html">Introduction</a>',
        '<a href="chapters/02-installation.html">Installation</a>',
        "</html>",
    )
    index_path.write_text("".join(f"{line}\n" for line in index_lines))
    chapter_one.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        f"- `{chapters / '02-installation.html'}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend([str(index_path), str(chapter_one)])
    dod.completed_items.append("Create index.html for nginx guide")
    dod.completed_items.append("Create first chapter file (01-introduction.html)")
    dod.pending_items.append("Create second chapter file (02-installation.html)")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is True
    assert outcome.retry_message is not None
    retry_message = outcome.retry_message
    assert "Continue from the exact next step below." in retry_message
    assert "Confirmed completed work:" not in retry_message
    assert "Next pending item:" not in retry_message
    assert (
        "Resume with this exact next step: continue `Create second chapter file "
        "(02-installation.html)` by creating `02-installation.html`."
        in retry_message
    )
1040
1041
def test_empty_response_retry_fails_after_extended_late_stage_budget_is_exhausted(
    temp_dir: Path,
) -> None:
    """At retry 5 with a nominal budget of 2 the repairer gives up.

    Setup: the index and three of four planned chapters exist and are touched.
    The final response is expected to mention "retrying 4 times".
    """
    task = "Create a multi-file nginx guide."
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_paths = [
        chapters / "01-getting-started.html",
        chapters / "02-installation.html",
        chapters / "03-first-website.html",
        chapters / "04-configuration-basics.html",
    ]

    # Everything except the last chapter is already written.
    written_paths = [index_path, *chapter_paths[:3]]
    written_bodies = [
        "<html></html>\n",
        "<h1>One</h1>\n",
        "<h1>Two</h1>\n",
        "<h1>Three</h1>\n",
    ]
    for path, body in zip(written_paths, written_bodies):
        path.write_text(body)

    plan_lines = ["# Implementation Plan", "", "## File Changes"]
    plan_lines += [f"- `{guide_root}/`", f"- `{chapters}/`"]
    plan_lines += [f"- `{path}`" for path in (index_path, *chapter_paths)]
    plan_lines.append("")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend([str(path) for path in written_paths])
    dod.completed_items.extend(
        [
            "Create the directory structure for the new nginx guide",
            "Create the main index.html file with proper structure",
        ]
    )
    dod.pending_items.append("Create each chapter file in sequence")

    outcome = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=5,
        max_empty_retries=2,
        dod=dod,
    )

    assert outcome.should_continue is False
    assert outcome.final_response is not None
    assert "retrying 4 times" in outcome.final_response
1107
1108
def test_empty_response_retry_mentions_todowrite_when_progress_has_outpaced_tracking(
    temp_dir: Path,
) -> None:
    """Check the retry message when touched files outnumber tracked progress.

    Two of the three planned files are recorded as touched while a single
    aggregate item is still pending. The retry message must carry the
    "Continue from the exact next step below." prompt.

    NOTE(review): despite the test name, the assertion below requires the
    ``TodoWrite`` refresh reminder to be *absent* from the retry message --
    confirm the name still reflects the intended behaviour.
    """
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)

    planned = [
        guide_root / "index.html",
        chapters / "01-getting-started.html",
        chapters / "02-installation.html",
    ]
    plan_text = "\n".join(
        ["# Implementation Plan", "", "## File Changes"]
        + [f"- `{path}`" for path in planned]
        + [""]  # trailing entry so the plan ends with a newline
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(plan_text)

    task = "Create a multi-file nginx guide."
    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    # Only the first two planned files are marked as touched.
    dod.touched_files.extend([str(path) for path in planned[:2]])
    finished_items = [
        "Create the directory structure for the new nginx guide",
        "Create the main index.html file with proper structure",
    ]
    dod.completed_items.extend(finished_items)
    dod.pending_items.append("Create each chapter file in sequence")

    decision = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    retry_message = decision.retry_message
    assert retry_message is not None
    assert "Continue from the exact next step below." in retry_message
    assert "refresh `TodoWrite` alongside the next concrete mutation" not in retry_message
1163
1164
def test_empty_response_retry_omits_stale_aggregate_completed_work_when_artifacts_missing(
    temp_dir: Path,
) -> None:
    """Check that a completed item is not echoed while a planned file is missing.

    The definition of done lists "Link all chapters together properly" as
    completed, yet the third planned chapter file has not been written. The
    retry message must leave that item out while still carrying the
    "Continue from the exact next step below." and "Resume with this exact
    next step:" prompts.
    """
    repairer = ResponseRepairer(build_context(temp_dir=temp_dir, use_react=False))

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)

    # Files that exist on disk, in the order they are written and planned.
    existing = {
        guide_root / "index.html": "<html></html>\n",
        chapters / "01-getting-started.html": "<h1>One</h1>\n",
        chapters / "02-installation.html": "<h1>Two</h1>\n",
    }
    for path, body in existing.items():
        path.write_text(body)
    # Listed in the plan but never created.
    missing_chapter = chapters / "03-first-website.html"

    plan_lines = ["# Implementation Plan", "", "## File Changes"]
    plan_lines += [f"- `{directory}/`" for directory in (guide_root, chapters)]
    plan_lines += [f"- `{path}`" for path in (*existing, missing_chapter)]
    plan_lines.append("")  # trailing entry so the joined text ends with a newline
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    task = "Create a multi-file nginx guide."
    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend([str(path) for path in existing])
    finished_items = [
        "Create the main index.html file with proper structure",
        "Link all chapters together properly",
    ]
    dod.completed_items.extend(finished_items)
    dod.pending_items.append("Create each chapter file in sequence")

    decision = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    retry_message = decision.retry_message
    assert retry_message is not None
    assert "Link all chapters together properly" not in retry_message
    assert "Continue from the exact next step below." in retry_message
    assert "Resume with this exact next step:" in retry_message
1226
1227
def test_empty_response_retry_names_next_file_from_observed_sibling_directory(
    temp_dir: Path,
) -> None:
    """Check that the retry message names a file seen in another ``chapters/`` dir.

    The session records a ``read`` tool call on
    ``fortran/chapters/01-introduction.html``. The plan for the nginx guide
    lists only the guide root, its ``chapters/`` directory and the index
    file. The retry message must name ``01-introduction.html`` as the next
    file to create and state that it mirrors the observed filename pattern.
    """
    context = build_context(temp_dir=temp_dir, use_react=False)
    repairer = ResponseRepairer(context)

    # Reference guide whose chapter file is read in the recorded tool call.
    reference_chapters = temp_dir / "fortran" / "chapters"
    reference_chapters.mkdir(parents=True)
    reference_file = reference_chapters / "01-introduction.html"
    reference_file.write_text("<h1>Introduction</h1>\n")

    # Target guide: only the index file exists so far.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    index_path.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        "",  # trailing entry so the joined text ends with a newline
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    task = "Create a multi-file nginx guide."
    dod = create_definition_of_done(task)
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.pending_items.append("Write the introduction chapter")

    read_reference = ToolCall(
        id="read-ref-1",
        name="read",
        arguments={"file_path": str(reference_file)},
    )
    context.session.append(
        Message(role=Role.ASSISTANT, content="", tool_calls=[read_reference])
    )

    decision = repairer.handle_empty_response(
        task=task,
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert decision.should_continue is True
    retry_message = decision.retry_message
    assert retry_message is not None

    expected_fragments = (
        "Next missing planned artifact: `chapters/`",
        "Next observed output pattern under `chapters/`: `01-introduction.html`",
        (
            "Resume with this exact next step: continue `Write the introduction chapter` "
            "by creating `01-introduction.html`."
        ),
        (
            "It mirrors the observed filename pattern from another `chapters/` directory "
            "you already inspected."
        ),
    )
    for fragment in expected_fragments:
        assert fragment in retry_message