Resume concrete file todos
- SHA: 107ddd4458d81afee9439dece19971d9545a229a (short: 107ddd4)
- Parents: 33957e2
- Tree: 872c78f

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/repair.py | 121 | 5 |
| M | src/loader/runtime/workflow.py | 20 | 0 |
| M | tests/test_repair.py | 74 | 5 |

src/loader/runtime/repair.py (modified)
@@ -15,7 +15,11 @@ from .dod import ( | ||
| 15 | 15 | planned_artifact_target_satisfied, |
| 16 | 16 | ) |
| 17 | 17 | from .parsing import parse_tool_calls |
| 18 | -from .workflow import preferred_pending_todo_item, reconcile_aggregate_completion_steps | |
| 18 | +from .workflow import ( | |
| 19 | + preferred_pending_todo_item, | |
| 20 | + reconcile_aggregate_completion_steps, | |
| 21 | + todo_file_candidates, | |
| 22 | +) | |
| 19 | 23 | |
| 20 | 24 | _SPECIAL_DOD_ITEMS = { |
| 21 | 25 | "Complete the requested work", |
@@ -307,9 +311,8 @@ class ResponseRepairer: | ||
| 307 | 311 | "Confirmed completed work: " + "; ".join(completed[-2:]) |
| 308 | 312 | ) |
| 309 | 313 | |
| 310 | - next_pending = preferred_pending_todo_item( | |
| 314 | + next_pending = self._preferred_resume_pending_item( | |
| 311 | 315 | dod, |
| 312 | - project_root=self.context.project_root, | |
| 313 | 316 | missing_artifact=next( |
| 314 | 317 | ( |
| 315 | 318 | artifact |
@@ -490,9 +493,8 @@ class ResponseRepairer: | ||
| 490 | 493 | ), |
| 491 | 494 | None, |
| 492 | 495 | ) |
| 493 | - next_pending = preferred_pending_todo_item( | |
| 496 | + next_pending = self._preferred_resume_pending_item( | |
| 494 | 497 | dod, |
| 495 | - project_root=self.context.project_root, | |
| 496 | 498 | missing_artifact=next_missing_artifact, |
| 497 | 499 | ) |
| 498 | 500 | if ( |
@@ -519,6 +521,40 @@ class ResponseRepairer: | ||
| 519 | 521 | lines.append( |
| 520 | 522 | "Do not restart from scratch unless one specific missing fact blocks " |
| 521 | 523 | "that discovery step." |
| 524 | + ) | |
| 525 | + return lines | |
| 526 | + | |
| 527 | + inferred_pending_target = ( | |
| 528 | + self._infer_pending_item_output_target(dod, next_pending) | |
| 529 | + if next_pending | |
| 530 | + else None | |
| 531 | + ) | |
| 532 | + if next_pending and inferred_pending_target is not None: | |
| 533 | + inferred_label = self._format_artifact_label( | |
| 534 | + inferred_pending_target, | |
| 535 | + expect_directory=False, | |
| 536 | + ) | |
| 537 | + lines = [ | |
| 538 | + "Resume with this exact next step: continue " | |
| 539 | + f"`{next_pending}` by creating {inferred_label}." | |
| 540 | + ] | |
| 541 | + lines.append( | |
| 542 | + f"Prefer one `write(content=...)` call for `{inferred_pending_target}` before more research." | |
| 543 | + ) | |
| 544 | + if completed_artifacts >= 2: | |
| 545 | + lines.append( | |
| 546 | + "Follow the same one-file-at-a-time mutation pattern that already " | |
| 547 | + "created the confirmed output files." | |
| 548 | + ) | |
| 549 | + if retry_number >= 2: | |
| 550 | + lines.append( | |
| 551 | + "Do not return another working note or empty response; emit the " | |
| 552 | + "concrete mutation tool call now." | |
| 553 | + ) | |
| 554 | + else: | |
| 555 | + lines.append( | |
| 556 | + "Do not restart discovery unless one specific missing fact blocks " | |
| 557 | + "that file write." | |
| 522 | 558 | ) |
| 523 | 559 | return lines |
| 524 | 560 | |
@@ -639,6 +675,86 @@ class ResponseRepairer: | ||
| 639 | 675 | return lines |
| 640 | 676 | return [] |
| 641 | 677 | |
| 678 | + def _infer_pending_item_output_target( | |
| 679 | + self, | |
| 680 | + dod: DefinitionOfDone, | |
| 681 | + item: str, | |
| 682 | + ) -> Path | None: | |
| 683 | + candidates = todo_file_candidates(item) | |
| 684 | + if not candidates: | |
| 685 | + return None | |
| 686 | + | |
| 687 | + planned_targets = collect_planned_artifact_targets( | |
| 688 | + dod, | |
| 689 | + project_root=self.context.project_root, | |
| 690 | + max_paths=12, | |
| 691 | + ) | |
| 692 | + planned_files = { | |
| 693 | + target.name.lower(): target | |
| 694 | + for target, expect_directory in planned_targets | |
| 695 | + if not expect_directory | |
| 696 | + } | |
| 697 | + planned_directories = [ | |
| 698 | + target | |
| 699 | + for target, expect_directory in planned_targets | |
| 700 | + if expect_directory | |
| 701 | + ] | |
| 702 | + touched_paths = [ | |
| 703 | + Path(path) | |
| 704 | + for path in dod.touched_files | |
| 705 | + if str(path).strip() | |
| 706 | + ] | |
| 707 | + | |
| 708 | + for candidate in candidates: | |
| 709 | + candidate_str = str(candidate) | |
| 710 | + if candidate.is_absolute() or candidate_str.startswith("~"): | |
| 711 | + return Path(candidate_str).expanduser() | |
| 712 | + | |
| 713 | + planned_match = planned_files.get(candidate.name.lower()) | |
| 714 | + if planned_match is not None: | |
| 715 | + return planned_match | |
| 716 | + | |
| 717 | + for touched in reversed(touched_paths): | |
| 718 | + if touched.name.lower() == candidate.name.lower(): | |
| 719 | + continue | |
| 720 | + if candidate.suffix and touched.suffix.lower() != candidate.suffix.lower(): | |
| 721 | + continue | |
| 722 | + return touched.parent / candidate.name | |
| 723 | + | |
| 724 | + for directory in planned_directories: | |
| 725 | + return directory / candidate.name | |
| 726 | + | |
| 727 | + return None | |
| 728 | + | |
| 729 | + def _preferred_resume_pending_item( | |
| 730 | + self, | |
| 731 | + dod: DefinitionOfDone, | |
| 732 | + *, | |
| 733 | + missing_artifact: tuple[Path, bool] | None, | |
| 734 | + ) -> str | None: | |
| 735 | + preferred = preferred_pending_todo_item( | |
| 736 | + dod, | |
| 737 | + project_root=self.context.project_root, | |
| 738 | + missing_artifact=missing_artifact, | |
| 739 | + ) | |
| 740 | + if preferred: | |
| 741 | + return preferred | |
| 742 | + | |
| 743 | + explicit_file_items = [ | |
| 744 | + item | |
| 745 | + for item in dod.pending_items | |
| 746 | + if item not in _SPECIAL_DOD_ITEMS | |
| 747 | + and _todo_is_mutation_step(item) | |
| 748 | + and todo_file_candidates(item) | |
| 749 | + ] | |
| 750 | + if explicit_file_items: | |
| 751 | + return explicit_file_items[0] | |
| 752 | + | |
| 753 | + return next( | |
| 754 | + (item for item in dod.pending_items if item not in _SPECIAL_DOD_ITEMS), | |
| 755 | + None, | |
| 756 | + ) | |
| 757 | + | |
| 642 | 758 | @staticmethod |
| 643 | 759 | def _format_artifact_label(path: Path, *, expect_directory: bool) -> str: |
| 644 | 760 | label = path.name or str(path) |
src/loader/runtime/workflow.py (modified)
@@ -61,6 +61,7 @@ __all__ = [ | ||
| 61 | 61 | "preferred_pending_todo_item", |
| 62 | 62 | "reconcile_aggregate_completion_steps", |
| 63 | 63 | "sync_todos_to_definition_of_done", |
| 64 | + "todo_file_candidates", | |
| 64 | 65 | ] |
| 65 | 66 | |
| 66 | 67 | VERIFICATION_SEPARATOR = "<<<VERIFICATION>>>" |
@@ -865,6 +866,25 @@ def preferred_pending_todo_item( | ||
| 865 | 866 | return pending_items[0] |
| 866 | 867 | |
| 867 | 868 | |
| 869 | +def todo_file_candidates(item: str) -> list[Path]: | |
| 870 | + """Extract explicit file references from a todo item in source order.""" | |
| 871 | + | |
| 872 | + if item in _SPECIAL_TODO_ITEMS: | |
| 873 | + return [] | |
| 874 | + seen: set[str] = set() | |
| 875 | + candidates: list[Path] = [] | |
| 876 | + for match in _TODO_FILE_CANDIDATE_PATTERN.findall(item.strip()): | |
| 877 | + normalized = match.strip() | |
| 878 | + if not normalized: | |
| 879 | + continue | |
| 880 | + key = normalized.lower() | |
| 881 | + if key in seen: | |
| 882 | + continue | |
| 883 | + seen.add(key) | |
| 884 | + candidates.append(Path(normalized)) | |
| 885 | + return candidates | |
| 886 | + | |
| 887 | + | |
| 868 | 888 | def preserve_task_grounded_acceptance_criteria( |
| 869 | 889 | task_statement: str, |
| 870 | 890 | *, |
tests/test_repair.py (modified)
@@ -318,14 +318,17 @@ def test_empty_response_retry_mentions_write_can_create_missing_parent_directori | ||
| 318 | 318 | |
| 319 | 319 | assert decision.should_continue is True |
| 320 | 320 | assert decision.retry_message is not None |
| 321 | - assert "Resume with this exact next step: create `index.html`." in decision.retry_message | |
| 322 | 321 | assert ( |
| 323 | - "The `write` tool can create that file's parent directories automatically" | |
| 322 | + "Resume with this exact next step: continue `Write main index.html for nginx guide` " | |
| 323 | + "by creating `index.html`." | |
| 324 | 324 | in decision.retry_message |
| 325 | 325 | ) |
| 326 | 326 | assert ( |
| 327 | - "Shape the next response as one concrete `write(file_path=..., content=...)` " | |
| 328 | - "tool call for that exact path." | |
| 327 | + f"Prefer one `write(content=...)` call for `{index_path}` before more research." | |
| 328 | + in decision.retry_message | |
| 329 | + ) | |
| 330 | + assert ( | |
| 331 | + "Do not restart discovery unless one specific missing fact blocks that file write." | |
| 329 | 332 | in decision.retry_message |
| 330 | 333 | ) |
| 331 | 334 | |
@@ -653,7 +656,7 @@ def test_empty_response_retry_treats_develop_index_step_as_mutation_work( | ||
| 653 | 656 | "Resume with this exact next step: continue `Develop the main index.html file with proper structure`" |
| 654 | 657 | in decision.retry_message |
| 655 | 658 | ) |
| 656 | - assert "Prefer one concrete `write` call" in decision.retry_message | |
| 659 | + assert "Prefer one `write(content=...)` call" in decision.retry_message | |
| 657 | 660 | assert "Make the next response one concrete evidence-gathering tool call" not in decision.retry_message |
| 658 | 661 | |
| 659 | 662 | |
@@ -724,6 +727,72 @@ def test_empty_response_retry_points_at_declared_child_file_within_incomplete_ou | ||
| 724 | 727 | assert "introduction.html` before more research." in decision.retry_message |
| 725 | 728 | |
| 726 | 729 | |
| 730 | +def test_empty_response_retry_infers_concrete_file_from_pending_todo_after_broad_artifacts_exist( | |
| 731 | + temp_dir: Path, | |
| 732 | +) -> None: | |
| 733 | + context = build_context( | |
| 734 | + temp_dir=temp_dir, | |
| 735 | + use_react=False, | |
| 736 | + ) | |
| 737 | + repairer = ResponseRepairer(context) | |
| 738 | + | |
| 739 | + guide_root = temp_dir / "guides" / "nginx" | |
| 740 | + chapters = guide_root / "chapters" | |
| 741 | + chapters.mkdir(parents=True) | |
| 742 | + index_path = guide_root / "index.html" | |
| 743 | + chapter_one = chapters / "01-introduction.html" | |
| 744 | + index_path.write_text("<html></html>\n") | |
| 745 | + chapter_one.write_text("<html></html>\n") | |
| 746 | + | |
| 747 | + implementation_plan = temp_dir / "implementation.md" | |
| 748 | + implementation_plan.write_text( | |
| 749 | + "\n".join( | |
| 750 | + [ | |
| 751 | + "# Implementation Plan", | |
| 752 | + "", | |
| 753 | + "## File Changes", | |
| 754 | + f"- `{guide_root}/`", | |
| 755 | + f"- `{chapters}/`", | |
| 756 | + f"- `{index_path}`", | |
| 757 | + "", | |
| 758 | + ] | |
| 759 | + ) | |
| 760 | + ) | |
| 761 | + | |
| 762 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 763 | + dod.implementation_plan = str(implementation_plan) | |
| 764 | + dod.touched_files.extend([str(index_path), str(chapter_one)]) | |
| 765 | + dod.completed_items.extend( | |
| 766 | + [ | |
| 767 | + "Create index.html for nginx guide", | |
| 768 | + "Create first chapter file (01-introduction.html)", | |
| 769 | + ] | |
| 770 | + ) | |
| 771 | + dod.pending_items.append("Create second chapter file (02-installation.html)") | |
| 772 | + | |
| 773 | + decision = repairer.handle_empty_response( | |
| 774 | + task="Create a multi-file nginx guide.", | |
| 775 | + original_task=None, | |
| 776 | + empty_retry_count=2, | |
| 777 | + max_empty_retries=2, | |
| 778 | + dod=dod, | |
| 779 | + ) | |
| 780 | + | |
| 781 | + assert decision.should_continue is True | |
| 782 | + assert decision.retry_message is not None | |
| 783 | + assert ( | |
| 784 | + "Resume with this exact next step: continue `Create second chapter file " | |
| 785 | + "(02-installation.html)` by creating `02-installation.html`." | |
| 786 | + in decision.retry_message | |
| 787 | + ) | |
| 788 | + assert ( | |
| 789 | + f"Prefer one `write(content=...)` call for `{chapters / '02-installation.html'}` " | |
| 790 | + "before more research." | |
| 791 | + in decision.retry_message | |
| 792 | + ) | |
| 793 | + assert "Do not return another working note or empty response" in decision.retry_message | |
| 794 | + | |
| 795 | + | |
| 727 | 796 | def test_empty_response_retry_fails_after_extended_late_stage_budget_is_exhausted( |
| 728 | 797 | temp_dir: Path, |
| 729 | 798 | ) -> None: |