Ease first chapter handoffs
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
ea5727e33eb1d0e3c3905b9e4f622be1e4352ce6- Parents
-
edbe397 - Tree
7b2a6db
ea5727e
ea5727e33eb1d0e3c3905b9e4f622be1e4352ce6edbe397
7b2a6db| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/repair.py
|
81 | 7 |
| M |
src/loader/runtime/turn_completion.py
|
69 | 6 |
| M |
tests/test_repair.py
|
3 | 1 |
| M |
tests/test_turn_completion.py
|
77 | 0 |
src/loader/runtime/repair.pymodified@@ -36,6 +36,14 @@ _SPECIAL_DOD_ITEMS = { | ||
| 36 | 36 | _FIRST_FILE_EMPTY_RETRY_EXTRA = 2 |
| 37 | 37 | _LATE_STAGE_EMPTY_RETRY_EXTRA = 2 |
| 38 | 38 | _MULTI_FILE_OUTPUT_EMPTY_RETRY_EXTRA = 2 |
| 39 | +_SUMMARY_ARTIFACT_NAMES = { | |
| 40 | + "index.html", | |
| 41 | + "index.htm", | |
| 42 | + "readme", | |
| 43 | + "readme.md", | |
| 44 | + "readme.rst", | |
| 45 | + "readme.txt", | |
| 46 | +} | |
| 39 | 47 | _WORKING_NOTE_TOOL_NAMES = ( |
| 40 | 48 | "notepad_write_working", |
| 41 | 49 | "notepad_append", |
@@ -505,7 +513,7 @@ class ResponseRepairer: | ||
| 505 | 513 | return base_max_empty_retries + _LATE_STAGE_EMPTY_RETRY_EXTRA |
| 506 | 514 | if self._has_concrete_next_output_step(dod): |
| 507 | 515 | extra_retries = _LATE_STAGE_EMPTY_RETRY_EXTRA |
| 508 | - if self._has_confirmed_output_file_progress(dod): | |
| 516 | + if self._has_confirmed_substantive_output_file_progress(dod): | |
| 509 | 517 | extra_retries += _MULTI_FILE_OUTPUT_EMPTY_RETRY_EXTRA |
| 510 | 518 | elif completed_artifacts > 0: |
| 511 | 519 | extra_retries += _FIRST_FILE_EMPTY_RETRY_EXTRA |
@@ -589,6 +597,33 @@ class ResponseRepairer: | ||
| 589 | 597 | ) |
| 590 | 598 | ) |
| 591 | 599 | |
| 600 | + def _has_confirmed_substantive_output_file_progress( | |
| 601 | + self, | |
| 602 | + dod: DefinitionOfDone, | |
| 603 | + ) -> bool: | |
| 604 | + for raw_path in dod.touched_files: | |
| 605 | + if not str(raw_path).strip(): | |
| 606 | + continue | |
| 607 | + path = Path(raw_path).expanduser().resolve(strict=False) | |
| 608 | + if not path.suffix or _is_summary_artifact_path(path) or not path.is_file(): | |
| 609 | + continue | |
| 610 | + return True | |
| 611 | + return any( | |
| 612 | + not expect_directory | |
| 613 | + and not _is_summary_artifact_path(target) | |
| 614 | + and planned_artifact_target_satisfied( | |
| 615 | + dod, | |
| 616 | + target=target, | |
| 617 | + expect_directory=False, | |
| 618 | + project_root=self.context.project_root, | |
| 619 | + ) | |
| 620 | + for target, expect_directory in collect_planned_artifact_targets( | |
| 621 | + dod, | |
| 622 | + project_root=self.context.project_root, | |
| 623 | + max_paths=12, | |
| 624 | + ) | |
| 625 | + ) | |
| 626 | + | |
| 592 | 627 | def _planned_artifact_progress_lines(self, dod: DefinitionOfDone) -> list[str]: |
| 593 | 628 | targets = collect_planned_artifact_targets( |
| 594 | 629 | dod, |
@@ -699,6 +734,9 @@ class ResponseRepairer: | ||
| 699 | 734 | ) -> list[str]: |
| 700 | 735 | completed_artifacts, _ = self._planned_artifact_counts(dod) |
| 701 | 736 | has_confirmed_output_file_progress = self._has_confirmed_output_file_progress(dod) |
| 737 | + has_confirmed_substantive_output_file_progress = ( | |
| 738 | + self._has_confirmed_substantive_output_file_progress(dod) | |
| 739 | + ) | |
| 702 | 740 | next_missing_artifact = self._preferred_resume_missing_artifact(dod) |
| 703 | 741 | next_pending = self._preferred_resume_pending_item( |
| 704 | 742 | dod, |
@@ -771,13 +809,17 @@ class ResponseRepairer: | ||
| 771 | 809 | lines.append( |
| 772 | 810 | f"Use the existing outline label `{outline_label}` for that file so it matches the current guide structure." |
| 773 | 811 | ) |
| 774 | - if not has_confirmed_output_file_progress: | |
| 812 | + if _should_encourage_initial_version( | |
| 813 | + target=concrete_target, | |
| 814 | + has_confirmed_output_file_progress=has_confirmed_output_file_progress, | |
| 815 | + has_confirmed_substantive_output_file_progress=has_confirmed_substantive_output_file_progress, | |
| 816 | + ): | |
| 775 | 817 | lines.append( |
| 776 | 818 | "Do not wait to perfect the entire multi-file output before this write. " |
| 777 | 819 | "Write a compact but real initial version of this file now, then refine " |
| 778 | 820 | "or expand it in later edits." |
| 779 | 821 | ) |
| 780 | - if has_confirmed_output_file_progress: | |
| 822 | + if has_confirmed_substantive_output_file_progress: | |
| 781 | 823 | lines.append( |
| 782 | 824 | "Follow the same full-payload one-file-at-a-time write pattern that " |
| 783 | 825 | "already created the confirmed output files." |
@@ -837,13 +879,20 @@ class ResponseRepairer: | ||
| 837 | 879 | 1, |
| 838 | 880 | f"It is the next concrete output needed to continue `{next_pending}`.", |
| 839 | 881 | ) |
| 840 | - if not has_confirmed_output_file_progress and not inferred_is_directory: | |
| 882 | + if ( | |
| 883 | + not inferred_is_directory | |
| 884 | + and _should_encourage_initial_version( | |
| 885 | + target=inferred_pending_target, | |
| 886 | + has_confirmed_output_file_progress=has_confirmed_output_file_progress, | |
| 887 | + has_confirmed_substantive_output_file_progress=has_confirmed_substantive_output_file_progress, | |
| 888 | + ) | |
| 889 | + ): | |
| 841 | 890 | lines.append( |
| 842 | 891 | "Do not wait to perfect the entire multi-file output before this write. " |
| 843 | 892 | "Write a compact but real initial version of this file now, then refine " |
| 844 | 893 | "or expand it in later edits." |
| 845 | 894 | ) |
| 846 | - if has_confirmed_output_file_progress: | |
| 895 | + if has_confirmed_substantive_output_file_progress: | |
| 847 | 896 | lines.append( |
| 848 | 897 | "Follow the same full-payload one-file-at-a-time write pattern that " |
| 849 | 898 | "already created the confirmed output files." |
@@ -925,7 +974,11 @@ class ResponseRepairer: | ||
| 925 | 974 | lines.append( |
| 926 | 975 | f"Use the existing outline label `{outline_label}` for that file so it matches the current guide structure." |
| 927 | 976 | ) |
| 928 | - if not has_confirmed_output_file_progress: | |
| 977 | + if _should_encourage_initial_version( | |
| 978 | + target=next_output_file, | |
| 979 | + has_confirmed_output_file_progress=has_confirmed_output_file_progress, | |
| 980 | + has_confirmed_substantive_output_file_progress=has_confirmed_substantive_output_file_progress, | |
| 981 | + ): | |
| 929 | 982 | lines.append( |
| 930 | 983 | "Do not wait to perfect the entire multi-file output before this write. " |
| 931 | 984 | "Write a compact but real initial version of this file now, then refine " |
@@ -979,7 +1032,11 @@ class ResponseRepairer: | ||
| 979 | 1032 | "automatically, so do the write in one step instead of stopping " |
| 980 | 1033 | "for a separate mkdir." |
| 981 | 1034 | ) |
| 982 | - if not has_confirmed_output_file_progress: | |
| 1035 | + if _should_encourage_initial_version( | |
| 1036 | + target=target, | |
| 1037 | + has_confirmed_output_file_progress=has_confirmed_output_file_progress, | |
| 1038 | + has_confirmed_substantive_output_file_progress=has_confirmed_substantive_output_file_progress, | |
| 1039 | + ): | |
| 983 | 1040 | lines.append( |
| 984 | 1041 | "Do not wait to perfect the entire multi-file output before this write. " |
| 985 | 1042 | "Write a compact but real initial version of this file now, then refine " |
@@ -1226,3 +1283,20 @@ def _todo_is_mutation_step(label: str) -> bool: | ||
| 1226 | 1283 | def _todo_is_consistency_review_step(label: str) -> bool: |
| 1227 | 1284 | lowered = label.lower() |
| 1228 | 1285 | return any(token in lowered for token in _CONSISTENCY_REVIEW_HINTS) |
| 1286 | + | |
| 1287 | + | |
| 1288 | +def _is_summary_artifact_path(path: Path) -> bool: | |
| 1289 | + return path.name.lower() in _SUMMARY_ARTIFACT_NAMES | |
| 1290 | + | |
| 1291 | + | |
| 1292 | +def _should_encourage_initial_version( | |
| 1293 | + *, | |
| 1294 | + target: Path, | |
| 1295 | + has_confirmed_output_file_progress: bool, | |
| 1296 | + has_confirmed_substantive_output_file_progress: bool, | |
| 1297 | +) -> bool: | |
| 1298 | + if not has_confirmed_output_file_progress: | |
| 1299 | + return True | |
| 1300 | + if _is_summary_artifact_path(target): | |
| 1301 | + return False | |
| 1302 | + return not has_confirmed_substantive_output_file_progress | |
src/loader/runtime/turn_completion.pymodified@@ -60,6 +60,14 @@ _COMPLETION_HINTS = ( | ||
| 60 | 60 | "successfully completed", |
| 61 | 61 | "everything is done", |
| 62 | 62 | ) |
| 63 | +_SUMMARY_ARTIFACT_NAMES = { | |
| 64 | + "index.html", | |
| 65 | + "index.htm", | |
| 66 | + "readme", | |
| 67 | + "readme.md", | |
| 68 | + "readme.rst", | |
| 69 | + "readme.txt", | |
| 70 | +} | |
| 63 | 71 | |
| 64 | 72 | |
| 65 | 73 | class TurnCompletionAction(StrEnum): |
@@ -429,13 +437,26 @@ def _build_in_progress_continuation( | ||
| 429 | 437 | messages=messages, |
| 430 | 438 | ) |
| 431 | 439 | if target is not None: |
| 440 | + prompt = ( | |
| 441 | + "[CONTINUE CURRENT STEP]\n" | |
| 442 | + "You just described the next planned step, but the concrete output is not on disk yet. " | |
| 443 | + f"Respond with one concrete `write` or `edit`-style tool call that creates or updates `{target}` now. " | |
| 444 | + "Do not summarize, verify, or restart discovery first." | |
| 445 | + ) | |
| 446 | + if ( | |
| 447 | + not _is_summary_artifact_path(target) | |
| 448 | + and _confirmed_substantive_output_file_count( | |
| 449 | + dod, | |
| 450 | + project_root=project_root, | |
| 451 | + ) | |
| 452 | + == 0 | |
| 453 | + ): | |
| 454 | + prompt += ( | |
| 455 | + " If needed, write a compact but real initial version of that file now; " | |
| 456 | + "you can expand or refine it in later edits." | |
| 457 | + ) | |
| 432 | 458 | return InProgressContinuation( |
| 433 | - prompt=( | |
| 434 | - "[CONTINUE CURRENT STEP]\n" | |
| 435 | - "You just described the next planned step, but the concrete output is not on disk yet. " | |
| 436 | - f"Respond with one concrete `write` or `edit`-style tool call that creates or updates `{target}` now. " | |
| 437 | - "Do not summarize, verify, or restart discovery first." | |
| 438 | - ), | |
| 459 | + prompt=prompt, | |
| 439 | 460 | target=target, |
| 440 | 461 | ) |
| 441 | 462 | |
@@ -560,6 +581,48 @@ def _confirmed_output_file_count( | ||
| 560 | 581 | ) |
| 561 | 582 | |
| 562 | 583 | |
| 584 | +def _confirmed_substantive_output_file_count( | |
| 585 | + dod: DefinitionOfDone, | |
| 586 | + *, | |
| 587 | + project_root: Path, | |
| 588 | +) -> int: | |
| 589 | + count = 0 | |
| 590 | + seen: set[str] = set() | |
| 591 | + for raw_path in dod.touched_files: | |
| 592 | + if not str(raw_path).strip(): | |
| 593 | + continue | |
| 594 | + path = Path(raw_path).expanduser().resolve(strict=False) | |
| 595 | + key = str(path) | |
| 596 | + if key in seen: | |
| 597 | + continue | |
| 598 | + seen.add(key) | |
| 599 | + if not path.suffix or _is_summary_artifact_path(path) or not path.is_file(): | |
| 600 | + continue | |
| 601 | + count += 1 | |
| 602 | + | |
| 603 | + return sum( | |
| 604 | + 1 | |
| 605 | + for target, expect_directory in collect_planned_artifact_targets( | |
| 606 | + dod, | |
| 607 | + project_root=project_root, | |
| 608 | + max_paths=12, | |
| 609 | + ) | |
| 610 | + if str(target.expanduser().resolve(strict=False)) not in seen | |
| 611 | + if not expect_directory | |
| 612 | + and not _is_summary_artifact_path(target) | |
| 613 | + and planned_artifact_target_satisfied( | |
| 614 | + dod, | |
| 615 | + target=target, | |
| 616 | + expect_directory=False, | |
| 617 | + project_root=project_root, | |
| 618 | + ) | |
| 619 | + ) + count | |
| 620 | + | |
| 621 | + | |
| 622 | +def _is_summary_artifact_path(path: Path) -> bool: | |
| 623 | + return path.name.lower() in _SUMMARY_ARTIFACT_NAMES | |
| 624 | + | |
| 625 | + | |
| 563 | 626 | def _recent_concrete_target_prompt( |
| 564 | 627 | messages: list[object], |
| 565 | 628 | *, |
tests/test_repair.pymodified@@ -1089,9 +1089,10 @@ def test_empty_response_retry_uses_concrete_file_language_for_aggregate_chapter_ | ||
| 1089 | 1089 | in decision.retry_message |
| 1090 | 1090 | ) |
| 1091 | 1091 | assert ( |
| 1092 | - "Follow the same full-payload one-file-at-a-time write pattern that already created the confirmed output files." | |
| 1092 | + "Write a compact but real initial version of this file now, then refine or expand it in later edits." | |
| 1093 | 1093 | in decision.retry_message |
| 1094 | 1094 | ) |
| 1095 | + assert "Follow the same full-payload one-file-at-a-time write pattern" not in decision.retry_message | |
| 1095 | 1096 | assert "Remaining planned artifacts:" not in decision.retry_message |
| 1096 | 1097 | assert "Next pending item:" not in decision.retry_message |
| 1097 | 1098 | |
@@ -1171,6 +1172,7 @@ def test_empty_response_retry_keeps_concrete_second_chapter_for_aggregate_chapte | ||
| 1171 | 1172 | in decision.retry_message |
| 1172 | 1173 | ) |
| 1173 | 1174 | assert f"`{display_runtime_path(chapter_two)}`" in decision.retry_message |
| 1175 | + assert "Follow the same full-payload one-file-at-a-time write pattern" in decision.retry_message | |
| 1174 | 1176 | |
| 1175 | 1177 | |
| 1176 | 1178 | def test_empty_response_retry_prefers_output_index_over_reference_index_with_same_name( |
tests/test_turn_completion.pymodified@@ -615,6 +615,83 @@ async def test_turn_completion_interrupts_first_narration_after_concrete_target_ | ||
| 615 | 615 | assert "index.html" in agent.session.messages[-1].content |
| 616 | 616 | |
| 617 | 617 | |
| 618 | +@pytest.mark.asyncio | |
| 619 | +async def test_turn_completion_first_chapter_continuation_allows_compact_initial_version( | |
| 620 | + temp_dir: Path, | |
| 621 | +) -> None: | |
| 622 | + backend = ScriptedBackend() | |
| 623 | + config = non_streaming_config() | |
| 624 | + config.reasoning.completion_check = False | |
| 625 | + agent = Agent( | |
| 626 | + backend=backend, | |
| 627 | + config=config, | |
| 628 | + project_root=temp_dir, | |
| 629 | + ) | |
| 630 | + runtime = ConversationRuntime(agent) | |
| 631 | + events = [] | |
| 632 | + | |
| 633 | + async def capture(event) -> None: | |
| 634 | + events.append(event) | |
| 635 | + | |
| 636 | + prepared = await runtime.turn_preparation.prepare( | |
| 637 | + task=( | |
| 638 | + "Create a multi-file nginx guide under ~/Loader/guides/nginx " | |
| 639 | + "with an index and chapter files." | |
| 640 | + ), | |
| 641 | + emit=capture, | |
| 642 | + requested_mode="execute", | |
| 643 | + original_task=None, | |
| 644 | + on_user_question=None, | |
| 645 | + ) | |
| 646 | + await runtime.phase_tracker.enter( | |
| 647 | + TurnPhase.ASSISTANT, | |
| 648 | + capture, | |
| 649 | + detail="Requesting assistant response", | |
| 650 | + reason_code="request_assistant_response", | |
| 651 | + ) | |
| 652 | + | |
| 653 | + chapters_dir = temp_dir / "chapters" | |
| 654 | + chapters_dir.mkdir() | |
| 655 | + index_path = temp_dir / "index.html" | |
| 656 | + index_path.write_text("<html></html>\n") | |
| 657 | + | |
| 658 | + implementation_plan = temp_dir / "implementation.md" | |
| 659 | + implementation_plan.write_text( | |
| 660 | + "# Implementation Plan\n\n" | |
| 661 | + "## File Changes\n\n" | |
| 662 | + f"- `{index_path}`\n" | |
| 663 | + f"- `{chapters_dir / '01-introduction.html'}`\n" | |
| 664 | + ) | |
| 665 | + | |
| 666 | + prepared.definition_of_done.implementation_plan = str(implementation_plan) | |
| 667 | + prepared.definition_of_done.touched_files.append(str(index_path)) | |
| 668 | + prepared.definition_of_done.pending_items.append("Create chapter files for nginx guide") | |
| 669 | + | |
| 670 | + content = "Now I'll create the first chapter of the nginx guide." | |
| 671 | + decision = await runtime.turn_completion.handle_text_response( | |
| 672 | + content=content, | |
| 673 | + response_content=content, | |
| 674 | + task=prepared.task, | |
| 675 | + effective_task=prepared.effective_task, | |
| 676 | + iterations=1, | |
| 677 | + max_iterations=agent.config.max_iterations, | |
| 678 | + actions_taken=[], | |
| 679 | + continuation_count=1, | |
| 680 | + dod=prepared.definition_of_done, | |
| 681 | + emit=capture, | |
| 682 | + summary=prepared.summary, | |
| 683 | + executor=prepared.executor, | |
| 684 | + rollback_plan=prepared.rollback_plan, | |
| 685 | + ) | |
| 686 | + | |
| 687 | + assert decision.action == TurnCompletionAction.CONTINUE | |
| 688 | + assert decision.continuation_count == 2 | |
| 689 | + assert agent.session.messages[-1].role.value == "user" | |
| 690 | + assert agent.session.messages[-1].content.startswith("[CONTINUE CURRENT STEP]") | |
| 691 | + assert "01-introduction.html" in agent.session.messages[-1].content | |
| 692 | + assert "write a compact but real initial version of that file now" in agent.session.messages[-1].content.lower() | |
| 693 | + | |
| 694 | + | |
| 618 | 695 | @pytest.mark.asyncio |
| 619 | 696 | async def test_turn_completion_handles_fake_tool_narration_without_reroute( |
| 620 | 697 | temp_dir: Path, |