Name concrete chapter outputs
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
024377f917e545deda140d32f749dfb046c1d71d- Parents
-
f4fe116 - Tree
baa4420
024377f
024377f917e545deda140d32f749dfb046c1d71df4fe116
baa4420| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/repair.py
|
52 | 0 |
| M |
src/loader/runtime/tool_batches.py
|
55 | 4 |
| M |
src/loader/runtime/workflow.py
|
20 | 0 |
| M |
tests/test_repair.py
|
71 | 0 |
| M |
tests/test_tool_batches.py
|
132 | 0 |
src/loader/runtime/repair.pymodified@@ -23,6 +23,8 @@ from .workflow import ( | ||
| 23 | 23 | infer_pending_todo_output_target, |
| 24 | 24 | preferred_pending_todo_item, |
| 25 | 25 | reconcile_aggregate_completion_steps, |
| 26 | + todo_describes_aggregate_mutation, | |
| 27 | + todo_describes_broad_setup_step, | |
| 26 | 28 | todo_file_candidates, |
| 27 | 29 | ) |
| 28 | 30 | |
@@ -701,6 +703,56 @@ class ResponseRepairer: | ||
| 701 | 703 | if next_pending |
| 702 | 704 | else None |
| 703 | 705 | ) |
| 706 | + if ( | |
| 707 | + next_pending | |
| 708 | + and inferred_pending_target is None | |
| 709 | + and next_missing_artifact is not None | |
| 710 | + and not next_missing_artifact[1] | |
| 711 | + and todo_describes_aggregate_mutation(next_pending) | |
| 712 | + and not todo_describes_broad_setup_step(next_pending) | |
| 713 | + ): | |
| 714 | + concrete_target = next_missing_artifact[0] | |
| 715 | + outline_label = infer_output_outline_label( | |
| 716 | + dod, | |
| 717 | + concrete_target, | |
| 718 | + project_root=self.context.project_root, | |
| 719 | + todo_label=next_pending, | |
| 720 | + ) | |
| 721 | + lines = [ | |
| 722 | + f"Resume with this exact next step: create `{concrete_target.name}`.", | |
| 723 | + f"It is the next concrete output needed to continue `{next_pending}`.", | |
| 724 | + f"Prefer one `write(content=...)` call for `{concrete_target}` before more research.", | |
| 725 | + self._mutation_tool_scaffold( | |
| 726 | + concrete_target, | |
| 727 | + tool_name="write", | |
| 728 | + ), | |
| 729 | + ] | |
| 730 | + if not concrete_target.parent.exists(): | |
| 731 | + lines.append( | |
| 732 | + "The `write` tool can create that file's parent directories " | |
| 733 | + "automatically, so do the write in one step instead of stopping " | |
| 734 | + "for a separate mkdir." | |
| 735 | + ) | |
| 736 | + if outline_label: | |
| 737 | + lines.append( | |
| 738 | + f"Use the existing outline label `{outline_label}` for that file so it matches the current guide structure." | |
| 739 | + ) | |
| 740 | + if completed_artifacts >= 2: | |
| 741 | + lines.append( | |
| 742 | + "Follow the same one-file-at-a-time mutation pattern that already " | |
| 743 | + "created the confirmed output files." | |
| 744 | + ) | |
| 745 | + if retry_number >= 2: | |
| 746 | + lines.append( | |
| 747 | + "Do not return another working note or empty response; emit the " | |
| 748 | + "concrete mutation tool call now." | |
| 749 | + ) | |
| 750 | + else: | |
| 751 | + lines.append( | |
| 752 | + "Do not restart discovery unless one specific missing fact blocks " | |
| 753 | + "that file write." | |
| 754 | + ) | |
| 755 | + return lines | |
| 704 | 756 | if next_pending and inferred_pending_target is not None: |
| 705 | 757 | inferred_is_directory = not bool(inferred_pending_target.suffix) |
| 706 | 758 | inferred_label = self._format_artifact_label( |
src/loader/runtime/tool_batches.pymodified@@ -46,6 +46,8 @@ from .workflow import ( | ||
| 46 | 46 | preferred_pending_todo_item, |
| 47 | 47 | reconcile_aggregate_completion_steps, |
| 48 | 48 | sync_todos_to_definition_of_done, |
| 49 | + todo_describes_aggregate_mutation, | |
| 50 | + todo_describes_broad_setup_step, | |
| 49 | 51 | ) |
| 50 | 52 | |
| 51 | 53 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
@@ -1333,10 +1335,22 @@ class ToolBatchRunner: | ||
| 1333 | 1335 | dod, |
| 1334 | 1336 | project_root=self.context.project_root, |
| 1335 | 1337 | ) |
| 1336 | - next_pending_suffix = ( | |
| 1337 | - f" Continue with the next pending item: `{next_pending}`." | |
| 1338 | - if next_pending | |
| 1339 | - else "" | |
| 1338 | + resume_target = _preferred_resume_target_path( | |
| 1339 | + dod, | |
| 1340 | + next_pending=next_pending, | |
| 1341 | + missing_artifact=missing_artifact, | |
| 1342 | + project_root=self.context.project_root, | |
| 1343 | + messages=session_messages, | |
| 1344 | + ) | |
| 1345 | + pending_target = _preferred_pending_target_path( | |
| 1346 | + dod, | |
| 1347 | + next_pending=next_pending, | |
| 1348 | + project_root=self.context.project_root, | |
| 1349 | + ) | |
| 1350 | + next_pending_suffix = _pending_item_handoff_prefix( | |
| 1351 | + next_pending, | |
| 1352 | + pending_target=pending_target, | |
| 1353 | + resume_target=resume_target, | |
| 1340 | 1354 | ) |
| 1341 | 1355 | self.context.queue_steering_message( |
| 1342 | 1356 | "Todo tracking is updated. A declared output artifact is still missing." |
@@ -1742,6 +1756,43 @@ def _pending_item_resume_suffix( | ||
| 1742 | 1756 | ) |
| 1743 | 1757 | |
| 1744 | 1758 | |
| 1759 | +def _pending_item_handoff_prefix( | |
| 1760 | + next_pending: str | None, | |
| 1761 | + *, | |
| 1762 | + pending_target: Path | None, | |
| 1763 | + resume_target: Path | None, | |
| 1764 | +) -> str: | |
| 1765 | + if not next_pending: | |
| 1766 | + return "" | |
| 1767 | + if ( | |
| 1768 | + pending_target is None | |
| 1769 | + and resume_target is not None | |
| 1770 | + and resume_target.suffix | |
| 1771 | + and todo_describes_aggregate_mutation(next_pending) | |
| 1772 | + and not todo_describes_broad_setup_step(next_pending) | |
| 1773 | + ): | |
| 1774 | + return f" Continue with the next concrete output: `{resume_target.name}`." | |
| 1775 | + return f" Continue with the next pending item: `{next_pending}`." | |
| 1776 | + | |
| 1777 | + | |
| 1778 | +def _preferred_pending_target_path( | |
| 1779 | + dod: DefinitionOfDone, | |
| 1780 | + *, | |
| 1781 | + next_pending: str | None, | |
| 1782 | + project_root: Path, | |
| 1783 | +) -> Path | None: | |
| 1784 | + if not next_pending: | |
| 1785 | + return None | |
| 1786 | + pending_target = infer_pending_todo_output_target( | |
| 1787 | + dod, | |
| 1788 | + next_pending, | |
| 1789 | + project_root=project_root, | |
| 1790 | + ) | |
| 1791 | + if pending_target is None: | |
| 1792 | + return None | |
| 1793 | + return pending_target.expanduser().resolve(strict=False) | |
| 1794 | + | |
| 1795 | + | |
| 1745 | 1796 | def _preferred_resume_target_path( |
| 1746 | 1797 | dod: DefinitionOfDone, |
| 1747 | 1798 | *, |
src/loader/runtime/workflow.pymodified@@ -63,6 +63,8 @@ __all__ = [ | ||
| 63 | 63 | "preferred_pending_todo_item", |
| 64 | 64 | "reconcile_aggregate_completion_steps", |
| 65 | 65 | "sync_todos_to_definition_of_done", |
| 66 | + "todo_describes_aggregate_mutation", | |
| 67 | + "todo_describes_broad_setup_step", | |
| 66 | 68 | "todo_file_candidates", |
| 67 | 69 | ] |
| 68 | 70 | |
@@ -1426,6 +1428,24 @@ def _todo_describes_aggregate_mutation(text: str) -> bool: | ||
| 1426 | 1428 | ) |
| 1427 | 1429 | |
| 1428 | 1430 | |
| 1431 | +def todo_describes_aggregate_mutation(item: str) -> bool: | |
| 1432 | + """Return True when a todo describes a broad multi-artifact mutation step.""" | |
| 1433 | + | |
| 1434 | + text = item.strip().lower() | |
| 1435 | + if not text or item in _SPECIAL_TODO_ITEMS: | |
| 1436 | + return False | |
| 1437 | + return _todo_describes_aggregate_mutation(text) | |
| 1438 | + | |
| 1439 | + | |
| 1440 | +def todo_describes_broad_setup_step(item: str) -> bool: | |
| 1441 | + """Return True when a todo is primarily about directory/setup scaffolding.""" | |
| 1442 | + | |
| 1443 | + text = item.strip().lower() | |
| 1444 | + if not text or item in _SPECIAL_TODO_ITEMS: | |
| 1445 | + return False | |
| 1446 | + return _contains_any(text, _BROAD_SETUP_HINTS) | |
| 1447 | + | |
| 1448 | + | |
| 1429 | 1449 | def _todo_requires_complete_artifact_set(text: str) -> bool: |
| 1430 | 1450 | return ( |
| 1431 | 1451 | _contains_any(text, _AGGREGATE_TODO_HINTS) |
tests/test_repair.pymodified@@ -953,6 +953,77 @@ def test_empty_response_retry_prefers_pending_index_over_broad_directory_headlin | ||
| 953 | 953 | ) |
| 954 | 954 | |
| 955 | 955 | |
| 956 | +def test_empty_response_retry_uses_concrete_file_language_for_aggregate_chapter_step( | |
| 957 | + temp_dir: Path, | |
| 958 | +) -> None: | |
| 959 | + context = build_context( | |
| 960 | + temp_dir=temp_dir, | |
| 961 | + use_react=False, | |
| 962 | + ) | |
| 963 | + repairer = ResponseRepairer(context) | |
| 964 | + | |
| 965 | + guide_root = temp_dir / "guides" / "nginx" | |
| 966 | + chapters = guide_root / "chapters" | |
| 967 | + chapters.mkdir(parents=True) | |
| 968 | + index_path = guide_root / "index.html" | |
| 969 | + index_path.write_text( | |
| 970 | + "\n".join( | |
| 971 | + [ | |
| 972 | + "<html>", | |
| 973 | + '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>', | |
| 974 | + '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>', | |
| 975 | + "</html>", | |
| 976 | + ] | |
| 977 | + ) | |
| 978 | + + "\n" | |
| 979 | + ) | |
| 980 | + | |
| 981 | + implementation_plan = temp_dir / "implementation.md" | |
| 982 | + implementation_plan.write_text( | |
| 983 | + "\n".join( | |
| 984 | + [ | |
| 985 | + "# Implementation Plan", | |
| 986 | + "", | |
| 987 | + "## File Changes", | |
| 988 | + f"- `{guide_root}/`", | |
| 989 | + f"- `{chapters}/`", | |
| 990 | + f"- `{index_path}`", | |
| 991 | + "", | |
| 992 | + ] | |
| 993 | + ) | |
| 994 | + ) | |
| 995 | + | |
| 996 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 997 | + dod.implementation_plan = str(implementation_plan) | |
| 998 | + dod.touched_files.append(str(index_path)) | |
| 999 | + dod.completed_items.append("Develop the main index.html file with proper structure") | |
| 1000 | + dod.pending_items.append("Create chapter files with content and structure") | |
| 1001 | + | |
| 1002 | + decision = repairer.handle_empty_response( | |
| 1003 | + task="Create a multi-file nginx guide.", | |
| 1004 | + original_task=None, | |
| 1005 | + empty_retry_count=3, | |
| 1006 | + max_empty_retries=4, | |
| 1007 | + dod=dod, | |
| 1008 | + ) | |
| 1009 | + | |
| 1010 | + assert decision.should_continue is True | |
| 1011 | + assert decision.retry_message is not None | |
| 1012 | + assert "Next missing planned artifact: `01-introduction.html`" in decision.retry_message | |
| 1013 | + assert ( | |
| 1014 | + "Resume with this exact next step: create `01-introduction.html`." | |
| 1015 | + in decision.retry_message | |
| 1016 | + ) | |
| 1017 | + assert ( | |
| 1018 | + "It is the next concrete output needed to continue `Create chapter files with content and structure`." | |
| 1019 | + in decision.retry_message | |
| 1020 | + ) | |
| 1021 | + assert ( | |
| 1022 | + "continue `Create chapter files with content and structure` by creating `01-introduction.html`." | |
| 1023 | + not in decision.retry_message | |
| 1024 | + ) | |
| 1025 | + | |
| 1026 | + | |
| 956 | 1027 | def test_empty_response_retry_prefers_output_index_over_reference_index_with_same_name( |
| 957 | 1028 | temp_dir: Path, |
| 958 | 1029 | ) -> None: |
tests/test_tool_batches.pymodified@@ -2480,6 +2480,138 @@ async def test_tool_batch_runner_softens_first_file_handoff_after_recovery_promp | ||
| 2480 | 2480 | assert "Resume by creating `01-introduction.html` now." in message |
| 2481 | 2481 | |
| 2482 | 2482 | |
| 2483 | +@pytest.mark.asyncio | |
| 2484 | +async def test_tool_batch_runner_todowrite_uses_concrete_output_language_for_aggregate_chapter_step( | |
| 2485 | + temp_dir: Path, | |
| 2486 | +) -> None: | |
| 2487 | + async def assess_confidence( | |
| 2488 | + tool_name: str, | |
| 2489 | + tool_args: dict, | |
| 2490 | + context: str, | |
| 2491 | + ) -> ConfidenceAssessment: | |
| 2492 | + raise AssertionError("Confidence scoring should not run in this scenario") | |
| 2493 | + | |
| 2494 | + async def verify_action( | |
| 2495 | + tool_name: str, | |
| 2496 | + tool_args: dict, | |
| 2497 | + result: str, | |
| 2498 | + expected: str = "", | |
| 2499 | + ) -> ActionVerification: | |
| 2500 | + raise AssertionError("Verification should not run in this scenario") | |
| 2501 | + | |
| 2502 | + guide_root = temp_dir / "guides" / "nginx" | |
| 2503 | + chapters = guide_root / "chapters" | |
| 2504 | + chapters.mkdir(parents=True) | |
| 2505 | + index_path = guide_root / "index.html" | |
| 2506 | + index_path.write_text( | |
| 2507 | + "\n".join( | |
| 2508 | + [ | |
| 2509 | + "<html>", | |
| 2510 | + '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>', | |
| 2511 | + '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>', | |
| 2512 | + "</html>", | |
| 2513 | + ] | |
| 2514 | + ) | |
| 2515 | + + "\n" | |
| 2516 | + ) | |
| 2517 | + | |
| 2518 | + implementation_plan = temp_dir / "implementation.md" | |
| 2519 | + implementation_plan.write_text( | |
| 2520 | + "\n".join( | |
| 2521 | + [ | |
| 2522 | + "# Implementation Plan", | |
| 2523 | + "", | |
| 2524 | + "## File Changes", | |
| 2525 | + f"- `{guide_root}/`", | |
| 2526 | + f"- `{chapters}/`", | |
| 2527 | + f"- `{index_path}`", | |
| 2528 | + "", | |
| 2529 | + ] | |
| 2530 | + ) | |
| 2531 | + ) | |
| 2532 | + | |
| 2533 | + context = build_context( | |
| 2534 | + temp_dir=temp_dir, | |
| 2535 | + messages=[], | |
| 2536 | + safeguards=FakeSafeguards(), | |
| 2537 | + assess_confidence=assess_confidence, | |
| 2538 | + verify_action=verify_action, | |
| 2539 | + ) | |
| 2540 | + queued_messages: list[str] = [] | |
| 2541 | + context.queue_steering_message_callback = queued_messages.append | |
| 2542 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 2543 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 2544 | + dod.implementation_plan = str(implementation_plan) | |
| 2545 | + dod.touched_files.append(str(index_path)) | |
| 2546 | + sync_todos_to_definition_of_done( | |
| 2547 | + dod, | |
| 2548 | + [ | |
| 2549 | + { | |
| 2550 | + "content": "Develop the main index.html file with proper structure", | |
| 2551 | + "active_form": "Developing the main index.html file with proper structure", | |
| 2552 | + "status": "completed", | |
| 2553 | + }, | |
| 2554 | + { | |
| 2555 | + "content": "Create chapter files with content and structure", | |
| 2556 | + "active_form": "Creating chapter files with content and structure", | |
| 2557 | + "status": "pending", | |
| 2558 | + }, | |
| 2559 | + ], | |
| 2560 | + ) | |
| 2561 | + | |
| 2562 | + todos = [ | |
| 2563 | + { | |
| 2564 | + "content": "Develop the main index.html file with proper structure", | |
| 2565 | + "active_form": "Developing the main index.html file with proper structure", | |
| 2566 | + "status": "completed", | |
| 2567 | + }, | |
| 2568 | + { | |
| 2569 | + "content": "Create chapter files with content and structure", | |
| 2570 | + "active_form": "Creating chapter files with content and structure", | |
| 2571 | + "status": "pending", | |
| 2572 | + }, | |
| 2573 | + ] | |
| 2574 | + tool_call = ToolCall( | |
| 2575 | + id="todo-aggregate", | |
| 2576 | + name="TodoWrite", | |
| 2577 | + arguments={"todos": todos}, | |
| 2578 | + ) | |
| 2579 | + executor = FakeExecutor( | |
| 2580 | + [ | |
| 2581 | + tool_outcome( | |
| 2582 | + tool_call=tool_call, | |
| 2583 | + output="Todos updated", | |
| 2584 | + is_error=False, | |
| 2585 | + metadata={"new_todos": todos}, | |
| 2586 | + ) | |
| 2587 | + ] | |
| 2588 | + ) | |
| 2589 | + | |
| 2590 | + summary = TurnSummary(final_response="") | |
| 2591 | + await runner.execute_batch( | |
| 2592 | + tool_calls=[tool_call], | |
| 2593 | + tool_source="assistant", | |
| 2594 | + pending_tool_calls_seen=set(), | |
| 2595 | + emit=_noop_emit, | |
| 2596 | + summary=summary, | |
| 2597 | + dod=dod, | |
| 2598 | + executor=executor, # type: ignore[arg-type] | |
| 2599 | + on_confirmation=None, | |
| 2600 | + on_user_question=None, | |
| 2601 | + emit_confirmation=None, | |
| 2602 | + consecutive_errors=0, | |
| 2603 | + ) | |
| 2604 | + | |
| 2605 | + assert queued_messages | |
| 2606 | + message = queued_messages[-1] | |
| 2607 | + assert "Continue with the next concrete output: `01-introduction.html`." in message | |
| 2608 | + assert "Resume by creating `01-introduction.html` now." in message | |
| 2609 | + assert ( | |
| 2610 | + "Continue with the next pending item: `Create chapter files with content and structure`." | |
| 2611 | + not in message | |
| 2612 | + ) | |
| 2613 | + | |
| 2614 | + | |
| 2483 | 2615 | @pytest.mark.asyncio |
| 2484 | 2616 | async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_review( |
| 2485 | 2617 | temp_dir: Path, |