tenseleyflow/loader / 721c739

Browse files

Handoff post-build audit reads

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
721c739c2bf9727483a62ec2e455238cf3a92dc3
Parents
9412f34
Tree
af0b580

2 changed files

Status | File | + | -
M src/loader/runtime/tool_batches.py 204 1
M tests/test_tool_batches.py 113 0
src/loader/runtime/tool_batches.py (modified)
@@ -32,7 +32,7 @@ from .logging import get_runtime_logger
3232
 from .path_display import display_runtime_path
3333
 from .policy_timeline import append_verification_timeline_entry
3434
 from .recovery import RecoveryContext, detect_missing_mutation_payload
35
-from .repair_focus import extract_active_repair_context
35
+from .repair_focus import extract_active_repair_context, path_within_allowed_roots
3636
 from .safeguard_services import extract_shell_text_rewrite_target
3737
 from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate
3838
 from .tool_batch_recovery import ToolBatchRecoveryController
@@ -118,6 +118,25 @@ _SUMMARY_ARTIFACT_NAMES = {
118118
     "readme.rst",
119119
     "readme.txt",
120120
 }
121
# Tool names whose calls are considered for the observation handoff nudge.
_OBSERVATION_TOOLS = frozenset(("read", "glob", "grep", "bash"))

# A bash command is only treated as read-only when its first word is one of
# these inspection commands.
_READ_ONLY_BASH_PREFIXES = frozenset(
    (
        "ls",
        "pwd",
        "find",
        "stat",
        "cat",
        "head",
        "tail",
        "rg",
        "grep",
    )
)

# Substrings that, when present anywhere in a bash command, disqualify it from
# being treated as read-only.
_MUTATING_BASH_FRAGMENTS = (
    # Output redirection.
    " >",
    ">>",
    "| tee",
    # Filesystem changes.
    "touch ",
    "mkdir ",
    "rm ",
    "mv ",
    "cp ",
    # In-place text rewrites.
    "sed -i",
    "perl -pi",
    # Git operations that alter the index or working tree.
    "git add",
    "git commit",
    "git apply",
)
121140
 
122141
 
123142
 @dataclass
@@ -325,6 +344,10 @@ class ToolBatchRunner:
325344
                 self._queue_blocked_html_edit_nudge(tool_call, outcome.event_content)
326345
             else:
327346
                 self._queue_post_mutation_self_audit_nudge(tool_call, dod=dod)
347
+                self._queue_completed_artifact_observation_handoff_nudge(
348
+                    tool_call,
349
+                    dod=dod,
350
+                )
328351
 
329352
             should_continue = await self.verification_gate.should_continue(
330353
                 tool_call=tool_call,
@@ -545,6 +568,77 @@ class ToolBatchRunner:
545568
             + " Do not spend another turn rereading the file you just wrote or on TodoWrite alone."
546569
         )
547570
 
571
+    def _queue_completed_artifact_observation_handoff_nudge(
572
+        self,
573
+        tool_call: ToolCall,
574
+        *,
575
+        dod: DefinitionOfDone,
576
+    ) -> None:
577
+        """Turn successful post-build audit reads into verify/finalize handoffs."""
578
+
579
+        if tool_call.name not in _OBSERVATION_TOOLS:
580
+            return
581
+        if dod.status in {"fixing", "done"}:
582
+            return
583
+        if extract_active_repair_context(self.context.session.messages) is not None:
584
+            return
585
+        if not all_planned_artifacts_exist(dod, project_root=self.context.project_root):
586
+            return
587
+
588
+        observed_paths = _extract_observation_paths(tool_call)
589
+        if not observed_paths:
590
+            return
591
+
592
+        planned_roots = _planned_output_roots(
593
+            dod,
594
+            project_root=self.context.project_root,
595
+        )
596
+        if not planned_roots:
597
+            return
598
+        if not all(path_within_allowed_roots(path, planned_roots) for path in observed_paths):
599
+            return
600
+
601
+        next_pending = preferred_pending_todo_item(
602
+            dod,
603
+            project_root=self.context.project_root,
604
+        )
605
+        verification_commands = dod.verification_commands or derive_verification_commands(
606
+            dod,
607
+            project_root=self.context.project_root,
608
+            task_statement=getattr(self.context.session, "current_task", "") or "",
609
+            supplement_existing=True,
610
+        )
611
+        roots_preview = ", ".join(f"`{root}`" for root in planned_roots[:2])
612
+        if len(planned_roots) > 2:
613
+            roots_preview += ", ..."
614
+
615
+        if next_pending and _todo_is_consistency_review_step(next_pending):
616
+            verification_suffix = (
617
+                " If no specific mismatch remains, move to verification now."
618
+                if verification_commands
619
+                else " If no specific mismatch remains, finish the task now."
620
+            )
621
+            self.context.queue_ephemeral_steering_message(
622
+                "All explicitly planned artifacts already exist. "
623
+                f"Continue with `{next_pending}` using the generated files under {roots_preview} "
624
+                "as the source of truth, but do not keep broad-rereading the output set. "
625
+                "If you already know a concrete mismatch, fix it directly."
626
+                + verification_suffix
627
+            )
628
+            return
629
+
630
+        verification_suffix = (
631
+            "Move to verification or final confirmation using the files already on disk."
632
+            if verification_commands
633
+            else "Finish the task using the files already on disk."
634
+        )
635
+        self.context.queue_ephemeral_steering_message(
636
+            "All explicitly planned artifacts already exist. "
637
+            f"Use the generated files under {roots_preview} as the source of truth and stop broad rereads. "
638
+            "If you already know a concrete mismatch, fix it directly. "
639
+            + verification_suffix
640
+        )
641
+
548642
     def _queue_blocked_shell_rewrite_nudge(self, tool_call: ToolCall) -> None:
549643
         """Steer the model back to file tools after a blocked shell text rewrite."""
550644
 
@@ -1571,6 +1665,115 @@ def _todo_is_consistency_review_step(item: str) -> bool:
15711665
     return any(hint in text for hint in _CONSISTENCY_REVIEW_HINTS)
15721666
 
15731667
 
1668
def _planned_output_roots(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> tuple[str, ...]:
    """Return the distinct output roots of the planned artifacts, in first-seen order.

    A target flagged as a directory is its own root; any other target
    contributes its parent directory.
    """
    targets = collect_planned_artifact_targets(
        dod,
        project_root=project_root,
    )
    roots = (
        str(target if expect_directory else target.parent)
        for target, expect_directory in targets
    )
    # dict.fromkeys drops repeats while keeping the first occurrence order.
    return tuple(dict.fromkeys(roots))
1685
+
1686
+
1687
def _extract_observation_paths(tool_call: ToolCall) -> list[str]:
    """Return the filesystem locations a read/glob/grep/bash call inspected.

    Args:
        tool_call: The call to analyse; only ``name`` and ``arguments`` are read.

    Returns:
        The inspected paths, or an empty list when no location can be
        attributed to the call (missing arguments, non-read-only bash, ...).
    """
    arguments = tool_call.arguments

    def text_argument(key: str) -> str:
        # A missing or explicit None value must not turn into the text "None".
        value = arguments.get(key)
        return "" if value is None else str(value).strip()

    if tool_call.name == "read":
        file_path = text_argument("file_path")
        return [file_path] if file_path else []

    if tool_call.name in {"glob", "grep"}:
        search_path = text_argument("path")
        pattern = text_argument("pattern")

        if tool_call.name == "grep":
            # NOTE(review): assumes grep's `pattern` is a search expression
            # rather than a filesystem glob -- confirm against the grep tool
            # schema. Under that assumption a "/" inside the pattern says
            # nothing about where the search ran, so only `path` is used.
            return [str(Path(search_path).expanduser())] if search_path else []

        if search_path:
            return [_derive_search_anchor(search_path, pattern)]
        if pattern.startswith(("/", "~")):
            # A rooted glob with no explicit path: anchor at its directory.
            return [str(Path(pattern).expanduser().parent)]
        return []

    command = text_argument("command")
    if not _is_read_only_bash(command):
        return []
    return _extract_bash_paths(command)
1711
+
1712
+
1713
# Characters that mark a path component as a glob wildcard, not a literal name.
_GLOB_WILDCARD_CHARS = frozenset("*?[{")


def _literal_leading_parts(parts: tuple[str, ...]) -> tuple[str, ...]:
    """Return the leading path components that contain no glob wildcard."""
    literal: list[str] = []
    for part in parts:
        if not _GLOB_WILDCARD_CHARS.isdisjoint(part):
            break
        literal.append(part)
    return tuple(literal)


def _resolve_best_effort(path: Path) -> str:
    """Resolve *path* without requiring it to exist, falling back to the raw path."""
    try:
        return str(path.resolve(strict=False))
    except (OSError, RuntimeError, ValueError):
        # Symlink loops, OS-level failures, or malformed path text.
        return str(path)


def _derive_search_anchor(search_path: str, pattern: str) -> str:
    """Return the most specific literal directory a path-plus-glob search targets.

    Args:
        search_path: Directory the search was rooted at.
        pattern: Glob pattern applied under ``search_path``; may be empty.

    Returns:
        ``search_path`` (user-expanded) when the pattern adds no literal
        directory prefix; otherwise the resolved directory formed by the
        pattern's leading wildcard-free components. Wildcard components such
        as ``**`` never appear in the result.
    """
    base = str(Path(search_path).expanduser())
    normalized_pattern = pattern.strip()
    if not normalized_pattern:
        return base

    if normalized_pattern.startswith(("~", "/")):
        # A rooted pattern overrides search_path entirely.
        parent = Path(normalized_pattern).expanduser().parent
        literal = _literal_leading_parts(parent.parts)
        return _resolve_best_effort(Path(*literal) if literal else parent)

    if "/" in normalized_pattern:
        prefix = normalized_pattern.rsplit("/", 1)[0].strip()
        if prefix and prefix not in {".", ".."}:
            literal = _literal_leading_parts(Path(prefix).parts)
            if literal:
                return _resolve_best_effort(
                    Path(base).joinpath(*literal).expanduser()
                )
    return base
1733
+
1734
+
1735
# Standalone tokens that start a new command within one shell line.
_SHELL_CONTROL_TOKENS = frozenset({"|", "||", "&&", ";", "&"})

# `find` actions that delete files, run other programs, or write to files.
_FIND_SIDE_EFFECT_ACTIONS = frozenset(
    {
        "-delete",
        "-exec",
        "-execdir",
        "-ok",
        "-okdir",
        "-fprint",
        "-fprint0",
        "-fprintf",
        "-fls",
    }
)


def _is_read_only_bash(command: str) -> bool:
    """Return True when *command* only inspects the filesystem.

    The command is rejected when it is empty, is recognised as a shell text
    rewrite, contains a known mutating fragment, cannot be tokenised, or when
    any command in an ``&&`` / ``||`` / ``;`` / ``|`` chain does not start with
    a read-only prefix. ``find`` invocations that use a deleting, executing,
    or file-writing action are rejected as well.

    Operators are only recognised as standalone tokens, so a quoted lone
    operator (e.g. ``grep "|" file``) is conservatively treated as a chain.
    """
    normalized = " ".join(command.split())
    if not normalized:
        return False
    if extract_shell_text_rewrite_target(normalized) is not None:
        return False
    if any(fragment in normalized for fragment in _MUTATING_BASH_FRAGMENTS):
        return False
    try:
        argv = shlex.split(normalized)
    except ValueError:
        return False
    if not argv:
        return False

    awaiting_command = True
    current_command = ""
    for token in argv:
        if token in _SHELL_CONTROL_TOKENS:
            if awaiting_command:
                # An operator with no command in front of it is malformed.
                return False
            awaiting_command = True
            continue
        if awaiting_command:
            # Every command in the chain must itself be read-only, not just
            # the first one.
            if token not in _READ_ONLY_BASH_PREFIXES:
                return False
            current_command = token
            awaiting_command = False
        elif current_command == "find" and token in _FIND_SIDE_EFFECT_ACTIONS:
            return False
    # A trailing operator (e.g. backgrounding with `&`) leaves the chain open.
    return not awaiting_command
1750
+
1751
+
1752
# Standalone shell operators; their presence means operands cannot be
# attributed to a single command.
_PATH_SCAN_STOP_TOKENS = frozenset({"|", "||", "&&", ";", "&"})

# Options whose *following* argument is a value, not a path operand.
_BASH_OPTIONS_WITH_VALUE = {
    "head": frozenset({"-n", "-c"}),
    "tail": frozenset({"-n", "-c"}),
    "stat": frozenset({"-c"}),
    "grep": frozenset(
        {
            "-e", "-f", "-m", "-A", "-B", "-C",
            "--regexp", "--file", "--max-count",
            "--include", "--exclude", "--exclude-dir",
            "--after-context", "--before-context", "--context",
        }
    ),
    "rg": frozenset(
        {
            "-e", "-f", "-g", "-t", "-T", "-m", "-A", "-B", "-C", "-j",
            "--regexp", "--file", "--glob", "--type", "--type-not",
            "--max-count", "--after-context", "--before-context", "--context",
        }
    ),
}

# Options that supply the search pattern, so no positional pattern follows.
_BASH_PATTERN_OPTIONS = frozenset({"-e", "-f", "--regexp", "--file"})


def _split_bash_operands(command_name: str, args: list[str]) -> tuple[list[str], bool]:
    """Separate positional operands from options.

    Returns the operands plus a flag telling whether a search pattern was
    supplied through an option (``-e``/``-f``/``--regexp``/``--file``).
    """
    value_options = _BASH_OPTIONS_WITH_VALUE.get(command_name, frozenset())
    operands: list[str] = []
    pattern_from_option = False
    options_ended = False
    expect_value = False
    for arg in args:
        if expect_value:
            # This argument is the value of the preceding option.
            expect_value = False
            continue
        if not options_ended and arg.startswith("-"):
            if arg == "--":
                options_ended = True
                continue
            if arg in _BASH_PATTERN_OPTIONS or arg.startswith(("--regexp=", "--file=")):
                pattern_from_option = True
            expect_value = arg in value_options
            continue
        operands.append(arg)
    return operands, pattern_from_option


def _cwd_if_dot(arg: str) -> str:
    """Map ``.`` / ``./`` to the current working directory; keep other args."""
    return str(Path.cwd()) if arg in {".", "./"} else arg


def _find_starting_points(args: list[str]) -> list[str]:
    """Return the starting points of a ``find`` invocation.

    Starting points are the operands that precede the expression; when none
    are given the current working directory is used.
    """
    index = 0
    while index < len(args):
        arg = args[index]
        if arg in {"-H", "-L", "-P"} or arg.startswith("-O"):
            index += 1
        elif arg == "-D":
            index += 2  # -D takes a separate debug-options argument
        else:
            break
    roots: list[str] = []
    for arg in args[index:]:
        if arg.startswith(("-", "(", "!")):
            break  # start of the find expression
        roots.append(_cwd_if_dot(arg))
    return roots or [str(Path.cwd())]


def _extract_bash_paths(command: str) -> list[str]:
    """Return the path operands of a read-only bash command.

    Args:
        command: The shell command text.

    Returns:
        Paths the command inspects. Empty when the command cannot be
        tokenised, is empty, is not one of the recognised inspection commands,
        or contains shell operators (pipes/chains), in which case operands
        cannot be attributed reliably.

    For ``grep``/``rg`` the positional search pattern is skipped, and for all
    commands the values of common value-taking options (e.g. ``head -n 20``)
    are not mistaken for paths.
    """
    try:
        argv = shlex.split(command)
    except ValueError:
        return []
    if not argv:
        return []
    if any(token in _PATH_SCAN_STOP_TOKENS for token in argv):
        return []

    command_name, *args = argv
    if command_name == "pwd":
        return [str(Path.cwd())]
    if command_name == "find":
        return _find_starting_points(args)
    if command_name not in {"ls", "stat", "cat", "head", "tail", "rg", "grep"}:
        return []

    operands, pattern_from_option = _split_bash_operands(command_name, args)
    if command_name in {"rg", "grep"}:
        if not pattern_from_option:
            # The first positional operand is the search pattern, not a path.
            operands = operands[1:]
        return [_cwd_if_dot(operand) for operand in operands]
    return operands
1775
+
1776
+
15741777
 def _should_prioritize_missing_artifact(
15751778
     *,
15761779
     dod: DefinitionOfDone,
tests/test_tool_batches.py (modified)
@@ -1961,6 +1961,119 @@ async def test_tool_batch_runner_duplicate_read_after_plan_complete_ignores_stal
19611961
     assert ephemeral_messages == []
19621962
 
19631963
 
1964
@pytest.mark.asyncio
async def test_tool_batch_runner_successful_read_after_plan_complete_pushes_review_handoff(
    temp_dir: Path,
) -> None:
    """A read of a finished planned artifact queues one review-handoff nudge.

    With every planned file on disk and a consistency-review todo pending, the
    runner should emit exactly one ephemeral steering message that names the
    review todo and points towards verification.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    # Build the finished output tree: an index page plus two chapter files.
    site_dir = temp_dir / "guides" / "nginx"
    chapter_dir = site_dir / "chapters"
    chapter_dir.mkdir(parents=True)
    fixtures = {
        site_dir / "index.html": "<html></html>\n",
        chapter_dir / "01-getting-started.html": "<h1>One</h1>\n",
        chapter_dir / "02-installation.html": "<h1>Two</h1>\n",
    }
    for fixture_path, fixture_body in fixtures.items():
        fixture_path.write_text(fixture_body)
    _index_page, first_chapter, _second_chapter = fixtures

    # The plan lists both directories and every file as planned changes.
    plan_entries = [
        f"`{site_dir}/`",
        f"`{chapter_dir}/`",
        *(f"`{fixture_path}`" for fixture_path in fixtures),
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# Implementation Plan\n\n## File Changes\n"
        + "".join(f"- {entry}\n" for entry in plan_entries)
    )

    runtime_context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    persistent_messages: list[str] = []
    ephemeral_messages: list[str] = []
    runtime_context.queue_steering_message_callback = persistent_messages.append
    runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
    runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    dod.verification_commands = [f"ls -la {site_dir}"]
    pending_todos = [
        {
            "content": "Create 01-getting-started.html",
            "active_form": "Creating 01-getting-started.html",
            "status": "pending",
        },
        {
            "content": "Ensure all files are properly linked and formatted consistently",
            "active_form": "Reviewing guide consistency and linkage",
            "status": "pending",
        },
    ]
    sync_todos_to_definition_of_done(dod, pending_todos)

    tool_call = ToolCall(
        id="read-built-review",
        name="read",
        arguments={"file_path": str(first_chapter)},
    )
    read_outcome = tool_outcome(
        tool_call=tool_call,
        output=first_chapter.read_text(),
        is_error=False,
    )
    executor = FakeExecutor([read_outcome])

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert persistent_messages == []
    assert len(ephemeral_messages) == 1
    message = ephemeral_messages[0]
    assert "All explicitly planned artifacts already exist." in message
    assert "Ensure all files are properly linked and formatted consistently" in message
    # The stale "create" todo must not be surfaced once its file exists.
    assert "Create 01-getting-started.html" not in message
    assert "do not keep broad-rereading the output set" in message
    assert "If no specific mismatch remains, move to verification now." in message
2075
+
2076
+
19642077
 @pytest.mark.asyncio
19652078
 async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
19662079
     temp_dir: Path,