tenseleyflow/loader / 19e1fc6

Browse files

Keep study handoffs persistent

Authored by espadonne
SHA
19e1fc672874d643950dcc07700c02c38d8d0466
Parents
e0ebfab
Tree
c625fff

2 changed files

Status  File  +  -
M src/loader/runtime/tool_batches.py 3 3
M tests/test_tool_batches.py 91 0
src/loader/runtime/tool_batches.py (modified)
@@ -820,13 +820,13 @@ class ToolBatchRunner:
820820
                     messages=list(getattr(self.context.session, "messages", []) or []),
821821
                 )
822822
                 if compact_handoff:
823
-                    self.context.queue_ephemeral_steering_message(
823
+                    self.context.queue_steering_message(
824824
                         f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
825825
                         f"`{tool_call.name}` result. {compact_handoff}"
826826
                         " Do not reread reference material or spend the next turn on bookkeeping."
827827
                     )
828828
                     return
829
-            self.context.queue_ephemeral_steering_message(
829
+            self.context.queue_steering_message(
830830
                 f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
831831
                 f"`{tool_call.name}` result. One declared output artifact is still missing."
832832
                 + _missing_artifact_resume_suffix(
@@ -851,7 +851,7 @@ class ToolBatchRunner:
851851
                     "more reference material and perform the change now."
852852
                 )
853853
 
854
-        self.context.queue_ephemeral_steering_message(
854
+        self.context.queue_steering_message(
855855
             f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
856856
             f"`{tool_call.name}` result. Continue with the next pending item: "
857857
             f"`{next_pending}` instead of rereading the same evidence.{mutation_suffix}"
tests/test_tool_batches.py (modified)
@@ -1870,6 +1870,97 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
18701870
     )
18711871
 
18721872
 
1873
+@pytest.mark.asyncio
1874
+async def test_tool_batch_runner_discovery_completion_handoff_stays_persistent(
1875
+    temp_dir: Path,
1876
+) -> None:
1877
+    async def assess_confidence(
1878
+        tool_name: str,
1879
+        tool_args: dict,
1880
+        context: str,
1881
+    ) -> ConfidenceAssessment:
1882
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
1883
+
1884
+    async def verify_action(
1885
+        tool_name: str,
1886
+        tool_args: dict,
1887
+        result: str,
1888
+        expected: str = "",
1889
+    ) -> ActionVerification:
1890
+        raise AssertionError("Verification should not run for this scenario")
1891
+
1892
+    reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
1893
+    reference.parent.mkdir(parents=True)
1894
+    reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")
1895
+
1896
+    context = build_context(
1897
+        temp_dir=temp_dir,
1898
+        messages=[],
1899
+        safeguards=FakeSafeguards(),
1900
+        assess_confidence=assess_confidence,
1901
+        verify_action=verify_action,
1902
+        auto_recover=False,
1903
+    )
1904
+    persistent_messages: list[str] = []
1905
+    ephemeral_messages: list[str] = []
1906
+    context.queue_steering_message_callback = persistent_messages.append
1907
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
1908
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
1909
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
1910
+    sync_todos_to_definition_of_done(
1911
+        dod,
1912
+        [
1913
+            {
1914
+                "content": "First, examine the existing fortran guide structure and content",
1915
+                "active_form": "Working on: First, examine the existing fortran guide structure and content",
1916
+                "status": "pending",
1917
+            },
1918
+            {
1919
+                "content": "Create the nginx directory structure",
1920
+                "active_form": "Working on: Create the nginx directory structure",
1921
+                "status": "pending",
1922
+            },
1923
+        ],
1924
+    )
1925
+    tool_call = ToolCall(
1926
+        id="read-reference",
1927
+        name="read",
1928
+        arguments={"file_path": str(reference)},
1929
+    )
1930
+    executor = FakeExecutor(
1931
+        [
1932
+            tool_outcome(
1933
+                tool_call=tool_call,
1934
+                output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n",
1935
+                is_error=False,
1936
+            )
1937
+        ]
1938
+    )
1939
+
1940
+    summary = TurnSummary(final_response="")
1941
+    await runner.execute_batch(
1942
+        tool_calls=[tool_call],
1943
+        tool_source="assistant",
1944
+        pending_tool_calls_seen=set(),
1945
+        emit=_noop_emit,
1946
+        summary=summary,
1947
+        dod=dod,
1948
+        executor=executor,  # type: ignore[arg-type]
1949
+        on_confirmation=None,
1950
+        on_user_question=None,
1951
+        emit_confirmation=None,
1952
+        consecutive_errors=0,
1953
+    )
1954
+
1955
+    assert persistent_messages
1956
+    assert any(
1957
+        "Continue with the next pending item: `Create the nginx directory structure`"
1958
+        in message
1959
+        for message in persistent_messages
1960
+    )
1961
+    assert ephemeral_messages == []
1962
+
1963
+
18731964
 @pytest.mark.asyncio
18741965
 async def test_tool_batch_runner_missing_artifact_nudge_prefers_pending_index_after_mkdir(
18751966
     temp_dir: Path,