loader Public

Watch 0 Fork 0 Star 0

Python · 8688 bytes Raw Blame History

  
        1
        """Tests for turn bootstrap and workflow preparation."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        from pathlib import Path
      
        6
        
        7
        import pytest
      
        8
        
        9
        from loader.agent.loop import AgentConfig
      
        10
        from loader.llm.base import CompletionResponse, ToolCall
      
        11
        from loader.runtime.completion_trace import CompletionTraceEntry
      
        12
        from loader.runtime.conversation import ConversationRuntime
      
        13
        from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
      
        14
        from loader.runtime.runtime_handle import RuntimeHandle
      
        15
        from tests.helpers.runtime_harness import ScriptedBackend
      
        16
        
        17
        
        18
        def non_streaming_config() -> AgentConfig:
            """Build the AgentConfig shared by the turn-preparation tests.

            ``auto_context`` and ``stream`` are disabled and ``max_iterations``
            is set to 8.
            """
            settings = {"auto_context": False, "stream": False, "max_iterations": 8}
            return AgentConfig(**settings)
      
        22
        
        23
        
        24
        @pytest.mark.asyncio
        async def test_turn_preparation_bootstraps_execute_turn_state(
            temp_dir: Path,
        ) -> None:
            """Check that preparing an execute-mode turn bootstraps fresh turn state.

            The session is seeded with a completion decision and one completion-trace
            entry before ``prepare`` runs. Afterwards the decision code must be
            ``None`` and the trace empty, while the session's task, DoD path and
            workflow mode match the prepared turn.
            """
            handle = RuntimeHandle(
                backend=ScriptedBackend(),
                config=non_streaming_config(),
                project_root=temp_dir,
            )
            runtime = ConversationRuntime(handle)
            emitted = []
            task = "Update README.md heading."

            # Seed completion state that the assertions below expect to be cleared.
            prior_code = "verification_passed"
            prior_summary = "accepted the response after verification evidence passed"
            session = handle.session
            session.last_completion_decision_code = prior_code
            session.last_completion_decision_summary = prior_summary
            seeded_entry = CompletionTraceEntry(
                stage="definition_of_done",
                outcome="complete",
                decision_code=prior_code,
                decision_summary=prior_summary,
            )
            session.append_completion_trace_entry(seeded_entry)

            async def record(event) -> None:
                emitted.append(event)

            prepared = await runtime.turn_preparation.prepare(
                task=task,
                emit=record,
                requested_mode="execute",
                original_task=None,
                on_user_question=None,
            )
            dod = prepared.definition_of_done

            # Shape of the prepared turn itself.
            assert prepared.task == task
            assert prepared.effective_task == task
            assert prepared.executor is not None
            assert prepared.rollback_plan is not None
            assert prepared.effective_max_tokens >= 512
            assert prepared.summary.workflow_mode == "execute"
            assert dod.current_mode == "execute"
            assert dod.storage_path is not None

            # Session state after preparation.
            assert handle.session.current_task == task
            assert handle.session.active_dod_path == dod.storage_path
            assert handle.session.workflow_mode == "execute"
            assert handle.session.last_completion_decision_code is None
            assert handle.session.completion_trace == []

            # Events delivered to the emit callback.
            assert any(item.type == "dod_status" for item in emitted)
            assert any(
                item.type == "workflow_mode" and item.workflow_mode == "execute"
                for item in emitted
            )
      
        79
        
        80
        
        81
        @pytest.mark.asyncio
        async def test_turn_preparation_can_bootstrap_clarify_handoff(
            temp_dir: Path,
        ) -> None:
            """Check that a clarify-mode request hands off to execute mode.

            The scripted backend first replies with an ``AskUserQuestion`` tool call
            and then with a sectioned brief. The test expects the question to reach
            the ``on_user_question`` callback, a clarify brief file to exist on disk,
            and the emitted workflow modes to be "clarify" followed by "execute".
            """
            task = "Tighten Loader runtime clarify behavior."
            question_text = (
                "Should I keep the runtime change inside src/loader/runtime/workflow_lanes.py?"
            )

            # First scripted reply: the model asks the user one question.
            ask_user = CompletionResponse(
                content="I need one clarification before I proceed.",
                tool_calls=[
                    ToolCall(
                        id="ask-1",
                        name="AskUserQuestion",
                        arguments={"question": question_text},
                    )
                ],
            )

            # Second scripted reply: the brief, one markdown section per heading.
            brief_lines = [
                "## Task Statement",
                task,
                "",
                "## Desired Outcome",
                "- Keep the clarify handoff grounded in one runtime seam.",
                "",
                "## Non Goals",
                "- Do not broaden into unrelated CLI changes.",
                "",
                "## Decision Boundaries",
                "- Stop and confirm before crossing into other runtime modules.",
                "",
                "## Constraints",
                "- Stay within the current repository.",
                "",
                "## Likely Touchpoints",
                "- src/loader/runtime/workflow_lanes.py",
                "",
                "## Acceptance Criteria",
                "- workflow_lanes.py remains the primary touchpoint.",
            ]
            brief = CompletionResponse(content="\n".join(brief_lines))

            handle = RuntimeHandle(
                backend=ScriptedBackend(completions=[ask_user, brief]),
                config=non_streaming_config(),
                project_root=temp_dir,
            )
            runtime = ConversationRuntime(handle)
            emitted = []
            asked_questions: list[str] = []

            async def record(event) -> None:
                emitted.append(event)

            async def reply(question: str, _: list[str] | None) -> str:
                asked_questions.append(question)
                return "Yes, keep it inside workflow_lanes.py."

            prepared = await runtime.turn_preparation.prepare(
                task=task,
                emit=record,
                requested_mode="clarify",
                original_task=None,
                on_user_question=reply,
            )
            dod = prepared.definition_of_done
            timeline = prepared.summary.workflow_timeline

            assert asked_questions == [question_text]
            assert prepared.summary.workflow_mode == "execute"
            assert dod.current_mode == "execute"
            assert dod.clarify_brief is not None
            assert Path(dod.clarify_brief).exists()
            assert handle.session.workflow_mode == "execute"
            assert any(step.kind == "clarify_exit" for step in timeline)
            assert any(step.reason_code.startswith("post_clarify_") for step in timeline)

            # Only workflow_mode events with a truthy mode count towards the sequence.
            mode_sequence = [
                item.workflow_mode
                for item in emitted
                if item.type == "workflow_mode" and item.workflow_mode
            ]
            assert mode_sequence == ["clarify", "execute"]
      
        174
        
        175
        
        176
        @pytest.mark.asyncio
        async def test_turn_preparation_does_not_resume_latest_dod_from_older_session(
            temp_dir: Path,
        ) -> None:
            """Check that a saved DoD the session does not reference is not resumed.

            A DoD for the same task is saved to the store with status "fixing", one
            touched file and one mutating action, but ``active_dod_path`` is never
            set. The prepared DoD must have a different storage path, no touched
            files, no mutating actions, and only the
            "Complete the requested work" pending item.
            """
            handle = RuntimeHandle(
                backend=ScriptedBackend(),
                config=non_streaming_config(),
                project_root=temp_dir,
            )
            runtime = ConversationRuntime(handle)
            task = "Update /tmp/fortran/index.html so the chapter list matches the real files."

            # Persist a DoD without linking it to the session.
            stale_dod = create_definition_of_done(task)
            stale_dod.status = "fixing"
            stale_dod.touched_files.append("/tmp/fortran/index.html")
            stale_dod.mutating_actions.append("edit")
            store = DefinitionOfDoneStore(temp_dir)
            stale_path = store.save(stale_dod)

            emitted = []

            async def record(event) -> None:
                emitted.append(event)

            prepared = await runtime.turn_preparation.prepare(
                task=task,
                emit=record,
                requested_mode="execute",
                original_task=None,
                on_user_question=None,
            )
            fresh_dod = prepared.definition_of_done

            assert fresh_dod.storage_path != str(stale_path)
            assert fresh_dod.touched_files == []
            assert fresh_dod.mutating_actions == []
            assert fresh_dod.pending_items == ["Complete the requested work"]
      
        212
        
        213
        
        214
        @pytest.mark.asyncio
        async def test_turn_preparation_resumes_active_session_dod(
            temp_dir: Path,
        ) -> None:
            """Check that the DoD referenced by ``active_dod_path`` is resumed.

            A DoD with status "fixing" and one touched file is saved and its path is
            stored on the session. The prepared DoD must keep that storage path,
            the touched file and the "fixing" status.
            """
            handle = RuntimeHandle(
                backend=ScriptedBackend(),
                config=non_streaming_config(),
                project_root=temp_dir,
            )
            runtime = ConversationRuntime(handle)
            task = "Keep repairing the runtime state controller."
            touched_file = str(temp_dir / "index.html")

            # Persist an in-progress DoD and mark it as the session's active one.
            existing_dod = create_definition_of_done(task)
            existing_dod.status = "fixing"
            existing_dod.pending_items.append("Collect verification evidence")
            existing_dod.touched_files.append(touched_file)
            store = DefinitionOfDoneStore(temp_dir)
            existing_path = store.save(existing_dod)
            handle.session.active_dod_path = str(existing_path)

            emitted = []

            async def record(event) -> None:
                emitted.append(event)

            prepared = await runtime.turn_preparation.prepare(
                task=task,
                emit=record,
                requested_mode="execute",
                original_task=None,
                on_user_question=None,
            )
            resumed_dod = prepared.definition_of_done

            assert resumed_dod.storage_path == str(existing_path)
            assert resumed_dod.touched_files == [touched_file]
            assert resumed_dod.status == "fixing"

1	"""Tests for turn bootstrap and workflow preparation."""
2
3	from __future__ import annotations
4
5	from pathlib import Path
6
7	import pytest
8
9	from loader.agent.loop import AgentConfig
10	from loader.llm.base import CompletionResponse, ToolCall
11	from loader.runtime.completion_trace import CompletionTraceEntry
12	from loader.runtime.conversation import ConversationRuntime
13	from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
14	from loader.runtime.runtime_handle import RuntimeHandle
15	from tests.helpers.runtime_harness import ScriptedBackend
16
17
def non_streaming_config() -> AgentConfig:
    """Build the AgentConfig shared by the turn-preparation tests.

    ``auto_context`` and ``stream`` are disabled and ``max_iterations``
    is set to 8.
    """
    settings = {"auto_context": False, "stream": False, "max_iterations": 8}
    return AgentConfig(**settings)
22
23
@pytest.mark.asyncio
async def test_turn_preparation_bootstraps_execute_turn_state(
    temp_dir: Path,
) -> None:
    """Check that preparing an execute-mode turn bootstraps fresh turn state.

    The session is seeded with a completion decision and one completion-trace
    entry before ``prepare`` runs. Afterwards the decision code must be
    ``None`` and the trace empty, while the session's task, DoD path and
    workflow mode match the prepared turn.
    """
    handle = RuntimeHandle(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(handle)
    emitted = []
    task = "Update README.md heading."

    # Seed completion state that the assertions below expect to be cleared.
    prior_code = "verification_passed"
    prior_summary = "accepted the response after verification evidence passed"
    session = handle.session
    session.last_completion_decision_code = prior_code
    session.last_completion_decision_summary = prior_summary
    seeded_entry = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="complete",
        decision_code=prior_code,
        decision_summary=prior_summary,
    )
    session.append_completion_trace_entry(seeded_entry)

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=task,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    dod = prepared.definition_of_done

    # Shape of the prepared turn itself.
    assert prepared.task == task
    assert prepared.effective_task == task
    assert prepared.executor is not None
    assert prepared.rollback_plan is not None
    assert prepared.effective_max_tokens >= 512
    assert prepared.summary.workflow_mode == "execute"
    assert dod.current_mode == "execute"
    assert dod.storage_path is not None

    # Session state after preparation.
    assert handle.session.current_task == task
    assert handle.session.active_dod_path == dod.storage_path
    assert handle.session.workflow_mode == "execute"
    assert handle.session.last_completion_decision_code is None
    assert handle.session.completion_trace == []

    # Events delivered to the emit callback.
    assert any(item.type == "dod_status" for item in emitted)
    assert any(
        item.type == "workflow_mode" and item.workflow_mode == "execute"
        for item in emitted
    )
79
80
@pytest.mark.asyncio
async def test_turn_preparation_can_bootstrap_clarify_handoff(
    temp_dir: Path,
) -> None:
    """Check that a clarify-mode request hands off to execute mode.

    The scripted backend first replies with an ``AskUserQuestion`` tool call
    and then with a sectioned brief. The test expects the question to reach
    the ``on_user_question`` callback, a clarify brief file to exist on disk,
    and the emitted workflow modes to be "clarify" followed by "execute".
    """
    task = "Tighten Loader runtime clarify behavior."
    question_text = (
        "Should I keep the runtime change inside src/loader/runtime/workflow_lanes.py?"
    )

    # First scripted reply: the model asks the user one question.
    ask_user = CompletionResponse(
        content="I need one clarification before I proceed.",
        tool_calls=[
            ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={"question": question_text},
            )
        ],
    )

    # Second scripted reply: the brief, one markdown section per heading.
    brief_lines = [
        "## Task Statement",
        task,
        "",
        "## Desired Outcome",
        "- Keep the clarify handoff grounded in one runtime seam.",
        "",
        "## Non Goals",
        "- Do not broaden into unrelated CLI changes.",
        "",
        "## Decision Boundaries",
        "- Stop and confirm before crossing into other runtime modules.",
        "",
        "## Constraints",
        "- Stay within the current repository.",
        "",
        "## Likely Touchpoints",
        "- src/loader/runtime/workflow_lanes.py",
        "",
        "## Acceptance Criteria",
        "- workflow_lanes.py remains the primary touchpoint.",
    ]
    brief = CompletionResponse(content="\n".join(brief_lines))

    handle = RuntimeHandle(
        backend=ScriptedBackend(completions=[ask_user, brief]),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(handle)
    emitted = []
    asked_questions: list[str] = []

    async def record(event) -> None:
        emitted.append(event)

    async def reply(question: str, _: list[str] | None) -> str:
        asked_questions.append(question)
        return "Yes, keep it inside workflow_lanes.py."

    prepared = await runtime.turn_preparation.prepare(
        task=task,
        emit=record,
        requested_mode="clarify",
        original_task=None,
        on_user_question=reply,
    )
    dod = prepared.definition_of_done
    timeline = prepared.summary.workflow_timeline

    assert asked_questions == [question_text]
    assert prepared.summary.workflow_mode == "execute"
    assert dod.current_mode == "execute"
    assert dod.clarify_brief is not None
    assert Path(dod.clarify_brief).exists()
    assert handle.session.workflow_mode == "execute"
    assert any(step.kind == "clarify_exit" for step in timeline)
    assert any(step.reason_code.startswith("post_clarify_") for step in timeline)

    # Only workflow_mode events with a truthy mode count towards the sequence.
    mode_sequence = [
        item.workflow_mode
        for item in emitted
        if item.type == "workflow_mode" and item.workflow_mode
    ]
    assert mode_sequence == ["clarify", "execute"]
174
175
@pytest.mark.asyncio
async def test_turn_preparation_does_not_resume_latest_dod_from_older_session(
    temp_dir: Path,
) -> None:
    """Check that a saved DoD the session does not reference is not resumed.

    A DoD for the same task is saved to the store with status "fixing", one
    touched file and one mutating action, but ``active_dod_path`` is never
    set. The prepared DoD must have a different storage path, no touched
    files, no mutating actions, and only the
    "Complete the requested work" pending item.
    """
    handle = RuntimeHandle(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(handle)
    task = "Update /tmp/fortran/index.html so the chapter list matches the real files."

    # Persist a DoD without linking it to the session.
    stale_dod = create_definition_of_done(task)
    stale_dod.status = "fixing"
    stale_dod.touched_files.append("/tmp/fortran/index.html")
    stale_dod.mutating_actions.append("edit")
    store = DefinitionOfDoneStore(temp_dir)
    stale_path = store.save(stale_dod)

    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=task,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    fresh_dod = prepared.definition_of_done

    assert fresh_dod.storage_path != str(stale_path)
    assert fresh_dod.touched_files == []
    assert fresh_dod.mutating_actions == []
    assert fresh_dod.pending_items == ["Complete the requested work"]
212
213
@pytest.mark.asyncio
async def test_turn_preparation_resumes_active_session_dod(
    temp_dir: Path,
) -> None:
    """Check that the DoD referenced by ``active_dod_path`` is resumed.

    A DoD with status "fixing" and one touched file is saved and its path is
    stored on the session. The prepared DoD must keep that storage path,
    the touched file and the "fixing" status.
    """
    handle = RuntimeHandle(
        backend=ScriptedBackend(),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(handle)
    task = "Keep repairing the runtime state controller."
    touched_file = str(temp_dir / "index.html")

    # Persist an in-progress DoD and mark it as the session's active one.
    existing_dod = create_definition_of_done(task)
    existing_dod.status = "fixing"
    existing_dod.pending_items.append("Collect verification evidence")
    existing_dod.touched_files.append(touched_file)
    store = DefinitionOfDoneStore(temp_dir)
    existing_path = store.save(existing_dod)
    handle.session.active_dod_path = str(existing_path)

    emitted = []

    async def record(event) -> None:
        emitted.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=task,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    resumed_dod = prepared.definition_of_done

    assert resumed_dod.storage_path == str(existing_path)
    assert resumed_dod.touched_files == [touched_file]
    assert resumed_dod.status == "fixing"