Python · 8951 bytes Raw Blame History
1 """Tests for the Sprint 06 read-only explore lane."""
2
3 from __future__ import annotations
4
5 import pytest
6
7 from loader.agent.loop import Agent, AgentConfig
8 from loader.llm.base import CompletionResponse, ToolCall
9 from loader.runtime.explore_state import ExploreStateStore
10 from loader.runtime.permissions import PermissionMode
11 from tests.helpers.runtime_harness import ScriptedBackend
12
13
class DescribingBackend(ScriptedBackend):
    """Scripted backend whose ``describe_model`` reports no native tool support."""

    async def describe_model(self) -> dict[str, object]:
        """Switch the native-tools flag off and return the matching description."""
        native_tools = False
        self._supports_native_tools = native_tools
        return {"supports_native_tools": native_tools}
18
19
@pytest.mark.asyncio
async def test_explore_mode_skips_workflow_router_and_definition_of_done(temp_dir) -> None:
    """An explore run answers the query without DoD or workflow-mode events.

    The scripted backend first issues a ``grep`` tool call and then a final
    answer. The test checks the answer, the absence of ``dod_status`` and
    ``workflow_mode`` events, the recorded turn summary, and that no
    ``.loader/dod`` path was created under the project root.
    """
    (temp_dir / "feature.py").write_text("def important_helper():\n return 1\n")

    grep_call = ToolCall(
        id="grep-1",
        name="grep",
        arguments={
            "pattern": "important_helper",
            "path": str(temp_dir),
            "include": "*.py",
        },
    )
    scripted_turns = [
        CompletionResponse(
            content="I'll search for that helper.",
            tool_calls=[grep_call],
        ),
        CompletionResponse(content="important_helper is defined in feature.py."),
    ]
    backend = ScriptedBackend(completions=scripted_turns)
    agent = Agent(
        backend=backend,
        config=AgentConfig(
            auto_context=False,
            stream=False,
            permission_mode=PermissionMode.WORKSPACE_WRITE,
        ),
        project_root=temp_dir,
    )
    seen_events = []

    async def capture(event) -> None:
        seen_events.append(event)

    answer = await agent.run_explore(
        "Where is important_helper defined?",
        on_event=capture,
    )

    assert "feature.py" in answer

    # Neither the definition-of-done nor the workflow-router event may appear.
    emitted_types = [event.type for event in seen_events]
    assert "dod_status" not in emitted_types
    assert "workflow_mode" not in emitted_types

    summary = agent.last_turn_summary
    assert summary is not None
    assert summary.definition_of_done is None
    assert summary.workflow_mode == "explore"

    dod_dir = temp_dir / ".loader" / "dod"
    assert not dod_dir.exists()
71
72
@pytest.mark.asyncio
async def test_explore_mode_denies_write_attempts_even_with_workspace_write(temp_dir) -> None:
    """A scripted ``write`` call during an explore run must not create the file.

    The agent is configured with ``WORKSPACE_WRITE``. The test expects the
    tool result text to mention "read-only", the final response to be the
    scripted refusal, and the target file to be absent afterwards.
    """
    blocked_path = temp_dir / "new.txt"
    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(blocked_path),
            "content": "not allowed\n",
        },
    )
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(content="I'll write a file.", tool_calls=[write_call]),
            CompletionResponse(
                content="Explore mode is read-only, so I cannot make that change here.",
            ),
        ]
    )
    agent = Agent(
        backend=backend,
        config=AgentConfig(
            auto_context=False,
            stream=False,
            permission_mode=PermissionMode.WORKSPACE_WRITE,
        ),
        project_root=temp_dir,
    )
    seen_events = []

    async def capture(event) -> None:
        seen_events.append(event)

    answer = await agent.run_explore(
        "Create a new file anyway.",
        on_event=capture,
    )

    # Join every tool result so the denial text is found wherever it appears.
    tool_output = "\n".join(
        event.content for event in seen_events if event.type == "tool_result"
    )
    assert "read-only" in tool_output.lower()
    assert "cannot make that change" in answer.lower()
    assert not blocked_path.exists()
118
119
@pytest.mark.asyncio
async def test_explore_mode_refreshes_capabilities_before_request(temp_dir) -> None:
    """The described capabilities must be in effect for the first request.

    The backend is constructed claiming native tool support, but its
    ``describe_model`` reports none. After the run the agent's capability
    profile must say ``False`` and the first recorded invocation must have
    carried no tools.
    """
    scripted_answer = "I checked the repo in read-only mode."
    backend = DescribingBackend(
        completions=[CompletionResponse(content=scripted_answer)],
        supports_native_tools=True,
    )
    explore_config = AgentConfig(
        auto_context=False,
        stream=False,
        permission_mode=PermissionMode.WORKSPACE_WRITE,
    )
    agent = Agent(backend=backend, config=explore_config, project_root=temp_dir)

    answer = await agent.run_explore("Give me a quick repo summary.")

    assert answer == "I checked the repo in read-only mode."
    assert agent.capability_profile.supports_native_tools is False

    recorded = backend.invocations
    assert recorded
    assert recorded[0].tools is None
144
145
@pytest.mark.asyncio
async def test_explore_mode_ignores_global_allow_rules(temp_dir) -> None:
    """An allow rule on disk must not let an explore run write a file.

    A ``permission-rules.json`` allowing ``write`` for ``new.txt`` is placed
    under ``.loader`` and the agent runs with ``PermissionMode.ALLOW``. The
    scripted ``write`` call is still expected to produce a "read-only" tool
    result and leave the file absent.
    """
    rules_dir = temp_dir / ".loader"
    rules_dir.mkdir()
    rules_file = rules_dir / "permission-rules.json"
    rules_file.write_text(
        '{"allow": [{"tool": "write", "path_contains": "new.txt"}]}\n'
    )

    blocked_path = temp_dir / "new.txt"
    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(blocked_path),
            "content": "still not allowed\n",
        },
    )
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(content="I'll write a file.", tool_calls=[write_call]),
            CompletionResponse(
                content="Explore mode is read-only, so I cannot make that change here.",
            ),
        ]
    )
    agent = Agent(
        backend=backend,
        config=AgentConfig(
            auto_context=False,
            stream=False,
            permission_mode=PermissionMode.ALLOW,
        ),
        project_root=temp_dir,
    )
    seen_events = []

    async def capture(event) -> None:
        seen_events.append(event)

    answer = await agent.run_explore(
        "Create a new file anyway.",
        on_event=capture,
    )

    # Join every tool result so the denial text is found wherever it appears.
    tool_output = "\n".join(
        event.content for event in seen_events if event.type == "tool_result"
    )
    assert "read-only" in tool_output.lower()
    assert "cannot make that change" in answer.lower()
    assert not blocked_path.exists()
196
197
@pytest.mark.asyncio
async def test_explore_mode_persists_recent_history_for_follow_up_queries(temp_dir) -> None:
    """A second agent on the same project root sees the previous exchange.

    Two separate agents run explore queries against the same ``temp_dir``.
    The second backend's first request must contain the earlier query and
    answer ahead of the new query, and the stored explore snapshot must
    record two turns in "continue" mode.
    """
    opening_backend = ScriptedBackend(
        completions=[CompletionResponse(content="Start with README.md.")]
    )
    opening_agent = Agent(
        backend=opening_backend,
        config=AgentConfig(auto_context=False, stream=False),
        project_root=temp_dir,
    )

    opening_answer = await opening_agent.run_explore("Where should I start?")

    assert opening_answer == "Start with README.md."

    follow_up_backend = ScriptedBackend(
        completions=[CompletionResponse(content="I mentioned README.md.")]
    )
    follow_up_agent = Agent(
        backend=follow_up_backend,
        config=AgentConfig(auto_context=False, stream=False),
        project_root=temp_dir,
    )

    follow_up_answer = await follow_up_agent.run_explore("What file did you mention?")

    assert follow_up_answer == "I mentioned README.md."

    # Positions 1..3 of the request hold the prior turn followed by the new query.
    sent_messages = follow_up_backend.invocations[0].messages
    expected_contents = (
        "Where should I start?",
        "Start with README.md.",
        "What file did you mention?",
    )
    for position, expected in enumerate(expected_contents, start=1):
        assert sent_messages[position].content == expected

    stored = ExploreStateStore(temp_dir).load()
    assert stored is not None
    assert stored.turn_count == 2
    assert stored.last_history_mode == "continue"
    assert stored.last_query == "What file did you mention?"
    assert stored.last_response == "I mentioned README.md."
235
236
@pytest.mark.asyncio
async def test_explore_mode_fresh_query_ignores_persisted_history(temp_dir) -> None:
    """With ``fresh=True`` the earlier exchange is not sent to the backend.

    After one explore turn has run, a second agent queries with
    ``fresh=True``. Its request must hold exactly two messages, the second
    being the new query, and the stored snapshot must record a single turn
    in "fresh" mode.
    """
    seeding_agent = Agent(
        backend=ScriptedBackend(
            completions=[CompletionResponse(content="Start with README.md.")]
        ),
        config=AgentConfig(auto_context=False, stream=False),
        project_root=temp_dir,
    )
    await seeding_agent.run_explore("Where should I start?")

    fresh_backend = ScriptedBackend(
        completions=[CompletionResponse(content="Fresh answer only.")]
    )
    fresh_agent = Agent(
        backend=fresh_backend,
        config=AgentConfig(auto_context=False, stream=False),
        project_root=temp_dir,
    )

    answer = await fresh_agent.run_explore("Ignore the previous lookup.", fresh=True)

    assert answer == "Fresh answer only."

    sent_messages = fresh_backend.invocations[0].messages
    assert len(sent_messages) == 2
    assert sent_messages[1].content == "Ignore the previous lookup."

    stored = ExploreStateStore(temp_dir).load()
    assert stored is not None
    assert stored.turn_count == 1
    assert stored.last_history_mode == "fresh"
    assert stored.last_query == "Ignore the previous lookup."
    assert stored.last_response == "Fresh answer only."