| 1 | """Tests for the Sprint 06 read-only explore lane.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import pytest |
| 6 | |
| 7 | from loader.agent.loop import Agent, AgentConfig |
| 8 | from loader.llm.base import CompletionResponse, ToolCall |
| 9 | from loader.runtime.explore_state import ExploreStateStore |
| 10 | from loader.runtime.permissions import PermissionMode |
| 11 | from tests.helpers.runtime_harness import ScriptedBackend |
| 12 | |
| 13 | |
class DescribingBackend(ScriptedBackend):
    """Scripted backend whose model description reports no native tool support."""

    async def describe_model(self) -> dict[str, object]:
        """Switch the backend's native-tools flag off and return a matching description."""
        native_tools = False
        # Keep the instance flag in step with the description handed back.
        self._supports_native_tools = native_tools
        return {"supports_native_tools": native_tools}
| 18 | |
| 19 | |
@pytest.mark.asyncio
async def test_explore_mode_skips_workflow_router_and_definition_of_done(temp_dir) -> None:
    """An explore run answers the query without DoD or workflow-mode events."""
    # Seed the workspace with the file the scripted grep call searches for.
    (temp_dir / "feature.py").write_text("def important_helper():\n return 1\n")

    grep_call = ToolCall(
        id="grep-1",
        name="grep",
        arguments={
            "pattern": "important_helper",
            "path": str(temp_dir),
            "include": "*.py",
        },
    )
    scripted_turns = [
        CompletionResponse(
            content="I'll search for that helper.",
            tool_calls=[grep_call],
        ),
        CompletionResponse(content="important_helper is defined in feature.py."),
    ]
    agent = Agent(
        backend=ScriptedBackend(completions=scripted_turns),
        config=AgentConfig(
            auto_context=False,
            stream=False,
            permission_mode=PermissionMode.WORKSPACE_WRITE,
        ),
        project_root=temp_dir,
    )
    recorded = []

    async def record(event) -> None:
        recorded.append(event)

    answer = await agent.run_explore(
        "Where is important_helper defined?",
        on_event=record,
    )

    assert "feature.py" in answer

    # Neither definition-of-done nor workflow-mode events may be emitted.
    forbidden_types = ("dod_status", "workflow_mode")
    assert not any(event.type in forbidden_types for event in recorded)

    summary = agent.last_turn_summary
    assert summary is not None
    assert summary.definition_of_done is None
    assert summary.workflow_mode == "explore"

    # No definition-of-done directory is created under the project root.
    dod_dir = temp_dir / ".loader" / "dod"
    assert not dod_dir.exists()
| 71 | |
| 72 | |
@pytest.mark.asyncio
async def test_explore_mode_denies_write_attempts_even_with_workspace_write(temp_dir) -> None:
    """A scripted write call is rejected as read-only despite WORKSPACE_WRITE mode."""
    forbidden_file = temp_dir / "new.txt"
    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(forbidden_file),
            "content": "not allowed\n",
        },
    )
    scripted_turns = [
        CompletionResponse(content="I'll write a file.", tool_calls=[write_call]),
        CompletionResponse(
            content="Explore mode is read-only, so I cannot make that change here.",
        ),
    ]
    agent = Agent(
        backend=ScriptedBackend(completions=scripted_turns),
        config=AgentConfig(
            auto_context=False,
            stream=False,
            permission_mode=PermissionMode.WORKSPACE_WRITE,
        ),
        project_root=temp_dir,
    )
    recorded = []

    async def record(event) -> None:
        recorded.append(event)

    answer = await agent.run_explore("Create a new file anyway.", on_event=record)

    # Join every tool result so the denial text can be matched case-insensitively.
    tool_output = "\n".join(
        event.content for event in recorded if event.type == "tool_result"
    )
    assert "read-only" in tool_output.lower()
    assert "cannot make that change" in answer.lower()
    assert not forbidden_file.exists()
| 118 | |
| 119 | |
@pytest.mark.asyncio
async def test_explore_mode_refreshes_capabilities_before_request(temp_dir) -> None:
    """The capability profile reflects describe_model() and the request carries no tools."""
    expected_reply = "I checked the repo in read-only mode."
    # The backend starts out claiming native tool support; DescribingBackend's
    # describe_model() reports the opposite.
    describing_backend = DescribingBackend(
        completions=[CompletionResponse(content=expected_reply)],
        supports_native_tools=True,
    )
    explore_config = AgentConfig(
        auto_context=False,
        stream=False,
        permission_mode=PermissionMode.WORKSPACE_WRITE,
    )
    agent = Agent(
        backend=describing_backend,
        config=explore_config,
        project_root=temp_dir,
    )

    reply = await agent.run_explore("Give me a quick repo summary.")

    assert reply == expected_reply
    assert agent.capability_profile.supports_native_tools is False

    calls = describing_backend.invocations
    assert calls
    assert calls[0].tools is None
| 144 | |
| 145 | |
@pytest.mark.asyncio
async def test_explore_mode_ignores_global_allow_rules(temp_dir) -> None:
    """An allow rule for the write tool does not let an explore run create the file."""
    # Write a permission-rules file that allows writes touching new.txt.
    rules_dir = temp_dir / ".loader"
    rules_dir.mkdir()
    rules_file = rules_dir / "permission-rules.json"
    rules_file.write_text(
        '{"allow": [{"tool": "write", "path_contains": "new.txt"}]}\n'
    )

    forbidden_file = temp_dir / "new.txt"
    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(forbidden_file),
            "content": "still not allowed\n",
        },
    )
    scripted_turns = [
        CompletionResponse(content="I'll write a file.", tool_calls=[write_call]),
        CompletionResponse(
            content="Explore mode is read-only, so I cannot make that change here.",
        ),
    ]
    agent = Agent(
        backend=ScriptedBackend(completions=scripted_turns),
        config=AgentConfig(
            auto_context=False,
            stream=False,
            permission_mode=PermissionMode.ALLOW,
        ),
        project_root=temp_dir,
    )
    recorded = []

    async def record(event) -> None:
        recorded.append(event)

    answer = await agent.run_explore("Create a new file anyway.", on_event=record)

    # Join every tool result so the denial text can be matched case-insensitively.
    tool_output = "\n".join(
        event.content for event in recorded if event.type == "tool_result"
    )
    assert "read-only" in tool_output.lower()
    assert "cannot make that change" in answer.lower()
    assert not forbidden_file.exists()
| 196 | |
| 197 | |
@pytest.mark.asyncio
async def test_explore_mode_persists_recent_history_for_follow_up_queries(temp_dir) -> None:
    """A second agent on the same project root sees the first explore exchange."""

    def build_agent(scripted_backend):
        # Each agent gets its own freshly constructed config.
        return Agent(
            backend=scripted_backend,
            config=AgentConfig(auto_context=False, stream=False),
            project_root=temp_dir,
        )

    opening_agent = build_agent(
        ScriptedBackend(
            completions=[CompletionResponse(content="Start with README.md.")]
        )
    )
    opening_reply = await opening_agent.run_explore("Where should I start?")

    assert opening_reply == "Start with README.md."

    follow_up_backend = ScriptedBackend(
        completions=[CompletionResponse(content="I mentioned README.md.")]
    )
    follow_up_agent = build_agent(follow_up_backend)
    follow_up_reply = await follow_up_agent.run_explore("What file did you mention?")

    assert follow_up_reply == "I mentioned README.md."

    # Positions 1-3 of the follow-up request hold the earlier exchange and the new query.
    sent_messages = follow_up_backend.invocations[0].messages
    expected_contents = (
        "Where should I start?",
        "Start with README.md.",
        "What file did you mention?",
    )
    for position, expected in enumerate(expected_contents, start=1):
        assert sent_messages[position].content == expected

    persisted = ExploreStateStore(temp_dir).load()
    assert persisted is not None
    assert persisted.turn_count == 2
    assert persisted.last_history_mode == "continue"
    assert persisted.last_query == "What file did you mention?"
    assert persisted.last_response == "I mentioned README.md."
| 235 | |
| 236 | |
@pytest.mark.asyncio
async def test_explore_mode_fresh_query_ignores_persisted_history(temp_dir) -> None:
    """With fresh=True the request omits the earlier exchange and the snapshot shows one turn."""

    def build_agent(scripted_backend):
        # Each agent gets its own freshly constructed config.
        return Agent(
            backend=scripted_backend,
            config=AgentConfig(auto_context=False, stream=False),
            project_root=temp_dir,
        )

    # First run leaves explore state behind for the project root.
    opening_agent = build_agent(
        ScriptedBackend(
            completions=[CompletionResponse(content="Start with README.md.")]
        )
    )
    await opening_agent.run_explore("Where should I start?")

    fresh_backend = ScriptedBackend(
        completions=[CompletionResponse(content="Fresh answer only.")]
    )
    fresh_agent = build_agent(fresh_backend)
    reply = await fresh_agent.run_explore("Ignore the previous lookup.", fresh=True)

    assert reply == "Fresh answer only."

    # Only two messages are sent, and the second one is the new query.
    sent_messages = fresh_backend.invocations[0].messages
    assert len(sent_messages) == 2
    assert sent_messages[1].content == "Ignore the previous lookup."

    persisted = ExploreStateStore(temp_dir).load()
    assert persisted is not None
    assert persisted.turn_count == 1
    assert persisted.last_history_mode == "fresh"
    assert persisted.last_query == "Ignore the previous lookup."
    assert persisted.last_response == "Fresh answer only."