@@ -1220,6 +1220,127 @@ async def test_tool_batch_runner_queues_next_pending_todo_after_discovery_progre |
| 1220 | 1220 | assert ephemeral_messages == [] |
| 1221 | 1221 | |
| 1222 | 1222 | |
@pytest.mark.asyncio
async def test_tool_batch_runner_queues_setup_directory_before_file_when_plan_lists_index_first(
    temp_dir: Path,
) -> None:
    """Steering should point at the directory todo even when the plan lists index.html first.

    The implementation plan written here names ``index.html`` before the
    ``chapters/`` directory. After a single successful ``read`` of the
    reference chapter, the test expects a persistent steering message that
    continues with ``Create the nginx directory structure`` and resumes by
    creating ``chapters/``, expects no message telling the agent to create
    ``index.html`` next, and expects no ephemeral steering messages.
    """

    async def reject_confidence_scoring(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Any call to this hook fails the test outright.
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def reject_verification(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Any call to this hook fails the test outright.
        raise AssertionError("Verification should not run for this scenario")

    # Reference chapter that the simulated `read` tool call targets.
    reference_html = "<h1>Introduction</h1>\n<p>Guide cadence.</p>\n"
    reference_dir = temp_dir / "fortran" / "chapters"
    reference_dir.mkdir(parents=True)
    reference_page = reference_dir / "01-introduction.html"
    reference_page.write_text(reference_html)

    # Planned targets; neither path is created on disk by this test.
    guide_root = temp_dir / "Loader" / "guides" / "nginx"
    index_page = guide_root / "index.html"
    chapters_dir = guide_root / "chapters"

    # The plan deliberately lists the file before the directory.
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_page}`",
        f"- `{chapters_dir}/`",
        "",
    ]
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text("\n".join(plan_lines))

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=reject_confidence_scoring,
        verify_action=reject_verification,
        auto_recover=False,
    )
    queued_persistent: list[str] = []
    queued_ephemeral: list[str] = []
    context.queue_steering_message_callback = queued_persistent.append
    context.queue_ephemeral_steering_message_callback = queued_ephemeral.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

    dod = create_definition_of_done("Create an equally thorough nginx guide.")
    dod.implementation_plan = str(plan_path)
    todo_contents = (
        "Examine the existing Fortran guide structure to understand the cadence and format",
        "Create the nginx directory structure",
        "Create the nginx index.html file",
    )
    pending_todos = [
        {
            "content": content,
            "active_form": f"Working on: {content}",
            "status": "pending",
        }
        for content in todo_contents
    ]
    sync_todos_to_definition_of_done(dod, pending_todos, project_root=temp_dir)

    read_call = ToolCall(
        id="read-reference-index-first",
        name="read",
        arguments={"file_path": str(reference_page)},
    )
    read_outcome = tool_outcome(
        tool_call=read_call,
        output=reference_html,
        is_error=False,
    )
    executor = FakeExecutor([read_outcome])
    summary = TurnSummary(final_response="")

    await runner.execute_batch(
        tool_calls=[read_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    expected_todo_hint = (
        "Continue with the next pending item: `Create the nginx directory structure`"
    )
    expected_resume_hint = "Resume by creating `chapters/` now."
    unwanted_file_hint = "Next step: create `index.html`."

    assert queued_persistent
    assert any(expected_todo_hint in message for message in queued_persistent)
    assert any(expected_resume_hint in message for message in queued_persistent)
    assert not any(unwanted_file_hint in message for message in queued_persistent)
    assert queued_ephemeral == []
| 1342 | + |
| 1343 | + |
| 1223 | 1344 | @pytest.mark.asyncio |
| 1224 | 1345 | async def test_tool_batch_runner_duplicate_reference_read_prefers_next_pending_todo( |
| 1225 | 1346 | temp_dir: Path, |