tenseleyflow/loader / 8056705

Browse files

Prioritize concrete missing outputs

Authored by espadonne
SHA
8056705e83c8772c93189c34988f92e8adf90cfb
Parents
09f576c
Tree
d28b7a9

4 changed files

StatusFile+-
M src/loader/runtime/tool_batches.py 42 0
M src/loader/runtime/workflow.py 15 0
M tests/test_tool_batches.py 136 1
M tests/test_workflow.py 36 0
src/loader/runtime/tool_batches.pymodified
@@ -350,8 +350,10 @@ class ToolBatchRunner:
350350
             max_items=2,
351351
         )
352352
         if _should_prioritize_missing_artifact(
353
+            dod=dod,
353354
             next_pending=next_pending,
354355
             missing_artifact=missing_artifact,
356
+            project_root=self.context.project_root,
355357
         ):
356358
             prefix = "Reuse the earlier observation instead of repeating it. "
357359
             if confirmed_facts:
@@ -802,8 +804,10 @@ class ToolBatchRunner:
802804
         if not completed_label or not next_pending or next_pending == completed_label:
803805
             return
804806
         if _should_prioritize_missing_artifact(
807
+            dod=dod,
805808
             next_pending=next_pending,
806809
             missing_artifact=missing_artifact,
810
+            project_root=self.context.project_root,
807811
         ):
808812
             if not has_artifact_progress:
809813
                 compact_handoff = _compact_missing_artifact_handoff(
@@ -1102,6 +1106,7 @@ class ToolBatchRunner:
11021106
             and not _todo_is_mutation_step(next_pending)
11031107
             and not _todo_is_consistency_review_step(next_pending)
11041108
             and not _should_prioritize_missing_artifact(
1109
+                dod=dod,
11051110
                 next_pending=next_pending,
11061111
                 missing_artifact=(
11071112
                     missing_artifact
@@ -1111,6 +1116,7 @@ class ToolBatchRunner:
11111116
                     )
11121117
                     else None
11131118
                 ),
1119
+                project_root=self.context.project_root,
11141120
             )
11151121
         ):
11161122
             self.context.queue_steering_message(
@@ -1143,18 +1149,54 @@ def _todo_is_consistency_review_step(item: str) -> bool:
11431149
 
11441150
 def _should_prioritize_missing_artifact(
11451151
     *,
1152
+    dod: DefinitionOfDone,
11461153
     next_pending: str | None,
11471154
     missing_artifact: tuple[Path, bool] | None,
1155
+    project_root: Path,
11481156
 ) -> bool:
11491157
     if missing_artifact is None:
11501158
         return False
11511159
     if not next_pending:
11521160
         return True
1161
+    if _pending_todo_conflicts_with_missing_artifact(
1162
+        dod,
1163
+        item=next_pending,
1164
+        missing_artifact=missing_artifact,
1165
+        project_root=project_root,
1166
+    ):
1167
+        return True
11531168
     if _todo_is_consistency_review_step(next_pending):
11541169
         return True
11551170
     return not _todo_is_mutation_step(next_pending)
11561171
 
11571172
 
1173
+def _pending_todo_conflicts_with_missing_artifact(
1174
+    dod: DefinitionOfDone,
1175
+    *,
1176
+    item: str,
1177
+    missing_artifact: tuple[Path, bool],
1178
+    project_root: Path,
1179
+) -> bool:
1180
+    text = item.strip().lower()
1181
+    if not text or item in _TODO_NUDGE_EXCLUDED_ITEMS:
1182
+        return False
1183
+
1184
+    target, expect_directory = missing_artifact
1185
+    inferred_target = infer_pending_todo_output_target(
1186
+        dod,
1187
+        item,
1188
+        project_root=project_root,
1189
+    )
1190
+    if inferred_target is None:
1191
+        return not expect_directory and _todo_is_mutation_step(item)
1192
+
1193
+    inferred_target = inferred_target.resolve(strict=False)
1194
+    target = target.resolve(strict=False)
1195
+    if expect_directory:
1196
+        return target != inferred_target and target not in inferred_target.parents
1197
+    return inferred_target != target
1198
+
1199
+
11581200
 def _next_missing_planned_artifact(
11591201
     dod: DefinitionOfDone,
11601202
     *,
src/loader/runtime/workflow.pymodified
@@ -181,6 +181,10 @@ _ARTIFACT_SET_COMPLETION_HINTS = (
181181
     "formatted",
182182
     "formatting",
183183
     "review",
184
+    "style",
185
+    "same style",
186
+    "same structure",
187
+    "follow the same",
184188
 )
185189
 _BROAD_SETUP_HINTS = (
186190
     "directory structure",
@@ -1236,6 +1240,17 @@ def _todo_progress_score(item: str, tool_call: ToolCall) -> int:
12361240
         )
12371241
     ):
12381242
         return 0
1243
+    if (
1244
+        is_discovery_tool
1245
+        and _todo_requires_complete_artifact_set(text)
1246
+        and not (
1247
+            _contains_any(text, _READ_STEP_HINTS)
1248
+            or _contains_any(text, _SEARCH_STEP_HINTS)
1249
+            or _contains_any(text, _PARSE_STEP_HINTS)
1250
+            or _contains_any(text, _VERIFY_STEP_HINTS)
1251
+        )
1252
+    ):
1253
+        return 0
12391254
 
12401255
     if basename and basename in text:
12411256
         score += 3
tests/test_tool_batches.pymodified
@@ -688,7 +688,7 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge(
688688
 
689689
     assert len(queued_messages) == 1
690690
     assert "Reuse the earlier observation instead of repeating it." in queued_messages[0]
691
-    assert "Continue with the next pending item: `Create the remaining chapter files`." in queued_messages[0]
691
+    assert "A declared output artifact is still missing." in queued_messages[0]
692692
     assert "Resume by creating `04-variables.html` now." in queued_messages[0]
693693
     assert f"Prefer one `write` call for `{temp_dir / 'chapters' / '04-variables.html'}` instead of more rereads." in queued_messages[0]
694694
 
@@ -1303,6 +1303,139 @@ async def test_tool_batch_runner_duplicate_reference_read_prefers_next_pending_t
13031303
     assert "Update `" not in queued_messages[0]
13041304
 
13051305
 
1306
+@pytest.mark.asyncio
1307
+async def test_tool_batch_runner_successful_reference_read_prioritizes_concrete_missing_artifact(
1308
+    temp_dir: Path,
1309
+) -> None:
1310
+    async def assess_confidence(
1311
+        tool_name: str,
1312
+        tool_args: dict,
1313
+        context: str,
1314
+    ) -> ConfidenceAssessment:
1315
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
1316
+
1317
+    async def verify_action(
1318
+        tool_name: str,
1319
+        tool_args: dict,
1320
+        result: str,
1321
+        expected: str = "",
1322
+    ) -> ActionVerification:
1323
+        raise AssertionError("Verification should not run for this scenario")
1324
+
1325
+    guide_root = temp_dir / "Loader" / "guides" / "nginx"
1326
+    chapters = guide_root / "chapters"
1327
+    chapters.mkdir(parents=True)
1328
+    chapter_one = chapters / "01-introduction.html"
1329
+    chapter_one.write_text("<html></html>\n")
1330
+    index_path = guide_root / "index.html"
1331
+
1332
+    reference = temp_dir / "Loader" / "guides" / "fortran" / "index.html"
1333
+    reference.parent.mkdir(parents=True, exist_ok=True)
1334
+    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
1335
+
1336
+    implementation_plan = temp_dir / "implementation.md"
1337
+    implementation_plan.write_text(
1338
+        "\n".join(
1339
+            [
1340
+                "# Implementation Plan",
1341
+                "",
1342
+                "## File Changes",
1343
+                f"- `{guide_root}/`",
1344
+                f"- `{chapters}/`",
1345
+                f"- `{index_path}`",
1346
+                f"- `{chapter_one}`",
1347
+                f"- `{chapters / '02-installation.html'}`",
1348
+                "",
1349
+            ]
1350
+        )
1351
+    )
1352
+
1353
+    context = build_context(
1354
+        temp_dir=temp_dir,
1355
+        messages=[],
1356
+        safeguards=FakeSafeguards(),
1357
+        assess_confidence=assess_confidence,
1358
+        verify_action=verify_action,
1359
+        auto_recover=False,
1360
+    )
1361
+    queued_messages: list[str] = []
1362
+    context.queue_steering_message_callback = queued_messages.append
1363
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
1364
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
1365
+    dod.implementation_plan = str(implementation_plan)
1366
+    dod.touched_files.append(str(chapter_one))
1367
+    sync_todos_to_definition_of_done(
1368
+        dod,
1369
+        [
1370
+            {
1371
+                "content": "Examine the existing Fortran guide structure to understand the format and cadence",
1372
+                "active_form": "Working on: Examine the existing Fortran guide structure to understand the format and cadence",
1373
+                "status": "pending",
1374
+            },
1375
+            {
1376
+                "content": "Create each chapter file with appropriate content",
1377
+                "active_form": "Working on: Create each chapter file with appropriate content",
1378
+                "status": "pending",
1379
+            },
1380
+            {
1381
+                "content": "Ensure all files follow the same structure and style as the Fortran guide",
1382
+                "active_form": "Working on: Ensure all files follow the same structure and style as the Fortran guide",
1383
+                "status": "pending",
1384
+            },
1385
+        ],
1386
+    )
1387
+    tool_call = ToolCall(
1388
+        id="read-reference-index",
1389
+        name="read",
1390
+        arguments={"file_path": str(reference)},
1391
+    )
1392
+    read_output = "Observation [read]: Result: <h1>Fortran Beginner's Guide</h1>\n"
1393
+    executor = FakeExecutor(
1394
+        [
1395
+            ToolExecutionOutcome(
1396
+                tool_call=tool_call,
1397
+                state=ToolExecutionState.EXECUTED,
1398
+                message=Message.tool_result_message(
1399
+                    tool_call_id=tool_call.id,
1400
+                    display_content=read_output,
1401
+                    result_content=read_output,
1402
+                ),
1403
+                event_content=read_output,
1404
+                is_error=False,
1405
+                result_output=read_output,
1406
+            )
1407
+        ]
1408
+    )
1409
+
1410
+    summary = TurnSummary(final_response="")
1411
+    await runner.execute_batch(
1412
+        tool_calls=[tool_call],
1413
+        tool_source="assistant",
1414
+        pending_tool_calls_seen=set(),
1415
+        emit=_noop_emit,
1416
+        summary=summary,
1417
+        dod=dod,
1418
+        executor=executor,  # type: ignore[arg-type]
1419
+        on_confirmation=None,
1420
+        on_user_question=None,
1421
+        emit_confirmation=None,
1422
+        consecutive_errors=0,
1423
+    )
1424
+
1425
+    assert queued_messages
1426
+    assert any(
1427
+        "Confirmed progress: `Examine the existing Fortran guide structure to understand the format and cadence`"
1428
+        in message
1429
+        for message in queued_messages
1430
+    )
1431
+    assert any("Resume by creating `index.html` now." in message for message in queued_messages)
1432
+    assert not any(
1433
+        "Continue with the next pending item: `Create each chapter file with appropriate content`"
1434
+        in message
1435
+        for message in queued_messages
1436
+    )
1437
+
1438
+
13061439
 @pytest.mark.asyncio
13071440
 async def test_tool_batch_runner_duplicate_read_ignores_unplanned_expansion_after_plan_complete(
13081441
     temp_dir: Path,
@@ -1808,8 +1941,10 @@ async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_rev
18081941
         ],
18091942
     )
18101943
     assert tool_batches_should_prioritize_missing_artifact(
1944
+        dod=dod,
18111945
         next_pending=dod.pending_items[0],
18121946
         missing_artifact=(chapters / "06-ssl-configuration.html", False),
1947
+        project_root=temp_dir,
18131948
     )
18141949
 
18151950
     tool_call = ToolCall(
tests/test_workflow.pymodified
@@ -1024,6 +1024,42 @@ def test_advance_todos_from_tool_call_does_not_complete_linking_step_from_glob()
10241024
     assert "Link all chapters together properly in the index file" in dod.pending_items
10251025
 
10261026
 
1027
+def test_advance_todos_from_tool_call_does_not_complete_aggregate_style_step_from_reference_read() -> None:
1028
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
1029
+    sync_todos_to_definition_of_done(
1030
+        dod,
1031
+        [
1032
+            {
1033
+                "content": "Create each chapter file with appropriate content",
1034
+                "active_form": "Working on: Create each chapter file with appropriate content",
1035
+                "status": "pending",
1036
+            },
1037
+            {
1038
+                "content": "Ensure all files follow the same structure and style as the Fortran guide",
1039
+                "active_form": "Working on: Ensure all files follow the same structure and style as the Fortran guide",
1040
+                "status": "pending",
1041
+            },
1042
+        ],
1043
+    )
1044
+
1045
+    assert (
1046
+        advance_todos_from_tool_call(
1047
+            dod,
1048
+            ToolCall(
1049
+                id="read-reference-index",
1050
+                name="read",
1051
+                arguments={"file_path": "~/Loader/guides/fortran/index.html"},
1052
+            ),
1053
+        )
1054
+        is False
1055
+    )
1056
+    assert "Create each chapter file with appropriate content" in dod.pending_items
1057
+    assert (
1058
+        "Ensure all files follow the same structure and style as the Fortran guide"
1059
+        in dod.pending_items
1060
+    )
1061
+
1062
+
10271063
 def test_sync_todos_to_definition_of_done_keeps_linking_step_pending_while_artifacts_missing(
10281064
     temp_dir: Path,
10291065
 ) -> None: