tenseleyflow/loader / 59470aa

Browse files

Add typed verification attempt identity

Authored by espadonne
SHA
59470aabd9a80da4d7bf962e98098ebf1b85b24f
Parents
f7b6ab4
Tree
95eb357

8 changed files

Status | File | + | -
M src/loader/runtime/dod.py 51 0
M src/loader/runtime/finalization.py 44 4
M src/loader/runtime/tool_batches.py 19 0
M src/loader/runtime/verification_observations.py 24 0
M tests/test_dod.py 17 0
M tests/test_finalization.py 16 0
M tests/test_tool_batches.py 25 0
M tests/test_verification_observations.py 6 0
src/loader/runtime/dod.py (modified)
@@ -12,6 +12,7 @@ from typing import Any, Literal
1212
 
1313
 from ..llm.base import ToolCall
1414
 from ..tools.shell_tools import BashTool
15
+from .verification_observations import VerificationAttempt, verification_attempt_id
1516
 
1617
 TaskSize = Literal["small", "standard", "large"]
1718
 DoDStatus = Literal["draft", "in_progress", "verifying", "fixing", "done", "failed"]
@@ -53,6 +54,9 @@ class DefinitionOfDone:
5354
     line_changes: int = 0
5455
     storage_path: str | None = None
5556
     last_verification_result: str | None = None
57
+    verification_attempt_counter: int = 0
58
+    active_verification_attempt_id: str | None = None
59
+    active_verification_attempt_number: int | None = None
5660
     current_mode: str = "execute"
5761
     mode_history: list[str] = field(default_factory=list)
5862
     clarify_brief: str | None = None
@@ -88,6 +92,13 @@ class DefinitionOfDone:
8892
             line_changes=int(data.get("line_changes", 0)),
8993
             storage_path=data.get("storage_path"),
9094
             last_verification_result=data.get("last_verification_result"),
95
+            verification_attempt_counter=int(data.get("verification_attempt_counter", 0)),
96
+            active_verification_attempt_id=data.get("active_verification_attempt_id"),
97
+            active_verification_attempt_number=(
98
+                int(data["active_verification_attempt_number"])
99
+                if data.get("active_verification_attempt_number") is not None
100
+                else None
101
+            ),
91102
             current_mode=data.get("current_mode", "execute"),
92103
             mode_history=list(data.get("mode_history", [])),
93104
             clarify_brief=data.get("clarify_brief"),
@@ -255,6 +266,46 @@ def build_verification_summary(evidence: list[VerificationEvidence]) -> str:
255266
     return "\n".join(lines)
256267
 
257268
 
269
def ensure_active_verification_attempt(dod: DefinitionOfDone) -> VerificationAttempt:
    """Return the attempt currently active on ``dod``, creating one if absent.

    When ``dod`` already carries both an active attempt id and an active
    attempt number, they are returned unchanged. Otherwise an attempt is
    synthesized from the stored counter (floored at 1), written back onto
    ``dod``, and returned.
    """

    active_id = dod.active_verification_attempt_id
    active_number = dod.active_verification_attempt_number
    if active_id and active_number is not None:
        return VerificationAttempt(
            attempt_id=active_id,
            attempt_number=active_number,
        )

    # The counter is reused rather than advanced here; a missing or
    # non-positive counter falls back to attempt number 1.
    number = max(int(dod.verification_attempt_counter or 0), 1)
    dod.verification_attempt_counter = number
    dod.active_verification_attempt_number = number
    dod.active_verification_attempt_id = verification_attempt_id(number)
    return VerificationAttempt(
        attempt_id=dod.active_verification_attempt_id,
        attempt_number=number,
    )
289
+
290
+
291
def begin_new_verification_attempt(
    dod: DefinitionOfDone,
    *,
    supersedes_attempt_id: str | None = None,
) -> VerificationAttempt:
    """Advance ``dod`` to a fresh verification attempt and return its identity.

    The stored counter (treated as 0 when missing or negative) is incremented
    by one, and the resulting number and id are recorded on ``dod`` as the
    active attempt. ``supersedes_attempt_id`` is passed through to the
    returned ``VerificationAttempt`` only; it is not stored on ``dod``.
    """

    previous = int(dod.verification_attempt_counter or 0)
    if previous < 0:
        previous = 0
    number = previous + 1

    dod.verification_attempt_counter = number
    dod.active_verification_attempt_number = number
    dod.active_verification_attempt_id = verification_attempt_id(number)

    return VerificationAttempt(
        attempt_id=dod.active_verification_attempt_id,
        attempt_number=number,
        supersedes_attempt_id=supersedes_attempt_id,
    )
307
+
308
+
258309
 class DefinitionOfDoneStore:
259310
     """Persist DoD state to `.loader/dod/`."""
260311
 
src/loader/runtime/finalization.py (modified)
@@ -15,6 +15,7 @@ from .dod import (
1515
     VerificationEvidence,
1616
     build_verification_summary,
1717
     derive_verification_commands,
18
+    ensure_active_verification_attempt,
1819
 )
1920
 from .events import AgentEvent, TurnSummary
2021
 from .evidence_provenance import (
@@ -221,6 +222,7 @@ class TurnFinalizer:
221222
             summary=summary,
222223
         )
223224
         if dod.verification_commands:
225
+            attempt = ensure_active_verification_attempt(dod)
224226
             dod.last_verification_result = VerificationObservationStatus.PENDING.value
225227
             self.dod_store.save(dod)
226228
             append_verification_timeline_entry(
@@ -235,7 +237,11 @@ class TurnFinalizer:
235237
                     for command in dod.verification_commands[:2]
236238
                 ],
237239
                 evidence_provenance=_pending_verification_provenance(dod),
238
-                verification_observations=_pending_verification_observations(dod),
240
+                verification_observations=_pending_verification_observations(
241
+                    dod,
242
+                    attempt_id=attempt.attempt_id,
243
+                    attempt_number=attempt.attempt_number,
244
+                ),
239245
             )
240246
         verification_passed = await self.verify_definition_of_done(
241247
             dod=dod,
@@ -246,6 +252,8 @@ class TurnFinalizer:
246252
         verification_observations = _verification_result_observations(
247253
             dod,
248254
             passed=verification_passed,
255
+            attempt_id=dod.active_verification_attempt_id,
256
+            attempt_number=dod.active_verification_attempt_number,
249257
         )
250258
         if verification_passed:
251259
             passed_provenance = _verification_result_provenance(dod, passed=True)
@@ -350,10 +358,14 @@ class TurnFinalizer:
350358
         dod.status = "verifying"
351359
         self.dod_store.save(dod)
352360
         await self.emit_dod_status(emit, dod)
361
+        attempt = ensure_active_verification_attempt(dod)
353362
 
354363
         if not dod.verification_commands:
355364
             missing_provenance = _missing_verification_provenance()
356
-            missing_observations = _missing_verification_observations()
365
+            missing_observations = _missing_verification_observations(
366
+                attempt_id=attempt.attempt_id,
367
+                attempt_number=attempt.attempt_number,
368
+            )
357369
             append_verification_timeline_entry(
358370
                 self.context,
359371
                 summary,
@@ -411,7 +423,11 @@ class TurnFinalizer:
411423
                 kind=_classify_verification_kind(command),
412424
             )
413425
             dod.evidence.append(evidence)
414
-            observation = _verification_observation_from_evidence(evidence)
426
+            observation = _verification_observation_from_evidence(
427
+                evidence,
428
+                attempt_id=attempt.attempt_id,
429
+                attempt_number=attempt.attempt_number,
430
+            )
415431
             provenance = _verification_provenance_from_evidence(evidence)
416432
             append_verification_timeline_entry(
417433
                 self.context,
@@ -602,6 +618,8 @@ def _verification_result_observations(
602618
     dod: DefinitionOfDone,
603619
     *,
604620
     passed: bool,
621
+    attempt_id: str | None,
622
+    attempt_number: int | None,
605623
 ) -> list[VerificationObservation]:
606624
     entries: list[VerificationObservation] = []
607625
     target_status = (
@@ -627,6 +645,8 @@ def _verification_result_observations(
627645
                 kind=evidence.kind,
628646
                 exit_code=evidence.exit_code,
629647
                 detail=_verification_detail(evidence),
648
+                attempt_id=attempt_id,
649
+                attempt_number=attempt_number,
630650
             )
631651
         )
632652
 
@@ -642,6 +662,8 @@ def _verification_result_observations(
642662
                 summary=f"verification did not produce an observed result for `{command}`",
643663
                 command=command,
644664
                 kind=_classify_verification_kind(command),
665
+                attempt_id=attempt_id,
666
+                attempt_number=attempt_number,
645667
             )
646668
         )
647669
 
@@ -652,12 +674,17 @@ def _verification_result_observations(
652674
         VerificationObservation(
653675
             status=VerificationObservationStatus.MISSING.value,
654676
             summary="verification commands were still missing at execution time",
677
+            attempt_id=attempt_id,
678
+            attempt_number=attempt_number,
655679
         )
656680
     ]
657681
 
658682
 
659683
 def _verification_observation_from_evidence(
660684
     evidence: VerificationEvidence,
685
+    *,
686
+    attempt_id: str | None,
687
+    attempt_number: int | None,
661688
 ) -> VerificationObservation:
662689
     command = evidence.command or "verification"
663690
     return VerificationObservation(
@@ -675,6 +702,8 @@ def _verification_observation_from_evidence(
675702
         kind=evidence.kind,
676703
         exit_code=evidence.exit_code,
677704
         detail=_verification_detail(evidence),
705
+        attempt_id=attempt_id,
706
+        attempt_number=attempt_number,
678707
     )
679708
 
680709
 
@@ -702,11 +731,17 @@ def _verification_provenance_from_evidence(
702731
     ]
703732
 
704733
 
705
def _missing_verification_observations(
    *,
    attempt_id: str | None,
    attempt_number: int | None,
) -> list[VerificationObservation]:
    """Build the single MISSING observation, tagged with the given attempt."""

    missing = VerificationObservation(
        status=VerificationObservationStatus.MISSING.value,
        summary="verification commands were still missing at execution time",
        attempt_id=attempt_id,
        attempt_number=attempt_number,
    )
    return [missing]
712747
 
@@ -724,6 +759,9 @@ def _missing_verification_provenance() -> list[EvidenceProvenance]:
724759
 
725760
 def _pending_verification_observations(
726761
     dod: DefinitionOfDone,
762
+    *,
763
+    attempt_id: str | None,
764
+    attempt_number: int | None,
727765
 ) -> list[VerificationObservation]:
728766
     observations: list[VerificationObservation] = []
729767
     for command in dod.verification_commands:
@@ -732,6 +770,8 @@ def _pending_verification_observations(
732770
                 status=VerificationObservationStatus.PENDING.value,
733771
                 summary=f"verification pending for `{command}`",
734772
                 command=command,
773
+                attempt_id=attempt_id,
774
+                attempt_number=attempt_number,
735775
             )
736776
         )
737777
     return observations
src/loader/runtime/tool_batches.py (modified)
@@ -10,7 +10,9 @@ from .context import RuntimeContext
1010
 from .dod import (
1111
     DefinitionOfDone,
1212
     DefinitionOfDoneStore,
13
+    begin_new_verification_attempt,
1314
     derive_verification_commands,
15
+    ensure_active_verification_attempt,
1416
     is_state_mutating_tool_call,
1517
     record_successful_tool_call,
1618
 )
@@ -238,6 +240,11 @@ def _mark_verification_stale(
238240
     tool_call: ToolCall,
239241
 ) -> None:
240242
     detail = _stale_verification_detail(tool_call)
243
+    stale_attempt = ensure_active_verification_attempt(dod)
244
+    next_attempt = begin_new_verification_attempt(
245
+        dod,
246
+        supersedes_attempt_id=stale_attempt.attempt_id,
247
+    )
241248
     append_verification_timeline_entry(
242249
         context,
243250
         summary,
@@ -248,6 +255,9 @@ def _mark_verification_stale(
248255
         verification_observations=_stale_verification_observations(
249256
             dod,
250257
             detail=detail,
258
+            stale_attempt_id=stale_attempt.attempt_id,
259
+            stale_attempt_number=stale_attempt.attempt_number,
260
+            superseded_by_attempt_id=next_attempt.attempt_id,
251261
         ),
252262
     )
253263
     dod.last_verification_result = VerificationObservationStatus.STALE.value
@@ -281,6 +291,7 @@ def _mark_verification_planned(
281291
     if not commands:
282292
         return
283293
 
294
+    attempt = begin_new_verification_attempt(dod)
284295
     detail = _stale_verification_detail(tool_call)
285296
     append_verification_timeline_entry(
286297
         context,
@@ -306,6 +317,8 @@ def _mark_verification_planned(
306317
                 command=command,
307318
                 kind="runtime",
308319
                 detail=detail,
320
+                attempt_id=attempt.attempt_id,
321
+                attempt_number=attempt.attempt_number,
309322
             )
310323
             for command in commands
311324
         ],
@@ -321,6 +334,9 @@ def _stale_verification_observations(
321334
     dod: DefinitionOfDone,
322335
     *,
323336
     detail: str,
337
+    stale_attempt_id: str,
338
+    stale_attempt_number: int,
339
+    superseded_by_attempt_id: str,
324340
 ) -> list[VerificationObservation]:
325341
     return [
326342
         VerificationObservation(
@@ -329,6 +345,9 @@ def _stale_verification_observations(
329345
             command=command,
330346
             kind="runtime",
331347
             detail=detail,
348
+            attempt_id=stale_attempt_id,
349
+            attempt_number=stale_attempt_number,
350
+            supersedes_attempt_id=superseded_by_attempt_id,
332351
         )
333352
         for command in _stale_verification_commands(dod)
334353
     ]
src/loader/runtime/verification_observations.py (modified)
@@ -7,6 +7,15 @@ from enum import StrEnum
77
 from typing import Any
88
 
99
 
10
+@dataclass(slots=True, frozen=True)
11
+class VerificationAttempt:
12
+    """Identity for one verification attempt across lifecycle events."""
13
+
14
+    attempt_id: str
15
+    attempt_number: int
16
+    supersedes_attempt_id: str | None = None
17
+
18
+
1019
 class VerificationObservationStatus(StrEnum):
1120
     """How one verification observation resolved at runtime."""
1221
 
@@ -29,6 +38,9 @@ class VerificationObservation:
2938
     kind: str | None = None
3039
     exit_code: int | None = None
3140
     detail: str | None = None
41
+    attempt_id: str | None = None
42
+    attempt_number: int | None = None
43
+    supersedes_attempt_id: str | None = None
3244
 
3345
     def to_dict(self) -> dict[str, Any]:
3446
         """Serialize one observation for persisted runtime state."""
@@ -40,6 +52,9 @@ class VerificationObservation:
4052
             "kind": self.kind,
4153
             "exit_code": self.exit_code,
4254
             "detail": self.detail,
55
+            "attempt_id": self.attempt_id,
56
+            "attempt_number": self.attempt_number,
57
+            "supersedes_attempt_id": self.supersedes_attempt_id,
4358
         }
4459
 
4560
     @classmethod
@@ -53,9 +68,18 @@ class VerificationObservation:
5368
             kind=_optional_text(data.get("kind")),
5469
             exit_code=_optional_int(data.get("exit_code")),
5570
             detail=_optional_text(data.get("detail")),
71
+            attempt_id=_optional_text(data.get("attempt_id")),
72
+            attempt_number=_optional_int(data.get("attempt_number")),
73
+            supersedes_attempt_id=_optional_text(data.get("supersedes_attempt_id")),
5674
         )
5775
 
5876
 
77
def verification_attempt_id(attempt_number: int) -> str:
    """Return the persisted identifier string for the given attempt number.

    The identifier is the fixed prefix ``verification-attempt-`` followed by
    the attempt number.
    """

    prefix = "verification-attempt-"
    return f"{prefix}{attempt_number}"
81
+
82
+
5983
 def normalize_verification_observation_status(value: Any) -> str:
6084
     """Coerce persisted observation statuses into the canonical enum set."""
6185
 
tests/test_dod.py (modified)
@@ -5,9 +5,11 @@ from pathlib import Path
55
 from loader.llm.base import ToolCall
66
 from loader.runtime.dod import (
77
     DefinitionOfDoneStore,
8
+    begin_new_verification_attempt,
89
     create_definition_of_done,
910
     derive_verification_commands,
1011
     determine_task_size,
12
+    ensure_active_verification_attempt,
1113
     record_successful_tool_call,
1214
 )
1315
 
@@ -31,6 +33,7 @@ def test_definition_of_done_round_trip(tmp_path: Path) -> None:
3133
     dod.retry_count = 1
3234
     dod.verification_commands = ["python hello.py"]
3335
     dod.touched_files = [str(tmp_path / "hello.py")]
36
+    attempt = begin_new_verification_attempt(dod)
3437
     saved_path = store.save(dod)
3538
 
3639
     reloaded = store.load(saved_path)
@@ -40,6 +43,20 @@ def test_definition_of_done_round_trip(tmp_path: Path) -> None:
4043
     assert reloaded.retry_count == 1
4144
     assert reloaded.verification_commands == ["python hello.py"]
4245
     assert reloaded.touched_files == [str(tmp_path / "hello.py")]
46
+    assert reloaded.active_verification_attempt_id == attempt.attempt_id
47
+    assert reloaded.active_verification_attempt_number == attempt.attempt_number
48
+
49
+
50
def test_ensure_active_verification_attempt_rehydrates_missing_active_attempt() -> None:
    """A DoD with a counter but no active attempt reuses the counter value."""
    expected_number = 2
    expected_id = "verification-attempt-2"

    dod = create_definition_of_done("Verify the runtime output.")
    dod.verification_attempt_counter = expected_number

    rehydrated = ensure_active_verification_attempt(dod)

    # The returned identity and the state written back onto the DoD must agree.
    assert rehydrated.attempt_id == expected_id
    assert rehydrated.attempt_number == expected_number
    assert dod.active_verification_attempt_id == expected_id
    assert dod.active_verification_attempt_number == expected_number
4360
 
4461
 
4562
 def test_verification_command_derivation_prefers_runtime_evidence(tmp_path: Path) -> None:
tests/test_finalization.py (modified)
@@ -336,6 +336,8 @@ async def test_turn_finalizer_records_passed_verification_observation(
336336
     assert [item.status for item in result.verification_observations] == [
337337
         VerificationObservationStatus.PASSED.value
338338
     ]
339
+    assert result.verification_observations[0].attempt_id == "verification-attempt-1"
340
+    assert result.verification_observations[0].attempt_number == 1
339341
     assert result.verification_observations[0].command == "uv run pytest -q"
340342
     assert result.verification_observations[0].detail == "219 passed"
341343
     assert summary.verification_status == "passed"
@@ -346,6 +348,10 @@ async def test_turn_finalizer_records_passed_verification_observation(
346348
     assert [item.status for item in session.workflow_timeline[-2].verification_observations] == [
347349
         VerificationObservationStatus.PENDING.value
348350
     ]
351
+    assert (
352
+        session.workflow_timeline[-2].verification_observations[0].attempt_id
353
+        == "verification-attempt-1"
354
+    )
349355
     assert session.workflow_timeline[-2].verification_observations[0].command == (
350356
         "uv run pytest -q"
351357
     )
@@ -354,6 +360,10 @@ async def test_turn_finalizer_records_passed_verification_observation(
354360
     assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [
355361
         VerificationObservationStatus.PASSED.value
356362
     ]
363
+    assert (
364
+        session.workflow_timeline[-1].verification_observations[0].attempt_id
365
+        == "verification-attempt-1"
366
+    )
357367
 
358368
 
359369
 @pytest.mark.asyncio
@@ -388,6 +398,8 @@ async def test_turn_finalizer_records_missing_verification_observation(
388398
     assert [item.status for item in result.verification_observations] == [
389399
         VerificationObservationStatus.MISSING.value
390400
     ]
401
+    assert result.verification_observations[0].attempt_id == "verification-attempt-1"
402
+    assert result.verification_observations[0].attempt_number == 1
391403
     assert [item.summary for item in result.verification_observations] == [
392404
         "verification commands were still missing at execution time"
393405
     ]
@@ -397,5 +409,9 @@ async def test_turn_finalizer_records_missing_verification_observation(
397409
     assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [
398410
         VerificationObservationStatus.MISSING.value
399411
     ]
412
+    assert (
413
+        session.workflow_timeline[-1].verification_observations[0].attempt_id
414
+        == "verification-attempt-1"
415
+    )
400416
     assert session.messages[-1].role == Role.USER
401417
     assert session.messages[-1].content.startswith("[DEFINITION OF DONE CHECK FAILED]")
tests/test_tool_batches.py (modified)
@@ -396,9 +396,18 @@ async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
396396
     assert dod.last_verification_result == "planned"
397397
     assert dod.verification_commands
398398
     assert "Collect verification evidence" in dod.pending_items
399
+    assert dod.active_verification_attempt_id == "verification-attempt-1"
400
+    assert dod.active_verification_attempt_number == 1
399401
     assert summary.workflow_timeline[-1].reason_code == "verification_planned"
400402
     assert summary.workflow_timeline[-1].policy_outcome == "planned"
401403
     assert summary.workflow_timeline[-1].verification_observations[0].status == "planned"
404
+    assert (
405
+        summary.workflow_timeline[-1].verification_observations[0].attempt_id
406
+        == "verification-attempt-1"
407
+    )
408
+    assert (
409
+        summary.workflow_timeline[-1].verification_observations[0].attempt_number == 1
410
+    )
402411
 
403412
 
404413
 @pytest.mark.asyncio
@@ -440,6 +449,9 @@ async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutat
440449
     dod = create_definition_of_done("Update README and verify it still works.")
441450
     dod.verification_commands = ["uv run pytest -q"]
442451
     dod.last_verification_result = "passed"
452
+    dod.verification_attempt_counter = 1
453
+    dod.active_verification_attempt_id = "verification-attempt-1"
454
+    dod.active_verification_attempt_number = 1
443455
     dod.evidence = [
444456
         VerificationEvidence(
445457
             command="uv run pytest -q",
@@ -472,9 +484,22 @@ async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutat
472484
     assert dod.evidence == []
473485
     assert "Collect verification evidence" in dod.pending_items
474486
     assert "Collect verification evidence" not in dod.completed_items
487
+    assert dod.active_verification_attempt_id == "verification-attempt-2"
488
+    assert dod.active_verification_attempt_number == 2
475489
     assert summary.workflow_timeline[-1].reason_code == "verification_stale"
476490
     assert summary.workflow_timeline[-1].policy_outcome == "stale"
477491
     assert summary.workflow_timeline[-1].verification_observations[0].status == "stale"
492
+    assert (
493
+        summary.workflow_timeline[-1].verification_observations[0].attempt_id
494
+        == "verification-attempt-1"
495
+    )
496
+    assert (
497
+        summary.workflow_timeline[-1].verification_observations[0].attempt_number == 1
498
+    )
499
+    assert (
500
+        summary.workflow_timeline[-1].verification_observations[0].supersedes_attempt_id
501
+        == "verification-attempt-2"
502
+    )
478503
     assert (
479504
         summary.workflow_timeline[-1].verification_observations[0].command
480505
         == "uv run pytest -q"
tests/test_verification_observations.py (modified)
@@ -27,6 +27,9 @@ def test_normalize_verification_observations_round_trips_entries() -> None:
2727
                 "kind": "test",
2828
                 "exit_code": 0,
2929
                 "detail": "219 passed",
30
+                "attempt_id": "verification-attempt-3",
31
+                "attempt_number": 3,
32
+                "supersedes_attempt_id": "verification-attempt-2",
3033
             }
3134
         ]
3235
     )
@@ -39,6 +42,9 @@ def test_normalize_verification_observations_round_trips_entries() -> None:
3942
             kind="test",
4043
             exit_code=0,
4144
             detail="219 passed",
45
+            attempt_id="verification-attempt-3",
46
+            attempt_number=3,
47
+            supersedes_attempt_id="verification-attempt-2",
4248
         )
4349
     ]
4450