Add typed verification attempt identity
- SHA
59470aabd9a80da4d7bf962e98098ebf1b85b24f (59470aa)
- Parents
f7b6ab4
- Tree
95eb357

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/dod.py | 51 | 0 |
| M | src/loader/runtime/finalization.py | 44 | 4 |
| M | src/loader/runtime/tool_batches.py | 19 | 0 |
| M | src/loader/runtime/verification_observations.py | 24 | 0 |
| M | tests/test_dod.py | 17 | 0 |
| M | tests/test_finalization.py | 16 | 0 |
| M | tests/test_tool_batches.py | 25 | 0 |
| M | tests/test_verification_observations.py | 6 | 0 |

src/loader/runtime/dod.py modified
@@ -12,6 +12,7 @@ from typing import Any, Literal | ||
| 12 | 12 | |
| 13 | 13 | from ..llm.base import ToolCall |
| 14 | 14 | from ..tools.shell_tools import BashTool |
| 15 | +from .verification_observations import VerificationAttempt, verification_attempt_id | |
| 15 | 16 | |
| 16 | 17 | TaskSize = Literal["small", "standard", "large"] |
| 17 | 18 | DoDStatus = Literal["draft", "in_progress", "verifying", "fixing", "done", "failed"] |
@@ -53,6 +54,9 @@ class DefinitionOfDone: | ||
| 53 | 54 | line_changes: int = 0 |
| 54 | 55 | storage_path: str | None = None |
| 55 | 56 | last_verification_result: str | None = None |
| 57 | + verification_attempt_counter: int = 0 | |
| 58 | + active_verification_attempt_id: str | None = None | |
| 59 | + active_verification_attempt_number: int | None = None | |
| 56 | 60 | current_mode: str = "execute" |
| 57 | 61 | mode_history: list[str] = field(default_factory=list) |
| 58 | 62 | clarify_brief: str | None = None |
@@ -88,6 +92,13 @@ class DefinitionOfDone: | ||
| 88 | 92 | line_changes=int(data.get("line_changes", 0)), |
| 89 | 93 | storage_path=data.get("storage_path"), |
| 90 | 94 | last_verification_result=data.get("last_verification_result"), |
| 95 | + verification_attempt_counter=int(data.get("verification_attempt_counter", 0)), | |
| 96 | + active_verification_attempt_id=data.get("active_verification_attempt_id"), | |
| 97 | + active_verification_attempt_number=( | |
| 98 | + int(data["active_verification_attempt_number"]) | |
| 99 | + if data.get("active_verification_attempt_number") is not None | |
| 100 | + else None | |
| 101 | + ), | |
| 91 | 102 | current_mode=data.get("current_mode", "execute"), |
| 92 | 103 | mode_history=list(data.get("mode_history", [])), |
| 93 | 104 | clarify_brief=data.get("clarify_brief"), |
@@ -255,6 +266,46 @@ def build_verification_summary(evidence: list[VerificationEvidence]) -> str: | ||
| 255 | 266 | return "\n".join(lines) |
| 256 | 267 | |
| 257 | 268 | |
| 269 | +def ensure_active_verification_attempt(dod: DefinitionOfDone) -> VerificationAttempt: | |
| 270 | + """Return the current verification attempt, synthesizing one if needed.""" | |
| 271 | + | |
| 272 | + if ( | |
| 273 | + dod.active_verification_attempt_id | |
| 274 | + and dod.active_verification_attempt_number is not None | |
| 275 | + ): | |
| 276 | + return VerificationAttempt( | |
| 277 | + attempt_id=dod.active_verification_attempt_id, | |
| 278 | + attempt_number=dod.active_verification_attempt_number, | |
| 279 | + ) | |
| 280 | + | |
| 281 | + next_number = max(int(dod.verification_attempt_counter or 0), 1) | |
| 282 | + dod.verification_attempt_counter = next_number | |
| 283 | + dod.active_verification_attempt_number = next_number | |
| 284 | + dod.active_verification_attempt_id = verification_attempt_id(next_number) | |
| 285 | + return VerificationAttempt( | |
| 286 | + attempt_id=dod.active_verification_attempt_id, | |
| 287 | + attempt_number=next_number, | |
| 288 | + ) | |
| 289 | + | |
| 290 | + | |
| 291 | +def begin_new_verification_attempt( | |
| 292 | + dod: DefinitionOfDone, | |
| 293 | + *, | |
| 294 | + supersedes_attempt_id: str | None = None, | |
| 295 | +) -> VerificationAttempt: | |
| 296 | + """Start the next verification attempt and mark it as active.""" | |
| 297 | + | |
| 298 | + next_number = max(int(dod.verification_attempt_counter or 0), 0) + 1 | |
| 299 | + dod.verification_attempt_counter = next_number | |
| 300 | + dod.active_verification_attempt_number = next_number | |
| 301 | + dod.active_verification_attempt_id = verification_attempt_id(next_number) | |
| 302 | + return VerificationAttempt( | |
| 303 | + attempt_id=dod.active_verification_attempt_id, | |
| 304 | + attempt_number=next_number, | |
| 305 | + supersedes_attempt_id=supersedes_attempt_id, | |
| 306 | + ) | |
| 307 | + | |
| 308 | + | |
| 258 | 309 | class DefinitionOfDoneStore: |
| 259 | 310 | """Persist DoD state to `.loader/dod/`.""" |
| 260 | 311 | |
src/loader/runtime/finalization.py modified
@@ -15,6 +15,7 @@ from .dod import ( | ||
| 15 | 15 | VerificationEvidence, |
| 16 | 16 | build_verification_summary, |
| 17 | 17 | derive_verification_commands, |
| 18 | + ensure_active_verification_attempt, | |
| 18 | 19 | ) |
| 19 | 20 | from .events import AgentEvent, TurnSummary |
| 20 | 21 | from .evidence_provenance import ( |
@@ -221,6 +222,7 @@ class TurnFinalizer: | ||
| 221 | 222 | summary=summary, |
| 222 | 223 | ) |
| 223 | 224 | if dod.verification_commands: |
| 225 | + attempt = ensure_active_verification_attempt(dod) | |
| 224 | 226 | dod.last_verification_result = VerificationObservationStatus.PENDING.value |
| 225 | 227 | self.dod_store.save(dod) |
| 226 | 228 | append_verification_timeline_entry( |
@@ -235,7 +237,11 @@ class TurnFinalizer: | ||
| 235 | 237 | for command in dod.verification_commands[:2] |
| 236 | 238 | ], |
| 237 | 239 | evidence_provenance=_pending_verification_provenance(dod), |
| 238 | - verification_observations=_pending_verification_observations(dod), | |
| 240 | + verification_observations=_pending_verification_observations( | |
| 241 | + dod, | |
| 242 | + attempt_id=attempt.attempt_id, | |
| 243 | + attempt_number=attempt.attempt_number, | |
| 244 | + ), | |
| 239 | 245 | ) |
| 240 | 246 | verification_passed = await self.verify_definition_of_done( |
| 241 | 247 | dod=dod, |
@@ -246,6 +252,8 @@ class TurnFinalizer: | ||
| 246 | 252 | verification_observations = _verification_result_observations( |
| 247 | 253 | dod, |
| 248 | 254 | passed=verification_passed, |
| 255 | + attempt_id=dod.active_verification_attempt_id, | |
| 256 | + attempt_number=dod.active_verification_attempt_number, | |
| 249 | 257 | ) |
| 250 | 258 | if verification_passed: |
| 251 | 259 | passed_provenance = _verification_result_provenance(dod, passed=True) |
@@ -350,10 +358,14 @@ class TurnFinalizer: | ||
| 350 | 358 | dod.status = "verifying" |
| 351 | 359 | self.dod_store.save(dod) |
| 352 | 360 | await self.emit_dod_status(emit, dod) |
| 361 | + attempt = ensure_active_verification_attempt(dod) | |
| 353 | 362 | |
| 354 | 363 | if not dod.verification_commands: |
| 355 | 364 | missing_provenance = _missing_verification_provenance() |
| 356 | - missing_observations = _missing_verification_observations() | |
| 365 | + missing_observations = _missing_verification_observations( | |
| 366 | + attempt_id=attempt.attempt_id, | |
| 367 | + attempt_number=attempt.attempt_number, | |
| 368 | + ) | |
| 357 | 369 | append_verification_timeline_entry( |
| 358 | 370 | self.context, |
| 359 | 371 | summary, |
@@ -411,7 +423,11 @@ class TurnFinalizer: | ||
| 411 | 423 | kind=_classify_verification_kind(command), |
| 412 | 424 | ) |
| 413 | 425 | dod.evidence.append(evidence) |
| 414 | - observation = _verification_observation_from_evidence(evidence) | |
| 426 | + observation = _verification_observation_from_evidence( | |
| 427 | + evidence, | |
| 428 | + attempt_id=attempt.attempt_id, | |
| 429 | + attempt_number=attempt.attempt_number, | |
| 430 | + ) | |
| 415 | 431 | provenance = _verification_provenance_from_evidence(evidence) |
| 416 | 432 | append_verification_timeline_entry( |
| 417 | 433 | self.context, |
@@ -602,6 +618,8 @@ def _verification_result_observations( | ||
| 602 | 618 | dod: DefinitionOfDone, |
| 603 | 619 | *, |
| 604 | 620 | passed: bool, |
| 621 | + attempt_id: str | None, | |
| 622 | + attempt_number: int | None, | |
| 605 | 623 | ) -> list[VerificationObservation]: |
| 606 | 624 | entries: list[VerificationObservation] = [] |
| 607 | 625 | target_status = ( |
@@ -627,6 +645,8 @@ def _verification_result_observations( | ||
| 627 | 645 | kind=evidence.kind, |
| 628 | 646 | exit_code=evidence.exit_code, |
| 629 | 647 | detail=_verification_detail(evidence), |
| 648 | + attempt_id=attempt_id, | |
| 649 | + attempt_number=attempt_number, | |
| 630 | 650 | ) |
| 631 | 651 | ) |
| 632 | 652 | |
@@ -642,6 +662,8 @@ def _verification_result_observations( | ||
| 642 | 662 | summary=f"verification did not produce an observed result for `{command}`", |
| 643 | 663 | command=command, |
| 644 | 664 | kind=_classify_verification_kind(command), |
| 665 | + attempt_id=attempt_id, | |
| 666 | + attempt_number=attempt_number, | |
| 645 | 667 | ) |
| 646 | 668 | ) |
| 647 | 669 | |
@@ -652,12 +674,17 @@ def _verification_result_observations( | ||
| 652 | 674 | VerificationObservation( |
| 653 | 675 | status=VerificationObservationStatus.MISSING.value, |
| 654 | 676 | summary="verification commands were still missing at execution time", |
| 677 | + attempt_id=attempt_id, | |
| 678 | + attempt_number=attempt_number, | |
| 655 | 679 | ) |
| 656 | 680 | ] |
| 657 | 681 | |
| 658 | 682 | |
| 659 | 683 | def _verification_observation_from_evidence( |
| 660 | 684 | evidence: VerificationEvidence, |
| 685 | + *, | |
| 686 | + attempt_id: str | None, | |
| 687 | + attempt_number: int | None, | |
| 661 | 688 | ) -> VerificationObservation: |
| 662 | 689 | command = evidence.command or "verification" |
| 663 | 690 | return VerificationObservation( |
@@ -675,6 +702,8 @@ def _verification_observation_from_evidence( | ||
| 675 | 702 | kind=evidence.kind, |
| 676 | 703 | exit_code=evidence.exit_code, |
| 677 | 704 | detail=_verification_detail(evidence), |
| 705 | + attempt_id=attempt_id, | |
| 706 | + attempt_number=attempt_number, | |
| 678 | 707 | ) |
| 679 | 708 | |
| 680 | 709 | |
@@ -702,11 +731,17 @@ def _verification_provenance_from_evidence( | ||
| 702 | 731 | ] |
| 703 | 732 | |
| 704 | 733 | |
| 705 | -def _missing_verification_observations() -> list[VerificationObservation]: | |
| 734 | +def _missing_verification_observations( | |
| 735 | + *, | |
| 736 | + attempt_id: str | None, | |
| 737 | + attempt_number: int | None, | |
| 738 | +) -> list[VerificationObservation]: | |
| 706 | 739 | return [ |
| 707 | 740 | VerificationObservation( |
| 708 | 741 | status=VerificationObservationStatus.MISSING.value, |
| 709 | 742 | summary="verification commands were still missing at execution time", |
| 743 | + attempt_id=attempt_id, | |
| 744 | + attempt_number=attempt_number, | |
| 710 | 745 | ) |
| 711 | 746 | ] |
| 712 | 747 | |
@@ -724,6 +759,9 @@ def _missing_verification_provenance() -> list[EvidenceProvenance]: | ||
| 724 | 759 | |
| 725 | 760 | def _pending_verification_observations( |
| 726 | 761 | dod: DefinitionOfDone, |
| 762 | + *, | |
| 763 | + attempt_id: str | None, | |
| 764 | + attempt_number: int | None, | |
| 727 | 765 | ) -> list[VerificationObservation]: |
| 728 | 766 | observations: list[VerificationObservation] = [] |
| 729 | 767 | for command in dod.verification_commands: |
@@ -732,6 +770,8 @@ def _pending_verification_observations( | ||
| 732 | 770 | status=VerificationObservationStatus.PENDING.value, |
| 733 | 771 | summary=f"verification pending for `{command}`", |
| 734 | 772 | command=command, |
| 773 | + attempt_id=attempt_id, | |
| 774 | + attempt_number=attempt_number, | |
| 735 | 775 | ) |
| 736 | 776 | ) |
| 737 | 777 | return observations |
src/loader/runtime/tool_batches.py modified
@@ -10,7 +10,9 @@ from .context import RuntimeContext | ||
| 10 | 10 | from .dod import ( |
| 11 | 11 | DefinitionOfDone, |
| 12 | 12 | DefinitionOfDoneStore, |
| 13 | + begin_new_verification_attempt, | |
| 13 | 14 | derive_verification_commands, |
| 15 | + ensure_active_verification_attempt, | |
| 14 | 16 | is_state_mutating_tool_call, |
| 15 | 17 | record_successful_tool_call, |
| 16 | 18 | ) |
@@ -238,6 +240,11 @@ def _mark_verification_stale( | ||
| 238 | 240 | tool_call: ToolCall, |
| 239 | 241 | ) -> None: |
| 240 | 242 | detail = _stale_verification_detail(tool_call) |
| 243 | + stale_attempt = ensure_active_verification_attempt(dod) | |
| 244 | + next_attempt = begin_new_verification_attempt( | |
| 245 | + dod, | |
| 246 | + supersedes_attempt_id=stale_attempt.attempt_id, | |
| 247 | + ) | |
| 241 | 248 | append_verification_timeline_entry( |
| 242 | 249 | context, |
| 243 | 250 | summary, |
@@ -248,6 +255,9 @@ def _mark_verification_stale( | ||
| 248 | 255 | verification_observations=_stale_verification_observations( |
| 249 | 256 | dod, |
| 250 | 257 | detail=detail, |
| 258 | + stale_attempt_id=stale_attempt.attempt_id, | |
| 259 | + stale_attempt_number=stale_attempt.attempt_number, | |
| 260 | + superseded_by_attempt_id=next_attempt.attempt_id, | |
| 251 | 261 | ), |
| 252 | 262 | ) |
| 253 | 263 | dod.last_verification_result = VerificationObservationStatus.STALE.value |
@@ -281,6 +291,7 @@ def _mark_verification_planned( | ||
| 281 | 291 | if not commands: |
| 282 | 292 | return |
| 283 | 293 | |
| 294 | + attempt = begin_new_verification_attempt(dod) | |
| 284 | 295 | detail = _stale_verification_detail(tool_call) |
| 285 | 296 | append_verification_timeline_entry( |
| 286 | 297 | context, |
@@ -306,6 +317,8 @@ def _mark_verification_planned( | ||
| 306 | 317 | command=command, |
| 307 | 318 | kind="runtime", |
| 308 | 319 | detail=detail, |
| 320 | + attempt_id=attempt.attempt_id, | |
| 321 | + attempt_number=attempt.attempt_number, | |
| 309 | 322 | ) |
| 310 | 323 | for command in commands |
| 311 | 324 | ], |
@@ -321,6 +334,9 @@ def _stale_verification_observations( | ||
| 321 | 334 | dod: DefinitionOfDone, |
| 322 | 335 | *, |
| 323 | 336 | detail: str, |
| 337 | + stale_attempt_id: str, | |
| 338 | + stale_attempt_number: int, | |
| 339 | + superseded_by_attempt_id: str, | |
| 324 | 340 | ) -> list[VerificationObservation]: |
| 325 | 341 | return [ |
| 326 | 342 | VerificationObservation( |
@@ -329,6 +345,9 @@ def _stale_verification_observations( | ||
| 329 | 345 | command=command, |
| 330 | 346 | kind="runtime", |
| 331 | 347 | detail=detail, |
| 348 | + attempt_id=stale_attempt_id, | |
| 349 | + attempt_number=stale_attempt_number, | |
| 350 | + supersedes_attempt_id=superseded_by_attempt_id, | |
| 332 | 351 | ) |
| 333 | 352 | for command in _stale_verification_commands(dod) |
| 334 | 353 | ] |
src/loader/runtime/verification_observations.py modified
@@ -7,6 +7,15 @@ from enum import StrEnum | ||
| 7 | 7 | from typing import Any |
| 8 | 8 | |
| 9 | 9 | |
| 10 | +@dataclass(slots=True, frozen=True) | |
| 11 | +class VerificationAttempt: | |
| 12 | + """Identity for one verification attempt across lifecycle events.""" | |
| 13 | + | |
| 14 | + attempt_id: str | |
| 15 | + attempt_number: int | |
| 16 | + supersedes_attempt_id: str | None = None | |
| 17 | + | |
| 18 | + | |
| 10 | 19 | class VerificationObservationStatus(StrEnum): |
| 11 | 20 | """How one verification observation resolved at runtime.""" |
| 12 | 21 | |
@@ -29,6 +38,9 @@ class VerificationObservation: | ||
| 29 | 38 | kind: str | None = None |
| 30 | 39 | exit_code: int | None = None |
| 31 | 40 | detail: str | None = None |
| 41 | + attempt_id: str | None = None | |
| 42 | + attempt_number: int | None = None | |
| 43 | + supersedes_attempt_id: str | None = None | |
| 32 | 44 | |
| 33 | 45 | def to_dict(self) -> dict[str, Any]: |
| 34 | 46 | """Serialize one observation for persisted runtime state.""" |
@@ -40,6 +52,9 @@ class VerificationObservation: | ||
| 40 | 52 | "kind": self.kind, |
| 41 | 53 | "exit_code": self.exit_code, |
| 42 | 54 | "detail": self.detail, |
| 55 | + "attempt_id": self.attempt_id, | |
| 56 | + "attempt_number": self.attempt_number, | |
| 57 | + "supersedes_attempt_id": self.supersedes_attempt_id, | |
| 43 | 58 | } |
| 44 | 59 | |
| 45 | 60 | @classmethod |
@@ -53,9 +68,18 @@ class VerificationObservation: | ||
| 53 | 68 | kind=_optional_text(data.get("kind")), |
| 54 | 69 | exit_code=_optional_int(data.get("exit_code")), |
| 55 | 70 | detail=_optional_text(data.get("detail")), |
| 71 | + attempt_id=_optional_text(data.get("attempt_id")), | |
| 72 | + attempt_number=_optional_int(data.get("attempt_number")), | |
| 73 | + supersedes_attempt_id=_optional_text(data.get("supersedes_attempt_id")), | |
| 56 | 74 | ) |
| 57 | 75 | |
| 58 | 76 | |
| 77 | +def verification_attempt_id(attempt_number: int) -> str: | |
| 78 | + """Build the canonical persisted verification-attempt identifier.""" | |
| 79 | + | |
| 80 | + return f"verification-attempt-{attempt_number}" | |
| 81 | + | |
| 82 | + | |
| 59 | 83 | def normalize_verification_observation_status(value: Any) -> str: |
| 60 | 84 | """Coerce persisted observation statuses into the canonical enum set.""" |
| 61 | 85 | |
tests/test_dod.py modified
@@ -5,9 +5,11 @@ from pathlib import Path | ||
| 5 | 5 | from loader.llm.base import ToolCall |
| 6 | 6 | from loader.runtime.dod import ( |
| 7 | 7 | DefinitionOfDoneStore, |
| 8 | + begin_new_verification_attempt, | |
| 8 | 9 | create_definition_of_done, |
| 9 | 10 | derive_verification_commands, |
| 10 | 11 | determine_task_size, |
| 12 | + ensure_active_verification_attempt, | |
| 11 | 13 | record_successful_tool_call, |
| 12 | 14 | ) |
| 13 | 15 | |
@@ -31,6 +33,7 @@ def test_definition_of_done_round_trip(tmp_path: Path) -> None: | ||
| 31 | 33 | dod.retry_count = 1 |
| 32 | 34 | dod.verification_commands = ["python hello.py"] |
| 33 | 35 | dod.touched_files = [str(tmp_path / "hello.py")] |
| 36 | + attempt = begin_new_verification_attempt(dod) | |
| 34 | 37 | saved_path = store.save(dod) |
| 35 | 38 | |
| 36 | 39 | reloaded = store.load(saved_path) |
@@ -40,6 +43,20 @@ def test_definition_of_done_round_trip(tmp_path: Path) -> None: | ||
| 40 | 43 | assert reloaded.retry_count == 1 |
| 41 | 44 | assert reloaded.verification_commands == ["python hello.py"] |
| 42 | 45 | assert reloaded.touched_files == [str(tmp_path / "hello.py")] |
| 46 | + assert reloaded.active_verification_attempt_id == attempt.attempt_id | |
| 47 | + assert reloaded.active_verification_attempt_number == attempt.attempt_number | |
| 48 | + | |
| 49 | + | |
| 50 | +def test_ensure_active_verification_attempt_rehydrates_missing_active_attempt() -> None: | |
| 51 | + dod = create_definition_of_done("Verify the runtime output.") | |
| 52 | + dod.verification_attempt_counter = 2 | |
| 53 | + | |
| 54 | + attempt = ensure_active_verification_attempt(dod) | |
| 55 | + | |
| 56 | + assert attempt.attempt_id == "verification-attempt-2" | |
| 57 | + assert attempt.attempt_number == 2 | |
| 58 | + assert dod.active_verification_attempt_id == "verification-attempt-2" | |
| 59 | + assert dod.active_verification_attempt_number == 2 | |
| 43 | 60 | |
| 44 | 61 | |
| 45 | 62 | def test_verification_command_derivation_prefers_runtime_evidence(tmp_path: Path) -> None: |
tests/test_finalization.py modified
@@ -336,6 +336,8 @@ async def test_turn_finalizer_records_passed_verification_observation( | ||
| 336 | 336 | assert [item.status for item in result.verification_observations] == [ |
| 337 | 337 | VerificationObservationStatus.PASSED.value |
| 338 | 338 | ] |
| 339 | + assert result.verification_observations[0].attempt_id == "verification-attempt-1" | |
| 340 | + assert result.verification_observations[0].attempt_number == 1 | |
| 339 | 341 | assert result.verification_observations[0].command == "uv run pytest -q" |
| 340 | 342 | assert result.verification_observations[0].detail == "219 passed" |
| 341 | 343 | assert summary.verification_status == "passed" |
@@ -346,6 +348,10 @@ async def test_turn_finalizer_records_passed_verification_observation( | ||
| 346 | 348 | assert [item.status for item in session.workflow_timeline[-2].verification_observations] == [ |
| 347 | 349 | VerificationObservationStatus.PENDING.value |
| 348 | 350 | ] |
| 351 | + assert ( | |
| 352 | + session.workflow_timeline[-2].verification_observations[0].attempt_id | |
| 353 | + == "verification-attempt-1" | |
| 354 | + ) | |
| 349 | 355 | assert session.workflow_timeline[-2].verification_observations[0].command == ( |
| 350 | 356 | "uv run pytest -q" |
| 351 | 357 | ) |
@@ -354,6 +360,10 @@ async def test_turn_finalizer_records_passed_verification_observation( | ||
| 354 | 360 | assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [ |
| 355 | 361 | VerificationObservationStatus.PASSED.value |
| 356 | 362 | ] |
| 363 | + assert ( | |
| 364 | + session.workflow_timeline[-1].verification_observations[0].attempt_id | |
| 365 | + == "verification-attempt-1" | |
| 366 | + ) | |
| 357 | 367 | |
| 358 | 368 | |
| 359 | 369 | @pytest.mark.asyncio |
@@ -388,6 +398,8 @@ async def test_turn_finalizer_records_missing_verification_observation( | ||
| 388 | 398 | assert [item.status for item in result.verification_observations] == [ |
| 389 | 399 | VerificationObservationStatus.MISSING.value |
| 390 | 400 | ] |
| 401 | + assert result.verification_observations[0].attempt_id == "verification-attempt-1" | |
| 402 | + assert result.verification_observations[0].attempt_number == 1 | |
| 391 | 403 | assert [item.summary for item in result.verification_observations] == [ |
| 392 | 404 | "verification commands were still missing at execution time" |
| 393 | 405 | ] |
@@ -397,5 +409,9 @@ async def test_turn_finalizer_records_missing_verification_observation( | ||
| 397 | 409 | assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [ |
| 398 | 410 | VerificationObservationStatus.MISSING.value |
| 399 | 411 | ] |
| 412 | + assert ( | |
| 413 | + session.workflow_timeline[-1].verification_observations[0].attempt_id | |
| 414 | + == "verification-attempt-1" | |
| 415 | + ) | |
| 400 | 416 | assert session.messages[-1].role == Role.USER |
| 401 | 417 | assert session.messages[-1].content.startswith("[DEFINITION OF DONE CHECK FAILED]") |
tests/test_tool_batches.py modified
@@ -396,9 +396,18 @@ async def test_tool_batch_runner_marks_verification_planned_after_new_mutation( | ||
| 396 | 396 | assert dod.last_verification_result == "planned" |
| 397 | 397 | assert dod.verification_commands |
| 398 | 398 | assert "Collect verification evidence" in dod.pending_items |
| 399 | + assert dod.active_verification_attempt_id == "verification-attempt-1" | |
| 400 | + assert dod.active_verification_attempt_number == 1 | |
| 399 | 401 | assert summary.workflow_timeline[-1].reason_code == "verification_planned" |
| 400 | 402 | assert summary.workflow_timeline[-1].policy_outcome == "planned" |
| 401 | 403 | assert summary.workflow_timeline[-1].verification_observations[0].status == "planned" |
| 404 | + assert ( | |
| 405 | + summary.workflow_timeline[-1].verification_observations[0].attempt_id | |
| 406 | + == "verification-attempt-1" | |
| 407 | + ) | |
| 408 | + assert ( | |
| 409 | + summary.workflow_timeline[-1].verification_observations[0].attempt_number == 1 | |
| 410 | + ) | |
| 402 | 411 | |
| 403 | 412 | |
| 404 | 413 | @pytest.mark.asyncio |
@@ -440,6 +449,9 @@ async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutat | ||
| 440 | 449 | dod = create_definition_of_done("Update README and verify it still works.") |
| 441 | 450 | dod.verification_commands = ["uv run pytest -q"] |
| 442 | 451 | dod.last_verification_result = "passed" |
| 452 | + dod.verification_attempt_counter = 1 | |
| 453 | + dod.active_verification_attempt_id = "verification-attempt-1" | |
| 454 | + dod.active_verification_attempt_number = 1 | |
| 443 | 455 | dod.evidence = [ |
| 444 | 456 | VerificationEvidence( |
| 445 | 457 | command="uv run pytest -q", |
@@ -472,9 +484,22 @@ async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutat | ||
| 472 | 484 | assert dod.evidence == [] |
| 473 | 485 | assert "Collect verification evidence" in dod.pending_items |
| 474 | 486 | assert "Collect verification evidence" not in dod.completed_items |
| 487 | + assert dod.active_verification_attempt_id == "verification-attempt-2" | |
| 488 | + assert dod.active_verification_attempt_number == 2 | |
| 475 | 489 | assert summary.workflow_timeline[-1].reason_code == "verification_stale" |
| 476 | 490 | assert summary.workflow_timeline[-1].policy_outcome == "stale" |
| 477 | 491 | assert summary.workflow_timeline[-1].verification_observations[0].status == "stale" |
| 492 | + assert ( | |
| 493 | + summary.workflow_timeline[-1].verification_observations[0].attempt_id | |
| 494 | + == "verification-attempt-1" | |
| 495 | + ) | |
| 496 | + assert ( | |
| 497 | + summary.workflow_timeline[-1].verification_observations[0].attempt_number == 1 | |
| 498 | + ) | |
| 499 | + assert ( | |
| 500 | + summary.workflow_timeline[-1].verification_observations[0].supersedes_attempt_id | |
| 501 | + == "verification-attempt-2" | |
| 502 | + ) | |
| 478 | 503 | assert ( |
| 479 | 504 | summary.workflow_timeline[-1].verification_observations[0].command |
| 480 | 505 | == "uv run pytest -q" |
tests/test_verification_observations.py modified
@@ -27,6 +27,9 @@ def test_normalize_verification_observations_round_trips_entries() -> None: | ||
| 27 | 27 | "kind": "test", |
| 28 | 28 | "exit_code": 0, |
| 29 | 29 | "detail": "219 passed", |
| 30 | + "attempt_id": "verification-attempt-3", | |
| 31 | + "attempt_number": 3, | |
| 32 | + "supersedes_attempt_id": "verification-attempt-2", | |
| 30 | 33 | } |
| 31 | 34 | ] |
| 32 | 35 | ) |
@@ -39,6 +42,9 @@ def test_normalize_verification_observations_round_trips_entries() -> None: | ||
| 39 | 42 | kind="test", |
| 40 | 43 | exit_code=0, |
| 41 | 44 | detail="219 passed", |
| 45 | + attempt_id="verification-attempt-3", | |
| 46 | + attempt_number=3, | |
| 47 | + supersedes_attempt_id="verification-attempt-2", | |
| 42 | 48 | ) |
| 43 | 49 | ] |
| 44 | 50 | |