Surface verification attempt labels
- SHA: 15460af617ea3ee60f28521ff7d5455acff9675a
- Parents:
  - 59470aa
- Tree: ee52e07

15460af
15460af617ea3ee60f28521ff7d5455acff9675a
59470aa
ee52e07

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/cli/main.py | 16 | 2 |
| M | src/loader/runtime/inspection.py | 7 | 1 |
| M | src/loader/runtime/verification_observations.py | 34 | 0 |
| M | src/loader/runtime/workflow_timeline_read_model.py | 10 | 1 |
| M | tests/test_inspection.py | 15 | 3 |
| M | tests/test_workflow_timeline_read_model.py | 16 | 3 |

src/loader/cli/main.py modified
@@ -1508,6 +1508,7 @@ def _print_status_snapshot(snapshot: StatusSnapshot) -> None: | ||
| 1508 | 1508 | evidence = Table(show_header=True, header_style="bold cyan") |
| 1509 | 1509 | evidence.add_column("Result", width=8) |
| 1510 | 1510 | evidence.add_column("Kind", width=10) |
| 1511 | + evidence.add_column("Attempt", width=16) | |
| 1511 | 1512 | evidence.add_column("Command", style="white") |
| 1512 | 1513 | evidence.add_column("Detail", style="dim") |
| 1513 | 1514 | for item in snapshot.recent_verification: |
@@ -1520,7 +1521,13 @@ def _print_status_snapshot(snapshot: StatusSnapshot) -> None: | ||
| 1520 | 1521 | "skipped": "[yellow]skip[/yellow]", |
| 1521 | 1522 | "missing": "[magenta]missing[/magenta]", |
| 1522 | 1523 | }.get(item.status, item.status) |
| 1523 | - evidence.add_row(result, item.kind, item.command, item.detail or "-") | |
| 1524 | + evidence.add_row( | |
| 1525 | + result, | |
| 1526 | + item.kind, | |
| 1527 | + item.attempt or "-", | |
| 1528 | + item.command, | |
| 1529 | + item.detail or "-", | |
| 1530 | + ) | |
| 1524 | 1531 | console.print( |
| 1525 | 1532 | Panel.fit( |
| 1526 | 1533 | evidence, |
@@ -1769,6 +1776,7 @@ def _session_show_main(session_id: str) -> None: | ||
| 1769 | 1776 | verification = Table(show_header=True, header_style="bold cyan") |
| 1770 | 1777 | verification.add_column("Result", width=8) |
| 1771 | 1778 | verification.add_column("Kind", width=10) |
| 1779 | + verification.add_column("Attempt", width=16) | |
| 1772 | 1780 | verification.add_column("Command", style="white") |
| 1773 | 1781 | verification.add_column("Detail", style="dim") |
| 1774 | 1782 | for item in detail.recent_verification: |
@@ -1781,7 +1789,13 @@ def _session_show_main(session_id: str) -> None: | ||
| 1781 | 1789 | "skipped": "[yellow]skip[/yellow]", |
| 1782 | 1790 | "missing": "[magenta]missing[/magenta]", |
| 1783 | 1791 | }.get(item.status, item.status) |
| 1784 | - verification.add_row(result, item.kind, item.command, item.detail or "-") | |
| 1792 | + verification.add_row( | |
| 1793 | + result, | |
| 1794 | + item.kind, | |
| 1795 | + item.attempt or "-", | |
| 1796 | + item.command, | |
| 1797 | + item.detail or "-", | |
| 1798 | + ) | |
| 1785 | 1799 | console.print( |
| 1786 | 1800 | Panel.fit( |
| 1787 | 1801 | verification, |
src/loader/runtime/inspection.py modified
@@ -29,7 +29,11 @@ from .permissions import ( | ||
| 29 | 29 | from .prompt_history import PromptSnapshot |
| 30 | 30 | from .prompting import build_system_prompt_result |
| 31 | 31 | from .session import SessionSnapshot, SessionStore |
| 32 | -from .verification_observations import VerificationObservation, VerificationObservationStatus | |
| 32 | +from .verification_observations import ( | |
| 33 | + VerificationObservation, | |
| 34 | + VerificationObservationStatus, | |
| 35 | + describe_verification_attempt, | |
| 36 | +) | |
| 33 | 37 | from .workflow_ledger import WorkflowLedger |
| 34 | 38 | from .workflow_policy import WorkflowTimelineEntry |
| 35 | 39 | from .workflow_timeline_read_model import ( |
@@ -150,6 +154,7 @@ class VerificationSummary: | ||
| 150 | 154 | status: str |
| 151 | 155 | kind: str |
| 152 | 156 | detail: str |
| 157 | + attempt: str = "" | |
| 153 | 158 | |
| 154 | 159 | |
| 155 | 160 | @dataclass(slots=True) |
@@ -1626,6 +1631,7 @@ def _verification_summary_from_observation( | ||
| 1626 | 1631 | status=observation.status, |
| 1627 | 1632 | kind=observation.kind or "runtime", |
| 1628 | 1633 | detail=observation.detail or "", |
| 1634 | + attempt=describe_verification_attempt(observation) or "", | |
| 1629 | 1635 | ) |
| 1630 | 1636 | |
| 1631 | 1637 | |
src/loader/runtime/verification_observations.py modified
@@ -2,6 +2,7 @@ | ||
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import re | |
| 5 | 6 | from dataclasses import dataclass |
| 6 | 7 | from enum import StrEnum |
| 7 | 8 | from typing import Any |
@@ -80,6 +81,30 @@ def verification_attempt_id(attempt_number: int) -> str: | ||
| 80 | 81 | return f"verification-attempt-{attempt_number}" |
| 81 | 82 | |
| 82 | 83 | |
| 84 | +def verification_attempt_number(value: str | None) -> int | None: | |
| 85 | + """Extract one attempt number from a persisted attempt identifier.""" | |
| 86 | + | |
| 87 | + if not value: | |
| 88 | + return None | |
| 89 | + match = re.fullmatch(r"verification-attempt-(\d+)", value.strip()) | |
| 90 | + if match is None: | |
| 91 | + return None | |
| 92 | + return int(match.group(1)) | |
| 93 | + | |
| 94 | + | |
| 95 | +def describe_verification_attempt(entry: VerificationObservation) -> str | None: | |
| 96 | + """Render a concise operator-facing attempt label for one observation.""" | |
| 97 | + | |
| 98 | + current = _format_attempt_label(entry.attempt_id, entry.attempt_number) | |
| 99 | + next_attempt = _format_attempt_label( | |
| 100 | + entry.supersedes_attempt_id, | |
| 101 | + verification_attempt_number(entry.supersedes_attempt_id), | |
| 102 | + ) | |
| 103 | + if current and next_attempt: | |
| 104 | + return f"{current} -> {next_attempt}" | |
| 105 | + return current or next_attempt | |
| 106 | + | |
| 107 | + | |
| 83 | 108 | def normalize_verification_observation_status(value: Any) -> str: |
| 84 | 109 | """Coerce persisted observation statuses into the canonical enum set.""" |
| 85 | 110 | |
@@ -131,3 +156,12 @@ def _optional_int(value: Any) -> int | None: | ||
| 131 | 156 | if value is None: |
| 132 | 157 | return None |
| 133 | 158 | return int(value) |
| 159 | + | |
| 160 | + | |
| 161 | +def _format_attempt_label(attempt_id: str | None, attempt_number: int | None) -> str | None: | |
| 162 | + number = attempt_number if attempt_number is not None else verification_attempt_number(attempt_id) | |
| 163 | + if number is not None: | |
| 164 | + return f"attempt {number}" | |
| 165 | + if attempt_id: | |
| 166 | + return attempt_id | |
| 167 | + return None | |
src/loader/runtime/workflow_timeline_read_model.py modified
@@ -5,7 +5,10 @@ from __future__ import annotations | ||
| 5 | 5 | from dataclasses import dataclass, field |
| 6 | 6 | |
| 7 | 7 | from .evidence_provenance import EvidenceProvenanceRollup, rollup_evidence_provenance |
| 8 | -from .verification_observations import VerificationObservation | |
| 8 | +from .verification_observations import ( | |
| 9 | + VerificationObservation, | |
| 10 | + describe_verification_attempt, | |
| 11 | +) | |
| 9 | 12 | from .workflow_ledger import WorkflowLedger, workflow_ledger_highlights |
| 10 | 13 | from .workflow_policy import WorkflowTimelineEntry |
| 11 | 14 | |
@@ -247,10 +250,16 @@ def summarize_observed_verification( | ||
| 247 | 250 | summaries: list[str] = [] |
| 248 | 251 | for entry in entries[:max_items]: |
| 249 | 252 | summary = entry.summary.strip() |
| 253 | + attempt = describe_verification_attempt(entry) | |
| 250 | 254 | if entry.detail: |
| 251 | 255 | detail = entry.detail.strip() |
| 252 | 256 | if detail and detail not in summary: |
| 253 | 257 | summary = f"{summary} [{detail}]" |
| 258 | + if attempt: | |
| 259 | + if "[" in summary and summary.endswith("]"): | |
| 260 | + summary = summary[:-1] + f"; {attempt}]" | |
| 261 | + elif attempt not in summary: | |
| 262 | + summary = f"{summary} [{attempt}]" | |
| 254 | 263 | if summary and summary not in summaries: |
| 255 | 264 | summaries.append(summary) |
| 256 | 265 | return summaries |
tests/test_inspection.py modified
@@ -518,6 +518,8 @@ def _persist_session_with_pending_verification(temp_dir: Path) -> str: | ||
| 518 | 518 | summary="verification pending for `uv run pytest -q`", |
| 519 | 519 | command="uv run pytest -q", |
| 520 | 520 | kind="test", |
| 521 | + attempt_id="verification-attempt-2", | |
| 522 | + attempt_number=2, | |
| 521 | 523 | ) |
| 522 | 524 | ], |
| 523 | 525 | prompt_format="native", |
@@ -562,6 +564,8 @@ def _persist_session_with_planned_verification(temp_dir: Path) -> str: | ||
| 562 | 564 | command="uv run pytest -q", |
| 563 | 565 | kind="runtime", |
| 564 | 566 | detail="write changed src/loader/runtime/tool_batches.py", |
| 567 | + attempt_id="verification-attempt-3", | |
| 568 | + attempt_number=3, | |
| 565 | 569 | ) |
| 566 | 570 | ], |
| 567 | 571 | prompt_format="native", |
@@ -609,6 +613,9 @@ def _persist_session_with_stale_verification(temp_dir: Path) -> str: | ||
| 609 | 613 | command="uv run pytest -q", |
| 610 | 614 | kind="runtime", |
| 611 | 615 | detail="write changed src/loader/runtime/finalization.py", |
| 616 | + attempt_id="verification-attempt-1", | |
| 617 | + attempt_number=1, | |
| 618 | + supersedes_attempt_id="verification-attempt-2", | |
| 612 | 619 | ) |
| 613 | 620 | ], |
| 614 | 621 | prompt_format="native", |
@@ -925,12 +932,13 @@ def test_collect_status_snapshot_surfaces_pending_verification( | ||
| 925 | 932 | assert "verification_pending" in snapshot.latest_policy_summary |
| 926 | 933 | assert "policy-outcome=pending" in snapshot.latest_policy_summary |
| 927 | 934 | assert snapshot.latest_policy_observed_verification == [ |
| 928 | - "verification pending for `uv run pytest -q`" | |
| 935 | + "verification pending for `uv run pytest -q` [attempt 2]" | |
| 929 | 936 | ] |
| 930 | 937 | assert [item.status for item in snapshot.recent_verification] == ["pending"] |
| 931 | 938 | assert [item.command for item in snapshot.recent_verification] == [ |
| 932 | 939 | "uv run pytest -q" |
| 933 | 940 | ] |
| 941 | + assert [item.attempt for item in snapshot.recent_verification] == ["attempt 2"] | |
| 934 | 942 | |
| 935 | 943 | |
| 936 | 944 | def test_collect_status_snapshot_surfaces_planned_verification( |
@@ -946,12 +954,13 @@ def test_collect_status_snapshot_surfaces_planned_verification( | ||
| 946 | 954 | assert "verification_planned" in snapshot.latest_policy_summary |
| 947 | 955 | assert "policy-outcome=planned" in snapshot.latest_policy_summary |
| 948 | 956 | assert snapshot.latest_policy_observed_verification == [ |
| 949 | - "verification planned for `uv run pytest -q` [write changed src/loader/runtime/tool_batches.py]" | |
| 957 | + "verification planned for `uv run pytest -q` [write changed src/loader/runtime/tool_batches.py; attempt 3]" | |
| 950 | 958 | ] |
| 951 | 959 | assert [item.status for item in snapshot.recent_verification] == ["planned"] |
| 952 | 960 | assert [item.command for item in snapshot.recent_verification] == [ |
| 953 | 961 | "uv run pytest -q" |
| 954 | 962 | ] |
| 963 | + assert [item.attempt for item in snapshot.recent_verification] == ["attempt 3"] | |
| 955 | 964 | assert [item.detail for item in snapshot.recent_verification] == [ |
| 956 | 965 | "write changed src/loader/runtime/tool_batches.py" |
| 957 | 966 | ] |
@@ -970,12 +979,15 @@ def test_collect_status_snapshot_surfaces_stale_verification( | ||
| 970 | 979 | assert "verification_stale" in snapshot.latest_policy_summary |
| 971 | 980 | assert "policy-outcome=stale" in snapshot.latest_policy_summary |
| 972 | 981 | assert snapshot.latest_policy_observed_verification == [ |
| 973 | - "verification became stale for `uv run pytest -q` after new mutating work [write changed src/loader/runtime/finalization.py]" | |
| 982 | + "verification became stale for `uv run pytest -q` after new mutating work [write changed src/loader/runtime/finalization.py; attempt 1 -> attempt 2]" | |
| 974 | 983 | ] |
| 975 | 984 | assert [item.status for item in snapshot.recent_verification] == ["stale"] |
| 976 | 985 | assert [item.command for item in snapshot.recent_verification] == [ |
| 977 | 986 | "uv run pytest -q" |
| 978 | 987 | ] |
| 988 | + assert [item.attempt for item in snapshot.recent_verification] == [ | |
| 989 | + "attempt 1 -> attempt 2" | |
| 990 | + ] | |
| 979 | 991 | assert [item.detail for item in snapshot.recent_verification] == [ |
| 980 | 992 | "write changed src/loader/runtime/finalization.py" |
| 981 | 993 | ] |
tests/test_workflow_timeline_read_model.py modified
@@ -144,6 +144,8 @@ def test_project_workflow_timeline_highlights_pending_verification() -> None: | ||
| 144 | 144 | summary="verification pending for `pytest -q`", |
| 145 | 145 | command="pytest -q", |
| 146 | 146 | kind="test", |
| 147 | + attempt_id="verification-attempt-2", | |
| 148 | + attempt_number=2, | |
| 147 | 149 | ) |
| 148 | 150 | ], |
| 149 | 151 | ) |
@@ -153,7 +155,7 @@ def test_project_workflow_timeline_highlights_pending_verification() -> None: | ||
| 153 | 155 | |
| 154 | 156 | assert projection.latest_policy_summary is not None |
| 155 | 157 | assert "policy-outcome=pending" in projection.latest_policy_summary |
| 156 | - assert "observed=verification pending for `pytest -q`" in ( | |
| 158 | + assert "observed=verification pending for `pytest -q` [attempt 2]" in ( | |
| 157 | 159 | projection.latest_policy_summary |
| 158 | 160 | ) |
| 159 | 161 | assert any(item.startswith("Verify pending:") for item in projection.highlights) |
@@ -177,6 +179,8 @@ def test_project_workflow_timeline_highlights_planned_verification() -> None: | ||
| 177 | 179 | command="pytest -q", |
| 178 | 180 | kind="runtime", |
| 179 | 181 | detail="write changed README.md", |
| 182 | + attempt_id="verification-attempt-3", | |
| 183 | + attempt_number=3, | |
| 180 | 184 | ) |
| 181 | 185 | ], |
| 182 | 186 | ) |
@@ -186,7 +190,10 @@ def test_project_workflow_timeline_highlights_planned_verification() -> None: | ||
| 186 | 190 | |
| 187 | 191 | assert projection.latest_policy_summary is not None |
| 188 | 192 | assert "policy-outcome=planned" in projection.latest_policy_summary |
| 189 | - assert "observed=verification planned for `pytest -q` [write changed README.md]" in ( | |
| 193 | + assert ( | |
| 194 | + "observed=verification planned for `pytest -q` " | |
| 195 | + "[write changed README.md; attempt 3]" | |
| 196 | + ) in ( | |
| 190 | 197 | projection.latest_policy_summary |
| 191 | 198 | ) |
| 192 | 199 | assert any(item.startswith("Verify planned:") for item in projection.highlights) |
@@ -212,6 +219,9 @@ def test_project_workflow_timeline_highlights_stale_verification() -> None: | ||
| 212 | 219 | command="pytest -q", |
| 213 | 220 | kind="runtime", |
| 214 | 221 | detail="write changed README.md", |
| 222 | + attempt_id="verification-attempt-1", | |
| 223 | + attempt_number=1, | |
| 224 | + supersedes_attempt_id="verification-attempt-2", | |
| 215 | 225 | ) |
| 216 | 226 | ], |
| 217 | 227 | ) |
@@ -221,7 +231,10 @@ def test_project_workflow_timeline_highlights_stale_verification() -> None: | ||
| 221 | 231 | |
| 222 | 232 | assert projection.latest_policy_summary is not None |
| 223 | 233 | assert "policy-outcome=stale" in projection.latest_policy_summary |
| 224 | - assert "observed=verification became stale for `pytest -q` after new mutating work [write changed README.md]" in ( | |
| 234 | + assert ( | |
| 235 | + "observed=verification became stale for `pytest -q` after new mutating work " | |
| 236 | + "[write changed README.md; attempt 1 -> attempt 2]" | |
| 237 | + ) in ( | |
| 225 | 238 | projection.latest_policy_summary |
| 226 | 239 | ) |
| 227 | 240 | assert any(item.startswith("Verify stale:") for item in projection.highlights) |