Add runtime boundary and verification state summaries
- SHA: d193644f6eb04578eafc7d56daba8c9aae58fd01 (short: d193644)
- Parents: cc0fe04
- Tree: d46f5bf

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/cli/main.py | 14 | 0 |
| M | src/loader/runtime/inspection.py | 78 | 3 |
| M | src/loader/runtime/owner_metadata.py | 28 | 0 |
| M | tests/test_inspection.py | 40 | 0 |
src/loader/cli/main.py (modified)
@@ -1398,6 +1398,8 @@ def _print_status_snapshot(snapshot: StatusSnapshot) -> None: | |||
| 1398 | or "none" | 1398 | or "none" |
| 1399 | ), | 1399 | ), |
| 1400 | ) | 1400 | ) |
| 1401 | + if snapshot.runtime_boundary_summary: | ||
| 1402 | + table.add_row("Boundary", snapshot.runtime_boundary_summary) | ||
| 1401 | table.add_row("Workflow", snapshot.workflow_mode) | 1403 | table.add_row("Workflow", snapshot.workflow_mode) |
| 1402 | if snapshot.workflow_decision_kind: | 1404 | if snapshot.workflow_decision_kind: |
| 1403 | table.add_row("Decision Kind", snapshot.workflow_decision_kind) | 1405 | table.add_row("Decision Kind", snapshot.workflow_decision_kind) |
@@ -1472,6 +1474,8 @@ def _print_status_snapshot(snapshot: StatusSnapshot) -> None: | |||
| 1472 | table.add_row("DoD", snapshot.dod_status or "none") | 1474 | table.add_row("DoD", snapshot.dod_status or "none") |
| 1473 | table.add_row("Pending", str(snapshot.dod_pending_items_count)) | 1475 | table.add_row("Pending", str(snapshot.dod_pending_items_count)) |
| 1474 | table.add_row("Last Verify", snapshot.last_verification_result or "none") | 1476 | table.add_row("Last Verify", snapshot.last_verification_result or "none") |
| 1477 | + if snapshot.verification_state_summary: | ||
| 1478 | + table.add_row("Verification State", snapshot.verification_state_summary) | ||
| 1475 | if snapshot.usage: | 1479 | if snapshot.usage: |
| 1476 | table.add_row( | 1480 | table.add_row( |
| 1477 | "Usage", | 1481 | "Usage", |
@@ -1592,6 +1596,8 @@ def _session_list_main() -> None: | |||
| 1592 | or "none" | 1596 | or "none" |
| 1593 | ), | 1597 | ), |
| 1594 | ) | 1598 | ) |
| 1599 | + if entry.runtime_boundary_summary: | ||
| 1600 | + table.add_row("Boundary", entry.runtime_boundary_summary) | ||
| 1595 | table.add_row("Workflow", entry.workflow_mode) | 1601 | table.add_row("Workflow", entry.workflow_mode) |
| 1596 | if entry.workflow_decision_kind: | 1602 | if entry.workflow_decision_kind: |
| 1597 | table.add_row("Decision Kind", entry.workflow_decision_kind) | 1603 | table.add_row("Decision Kind", entry.workflow_decision_kind) |
@@ -1657,6 +1663,8 @@ def _session_show_main(session_id: str) -> None: | |||
| 1657 | or "none" | 1663 | or "none" |
| 1658 | ), | 1664 | ), |
| 1659 | ) | 1665 | ) |
| 1666 | + if detail.runtime_boundary_summary: | ||
| 1667 | + table.add_row("Boundary", detail.runtime_boundary_summary) | ||
| 1660 | table.add_row("Workflow", snapshot.workflow_mode) | 1668 | table.add_row("Workflow", snapshot.workflow_mode) |
| 1661 | if snapshot.workflow_decision_kind: | 1669 | if snapshot.workflow_decision_kind: |
| 1662 | table.add_row("Decision Kind", snapshot.workflow_decision_kind) | 1670 | table.add_row("Decision Kind", snapshot.workflow_decision_kind) |
@@ -1725,6 +1733,8 @@ def _session_show_main(session_id: str) -> None: | |||
| 1725 | table.add_row("Rules Source", snapshot.permission_rules_source or "none") | 1733 | table.add_row("Rules Source", snapshot.permission_rules_source or "none") |
| 1726 | table.add_row("Task", snapshot.current_task or "none") | 1734 | table.add_row("Task", snapshot.current_task or "none") |
| 1727 | table.add_row("Active DoD", snapshot.active_dod_path or "none") | 1735 | table.add_row("Active DoD", snapshot.active_dod_path or "none") |
| 1736 | + if detail.verification_state_summary: | ||
| 1737 | + table.add_row("Verification State", detail.verification_state_summary) | ||
| 1728 | if snapshot.usage: | 1738 | if snapshot.usage: |
| 1729 | table.add_row( | 1739 | table.add_row( |
| 1730 | "Usage", | 1740 | "Usage", |
@@ -1848,8 +1858,12 @@ def _workflow_show_main( | |||
| 1848 | or "none" | 1858 | or "none" |
| 1849 | ), | 1859 | ), |
| 1850 | ) | 1860 | ) |
| 1861 | + if snapshot.runtime_boundary_summary: | ||
| 1862 | + table.add_row("Boundary", snapshot.runtime_boundary_summary) | ||
| 1851 | table.add_row("Workflow", snapshot.workflow_mode) | 1863 | table.add_row("Workflow", snapshot.workflow_mode) |
| 1852 | table.add_row("Task", snapshot.current_task or "none") | 1864 | table.add_row("Task", snapshot.current_task or "none") |
| 1865 | + if snapshot.verification_state_summary: | ||
| 1866 | + table.add_row("Verification State", snapshot.verification_state_summary) | ||
| 1853 | table.add_row("Entries", f"{len(snapshot.entries)} shown / {snapshot.total_entries} total") | 1867 | table.add_row("Entries", f"{len(snapshot.entries)} shown / {snapshot.total_entries} total") |
| 1854 | if snapshot.latest_policy_summary: | 1868 | if snapshot.latest_policy_summary: |
| 1855 | table.add_row("Latest Policy", snapshot.latest_policy_summary) | 1869 | table.add_row("Latest Policy", snapshot.latest_policy_summary) |
src/loader/runtime/inspection.py (modified)
@@ -16,6 +16,7 @@ from ..runtime.capabilities import CapabilityProfile, resolve_capability_profile | |||
| 16 | from ..tools.base import ToolRegistry, create_default_registry | 16 | from ..tools.base import ToolRegistry, create_default_registry |
| 17 | from .dod import DefinitionOfDone, DefinitionOfDoneStore, VerificationEvidence | 17 | from .dod import DefinitionOfDone, DefinitionOfDoneStore, VerificationEvidence |
| 18 | from .explore_state import ExploreStateStore | 18 | from .explore_state import ExploreStateStore |
| 19 | +from .owner_metadata import format_runtime_boundary_label | ||
| 19 | from .permissions import ( | 20 | from .permissions import ( |
| 20 | PermissionConfigStatus, | 21 | PermissionConfigStatus, |
| 21 | PermissionDecision, | 22 | PermissionDecision, |
@@ -165,6 +166,7 @@ class StatusSnapshot: | |||
| 165 | model: str | 166 | model: str |
| 166 | capability_profile: CapabilityProfile | 167 | capability_profile: CapabilityProfile |
| 167 | active_session_id: str | None | 168 | active_session_id: str | None |
| 169 | + runtime_boundary_summary: str | None | ||
| 168 | workflow_mode: str | 170 | workflow_mode: str |
| 169 | workflow_reason_code: str | None | 171 | workflow_reason_code: str | None |
| 170 | workflow_reason_summary: str | None | 172 | workflow_reason_summary: str | None |
@@ -192,6 +194,7 @@ class StatusSnapshot: | |||
| 192 | dod_status: str | None | 194 | dod_status: str | None |
| 193 | dod_pending_items_count: int | 195 | dod_pending_items_count: int |
| 194 | last_verification_result: str | None | 196 | last_verification_result: str | None |
| 197 | + verification_state_summary: str | None | ||
| 195 | recent_verification: list[VerificationSummary] | 198 | recent_verification: list[VerificationSummary] |
| 196 | latest_policy_supporting_evidence: list[str] = field(default_factory=list) | 199 | latest_policy_supporting_evidence: list[str] = field(default_factory=list) |
| 197 | latest_policy_blocking_evidence: list[str] = field(default_factory=list) | 200 | latest_policy_blocking_evidence: list[str] = field(default_factory=list) |
@@ -232,6 +235,7 @@ class SessionSummary: | |||
| 232 | created_at: str | 235 | created_at: str |
| 233 | updated_at: str | 236 | updated_at: str |
| 234 | message_count: int | 237 | message_count: int |
| 238 | + runtime_boundary_summary: str | None | ||
| 235 | workflow_mode: str | 239 | workflow_mode: str |
| 236 | workflow_reason_code: str | None | 240 | workflow_reason_code: str | None |
| 237 | workflow_reason_summary: str | None | 241 | workflow_reason_summary: str | None |
@@ -260,6 +264,8 @@ class SessionDetail: | |||
| 260 | snapshot: SessionSnapshot | 264 | snapshot: SessionSnapshot |
| 261 | is_current: bool | 265 | is_current: bool |
| 262 | definition_of_done: DefinitionOfDone | None | 266 | definition_of_done: DefinitionOfDone | None |
| 267 | + runtime_boundary_summary: str | None = None | ||
| 268 | + verification_state_summary: str | None = None | ||
| 263 | recent_verification: list[VerificationSummary] = field(default_factory=list) | 269 | recent_verification: list[VerificationSummary] = field(default_factory=list) |
| 264 | 270 | ||
| 265 | 271 | ||
@@ -290,8 +296,10 @@ class WorkflowTimelineSnapshot: | |||
| 290 | project_root: Path | 296 | project_root: Path |
| 291 | session_id: str | None | 297 | session_id: str | None |
| 292 | is_current: bool | 298 | is_current: bool |
| 299 | + runtime_boundary_summary: str | None | ||
| 293 | workflow_mode: str | 300 | workflow_mode: str |
| 294 | current_task: str | None | 301 | current_task: str | None |
| 302 | + verification_state_summary: str | None = None | ||
| 295 | total_entries: int = 0 | 303 | total_entries: int = 0 |
| 296 | latest_policy_summary: str | None = None | 304 | latest_policy_summary: str | None = None |
| 297 | latest_policy_supporting_evidence: list[str] = field(default_factory=list) | 305 | latest_policy_supporting_evidence: list[str] = field(default_factory=list) |
@@ -461,6 +469,7 @@ def collect_status_snapshot( | |||
| 461 | model=resolved_model, | 469 | model=resolved_model, |
| 462 | capability_profile=capability_profile, | 470 | capability_profile=capability_profile, |
| 463 | active_session_id=None, | 471 | active_session_id=None, |
| 472 | + runtime_boundary_summary=None, | ||
| 464 | runtime_owner_type=None, | 473 | runtime_owner_type=None, |
| 465 | runtime_owner_path=None, | 474 | runtime_owner_path=None, |
| 466 | workflow_mode="execute", | 475 | workflow_mode="execute", |
@@ -498,6 +507,7 @@ def collect_status_snapshot( | |||
| 498 | dod_status=None, | 507 | dod_status=None, |
| 499 | dod_pending_items_count=0, | 508 | dod_pending_items_count=0, |
| 500 | last_verification_result=None, | 509 | last_verification_result=None, |
| 510 | + verification_state_summary=None, | ||
| 501 | recent_verification=[], | 511 | recent_verification=[], |
| 502 | usage={}, | 512 | usage={}, |
| 503 | compaction_count=0, | 513 | compaction_count=0, |
@@ -535,6 +545,10 @@ def collect_status_snapshot( | |||
| 535 | model=resolved_model, | 545 | model=resolved_model, |
| 536 | capability_profile=capability_profile, | 546 | capability_profile=capability_profile, |
| 537 | active_session_id=snapshot.session_id, | 547 | active_session_id=snapshot.session_id, |
| 548 | + runtime_boundary_summary=_runtime_boundary_summary( | ||
| 549 | + snapshot.runtime_owner_type, | ||
| 550 | + snapshot.runtime_owner_path, | ||
| 551 | + ), | ||
| 538 | runtime_owner_type=snapshot.runtime_owner_type, | 552 | runtime_owner_type=snapshot.runtime_owner_type, |
| 539 | runtime_owner_path=snapshot.runtime_owner_path, | 553 | runtime_owner_path=snapshot.runtime_owner_path, |
| 540 | workflow_mode=snapshot.workflow_mode, | 554 | workflow_mode=snapshot.workflow_mode, |
@@ -582,6 +596,12 @@ def collect_status_snapshot( | |||
| 582 | dod=dod, | 596 | dod=dod, |
| 583 | recent_verification=recent_verification, | 597 | recent_verification=recent_verification, |
| 584 | ), | 598 | ), |
| 599 | + verification_state_summary=_verification_state_summary( | ||
| 600 | + recent_verification, | ||
| 601 | + fallback_status=( | ||
| 602 | + dod.last_verification_result if dod is not None else None | ||
| 603 | + ), | ||
| 604 | + ), | ||
| 585 | recent_verification=recent_verification, | 605 | recent_verification=recent_verification, |
| 586 | usage=dict(snapshot.usage), | 606 | usage=dict(snapshot.usage), |
| 587 | compaction_count=(snapshot.compaction.count if snapshot.compaction else 0), | 607 | compaction_count=(snapshot.compaction.count if snapshot.compaction else 0), |
@@ -648,6 +668,10 @@ def list_session_summaries(project_root: Path | str | None = None) -> list[Sessi | |||
| 648 | session_id=snapshot.session_id, | 668 | session_id=snapshot.session_id, |
| 649 | created_at=snapshot.created_at, | 669 | created_at=snapshot.created_at, |
| 650 | updated_at=snapshot.updated_at, | 670 | updated_at=snapshot.updated_at, |
| 671 | + runtime_boundary_summary=_runtime_boundary_summary( | ||
| 672 | + snapshot.runtime_owner_type, | ||
| 673 | + snapshot.runtime_owner_path, | ||
| 674 | + ), | ||
| 651 | runtime_owner_type=snapshot.runtime_owner_type, | 675 | runtime_owner_type=snapshot.runtime_owner_type, |
| 652 | runtime_owner_path=snapshot.runtime_owner_path, | 676 | runtime_owner_path=snapshot.runtime_owner_path, |
| 653 | message_count=len(snapshot.messages), | 677 | message_count=len(snapshot.messages), |
@@ -689,14 +713,23 @@ def load_session_detail( | |||
| 689 | snapshot = store.load(session_id) | 713 | snapshot = store.load(session_id) |
| 690 | current_session_id = _current_session_id(store) | 714 | current_session_id = _current_session_id(store) |
| 691 | dod = _load_dod(snapshot.active_dod_path, project_root=resolved_root) | 715 | dod = _load_dod(snapshot.active_dod_path, project_root=resolved_root) |
| 716 | + recent_verification = _recent_verification_summaries( | ||
| 717 | + timeline=snapshot.workflow_timeline, | ||
| 718 | + evidence=dod.evidence if dod else [], | ||
| 719 | + ) | ||
| 692 | return SessionDetail( | 720 | return SessionDetail( |
| 693 | snapshot=snapshot, | 721 | snapshot=snapshot, |
| 694 | is_current=snapshot.session_id == current_session_id, | 722 | is_current=snapshot.session_id == current_session_id, |
| 695 | definition_of_done=dod, | 723 | definition_of_done=dod, |
| 696 | - recent_verification=_recent_verification_summaries( | 724 | + runtime_boundary_summary=_runtime_boundary_summary( |
| 697 | - timeline=snapshot.workflow_timeline, | 725 | + snapshot.runtime_owner_type, |
| 698 | - evidence=dod.evidence if dod else [], | 726 | + snapshot.runtime_owner_path, |
| 727 | + ), | ||
| 728 | + verification_state_summary=_verification_state_summary( | ||
| 729 | + recent_verification, | ||
| 730 | + fallback_status=(dod.last_verification_result if dod is not None else None), | ||
| 699 | ), | 731 | ), |
| 732 | + recent_verification=recent_verification, | ||
| 700 | ) | 733 | ) |
| 701 | 734 | ||
| 702 | 735 | ||
@@ -943,10 +976,12 @@ def collect_workflow_timeline( | |||
| 943 | project_root=resolved_root, | 976 | project_root=resolved_root, |
| 944 | session_id=None, | 977 | session_id=None, |
| 945 | is_current=False, | 978 | is_current=False, |
| 979 | + runtime_boundary_summary=None, | ||
| 946 | runtime_owner_type=None, | 980 | runtime_owner_type=None, |
| 947 | runtime_owner_path=None, | 981 | runtime_owner_path=None, |
| 948 | workflow_mode="execute", | 982 | workflow_mode="execute", |
| 949 | current_task=None, | 983 | current_task=None, |
| 984 | + verification_state_summary=None, | ||
| 950 | total_entries=0, | 985 | total_entries=0, |
| 951 | latest_policy_summary=None, | 986 | latest_policy_summary=None, |
| 952 | latest_policy_supporting_evidence=[], | 987 | latest_policy_supporting_evidence=[], |
@@ -969,15 +1004,29 @@ def collect_workflow_timeline( | |||
| 969 | accountability_only=accountability_only, | 1004 | accountability_only=accountability_only, |
| 970 | limit=limit, | 1005 | limit=limit, |
| 971 | ) | 1006 | ) |
| 1007 | + dod = _load_dod(snapshot.active_dod_path, project_root=resolved_root) | ||
| 1008 | + recent_verification = _recent_verification_summaries( | ||
| 1009 | + timeline=snapshot.workflow_timeline, | ||
| 1010 | + evidence=dod.evidence if dod else [], | ||
| 1011 | + limit=1, | ||
| 1012 | + ) | ||
| 972 | 1013 | ||
| 973 | return WorkflowTimelineSnapshot( | 1014 | return WorkflowTimelineSnapshot( |
| 974 | project_root=resolved_root, | 1015 | project_root=resolved_root, |
| 975 | session_id=snapshot.session_id, | 1016 | session_id=snapshot.session_id, |
| 976 | is_current=snapshot.session_id == current_session_id, | 1017 | is_current=snapshot.session_id == current_session_id, |
| 1018 | + runtime_boundary_summary=_runtime_boundary_summary( | ||
| 1019 | + snapshot.runtime_owner_type, | ||
| 1020 | + snapshot.runtime_owner_path, | ||
| 1021 | + ), | ||
| 977 | runtime_owner_type=snapshot.runtime_owner_type, | 1022 | runtime_owner_type=snapshot.runtime_owner_type, |
| 978 | runtime_owner_path=snapshot.runtime_owner_path, | 1023 | runtime_owner_path=snapshot.runtime_owner_path, |
| 979 | workflow_mode=snapshot.workflow_mode, | 1024 | workflow_mode=snapshot.workflow_mode, |
| 980 | current_task=snapshot.current_task, | 1025 | current_task=snapshot.current_task, |
| 1026 | + verification_state_summary=_verification_state_summary( | ||
| 1027 | + recent_verification, | ||
| 1028 | + fallback_status=(dod.last_verification_result if dod is not None else None), | ||
| 1029 | + ), | ||
| 981 | total_entries=projection.total_entries, | 1030 | total_entries=projection.total_entries, |
| 982 | latest_policy_summary=projection.latest_policy_summary, | 1031 | latest_policy_summary=projection.latest_policy_summary, |
| 983 | latest_policy_supporting_evidence=( | 1032 | latest_policy_supporting_evidence=( |
@@ -1658,6 +1707,32 @@ def _verification_summaries_from_evidence( | |||
| 1658 | return summaries | 1707 | return summaries |
| 1659 | 1708 | ||
| 1660 | 1709 | ||
| 1710 | +def _runtime_boundary_summary( | ||
| 1711 | + owner_type: str | None, | ||
| 1712 | + owner_path: str | None, | ||
| 1713 | +) -> str | None: | ||
| 1714 | + return format_runtime_boundary_label(owner_type, owner_path) | ||
| 1715 | + | ||
| 1716 | + | ||
| 1717 | +def _verification_state_summary( | ||
| 1718 | + recent_verification: list[VerificationSummary], | ||
| 1719 | + *, | ||
| 1720 | + fallback_status: str | None = None, | ||
| 1721 | +) -> str | None: | ||
| 1722 | + if recent_verification: | ||
| 1723 | + item = recent_verification[0] | ||
| 1724 | + parts = [item.status] | ||
| 1725 | + if item.attempt: | ||
| 1726 | + parts.append(f"({item.attempt})") | ||
| 1727 | + summary = " ".join(parts) | ||
| 1728 | + if item.command: | ||
| 1729 | + summary += f" for {item.command}" | ||
| 1730 | + return summary | ||
| 1731 | + if fallback_status: | ||
| 1732 | + return fallback_status | ||
| 1733 | + return None | ||
| 1734 | + | ||
| 1735 | + | ||
| 1661 | def _last_verification_result( | 1736 | def _last_verification_result( |
| 1662 | *, | 1737 | *, |
| 1663 | dod: DefinitionOfDone | None, | 1738 | dod: DefinitionOfDone | None, |
src/loader/runtime/owner_metadata.py (modified)
@@ -62,6 +62,34 @@ def format_runtime_owner_label( | |||
| 62 | return normalized_path or normalized_type | 62 | return normalized_path or normalized_type |
| 63 | 63 | ||
| 64 | 64 | ||
| 65 | +def classify_runtime_owner_boundary( | ||
| 66 | + owner_type: str | None, | ||
| 67 | + owner_path: str | None, | ||
| 68 | +) -> str | None: | ||
| 69 | + """Classify the persisted owner boundary for operator surfaces.""" | ||
| 70 | + | ||
| 71 | + normalized_type = normalize_runtime_owner_type(owner_type) | ||
| 72 | + normalized_path = normalize_runtime_owner_path(owner_path, owner_type=normalized_type) | ||
| 73 | + if normalized_path == "runtime-handle" or normalized_type == "RuntimeHandle": | ||
| 74 | + return "runtime-first" | ||
| 75 | + if normalized_path == "public-agent" or normalized_type == "Agent": | ||
| 76 | + return "public-compat" | ||
| 77 | + return None | ||
| 78 | + | ||
| 79 | + | ||
| 80 | +def format_runtime_boundary_label( | ||
| 81 | + owner_type: str | None, | ||
| 82 | + owner_path: str | None, | ||
| 83 | +) -> str | None: | ||
| 84 | + """Render one concise operator-facing runtime boundary label.""" | ||
| 85 | + | ||
| 86 | + boundary = classify_runtime_owner_boundary(owner_type, owner_path) | ||
| 87 | + owner = format_runtime_owner_label(owner_type, owner_path) | ||
| 88 | + if boundary and owner: | ||
| 89 | + return f"{boundary} via {owner}" | ||
| 90 | + return boundary or owner | ||
| 91 | + | ||
| 92 | + | ||
| 65 | def _camel_to_kebab(value: str) -> str: | 93 | def _camel_to_kebab(value: str) -> str: |
| 66 | """Convert one CamelCase-ish class name into kebab-case.""" | 94 | """Convert one CamelCase-ish class name into kebab-case.""" |
| 67 | 95 | ||
tests/test_inspection.py (modified)
@@ -732,6 +732,7 @@ def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> | |||
| 732 | assert snapshot.last_verification_result == "failed" | 732 | assert snapshot.last_verification_result == "failed" |
| 733 | assert snapshot.active_dod_path == dod_path | 733 | assert snapshot.active_dod_path == dod_path |
| 734 | assert snapshot.permission_mode == "prompt" | 734 | assert snapshot.permission_mode == "prompt" |
| 735 | + assert snapshot.runtime_boundary_summary == "runtime-first via runtime-handle (RuntimeHandle)" | ||
| 735 | assert snapshot.runtime_owner_type == "RuntimeHandle" | 736 | assert snapshot.runtime_owner_type == "RuntimeHandle" |
| 736 | assert snapshot.runtime_owner_path == "runtime-handle" | 737 | assert snapshot.runtime_owner_path == "runtime-handle" |
| 737 | assert snapshot.permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1} | 738 | assert snapshot.permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1} |
@@ -769,12 +770,16 @@ def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> | |||
| 769 | assert [item.status for item in snapshot.recent_verification] == ["failed"] | 770 | assert [item.status for item in snapshot.recent_verification] == ["failed"] |
| 770 | assert [item.command for item in snapshot.recent_verification] == ["pytest -q"] | 771 | assert [item.command for item in snapshot.recent_verification] == ["pytest -q"] |
| 771 | assert [item.detail for item in snapshot.recent_verification] == ["1 failed"] | 772 | assert [item.detail for item in snapshot.recent_verification] == ["1 failed"] |
| 773 | + assert snapshot.verification_state_summary == "failed for pytest -q" | ||
| 772 | 774 | ||
| 773 | assert len(sessions) == 1 | 775 | assert len(sessions) == 1 |
| 774 | assert sessions[0].session_id == session_id | 776 | assert sessions[0].session_id == session_id |
| 775 | assert sessions[0].is_current is True | 777 | assert sessions[0].is_current is True |
| 776 | assert sessions[0].runtime_owner_type == "RuntimeHandle" | 778 | assert sessions[0].runtime_owner_type == "RuntimeHandle" |
| 777 | assert sessions[0].runtime_owner_path == "runtime-handle" | 779 | assert sessions[0].runtime_owner_path == "runtime-handle" |
| 780 | + assert sessions[0].runtime_boundary_summary == ( | ||
| 781 | + "runtime-first via runtime-handle (RuntimeHandle)" | ||
| 782 | + ) | ||
| 778 | assert sessions[0].dod_status == "fixing" | 783 | assert sessions[0].dod_status == "fixing" |
| 779 | assert sessions[0].permission_prompting_enabled is True | 784 | assert sessions[0].permission_prompting_enabled is True |
| 780 | assert sessions[0].permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1} | 785 | assert sessions[0].permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1} |
@@ -799,6 +804,10 @@ def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> | |||
| 799 | assert detail.is_current is True | 804 | assert detail.is_current is True |
| 800 | assert detail.snapshot.runtime_owner_type == "RuntimeHandle" | 805 | assert detail.snapshot.runtime_owner_type == "RuntimeHandle" |
| 801 | assert detail.snapshot.runtime_owner_path == "runtime-handle" | 806 | assert detail.snapshot.runtime_owner_path == "runtime-handle" |
| 807 | + assert detail.runtime_boundary_summary == ( | ||
| 808 | + "runtime-first via runtime-handle (RuntimeHandle)" | ||
| 809 | + ) | ||
| 810 | + assert detail.verification_state_summary == "failed for pytest -q" | ||
| 802 | assert detail.definition_of_done is not None | 811 | assert detail.definition_of_done is not None |
| 803 | assert detail.definition_of_done.status == "fixing" | 812 | assert detail.definition_of_done.status == "fixing" |
| 804 | assert detail.snapshot.permission_rules_source == str( | 813 | assert detail.snapshot.permission_rules_source == str( |
@@ -830,8 +839,12 @@ def test_collect_workflow_timeline_reflects_persisted_history(temp_dir: Path) -> | |||
| 830 | assert snapshot.is_current is True | 839 | assert snapshot.is_current is True |
| 831 | assert snapshot.runtime_owner_type == "RuntimeHandle" | 840 | assert snapshot.runtime_owner_type == "RuntimeHandle" |
| 832 | assert snapshot.runtime_owner_path == "runtime-handle" | 841 | assert snapshot.runtime_owner_path == "runtime-handle" |
| 842 | + assert snapshot.runtime_boundary_summary == ( | ||
| 843 | + "runtime-first via runtime-handle (RuntimeHandle)" | ||
| 844 | + ) | ||
| 833 | assert snapshot.workflow_mode == "execute" | 845 | assert snapshot.workflow_mode == "execute" |
| 834 | assert snapshot.current_task == "Fix the failing tests" | 846 | assert snapshot.current_task == "Fix the failing tests" |
| 847 | + assert snapshot.verification_state_summary == "failed for pytest -q" | ||
| 835 | assert snapshot.total_entries == 2 | 848 | assert snapshot.total_entries == 2 |
| 836 | assert [entry.kind for entry in snapshot.entries] == ["handoff", "reentry"] | 849 | assert [entry.kind for entry in snapshot.entries] == ["handoff", "reentry"] |
| 837 | assert snapshot.entries[-1].reason_code == "verification_failed_reentry" | 850 | assert snapshot.entries[-1].reason_code == "verification_failed_reentry" |
@@ -939,6 +952,9 @@ def test_collect_status_snapshot_surfaces_pending_verification( | |||
| 939 | "uv run pytest -q" | 952 | "uv run pytest -q" |
| 940 | ] | 953 | ] |
| 941 | assert [item.attempt for item in snapshot.recent_verification] == ["attempt 2"] | 954 | assert [item.attempt for item in snapshot.recent_verification] == ["attempt 2"] |
| 955 | + assert snapshot.verification_state_summary == ( | ||
| 956 | + "pending (attempt 2) for uv run pytest -q" | ||
| 957 | + ) | ||
| 942 | 958 | ||
| 943 | 959 | ||
| 944 | def test_collect_status_snapshot_surfaces_planned_verification( | 960 | def test_collect_status_snapshot_surfaces_planned_verification( |
@@ -964,6 +980,9 @@ def test_collect_status_snapshot_surfaces_planned_verification( | |||
| 964 | assert [item.detail for item in snapshot.recent_verification] == [ | 980 | assert [item.detail for item in snapshot.recent_verification] == [ |
| 965 | "write changed src/loader/runtime/tool_batches.py" | 981 | "write changed src/loader/runtime/tool_batches.py" |
| 966 | ] | 982 | ] |
| 983 | + assert snapshot.verification_state_summary == ( | ||
| 984 | + "planned (attempt 3) for uv run pytest -q" | ||
| 985 | + ) | ||
| 967 | 986 | ||
| 968 | 987 | ||
| 969 | def test_collect_status_snapshot_surfaces_stale_verification( | 988 | def test_collect_status_snapshot_surfaces_stale_verification( |
@@ -991,6 +1010,9 @@ def test_collect_status_snapshot_surfaces_stale_verification( | |||
| 991 | assert [item.detail for item in snapshot.recent_verification] == [ | 1010 | assert [item.detail for item in snapshot.recent_verification] == [ |
| 992 | "write changed src/loader/runtime/finalization.py" | 1011 | "write changed src/loader/runtime/finalization.py" |
| 993 | ] | 1012 | ] |
| 1013 | + assert snapshot.verification_state_summary == ( | ||
| 1014 | + "stale (attempt 1 -> attempt 2) for uv run pytest -q" | ||
| 1015 | + ) | ||
| 994 | 1016 | ||
| 995 | 1017 | ||
| 996 | def test_collect_prompt_diff_uses_persisted_prompt_history(temp_dir: Path) -> None: | 1018 | def test_collect_prompt_diff_uses_persisted_prompt_history(temp_dir: Path) -> None: |
@@ -1051,7 +1073,9 @@ def test_status_and_session_commands_render_persisted_state( | |||
| 1051 | assert session_id in status_result.output | 1073 | assert session_id in status_result.output |
| 1052 | assert "fixing" in status_result.output | 1074 | assert "fixing" in status_result.output |
| 1053 | assert "Runtime Owner" in status_result.output | 1075 | assert "Runtime Owner" in status_result.output |
| 1076 | + assert "Boundary" in status_result.output | ||
| 1054 | assert "runtime-handle (RuntimeHandle)" in status_result.output | 1077 | assert "runtime-handle (RuntimeHandle)" in status_result.output |
| 1078 | + assert "runtime-first via runtime-handle (RuntimeHandle)" in status_result.output | ||
| 1055 | assert "1 allow / 2 deny / 1 ask" in status_result.output | 1079 | assert "1 allow / 2 deny / 1 ask" in status_result.output |
| 1056 | assert "native" in status_result.output | 1080 | assert "native" in status_result.output |
| 1057 | assert "Runtime Config, Workflow Context, Mode Guidance" in status_result.output | 1081 | assert "Runtime Config, Workflow Context, Mode Guidance" in status_result.output |
@@ -1066,11 +1090,15 @@ def test_status_and_session_commands_render_persisted_state( | |||
| 1066 | assert "What file did you mention?" in status_result.output | 1090 | assert "What file did you mention?" in status_result.output |
| 1067 | assert "pytest -q" in status_result.output | 1091 | assert "pytest -q" in status_result.output |
| 1068 | assert "1 failed" in status_result.output | 1092 | assert "1 failed" in status_result.output |
| 1093 | + assert "Verification State" in status_result.output | ||
| 1094 | + assert "failed for pytest -q" in status_result.output | ||
| 1069 | 1095 | ||
| 1070 | assert list_result.exit_code == 0 | 1096 | assert list_result.exit_code == 0 |
| 1071 | assert session_id in list_result.output | 1097 | assert session_id in list_result.output |
| 1072 | assert "Runtime Owner" in list_result.output | 1098 | assert "Runtime Owner" in list_result.output |
| 1099 | + assert "Boundary" in list_result.output | ||
| 1073 | assert "runtime-handle (RuntimeHandle)" in list_result.output | 1100 | assert "runtime-handle (RuntimeHandle)" in list_result.output |
| 1101 | + assert "runtime-first via runtime-handle (RuntimeHandle)" in list_result.output | ||
| 1074 | assert "1 allow / 2 deny / 1 ask" in list_result.output | 1102 | assert "1 allow / 2 deny / 1 ask" in list_result.output |
| 1075 | assert "prompting enabled" in list_result.output | 1103 | assert "prompting enabled" in list_result.output |
| 1076 | assert "native" in list_result.output | 1104 | assert "native" in list_result.output |
@@ -1082,7 +1110,9 @@ def test_status_and_session_commands_render_persisted_state( | |||
| 1082 | assert show_result.exit_code == 0 | 1110 | assert show_result.exit_code == 0 |
| 1083 | assert session_id in show_result.output | 1111 | assert session_id in show_result.output |
| 1084 | assert "Runtime Owner" in show_result.output | 1112 | assert "Runtime Owner" in show_result.output |
| 1113 | + assert "Boundary" in show_result.output | ||
| 1085 | assert "runtime-handle (RuntimeHandle)" in show_result.output | 1114 | assert "runtime-handle (RuntimeHandle)" in show_result.output |
| 1115 | + assert "runtime-first via runtime-handle (RuntimeHandle)" in show_result.output | ||
| 1086 | assert "Patch the broken parser" in show_result.output | 1116 | assert "Patch the broken parser" in show_result.output |
| 1087 | assert "1 allow / 2 deny / 1 ask" in show_result.output | 1117 | assert "1 allow / 2 deny / 1 ask" in show_result.output |
| 1088 | assert "enabled" in show_result.output | 1118 | assert "enabled" in show_result.output |
@@ -1092,6 +1122,8 @@ def test_status_and_session_commands_render_persisted_state( | |||
| 1092 | assert "Completion Decision" in show_result.output | 1122 | assert "Completion Decision" in show_result.output |
| 1093 | assert "Completion Trace" in show_result.output | 1123 | assert "Completion Trace" in show_result.output |
| 1094 | assert "Recent Verification" in show_result.output | 1124 | assert "Recent Verification" in show_result.output |
| 1125 | + assert "Verification State" in show_result.output | ||
| 1126 | + assert "failed for pytest -q" in show_result.output | ||
| 1095 | assert "continuation_check" in show_result.output | 1127 | assert "continuation_check" in show_result.output |
| 1096 | assert "completion -> finalize" in show_result.output | 1128 | assert "completion -> finalize" in show_result.output |
| 1097 | assert "Finalizing completed turn" in show_result.output | 1129 | assert "Finalizing completed turn" in show_result.output |
@@ -1107,7 +1139,11 @@ def test_status_and_session_commands_render_persisted_state( | |||
| 1107 | assert "Workflow Timeline" in workflow_result.output | 1139 | assert "Workflow Timeline" in workflow_result.output |
| 1108 | assert session_id in workflow_result.output | 1140 | assert session_id in workflow_result.output |
| 1109 | assert "Runtime Owner" in workflow_result.output | 1141 | assert "Runtime Owner" in workflow_result.output |
| 1142 | + assert "Boundary" in workflow_result.output | ||
| 1110 | assert "runtime-handle (RuntimeHandle)" in workflow_result.output | 1143 | assert "runtime-handle (RuntimeHandle)" in workflow_result.output |
| 1144 | + assert "runtime-first via runtime-handle (RuntimeHandle)" in workflow_result.output | ||
| 1145 | + assert "Verification State" in workflow_result.output | ||
| 1146 | + assert "failed for pytest -q" in workflow_result.output | ||
| 1111 | assert "handoff" in workflow_result.output | 1147 | assert "handoff" in workflow_result.output |
| 1112 | assert "next=verify" in workflow_result.output | 1148 | assert "next=verify" in workflow_result.output |
| 1113 | 1149 | ||
@@ -1170,6 +1206,8 @@ def test_workflow_command_renders_stale_verification_context( | |||
| 1170 | assert "verification_stale" in result.output | 1206 | assert "verification_stale" in result.output |
| 1171 | assert "policy-outcome=stale" in result.output | 1207 | assert "policy-outcome=stale" in result.output |
| 1172 | assert "Observed Verification" in result.output | 1208 | assert "Observed Verification" in result.output |
| 1209 | + assert "Verification State" in result.output | ||
| 1210 | + assert "stale (attempt 1 -> attempt 2) for uv run pytest -q" in result.output | ||
| 1173 | assert "uv run pytest -q" in result.output | 1211 | assert "uv run pytest -q" in result.output |
| 1174 | assert "new mutating work" in result.output | 1212 | assert "new mutating work" in result.output |
| 1175 | 1213 | ||
@@ -1193,6 +1231,8 @@ def test_workflow_command_renders_planned_verification_context( | |||
| 1193 | assert "verification_planned" in result.output | 1231 | assert "verification_planned" in result.output |
| 1194 | assert "policy-outcome=planned" in result.output | 1232 | assert "policy-outcome=planned" in result.output |
| 1195 | assert "Observed Verification" in result.output | 1233 | assert "Observed Verification" in result.output |
| 1234 | + assert "Verification State" in result.output | ||
| 1235 | + assert "planned (attempt 3) for uv run pytest -q" in result.output | ||
| 1196 | assert "verification planned for `uv run pytest -q`" in result.output | 1236 | assert "verification planned for `uv run pytest -q`" in result.output |
| 1197 | assert "uv run pytest -q" in result.output | 1237 | assert "uv run pytest -q" in result.output |
| 1198 | 1238 | ||