`d193644`

Add runtime boundary and verification state summaries

Authored by

espadonne 1 month ago

SHA: d193644f6eb04578eafc7d56daba8c9aae58fd01
Parents: cc0fe04
Tree: d46f5bf

4 changed files

Status	File	+	-
M	`src/loader/cli/main.py`	14	0
M	`src/loader/runtime/inspection.py`	78	3
M	`src/loader/runtime/owner_metadata.py`	28	0
M	`tests/test_inspection.py`	40	0

src/loader/cli/main.py modified

              or "none"
          ),
+     )
 +    if snapshot.runtime_boundary_summary:
 +        table.add_row("Boundary", snapshot.runtime_boundary_summary)
      table.add_row("Workflow", snapshot.workflow_mode)
      if snapshot.workflow_decision_kind:
          table.add_row("Decision Kind", snapshot.workflow_decision_kind)
      table.add_row("DoD", snapshot.dod_status or "none")
      table.add_row("Pending", str(snapshot.dod_pending_items_count))
      table.add_row("Last Verify", snapshot.last_verification_result or "none")
 +    if snapshot.verification_state_summary:
 +        table.add_row("Verification State", snapshot.verification_state_summary)
      if snapshot.usage:
          table.add_row(
              "Usage",
                  or "none"
              ),
+         )
 +        if entry.runtime_boundary_summary:
 +            table.add_row("Boundary", entry.runtime_boundary_summary)
          table.add_row("Workflow", entry.workflow_mode)
          if entry.workflow_decision_kind:
              table.add_row("Decision Kind", entry.workflow_decision_kind)
              or "none"
          ),
+     )
 +    if detail.runtime_boundary_summary:
 +        table.add_row("Boundary", detail.runtime_boundary_summary)
      table.add_row("Workflow", snapshot.workflow_mode)
      if snapshot.workflow_decision_kind:
          table.add_row("Decision Kind", snapshot.workflow_decision_kind)
      table.add_row("Rules Source", snapshot.permission_rules_source or "none")
      table.add_row("Task", snapshot.current_task or "none")
      table.add_row("Active DoD", snapshot.active_dod_path or "none")
 +    if detail.verification_state_summary:
 +        table.add_row("Verification State", detail.verification_state_summary)
      if snapshot.usage:
          table.add_row(
              "Usage",
              or "none"
          ),
+     )
 +    if snapshot.runtime_boundary_summary:
 +        table.add_row("Boundary", snapshot.runtime_boundary_summary)
      table.add_row("Workflow", snapshot.workflow_mode)
      table.add_row("Task", snapshot.current_task or "none")
 +    if snapshot.verification_state_summary:
 +        table.add_row("Verification State", snapshot.verification_state_summary)
      table.add_row("Entries", f"{len(snapshot.entries)} shown / {snapshot.total_entries} total")
      if snapshot.latest_policy_summary:
          table.add_row("Latest Policy", snapshot.latest_policy_summary)

src/loader/runtime/inspection.py modified

  from ..tools.base import ToolRegistry, create_default_registry
  from .dod import DefinitionOfDone, DefinitionOfDoneStore, VerificationEvidence
  from .explore_state import ExploreStateStore
 +from .owner_metadata import format_runtime_boundary_label
  from .permissions import (
      PermissionConfigStatus,
      PermissionDecision,
      model: str
      capability_profile: CapabilityProfile
      active_session_id: str | None
 +    runtime_boundary_summary: str | None
      workflow_mode: str
      workflow_reason_code: str | None
      workflow_reason_summary: str | None
      dod_status: str | None
      dod_pending_items_count: int
      last_verification_result: str | None
 +    verification_state_summary: str | None
      recent_verification: list[VerificationSummary]
      latest_policy_supporting_evidence: list[str] = field(default_factory=list)
      latest_policy_blocking_evidence: list[str] = field(default_factory=list)
      created_at: str
      updated_at: str
      message_count: int
 +    runtime_boundary_summary: str | None
      workflow_mode: str
      workflow_reason_code: str | None
      workflow_reason_summary: str | None
      snapshot: SessionSnapshot
      is_current: bool
      definition_of_done: DefinitionOfDone | None
 +    runtime_boundary_summary: str | None = None
 +    verification_state_summary: str | None = None
      recent_verification: list[VerificationSummary] = field(default_factory=list)
      project_root: Path
      session_id: str | None
      is_current: bool
 +    runtime_boundary_summary: str | None
      workflow_mode: str
      current_task: str | None
 +    verification_state_summary: str | None = None
      total_entries: int = 0
      latest_policy_summary: str | None = None
      latest_policy_supporting_evidence: list[str] = field(default_factory=list)
              model=resolved_model,
              capability_profile=capability_profile,
              active_session_id=None,
 +            runtime_boundary_summary=None,
              runtime_owner_type=None,
              runtime_owner_path=None,
              workflow_mode="execute",
              dod_status=None,
              dod_pending_items_count=0,
              last_verification_result=None,
 +            verification_state_summary=None,
              recent_verification=[],
              usage={},
              compaction_count=0,
          model=resolved_model,
          capability_profile=capability_profile,
          active_session_id=snapshot.session_id,
 +        runtime_boundary_summary=_runtime_boundary_summary(
 +            snapshot.runtime_owner_type,
 +            snapshot.runtime_owner_path,
 +        ),
          runtime_owner_type=snapshot.runtime_owner_type,
          runtime_owner_path=snapshot.runtime_owner_path,
          workflow_mode=snapshot.workflow_mode,
              dod=dod,
              recent_verification=recent_verification,
          ),
 +        verification_state_summary=_verification_state_summary(
 +            recent_verification,
 +            fallback_status=(
 +                dod.last_verification_result if dod is not None else None
 +            ),
 +        ),
          recent_verification=recent_verification,
          usage=dict(snapshot.usage),
          compaction_count=(snapshot.compaction.count if snapshot.compaction else 0),
                  session_id=snapshot.session_id,
                  created_at=snapshot.created_at,
                  updated_at=snapshot.updated_at,
 +                runtime_boundary_summary=_runtime_boundary_summary(
 +                    snapshot.runtime_owner_type,
 +                    snapshot.runtime_owner_path,
 +                ),
                  runtime_owner_type=snapshot.runtime_owner_type,
                  runtime_owner_path=snapshot.runtime_owner_path,
                  message_count=len(snapshot.messages),
      snapshot = store.load(session_id)
      current_session_id = _current_session_id(store)
      dod = _load_dod(snapshot.active_dod_path, project_root=resolved_root)
 +    recent_verification = _recent_verification_summaries(
 +        timeline=snapshot.workflow_timeline,
 +        evidence=dod.evidence if dod else [],
 +    )
      return SessionDetail(
          snapshot=snapshot,
          is_current=snapshot.session_id == current_session_id,
          definition_of_done=dod,
 -        recent_verification=_recent_verification_summaries(
 -            timeline=snapshot.workflow_timeline,
 -            evidence=dod.evidence if dod else [],
 +        runtime_boundary_summary=_runtime_boundary_summary(
 +            snapshot.runtime_owner_type,
 +            snapshot.runtime_owner_path,
 +        ),
 +        verification_state_summary=_verification_state_summary(
 +            recent_verification,
 +            fallback_status=(dod.last_verification_result if dod is not None else None),
          ),
 +        recent_verification=recent_verification,
+     )
              project_root=resolved_root,
              session_id=None,
              is_current=False,
 +            runtime_boundary_summary=None,
              runtime_owner_type=None,
              runtime_owner_path=None,
              workflow_mode="execute",
              current_task=None,
 +            verification_state_summary=None,
              total_entries=0,
              latest_policy_summary=None,
              latest_policy_supporting_evidence=[],
          accountability_only=accountability_only,
          limit=limit,
+     )
 +    dod = _load_dod(snapshot.active_dod_path, project_root=resolved_root)
 +    recent_verification = _recent_verification_summaries(
 +        timeline=snapshot.workflow_timeline,
 +        evidence=dod.evidence if dod else [],
 +        limit=1,
 +    )
      return WorkflowTimelineSnapshot(
          project_root=resolved_root,
          session_id=snapshot.session_id,
          is_current=snapshot.session_id == current_session_id,
 +        runtime_boundary_summary=_runtime_boundary_summary(
 +            snapshot.runtime_owner_type,
 +            snapshot.runtime_owner_path,
 +        ),
          runtime_owner_type=snapshot.runtime_owner_type,
          runtime_owner_path=snapshot.runtime_owner_path,
          workflow_mode=snapshot.workflow_mode,
          current_task=snapshot.current_task,
 +        verification_state_summary=_verification_state_summary(
 +            recent_verification,
 +            fallback_status=(dod.last_verification_result if dod is not None else None),
 +        ),
          total_entries=projection.total_entries,
          latest_policy_summary=projection.latest_policy_summary,
          latest_policy_supporting_evidence=(
      return summaries
 +def _runtime_boundary_summary(  # builds the runtime_boundary_summary value for the inspection snapshots
 +    owner_type: str | None,
 +    owner_path: str | None,
 +) -> str | None:
 +    return format_runtime_boundary_label(owner_type, owner_path)  # pure delegation; the result may be None
++
++
 +def _verification_state_summary(  # one-line text of the form: status (attempt) for command; None when nothing is known
 +    recent_verification: list[VerificationSummary],
 +    *,
 +    fallback_status: str | None = None,  # consulted only when recent_verification is empty
 +) -> str | None:
 +    if recent_verification:
 +        item = recent_verification[0]  # only the first entry is summarized; presumably the newest -- confirm ordering with the producer
 +        parts = [item.status]
 +        if item.attempt:  # attempt text is optional and is rendered inside parentheses
 +            parts.append(f"({item.attempt})")
 +        summary = " ".join(parts)
 +        if item.command:  # command is optional and is appended after the word for
 +            summary += f" for {item.command}"
 +        return summary
 +    if fallback_status:  # an empty-string fallback is treated the same as a missing one
 +        return fallback_status
 +    return None
++
++
  def _last_verification_result(
      *,
      dod: DefinitionOfDone | None,

src/loader/runtime/owner_metadata.py modified

      return normalized_path or normalized_type
 +def classify_runtime_owner_boundary(  # returns runtime-first, public-compat, or None for unrecognized owners
 +    owner_type: str | None,
 +    owner_path: str | None,
 +) -> str | None:
 +    """Classify the persisted owner boundary for operator surfaces."""
++
 +    normalized_type = normalize_runtime_owner_type(owner_type)
 +    normalized_path = normalize_runtime_owner_path(owner_path, owner_type=normalized_type)  # path normalization is given the already-normalized type
 +    if normalized_path == "runtime-handle" or normalized_type == "RuntimeHandle":  # either the path or the type alone is enough to match
 +        return "runtime-first"
 +    if normalized_path == "public-agent" or normalized_type == "Agent":  # checked second, so a runtime-handle match takes precedence
 +        return "public-compat"
 +    return None  # neither known owner matched
++
++
 +def format_runtime_boundary_label(  # joins the boundary class and the owner label into one string
 +    owner_type: str | None,
 +    owner_path: str | None,
 +) -> str | None:
 +    """Render one concise operator-facing runtime boundary label."""
++
 +    boundary = classify_runtime_owner_boundary(owner_type, owner_path)
 +    owner = format_runtime_owner_label(owner_type, owner_path)
 +    if boundary and owner:  # both parts present: combined form, boundary first
 +        return f"{boundary} via {owner}"
 +    return boundary or owner  # whichever part is truthy, otherwise the falsy owner value
++
++
  def _camel_to_kebab(value: str) -> str:
      """Convert one CamelCase-ish class name into kebab-case."""

tests/test_inspection.py modified

      assert snapshot.last_verification_result == "failed"
      assert snapshot.active_dod_path == dod_path
      assert snapshot.permission_mode == "prompt"
 +    assert snapshot.runtime_boundary_summary == "runtime-first via runtime-handle (RuntimeHandle)"
      assert snapshot.runtime_owner_type == "RuntimeHandle"
      assert snapshot.runtime_owner_path == "runtime-handle"
      assert snapshot.permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1}
      assert [item.status for item in snapshot.recent_verification] == ["failed"]
      assert [item.command for item in snapshot.recent_verification] == ["pytest -q"]
      assert [item.detail for item in snapshot.recent_verification] == ["1 failed"]
 +    assert snapshot.verification_state_summary == "failed for pytest -q"
      assert len(sessions) == 1
      assert sessions[0].session_id == session_id
      assert sessions[0].is_current is True
      assert sessions[0].runtime_owner_type == "RuntimeHandle"
      assert sessions[0].runtime_owner_path == "runtime-handle"
 +    assert sessions[0].runtime_boundary_summary == (
 +        "runtime-first via runtime-handle (RuntimeHandle)"
 +    )
      assert sessions[0].dod_status == "fixing"
      assert sessions[0].permission_prompting_enabled is True
      assert sessions[0].permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1}
      assert detail.is_current is True
      assert detail.snapshot.runtime_owner_type == "RuntimeHandle"
      assert detail.snapshot.runtime_owner_path == "runtime-handle"
 +    assert detail.runtime_boundary_summary == (
 +        "runtime-first via runtime-handle (RuntimeHandle)"
 +    )
 +    assert detail.verification_state_summary == "failed for pytest -q"
      assert detail.definition_of_done is not None
      assert detail.definition_of_done.status == "fixing"
      assert detail.snapshot.permission_rules_source == str(
      assert snapshot.is_current is True
      assert snapshot.runtime_owner_type == "RuntimeHandle"
      assert snapshot.runtime_owner_path == "runtime-handle"
 +    assert snapshot.runtime_boundary_summary == (
 +        "runtime-first via runtime-handle (RuntimeHandle)"
 +    )
      assert snapshot.workflow_mode == "execute"
      assert snapshot.current_task == "Fix the failing tests"
 +    assert snapshot.verification_state_summary == "failed for pytest -q"
      assert snapshot.total_entries == 2
      assert [entry.kind for entry in snapshot.entries] == ["handoff", "reentry"]
      assert snapshot.entries[-1].reason_code == "verification_failed_reentry"
          "uv run pytest -q"
+     ]
      assert [item.attempt for item in snapshot.recent_verification] == ["attempt 2"]
 +    assert snapshot.verification_state_summary == (
 +        "pending (attempt 2) for uv run pytest -q"
 +    )
  def test_collect_status_snapshot_surfaces_planned_verification(
      assert [item.detail for item in snapshot.recent_verification] == [
          "write changed src/loader/runtime/tool_batches.py"
+     ]
 +    assert snapshot.verification_state_summary == (
 +        "planned (attempt 3) for uv run pytest -q"
 +    )
  def test_collect_status_snapshot_surfaces_stale_verification(
      assert [item.detail for item in snapshot.recent_verification] == [
          "write changed src/loader/runtime/finalization.py"
+     ]
 +    assert snapshot.verification_state_summary == (
 +        "stale (attempt 1 -> attempt 2) for uv run pytest -q"
 +    )
  def test_collect_prompt_diff_uses_persisted_prompt_history(temp_dir: Path) -> None:
      assert session_id in status_result.output
      assert "fixing" in status_result.output
      assert "Runtime Owner" in status_result.output
 +    assert "Boundary" in status_result.output
      assert "runtime-handle (RuntimeHandle)" in status_result.output
 +    assert "runtime-first via runtime-handle (RuntimeHandle)" in status_result.output
      assert "1 allow / 2 deny / 1 ask" in status_result.output
      assert "native" in status_result.output
      assert "Runtime Config, Workflow Context, Mode Guidance" in status_result.output
      assert "What file did you mention?" in status_result.output
      assert "pytest -q" in status_result.output
      assert "1 failed" in status_result.output
 +    assert "Verification State" in status_result.output
 +    assert "failed for pytest -q" in status_result.output
      assert list_result.exit_code == 0
      assert session_id in list_result.output
      assert "Runtime Owner" in list_result.output
 +    assert "Boundary" in list_result.output
      assert "runtime-handle (RuntimeHandle)" in list_result.output
 +    assert "runtime-first via runtime-handle (RuntimeHandle)" in list_result.output
      assert "1 allow / 2 deny / 1 ask" in list_result.output
      assert "prompting enabled" in list_result.output
      assert "native" in list_result.output
      assert show_result.exit_code == 0
      assert session_id in show_result.output
      assert "Runtime Owner" in show_result.output
 +    assert "Boundary" in show_result.output
      assert "runtime-handle (RuntimeHandle)" in show_result.output
 +    assert "runtime-first via runtime-handle (RuntimeHandle)" in show_result.output
      assert "Patch the broken parser" in show_result.output
      assert "1 allow / 2 deny / 1 ask" in show_result.output
      assert "enabled" in show_result.output
      assert "Completion Decision" in show_result.output
      assert "Completion Trace" in show_result.output
      assert "Recent Verification" in show_result.output
 +    assert "Verification State" in show_result.output
 +    assert "failed for pytest -q" in show_result.output
      assert "continuation_check" in show_result.output
      assert "completion -> finalize" in show_result.output
      assert "Finalizing completed turn" in show_result.output
      assert "Workflow Timeline" in workflow_result.output
      assert session_id in workflow_result.output
      assert "Runtime Owner" in workflow_result.output
 +    assert "Boundary" in workflow_result.output
      assert "runtime-handle (RuntimeHandle)" in workflow_result.output
 +    assert "runtime-first via runtime-handle (RuntimeHandle)" in workflow_result.output
 +    assert "Verification State" in workflow_result.output
 +    assert "failed for pytest -q" in workflow_result.output
      assert "handoff" in workflow_result.output
      assert "next=verify" in workflow_result.output
      assert "verification_stale" in result.output
      assert "policy-outcome=stale" in result.output
      assert "Observed Verification" in result.output
 +    assert "Verification State" in result.output
 +    assert "stale (attempt 1 -> attempt 2) for uv run pytest -q" in result.output
      assert "uv run pytest -q" in result.output
      assert "new mutating work" in result.output
      assert "verification_planned" in result.output
      assert "policy-outcome=planned" in result.output
      assert "Observed Verification" in result.output
 +    assert "Verification State" in result.output
 +    assert "planned (attempt 3) for uv run pytest -q" in result.output
      assert "verification planned for `uv run pytest -q`" in result.output
      assert "uv run pytest -q" in result.output