Add persisted workflow ledger core

- SHA: e84c07cb6dad81923462c971ff40fe61ff85c7f1
- Parents: 6a4ef72
- Tree: b991c70

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/session.py | 26 | 1 |
| A | src/loader/runtime/workflow_ledger.py | 336 | 0 |
| M | tests/test_session_state.py | 53 | 0 |
| A | tests/test_workflow_ledger.py | 63 | 0 |
src/loader/runtime/session.pymodified@@ -18,9 +18,10 @@ from .compaction import ( | |||
| 18 | compact_session_messages, | 18 | compact_session_messages, |
| 19 | estimate_message_tokens, | 19 | estimate_message_tokens, |
| 20 | ) | 20 | ) |
| 21 | +from .workflow_ledger import WorkflowLedger | ||
| 21 | from .workflow_policy import WorkflowTimelineEntry | 22 | from .workflow_policy import WorkflowTimelineEntry |
| 22 | 23 | ||
| 23 | -SESSION_VERSION = 5 | 24 | +SESSION_VERSION = 6 |
| 24 | DEFAULT_ROTATE_AFTER_BYTES = 256 * 1024 | 25 | DEFAULT_ROTATE_AFTER_BYTES = 256 * 1024 |
| 25 | MAX_ROTATED_FILES = 3 | 26 | MAX_ROTATED_FILES = 3 |
| 26 | _UNSET = object() | 27 | _UNSET = object() |
@@ -114,6 +115,16 @@ def normalize_workflow_timeline(value: Any) -> list[WorkflowTimelineEntry]: | |||
| 114 | return entries | 115 | return entries |
| 115 | 116 | ||
| 116 | 117 | ||
| 118 | +def normalize_workflow_ledger(value: Any) -> WorkflowLedger: | ||
| 119 | + """Coerce persisted workflow-ledger state.""" | ||
| 120 | + | ||
| 121 | + if isinstance(value, WorkflowLedger): | ||
| 122 | + return value.copy() | ||
| 123 | + if isinstance(value, dict): | ||
| 124 | + return WorkflowLedger.from_dict(value) | ||
| 125 | + return WorkflowLedger() | ||
| 126 | + | ||
| 127 | + | ||
| 117 | @dataclass(slots=True) | 128 | @dataclass(slots=True) |
| 118 | class SessionCompaction: | 129 | class SessionCompaction: |
| 119 | """Metadata describing the latest transcript compaction.""" | 130 | """Metadata describing the latest transcript compaction.""" |
@@ -175,6 +186,7 @@ class SessionSnapshot: | |||
| 175 | last_turn_transition_kind: str | None = None | 186 | last_turn_transition_kind: str | None = None |
| 176 | last_turn_transition_reason_code: str | None = None | 187 | last_turn_transition_reason_code: str | None = None |
| 177 | workflow_timeline: list[WorkflowTimelineEntry] = field(default_factory=list) | 188 | workflow_timeline: list[WorkflowTimelineEntry] = field(default_factory=list) |
| 189 | + workflow_ledger: WorkflowLedger = field(default_factory=WorkflowLedger) | ||
| 178 | compaction: SessionCompaction | None = None | 190 | compaction: SessionCompaction | None = None |
| 179 | version: int = SESSION_VERSION | 191 | version: int = SESSION_VERSION |
| 180 | 192 | ||
@@ -206,6 +218,7 @@ class SessionSnapshot: | |||
| 206 | "last_turn_transition_kind": self.last_turn_transition_kind, | 218 | "last_turn_transition_kind": self.last_turn_transition_kind, |
| 207 | "last_turn_transition_reason_code": self.last_turn_transition_reason_code, | 219 | "last_turn_transition_reason_code": self.last_turn_transition_reason_code, |
| 208 | "workflow_timeline": [entry.to_dict() for entry in self.workflow_timeline], | 220 | "workflow_timeline": [entry.to_dict() for entry in self.workflow_timeline], |
| 221 | + "workflow_ledger": self.workflow_ledger.to_dict(), | ||
| 209 | "compaction": self.compaction.to_dict() if self.compaction else None, | 222 | "compaction": self.compaction.to_dict() if self.compaction else None, |
| 210 | } | 223 | } |
| 211 | 224 | ||
@@ -265,6 +278,7 @@ class SessionSnapshot: | |||
| 265 | workflow_timeline=normalize_workflow_timeline( | 278 | workflow_timeline=normalize_workflow_timeline( |
| 266 | data.get("workflow_timeline") | 279 | data.get("workflow_timeline") |
| 267 | ), | 280 | ), |
| 281 | + workflow_ledger=normalize_workflow_ledger(data.get("workflow_ledger")), | ||
| 268 | compaction=( | 282 | compaction=( |
| 269 | SessionCompaction.from_dict(data["compaction"]) | 283 | SessionCompaction.from_dict(data["compaction"]) |
| 270 | if data.get("compaction") | 284 | if data.get("compaction") |
@@ -407,6 +421,7 @@ class ConversationSession: | |||
| 407 | last_turn_transition_kind: str | None = None | 421 | last_turn_transition_kind: str | None = None |
| 408 | last_turn_transition_reason_code: str | None = None | 422 | last_turn_transition_reason_code: str | None = None |
| 409 | workflow_timeline: list[WorkflowTimelineEntry] = field(default_factory=list) | 423 | workflow_timeline: list[WorkflowTimelineEntry] = field(default_factory=list) |
| 424 | + workflow_ledger: WorkflowLedger = field(default_factory=WorkflowLedger) | ||
| 410 | compaction: SessionCompaction | None = None | 425 | compaction: SessionCompaction | None = None |
| 411 | rotate_after_bytes: int = DEFAULT_ROTATE_AFTER_BYTES | 426 | rotate_after_bytes: int = DEFAULT_ROTATE_AFTER_BYTES |
| 412 | max_rotated_files: int = MAX_ROTATED_FILES | 427 | max_rotated_files: int = MAX_ROTATED_FILES |
@@ -467,6 +482,7 @@ class ConversationSession: | |||
| 467 | self.last_turn_transition_kind = None | 482 | self.last_turn_transition_kind = None |
| 468 | self.last_turn_transition_reason_code = None | 483 | self.last_turn_transition_reason_code = None |
| 469 | self.workflow_timeline = [] | 484 | self.workflow_timeline = [] |
| 485 | + self.workflow_ledger = WorkflowLedger() | ||
| 470 | self.compaction = None | 486 | self.compaction = None |
| 471 | self.usage_totals = {} | 487 | self.usage_totals = {} |
| 472 | self.touch() | 488 | self.touch() |
@@ -575,6 +591,13 @@ class ConversationSession: | |||
| 575 | self.touch() | 591 | self.touch() |
| 576 | self.persist() | 592 | self.persist() |
| 577 | 593 | ||
| 594 | + def update_workflow_ledger(self, ledger: WorkflowLedger) -> None: | ||
| 595 | + """Replace persisted workflow-ledger state.""" | ||
| 596 | + | ||
| 597 | + self.workflow_ledger = normalize_workflow_ledger(ledger) | ||
| 598 | + self.touch() | ||
| 599 | + self.persist() | ||
| 600 | + | ||
| 578 | def maybe_compact(self) -> SessionCompactionResult | None: | 601 | def maybe_compact(self) -> SessionCompactionResult | None: |
| 579 | """Compact the transcript when the current request grows too large.""" | 602 | """Compact the transcript when the current request grows too large.""" |
| 580 | 603 | ||
@@ -655,6 +678,7 @@ class ConversationSession: | |||
| 655 | last_turn_transition_kind=self.last_turn_transition_kind, | 678 | last_turn_transition_kind=self.last_turn_transition_kind, |
| 656 | last_turn_transition_reason_code=self.last_turn_transition_reason_code, | 679 | last_turn_transition_reason_code=self.last_turn_transition_reason_code, |
| 657 | workflow_timeline=list(self.workflow_timeline), | 680 | workflow_timeline=list(self.workflow_timeline), |
| 681 | + workflow_ledger=self.workflow_ledger.copy(), | ||
| 658 | compaction=self.compaction, | 682 | compaction=self.compaction, |
| 659 | ) | 683 | ) |
| 660 | return self.store.save(snapshot) | 684 | return self.store.save(snapshot) |
@@ -715,6 +739,7 @@ class ConversationSession: | |||
| 715 | snapshot.last_turn_transition_reason_code | 739 | snapshot.last_turn_transition_reason_code |
| 716 | ) | 740 | ) |
| 717 | instance.workflow_timeline = list(snapshot.workflow_timeline) | 741 | instance.workflow_timeline = list(snapshot.workflow_timeline) |
| 742 | + instance.workflow_ledger = snapshot.workflow_ledger.copy() | ||
| 718 | instance.compaction = snapshot.compaction | 743 | instance.compaction = snapshot.compaction |
| 719 | instance.rotate_after_bytes = rotate_after_bytes | 744 | instance.rotate_after_bytes = rotate_after_bytes |
| 720 | instance.max_rotated_files = max_rotated_files | 745 | instance.max_rotated_files = max_rotated_files |
src/loader/runtime/workflow_ledger.pyadded@@ -0,0 +1,336 @@ | |||
| 1 | +"""Durable workflow ledger state for assumptions, anchors, and boundaries.""" | ||
| 2 | + | ||
| 3 | +from __future__ import annotations | ||
| 4 | + | ||
| 5 | +import re | ||
| 6 | +from dataclasses import dataclass, field | ||
| 7 | +from typing import TYPE_CHECKING, Any | ||
| 8 | + | ||
| 9 | +if TYPE_CHECKING: | ||
| 10 | + from .workflow import ClarifyBrief | ||
| 11 | + from .workflow_policy import ArtifactFreshness | ||
| 12 | + | ||
| 13 | + | ||
| 14 | +@dataclass(slots=True) | ||
| 15 | +class WorkflowLedgerItem: | ||
| 16 | + """One durable workflow-ledger item.""" | ||
| 17 | + | ||
| 18 | + text: str | ||
| 19 | + status: str | ||
| 20 | + introduced_phase: str | ||
| 21 | + updated_phase: str | None = None | ||
| 22 | + evidence: list[str] = field(default_factory=list) | ||
| 23 | + | ||
| 24 | + def to_dict(self) -> dict[str, Any]: | ||
| 25 | + return { | ||
| 26 | + "text": self.text, | ||
| 27 | + "status": self.status, | ||
| 28 | + "introduced_phase": self.introduced_phase, | ||
| 29 | + "updated_phase": self.updated_phase, | ||
| 30 | + "evidence": list(self.evidence), | ||
| 31 | + } | ||
| 32 | + | ||
| 33 | + @classmethod | ||
| 34 | + def from_dict(cls, data: dict[str, Any]) -> WorkflowLedgerItem: | ||
| 35 | + return cls( | ||
| 36 | + text=str(data.get("text", "")).strip(), | ||
| 37 | + status=str(data.get("status", "open")).strip() or "open", | ||
| 38 | + introduced_phase=str(data.get("introduced_phase", "unknown")).strip() | ||
| 39 | + or "unknown", | ||
| 40 | + updated_phase=_optional_text(data.get("updated_phase")), | ||
| 41 | + evidence=[str(item).strip() for item in data.get("evidence", []) if str(item).strip()], | ||
| 42 | + ) | ||
| 43 | + | ||
| 44 | + def with_evidence(self, summary: str, *, phase: str, status: str | None = None) -> None: | ||
| 45 | + """Update one item with fresh evidence.""" | ||
| 46 | + | ||
| 47 | + cleaned = summary.strip() | ||
| 48 | + if cleaned and cleaned not in self.evidence: | ||
| 49 | + self.evidence.append(cleaned) | ||
| 50 | + if status is not None: | ||
| 51 | + self.status = status | ||
| 52 | + self.updated_phase = phase | ||
| 53 | + | ||
| 54 | + | ||
| 55 | +@dataclass(slots=True) | ||
| 56 | +class WorkflowLedger: | ||
| 57 | + """Persisted semantic workflow state used for inspection and recovery.""" | ||
| 58 | + | ||
| 59 | + assumptions: list[WorkflowLedgerItem] = field(default_factory=list) | ||
| 60 | + acceptance_anchors: list[WorkflowLedgerItem] = field(default_factory=list) | ||
| 61 | + decision_boundaries: list[WorkflowLedgerItem] = field(default_factory=list) | ||
| 62 | + | ||
| 63 | + def to_dict(self) -> dict[str, Any]: | ||
| 64 | + return { | ||
| 65 | + "assumptions": [item.to_dict() for item in self.assumptions], | ||
| 66 | + "acceptance_anchors": [item.to_dict() for item in self.acceptance_anchors], | ||
| 67 | + "decision_boundaries": [item.to_dict() for item in self.decision_boundaries], | ||
| 68 | + } | ||
| 69 | + | ||
| 70 | + @classmethod | ||
| 71 | + def from_dict(cls, data: dict[str, Any]) -> WorkflowLedger: | ||
| 72 | + return cls( | ||
| 73 | + assumptions=_items_from_dict(data.get("assumptions")), | ||
| 74 | + acceptance_anchors=_items_from_dict(data.get("acceptance_anchors")), | ||
| 75 | + decision_boundaries=_items_from_dict(data.get("decision_boundaries")), | ||
| 76 | + ) | ||
| 77 | + | ||
| 78 | + def copy(self) -> WorkflowLedger: | ||
| 79 | + """Return a detached copy safe for mutation.""" | ||
| 80 | + | ||
| 81 | + return WorkflowLedger.from_dict(self.to_dict()) | ||
| 82 | + | ||
| 83 | + def has_items(self) -> bool: | ||
| 84 | + """Return whether any semantic ledger state exists.""" | ||
| 85 | + | ||
| 86 | + return bool( | ||
| 87 | + self.assumptions | ||
| 88 | + or self.acceptance_anchors | ||
| 89 | + or self.decision_boundaries | ||
| 90 | + ) | ||
| 91 | + | ||
| 92 | + | ||
| 93 | +def seed_workflow_ledger_from_brief( | ||
| 94 | + ledger: WorkflowLedger, | ||
| 95 | + brief: ClarifyBrief, | ||
| 96 | + *, | ||
| 97 | + phase: str = "clarify", | ||
| 98 | +) -> WorkflowLedger: | ||
| 99 | + """Merge clarify-brief semantics into the durable workflow ledger.""" | ||
| 100 | + | ||
| 101 | + next_ledger = ledger.copy() | ||
| 102 | + _merge_text_items( | ||
| 103 | + next_ledger.assumptions, | ||
| 104 | + brief.assumptions, | ||
| 105 | + status="open", | ||
| 106 | + phase=phase, | ||
| 107 | + ) | ||
| 108 | + _merge_text_items( | ||
| 109 | + next_ledger.acceptance_anchors, | ||
| 110 | + brief.acceptance_criteria, | ||
| 111 | + status="active", | ||
| 112 | + phase=phase, | ||
| 113 | + ) | ||
| 114 | + _merge_text_items( | ||
| 115 | + next_ledger.decision_boundaries, | ||
| 116 | + brief.decision_boundaries, | ||
| 117 | + status="tracked", | ||
| 118 | + phase=phase, | ||
| 119 | + ) | ||
| 120 | + return next_ledger | ||
| 121 | + | ||
| 122 | + | ||
| 123 | +def seed_workflow_ledger_from_acceptance_criteria( | ||
| 124 | + ledger: WorkflowLedger, | ||
| 125 | + acceptance_criteria: list[str], | ||
| 126 | + *, | ||
| 127 | + phase: str = "plan", | ||
| 128 | +) -> WorkflowLedger: | ||
| 129 | + """Merge acceptance anchors discovered during planning or verification.""" | ||
| 130 | + | ||
| 131 | + next_ledger = ledger.copy() | ||
| 132 | + _merge_text_items( | ||
| 133 | + next_ledger.acceptance_anchors, | ||
| 134 | + acceptance_criteria, | ||
| 135 | + status="active", | ||
| 136 | + phase=phase, | ||
| 137 | + ) | ||
| 138 | + return next_ledger | ||
| 139 | + | ||
| 140 | + | ||
| 141 | +def apply_freshness_to_workflow_ledger( | ||
| 142 | + ledger: WorkflowLedger, | ||
| 143 | + freshness: ArtifactFreshness, | ||
| 144 | + *, | ||
| 145 | + phase: str = "recovery", | ||
| 146 | +) -> WorkflowLedger: | ||
| 147 | + """Apply drift evidence to the durable workflow ledger.""" | ||
| 148 | + | ||
| 149 | + next_ledger = ledger.copy() | ||
| 150 | + for evidence in freshness.evidence: | ||
| 151 | + summary = evidence.summary.strip() | ||
| 152 | + if not summary: | ||
| 153 | + continue | ||
| 154 | + | ||
| 155 | + if evidence.kind == "contradicted_assumption": | ||
| 156 | + item = _find_best_match(next_ledger.assumptions, summary) | ||
| 157 | + if item is None: | ||
| 158 | + item = WorkflowLedgerItem( | ||
| 159 | + text=_extract_focus_text(summary), | ||
| 160 | + status="contradicted", | ||
| 161 | + introduced_phase=phase, | ||
| 162 | + ) | ||
| 163 | + next_ledger.assumptions.append(item) | ||
| 164 | + item.with_evidence(summary, phase=phase, status="contradicted") | ||
| 165 | + continue | ||
| 166 | + | ||
| 167 | + if evidence.kind in {"acceptance_anchor", "verification_contradiction"}: | ||
| 168 | + item = _find_best_match(next_ledger.acceptance_anchors, summary) | ||
| 169 | + if item is None: | ||
| 170 | + item = WorkflowLedgerItem( | ||
| 171 | + text=_extract_focus_text(summary), | ||
| 172 | + status="changed", | ||
| 173 | + introduced_phase=phase, | ||
| 174 | + ) | ||
| 175 | + next_ledger.acceptance_anchors.append(item) | ||
| 176 | + item.with_evidence(summary, phase=phase, status="changed") | ||
| 177 | + continue | ||
| 178 | + | ||
| 179 | + if evidence.kind == "task_boundary_change": | ||
| 180 | + item = _find_best_match(next_ledger.decision_boundaries, summary) | ||
| 181 | + if item is None: | ||
| 182 | + item = WorkflowLedgerItem( | ||
| 183 | + text=_extract_focus_text(summary), | ||
| 184 | + status="reopened", | ||
| 185 | + introduced_phase=phase, | ||
| 186 | + ) | ||
| 187 | + next_ledger.decision_boundaries.append(item) | ||
| 188 | + item.with_evidence(summary, phase=phase, status="reopened") | ||
| 189 | + | ||
| 190 | + return next_ledger | ||
| 191 | + | ||
| 192 | + | ||
| 193 | +def workflow_ledger_highlights(ledger: WorkflowLedger) -> list[str]: | ||
| 194 | + """Return concise operator-facing highlights for one ledger.""" | ||
| 195 | + | ||
| 196 | + highlights: list[str] = [] | ||
| 197 | + contradicted = [item.text for item in ledger.assumptions if item.status == "contradicted"] | ||
| 198 | + changed_anchors = [ | ||
| 199 | + item.text for item in ledger.acceptance_anchors if item.status == "changed" | ||
| 200 | + ] | ||
| 201 | + reopened = [ | ||
| 202 | + item.text for item in ledger.decision_boundaries if item.status == "reopened" | ||
| 203 | + ] | ||
| 204 | + if contradicted: | ||
| 205 | + highlights.append(f"Contradicted assumptions: {', '.join(contradicted[:2])}") | ||
| 206 | + if changed_anchors: | ||
| 207 | + highlights.append(f"Changed acceptance anchors: {', '.join(changed_anchors[:2])}") | ||
| 208 | + if reopened: | ||
| 209 | + highlights.append(f"Reopened boundaries: {', '.join(reopened[:2])}") | ||
| 210 | + return highlights | ||
| 211 | + | ||
| 212 | + | ||
| 213 | +def _items_from_dict(value: Any) -> list[WorkflowLedgerItem]: | ||
| 214 | + if not isinstance(value, list): | ||
| 215 | + return [] | ||
| 216 | + items: list[WorkflowLedgerItem] = [] | ||
| 217 | + for raw in value: | ||
| 218 | + if not isinstance(raw, dict): | ||
| 219 | + continue | ||
| 220 | + item = WorkflowLedgerItem.from_dict(raw) | ||
| 221 | + if item.text: | ||
| 222 | + items.append(item) | ||
| 223 | + return items | ||
| 224 | + | ||
| 225 | + | ||
| 226 | +def _merge_text_items( | ||
| 227 | + items: list[WorkflowLedgerItem], | ||
| 228 | + values: list[str], | ||
| 229 | + *, | ||
| 230 | + status: str, | ||
| 231 | + phase: str, | ||
| 232 | +) -> None: | ||
| 233 | + for text in values: | ||
| 234 | + normalized = _normalized_text(text) | ||
| 235 | + if not normalized: | ||
| 236 | + continue | ||
| 237 | + existing = _find_exact_match(items, normalized) | ||
| 238 | + if existing is not None: | ||
| 239 | + existing.updated_phase = phase | ||
| 240 | + continue | ||
| 241 | + items.append( | ||
| 242 | + WorkflowLedgerItem( | ||
| 243 | + text=text.strip(), | ||
| 244 | + status=status, | ||
| 245 | + introduced_phase=phase, | ||
| 246 | + ) | ||
| 247 | + ) | ||
| 248 | + | ||
| 249 | + | ||
| 250 | +def _find_exact_match( | ||
| 251 | + items: list[WorkflowLedgerItem], | ||
| 252 | + normalized_text: str, | ||
| 253 | +) -> WorkflowLedgerItem | None: | ||
| 254 | + for item in items: | ||
| 255 | + if _normalized_text(item.text) == normalized_text: | ||
| 256 | + return item | ||
| 257 | + return None | ||
| 258 | + | ||
| 259 | + | ||
| 260 | +def _find_best_match( | ||
| 261 | + items: list[WorkflowLedgerItem], | ||
| 262 | + summary: str, | ||
| 263 | +) -> WorkflowLedgerItem | None: | ||
| 264 | + normalized_summary = _normalized_text(summary) | ||
| 265 | + summary_tokens = _semantic_tokens(summary) | ||
| 266 | + best_item: WorkflowLedgerItem | None = None | ||
| 267 | + best_score = 0 | ||
| 268 | + | ||
| 269 | + for item in items: | ||
| 270 | + normalized_item = _normalized_text(item.text) | ||
| 271 | + if not normalized_item: | ||
| 272 | + continue | ||
| 273 | + if normalized_item in normalized_summary or normalized_summary in normalized_item: | ||
| 274 | + return item | ||
| 275 | + | ||
| 276 | + overlap = len(_semantic_tokens(item.text) & summary_tokens) | ||
| 277 | + if overlap > best_score: | ||
| 278 | + best_item = item | ||
| 279 | + best_score = overlap | ||
| 280 | + | ||
| 281 | + if best_score >= 2: | ||
| 282 | + return best_item | ||
| 283 | + return None | ||
| 284 | + | ||
| 285 | + | ||
| 286 | +def _extract_focus_text(summary: str) -> str: | ||
| 287 | + quoted = re.findall(r"`([^`]+)`", summary) | ||
| 288 | + if quoted: | ||
| 289 | + return quoted[0].strip() | ||
| 290 | + shortened = " ".join(summary.split()).strip() | ||
| 291 | + if len(shortened) <= 96: | ||
| 292 | + return shortened | ||
| 293 | + return shortened[:93].rstrip() + "..." | ||
| 294 | + | ||
| 295 | + | ||
| 296 | +def _normalized_text(value: str | None) -> str: | ||
| 297 | + if value is None: | ||
| 298 | + return "" | ||
| 299 | + return " ".join(re.sub(r"[`*_]+", "", str(value)).lower().split()).strip() | ||
| 300 | + | ||
| 301 | + | ||
| 302 | +def _optional_text(value: Any) -> str | None: | ||
| 303 | + if value is None: | ||
| 304 | + return None | ||
| 305 | + text = str(value).strip() | ||
| 306 | + return text or None | ||
| 307 | + | ||
| 308 | + | ||
| 309 | +def _semantic_tokens(text: str) -> set[str]: | ||
| 310 | + return { | ||
| 311 | + token | ||
| 312 | + for token in re.findall(r"[a-z0-9_./-]+", _normalized_text(text)) | ||
| 313 | + if len(token) > 2 and token not in _STOP_WORDS | ||
| 314 | + } | ||
| 315 | + | ||
| 316 | + | ||
| 317 | +_STOP_WORDS = { | ||
| 318 | + "the", | ||
| 319 | + "and", | ||
| 320 | + "for", | ||
| 321 | + "with", | ||
| 322 | + "that", | ||
| 323 | + "this", | ||
| 324 | + "from", | ||
| 325 | + "into", | ||
| 326 | + "before", | ||
| 327 | + "after", | ||
| 328 | + "current", | ||
| 329 | + "still", | ||
| 330 | + "brief", | ||
| 331 | + "plan", | ||
| 332 | + "task", | ||
| 333 | + "scope", | ||
| 334 | + "exists", | ||
| 335 | + "runtime", | ||
| 336 | +} | ||
tests/test_session_state.pymodified@@ -9,6 +9,7 @@ import pytest | |||
| 9 | from loader.agent.loop import Agent, AgentConfig, ReasoningConfig | 9 | from loader.agent.loop import Agent, AgentConfig, ReasoningConfig |
| 10 | from loader.llm.base import CompletionResponse, Message, Role, ToolCall | 10 | from loader.llm.base import CompletionResponse, Message, Role, ToolCall |
| 11 | from loader.runtime.session import ConversationSession | 11 | from loader.runtime.session import ConversationSession |
| 12 | +from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem | ||
| 12 | from loader.runtime.workflow_policy import WorkflowTimelineEntry | 13 | from loader.runtime.workflow_policy import WorkflowTimelineEntry |
| 13 | from tests.helpers.runtime_harness import ScriptedBackend | 14 | from tests.helpers.runtime_harness import ScriptedBackend |
| 14 | 15 | ||
@@ -218,6 +219,58 @@ def test_session_persists_permission_policy_metadata(temp_dir: Path) -> None: | |||
| 218 | ] | 219 | ] |
| 219 | 220 | ||
| 220 | 221 | ||
| 222 | +def test_session_persists_workflow_ledger_state(temp_dir: Path) -> None: | ||
| 223 | + session = ConversationSession( | ||
| 224 | + system_message_factory=_dummy_system, | ||
| 225 | + few_shot_factory=_dummy_few_shots, | ||
| 226 | + project_root=temp_dir, | ||
| 227 | + ) | ||
| 228 | + | ||
| 229 | + session.update_workflow_ledger( | ||
| 230 | + WorkflowLedger( | ||
| 231 | + assumptions=[ | ||
| 232 | + WorkflowLedgerItem( | ||
| 233 | + text="notes.txt stays out of scope unless clarified otherwise.", | ||
| 234 | + status="contradicted", | ||
| 235 | + introduced_phase="clarify", | ||
| 236 | + updated_phase="recovery", | ||
| 237 | + evidence=["Clarify scope assumed `notes.txt` stayed out of scope."], | ||
| 238 | + ) | ||
| 239 | + ], | ||
| 240 | + acceptance_anchors=[ | ||
| 241 | + WorkflowLedgerItem( | ||
| 242 | + text="notes.txt exists in the workspace root.", | ||
| 243 | + status="changed", | ||
| 244 | + introduced_phase="clarify", | ||
| 245 | + updated_phase="recovery", | ||
| 246 | + ) | ||
| 247 | + ], | ||
| 248 | + decision_boundaries=[ | ||
| 249 | + WorkflowLedgerItem( | ||
| 250 | + text="Escalate before broad UX changes.", | ||
| 251 | + status="tracked", | ||
| 252 | + introduced_phase="clarify", | ||
| 253 | + ) | ||
| 254 | + ], | ||
| 255 | + ) | ||
| 256 | + ) | ||
| 257 | + | ||
| 258 | + reloaded = ConversationSession.load( | ||
| 259 | + project_root=temp_dir, | ||
| 260 | + system_message_factory=_dummy_system, | ||
| 261 | + few_shot_factory=_dummy_few_shots, | ||
| 262 | + session_id=session.session_id, | ||
| 263 | + ) | ||
| 264 | + | ||
| 265 | + assert reloaded is not None | ||
| 266 | + assert reloaded.workflow_ledger.assumptions[0].status == "contradicted" | ||
| 267 | + assert reloaded.workflow_ledger.assumptions[0].updated_phase == "recovery" | ||
| 268 | + assert reloaded.workflow_ledger.acceptance_anchors[0].status == "changed" | ||
| 269 | + assert reloaded.workflow_ledger.decision_boundaries[0].text == ( | ||
| 270 | + "Escalate before broad UX changes." | ||
| 271 | + ) | ||
| 272 | + | ||
| 273 | + | ||
| 221 | @pytest.mark.asyncio | 274 | @pytest.mark.asyncio |
| 222 | async def test_turn_summary_usage_rolls_up_into_session_totals(temp_dir: Path) -> None: | 275 | async def test_turn_summary_usage_rolls_up_into_session_totals(temp_dir: Path) -> None: |
| 223 | backend = ScriptedBackend( | 276 | backend = ScriptedBackend( |
tests/test_workflow_ledger.pyadded@@ -0,0 +1,63 @@ | |||
| 1 | +"""Tests for durable workflow-ledger semantics.""" | ||
| 2 | + | ||
| 3 | +from __future__ import annotations | ||
| 4 | + | ||
| 5 | +from loader.runtime.workflow import ArtifactEvidence, ArtifactFreshness, ClarifyBrief | ||
| 6 | +from loader.runtime.workflow_ledger import ( | ||
| 7 | + WorkflowLedger, | ||
| 8 | + apply_freshness_to_workflow_ledger, | ||
| 9 | + seed_workflow_ledger_from_acceptance_criteria, | ||
| 10 | + seed_workflow_ledger_from_brief, | ||
| 11 | + workflow_ledger_highlights, | ||
| 12 | +) | ||
| 13 | + | ||
| 14 | + | ||
| 15 | +def test_workflow_ledger_seeds_from_brief_and_tracks_drift() -> None: | ||
| 16 | + brief = ClarifyBrief( | ||
| 17 | + task_statement="Keep the runtime artifact aligned with the actual work.", | ||
| 18 | + assumptions=["notes.txt stays out of scope unless clarified otherwise."], | ||
| 19 | + acceptance_criteria=["planned.txt exists in the workspace root."], | ||
| 20 | + decision_boundaries=["Escalate before broad UX changes."], | ||
| 21 | + ) | ||
| 22 | + brief.fill_defaults() | ||
| 23 | + | ||
| 24 | + ledger = seed_workflow_ledger_from_brief(WorkflowLedger(), brief) | ||
| 25 | + ledger = seed_workflow_ledger_from_acceptance_criteria( | ||
| 26 | + ledger, | ||
| 27 | + ["planned.txt exists in the workspace root."], | ||
| 28 | + phase="plan", | ||
| 29 | + ) | ||
| 30 | + freshness = ArtifactFreshness( | ||
| 31 | + evidence=[ | ||
| 32 | + ArtifactEvidence( | ||
| 33 | + kind="contradicted_assumption", | ||
| 34 | + summary="Clarify scope assumed `notes.txt` stayed out of scope.", | ||
| 35 | + ), | ||
| 36 | + ArtifactEvidence( | ||
| 37 | + kind="verification_contradiction", | ||
| 38 | + summary=( | ||
| 39 | + "Failed verification exposed missing brief coverage for " | ||
| 40 | + "`notes.txt exists in the workspace root.`." | ||
| 41 | + ), | ||
| 42 | + ), | ||
| 43 | + ArtifactEvidence( | ||
| 44 | + kind="task_boundary_change", | ||
| 45 | + summary="The active task framing outgrew the persisted clarify brief.", | ||
| 46 | + ), | ||
| 47 | + ] | ||
| 48 | + ) | ||
| 49 | + | ||
| 50 | + updated = apply_freshness_to_workflow_ledger(ledger, freshness) | ||
| 51 | + | ||
| 52 | + assert any( | ||
| 53 | + item.text == "notes.txt stays out of scope unless clarified otherwise." | ||
| 54 | + and item.status == "contradicted" | ||
| 55 | + for item in updated.assumptions | ||
| 56 | + ) | ||
| 57 | + assert any( | ||
| 58 | + "notes.txt exists in the workspace root." in item.text | ||
| 59 | + and item.status == "changed" | ||
| 60 | + for item in updated.acceptance_anchors | ||
| 61 | + ) | ||
| 62 | + assert any(item.status == "reopened" for item in updated.decision_boundaries) | ||
| 63 | + assert workflow_ledger_highlights(updated) | ||