`82a0ca7`

Add typed workflow signal extraction

Authored by

espadonne 1 month ago

SHA: 82a0ca7faa669e27386a80e93191ea14ec673abd
Parents: 1aaccad
Tree: a0cc95f

6 changed files

Status	File	+	-
M	`src/loader/runtime/conversation.py`	35	22
M	`src/loader/runtime/workflow.py`	3	0
M	`src/loader/runtime/workflow_policy.py`	71	25
A	`src/loader/runtime/workflow_signals.py`	221	0
M	`tests/test_workflow_policy.py`	20	0
A	`tests/test_workflow_signals.py`	55	0

src/loader/runtime/conversation.py (modified)

      WorkflowDecisionKind,
      WorkflowMode,
      WorkflowPolicy,
++    WorkflowSignalExtractor,
      WorkflowTimelineEntry,
      WorkflowTimelineEntryKind,
      build_execute_bridge,
          self.tracer = RuntimeTracer()
          self.executor: ToolExecutor | None = None
          self.dod_store = DefinitionOfDoneStore(agent.project_root)
--        self.workflow_policy = WorkflowPolicy()
++        self.workflow_signals = WorkflowSignalExtractor()
++        self.workflow_policy = WorkflowPolicy(self.workflow_signals)
          self.artifact_store = WorkflowArtifactStore(agent.project_root)
          self.turn_requester = AssistantTurnRequester(agent, self.tracer)
          self.tool_batches = ToolBatchRunner(agent, self.dod_store)
          requested_mode: str | None,
      ) -> str:
          requested = WorkflowMode.from_str(requested_mode)
--        decision = self.workflow_policy.route(
++        decision = self.workflow_policy.route_from_signals(
--            task,
++            self.workflow_signals.extract_route_signals(
--            requested_mode=requested,
++                task,
--            has_brief=self._artifact_exists(dod.clarify_brief),
++                requested_mode=requested.value if requested is not None else None,
--            has_plan=self._artifact_exists(dod.implementation_plan)
++                has_brief=self._artifact_exists(dod.clarify_brief),
--            and self._artifact_exists(dod.verification_plan),
++                has_plan=self._artifact_exists(dod.implementation_plan)
++                and self._artifact_exists(dod.verification_plan),
++                timeline=self.agent.session.workflow_timeline,
++            )
+         )
          await self._set_workflow_mode(
              decision,
                  summary=summary,
                  on_user_question=on_user_question,
+             )
--            decision = self.workflow_policy.route(
++            decision = self.workflow_policy.route_from_signals(
--                task,
++                self.workflow_signals.extract_route_signals(
--                has_brief=self._artifact_exists(dod.clarify_brief),
++                    task,
--                has_plan=self._artifact_exists(dod.implementation_plan)
++                    has_brief=self._artifact_exists(dod.clarify_brief),
--                and self._artifact_exists(dod.verification_plan),
++                    has_plan=self._artifact_exists(dod.implementation_plan)
--                allow_clarify=False,
++                    and self._artifact_exists(dod.verification_plan),
--                unresolved_questions=clarify_review.unresolved_questions,
++                    allow_clarify=False,
++                    unresolved_questions=clarify_review.unresolved_questions,
++                    timeline=self.agent.session.workflow_timeline,
++                )
+             )
              await self._set_workflow_mode(
                  decision.with_context(
          if not freshness.stale_plan:
              return False
--        decision = self.workflow_policy.route(
++        decision = self.workflow_policy.route_from_signals(
--            task,
++            self.workflow_signals.extract_route_signals(
--            has_brief=self._artifact_exists(dod.clarify_brief),
++                task,
--            has_plan=True,
++                has_brief=self._artifact_exists(dod.clarify_brief),
--            allow_clarify=False,
++                has_plan=True,
--            stale_plan=True,
++                allow_clarify=False,
--            verification_pressure=bool(dod.retry_count or dod.last_verification_result == "failed"),
++                stale_plan=True,
--            unresolved_questions=freshness.reasons,
++                verification_pressure=bool(
++                    dod.retry_count or dod.last_verification_result == "failed"
++                ),
++                unresolved_questions=freshness.reasons,
++                timeline=self.agent.session.workflow_timeline,
++            )
+         )
          await self._set_workflow_mode(
              decision,

src/loader/runtime/workflow.py (modified)

      WorkflowTimelineEntry,
      WorkflowTimelineEntryKind,
  )
++from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket
  __all__ = [
      "ArtifactFreshness",
      "WorkflowDecisionKind",
      "WorkflowMode",
      "WorkflowPolicy",
++    "WorkflowSignalExtractor",
++    "WorkflowSignalPacket",
      "WorkflowTimelineEntry",
      "WorkflowTimelineEntryKind",
      "build_execute_bridge",

src/loader/runtime/workflow_policy.py (modified)

  from pathlib import Path
  from typing import Any
++from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket
++
  class WorkflowMode(StrEnum):
      """High-level runtime modes for one Loader task turn."""
      scheduled_next_mode: WorkflowMode | None = None
      unresolved_questions: list[str] = field(default_factory=list)
      pressure_summary: list[str] = field(default_factory=list)
++    signal_summary: list[str] = field(default_factory=list)
      @property
      def reason(self) -> str:
          scheduled_next_mode: WorkflowMode | None = None,
          unresolved_questions: list[str] | None = None,
          pressure_summary: list[str] | None = None,
++        signal_summary: list[str] | None = None,
      ) -> ModeDecision:
          """Build a non-router workflow decision for handoffs and reentry."""
              scheduled_next_mode=scheduled_next_mode,
              unresolved_questions=list(unresolved_questions or []),
              pressure_summary=list(pressure_summary or []),
++            signal_summary=list(signal_summary or []),
          )
      def with_context(
          scheduled_next_mode: WorkflowMode | None = None,
          unresolved_questions: list[str] | None = None,
          pressure_summary: list[str] | None = None,
++        signal_summary: list[str] | None = None,
      ) -> ModeDecision:
          """Return a copy with updated contextual routing metadata."""
                  if pressure_summary is None
                  else pressure_summary
              ),
++            signal_summary=list(
++                self.signal_summary if signal_summary is None else signal_summary
++            ),
          )
      runner_up_score: float | None = None
      scheduled_next_mode: str | None = None
      unresolved_questions: list[str] = field(default_factory=list)
++    signal_summary: list[str] = field(default_factory=list)
      prompt_format: str | None = None
      prompt_sections: list[str] = field(default_factory=list)
      artifact_paths: list[str] = field(default_factory=list)
              "runner_up_score": self.runner_up_score,
              "scheduled_next_mode": self.scheduled_next_mode,
              "unresolved_questions": list(self.unresolved_questions),
++            "signal_summary": list(self.signal_summary),
              "prompt_format": self.prompt_format,
              "prompt_sections": list(self.prompt_sections),
              "artifact_paths": list(self.artifact_paths),
              runner_up_score=_optional_float(data.get("runner_up_score")),
              scheduled_next_mode=_optional_text(data.get("scheduled_next_mode")),
              unresolved_questions=_string_list(data.get("unresolved_questions")),
++            signal_summary=_string_list(data.get("signal_summary")),
              prompt_format=_optional_text(data.get("prompt_format")),
              prompt_sections=_string_list(data.get("prompt_sections")),
              artifact_paths=_string_list(data.get("artifact_paths")),
                  else None
              ),
              unresolved_questions=list(decision.unresolved_questions),
++            signal_summary=list(decision.signal_summary),
              prompt_format=prompt_format,
              prompt_sections=list(prompt_sections or []),
              artifact_paths=list(artifact_paths or []),
      clarify_threshold = 0.55
      plan_threshold = 0.45
++    def __init__(self, signal_extractor: WorkflowSignalExtractor | None = None) -> None:
++        """Store the signal extractor that ``route`` uses to build signal packets.
++
++        Args:
++            signal_extractor: Extractor to reuse. When it is ``None`` (or otherwise
++                falsy) a fresh ``WorkflowSignalExtractor`` is created instead.
++        """
++        self.signal_extractor = signal_extractor or WorkflowSignalExtractor()
++
      def route(
          self,
          task: str,
          mutating_history: bool = False,
          stale_plan: bool = False,
          unresolved_questions: list[str] | None = None,
++        timeline: list[WorkflowTimelineEntry] | None = None,
      ) -> ModeDecision:
--        unresolved_questions = list(unresolved_questions or [])
++        signals = self.signal_extractor.extract_route_signals(
++            task,
++            requested_mode=requested_mode.value if requested_mode is not None else None,
++            has_brief=has_brief,
++            has_plan=has_plan,
++            allow_clarify=allow_clarify,
++            verification_pressure=verification_pressure,
++            mutating_history=mutating_history,
++            stale_plan=stale_plan,
++            unresolved_questions=unresolved_questions,
++            timeline=timeline,
++        )
++        return self.route_from_signals(signals)
++
++    def route_from_signals(self, signals: WorkflowSignalPacket) -> ModeDecision:
++        """Route from a typed workflow-signal packet."""
++
++        requested_mode = WorkflowMode.from_str(signals.requested_mode)
          if requested_mode is not None:
              return ModeDecision(
                  mode=requested_mode,
                      if requested_mode in {WorkflowMode.CLARIFY, WorkflowMode.PLAN}
                      else None
                  ),
++                signal_summary=list(signals.signal_summary),
              )
--        if stale_plan:
++        if signals.stale_artifact_pressure > 0:
              return ModeDecision(
                  mode=WorkflowMode.PLAN,
                  reason_code="stale_plan_artifacts",
                  runner_up_mode=WorkflowMode.EXECUTE,
                  runner_up_score=0.6,
                  scheduled_next_mode=WorkflowMode.EXECUTE,
--                unresolved_questions=unresolved_questions,
++                unresolved_questions=list(signals.unresolved_questions),
                  pressure_summary=[
                      "plan refresh pressure: stale artifacts require a refreshed plan",
                      "execute pressure: continue directly with the stale artifacts",
                  ],
++                signal_summary=list(signals.signal_summary),
              )
--        if has_plan:
++        if signals.has_plan:
              return ModeDecision(
                  mode=WorkflowMode.EXECUTE,
                  reason_code="existing_plan_artifacts",
                  route_score=0.9,
                  runner_up_mode=WorkflowMode.PLAN,
                  runner_up_score=0.45,
--                unresolved_questions=unresolved_questions,
++                unresolved_questions=list(signals.unresolved_questions),
                  pressure_summary=[
                      "execute pressure: persisted plan artifacts already exist",
                      "plan pressure: a plan refresh is available but not required",
                  ],
++                signal_summary=list(signals.signal_summary),
              )
--        ambiguity = self._ambiguity_score(task)
++        ambiguity = signals.ambiguity_score
--        complexity = self._complexity_score(task)
++        complexity = signals.complexity_score
          clarify_pressure = ambiguity
--        if allow_clarify and not has_brief:
++        if signals.allow_clarify and not signals.has_brief:
              clarify_pressure += 0.15
--        if unresolved_questions:
++        if signals.unresolved_questions:
--            clarify_pressure += 0.12
++            clarify_pressure += min(0.12, 0.04 * len(signals.unresolved_questions))
          if complexity < 0.55:
              clarify_pressure += 0.05
--        if not allow_clarify:
++        if signals.recent_clarify_count and signals.unresolved_questions:
++            clarify_pressure += 0.04
++        if not signals.allow_clarify:
              clarify_pressure = 0.0
          plan_pressure = complexity
--        if verification_pressure:
++        plan_pressure += signals.verification_pressure
--            plan_pressure += 0.12
++        plan_pressure += signals.mutation_pressure
--        if mutating_history:
++        if signals.has_brief:
--            plan_pressure += 0.08
--        if has_brief:
              plan_pressure += 0.06
--        if unresolved_questions:
++        if signals.unresolved_questions:
              plan_pressure += 0.06
++        if signals.recent_reentry_count:
++            plan_pressure += 0.06
++        if signals.recent_plan_refresh_count:
++            plan_pressure += 0.04
          execute_pressure = 0.35
--        if has_brief:
++        if signals.has_brief:
              execute_pressure += 0.14
          if ambiguity < 0.35:
              execute_pressure += 0.16
          if complexity < 0.45:
              execute_pressure += 0.12
--        if not unresolved_questions:
++        if not signals.unresolved_questions:
              execute_pressure += 0.05
++        if signals.recent_verify_skip_count and not signals.verification_pressure:
++            execute_pressure += 0.03
          scores = {
              WorkflowMode.CLARIFY: round(min(clarify_pressure, 1.0), 3),
          if (
              winner == WorkflowMode.CLARIFY
              and winner_score >= self.clarify_threshold
--            and allow_clarify
++            and signals.allow_clarify
          ):
              return ModeDecision(
                  mode=WorkflowMode.CLARIFY,
                  runner_up_mode=runner_up,
                  runner_up_score=runner_up_score,
                  scheduled_next_mode=WorkflowMode.EXECUTE,
--                unresolved_questions=unresolved_questions,
++                unresolved_questions=list(signals.unresolved_questions),
                  pressure_summary=pressure_summary,
++                signal_summary=list(signals.signal_summary),
              )
          if winner == WorkflowMode.PLAN and winner_score >= self.plan_threshold:
              reason_code = (
                  "verification_pressure_requires_plan"
--                if verification_pressure
++                if signals.verification_pressure
                  else "task_is_complex"
              )
              reason_summary = (
                  "verification pressure and task complexity favor a persisted plan"
--                if verification_pressure
++                if signals.verification_pressure
                  else "workflow pressure favors a persisted plan before execution"
              )
              return ModeDecision(
                  runner_up_mode=runner_up,
                  runner_up_score=runner_up_score,
                  scheduled_next_mode=WorkflowMode.EXECUTE,
--                unresolved_questions=unresolved_questions,
++                unresolved_questions=list(signals.unresolved_questions),
                  pressure_summary=pressure_summary,
++                signal_summary=list(signals.signal_summary),
              )
          return ModeDecision(
              route_score=winner_score,
              runner_up_mode=runner_up,
              runner_up_score=runner_up_score,
--            unresolved_questions=unresolved_questions,
++            unresolved_questions=list(signals.unresolved_questions),
              pressure_summary=pressure_summary,
++            signal_summary=list(signals.signal_summary),
          )
      def review_clarify(

src/loader/runtime/workflow_signals.py (added)

++"""Typed workflow-signal extraction for runtime policy decisions."""
++
++from __future__ import annotations
++
++import re
++from dataclasses import dataclass, field
++from typing import TYPE_CHECKING
++
++if TYPE_CHECKING:
++    from .workflow_policy import WorkflowTimelineEntry
++
++
++@dataclass(slots=True)
++class WorkflowSignalPacket:
++    """Typed route context consumed by workflow policy.
++
++    Only ``task`` is required; every other field has a neutral default, so a packet
++    can be built by hand as well as by
++    ``WorkflowSignalExtractor.extract_route_signals``. The value notes below describe
++    what that extractor writes into each field.
++    """
++
++    # Task text the signals were derived from.
++    task: str
++    # Explicitly requested mode in string form, or None when nothing was requested.
++    requested_mode: str | None = None
++    # Pass-through flags describing which artifacts are available.
++    has_brief: bool = False
++    has_plan: bool = False
++    # Pass-through flag; the extractor does not modify it.
++    allow_clarify: bool = True
++    # Heuristic task scores; the extractor caps them at 1.0 and rounds to 3 decimals.
++    ambiguity_score: float = 0.0
++    complexity_score: float = 0.0
++    # Fixed-weight pressures: the extractor sets each to a constant when the matching
++    # input flag is true (0.18, 0.12, 0.2 and 0.45 respectively) and to 0.0 otherwise.
++    verification_pressure: float = 0.0
++    mutation_pressure: float = 0.0
++    artifact_reuse_pressure: float = 0.0
++    stale_artifact_pressure: float = 0.0
++    # Open questions carried along with the route decision.
++    unresolved_questions: list[str] = field(default_factory=list)
++    # Event counts; the extractor counts over the last six timeline entries only.
++    recent_clarify_count: int = 0
++    recent_reentry_count: int = 0
++    recent_plan_refresh_count: int = 0
++    recent_verify_skip_count: int = 0
++    # Human-readable "key=value" strings describing the signals above.
++    signal_summary: list[str] = field(default_factory=list)
++
++
++class WorkflowSignalExtractor:
++    """Build typed workflow-signal packets from runtime state.
++
++    The class defines no instance attributes: the scoring helpers are static
++    methods and ``extract_route_signals`` reads only its arguments.
++    """
++
++    def extract_route_signals(
++        self,
++        task: str,
++        *,
++        requested_mode: str | None = None,
++        has_brief: bool = False,
++        has_plan: bool = False,
++        allow_clarify: bool = True,
++        verification_pressure: bool = False,
++        mutating_history: bool = False,
++        stale_plan: bool = False,
++        unresolved_questions: list[str] | None = None,
++        timeline: list[WorkflowTimelineEntry] | None = None,
++    ) -> WorkflowSignalPacket:
++        """Derive workflow signals from task state and recent timeline context.
++
++        Args:
++            task: Task text scored for ambiguity and complexity.
++            requested_mode: Explicitly requested mode as a string; passed through.
++            has_brief: Whether a clarify brief is available; passed through.
++            has_plan: Whether plan artifacts are available; passed through and
++                also sets ``artifact_reuse_pressure`` to 0.2.
++            allow_clarify: Passed through unchanged.
++            verification_pressure: When true, the packet's
++                ``verification_pressure`` is 0.18, otherwise 0.0.
++            mutating_history: When true, ``mutation_pressure`` is 0.12, otherwise 0.0.
++            stale_plan: When true, ``stale_artifact_pressure`` is 0.45, otherwise 0.0.
++            unresolved_questions: Open questions; copied into a new list
++                (``None`` becomes an empty list).
++            timeline: Workflow timeline; only the last six entries are counted.
++
++        Returns:
++            A ``WorkflowSignalPacket`` holding the scores, pressures, recent-event
++            counts, and a list of ``key=value`` summary strings.
++        """
++
++        # Copy so the packet never aliases the caller's list.
++        unresolved_questions = list(unresolved_questions or [])
++        # Only the six most recent timeline entries contribute to the counts.
++        recent_timeline = list(timeline or [])[-6:]
++        recent_clarify_count = sum(
++            1
++            for entry in recent_timeline
++            if entry.mode == "clarify" or entry.kind.startswith("clarify")
++        )
++        recent_reentry_count = sum(
++            1
++            for entry in recent_timeline
++            if entry.kind == "reentry" or entry.decision_kind == "reentry"
++        )
++        # NOTE(review): the ``in`` test assumes ``reason_code`` is always a string;
++        # a ``None`` value would raise TypeError here - confirm against the
++        # WorkflowTimelineEntry definition.
++        recent_plan_refresh_count = sum(
++            1
++            for entry in recent_timeline
++            if entry.kind == "plan_refresh" or "plan_refresh" in entry.reason_code
++        )
++        recent_verify_skip_count = sum(
++            1
++            for entry in recent_timeline
++            if entry.kind == "verify_skip"
++        )
++        ambiguity_score = self._ambiguity_score(task)
++        complexity_score = self._complexity_score(task)
++        # Boolean inputs become fixed-weight float pressures.
++        verification_signal = 0.18 if verification_pressure else 0.0
++        mutation_signal = 0.12 if mutating_history else 0.0
++        artifact_reuse_signal = 0.2 if has_plan else 0.0
++        stale_artifact_signal = 0.45 if stale_plan else 0.0
++
++        # The two scores are always reported; every other entry is appended only
++        # when its signal is active or non-zero.
++        signal_summary: list[str] = [
++            f"ambiguity={ambiguity_score:.2f}",
++            f"complexity={complexity_score:.2f}",
++        ]
++        if requested_mode:
++            signal_summary.append(f"requested_mode={requested_mode}")
++        if has_brief:
++            signal_summary.append("clarify_brief=available")
++        if has_plan:
++            signal_summary.append("plan_artifacts=available")
++        if stale_plan:
++            signal_summary.append("plan_artifacts=stale")
++        if unresolved_questions:
++            # The reported count is capped at 9; the packet still carries every question.
++            signal_summary.append(
++                f"open_questions={min(len(unresolved_questions), 9)}"
++            )
++        if verification_pressure:
++            signal_summary.append("verification_pressure=active")
++        if mutating_history:
++            signal_summary.append("mutation_pressure=active")
++        if recent_clarify_count:
++            signal_summary.append(f"recent_clarify={recent_clarify_count}")
++        if recent_reentry_count:
++            signal_summary.append(f"recent_reentry={recent_reentry_count}")
++        if recent_plan_refresh_count:
++            signal_summary.append(f"recent_plan_refresh={recent_plan_refresh_count}")
++        if recent_verify_skip_count:
++            signal_summary.append(f"recent_verify_skip={recent_verify_skip_count}")
++
++        return WorkflowSignalPacket(
++            task=task,
++            requested_mode=requested_mode,
++            has_brief=has_brief,
++            has_plan=has_plan,
++            allow_clarify=allow_clarify,
++            ambiguity_score=ambiguity_score,
++            complexity_score=complexity_score,
++            verification_pressure=verification_signal,
++            mutation_pressure=mutation_signal,
++            artifact_reuse_pressure=artifact_reuse_signal,
++            stale_artifact_pressure=stale_artifact_signal,
++            unresolved_questions=unresolved_questions,
++            recent_clarify_count=recent_clarify_count,
++            recent_reentry_count=recent_reentry_count,
++            recent_plan_refresh_count=recent_plan_refresh_count,
++            recent_verify_skip_count=recent_verify_skip_count,
++            signal_summary=signal_summary,
++        )
++
++    @staticmethod
++    def _ambiguity_score(task: str) -> float:
++        """Score how underspecified ``task`` reads, from 0.0 to 1.0.
++
++        Adds 0.65 for an explicit request to clarify, 0.2 for vague wording, 0.2
++        when the text has no concrete anchor, and 0.15 for a short task (12 words
++        or fewer) containing a build-style verb. The sum is capped at 1.0 and
++        rounded to three decimals.
++        """
++        lowered = task.lower()
++        words = re.findall(r"\w+", lowered)
++        score = 0.0
++
++        # Explicit cues that the user wants questions asked before work starts.
++        if (
++            "--clarify" in lowered
++            or "don't assume" in lowered
++            or "do not assume" in lowered
++            or "not sure" in lowered
++            or "figure out" in lowered
++            or "interview me" in lowered
++            or "ask me" in lowered
++            or lowered.startswith("clarify ")
++        ):
++            score += 0.65
++
++        # Vague wording. These are substring tests, so "feels more like" can never
++        # match without "more like" matching first.
++        if any(
++            phrase in lowered
++            for phrase in (
++                "something",
++                "somehow",
++                "better",
++                "improve",
++                "fix this",
++                "make it",
++                "more like",
++                "feels more like",
++            )
++        ):
++            score += 0.2
++
++        if not _has_concrete_anchor(task):
++            score += 0.2
++
++        # Substring match, not word match: "add" also matches inside "address".
++        if len(words) <= 12 and any(
++            verb in lowered
++            for verb in ("build", "add", "improve", "refactor", "implement")
++        ):
++            score += 0.15
++
++        return round(min(score, 1.0), 3)
++
++    @staticmethod
++    def _complexity_score(task: str) -> float:
++        """Score how large or structural ``task`` looks, from 0.0 to 1.0.
++
++        Adds 0.2 at 18 or more words and a further 0.15 at 30 or more, 0.3 for any
++        architecture/planning keyword, 0.15 when the text has at least two
++        " and " joins or at least two commas, and 0.1 when it has a concrete
++        anchor. The sum is capped at 1.0 and rounded to three decimals.
++        """
++        lowered = task.lower()
++        words = re.findall(r"\w+", lowered)
++        score = 0.0
++
++        # The two length thresholds stack: 30+ words earns both increments.
++        if len(words) >= 18:
++            score += 0.2
++        if len(words) >= 30:
++            score += 0.15
++
++        if any(
++            phrase in lowered
++            for phrase in (
++                "refactor",
++                "architecture",
++                "migrate",
++                "persistent",
++                "workflow",
++                "deep dive",
++                "report",
++                "implementation plan",
++                "verification plan",
++            )
++        ):
++            score += 0.3
++
++        # Several conjunctions or commas suggest a multi-part request.
++        if lowered.count(" and ") >= 2 or lowered.count(",") >= 2:
++            score += 0.15
++
++        if _has_concrete_anchor(task):
++            score += 0.1
++
++        return round(min(score, 1.0), 3)
++
++
++def _has_concrete_anchor(task: str) -> bool:
++    """Return True when ``task`` appears to name something concrete.
++
++    A task counts as anchored when it contains any of the characters ``.``, ``/``,
++    ``_``, ``\\`` or ``-``, a backtick-quoted span, or (case-insensitively) one of
++    the substrings "test", "file", "function" or "class".
++    """
++    # NOTE(review): the character class also matches ordinary punctuation, so a
++    # sentence-ending period or a hyphenated word counts as an anchor - confirm
++    # that this is intended.
++    return bool(
++        re.search(r"[./_\\-]", task)
++        or re.search(r"`[^`]+`", task)
++        or any(
++            token in task.lower()
++            for token in ("test", "file", "function", "class")
++        )
++    )

tests/test_workflow_policy.py (modified)

      WorkflowTimelineEntry,
      WorkflowTimelineEntryKind,
+ )
++from loader.runtime.workflow_signals import WorkflowSignalPacket
  def test_workflow_policy_reports_winner_and_runner_up() -> None:
      assert decision.runner_up_mode is not None
      assert decision.runner_up_score > 0
      assert decision.pressure_summary
++    assert decision.signal_summary
++
++
++def test_workflow_policy_routes_from_typed_signal_packet() -> None:
++    """A hand-built packet routes to CLARIFY and keeps its signal summary."""
++    policy = WorkflowPolicy()
++
++    # The packet is built directly, bypassing the extractor, so the scores below
++    # are exactly what the policy sees.
++    decision = policy.route_from_signals(
++        WorkflowSignalPacket(
++            task="Keep improving Loader.",
++            ambiguity_score=0.62,
++            complexity_score=0.28,
++            allow_clarify=True,
++            signal_summary=["ambiguity=0.62", "complexity=0.28"],
++        )
++    )
++
++    assert decision.mode == WorkflowMode.CLARIFY
++    assert decision.signal_summary == ["ambiguity=0.62", "complexity=0.28"]
  def test_workflow_policy_prefers_plan_refresh_for_stale_plan() -> None:
          runner_up_score=0.66,
          scheduled_next_mode="execute",
          unresolved_questions=["Scope is still broad."],
++        signal_summary=["ambiguity=0.20", "complexity=0.81"],
          prompt_format="native",
          prompt_sections=["Runtime Config", "Workflow Context"],
          artifact_paths=["/tmp/implementation.md"],

tests/test_workflow_signals.py (added)

++"""Tests for typed workflow-signal extraction."""
++
++from __future__ import annotations
++
++from loader.runtime.workflow import (
++    WorkflowSignalExtractor,
++    WorkflowTimelineEntry,
++)
++
++
++def test_workflow_signal_extractor_captures_recent_timeline_pressure() -> None:
++    """Timeline entries are counted per kind and reflected in the signal summary."""
++    extractor = WorkflowSignalExtractor()
++    # One entry for each of three counted event kinds: clarify, reentry, verify_skip.
++    timeline = [
++        WorkflowTimelineEntry(
++            timestamp="2026-04-07T12:00:00Z",
++            kind="clarify_continue",
++            mode="clarify",
++            reason_code="clarify_follow_up_needed",
++            summary="clarify: clarify pressure remains high",
++            decision_kind="forced",
++        ),
++        WorkflowTimelineEntry(
++            timestamp="2026-04-07T12:01:00Z",
++            kind="reentry",
++            mode="execute",
++            reason_code="verification_failed_reentry",
++            summary="execute: verification failed; returning to execute",
++            decision_kind="reentry",
++        ),
++        WorkflowTimelineEntry(
++            timestamp="2026-04-07T12:02:00Z",
++            kind="verify_skip",
++            mode="verify",
++            reason_code="verification_not_required",
++            summary="verify: verification skipped",
++            decision_kind="forced",
++        ),
++    ]
++
++    signals = extractor.extract_route_signals(
++        "Improve Loader so it feels more like claw-code.",
++        has_brief=True,
++        unresolved_questions=["Scope is still broad."],
++        timeline=timeline,
++    )
++
++    assert signals.ambiguity_score > 0
++    assert signals.has_brief is True
++    assert signals.recent_clarify_count == 1
++    assert signals.recent_reentry_count == 1
++    assert signals.recent_verify_skip_count == 1
++    assert "clarify_brief=available" in signals.signal_summary
++    assert "open_questions=1" in signals.signal_summary
++    assert "recent_reentry=1" in signals.signal_summary
++