`68fd28c`

Add pressure-pass clarify reviews

Authored by

espadonne 1 month ago

SHA: 68fd28c6038d3de538f2b9e3a0e71b851f0895d0
Parents: 67878a4
Tree: 06f6b37

5 changed files

Status	File	+	-
M	`src/loader/runtime/clarify_strategy.py`	239	3
M	`src/loader/runtime/workflow_lanes.py`	30	3
M	`src/loader/runtime/workflow_policy.py`	69	2
M	`tests/test_clarify_strategy.py`	49	0
M	`tests/test_workflow_policy.py`	23	0

`src/loader/runtime/clarify_strategy.py` — modified

      LIKELY_TOUCHPOINTS = "likely_touchpoints"
 +class ClarifyStage(StrEnum):
 +    """High-level interview stage for bounded clarify mode.
++
 +    An assessment's stage is computed by ``_resolve_stage`` from its
 +    unresolved slots and its missing readiness gates.
 +    """
++
 +    # No readiness gate is missing, but the desired outcome is still unresolved.
 +    INTENT = "intent"
 +    # No readiness gate is missing and the desired outcome is resolved.
 +    BOUNDARIES = "boundaries"
 +    # At least one readiness gate is still missing.
 +    READINESS = "readiness"
++
++
 +class ClarifyPressureKind(StrEnum):
 +    """Which kind of pressure pass the next clarify round should apply.
++
 +    The kind is selected by ``_choose_pressure_kind``; that helper returns
 +    ``None`` instead of a member when no pressure pass is called for.
 +    """
++
 +    # Chosen when the latest answer uses broad language.
 +    EXAMPLE = "example"
 +    # Chosen when the non_goals or decision_boundaries readiness gate is missing.
 +    TRADEOFF = "tradeoff"
 +    # Chosen when neither of the conditions above applies.
 +    ASSUMPTION = "assumption"
++
++
  _DEFAULT_SLOT_ORDER = [
      ClarifySlot.DESIRED_OUTCOME,
      ClarifySlot.NON_GOALS,
      unresolved_slots: list[ClarifySlot] = field(default_factory=list)
      unresolved_questions: list[str] = field(default_factory=list)
      focus_slot: ClarifySlot | None = None
 +    stage: ClarifyStage = ClarifyStage.INTENT
 +    pressure_kind: ClarifyPressureKind | None = None
 +    pressure_pass_complete: bool = False
 +    missing_readiness_gates: list[str] = field(default_factory=list)
  def assess_clarify_snapshot(
      task: str,
      answer: str,
      snapshot: ClarifySnapshot,
 +    round_index: int = 1,
 +    pressure_pass_complete: bool = False,
  ) -> ClarifyAssessment:
      """Determine which clarify slots remain unresolved after one round."""
      normalized_answer = answer.strip()
      answer_is_short = len(re.findall(r"\w+", normalized_answer)) < 4
      answer_is_broad = _answer_uses_broad_language(normalized_answer)
 +    effective_pressure_pass_complete = (
 +        pressure_pass_complete or _answer_demonstrates_pressure_pass(normalized_answer)
 +    )
 +    missing_readiness_gates: list[str] = []
++
 +    non_goals_explicit = ClarifySlot.NON_GOALS.value in explicit and bool(
 +        [item for item in snapshot.non_goals if item.strip()]
 +    )
 +    decision_boundaries_explicit = ClarifySlot.DECISION_BOUNDARIES.value in explicit and bool(
 +        [item for item in snapshot.decision_boundaries if item.strip()]
 +    )
      if not normalized_answer:
          unresolved_questions.append(
          unresolved_questions.append(
              "The desired outcome is still not explicit enough to guide execution."
+         )
 -    if ClarifySlot.NON_GOALS.value not in explicit or any(
 +    if not non_goals_explicit or any(
          "anything not confirmed" in item.lower() for item in snapshot.non_goals
      ):
          unresolved_slots.append(ClarifySlot.NON_GOALS)
          unresolved_questions.append(
              "Constraints are still too implicit for a safe implementation pass."
+         )
 -    if ClarifySlot.DECISION_BOUNDARIES.value not in explicit:
 +    if not decision_boundaries_explicit:
          unresolved_slots.append(ClarifySlot.DECISION_BOUNDARIES)
          unresolved_questions.append(
              "Decision boundaries are still too fuzzy for autonomous execution."
          unresolved_questions.append(
              "The clarified scope still uses broad or ambiguous language."
+         )
++
 +    if not non_goals_explicit:
 +        missing_readiness_gates.append("non_goals")
 +    if not decision_boundaries_explicit:
 +        missing_readiness_gates.append("decision_boundaries")
 +    if round_index >= 2 and not effective_pressure_pass_complete:
 +        missing_readiness_gates.append("pressure_pass")
++
 +    pressure_kind = _choose_pressure_kind(
 +        round_index=round_index,
 +        answer_is_broad=answer_is_broad,
 +        missing_readiness_gates=missing_readiness_gates,
 +        pressure_pass_complete=effective_pressure_pass_complete,
 +        unresolved_slots=ordered_slots,
 +    )
 +    if pressure_kind == ClarifyPressureKind.EXAMPLE:
 +        unresolved_questions.append(
 +            "Loader still needs a concrete example or counterexample before planning."
 +        )
 +    elif pressure_kind == ClarifyPressureKind.TRADEOFF:
 +        unresolved_questions.append(
 +            "Loader still needs an explicit tradeoff or stop boundary before planning."
 +        )
 +    elif pressure_kind == ClarifyPressureKind.ASSUMPTION:
 +        unresolved_questions.append(
 +            "Loader still needs one challenged assumption before it should proceed."
 +        )
++
 +    stage = _resolve_stage(
 +        unresolved_slots=ordered_slots,
 +        missing_readiness_gates=missing_readiness_gates,
 +    )
      return ClarifyAssessment(
          unresolved_slots=ordered_slots,
          unresolved_questions=list(dict.fromkeys(unresolved_questions)),
          focus_slot=ordered_slots[0] if ordered_slots else None,
 +        stage=stage,
 +        pressure_kind=pressure_kind,
 +        pressure_pass_complete=effective_pressure_pass_complete,
 +        missing_readiness_gates=list(dict.fromkeys(missing_readiness_gates)),
+     )
 -def build_clarify_question(task: str, focus_slot: ClarifySlot | str | None) -> str:
 +def build_clarify_question(
 +    task: str,
 +    focus_slot: ClarifySlot | str | None,
 +    pressure_kind: ClarifyPressureKind | str | None = None,
 +) -> str:
      """Render one targeted question for the current clarify focus slot."""
      slot = (
          if focus_slot
          else ClarifySlot.DESIRED_OUTCOME
+     )
 +    pressure = (
 +        pressure_kind
 +        if isinstance(pressure_kind, ClarifyPressureKind)
 +        else ClarifyPressureKind(pressure_kind)
 +        if pressure_kind
 +        else None
 +    )
++
 +    if pressure == ClarifyPressureKind.EXAMPLE:
 +        prompts = {
 +            ClarifySlot.DESIRED_OUTCOME: (
 +                "What is one concrete example of the finished outcome, and one nearby "
 +                "result that should still count as out of scope?"
 +            ),
 +            ClarifySlot.NON_GOALS: (
 +                "What is one tempting broader change I should avoid even if it seems helpful?"
 +            ),
 +            ClarifySlot.ACCEPTANCE_CRITERIA: (
 +                "What concrete example would prove this is done, and what shortcut "
 +                "would still be wrong?"
 +            ),
 +            ClarifySlot.CONSTRAINTS: (
 +                "What is one concrete invariant I must preserve, and what change would violate it?"
 +            ),
 +            ClarifySlot.DECISION_BOUNDARIES: (
 +                "Give one example of a choice I may make alone and one example that "
 +                "should force me to stop and confirm."
 +            ),
 +            ClarifySlot.LIKELY_TOUCHPOINTS: (
 +                "Which file should change first, and which nearby file should I "
 +                "explicitly leave alone?"
 +            ),
 +        }
 +        return prompts[slot]
++
 +    if pressure == ClarifyPressureKind.TRADEOFF:
 +        prompts = {
 +            ClarifySlot.DESIRED_OUTCOME: (
 +                "What result matters most here, and what broader improvement should I "
 +                "still avoid chasing?"
 +            ),
 +            ClarifySlot.NON_GOALS: (
 +                "What should stay unchanged even if changing it would make the "
 +                "implementation easier?"
 +            ),
 +            ClarifySlot.ACCEPTANCE_CRITERIA: (
 +                "What outcome would count as success, and what tempting shortcut "
 +                "should still count as failure?"
 +            ),
 +            ClarifySlot.CONSTRAINTS: (
 +                "What must stay true even if it makes the change slower or less sweeping?"
 +            ),
 +            ClarifySlot.DECISION_BOUNDARIES: (
 +                "Which decision may I take on my own, and which one should I stop "
 +                "and confirm before proceeding?"
 +            ),
 +            ClarifySlot.LIKELY_TOUCHPOINTS: (
 +                "Which file should I focus on, and what file or surface should stay unchanged?"
 +            ),
 +        }
 +        return prompts[slot]
++
 +    if pressure == ClarifyPressureKind.ASSUMPTION:
 +        prompts = {
 +            ClarifySlot.DESIRED_OUTCOME: (
 +                "What assumption about the desired outcome am I most likely to get "
 +                "wrong if I act now?"
 +            ),
 +            ClarifySlot.NON_GOALS: (
 +                "What assumption about scope should I not make without checking first?"
 +            ),
 +            ClarifySlot.ACCEPTANCE_CRITERIA: (
 +                "What assumption about 'done' would be risky to make without your confirmation?"
 +            ),
 +            ClarifySlot.CONSTRAINTS: (
 +                "What assumption about constraints would be unsafe for me to guess?"
 +            ),
 +            ClarifySlot.DECISION_BOUNDARIES: (
 +                "What decision would be risky for me to assume I can make without checking?"
 +            ),
 +            ClarifySlot.LIKELY_TOUCHPOINTS: (
 +                "What assumption about the right touchpoint or file would be most "
 +                "dangerous if I guessed wrong?"
 +            ),
 +        }
 +        return prompts[slot]
++
      prompts = {
          ClarifySlot.DESIRED_OUTCOME: (
              "What concrete outcome should this change achieve when it's done?"
      return _SLOT_LABELS[resolved]
 +def describe_clarify_stage(stage: ClarifyStage | str | None) -> str:
 +    """Return the display label for a clarify stage.
++
 +    ``None`` yields ``"general"``; a ``ClarifyStage`` member or its raw
 +    string value yields the member's value.
 +    """
++
 +    if stage is not None:
 +        # Looking a member up by itself returns that member, so a single
 +        # enum call covers both the member and the raw-string case.
 +        return ClarifyStage(stage).value
 +    return "general"
++
++
 +def describe_clarify_pressure_kind(
 +    pressure_kind: ClarifyPressureKind | str | None,
 +) -> str:
 +    """Return the display label for a pressure-pass kind.
++
 +    ``None`` yields ``"none"``; a ``ClarifyPressureKind`` member or its raw
 +    string value yields the member's value.
 +    """
++
 +    if pressure_kind is not None:
 +        # Looking a member up by itself returns that member, so a single
 +        # enum call covers both the member and the raw-string case.
 +        return ClarifyPressureKind(pressure_kind).value
 +    return "none"
++
++
  def _prioritize_slots(
      slots: list[ClarifySlot],
      *,
      return ordered
 +def _resolve_stage(
 +    *,
 +    unresolved_slots: list[ClarifySlot],
 +    missing_readiness_gates: list[str],
 +) -> ClarifyStage:
 +    """Pick the interview stage for the current assessment.
++
 +    Any missing readiness gate wins and yields ``READINESS``. Otherwise the
 +    stage is ``INTENT`` while the desired outcome is unresolved, and
 +    ``BOUNDARIES`` once it is resolved.
 +    """
 +    if not missing_readiness_gates:
 +        outcome_open = ClarifySlot.DESIRED_OUTCOME in unresolved_slots
 +        return ClarifyStage.INTENT if outcome_open else ClarifyStage.BOUNDARIES
 +    return ClarifyStage.READINESS
++
++
 +def _choose_pressure_kind(
 +    *,
 +    round_index: int,
 +    answer_is_broad: bool,
 +    missing_readiness_gates: list[str],
 +    pressure_pass_complete: bool,
 +    unresolved_slots: list[ClarifySlot],
 +) -> ClarifyPressureKind | None:
 +    """Select the pressure pass for the next clarify round, if any.
++
 +    Returns ``None`` before round two, once the pressure pass is complete,
 +    or when no slot is unresolved. Otherwise a broad answer asks for an
 +    example, a missing non_goals/decision_boundaries gate asks for a
 +    tradeoff, and everything else asks for a challenged assumption.
 +    """
 +    pressure_applies = (
 +        round_index >= 2 and not pressure_pass_complete and bool(unresolved_slots)
 +    )
 +    if not pressure_applies:
 +        return None
 +    if answer_is_broad:
 +        return ClarifyPressureKind.EXAMPLE
++
 +    boundary_gates = {"non_goals", "decision_boundaries"}
 +    boundary_gap = not boundary_gates.isdisjoint(missing_readiness_gates)
 +    return ClarifyPressureKind.TRADEOFF if boundary_gap else ClarifyPressureKind.ASSUMPTION
++
++
  def _answer_uses_broad_language(answer: str) -> bool:
      lowered = answer.lower()
      if not lowered:
              "fix it",
              "something",
              "somehow",
 +            "maybe",
 +            "around there",
 +        )
 +    )
++
++
 +def _answer_demonstrates_pressure_pass(answer: str) -> bool:
 +    """Report whether an answer already contains boundary-setting language.
++
 +    The answer is lower-cased and searched for a fixed list of boundary
 +    phrases. An empty answer never qualifies.
++
 +    Phrases are anchored at the start of a word, so inflections such as
 +    "keeping" or "avoids" still count, while unrelated words that merely
 +    contain a phrase ("commonly", "unavoidable", "housekeeping") do not.
 +    """
 +    lowered = answer.lower()
 +    if not lowered:
 +        return False
 +    phrases = (
 +        "do not",
 +        "don't",
 +        "don\u2019t",  # typographic apostrophe, common in pasted text
 +        "keep",
 +        "leave",
 +        "unchanged",
 +        "out of scope",
 +        "avoid",
 +        "only",
 +        "stop and ask",
 +        "confirm first",
 +    )
 +    # A leading \b rejects mid-word hits; no trailing \b keeps inflected forms matching.
 +    pattern = r"\b(?:" + "|".join(re.escape(phrase) for phrase in phrases) + r")"
 +    return re.search(pattern, lowered) is not None

`src/loader/runtime/workflow_lanes.py` — modified

  from typing import Any
  from ..llm.base import Message, Role, ToolCall
 -from .clarify_strategy import ClarifySnapshot, build_clarify_question, describe_clarify_slot
 +from .clarify_strategy import (
 +    ClarifySnapshot,
 +    build_clarify_question,
 +    describe_clarify_pressure_kind,
 +    describe_clarify_slot,
 +    describe_clarify_stage,
 +)
  from .dod import DefinitionOfDone, DefinitionOfDoneStore
  from .events import AgentEvent, TurnSummary
  from .executor import ToolExecutor
              reason_summary="clarify gathered enough boundaries to proceed",
              unresolved_slots=[],
              focus_slot=None,
 +            stage="intent",
 +            pressure_kind=None,
 +            pressure_pass_complete=False,
 +            missing_readiness_gates=[],
+         )
          for round_index in range(1, max_rounds + 1):
                  rounds=rounds,
                  unresolved_questions=review.unresolved_questions,
                  unresolved_slots=review.unresolved_slots,
 +                stage=review.stage,
 +                pressure_kind=review.pressure_kind,
+             )
              rounds.append((question, answer))
              review = self.workflow_policy.review_clarify(
                  snapshot=self._clarify_snapshot(task, latest_brief),
                  round_index=round_index,
                  max_rounds=max_rounds,
 +                pressure_pass_complete=review.pressure_pass_complete,
+             )
              if review.should_continue:
                  append_timeline(
          rounds: list[tuple[str, str]],
          unresolved_questions: list[str],
          unresolved_slots: list[str],
 +        stage: str | None,
 +        pressure_kind: str | None,
      ) -> tuple[ClarifyBrief, str, str]:
          ask_tool = self.agent.registry.get("AskUserQuestion")
          assert ask_tool is not None
                  rounds=rounds,
                  unresolved_questions=unresolved_questions,
                  unresolved_slots=unresolved_slots,
 +                stage=stage,
 +                pressure_kind=pressure_kind,
              ),
              tools=[ask_tool.to_schema()],
              max_tokens=500,
                  task,
                  response.content,
                  unresolved_slots,
 +                pressure_kind,
+             )
              title = None
              options = None
          rounds: list[tuple[str, str]],
          unresolved_questions: list[str],
          unresolved_slots: list[str],
 +        stage: str | None,
 +        pressure_kind: str | None,
      ) -> str:
          history_lines = []
          for index, (question, answer) in enumerate(rounds, start=1):
          unresolved = "\n".join(f"- {item}" for item in unresolved_questions) or "- none"
          focus_slot = unresolved_slots[0] if unresolved_slots else None
          focus_label = describe_clarify_slot(focus_slot)
 +        stage_label = describe_clarify_stage(stage)
 +        pressure_label = describe_clarify_pressure_kind(pressure_kind)
          return (
              "Clarify the task before planning or implementation.\n\n"
              f"Task: {task}\n"
              f"Round: {round_index}\n"
 +            f"Stage: {stage_label}\n"
              f"Focus slot: {focus_label}\n"
 +            f"Pressure pass: {pressure_label}\n"
              "Ask exactly one focused question via AskUserQuestion.\n"
 -            "Use the unresolved questions and prior answers to tighten scope.\n\n"
 +            "Use the unresolved questions and prior answers to tighten scope.\n"
 +            "If a pressure pass is active, prefer examples, tradeoffs, or "
 +            "challenged assumptions over generic restatement.\n\n"
              "Unresolved questions:\n"
              f"{unresolved}\n\n"
              "Prior clarify history:\n"
          task: str,
          response_content: str,
          unresolved_slots: list[str],
 +        pressure_kind: str | None,
      ) -> str:
          match = re.search(r"([A-Z][^?]+\?)", response_content)
          if match:
              return match.group(1).strip()
          focus_slot = unresolved_slots[0] if unresolved_slots else None
 -        return build_clarify_question(task, focus_slot)
 +        return build_clarify_question(task, focus_slot, pressure_kind)
      @staticmethod
      def _clarify_snapshot(task: str, brief: ClarifyBrief) -> ClarifySnapshot:

`src/loader/runtime/workflow_policy.py` — modified

  from pathlib import Path
  from typing import Any
 -from .clarify_strategy import ClarifySnapshot, assess_clarify_snapshot, describe_clarify_slot
 +from .clarify_strategy import (
 +    ClarifySnapshot,
 +    assess_clarify_snapshot,
 +    describe_clarify_pressure_kind,
 +    describe_clarify_slot,
 +)
  from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket
      unresolved_questions: list[str] = field(default_factory=list)
      unresolved_slots: list[str] = field(default_factory=list)
      focus_slot: str | None = None
 +    stage: str | None = None
 +    pressure_kind: str | None = None
 +    pressure_pass_complete: bool = False
 +    missing_readiness_gates: list[str] = field(default_factory=list)
  @dataclass(slots=True)
          snapshot: ClarifySnapshot,
          round_index: int,
          max_rounds: int,
 +        pressure_pass_complete: bool = False,
      ) -> ClarifyReview:
          """Determine whether clarify should continue for another round."""
              task=task,
              answer=answer,
              snapshot=snapshot,
 +            round_index=round_index,
 +            pressure_pass_complete=pressure_pass_complete,
+         )
          unresolved = list(assessment.unresolved_questions)
          focus_slot = assessment.focus_slot.value if assessment.focus_slot else None
          focus_label = describe_clarify_slot(assessment.focus_slot)
 +        pressure_kind = (
 +            assessment.pressure_kind.value if assessment.pressure_kind is not None else None
 +        )
 +        pressure_label = describe_clarify_pressure_kind(assessment.pressure_kind)
 +        readiness_gates = list(assessment.missing_readiness_gates)
          if unresolved and round_index < max_rounds:
 +            if assessment.pressure_kind is not None:
 +                return ClarifyReview(
 +                    should_continue=True,
 +                    reason_code="clarify_pressure_pass_required",
 +                    reason_summary=(
 +                        "clarify still needs a "
 +                        f"{pressure_label} pass around {focus_label}"
 +                    ),
 +                    unresolved_questions=unresolved,
 +                    unresolved_slots=[slot.value for slot in assessment.unresolved_slots],
 +                    focus_slot=focus_slot,
 +                    stage=assessment.stage.value,
 +                    pressure_kind=pressure_kind,
 +                    pressure_pass_complete=assessment.pressure_pass_complete,
 +                    missing_readiness_gates=readiness_gates,
 +                )
++
              return ClarifyReview(
                  should_continue=True,
                  reason_code="clarify_follow_up_needed",
                  unresolved_questions=unresolved,
                  unresolved_slots=[slot.value for slot in assessment.unresolved_slots],
                  focus_slot=focus_slot,
 +                stage=assessment.stage.value,
 +                pressure_kind=pressure_kind,
 +                pressure_pass_complete=assessment.pressure_pass_complete,
 +                missing_readiness_gates=readiness_gates,
+             )
          if unresolved:
 +            if not assessment.pressure_pass_complete and round_index >= 2:
 +                return ClarifyReview(
 +                    should_continue=False,
 +                    reason_code="clarify_budget_exhausted_without_pressure_pass",
 +                    reason_summary=(
 +                        "clarify budget exhausted before Loader completed a "
 +                        "bounded pressure pass"
 +                    ),
 +                    unresolved_questions=unresolved,
 +                    unresolved_slots=[slot.value for slot in assessment.unresolved_slots],
 +                    focus_slot=focus_slot,
 +                    stage=assessment.stage.value,
 +                    pressure_kind=pressure_kind,
 +                    pressure_pass_complete=assessment.pressure_pass_complete,
 +                    missing_readiness_gates=readiness_gates,
 +                )
++
              return ClarifyReview(
                  should_continue=False,
                  reason_code="clarify_budget_exhausted",
                  unresolved_questions=unresolved,
                  unresolved_slots=[slot.value for slot in assessment.unresolved_slots],
                  focus_slot=focus_slot,
 +                stage=assessment.stage.value,
 +                pressure_kind=pressure_kind,
 +                pressure_pass_complete=assessment.pressure_pass_complete,
 +                missing_readiness_gates=readiness_gates,
+             )
          return ClarifyReview(
              should_continue=False,
              reason_code="clarify_complete",
 -            reason_summary="clarify gathered enough boundaries to proceed",
 +            reason_summary=(
 +                "clarify gathered enough boundaries and completed a bounded pressure pass"
 +                if assessment.pressure_pass_complete
 +                else "clarify gathered enough boundaries to proceed"
 +            ),
              unresolved_questions=[],
              unresolved_slots=[],
              focus_slot=None,
 +            stage=assessment.stage.value,
 +            pressure_kind=pressure_kind,
 +            pressure_pass_complete=assessment.pressure_pass_complete,
 +            missing_readiness_gates=readiness_gates,
+         )
      def assess_artifact_freshness(

`tests/test_clarify_strategy.py` — modified

  from __future__ import annotations
  from loader.runtime.clarify_strategy import (
 +    ClarifyPressureKind,
      ClarifySlot,
      ClarifySnapshot,
 +    ClarifyStage,
      assess_clarify_snapshot,
      build_clarify_question,
+ )
      assert "out of scope" in question.lower()
++
 +def test_assess_clarify_snapshot_requests_tradeoff_pressure_pass_on_later_round() -> None:
 +    """Round two without explicit non-goals or decision boundaries asks for a tradeoff pass."""
 +    snapshot = ClarifySnapshot(
 +        task_statement="Improve Loader runtime behavior.",
 +        explicit_sections=["desired_outcome", "likely_touchpoints"],
 +        desired_outcome=["Make the runtime flow more disciplined."],
 +        likely_touchpoints=["src/loader/runtime/conversation.py"],
 +    )
++
 +    assessment = assess_clarify_snapshot(
 +        task="Improve Loader runtime behavior.",
 +        answer="Focus on src/loader/runtime/conversation.py.",
 +        snapshot=snapshot,
 +        round_index=2,
 +    )
++
 +    assert assessment.stage == ClarifyStage.READINESS
 +    assert assessment.pressure_kind == ClarifyPressureKind.TRADEOFF
 +    assert assessment.pressure_pass_complete is False
 +    # Both boundary gates must be reported as missing.
 +    for gate in ("non_goals", "decision_boundaries"):
 +        assert gate in assessment.missing_readiness_gates
++
++
 +def test_assess_clarify_snapshot_marks_pressure_pass_complete_for_boundary_answer() -> None:
 +    """An answer with explicit boundary wording completes the pressure pass on round two."""
 +    boundary_answer = (
 +        "Keep the CLI unchanged and do not broaden the UX without confirming first."
 +    )
 +    snapshot = ClarifySnapshot(
 +        task_statement="Improve Loader runtime behavior.",
 +        explicit_sections=["desired_outcome", "non_goals", "decision_boundaries"],
 +        desired_outcome=["Make the runtime flow more disciplined."],
 +        non_goals=["Keep the CLI unchanged."],
 +        decision_boundaries=["Confirm before broad UX changes."],
 +    )
++
 +    assessment = assess_clarify_snapshot(
 +        task="Improve Loader runtime behavior.",
 +        answer=boundary_answer,
 +        snapshot=snapshot,
 +        round_index=2,
 +    )
++
 +    assert assessment.pressure_pass_complete is True
 +    assert "pressure_pass" not in assessment.missing_readiness_gates
++
++
 +def test_build_clarify_question_can_render_pressure_pass_question() -> None:
 +    """A tradeoff pressure pass on non-goals renders boundary-flavoured wording."""
 +    rendered = build_clarify_question(
 +        "Tighten the runtime behavior.",
 +        ClarifySlot.NON_GOALS,
 +        ClarifyPressureKind.TRADEOFF,
 +    ).lower()
++
 +    assert any(keyword in rendered for keyword in ("unchanged", "avoid"))

`tests/test_workflow_policy.py` — modified

      assert review.focus_slot == "likely_touchpoints"
 +def test_workflow_policy_requests_pressure_pass_on_later_clarify_round() -> None:
 +    """With budget left on round two, the policy continues and asks for a tradeoff pass."""
 +    policy = WorkflowPolicy()
 +    snapshot = ClarifySnapshot(
 +        task_statement="Improve Loader runtime behavior.",
 +        explicit_sections=["desired_outcome", "likely_touchpoints"],
 +        desired_outcome=["Make the runtime flow more disciplined."],
 +        likely_touchpoints=["src/loader/runtime/conversation.py"],
 +    )
++
 +    review = policy.review_clarify(
 +        task="Improve Loader runtime behavior.",
 +        answer="Focus on src/loader/runtime/conversation.py.",
 +        snapshot=snapshot,
 +        round_index=2,
 +        max_rounds=4,
 +    )
++
 +    assert review.should_continue is True
 +    assert review.reason_code == "clarify_pressure_pass_required"
 +    # The review carries plain string values rather than enum members.
 +    assert review.stage == "readiness"
 +    assert review.pressure_kind == "tradeoff"
 +    assert review.pressure_pass_complete is False
++
++
  def test_workflow_timeline_entry_round_trips() -> None:
      entry = WorkflowTimelineEntry(
          timestamp="2026-04-07T12:00:00Z",