`68fd28c`

Add pressure-pass clarify reviews

Authored by

espadonne 1 month ago

SHA: 68fd28c6038d3de538f2b9e3a0e71b851f0895d0
Parents: 67878a4
Tree: 06f6b37

5 changed files

Status	File	+	-
M	`src/loader/runtime/clarify_strategy.py`	239	3
M	`src/loader/runtime/workflow_lanes.py`	30	3
M	`src/loader/runtime/workflow_policy.py`	69	2
M	`tests/test_clarify_strategy.py`	49	0
M	`tests/test_workflow_policy.py`	23	0

src/loader/runtime/clarify_strategy.py (modified)

      LIKELY_TOUCHPOINTS = "likely_touchpoints"
++class ClarifyStage(StrEnum):
++    """High-level interview stage for bounded clarify mode.
++
++    ``_resolve_stage`` picks the member for an assessment; the comment on
++    each member summarizes the condition that function checks.
++    """
++
++    # Chosen when no readiness gate is missing but the desired-outcome slot
++    # is still unresolved.
++    INTENT = "intent"
++    # Fallback: no readiness gate is missing and the desired outcome is resolved.
++    BOUNDARIES = "boundaries"
++    # Chosen whenever at least one readiness gate is still missing.
++    READINESS = "readiness"
++
++
++class ClarifyPressureKind(StrEnum):
++    """Which kind of pressure pass the next clarify round should apply.
++
++    ``_choose_pressure_kind`` selects a member (or ``None``), checking the
++    conditions in the order the members are listed here.
++    """
++
++    # Selected first: the latest answer was flagged as using broad language.
++    EXAMPLE = "example"
++    # Selected next: the "non_goals" or "decision_boundaries" gate is missing.
++    TRADEOFF = "tradeoff"
++    # Selected when neither of the two conditions above applies.
++    ASSUMPTION = "assumption"
++
++
  _DEFAULT_SLOT_ORDER = [
      ClarifySlot.DESIRED_OUTCOME,
      ClarifySlot.NON_GOALS,
      unresolved_slots: list[ClarifySlot] = field(default_factory=list)
      unresolved_questions: list[str] = field(default_factory=list)
      focus_slot: ClarifySlot | None = None
++    stage: ClarifyStage = ClarifyStage.INTENT
++    pressure_kind: ClarifyPressureKind | None = None
++    pressure_pass_complete: bool = False
++    missing_readiness_gates: list[str] = field(default_factory=list)
  def assess_clarify_snapshot(
      task: str,
      answer: str,
      snapshot: ClarifySnapshot,
++    round_index: int = 1,
++    pressure_pass_complete: bool = False,
  ) -> ClarifyAssessment:
      """Determine which clarify slots remain unresolved after one round."""
      normalized_answer = answer.strip()
      answer_is_short = len(re.findall(r"\w+", normalized_answer)) < 4
      answer_is_broad = _answer_uses_broad_language(normalized_answer)
++    effective_pressure_pass_complete = (
++        pressure_pass_complete or _answer_demonstrates_pressure_pass(normalized_answer)
++    )
++    missing_readiness_gates: list[str] = []
++
++    non_goals_explicit = ClarifySlot.NON_GOALS.value in explicit and bool(
++        [item for item in snapshot.non_goals if item.strip()]
++    )
++    decision_boundaries_explicit = ClarifySlot.DECISION_BOUNDARIES.value in explicit and bool(
++        [item for item in snapshot.decision_boundaries if item.strip()]
++    )
      if not normalized_answer:
          unresolved_questions.append(
          unresolved_questions.append(
              "The desired outcome is still not explicit enough to guide execution."
+         )
--    if ClarifySlot.NON_GOALS.value not in explicit or any(
++    if not non_goals_explicit or any(
          "anything not confirmed" in item.lower() for item in snapshot.non_goals
      ):
          unresolved_slots.append(ClarifySlot.NON_GOALS)
          unresolved_questions.append(
              "Constraints are still too implicit for a safe implementation pass."
+         )
--    if ClarifySlot.DECISION_BOUNDARIES.value not in explicit:
++    if not decision_boundaries_explicit:
          unresolved_slots.append(ClarifySlot.DECISION_BOUNDARIES)
          unresolved_questions.append(
              "Decision boundaries are still too fuzzy for autonomous execution."
          unresolved_questions.append(
              "The clarified scope still uses broad or ambiguous language."
+         )
++
++    if not non_goals_explicit:
++        missing_readiness_gates.append("non_goals")
++    if not decision_boundaries_explicit:
++        missing_readiness_gates.append("decision_boundaries")
++    if round_index >= 2 and not effective_pressure_pass_complete:
++        missing_readiness_gates.append("pressure_pass")
++
++    pressure_kind = _choose_pressure_kind(
++        round_index=round_index,
++        answer_is_broad=answer_is_broad,
++        missing_readiness_gates=missing_readiness_gates,
++        pressure_pass_complete=effective_pressure_pass_complete,
++        unresolved_slots=ordered_slots,
++    )
++    if pressure_kind == ClarifyPressureKind.EXAMPLE:
++        unresolved_questions.append(
++            "Loader still needs a concrete example or counterexample before planning."
++        )
++    elif pressure_kind == ClarifyPressureKind.TRADEOFF:
++        unresolved_questions.append(
++            "Loader still needs an explicit tradeoff or stop boundary before planning."
++        )
++    elif pressure_kind == ClarifyPressureKind.ASSUMPTION:
++        unresolved_questions.append(
++            "Loader still needs one challenged assumption before it should proceed."
++        )
++
++    stage = _resolve_stage(
++        unresolved_slots=ordered_slots,
++        missing_readiness_gates=missing_readiness_gates,
++    )
      return ClarifyAssessment(
          unresolved_slots=ordered_slots,
          unresolved_questions=list(dict.fromkeys(unresolved_questions)),
          focus_slot=ordered_slots[0] if ordered_slots else None,
++        stage=stage,
++        pressure_kind=pressure_kind,
++        pressure_pass_complete=effective_pressure_pass_complete,
++        missing_readiness_gates=list(dict.fromkeys(missing_readiness_gates)),
+     )
--def build_clarify_question(task: str, focus_slot: ClarifySlot | str | None) -> str:
++def build_clarify_question(
++    task: str,
++    focus_slot: ClarifySlot | str | None,
++    pressure_kind: ClarifyPressureKind | str | None = None,
++) -> str:
      """Render one targeted question for the current clarify focus slot."""
      slot = (
          if focus_slot
          else ClarifySlot.DESIRED_OUTCOME
+     )
++    pressure = (
++        pressure_kind
++        if isinstance(pressure_kind, ClarifyPressureKind)
++        else ClarifyPressureKind(pressure_kind)
++        if pressure_kind
++        else None
++    )
++
++    if pressure == ClarifyPressureKind.EXAMPLE:
++        prompts = {
++            ClarifySlot.DESIRED_OUTCOME: (
++                "What is one concrete example of the finished outcome, and one nearby "
++                "result that should still count as out of scope?"
++            ),
++            ClarifySlot.NON_GOALS: (
++                "What is one tempting broader change I should avoid even if it seems helpful?"
++            ),
++            ClarifySlot.ACCEPTANCE_CRITERIA: (
++                "What concrete example would prove this is done, and what shortcut "
++                "would still be wrong?"
++            ),
++            ClarifySlot.CONSTRAINTS: (
++                "What is one concrete invariant I must preserve, and what change would violate it?"
++            ),
++            ClarifySlot.DECISION_BOUNDARIES: (
++                "Give one example of a choice I may make alone and one example that "
++                "should force me to stop and confirm."
++            ),
++            ClarifySlot.LIKELY_TOUCHPOINTS: (
++                "Which file should change first, and which nearby file should I "
++                "explicitly leave alone?"
++            ),
++        }
++        return prompts[slot]
++
++    if pressure == ClarifyPressureKind.TRADEOFF:
++        prompts = {
++            ClarifySlot.DESIRED_OUTCOME: (
++                "What result matters most here, and what broader improvement should I "
++                "still avoid chasing?"
++            ),
++            ClarifySlot.NON_GOALS: (
++                "What should stay unchanged even if changing it would make the "
++                "implementation easier?"
++            ),
++            ClarifySlot.ACCEPTANCE_CRITERIA: (
++                "What outcome would count as success, and what tempting shortcut "
++                "should still count as failure?"
++            ),
++            ClarifySlot.CONSTRAINTS: (
++                "What must stay true even if it makes the change slower or less sweeping?"
++            ),
++            ClarifySlot.DECISION_BOUNDARIES: (
++                "Which decision may I take on my own, and which one should I stop "
++                "and confirm before proceeding?"
++            ),
++            ClarifySlot.LIKELY_TOUCHPOINTS: (
++                "Which file should I focus on, and what file or surface should stay unchanged?"
++            ),
++        }
++        return prompts[slot]
++
++    if pressure == ClarifyPressureKind.ASSUMPTION:
++        prompts = {
++            ClarifySlot.DESIRED_OUTCOME: (
++                "What assumption about the desired outcome am I most likely to get "
++                "wrong if I act now?"
++            ),
++            ClarifySlot.NON_GOALS: (
++                "What assumption about scope should I not make without checking first?"
++            ),
++            ClarifySlot.ACCEPTANCE_CRITERIA: (
++                "What assumption about 'done' would be risky to make without your confirmation?"
++            ),
++            ClarifySlot.CONSTRAINTS: (
++                "What assumption about constraints would be unsafe for me to guess?"
++            ),
++            ClarifySlot.DECISION_BOUNDARIES: (
++                "What decision would be risky for me to assume I can make without checking?"
++            ),
++            ClarifySlot.LIKELY_TOUCHPOINTS: (
++                "What assumption about the right touchpoint or file would be most "
++                "dangerous if I guessed wrong?"
++            ),
++        }
++        return prompts[slot]
++
      prompts = {
          ClarifySlot.DESIRED_OUTCOME: (
              "What concrete outcome should this change achieve when it's done?"
      return _SLOT_LABELS[resolved]
++def describe_clarify_stage(stage: ClarifyStage | str | None) -> str:
++    """Render a friendly clarify-stage label.
++
++    Returns ``"general"`` when ``stage`` is ``None``; otherwise returns the
++    enum member's string value. A plain string is converted with
++    ``ClarifyStage(stage)`` first, which raises ``ValueError`` when the
++    string is not a member value.
++    """
++
++    if stage is None:
++        return "general"
++    # Accept either an enum member or its raw string value.
++    resolved = stage if isinstance(stage, ClarifyStage) else ClarifyStage(stage)
++    return resolved.value
++
++
++def describe_clarify_pressure_kind(
++    pressure_kind: ClarifyPressureKind | str | None,
++) -> str:
++    """Render a friendly pressure-pass label.
++
++    Returns ``"none"`` when ``pressure_kind`` is ``None``; otherwise returns
++    the enum member's string value. A plain string is converted with
++    ``ClarifyPressureKind(pressure_kind)`` first, which raises ``ValueError``
++    when the string is not a member value.
++    """
++
++    # NOTE(review): only ``None`` maps to "none" here, so an empty string
++    # reaches the enum constructor and raises. ``build_clarify_question``
++    # treats any falsy value as "no pressure pass" — confirm which is intended.
++    if pressure_kind is None:
++        return "none"
++    resolved = (
++        pressure_kind
++        if isinstance(pressure_kind, ClarifyPressureKind)
++        else ClarifyPressureKind(pressure_kind)
++    )
++    return resolved.value
++
++
  def _prioritize_slots(
      slots: list[ClarifySlot],
      *,
      return ordered
++def _resolve_stage(
++    *,
++    unresolved_slots: list[ClarifySlot],
++    missing_readiness_gates: list[str],
++) -> ClarifyStage:
++    """Map the unresolved slots and missing gates to a clarify stage.
++
++    Returns ``READINESS`` if any readiness gate is missing, else ``INTENT``
++    if the desired-outcome slot is unresolved, else ``BOUNDARIES``.
++    """
++    # NOTE(review): the readiness check runs first, so INTENT is only
++    # reported when no gate is missing — confirm this precedence is intended.
++    if missing_readiness_gates:
++        return ClarifyStage.READINESS
++    if ClarifySlot.DESIRED_OUTCOME in unresolved_slots:
++        return ClarifyStage.INTENT
++    return ClarifyStage.BOUNDARIES
++
++
++def _choose_pressure_kind(
++    *,
++    round_index: int,
++    answer_is_broad: bool,
++    missing_readiness_gates: list[str],
++    pressure_pass_complete: bool,
++    unresolved_slots: list[ClarifySlot],
++) -> ClarifyPressureKind | None:
++    """Pick the pressure pass for the next clarify round, if any.
++
++    Returns ``None`` before round 2, once the pressure pass is complete, or
++    when no slots remain unresolved. Otherwise returns ``EXAMPLE`` for a
++    broad answer, ``TRADEOFF`` when the "non_goals" or "decision_boundaries"
++    gate is missing, and ``ASSUMPTION`` in every other case.
++    """
++    if round_index < 2 or pressure_pass_complete or not unresolved_slots:
++        return None
++    # A broad answer takes priority over the gate-based choice below.
++    if answer_is_broad:
++        return ClarifyPressureKind.EXAMPLE
++    if any(gate in {"non_goals", "decision_boundaries"} for gate in missing_readiness_gates):
++        return ClarifyPressureKind.TRADEOFF
++    return ClarifyPressureKind.ASSUMPTION
++
++
  def _answer_uses_broad_language(answer: str) -> bool:
      lowered = answer.lower()
      if not lowered:
              "fix it",
              "something",
              "somehow",
++            "maybe",
++            "around there",
++        )
++    )
++
++
++def _answer_demonstrates_pressure_pass(answer: str) -> bool:
++    """Return True when the answer contains any boundary-style phrase.
++
++    The check is a case-insensitive substring test against a fixed phrase
++    list; an empty answer returns False.
++    """
++    lowered = answer.lower()
++    if not lowered:
++        return False
++    # NOTE(review): ``phrase in lowered`` is a plain substring match, so short
++    # entries also hit inside longer words (e.g. "only" in "commonly",
++    # "keep" in "keeper"). Confirm whether word-boundary matching is wanted.
++    return any(
++        phrase in lowered
++        for phrase in (
++            "do not",
++            "don't",
++            "keep",
++            "leave",
++            "unchanged",
++            "out of scope",
++            "avoid",
++            "only",
++            "stop and ask",
++            "confirm first",
+         )
+     )

src/loader/runtime/workflow_lanes.py (modified)

  from typing import Any
  from ..llm.base import Message, Role, ToolCall
--from .clarify_strategy import ClarifySnapshot, build_clarify_question, describe_clarify_slot
++from .clarify_strategy import (
++    ClarifySnapshot,
++    build_clarify_question,
++    describe_clarify_pressure_kind,
++    describe_clarify_slot,
++    describe_clarify_stage,
++)
  from .dod import DefinitionOfDone, DefinitionOfDoneStore
  from .events import AgentEvent, TurnSummary
  from .executor import ToolExecutor
              reason_summary="clarify gathered enough boundaries to proceed",
              unresolved_slots=[],
              focus_slot=None,
++            stage="intent",
++            pressure_kind=None,
++            pressure_pass_complete=False,
++            missing_readiness_gates=[],
          )
          for round_index in range(1, max_rounds + 1):
                  rounds=rounds,
                  unresolved_questions=review.unresolved_questions,
                  unresolved_slots=review.unresolved_slots,
++                stage=review.stage,
++                pressure_kind=review.pressure_kind,
              )
              rounds.append((question, answer))
              review = self.workflow_policy.review_clarify(
                  snapshot=self._clarify_snapshot(task, latest_brief),
                  round_index=round_index,
                  max_rounds=max_rounds,
++                pressure_pass_complete=review.pressure_pass_complete,
              )
              if review.should_continue:
                  append_timeline(
          rounds: list[tuple[str, str]],
          unresolved_questions: list[str],
          unresolved_slots: list[str],
++        stage: str | None,
++        pressure_kind: str | None,
      ) -> tuple[ClarifyBrief, str, str]:
          ask_tool = self.agent.registry.get("AskUserQuestion")
          assert ask_tool is not None
                  rounds=rounds,
                  unresolved_questions=unresolved_questions,
                  unresolved_slots=unresolved_slots,
++                stage=stage,
++                pressure_kind=pressure_kind,
              ),
              tools=[ask_tool.to_schema()],
              max_tokens=500,
                  task,
                  response.content,
                  unresolved_slots,
++                pressure_kind,
              )
              title = None
              options = None
          rounds: list[tuple[str, str]],
          unresolved_questions: list[str],
          unresolved_slots: list[str],
++        stage: str | None,
++        pressure_kind: str | None,
      ) -> str:
          history_lines = []
          for index, (question, answer) in enumerate(rounds, start=1):
          unresolved = "\n".join(f"- {item}" for item in unresolved_questions) or "- none"
          focus_slot = unresolved_slots[0] if unresolved_slots else None
          focus_label = describe_clarify_slot(focus_slot)
++        stage_label = describe_clarify_stage(stage)
++        pressure_label = describe_clarify_pressure_kind(pressure_kind)
          return (
              "Clarify the task before planning or implementation.\n\n"
              f"Task: {task}\n"
              f"Round: {round_index}\n"
++            f"Stage: {stage_label}\n"
              f"Focus slot: {focus_label}\n"
++            f"Pressure pass: {pressure_label}\n"
              "Ask exactly one focused question via AskUserQuestion.\n"
--            "Use the unresolved questions and prior answers to tighten scope.\n\n"
++            "Use the unresolved questions and prior answers to tighten scope.\n"
++            "If a pressure pass is active, prefer examples, tradeoffs, or "
++            "challenged assumptions over generic restatement.\n\n"
              "Unresolved questions:\n"
              f"{unresolved}\n\n"
              "Prior clarify history:\n"
          task: str,
          response_content: str,
          unresolved_slots: list[str],
++        pressure_kind: str | None,
      ) -> str:
          match = re.search(r"([A-Z][^?]+\?)", response_content)
          if match:
              return match.group(1).strip()
          focus_slot = unresolved_slots[0] if unresolved_slots else None
--        return build_clarify_question(task, focus_slot)
++        return build_clarify_question(task, focus_slot, pressure_kind)
      @staticmethod
      def _clarify_snapshot(task: str, brief: ClarifyBrief) -> ClarifySnapshot:

src/loader/runtime/workflow_policy.py (modified)

  from pathlib import Path
  from typing import Any
--from .clarify_strategy import ClarifySnapshot, assess_clarify_snapshot, describe_clarify_slot
++from .clarify_strategy import (
++    ClarifySnapshot,
++    assess_clarify_snapshot,
++    describe_clarify_pressure_kind,
++    describe_clarify_slot,
++)
  from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket
      unresolved_questions: list[str] = field(default_factory=list)
      unresolved_slots: list[str] = field(default_factory=list)
      focus_slot: str | None = None
++    stage: str | None = None
++    pressure_kind: str | None = None
++    pressure_pass_complete: bool = False
++    missing_readiness_gates: list[str] = field(default_factory=list)
  @dataclass(slots=True)
          snapshot: ClarifySnapshot,
          round_index: int,
          max_rounds: int,
++        pressure_pass_complete: bool = False,
      ) -> ClarifyReview:
          """Determine whether clarify should continue for another round."""
              task=task,
              answer=answer,
              snapshot=snapshot,
++            round_index=round_index,
++            pressure_pass_complete=pressure_pass_complete,
+         )
          unresolved = list(assessment.unresolved_questions)
          focus_slot = assessment.focus_slot.value if assessment.focus_slot else None
          focus_label = describe_clarify_slot(assessment.focus_slot)
++        pressure_kind = (
++            assessment.pressure_kind.value if assessment.pressure_kind is not None else None
++        )
++        pressure_label = describe_clarify_pressure_kind(assessment.pressure_kind)
++        readiness_gates = list(assessment.missing_readiness_gates)
          if unresolved and round_index < max_rounds:
++            if assessment.pressure_kind is not None:
++                return ClarifyReview(
++                    should_continue=True,
++                    reason_code="clarify_pressure_pass_required",
++                    reason_summary=(
++                        "clarify still needs a "
++                        f"{pressure_label} pass around {focus_label}"
++                    ),
++                    unresolved_questions=unresolved,
++                    unresolved_slots=[slot.value for slot in assessment.unresolved_slots],
++                    focus_slot=focus_slot,
++                    stage=assessment.stage.value,
++                    pressure_kind=pressure_kind,
++                    pressure_pass_complete=assessment.pressure_pass_complete,
++                    missing_readiness_gates=readiness_gates,
++                )
++
              return ClarifyReview(
                  should_continue=True,
                  reason_code="clarify_follow_up_needed",
                  unresolved_questions=unresolved,
                  unresolved_slots=[slot.value for slot in assessment.unresolved_slots],
                  focus_slot=focus_slot,
++                stage=assessment.stage.value,
++                pressure_kind=pressure_kind,
++                pressure_pass_complete=assessment.pressure_pass_complete,
++                missing_readiness_gates=readiness_gates,
+             )
          if unresolved:
++            if not assessment.pressure_pass_complete and round_index >= 2:
++                return ClarifyReview(
++                    should_continue=False,
++                    reason_code="clarify_budget_exhausted_without_pressure_pass",
++                    reason_summary=(
++                        "clarify budget exhausted before Loader completed a "
++                        "bounded pressure pass"
++                    ),
++                    unresolved_questions=unresolved,
++                    unresolved_slots=[slot.value for slot in assessment.unresolved_slots],
++                    focus_slot=focus_slot,
++                    stage=assessment.stage.value,
++                    pressure_kind=pressure_kind,
++                    pressure_pass_complete=assessment.pressure_pass_complete,
++                    missing_readiness_gates=readiness_gates,
++                )
++
              return ClarifyReview(
                  should_continue=False,
                  reason_code="clarify_budget_exhausted",
                  unresolved_questions=unresolved,
                  unresolved_slots=[slot.value for slot in assessment.unresolved_slots],
                  focus_slot=focus_slot,
++                stage=assessment.stage.value,
++                pressure_kind=pressure_kind,
++                pressure_pass_complete=assessment.pressure_pass_complete,
++                missing_readiness_gates=readiness_gates,
+             )
          return ClarifyReview(
              should_continue=False,
              reason_code="clarify_complete",
--            reason_summary="clarify gathered enough boundaries to proceed",
++            reason_summary=(
++                "clarify gathered enough boundaries and completed a bounded pressure pass"
++                if assessment.pressure_pass_complete
++                else "clarify gathered enough boundaries to proceed"
++            ),
              unresolved_questions=[],
              unresolved_slots=[],
              focus_slot=None,
++            stage=assessment.stage.value,
++            pressure_kind=pressure_kind,
++            pressure_pass_complete=assessment.pressure_pass_complete,
++            missing_readiness_gates=readiness_gates,
+         )
      def assess_artifact_freshness(

tests/test_clarify_strategy.py (modified)

  from __future__ import annotations
  from loader.runtime.clarify_strategy import (
++    ClarifyPressureKind,
      ClarifySlot,
      ClarifySnapshot,
++    ClarifyStage,
      assess_clarify_snapshot,
      build_clarify_question,
+ )
      assert "out of scope" in question.lower()
++
++def test_assess_clarify_snapshot_requests_tradeoff_pressure_pass_on_later_round() -> None:
++    """Round 2 without non-goals or decision boundaries asks for a tradeoff pass."""
++    # The snapshot marks only desired_outcome and likely_touchpoints explicit,
++    # so both boundary gates are expected to be reported missing.
++    assessment = assess_clarify_snapshot(
++        task="Improve Loader runtime behavior.",
++        answer="Focus on src/loader/runtime/conversation.py.",
++        snapshot=ClarifySnapshot(
++            task_statement="Improve Loader runtime behavior.",
++            explicit_sections=["desired_outcome", "likely_touchpoints"],
++            desired_outcome=["Make the runtime flow more disciplined."],
++            likely_touchpoints=["src/loader/runtime/conversation.py"],
++        ),
++        round_index=2,
++    )
++
++    assert assessment.stage == ClarifyStage.READINESS
++    assert assessment.pressure_kind == ClarifyPressureKind.TRADEOFF
++    assert assessment.pressure_pass_complete is False
++    assert "non_goals" in assessment.missing_readiness_gates
++    assert "decision_boundaries" in assessment.missing_readiness_gates
++
++
++def test_assess_clarify_snapshot_marks_pressure_pass_complete_for_boundary_answer() -> None:
++    """A round-2 answer with boundary phrasing counts as a completed pressure pass."""
++    # The answer contains "keep", "unchanged" and "do not", all of which are
++    # entries in the phrase list used to detect a pressure pass.
++    assessment = assess_clarify_snapshot(
++        task="Improve Loader runtime behavior.",
++        answer="Keep the CLI unchanged and do not broaden the UX without confirming first.",
++        snapshot=ClarifySnapshot(
++            task_statement="Improve Loader runtime behavior.",
++            explicit_sections=["desired_outcome", "non_goals", "decision_boundaries"],
++            desired_outcome=["Make the runtime flow more disciplined."],
++            non_goals=["Keep the CLI unchanged."],
++            decision_boundaries=["Confirm before broad UX changes."],
++        ),
++        round_index=2,
++    )
++
++    assert assessment.pressure_pass_complete is True
++    assert "pressure_pass" not in assessment.missing_readiness_gates
++
++
++def test_build_clarify_question_can_render_pressure_pass_question() -> None:
++    """A tradeoff pressure pass on the non-goals slot yields a boundary-style prompt."""
++    question = build_clarify_question(
++        "Tighten the runtime behavior.",
++        ClarifySlot.NON_GOALS,
++        ClarifyPressureKind.TRADEOFF,
++    )
++
++    # The tradeoff prompt for NON_GOALS asks what should "stay unchanged".
++    assert "unchanged" in question.lower() or "avoid" in question.lower()

tests/test_workflow_policy.py (modified)

      assert review.focus_slot == "likely_touchpoints"
++def test_workflow_policy_requests_pressure_pass_on_later_clarify_round() -> None:
++    """With rounds remaining, round 2 continues with a pressure-pass reason code."""
++    policy = WorkflowPolicy()
++
++    # round_index (2) is below max_rounds (4), so the review may ask to continue.
++    review = policy.review_clarify(
++        task="Improve Loader runtime behavior.",
++        answer="Focus on src/loader/runtime/conversation.py.",
++        snapshot=ClarifySnapshot(
++            task_statement="Improve Loader runtime behavior.",
++            explicit_sections=["desired_outcome", "likely_touchpoints"],
++            desired_outcome=["Make the runtime flow more disciplined."],
++            likely_touchpoints=["src/loader/runtime/conversation.py"],
++        ),
++        round_index=2,
++        max_rounds=4,
++    )
++
++    assert review.should_continue is True
++    assert review.reason_code == "clarify_pressure_pass_required"
++    assert review.stage == "readiness"
++    assert review.pressure_kind == "tradeoff"
++    assert review.pressure_pass_complete is False
++
++
  def test_workflow_timeline_entry_round_trips() -> None:
      entry = WorkflowTimelineEntry(
          timestamp="2026-04-07T12:00:00Z",