Add pressure-pass clarify reviews
- SHA: 68fd28c6038d3de538f2b9e3a0e71b851f0895d0 (short: 68fd28c)
- Parents: 67878a4
- Tree: 06f6b37

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/clarify_strategy.py | 239 | 3 |
| M | src/loader/runtime/workflow_lanes.py | 30 | 3 |
| M | src/loader/runtime/workflow_policy.py | 69 | 2 |
| M | tests/test_clarify_strategy.py | 49 | 0 |
| M | tests/test_workflow_policy.py | 23 | 0 |

src/loader/runtime/clarify_strategy.py (modified)
@@ -18,6 +18,22 @@ class ClarifySlot(StrEnum): | |||
| 18 | LIKELY_TOUCHPOINTS = "likely_touchpoints" | 18 | LIKELY_TOUCHPOINTS = "likely_touchpoints" |
| 19 | 19 | ||
| 20 | 20 | ||
| 21 | +class ClarifyStage(StrEnum): | ||
| 22 | + """High-level interview stage for bounded clarify mode.""" | ||
| 23 | + | ||
| 24 | + INTENT = "intent" | ||
| 25 | + BOUNDARIES = "boundaries" | ||
| 26 | + READINESS = "readiness" | ||
| 27 | + | ||
| 28 | + | ||
| 29 | +class ClarifyPressureKind(StrEnum): | ||
| 30 | + """Which kind of pressure pass the next clarify round should apply.""" | ||
| 31 | + | ||
| 32 | + EXAMPLE = "example" | ||
| 33 | + TRADEOFF = "tradeoff" | ||
| 34 | + ASSUMPTION = "assumption" | ||
| 35 | + | ||
| 36 | + | ||
| 21 | _DEFAULT_SLOT_ORDER = [ | 37 | _DEFAULT_SLOT_ORDER = [ |
| 22 | ClarifySlot.DESIRED_OUTCOME, | 38 | ClarifySlot.DESIRED_OUTCOME, |
| 23 | ClarifySlot.NON_GOALS, | 39 | ClarifySlot.NON_GOALS, |
@@ -58,6 +74,10 @@ class ClarifyAssessment: | |||
| 58 | unresolved_slots: list[ClarifySlot] = field(default_factory=list) | 74 | unresolved_slots: list[ClarifySlot] = field(default_factory=list) |
| 59 | unresolved_questions: list[str] = field(default_factory=list) | 75 | unresolved_questions: list[str] = field(default_factory=list) |
| 60 | focus_slot: ClarifySlot | None = None | 76 | focus_slot: ClarifySlot | None = None |
| 77 | + stage: ClarifyStage = ClarifyStage.INTENT | ||
| 78 | + pressure_kind: ClarifyPressureKind | None = None | ||
| 79 | + pressure_pass_complete: bool = False | ||
| 80 | + missing_readiness_gates: list[str] = field(default_factory=list) | ||
| 61 | 81 | ||
| 62 | 82 | ||
| 63 | def assess_clarify_snapshot( | 83 | def assess_clarify_snapshot( |
@@ -65,6 +85,8 @@ def assess_clarify_snapshot( | |||
| 65 | task: str, | 85 | task: str, |
| 66 | answer: str, | 86 | answer: str, |
| 67 | snapshot: ClarifySnapshot, | 87 | snapshot: ClarifySnapshot, |
| 88 | + round_index: int = 1, | ||
| 89 | + pressure_pass_complete: bool = False, | ||
| 68 | ) -> ClarifyAssessment: | 90 | ) -> ClarifyAssessment: |
| 69 | """Determine which clarify slots remain unresolved after one round.""" | 91 | """Determine which clarify slots remain unresolved after one round.""" |
| 70 | 92 | ||
@@ -74,6 +96,17 @@ def assess_clarify_snapshot( | |||
| 74 | normalized_answer = answer.strip() | 96 | normalized_answer = answer.strip() |
| 75 | answer_is_short = len(re.findall(r"\w+", normalized_answer)) < 4 | 97 | answer_is_short = len(re.findall(r"\w+", normalized_answer)) < 4 |
| 76 | answer_is_broad = _answer_uses_broad_language(normalized_answer) | 98 | answer_is_broad = _answer_uses_broad_language(normalized_answer) |
| 99 | + effective_pressure_pass_complete = ( | ||
| 100 | + pressure_pass_complete or _answer_demonstrates_pressure_pass(normalized_answer) | ||
| 101 | + ) | ||
| 102 | + missing_readiness_gates: list[str] = [] | ||
| 103 | + | ||
| 104 | + non_goals_explicit = ClarifySlot.NON_GOALS.value in explicit and bool( | ||
| 105 | + [item for item in snapshot.non_goals if item.strip()] | ||
| 106 | + ) | ||
| 107 | + decision_boundaries_explicit = ClarifySlot.DECISION_BOUNDARIES.value in explicit and bool( | ||
| 108 | + [item for item in snapshot.decision_boundaries if item.strip()] | ||
| 109 | + ) | ||
| 77 | 110 | ||
| 78 | if not normalized_answer: | 111 | if not normalized_answer: |
| 79 | unresolved_questions.append( | 112 | unresolved_questions.append( |
@@ -89,7 +122,7 @@ def assess_clarify_snapshot( | |||
| 89 | unresolved_questions.append( | 122 | unresolved_questions.append( |
| 90 | "The desired outcome is still not explicit enough to guide execution." | 123 | "The desired outcome is still not explicit enough to guide execution." |
| 91 | ) | 124 | ) |
| 92 | - if ClarifySlot.NON_GOALS.value not in explicit or any( | 125 | + if not non_goals_explicit or any( |
| 93 | "anything not confirmed" in item.lower() for item in snapshot.non_goals | 126 | "anything not confirmed" in item.lower() for item in snapshot.non_goals |
| 94 | ): | 127 | ): |
| 95 | unresolved_slots.append(ClarifySlot.NON_GOALS) | 128 | unresolved_slots.append(ClarifySlot.NON_GOALS) |
@@ -109,7 +142,7 @@ def assess_clarify_snapshot( | |||
| 109 | unresolved_questions.append( | 142 | unresolved_questions.append( |
| 110 | "Constraints are still too implicit for a safe implementation pass." | 143 | "Constraints are still too implicit for a safe implementation pass." |
| 111 | ) | 144 | ) |
| 112 | - if ClarifySlot.DECISION_BOUNDARIES.value not in explicit: | 145 | + if not decision_boundaries_explicit: |
| 113 | unresolved_slots.append(ClarifySlot.DECISION_BOUNDARIES) | 146 | unresolved_slots.append(ClarifySlot.DECISION_BOUNDARIES) |
| 114 | unresolved_questions.append( | 147 | unresolved_questions.append( |
| 115 | "Decision boundaries are still too fuzzy for autonomous execution." | 148 | "Decision boundaries are still too fuzzy for autonomous execution." |
@@ -131,14 +164,54 @@ def assess_clarify_snapshot( | |||
| 131 | unresolved_questions.append( | 164 | unresolved_questions.append( |
| 132 | "The clarified scope still uses broad or ambiguous language." | 165 | "The clarified scope still uses broad or ambiguous language." |
| 133 | ) | 166 | ) |
| 167 | + | ||
| 168 | + if not non_goals_explicit: | ||
| 169 | + missing_readiness_gates.append("non_goals") | ||
| 170 | + if not decision_boundaries_explicit: | ||
| 171 | + missing_readiness_gates.append("decision_boundaries") | ||
| 172 | + if round_index >= 2 and not effective_pressure_pass_complete: | ||
| 173 | + missing_readiness_gates.append("pressure_pass") | ||
| 174 | + | ||
| 175 | + pressure_kind = _choose_pressure_kind( | ||
| 176 | + round_index=round_index, | ||
| 177 | + answer_is_broad=answer_is_broad, | ||
| 178 | + missing_readiness_gates=missing_readiness_gates, | ||
| 179 | + pressure_pass_complete=effective_pressure_pass_complete, | ||
| 180 | + unresolved_slots=ordered_slots, | ||
| 181 | + ) | ||
| 182 | + if pressure_kind == ClarifyPressureKind.EXAMPLE: | ||
| 183 | + unresolved_questions.append( | ||
| 184 | + "Loader still needs a concrete example or counterexample before planning." | ||
| 185 | + ) | ||
| 186 | + elif pressure_kind == ClarifyPressureKind.TRADEOFF: | ||
| 187 | + unresolved_questions.append( | ||
| 188 | + "Loader still needs an explicit tradeoff or stop boundary before planning." | ||
| 189 | + ) | ||
| 190 | + elif pressure_kind == ClarifyPressureKind.ASSUMPTION: | ||
| 191 | + unresolved_questions.append( | ||
| 192 | + "Loader still needs one challenged assumption before it should proceed." | ||
| 193 | + ) | ||
| 194 | + | ||
| 195 | + stage = _resolve_stage( | ||
| 196 | + unresolved_slots=ordered_slots, | ||
| 197 | + missing_readiness_gates=missing_readiness_gates, | ||
| 198 | + ) | ||
| 134 | return ClarifyAssessment( | 199 | return ClarifyAssessment( |
| 135 | unresolved_slots=ordered_slots, | 200 | unresolved_slots=ordered_slots, |
| 136 | unresolved_questions=list(dict.fromkeys(unresolved_questions)), | 201 | unresolved_questions=list(dict.fromkeys(unresolved_questions)), |
| 137 | focus_slot=ordered_slots[0] if ordered_slots else None, | 202 | focus_slot=ordered_slots[0] if ordered_slots else None, |
| 203 | + stage=stage, | ||
| 204 | + pressure_kind=pressure_kind, | ||
| 205 | + pressure_pass_complete=effective_pressure_pass_complete, | ||
| 206 | + missing_readiness_gates=list(dict.fromkeys(missing_readiness_gates)), | ||
| 138 | ) | 207 | ) |
| 139 | 208 | ||
| 140 | 209 | ||
| 141 | -def build_clarify_question(task: str, focus_slot: ClarifySlot | str | None) -> str: | 210 | +def build_clarify_question( |
| 211 | + task: str, | ||
| 212 | + focus_slot: ClarifySlot | str | None, | ||
| 213 | + pressure_kind: ClarifyPressureKind | str | None = None, | ||
| 214 | +) -> str: | ||
| 142 | """Render one targeted question for the current clarify focus slot.""" | 215 | """Render one targeted question for the current clarify focus slot.""" |
| 143 | 216 | ||
| 144 | slot = ( | 217 | slot = ( |
@@ -148,6 +221,93 @@ def build_clarify_question(task: str, focus_slot: ClarifySlot | str | None) -> s | |||
| 148 | if focus_slot | 221 | if focus_slot |
| 149 | else ClarifySlot.DESIRED_OUTCOME | 222 | else ClarifySlot.DESIRED_OUTCOME |
| 150 | ) | 223 | ) |
| 224 | + pressure = ( | ||
| 225 | + pressure_kind | ||
| 226 | + if isinstance(pressure_kind, ClarifyPressureKind) | ||
| 227 | + else ClarifyPressureKind(pressure_kind) | ||
| 228 | + if pressure_kind | ||
| 229 | + else None | ||
| 230 | + ) | ||
| 231 | + | ||
| 232 | + if pressure == ClarifyPressureKind.EXAMPLE: | ||
| 233 | + prompts = { | ||
| 234 | + ClarifySlot.DESIRED_OUTCOME: ( | ||
| 235 | + "What is one concrete example of the finished outcome, and one nearby " | ||
| 236 | + "result that should still count as out of scope?" | ||
| 237 | + ), | ||
| 238 | + ClarifySlot.NON_GOALS: ( | ||
| 239 | + "What is one tempting broader change I should avoid even if it seems helpful?" | ||
| 240 | + ), | ||
| 241 | + ClarifySlot.ACCEPTANCE_CRITERIA: ( | ||
| 242 | + "What concrete example would prove this is done, and what shortcut " | ||
| 243 | + "would still be wrong?" | ||
| 244 | + ), | ||
| 245 | + ClarifySlot.CONSTRAINTS: ( | ||
| 246 | + "What is one concrete invariant I must preserve, and what change would violate it?" | ||
| 247 | + ), | ||
| 248 | + ClarifySlot.DECISION_BOUNDARIES: ( | ||
| 249 | + "Give one example of a choice I may make alone and one example that " | ||
| 250 | + "should force me to stop and confirm." | ||
| 251 | + ), | ||
| 252 | + ClarifySlot.LIKELY_TOUCHPOINTS: ( | ||
| 253 | + "Which file should change first, and which nearby file should I " | ||
| 254 | + "explicitly leave alone?" | ||
| 255 | + ), | ||
| 256 | + } | ||
| 257 | + return prompts[slot] | ||
| 258 | + | ||
| 259 | + if pressure == ClarifyPressureKind.TRADEOFF: | ||
| 260 | + prompts = { | ||
| 261 | + ClarifySlot.DESIRED_OUTCOME: ( | ||
| 262 | + "What result matters most here, and what broader improvement should I " | ||
| 263 | + "still avoid chasing?" | ||
| 264 | + ), | ||
| 265 | + ClarifySlot.NON_GOALS: ( | ||
| 266 | + "What should stay unchanged even if changing it would make the " | ||
| 267 | + "implementation easier?" | ||
| 268 | + ), | ||
| 269 | + ClarifySlot.ACCEPTANCE_CRITERIA: ( | ||
| 270 | + "What outcome would count as success, and what tempting shortcut " | ||
| 271 | + "should still count as failure?" | ||
| 272 | + ), | ||
| 273 | + ClarifySlot.CONSTRAINTS: ( | ||
| 274 | + "What must stay true even if it makes the change slower or less sweeping?" | ||
| 275 | + ), | ||
| 276 | + ClarifySlot.DECISION_BOUNDARIES: ( | ||
| 277 | + "Which decision may I take on my own, and which one should I stop " | ||
| 278 | + "and confirm before proceeding?" | ||
| 279 | + ), | ||
| 280 | + ClarifySlot.LIKELY_TOUCHPOINTS: ( | ||
| 281 | + "Which file should I focus on, and what file or surface should stay unchanged?" | ||
| 282 | + ), | ||
| 283 | + } | ||
| 284 | + return prompts[slot] | ||
| 285 | + | ||
| 286 | + if pressure == ClarifyPressureKind.ASSUMPTION: | ||
| 287 | + prompts = { | ||
| 288 | + ClarifySlot.DESIRED_OUTCOME: ( | ||
| 289 | + "What assumption about the desired outcome am I most likely to get " | ||
| 290 | + "wrong if I act now?" | ||
| 291 | + ), | ||
| 292 | + ClarifySlot.NON_GOALS: ( | ||
| 293 | + "What assumption about scope should I not make without checking first?" | ||
| 294 | + ), | ||
| 295 | + ClarifySlot.ACCEPTANCE_CRITERIA: ( | ||
| 296 | + "What assumption about 'done' would be risky to make without your confirmation?" | ||
| 297 | + ), | ||
| 298 | + ClarifySlot.CONSTRAINTS: ( | ||
| 299 | + "What assumption about constraints would be unsafe for me to guess?" | ||
| 300 | + ), | ||
| 301 | + ClarifySlot.DECISION_BOUNDARIES: ( | ||
| 302 | + "What decision would be risky for me to assume I can make without checking?" | ||
| 303 | + ), | ||
| 304 | + ClarifySlot.LIKELY_TOUCHPOINTS: ( | ||
| 305 | + "What assumption about the right touchpoint or file would be most " | ||
| 306 | + "dangerous if I guessed wrong?" | ||
| 307 | + ), | ||
| 308 | + } | ||
| 309 | + return prompts[slot] | ||
| 310 | + | ||
| 151 | prompts = { | 311 | prompts = { |
| 152 | ClarifySlot.DESIRED_OUTCOME: ( | 312 | ClarifySlot.DESIRED_OUTCOME: ( |
| 153 | "What concrete outcome should this change achieve when it's done?" | 313 | "What concrete outcome should this change achieve when it's done?" |
@@ -183,6 +343,30 @@ def describe_clarify_slot(slot: ClarifySlot | str | None) -> str: | |||
| 183 | return _SLOT_LABELS[resolved] | 343 | return _SLOT_LABELS[resolved] |
| 184 | 344 | ||
| 185 | 345 | ||
| 346 | +def describe_clarify_stage(stage: ClarifyStage | str | None) -> str: | ||
| 347 | + """Render a friendly clarify-stage label.""" | ||
| 348 | + | ||
| 349 | + if stage is None: | ||
| 350 | + return "general" | ||
| 351 | + resolved = stage if isinstance(stage, ClarifyStage) else ClarifyStage(stage) | ||
| 352 | + return resolved.value | ||
| 353 | + | ||
| 354 | + | ||
| 355 | +def describe_clarify_pressure_kind( | ||
| 356 | + pressure_kind: ClarifyPressureKind | str | None, | ||
| 357 | +) -> str: | ||
| 358 | + """Render a friendly pressure-pass label.""" | ||
| 359 | + | ||
| 360 | + if pressure_kind is None: | ||
| 361 | + return "none" | ||
| 362 | + resolved = ( | ||
| 363 | + pressure_kind | ||
| 364 | + if isinstance(pressure_kind, ClarifyPressureKind) | ||
| 365 | + else ClarifyPressureKind(pressure_kind) | ||
| 366 | + ) | ||
| 367 | + return resolved.value | ||
| 368 | + | ||
| 369 | + | ||
| 186 | def _prioritize_slots( | 370 | def _prioritize_slots( |
| 187 | slots: list[ClarifySlot], | 371 | slots: list[ClarifySlot], |
| 188 | *, | 372 | *, |
@@ -203,6 +387,35 @@ def _prioritize_slots( | |||
| 203 | return ordered | 387 | return ordered |
| 204 | 388 | ||
| 205 | 389 | ||
| 390 | +def _resolve_stage( | ||
| 391 | + *, | ||
| 392 | + unresolved_slots: list[ClarifySlot], | ||
| 393 | + missing_readiness_gates: list[str], | ||
| 394 | +) -> ClarifyStage: | ||
| 395 | + if missing_readiness_gates: | ||
| 396 | + return ClarifyStage.READINESS | ||
| 397 | + if ClarifySlot.DESIRED_OUTCOME in unresolved_slots: | ||
| 398 | + return ClarifyStage.INTENT | ||
| 399 | + return ClarifyStage.BOUNDARIES | ||
| 400 | + | ||
| 401 | + | ||
| 402 | +def _choose_pressure_kind( | ||
| 403 | + *, | ||
| 404 | + round_index: int, | ||
| 405 | + answer_is_broad: bool, | ||
| 406 | + missing_readiness_gates: list[str], | ||
| 407 | + pressure_pass_complete: bool, | ||
| 408 | + unresolved_slots: list[ClarifySlot], | ||
| 409 | +) -> ClarifyPressureKind | None: | ||
| 410 | + if round_index < 2 or pressure_pass_complete or not unresolved_slots: | ||
| 411 | + return None | ||
| 412 | + if answer_is_broad: | ||
| 413 | + return ClarifyPressureKind.EXAMPLE | ||
| 414 | + if any(gate in {"non_goals", "decision_boundaries"} for gate in missing_readiness_gates): | ||
| 415 | + return ClarifyPressureKind.TRADEOFF | ||
| 416 | + return ClarifyPressureKind.ASSUMPTION | ||
| 417 | + | ||
| 418 | + | ||
| 206 | def _answer_uses_broad_language(answer: str) -> bool: | 419 | def _answer_uses_broad_language(answer: str) -> bool: |
| 207 | lowered = answer.lower() | 420 | lowered = answer.lower() |
| 208 | if not lowered: | 421 | if not lowered: |
@@ -217,6 +430,29 @@ def _answer_uses_broad_language(answer: str) -> bool: | |||
| 217 | "fix it", | 430 | "fix it", |
| 218 | "something", | 431 | "something", |
| 219 | "somehow", | 432 | "somehow", |
| 433 | + "maybe", | ||
| 434 | + "around there", | ||
| 435 | + ) | ||
| 436 | + ) | ||
| 437 | + | ||
| 438 | + | ||
| 439 | +def _answer_demonstrates_pressure_pass(answer: str) -> bool: | ||
| 440 | + lowered = answer.lower() | ||
| 441 | + if not lowered: | ||
| 442 | + return False | ||
| 443 | + return any( | ||
| 444 | + phrase in lowered | ||
| 445 | + for phrase in ( | ||
| 446 | + "do not", | ||
| 447 | + "don't", | ||
| 448 | + "keep", | ||
| 449 | + "leave", | ||
| 450 | + "unchanged", | ||
| 451 | + "out of scope", | ||
| 452 | + "avoid", | ||
| 453 | + "only", | ||
| 454 | + "stop and ask", | ||
| 455 | + "confirm first", | ||
| 220 | ) | 456 | ) |
| 221 | ) | 457 | ) |
| 222 | 458 | ||
src/loader/runtime/workflow_lanes.py (modified)
@@ -9,7 +9,13 @@ from pathlib import Path | |||
| 9 | from typing import Any | 9 | from typing import Any |
| 10 | 10 | ||
| 11 | from ..llm.base import Message, Role, ToolCall | 11 | from ..llm.base import Message, Role, ToolCall |
| 12 | -from .clarify_strategy import ClarifySnapshot, build_clarify_question, describe_clarify_slot | 12 | +from .clarify_strategy import ( |
| 13 | + ClarifySnapshot, | ||
| 14 | + build_clarify_question, | ||
| 15 | + describe_clarify_pressure_kind, | ||
| 16 | + describe_clarify_slot, | ||
| 17 | + describe_clarify_stage, | ||
| 18 | +) | ||
| 13 | from .dod import DefinitionOfDone, DefinitionOfDoneStore | 19 | from .dod import DefinitionOfDone, DefinitionOfDoneStore |
| 14 | from .events import AgentEvent, TurnSummary | 20 | from .events import AgentEvent, TurnSummary |
| 15 | from .executor import ToolExecutor | 21 | from .executor import ToolExecutor |
@@ -67,6 +73,10 @@ class WorkflowLaneRunner: | |||
| 67 | reason_summary="clarify gathered enough boundaries to proceed", | 73 | reason_summary="clarify gathered enough boundaries to proceed", |
| 68 | unresolved_slots=[], | 74 | unresolved_slots=[], |
| 69 | focus_slot=None, | 75 | focus_slot=None, |
| 76 | + stage="intent", | ||
| 77 | + pressure_kind=None, | ||
| 78 | + pressure_pass_complete=False, | ||
| 79 | + missing_readiness_gates=[], | ||
| 70 | ) | 80 | ) |
| 71 | 81 | ||
| 72 | for round_index in range(1, max_rounds + 1): | 82 | for round_index in range(1, max_rounds + 1): |
@@ -79,6 +89,8 @@ class WorkflowLaneRunner: | |||
| 79 | rounds=rounds, | 89 | rounds=rounds, |
| 80 | unresolved_questions=review.unresolved_questions, | 90 | unresolved_questions=review.unresolved_questions, |
| 81 | unresolved_slots=review.unresolved_slots, | 91 | unresolved_slots=review.unresolved_slots, |
| 92 | + stage=review.stage, | ||
| 93 | + pressure_kind=review.pressure_kind, | ||
| 82 | ) | 94 | ) |
| 83 | rounds.append((question, answer)) | 95 | rounds.append((question, answer)) |
| 84 | review = self.workflow_policy.review_clarify( | 96 | review = self.workflow_policy.review_clarify( |
@@ -87,6 +99,7 @@ class WorkflowLaneRunner: | |||
| 87 | snapshot=self._clarify_snapshot(task, latest_brief), | 99 | snapshot=self._clarify_snapshot(task, latest_brief), |
| 88 | round_index=round_index, | 100 | round_index=round_index, |
| 89 | max_rounds=max_rounds, | 101 | max_rounds=max_rounds, |
| 102 | + pressure_pass_complete=review.pressure_pass_complete, | ||
| 90 | ) | 103 | ) |
| 91 | if review.should_continue: | 104 | if review.should_continue: |
| 92 | append_timeline( | 105 | append_timeline( |
@@ -301,6 +314,8 @@ class WorkflowLaneRunner: | |||
| 301 | rounds: list[tuple[str, str]], | 314 | rounds: list[tuple[str, str]], |
| 302 | unresolved_questions: list[str], | 315 | unresolved_questions: list[str], |
| 303 | unresolved_slots: list[str], | 316 | unresolved_slots: list[str], |
| 317 | + stage: str | None, | ||
| 318 | + pressure_kind: str | None, | ||
| 304 | ) -> tuple[ClarifyBrief, str, str]: | 319 | ) -> tuple[ClarifyBrief, str, str]: |
| 305 | ask_tool = self.agent.registry.get("AskUserQuestion") | 320 | ask_tool = self.agent.registry.get("AskUserQuestion") |
| 306 | assert ask_tool is not None | 321 | assert ask_tool is not None |
@@ -311,6 +326,8 @@ class WorkflowLaneRunner: | |||
| 311 | rounds=rounds, | 326 | rounds=rounds, |
| 312 | unresolved_questions=unresolved_questions, | 327 | unresolved_questions=unresolved_questions, |
| 313 | unresolved_slots=unresolved_slots, | 328 | unresolved_slots=unresolved_slots, |
| 329 | + stage=stage, | ||
| 330 | + pressure_kind=pressure_kind, | ||
| 314 | ), | 331 | ), |
| 315 | tools=[ask_tool.to_schema()], | 332 | tools=[ask_tool.to_schema()], |
| 316 | max_tokens=500, | 333 | max_tokens=500, |
@@ -328,6 +345,7 @@ class WorkflowLaneRunner: | |||
| 328 | task, | 345 | task, |
| 329 | response.content, | 346 | response.content, |
| 330 | unresolved_slots, | 347 | unresolved_slots, |
| 348 | + pressure_kind, | ||
| 331 | ) | 349 | ) |
| 332 | title = None | 350 | title = None |
| 333 | options = None | 351 | options = None |
@@ -410,6 +428,8 @@ class WorkflowLaneRunner: | |||
| 410 | rounds: list[tuple[str, str]], | 428 | rounds: list[tuple[str, str]], |
| 411 | unresolved_questions: list[str], | 429 | unresolved_questions: list[str], |
| 412 | unresolved_slots: list[str], | 430 | unresolved_slots: list[str], |
| 431 | + stage: str | None, | ||
| 432 | + pressure_kind: str | None, | ||
| 413 | ) -> str: | 433 | ) -> str: |
| 414 | history_lines = [] | 434 | history_lines = [] |
| 415 | for index, (question, answer) in enumerate(rounds, start=1): | 435 | for index, (question, answer) in enumerate(rounds, start=1): |
@@ -422,13 +442,19 @@ class WorkflowLaneRunner: | |||
| 422 | unresolved = "\n".join(f"- {item}" for item in unresolved_questions) or "- none" | 442 | unresolved = "\n".join(f"- {item}" for item in unresolved_questions) or "- none" |
| 423 | focus_slot = unresolved_slots[0] if unresolved_slots else None | 443 | focus_slot = unresolved_slots[0] if unresolved_slots else None |
| 424 | focus_label = describe_clarify_slot(focus_slot) | 444 | focus_label = describe_clarify_slot(focus_slot) |
| 445 | + stage_label = describe_clarify_stage(stage) | ||
| 446 | + pressure_label = describe_clarify_pressure_kind(pressure_kind) | ||
| 425 | return ( | 447 | return ( |
| 426 | "Clarify the task before planning or implementation.\n\n" | 448 | "Clarify the task before planning or implementation.\n\n" |
| 427 | f"Task: {task}\n" | 449 | f"Task: {task}\n" |
| 428 | f"Round: {round_index}\n" | 450 | f"Round: {round_index}\n" |
| 451 | + f"Stage: {stage_label}\n" | ||
| 429 | f"Focus slot: {focus_label}\n" | 452 | f"Focus slot: {focus_label}\n" |
| 453 | + f"Pressure pass: {pressure_label}\n" | ||
| 430 | "Ask exactly one focused question via AskUserQuestion.\n" | 454 | "Ask exactly one focused question via AskUserQuestion.\n" |
| 431 | - "Use the unresolved questions and prior answers to tighten scope.\n\n" | 455 | + "Use the unresolved questions and prior answers to tighten scope.\n" |
| 456 | + "If a pressure pass is active, prefer examples, tradeoffs, or " | ||
| 457 | + "challenged assumptions over generic restatement.\n\n" | ||
| 432 | "Unresolved questions:\n" | 458 | "Unresolved questions:\n" |
| 433 | f"{unresolved}\n\n" | 459 | f"{unresolved}\n\n" |
| 434 | "Prior clarify history:\n" | 460 | "Prior clarify history:\n" |
@@ -516,12 +542,13 @@ class WorkflowLaneRunner: | |||
| 516 | task: str, | 542 | task: str, |
| 517 | response_content: str, | 543 | response_content: str, |
| 518 | unresolved_slots: list[str], | 544 | unresolved_slots: list[str], |
| 545 | + pressure_kind: str | None, | ||
| 519 | ) -> str: | 546 | ) -> str: |
| 520 | match = re.search(r"([A-Z][^?]+\?)", response_content) | 547 | match = re.search(r"([A-Z][^?]+\?)", response_content) |
| 521 | if match: | 548 | if match: |
| 522 | return match.group(1).strip() | 549 | return match.group(1).strip() |
| 523 | focus_slot = unresolved_slots[0] if unresolved_slots else None | 550 | focus_slot = unresolved_slots[0] if unresolved_slots else None |
| 524 | - return build_clarify_question(task, focus_slot) | 551 | + return build_clarify_question(task, focus_slot, pressure_kind) |
| 525 | 552 | ||
| 526 | @staticmethod | 553 | @staticmethod |
| 527 | def _clarify_snapshot(task: str, brief: ClarifyBrief) -> ClarifySnapshot: | 554 | def _clarify_snapshot(task: str, brief: ClarifyBrief) -> ClarifySnapshot: |
src/loader/runtime/workflow_policy.py (modified)
@@ -9,7 +9,12 @@ from enum import StrEnum | |||
| 9 | from pathlib import Path | 9 | from pathlib import Path |
| 10 | from typing import Any | 10 | from typing import Any |
| 11 | 11 | ||
| 12 | -from .clarify_strategy import ClarifySnapshot, assess_clarify_snapshot, describe_clarify_slot | 12 | +from .clarify_strategy import ( |
| 13 | + ClarifySnapshot, | ||
| 14 | + assess_clarify_snapshot, | ||
| 15 | + describe_clarify_pressure_kind, | ||
| 16 | + describe_clarify_slot, | ||
| 17 | +) | ||
| 13 | from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket | 18 | from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket |
| 14 | 19 | ||
| 15 | 20 | ||
@@ -172,6 +177,10 @@ class ClarifyReview: | |||
| 172 | unresolved_questions: list[str] = field(default_factory=list) | 177 | unresolved_questions: list[str] = field(default_factory=list) |
| 173 | unresolved_slots: list[str] = field(default_factory=list) | 178 | unresolved_slots: list[str] = field(default_factory=list) |
| 174 | focus_slot: str | None = None | 179 | focus_slot: str | None = None |
| 180 | + stage: str | None = None | ||
| 181 | + pressure_kind: str | None = None | ||
| 182 | + pressure_pass_complete: bool = False | ||
| 183 | + missing_readiness_gates: list[str] = field(default_factory=list) | ||
| 175 | 184 | ||
| 176 | 185 | ||
| 177 | @dataclass(slots=True) | 186 | @dataclass(slots=True) |
@@ -498,6 +507,7 @@ class WorkflowPolicy: | |||
| 498 | snapshot: ClarifySnapshot, | 507 | snapshot: ClarifySnapshot, |
| 499 | round_index: int, | 508 | round_index: int, |
| 500 | max_rounds: int, | 509 | max_rounds: int, |
| 510 | + pressure_pass_complete: bool = False, | ||
| 501 | ) -> ClarifyReview: | 511 | ) -> ClarifyReview: |
| 502 | """Determine whether clarify should continue for another round.""" | 512 | """Determine whether clarify should continue for another round.""" |
| 503 | 513 | ||
@@ -505,12 +515,36 @@ class WorkflowPolicy: | |||
| 505 | task=task, | 515 | task=task, |
| 506 | answer=answer, | 516 | answer=answer, |
| 507 | snapshot=snapshot, | 517 | snapshot=snapshot, |
| 518 | + round_index=round_index, | ||
| 519 | + pressure_pass_complete=pressure_pass_complete, | ||
| 508 | ) | 520 | ) |
| 509 | unresolved = list(assessment.unresolved_questions) | 521 | unresolved = list(assessment.unresolved_questions) |
| 510 | focus_slot = assessment.focus_slot.value if assessment.focus_slot else None | 522 | focus_slot = assessment.focus_slot.value if assessment.focus_slot else None |
| 511 | focus_label = describe_clarify_slot(assessment.focus_slot) | 523 | focus_label = describe_clarify_slot(assessment.focus_slot) |
| 524 | + pressure_kind = ( | ||
| 525 | + assessment.pressure_kind.value if assessment.pressure_kind is not None else None | ||
| 526 | + ) | ||
| 527 | + pressure_label = describe_clarify_pressure_kind(assessment.pressure_kind) | ||
| 528 | + readiness_gates = list(assessment.missing_readiness_gates) | ||
| 512 | 529 | ||
| 513 | if unresolved and round_index < max_rounds: | 530 | if unresolved and round_index < max_rounds: |
| 531 | + if assessment.pressure_kind is not None: | ||
| 532 | + return ClarifyReview( | ||
| 533 | + should_continue=True, | ||
| 534 | + reason_code="clarify_pressure_pass_required", | ||
| 535 | + reason_summary=( | ||
| 536 | + "clarify still needs a " | ||
| 537 | + f"{pressure_label} pass around {focus_label}" | ||
| 538 | + ), | ||
| 539 | + unresolved_questions=unresolved, | ||
| 540 | + unresolved_slots=[slot.value for slot in assessment.unresolved_slots], | ||
| 541 | + focus_slot=focus_slot, | ||
| 542 | + stage=assessment.stage.value, | ||
| 543 | + pressure_kind=pressure_kind, | ||
| 544 | + pressure_pass_complete=assessment.pressure_pass_complete, | ||
| 545 | + missing_readiness_gates=readiness_gates, | ||
| 546 | + ) | ||
| 547 | + | ||
| 514 | return ClarifyReview( | 548 | return ClarifyReview( |
| 515 | should_continue=True, | 549 | should_continue=True, |
| 516 | reason_code="clarify_follow_up_needed", | 550 | reason_code="clarify_follow_up_needed", |
@@ -521,9 +555,30 @@ class WorkflowPolicy: | |||
| 521 | unresolved_questions=unresolved, | 555 | unresolved_questions=unresolved, |
| 522 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], | 556 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], |
| 523 | focus_slot=focus_slot, | 557 | focus_slot=focus_slot, |
| 558 | + stage=assessment.stage.value, | ||
| 559 | + pressure_kind=pressure_kind, | ||
| 560 | + pressure_pass_complete=assessment.pressure_pass_complete, | ||
| 561 | + missing_readiness_gates=readiness_gates, | ||
| 524 | ) | 562 | ) |
| 525 | 563 | ||
| 526 | if unresolved: | 564 | if unresolved: |
| 565 | + if not assessment.pressure_pass_complete and round_index >= 2: | ||
| 566 | + return ClarifyReview( | ||
| 567 | + should_continue=False, | ||
| 568 | + reason_code="clarify_budget_exhausted_without_pressure_pass", | ||
| 569 | + reason_summary=( | ||
| 570 | + "clarify budget exhausted before Loader completed a " | ||
| 571 | + "bounded pressure pass" | ||
| 572 | + ), | ||
| 573 | + unresolved_questions=unresolved, | ||
| 574 | + unresolved_slots=[slot.value for slot in assessment.unresolved_slots], | ||
| 575 | + focus_slot=focus_slot, | ||
| 576 | + stage=assessment.stage.value, | ||
| 577 | + pressure_kind=pressure_kind, | ||
| 578 | + pressure_pass_complete=assessment.pressure_pass_complete, | ||
| 579 | + missing_readiness_gates=readiness_gates, | ||
| 580 | + ) | ||
| 581 | + | ||
| 527 | return ClarifyReview( | 582 | return ClarifyReview( |
| 528 | should_continue=False, | 583 | should_continue=False, |
| 529 | reason_code="clarify_budget_exhausted", | 584 | reason_code="clarify_budget_exhausted", |
@@ -531,15 +586,27 @@ class WorkflowPolicy: | |||
| 531 | unresolved_questions=unresolved, | 586 | unresolved_questions=unresolved, |
| 532 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], | 587 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], |
| 533 | focus_slot=focus_slot, | 588 | focus_slot=focus_slot, |
| 589 | + stage=assessment.stage.value, | ||
| 590 | + pressure_kind=pressure_kind, | ||
| 591 | + pressure_pass_complete=assessment.pressure_pass_complete, | ||
| 592 | + missing_readiness_gates=readiness_gates, | ||
| 534 | ) | 593 | ) |
| 535 | 594 | ||
| 536 | return ClarifyReview( | 595 | return ClarifyReview( |
| 537 | should_continue=False, | 596 | should_continue=False, |
| 538 | reason_code="clarify_complete", | 597 | reason_code="clarify_complete", |
| 539 | - reason_summary="clarify gathered enough boundaries to proceed", | 598 | + reason_summary=( |
| 599 | + "clarify gathered enough boundaries and completed a bounded pressure pass" | ||
| 600 | + if assessment.pressure_pass_complete | ||
| 601 | + else "clarify gathered enough boundaries to proceed" | ||
| 602 | + ), | ||
| 540 | unresolved_questions=[], | 603 | unresolved_questions=[], |
| 541 | unresolved_slots=[], | 604 | unresolved_slots=[], |
| 542 | focus_slot=None, | 605 | focus_slot=None, |
| 606 | + stage=assessment.stage.value, | ||
| 607 | + pressure_kind=pressure_kind, | ||
| 608 | + pressure_pass_complete=assessment.pressure_pass_complete, | ||
| 609 | + missing_readiness_gates=readiness_gates, | ||
| 543 | ) | 610 | ) |
| 544 | 611 | ||
| 545 | def assess_artifact_freshness( | 612 | def assess_artifact_freshness( |
tests/test_clarify_strategy.py (modified)
@@ -3,8 +3,10 @@ | |||
| 3 | from __future__ import annotations | 3 | from __future__ import annotations |
| 4 | 4 | ||
| 5 | from loader.runtime.clarify_strategy import ( | 5 | from loader.runtime.clarify_strategy import ( |
| 6 | + ClarifyPressureKind, | ||
| 6 | ClarifySlot, | 7 | ClarifySlot, |
| 7 | ClarifySnapshot, | 8 | ClarifySnapshot, |
| 9 | + ClarifyStage, | ||
| 8 | assess_clarify_snapshot, | 10 | assess_clarify_snapshot, |
| 9 | build_clarify_question, | 11 | build_clarify_question, |
| 10 | ) | 12 | ) |
@@ -33,3 +35,50 @@ def test_build_clarify_question_targets_requested_slot() -> None: | |||
| 33 | 35 | ||
| 34 | assert "out of scope" in question.lower() | 36 | assert "out of scope" in question.lower() |
| 35 | 37 | ||
| 38 | + | ||
| 39 | +def test_assess_clarify_snapshot_requests_tradeoff_pressure_pass_on_later_round() -> None: | ||
| 40 | + assessment = assess_clarify_snapshot( | ||
| 41 | + task="Improve Loader runtime behavior.", | ||
| 42 | + answer="Focus on src/loader/runtime/conversation.py.", | ||
| 43 | + snapshot=ClarifySnapshot( | ||
| 44 | + task_statement="Improve Loader runtime behavior.", | ||
| 45 | + explicit_sections=["desired_outcome", "likely_touchpoints"], | ||
| 46 | + desired_outcome=["Make the runtime flow more disciplined."], | ||
| 47 | + likely_touchpoints=["src/loader/runtime/conversation.py"], | ||
| 48 | + ), | ||
| 49 | + round_index=2, | ||
| 50 | + ) | ||
| 51 | + | ||
| 52 | + assert assessment.stage == ClarifyStage.READINESS | ||
| 53 | + assert assessment.pressure_kind == ClarifyPressureKind.TRADEOFF | ||
| 54 | + assert assessment.pressure_pass_complete is False | ||
| 55 | + assert "non_goals" in assessment.missing_readiness_gates | ||
| 56 | + assert "decision_boundaries" in assessment.missing_readiness_gates | ||
| 57 | + | ||
| 58 | + | ||
| 59 | +def test_assess_clarify_snapshot_marks_pressure_pass_complete_for_boundary_answer() -> None: | ||
| 60 | + assessment = assess_clarify_snapshot( | ||
| 61 | + task="Improve Loader runtime behavior.", | ||
| 62 | + answer="Keep the CLI unchanged and do not broaden the UX without confirming first.", | ||
| 63 | + snapshot=ClarifySnapshot( | ||
| 64 | + task_statement="Improve Loader runtime behavior.", | ||
| 65 | + explicit_sections=["desired_outcome", "non_goals", "decision_boundaries"], | ||
| 66 | + desired_outcome=["Make the runtime flow more disciplined."], | ||
| 67 | + non_goals=["Keep the CLI unchanged."], | ||
| 68 | + decision_boundaries=["Confirm before broad UX changes."], | ||
| 69 | + ), | ||
| 70 | + round_index=2, | ||
| 71 | + ) | ||
| 72 | + | ||
| 73 | + assert assessment.pressure_pass_complete is True | ||
| 74 | + assert "pressure_pass" not in assessment.missing_readiness_gates | ||
| 75 | + | ||
| 76 | + | ||
| 77 | +def test_build_clarify_question_can_render_pressure_pass_question() -> None: | ||
| 78 | + question = build_clarify_question( | ||
| 79 | + "Tighten the runtime behavior.", | ||
| 80 | + ClarifySlot.NON_GOALS, | ||
| 81 | + ClarifyPressureKind.TRADEOFF, | ||
| 82 | + ) | ||
| 83 | + | ||
| 84 | + assert "unchanged" in question.lower() or "avoid" in question.lower() | ||
tests/test_workflow_policy.py modified @@ -91,6 +91,29 @@ def test_workflow_policy_requests_follow_up_when_clarify_answer_is_still_ambiguo | |||
| 91 | assert review.focus_slot == "likely_touchpoints" | 91 | assert review.focus_slot == "likely_touchpoints" |
| 92 | 92 | ||
| 93 | 93 | ||
| 94 | +def test_workflow_policy_requests_pressure_pass_on_later_clarify_round() -> None: | ||
| 95 | + policy = WorkflowPolicy() | ||
| 96 | + | ||
| 97 | + review = policy.review_clarify( | ||
| 98 | + task="Improve Loader runtime behavior.", | ||
| 99 | + answer="Focus on src/loader/runtime/conversation.py.", | ||
| 100 | + snapshot=ClarifySnapshot( | ||
| 101 | + task_statement="Improve Loader runtime behavior.", | ||
| 102 | + explicit_sections=["desired_outcome", "likely_touchpoints"], | ||
| 103 | + desired_outcome=["Make the runtime flow more disciplined."], | ||
| 104 | + likely_touchpoints=["src/loader/runtime/conversation.py"], | ||
| 105 | + ), | ||
| 106 | + round_index=2, | ||
| 107 | + max_rounds=4, | ||
| 108 | + ) | ||
| 109 | + | ||
| 110 | + assert review.should_continue is True | ||
| 111 | + assert review.reason_code == "clarify_pressure_pass_required" | ||
| 112 | + assert review.stage == "readiness" | ||
| 113 | + assert review.pressure_kind == "tradeoff" | ||
| 114 | + assert review.pressure_pass_complete is False | ||
| 115 | + | ||
| 116 | + | ||
| 94 | def test_workflow_timeline_entry_round_trips() -> None: | 117 | def test_workflow_timeline_entry_round_trips() -> None: |
| 95 | entry = WorkflowTimelineEntry( | 118 | entry = WorkflowTimelineEntry( |
| 96 | timestamp="2026-04-07T12:00:00Z", | 119 | timestamp="2026-04-07T12:00:00Z", |