Add pressure-pass clarify reviews
- SHA: 68fd28c6038d3de538f2b9e3a0e71b851f0895d0 (short: 68fd28c)
- Parents: 67878a4
- Tree: 06f6b37

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/clarify_strategy.py | 239 | 3 |
| M | src/loader/runtime/workflow_lanes.py | 30 | 3 |
| M | src/loader/runtime/workflow_policy.py | 69 | 2 |
| M | tests/test_clarify_strategy.py | 49 | 0 |
| M | tests/test_workflow_policy.py | 23 | 0 |
src/loader/runtime/clarify_strategy.py (modified)
@@ -18,6 +18,22 @@ class ClarifySlot(StrEnum): | ||
| 18 | 18 | LIKELY_TOUCHPOINTS = "likely_touchpoints" |
| 19 | 19 | |
| 20 | 20 | |
| 21 | +class ClarifyStage(StrEnum): | |
| 22 | + """High-level interview stage for bounded clarify mode.""" | |
| 23 | + | |
| 24 | + INTENT = "intent" | |
| 25 | + BOUNDARIES = "boundaries" | |
| 26 | + READINESS = "readiness" | |
| 27 | + | |
| 28 | + | |
| 29 | +class ClarifyPressureKind(StrEnum): | |
| 30 | + """Which kind of pressure pass the next clarify round should apply.""" | |
| 31 | + | |
| 32 | + EXAMPLE = "example" | |
| 33 | + TRADEOFF = "tradeoff" | |
| 34 | + ASSUMPTION = "assumption" | |
| 35 | + | |
| 36 | + | |
| 21 | 37 | _DEFAULT_SLOT_ORDER = [ |
| 22 | 38 | ClarifySlot.DESIRED_OUTCOME, |
| 23 | 39 | ClarifySlot.NON_GOALS, |
@@ -58,6 +74,10 @@ class ClarifyAssessment: | ||
| 58 | 74 | unresolved_slots: list[ClarifySlot] = field(default_factory=list) |
| 59 | 75 | unresolved_questions: list[str] = field(default_factory=list) |
| 60 | 76 | focus_slot: ClarifySlot | None = None |
| 77 | + stage: ClarifyStage = ClarifyStage.INTENT | |
| 78 | + pressure_kind: ClarifyPressureKind | None = None | |
| 79 | + pressure_pass_complete: bool = False | |
| 80 | + missing_readiness_gates: list[str] = field(default_factory=list) | |
| 61 | 81 | |
| 62 | 82 | |
| 63 | 83 | def assess_clarify_snapshot( |
@@ -65,6 +85,8 @@ def assess_clarify_snapshot( | ||
| 65 | 85 | task: str, |
| 66 | 86 | answer: str, |
| 67 | 87 | snapshot: ClarifySnapshot, |
| 88 | + round_index: int = 1, | |
| 89 | + pressure_pass_complete: bool = False, | |
| 68 | 90 | ) -> ClarifyAssessment: |
| 69 | 91 | """Determine which clarify slots remain unresolved after one round.""" |
| 70 | 92 | |
@@ -74,6 +96,17 @@ def assess_clarify_snapshot( | ||
| 74 | 96 | normalized_answer = answer.strip() |
| 75 | 97 | answer_is_short = len(re.findall(r"\w+", normalized_answer)) < 4 |
| 76 | 98 | answer_is_broad = _answer_uses_broad_language(normalized_answer) |
| 99 | + effective_pressure_pass_complete = ( | |
| 100 | + pressure_pass_complete or _answer_demonstrates_pressure_pass(normalized_answer) | |
| 101 | + ) | |
| 102 | + missing_readiness_gates: list[str] = [] | |
| 103 | + | |
| 104 | + non_goals_explicit = ClarifySlot.NON_GOALS.value in explicit and bool( | |
| 105 | + [item for item in snapshot.non_goals if item.strip()] | |
| 106 | + ) | |
| 107 | + decision_boundaries_explicit = ClarifySlot.DECISION_BOUNDARIES.value in explicit and bool( | |
| 108 | + [item for item in snapshot.decision_boundaries if item.strip()] | |
| 109 | + ) | |
| 77 | 110 | |
| 78 | 111 | if not normalized_answer: |
| 79 | 112 | unresolved_questions.append( |
@@ -89,7 +122,7 @@ def assess_clarify_snapshot( | ||
| 89 | 122 | unresolved_questions.append( |
| 90 | 123 | "The desired outcome is still not explicit enough to guide execution." |
| 91 | 124 | ) |
| 92 | - if ClarifySlot.NON_GOALS.value not in explicit or any( | |
| 125 | + if not non_goals_explicit or any( | |
| 93 | 126 | "anything not confirmed" in item.lower() for item in snapshot.non_goals |
| 94 | 127 | ): |
| 95 | 128 | unresolved_slots.append(ClarifySlot.NON_GOALS) |
@@ -109,7 +142,7 @@ def assess_clarify_snapshot( | ||
| 109 | 142 | unresolved_questions.append( |
| 110 | 143 | "Constraints are still too implicit for a safe implementation pass." |
| 111 | 144 | ) |
| 112 | - if ClarifySlot.DECISION_BOUNDARIES.value not in explicit: | |
| 145 | + if not decision_boundaries_explicit: | |
| 113 | 146 | unresolved_slots.append(ClarifySlot.DECISION_BOUNDARIES) |
| 114 | 147 | unresolved_questions.append( |
| 115 | 148 | "Decision boundaries are still too fuzzy for autonomous execution." |
@@ -131,14 +164,54 @@ def assess_clarify_snapshot( | ||
| 131 | 164 | unresolved_questions.append( |
| 132 | 165 | "The clarified scope still uses broad or ambiguous language." |
| 133 | 166 | ) |
| 167 | + | |
| 168 | + if not non_goals_explicit: | |
| 169 | + missing_readiness_gates.append("non_goals") | |
| 170 | + if not decision_boundaries_explicit: | |
| 171 | + missing_readiness_gates.append("decision_boundaries") | |
| 172 | + if round_index >= 2 and not effective_pressure_pass_complete: | |
| 173 | + missing_readiness_gates.append("pressure_pass") | |
| 174 | + | |
| 175 | + pressure_kind = _choose_pressure_kind( | |
| 176 | + round_index=round_index, | |
| 177 | + answer_is_broad=answer_is_broad, | |
| 178 | + missing_readiness_gates=missing_readiness_gates, | |
| 179 | + pressure_pass_complete=effective_pressure_pass_complete, | |
| 180 | + unresolved_slots=ordered_slots, | |
| 181 | + ) | |
| 182 | + if pressure_kind == ClarifyPressureKind.EXAMPLE: | |
| 183 | + unresolved_questions.append( | |
| 184 | + "Loader still needs a concrete example or counterexample before planning." | |
| 185 | + ) | |
| 186 | + elif pressure_kind == ClarifyPressureKind.TRADEOFF: | |
| 187 | + unresolved_questions.append( | |
| 188 | + "Loader still needs an explicit tradeoff or stop boundary before planning." | |
| 189 | + ) | |
| 190 | + elif pressure_kind == ClarifyPressureKind.ASSUMPTION: | |
| 191 | + unresolved_questions.append( | |
| 192 | + "Loader still needs one challenged assumption before it should proceed." | |
| 193 | + ) | |
| 194 | + | |
| 195 | + stage = _resolve_stage( | |
| 196 | + unresolved_slots=ordered_slots, | |
| 197 | + missing_readiness_gates=missing_readiness_gates, | |
| 198 | + ) | |
| 134 | 199 | return ClarifyAssessment( |
| 135 | 200 | unresolved_slots=ordered_slots, |
| 136 | 201 | unresolved_questions=list(dict.fromkeys(unresolved_questions)), |
| 137 | 202 | focus_slot=ordered_slots[0] if ordered_slots else None, |
| 203 | + stage=stage, | |
| 204 | + pressure_kind=pressure_kind, | |
| 205 | + pressure_pass_complete=effective_pressure_pass_complete, | |
| 206 | + missing_readiness_gates=list(dict.fromkeys(missing_readiness_gates)), | |
| 138 | 207 | ) |
| 139 | 208 | |
| 140 | 209 | |
| 141 | -def build_clarify_question(task: str, focus_slot: ClarifySlot | str | None) -> str: | |
| 210 | +def build_clarify_question( | |
| 211 | + task: str, | |
| 212 | + focus_slot: ClarifySlot | str | None, | |
| 213 | + pressure_kind: ClarifyPressureKind | str | None = None, | |
| 214 | +) -> str: | |
| 142 | 215 | """Render one targeted question for the current clarify focus slot.""" |
| 143 | 216 | |
| 144 | 217 | slot = ( |
@@ -148,6 +221,93 @@ def build_clarify_question(task: str, focus_slot: ClarifySlot | str | None) -> s | ||
| 148 | 221 | if focus_slot |
| 149 | 222 | else ClarifySlot.DESIRED_OUTCOME |
| 150 | 223 | ) |
| 224 | + pressure = ( | |
| 225 | + pressure_kind | |
| 226 | + if isinstance(pressure_kind, ClarifyPressureKind) | |
| 227 | + else ClarifyPressureKind(pressure_kind) | |
| 228 | + if pressure_kind | |
| 229 | + else None | |
| 230 | + ) | |
| 231 | + | |
| 232 | + if pressure == ClarifyPressureKind.EXAMPLE: | |
| 233 | + prompts = { | |
| 234 | + ClarifySlot.DESIRED_OUTCOME: ( | |
| 235 | + "What is one concrete example of the finished outcome, and one nearby " | |
| 236 | + "result that should still count as out of scope?" | |
| 237 | + ), | |
| 238 | + ClarifySlot.NON_GOALS: ( | |
| 239 | + "What is one tempting broader change I should avoid even if it seems helpful?" | |
| 240 | + ), | |
| 241 | + ClarifySlot.ACCEPTANCE_CRITERIA: ( | |
| 242 | + "What concrete example would prove this is done, and what shortcut " | |
| 243 | + "would still be wrong?" | |
| 244 | + ), | |
| 245 | + ClarifySlot.CONSTRAINTS: ( | |
| 246 | + "What is one concrete invariant I must preserve, and what change would violate it?" | |
| 247 | + ), | |
| 248 | + ClarifySlot.DECISION_BOUNDARIES: ( | |
| 249 | + "Give one example of a choice I may make alone and one example that " | |
| 250 | + "should force me to stop and confirm." | |
| 251 | + ), | |
| 252 | + ClarifySlot.LIKELY_TOUCHPOINTS: ( | |
| 253 | + "Which file should change first, and which nearby file should I " | |
| 254 | + "explicitly leave alone?" | |
| 255 | + ), | |
| 256 | + } | |
| 257 | + return prompts[slot] | |
| 258 | + | |
| 259 | + if pressure == ClarifyPressureKind.TRADEOFF: | |
| 260 | + prompts = { | |
| 261 | + ClarifySlot.DESIRED_OUTCOME: ( | |
| 262 | + "What result matters most here, and what broader improvement should I " | |
| 263 | + "still avoid chasing?" | |
| 264 | + ), | |
| 265 | + ClarifySlot.NON_GOALS: ( | |
| 266 | + "What should stay unchanged even if changing it would make the " | |
| 267 | + "implementation easier?" | |
| 268 | + ), | |
| 269 | + ClarifySlot.ACCEPTANCE_CRITERIA: ( | |
| 270 | + "What outcome would count as success, and what tempting shortcut " | |
| 271 | + "should still count as failure?" | |
| 272 | + ), | |
| 273 | + ClarifySlot.CONSTRAINTS: ( | |
| 274 | + "What must stay true even if it makes the change slower or less sweeping?" | |
| 275 | + ), | |
| 276 | + ClarifySlot.DECISION_BOUNDARIES: ( | |
| 277 | + "Which decision may I take on my own, and which one should I stop " | |
| 278 | + "and confirm before proceeding?" | |
| 279 | + ), | |
| 280 | + ClarifySlot.LIKELY_TOUCHPOINTS: ( | |
| 281 | + "Which file should I focus on, and what file or surface should stay unchanged?" | |
| 282 | + ), | |
| 283 | + } | |
| 284 | + return prompts[slot] | |
| 285 | + | |
| 286 | + if pressure == ClarifyPressureKind.ASSUMPTION: | |
| 287 | + prompts = { | |
| 288 | + ClarifySlot.DESIRED_OUTCOME: ( | |
| 289 | + "What assumption about the desired outcome am I most likely to get " | |
| 290 | + "wrong if I act now?" | |
| 291 | + ), | |
| 292 | + ClarifySlot.NON_GOALS: ( | |
| 293 | + "What assumption about scope should I not make without checking first?" | |
| 294 | + ), | |
| 295 | + ClarifySlot.ACCEPTANCE_CRITERIA: ( | |
| 296 | + "What assumption about 'done' would be risky to make without your confirmation?" | |
| 297 | + ), | |
| 298 | + ClarifySlot.CONSTRAINTS: ( | |
| 299 | + "What assumption about constraints would be unsafe for me to guess?" | |
| 300 | + ), | |
| 301 | + ClarifySlot.DECISION_BOUNDARIES: ( | |
| 302 | + "What decision would be risky for me to assume I can make without checking?" | |
| 303 | + ), | |
| 304 | + ClarifySlot.LIKELY_TOUCHPOINTS: ( | |
| 305 | + "What assumption about the right touchpoint or file would be most " | |
| 306 | + "dangerous if I guessed wrong?" | |
| 307 | + ), | |
| 308 | + } | |
| 309 | + return prompts[slot] | |
| 310 | + | |
| 151 | 311 | prompts = { |
| 152 | 312 | ClarifySlot.DESIRED_OUTCOME: ( |
| 153 | 313 | "What concrete outcome should this change achieve when it's done?" |
@@ -183,6 +343,30 @@ def describe_clarify_slot(slot: ClarifySlot | str | None) -> str: | ||
| 183 | 343 | return _SLOT_LABELS[resolved] |
| 184 | 344 | |
| 185 | 345 | |
| 346 | +def describe_clarify_stage(stage: ClarifyStage | str | None) -> str: | |
| 347 | + """Render a friendly clarify-stage label.""" | |
| 348 | + | |
| 349 | + if stage is None: | |
| 350 | + return "general" | |
| 351 | + resolved = stage if isinstance(stage, ClarifyStage) else ClarifyStage(stage) | |
| 352 | + return resolved.value | |
| 353 | + | |
| 354 | + | |
| 355 | +def describe_clarify_pressure_kind( | |
| 356 | + pressure_kind: ClarifyPressureKind | str | None, | |
| 357 | +) -> str: | |
| 358 | + """Render a friendly pressure-pass label.""" | |
| 359 | + | |
| 360 | + if pressure_kind is None: | |
| 361 | + return "none" | |
| 362 | + resolved = ( | |
| 363 | + pressure_kind | |
| 364 | + if isinstance(pressure_kind, ClarifyPressureKind) | |
| 365 | + else ClarifyPressureKind(pressure_kind) | |
| 366 | + ) | |
| 367 | + return resolved.value | |
| 368 | + | |
| 369 | + | |
| 186 | 370 | def _prioritize_slots( |
| 187 | 371 | slots: list[ClarifySlot], |
| 188 | 372 | *, |
@@ -203,6 +387,35 @@ def _prioritize_slots( | ||
| 203 | 387 | return ordered |
| 204 | 388 | |
| 205 | 389 | |
| 390 | +def _resolve_stage( | |
| 391 | + *, | |
| 392 | + unresolved_slots: list[ClarifySlot], | |
| 393 | + missing_readiness_gates: list[str], | |
| 394 | +) -> ClarifyStage: | |
| 395 | + if missing_readiness_gates: | |
| 396 | + return ClarifyStage.READINESS | |
| 397 | + if ClarifySlot.DESIRED_OUTCOME in unresolved_slots: | |
| 398 | + return ClarifyStage.INTENT | |
| 399 | + return ClarifyStage.BOUNDARIES | |
| 400 | + | |
| 401 | + | |
| 402 | +def _choose_pressure_kind( | |
| 403 | + *, | |
| 404 | + round_index: int, | |
| 405 | + answer_is_broad: bool, | |
| 406 | + missing_readiness_gates: list[str], | |
| 407 | + pressure_pass_complete: bool, | |
| 408 | + unresolved_slots: list[ClarifySlot], | |
| 409 | +) -> ClarifyPressureKind | None: | |
| 410 | + if round_index < 2 or pressure_pass_complete or not unresolved_slots: | |
| 411 | + return None | |
| 412 | + if answer_is_broad: | |
| 413 | + return ClarifyPressureKind.EXAMPLE | |
| 414 | + if any(gate in {"non_goals", "decision_boundaries"} for gate in missing_readiness_gates): | |
| 415 | + return ClarifyPressureKind.TRADEOFF | |
| 416 | + return ClarifyPressureKind.ASSUMPTION | |
| 417 | + | |
| 418 | + | |
| 206 | 419 | def _answer_uses_broad_language(answer: str) -> bool: |
| 207 | 420 | lowered = answer.lower() |
| 208 | 421 | if not lowered: |
@@ -217,6 +430,29 @@ def _answer_uses_broad_language(answer: str) -> bool: | ||
| 217 | 430 | "fix it", |
| 218 | 431 | "something", |
| 219 | 432 | "somehow", |
| 433 | + "maybe", | |
| 434 | + "around there", | |
| 435 | + ) | |
| 436 | + ) | |
| 437 | + | |
| 438 | + | |
| 439 | +def _answer_demonstrates_pressure_pass(answer: str) -> bool: | |
| 440 | + lowered = answer.lower() | |
| 441 | + if not lowered: | |
| 442 | + return False | |
| 443 | + return any( | |
| 444 | + phrase in lowered | |
| 445 | + for phrase in ( | |
| 446 | + "do not", | |
| 447 | + "don't", | |
| 448 | + "keep", | |
| 449 | + "leave", | |
| 450 | + "unchanged", | |
| 451 | + "out of scope", | |
| 452 | + "avoid", | |
| 453 | + "only", | |
| 454 | + "stop and ask", | |
| 455 | + "confirm first", | |
| 220 | 456 | ) |
| 221 | 457 | ) |
| 222 | 458 | |
src/loader/runtime/workflow_lanes.py (modified)
@@ -9,7 +9,13 @@ from pathlib import Path | ||
| 9 | 9 | from typing import Any |
| 10 | 10 | |
| 11 | 11 | from ..llm.base import Message, Role, ToolCall |
| 12 | -from .clarify_strategy import ClarifySnapshot, build_clarify_question, describe_clarify_slot | |
| 12 | +from .clarify_strategy import ( | |
| 13 | + ClarifySnapshot, | |
| 14 | + build_clarify_question, | |
| 15 | + describe_clarify_pressure_kind, | |
| 16 | + describe_clarify_slot, | |
| 17 | + describe_clarify_stage, | |
| 18 | +) | |
| 13 | 19 | from .dod import DefinitionOfDone, DefinitionOfDoneStore |
| 14 | 20 | from .events import AgentEvent, TurnSummary |
| 15 | 21 | from .executor import ToolExecutor |
@@ -67,6 +73,10 @@ class WorkflowLaneRunner: | ||
| 67 | 73 | reason_summary="clarify gathered enough boundaries to proceed", |
| 68 | 74 | unresolved_slots=[], |
| 69 | 75 | focus_slot=None, |
| 76 | + stage="intent", | |
| 77 | + pressure_kind=None, | |
| 78 | + pressure_pass_complete=False, | |
| 79 | + missing_readiness_gates=[], | |
| 70 | 80 | ) |
| 71 | 81 | |
| 72 | 82 | for round_index in range(1, max_rounds + 1): |
@@ -79,6 +89,8 @@ class WorkflowLaneRunner: | ||
| 79 | 89 | rounds=rounds, |
| 80 | 90 | unresolved_questions=review.unresolved_questions, |
| 81 | 91 | unresolved_slots=review.unresolved_slots, |
| 92 | + stage=review.stage, | |
| 93 | + pressure_kind=review.pressure_kind, | |
| 82 | 94 | ) |
| 83 | 95 | rounds.append((question, answer)) |
| 84 | 96 | review = self.workflow_policy.review_clarify( |
@@ -87,6 +99,7 @@ class WorkflowLaneRunner: | ||
| 87 | 99 | snapshot=self._clarify_snapshot(task, latest_brief), |
| 88 | 100 | round_index=round_index, |
| 89 | 101 | max_rounds=max_rounds, |
| 102 | + pressure_pass_complete=review.pressure_pass_complete, | |
| 90 | 103 | ) |
| 91 | 104 | if review.should_continue: |
| 92 | 105 | append_timeline( |
@@ -301,6 +314,8 @@ class WorkflowLaneRunner: | ||
| 301 | 314 | rounds: list[tuple[str, str]], |
| 302 | 315 | unresolved_questions: list[str], |
| 303 | 316 | unresolved_slots: list[str], |
| 317 | + stage: str | None, | |
| 318 | + pressure_kind: str | None, | |
| 304 | 319 | ) -> tuple[ClarifyBrief, str, str]: |
| 305 | 320 | ask_tool = self.agent.registry.get("AskUserQuestion") |
| 306 | 321 | assert ask_tool is not None |
@@ -311,6 +326,8 @@ class WorkflowLaneRunner: | ||
| 311 | 326 | rounds=rounds, |
| 312 | 327 | unresolved_questions=unresolved_questions, |
| 313 | 328 | unresolved_slots=unresolved_slots, |
| 329 | + stage=stage, | |
| 330 | + pressure_kind=pressure_kind, | |
| 314 | 331 | ), |
| 315 | 332 | tools=[ask_tool.to_schema()], |
| 316 | 333 | max_tokens=500, |
@@ -328,6 +345,7 @@ class WorkflowLaneRunner: | ||
| 328 | 345 | task, |
| 329 | 346 | response.content, |
| 330 | 347 | unresolved_slots, |
| 348 | + pressure_kind, | |
| 331 | 349 | ) |
| 332 | 350 | title = None |
| 333 | 351 | options = None |
@@ -410,6 +428,8 @@ class WorkflowLaneRunner: | ||
| 410 | 428 | rounds: list[tuple[str, str]], |
| 411 | 429 | unresolved_questions: list[str], |
| 412 | 430 | unresolved_slots: list[str], |
| 431 | + stage: str | None, | |
| 432 | + pressure_kind: str | None, | |
| 413 | 433 | ) -> str: |
| 414 | 434 | history_lines = [] |
| 415 | 435 | for index, (question, answer) in enumerate(rounds, start=1): |
@@ -422,13 +442,19 @@ class WorkflowLaneRunner: | ||
| 422 | 442 | unresolved = "\n".join(f"- {item}" for item in unresolved_questions) or "- none" |
| 423 | 443 | focus_slot = unresolved_slots[0] if unresolved_slots else None |
| 424 | 444 | focus_label = describe_clarify_slot(focus_slot) |
| 445 | + stage_label = describe_clarify_stage(stage) | |
| 446 | + pressure_label = describe_clarify_pressure_kind(pressure_kind) | |
| 425 | 447 | return ( |
| 426 | 448 | "Clarify the task before planning or implementation.\n\n" |
| 427 | 449 | f"Task: {task}\n" |
| 428 | 450 | f"Round: {round_index}\n" |
| 451 | + f"Stage: {stage_label}\n" | |
| 429 | 452 | f"Focus slot: {focus_label}\n" |
| 453 | + f"Pressure pass: {pressure_label}\n" | |
| 430 | 454 | "Ask exactly one focused question via AskUserQuestion.\n" |
| 431 | - "Use the unresolved questions and prior answers to tighten scope.\n\n" | |
| 455 | + "Use the unresolved questions and prior answers to tighten scope.\n" | |
| 456 | + "If a pressure pass is active, prefer examples, tradeoffs, or " | |
| 457 | + "challenged assumptions over generic restatement.\n\n" | |
| 432 | 458 | "Unresolved questions:\n" |
| 433 | 459 | f"{unresolved}\n\n" |
| 434 | 460 | "Prior clarify history:\n" |
@@ -516,12 +542,13 @@ class WorkflowLaneRunner: | ||
| 516 | 542 | task: str, |
| 517 | 543 | response_content: str, |
| 518 | 544 | unresolved_slots: list[str], |
| 545 | + pressure_kind: str | None, | |
| 519 | 546 | ) -> str: |
| 520 | 547 | match = re.search(r"([A-Z][^?]+\?)", response_content) |
| 521 | 548 | if match: |
| 522 | 549 | return match.group(1).strip() |
| 523 | 550 | focus_slot = unresolved_slots[0] if unresolved_slots else None |
| 524 | - return build_clarify_question(task, focus_slot) | |
| 551 | + return build_clarify_question(task, focus_slot, pressure_kind) | |
| 525 | 552 | |
| 526 | 553 | @staticmethod |
| 527 | 554 | def _clarify_snapshot(task: str, brief: ClarifyBrief) -> ClarifySnapshot: |
src/loader/runtime/workflow_policy.py (modified)
@@ -9,7 +9,12 @@ from enum import StrEnum | ||
| 9 | 9 | from pathlib import Path |
| 10 | 10 | from typing import Any |
| 11 | 11 | |
| 12 | -from .clarify_strategy import ClarifySnapshot, assess_clarify_snapshot, describe_clarify_slot | |
| 12 | +from .clarify_strategy import ( | |
| 13 | + ClarifySnapshot, | |
| 14 | + assess_clarify_snapshot, | |
| 15 | + describe_clarify_pressure_kind, | |
| 16 | + describe_clarify_slot, | |
| 17 | +) | |
| 13 | 18 | from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket |
| 14 | 19 | |
| 15 | 20 | |
@@ -172,6 +177,10 @@ class ClarifyReview: | ||
| 172 | 177 | unresolved_questions: list[str] = field(default_factory=list) |
| 173 | 178 | unresolved_slots: list[str] = field(default_factory=list) |
| 174 | 179 | focus_slot: str | None = None |
| 180 | + stage: str | None = None | |
| 181 | + pressure_kind: str | None = None | |
| 182 | + pressure_pass_complete: bool = False | |
| 183 | + missing_readiness_gates: list[str] = field(default_factory=list) | |
| 175 | 184 | |
| 176 | 185 | |
| 177 | 186 | @dataclass(slots=True) |
@@ -498,6 +507,7 @@ class WorkflowPolicy: | ||
| 498 | 507 | snapshot: ClarifySnapshot, |
| 499 | 508 | round_index: int, |
| 500 | 509 | max_rounds: int, |
| 510 | + pressure_pass_complete: bool = False, | |
| 501 | 511 | ) -> ClarifyReview: |
| 502 | 512 | """Determine whether clarify should continue for another round.""" |
| 503 | 513 | |
@@ -505,12 +515,36 @@ class WorkflowPolicy: | ||
| 505 | 515 | task=task, |
| 506 | 516 | answer=answer, |
| 507 | 517 | snapshot=snapshot, |
| 518 | + round_index=round_index, | |
| 519 | + pressure_pass_complete=pressure_pass_complete, | |
| 508 | 520 | ) |
| 509 | 521 | unresolved = list(assessment.unresolved_questions) |
| 510 | 522 | focus_slot = assessment.focus_slot.value if assessment.focus_slot else None |
| 511 | 523 | focus_label = describe_clarify_slot(assessment.focus_slot) |
| 524 | + pressure_kind = ( | |
| 525 | + assessment.pressure_kind.value if assessment.pressure_kind is not None else None | |
| 526 | + ) | |
| 527 | + pressure_label = describe_clarify_pressure_kind(assessment.pressure_kind) | |
| 528 | + readiness_gates = list(assessment.missing_readiness_gates) | |
| 512 | 529 | |
| 513 | 530 | if unresolved and round_index < max_rounds: |
| 531 | + if assessment.pressure_kind is not None: | |
| 532 | + return ClarifyReview( | |
| 533 | + should_continue=True, | |
| 534 | + reason_code="clarify_pressure_pass_required", | |
| 535 | + reason_summary=( | |
| 536 | + "clarify still needs a " | |
| 537 | + f"{pressure_label} pass around {focus_label}" | |
| 538 | + ), | |
| 539 | + unresolved_questions=unresolved, | |
| 540 | + unresolved_slots=[slot.value for slot in assessment.unresolved_slots], | |
| 541 | + focus_slot=focus_slot, | |
| 542 | + stage=assessment.stage.value, | |
| 543 | + pressure_kind=pressure_kind, | |
| 544 | + pressure_pass_complete=assessment.pressure_pass_complete, | |
| 545 | + missing_readiness_gates=readiness_gates, | |
| 546 | + ) | |
| 547 | + | |
| 514 | 548 | return ClarifyReview( |
| 515 | 549 | should_continue=True, |
| 516 | 550 | reason_code="clarify_follow_up_needed", |
@@ -521,9 +555,30 @@ class WorkflowPolicy: | ||
| 521 | 555 | unresolved_questions=unresolved, |
| 522 | 556 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], |
| 523 | 557 | focus_slot=focus_slot, |
| 558 | + stage=assessment.stage.value, | |
| 559 | + pressure_kind=pressure_kind, | |
| 560 | + pressure_pass_complete=assessment.pressure_pass_complete, | |
| 561 | + missing_readiness_gates=readiness_gates, | |
| 524 | 562 | ) |
| 525 | 563 | |
| 526 | 564 | if unresolved: |
| 565 | + if not assessment.pressure_pass_complete and round_index >= 2: | |
| 566 | + return ClarifyReview( | |
| 567 | + should_continue=False, | |
| 568 | + reason_code="clarify_budget_exhausted_without_pressure_pass", | |
| 569 | + reason_summary=( | |
| 570 | + "clarify budget exhausted before Loader completed a " | |
| 571 | + "bounded pressure pass" | |
| 572 | + ), | |
| 573 | + unresolved_questions=unresolved, | |
| 574 | + unresolved_slots=[slot.value for slot in assessment.unresolved_slots], | |
| 575 | + focus_slot=focus_slot, | |
| 576 | + stage=assessment.stage.value, | |
| 577 | + pressure_kind=pressure_kind, | |
| 578 | + pressure_pass_complete=assessment.pressure_pass_complete, | |
| 579 | + missing_readiness_gates=readiness_gates, | |
| 580 | + ) | |
| 581 | + | |
| 527 | 582 | return ClarifyReview( |
| 528 | 583 | should_continue=False, |
| 529 | 584 | reason_code="clarify_budget_exhausted", |
@@ -531,15 +586,27 @@ class WorkflowPolicy: | ||
| 531 | 586 | unresolved_questions=unresolved, |
| 532 | 587 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], |
| 533 | 588 | focus_slot=focus_slot, |
| 589 | + stage=assessment.stage.value, | |
| 590 | + pressure_kind=pressure_kind, | |
| 591 | + pressure_pass_complete=assessment.pressure_pass_complete, | |
| 592 | + missing_readiness_gates=readiness_gates, | |
| 534 | 593 | ) |
| 535 | 594 | |
| 536 | 595 | return ClarifyReview( |
| 537 | 596 | should_continue=False, |
| 538 | 597 | reason_code="clarify_complete", |
| 539 | - reason_summary="clarify gathered enough boundaries to proceed", | |
| 598 | + reason_summary=( | |
| 599 | + "clarify gathered enough boundaries and completed a bounded pressure pass" | |
| 600 | + if assessment.pressure_pass_complete | |
| 601 | + else "clarify gathered enough boundaries to proceed" | |
| 602 | + ), | |
| 540 | 603 | unresolved_questions=[], |
| 541 | 604 | unresolved_slots=[], |
| 542 | 605 | focus_slot=None, |
| 606 | + stage=assessment.stage.value, | |
| 607 | + pressure_kind=pressure_kind, | |
| 608 | + pressure_pass_complete=assessment.pressure_pass_complete, | |
| 609 | + missing_readiness_gates=readiness_gates, | |
| 543 | 610 | ) |
| 544 | 611 | |
| 545 | 612 | def assess_artifact_freshness( |
tests/test_clarify_strategy.py (modified)
@@ -3,8 +3,10 @@ | ||
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | 5 | from loader.runtime.clarify_strategy import ( |
| 6 | + ClarifyPressureKind, | |
| 6 | 7 | ClarifySlot, |
| 7 | 8 | ClarifySnapshot, |
| 9 | + ClarifyStage, | |
| 8 | 10 | assess_clarify_snapshot, |
| 9 | 11 | build_clarify_question, |
| 10 | 12 | ) |
@@ -33,3 +35,50 @@ def test_build_clarify_question_targets_requested_slot() -> None: | ||
| 33 | 35 | |
| 34 | 36 | assert "out of scope" in question.lower() |
| 35 | 37 | |
| 38 | + | |
| 39 | +def test_assess_clarify_snapshot_requests_tradeoff_pressure_pass_on_later_round() -> None: | |
| 40 | + assessment = assess_clarify_snapshot( | |
| 41 | + task="Improve Loader runtime behavior.", | |
| 42 | + answer="Focus on src/loader/runtime/conversation.py.", | |
| 43 | + snapshot=ClarifySnapshot( | |
| 44 | + task_statement="Improve Loader runtime behavior.", | |
| 45 | + explicit_sections=["desired_outcome", "likely_touchpoints"], | |
| 46 | + desired_outcome=["Make the runtime flow more disciplined."], | |
| 47 | + likely_touchpoints=["src/loader/runtime/conversation.py"], | |
| 48 | + ), | |
| 49 | + round_index=2, | |
| 50 | + ) | |
| 51 | + | |
| 52 | + assert assessment.stage == ClarifyStage.READINESS | |
| 53 | + assert assessment.pressure_kind == ClarifyPressureKind.TRADEOFF | |
| 54 | + assert assessment.pressure_pass_complete is False | |
| 55 | + assert "non_goals" in assessment.missing_readiness_gates | |
| 56 | + assert "decision_boundaries" in assessment.missing_readiness_gates | |
| 57 | + | |
| 58 | + | |
| 59 | +def test_assess_clarify_snapshot_marks_pressure_pass_complete_for_boundary_answer() -> None: | |
| 60 | + assessment = assess_clarify_snapshot( | |
| 61 | + task="Improve Loader runtime behavior.", | |
| 62 | + answer="Keep the CLI unchanged and do not broaden the UX without confirming first.", | |
| 63 | + snapshot=ClarifySnapshot( | |
| 64 | + task_statement="Improve Loader runtime behavior.", | |
| 65 | + explicit_sections=["desired_outcome", "non_goals", "decision_boundaries"], | |
| 66 | + desired_outcome=["Make the runtime flow more disciplined."], | |
| 67 | + non_goals=["Keep the CLI unchanged."], | |
| 68 | + decision_boundaries=["Confirm before broad UX changes."], | |
| 69 | + ), | |
| 70 | + round_index=2, | |
| 71 | + ) | |
| 72 | + | |
| 73 | + assert assessment.pressure_pass_complete is True | |
| 74 | + assert "pressure_pass" not in assessment.missing_readiness_gates | |
| 75 | + | |
| 76 | + | |
| 77 | +def test_build_clarify_question_can_render_pressure_pass_question() -> None: | |
| 78 | + question = build_clarify_question( | |
| 79 | + "Tighten the runtime behavior.", | |
| 80 | + ClarifySlot.NON_GOALS, | |
| 81 | + ClarifyPressureKind.TRADEOFF, | |
| 82 | + ) | |
| 83 | + | |
| 84 | + assert "unchanged" in question.lower() or "avoid" in question.lower() | |
tests/test_workflow_policy.py (modified)
@@ -91,6 +91,29 @@ def test_workflow_policy_requests_follow_up_when_clarify_answer_is_still_ambiguo | ||
| 91 | 91 | assert review.focus_slot == "likely_touchpoints" |
| 92 | 92 | |
| 93 | 93 | |
| 94 | +def test_workflow_policy_requests_pressure_pass_on_later_clarify_round() -> None: | |
| 95 | + policy = WorkflowPolicy() | |
| 96 | + | |
| 97 | + review = policy.review_clarify( | |
| 98 | + task="Improve Loader runtime behavior.", | |
| 99 | + answer="Focus on src/loader/runtime/conversation.py.", | |
| 100 | + snapshot=ClarifySnapshot( | |
| 101 | + task_statement="Improve Loader runtime behavior.", | |
| 102 | + explicit_sections=["desired_outcome", "likely_touchpoints"], | |
| 103 | + desired_outcome=["Make the runtime flow more disciplined."], | |
| 104 | + likely_touchpoints=["src/loader/runtime/conversation.py"], | |
| 105 | + ), | |
| 106 | + round_index=2, | |
| 107 | + max_rounds=4, | |
| 108 | + ) | |
| 109 | + | |
| 110 | + assert review.should_continue is True | |
| 111 | + assert review.reason_code == "clarify_pressure_pass_required" | |
| 112 | + assert review.stage == "readiness" | |
| 113 | + assert review.pressure_kind == "tradeoff" | |
| 114 | + assert review.pressure_pass_complete is False | |
| 115 | + | |
| 116 | + | |
| 94 | 117 | def test_workflow_timeline_entry_round_trips() -> None: |
| 95 | 118 | entry = WorkflowTimelineEntry( |
| 96 | 119 | timestamp="2026-04-07T12:00:00Z", |