| 1 | """Workflow policy, clarify review, and timeline contracts.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import re |
| 6 | from dataclasses import dataclass, field |
| 7 | from datetime import UTC, datetime |
| 8 | from enum import StrEnum |
| 9 | from pathlib import Path |
| 10 | from typing import Any |
| 11 | |
| 12 | from .clarify_strategy import ( |
| 13 | ClarifySnapshot, |
| 14 | assess_clarify_snapshot, |
| 15 | describe_clarify_pressure_kind, |
| 16 | describe_clarify_slot, |
| 17 | ) |
| 18 | from .evidence_provenance import ( |
| 19 | EvidenceProvenance, |
| 20 | normalize_evidence_provenance, |
| 21 | summarize_evidence_provenance, |
| 22 | ) |
| 23 | from .verification_observations import ( |
| 24 | VerificationObservation, |
| 25 | normalize_verification_observations, |
| 26 | summarize_verification_observations, |
| 27 | ) |
| 28 | from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket |
| 29 | |
| 30 | |
| 31 | class WorkflowMode(StrEnum): |
| 32 | """High-level runtime modes for one Loader task turn.""" |
| 33 | |
| 34 | CLARIFY = "clarify" |
| 35 | PLAN = "plan" |
| 36 | EXECUTE = "execute" |
| 37 | VERIFY = "verify" |
| 38 | |
| 39 | @classmethod |
| 40 | def from_str(cls, value: str | None) -> WorkflowMode | None: |
| 41 | if value is None: |
| 42 | return None |
| 43 | normalized = value.strip().lower() |
| 44 | for mode in cls: |
| 45 | if mode.value == normalized: |
| 46 | return mode |
| 47 | raise ValueError(f"Unknown workflow mode: {value}") |
| 48 | |
| 49 | |
| 50 | class WorkflowDecisionKind(StrEnum): |
| 51 | """Classification for why a workflow mode was selected.""" |
| 52 | |
| 53 | INITIAL_ROUTE = "initial_route" |
| 54 | REQUESTED = "requested" |
| 55 | ARTIFACT_REUSE = "artifact_reuse" |
| 56 | HANDOFF = "handoff" |
| 57 | REENTRY = "reentry" |
| 58 | FORCED = "forced" |
| 59 | |
| 60 | |
| 61 | class WorkflowTimelineEntryKind(StrEnum): |
| 62 | """Workflow timeline entry categories.""" |
| 63 | |
| 64 | ROUTE = "route" |
| 65 | HANDOFF = "handoff" |
| 66 | REENTRY = "reentry" |
| 67 | CLARIFY_CONTINUE = "clarify_continue" |
| 68 | CLARIFY_EXIT = "clarify_exit" |
| 69 | PLAN_REFRESH = "plan_refresh" |
| 70 | VERIFY_OBSERVATION = "verify_observation" |
| 71 | VERIFY_SKIP = "verify_skip" |
| 72 | COMPLETION_CHECK = "completion_check" |
| 73 | COMPLETION_CONTINUE = "completion_continue" |
| 74 | COMPLETION_COMPLETE = "completion_complete" |
| 75 | COMPLETION_FINALIZE = "completion_finalize" |
| 76 | REPAIR_RETRY = "repair_retry" |
| 77 | REPAIR_FAIL = "repair_fail" |
| 78 | |
| 79 | |
| 80 | @dataclass(slots=True) |
| 81 | class ModeDecision: |
| 82 | """Workflow-policy output for one route or handoff.""" |
| 83 | |
| 84 | mode: WorkflowMode |
| 85 | reason_code: str |
| 86 | reason_summary: str |
| 87 | decision_kind: WorkflowDecisionKind = WorkflowDecisionKind.INITIAL_ROUTE |
| 88 | ambiguity_score: float = 0.0 |
| 89 | complexity_score: float = 0.0 |
| 90 | route_score: float = 0.0 |
| 91 | runner_up_mode: WorkflowMode | None = None |
| 92 | runner_up_score: float = 0.0 |
| 93 | scheduled_next_mode: WorkflowMode | None = None |
| 94 | unresolved_questions: list[str] = field(default_factory=list) |
| 95 | pressure_summary: list[str] = field(default_factory=list) |
| 96 | signal_summary: list[str] = field(default_factory=list) |
| 97 | evidence_summary: list[str] = field(default_factory=list) |
| 98 | clarify_stage: str | None = None |
| 99 | clarify_pressure_kind: str | None = None |
| 100 | pressure_pass_complete: bool = False |
| 101 | missing_readiness_gates: list[str] = field(default_factory=list) |
| 102 | |
| 103 | @property |
| 104 | def reason(self) -> str: |
| 105 | return self.reason_summary |
| 106 | |
| 107 | @classmethod |
| 108 | def transition( |
| 109 | cls, |
| 110 | mode: WorkflowMode, |
| 111 | *, |
| 112 | reason_code: str, |
| 113 | reason_summary: str, |
| 114 | decision_kind: WorkflowDecisionKind = WorkflowDecisionKind.HANDOFF, |
| 115 | ambiguity_score: float = 0.0, |
| 116 | complexity_score: float = 0.0, |
| 117 | route_score: float = 0.0, |
| 118 | runner_up_mode: WorkflowMode | None = None, |
| 119 | runner_up_score: float = 0.0, |
| 120 | scheduled_next_mode: WorkflowMode | None = None, |
| 121 | unresolved_questions: list[str] | None = None, |
| 122 | pressure_summary: list[str] | None = None, |
| 123 | signal_summary: list[str] | None = None, |
| 124 | evidence_summary: list[str] | None = None, |
| 125 | clarify_stage: str | None = None, |
| 126 | clarify_pressure_kind: str | None = None, |
| 127 | pressure_pass_complete: bool = False, |
| 128 | missing_readiness_gates: list[str] | None = None, |
| 129 | ) -> ModeDecision: |
| 130 | """Build a non-router workflow decision for handoffs and reentry.""" |
| 131 | |
| 132 | return cls( |
| 133 | mode=mode, |
| 134 | reason_code=reason_code, |
| 135 | reason_summary=reason_summary, |
| 136 | decision_kind=decision_kind, |
| 137 | ambiguity_score=ambiguity_score, |
| 138 | complexity_score=complexity_score, |
| 139 | route_score=route_score, |
| 140 | runner_up_mode=runner_up_mode, |
| 141 | runner_up_score=runner_up_score, |
| 142 | scheduled_next_mode=scheduled_next_mode, |
| 143 | unresolved_questions=list(unresolved_questions or []), |
| 144 | pressure_summary=list(pressure_summary or []), |
| 145 | signal_summary=list(signal_summary or []), |
| 146 | evidence_summary=list(evidence_summary or []), |
| 147 | clarify_stage=clarify_stage, |
| 148 | clarify_pressure_kind=clarify_pressure_kind, |
| 149 | pressure_pass_complete=pressure_pass_complete, |
| 150 | missing_readiness_gates=list(missing_readiness_gates or []), |
| 151 | ) |
| 152 | |
| 153 | def with_context( |
| 154 | self, |
| 155 | *, |
| 156 | reason_code: str | None = None, |
| 157 | reason_summary: str | None = None, |
| 158 | decision_kind: WorkflowDecisionKind | None = None, |
| 159 | route_score: float | None = None, |
| 160 | runner_up_mode: WorkflowMode | None = None, |
| 161 | runner_up_score: float | None = None, |
| 162 | scheduled_next_mode: WorkflowMode | None = None, |
| 163 | unresolved_questions: list[str] | None = None, |
| 164 | pressure_summary: list[str] | None = None, |
| 165 | signal_summary: list[str] | None = None, |
| 166 | evidence_summary: list[str] | None = None, |
| 167 | clarify_stage: str | None = None, |
| 168 | clarify_pressure_kind: str | None = None, |
| 169 | pressure_pass_complete: bool | None = None, |
| 170 | missing_readiness_gates: list[str] | None = None, |
| 171 | ) -> ModeDecision: |
| 172 | """Return a copy with updated contextual routing metadata.""" |
| 173 | |
| 174 | return ModeDecision( |
| 175 | mode=self.mode, |
| 176 | reason_code=reason_code or self.reason_code, |
| 177 | reason_summary=reason_summary or self.reason_summary, |
| 178 | decision_kind=decision_kind or self.decision_kind, |
| 179 | ambiguity_score=self.ambiguity_score, |
| 180 | complexity_score=self.complexity_score, |
| 181 | route_score=self.route_score if route_score is None else route_score, |
| 182 | runner_up_mode=self.runner_up_mode if runner_up_mode is None else runner_up_mode, |
| 183 | runner_up_score=( |
| 184 | self.runner_up_score if runner_up_score is None else runner_up_score |
| 185 | ), |
| 186 | scheduled_next_mode=( |
| 187 | self.scheduled_next_mode |
| 188 | if scheduled_next_mode is None |
| 189 | else scheduled_next_mode |
| 190 | ), |
| 191 | unresolved_questions=list( |
| 192 | self.unresolved_questions |
| 193 | if unresolved_questions is None |
| 194 | else unresolved_questions |
| 195 | ), |
| 196 | pressure_summary=list( |
| 197 | self.pressure_summary |
| 198 | if pressure_summary is None |
| 199 | else pressure_summary |
| 200 | ), |
| 201 | signal_summary=list( |
| 202 | self.signal_summary if signal_summary is None else signal_summary |
| 203 | ), |
| 204 | evidence_summary=list( |
| 205 | self.evidence_summary |
| 206 | if evidence_summary is None |
| 207 | else evidence_summary |
| 208 | ), |
| 209 | clarify_stage=( |
| 210 | self.clarify_stage if clarify_stage is None else clarify_stage |
| 211 | ), |
| 212 | clarify_pressure_kind=( |
| 213 | self.clarify_pressure_kind |
| 214 | if clarify_pressure_kind is None |
| 215 | else clarify_pressure_kind |
| 216 | ), |
| 217 | pressure_pass_complete=( |
| 218 | self.pressure_pass_complete |
| 219 | if pressure_pass_complete is None |
| 220 | else pressure_pass_complete |
| 221 | ), |
| 222 | missing_readiness_gates=list( |
| 223 | self.missing_readiness_gates |
| 224 | if missing_readiness_gates is None |
| 225 | else missing_readiness_gates |
| 226 | ), |
| 227 | ) |
| 228 | |
| 229 | |
| 230 | @dataclass(slots=True) |
| 231 | class ClarifyReview: |
| 232 | """Outcome of one clarify-round review.""" |
| 233 | |
| 234 | should_continue: bool |
| 235 | reason_code: str |
| 236 | reason_summary: str |
| 237 | unresolved_questions: list[str] = field(default_factory=list) |
| 238 | unresolved_slots: list[str] = field(default_factory=list) |
| 239 | focus_slot: str | None = None |
| 240 | stage: str | None = None |
| 241 | pressure_kind: str | None = None |
| 242 | pressure_pass_complete: bool = False |
| 243 | missing_readiness_gates: list[str] = field(default_factory=list) |
| 244 | |
| 245 | |
| 246 | class ArtifactEvidenceKind(StrEnum): |
| 247 | """Structured evidence categories behind recovery choices.""" |
| 248 | |
| 249 | CONFIRMED_TOUCHPOINT = "confirmed_touchpoint" |
| 250 | INFERRED_TOUCHPOINT = "inferred_touchpoint" |
| 251 | ACCEPTANCE_ANCHOR = "acceptance_anchor" |
| 252 | CONTRADICTED_ASSUMPTION = "contradicted_assumption" |
| 253 | VERIFICATION_CONTRADICTION = "verification_contradiction" |
| 254 | TASK_BOUNDARY_CHANGE = "task_boundary_change" |
| 255 | |
| 256 | |
| 257 | @dataclass(slots=True) |
| 258 | class ArtifactEvidence: |
| 259 | """One typed piece of evidence describing workflow drift.""" |
| 260 | |
| 261 | kind: str |
| 262 | summary: str |
| 263 | |
| 264 | def render_summary(self) -> str: |
| 265 | """Render a concise operator-facing evidence string.""" |
| 266 | |
| 267 | return f"{self.kind.replace('_', ' ')}: {self.summary}" |
| 268 | |
| 269 | |
| 270 | @dataclass(slots=True) |
| 271 | class ArtifactFreshness: |
| 272 | """Whether persisted workflow artifacts still fit the current task state.""" |
| 273 | |
| 274 | stale_brief: bool = False |
| 275 | stale_plan: bool = False |
| 276 | reasons: list[str] = field(default_factory=list) |
| 277 | reason_codes: list[str] = field(default_factory=list) |
| 278 | recovery_strategy: str = "none" |
| 279 | evidence: list[ArtifactEvidence] = field(default_factory=list) |
| 280 | |
| 281 | @property |
| 282 | def requires_refresh(self) -> bool: |
| 283 | return self.stale_brief or self.stale_plan |
| 284 | |
| 285 | @property |
| 286 | def evidence_summary(self) -> list[str]: |
| 287 | return [item.render_summary() for item in self.evidence] |
| 288 | |
| 289 | |
| 290 | @dataclass(slots=True) |
| 291 | class WorkflowTimelineEntry: |
| 292 | """One persisted workflow history item.""" |
| 293 | |
| 294 | timestamp: str |
| 295 | kind: str |
| 296 | mode: str |
| 297 | reason_code: str |
| 298 | summary: str |
| 299 | decision_kind: str | None = None |
| 300 | route_score: float | None = None |
| 301 | runner_up_mode: str | None = None |
| 302 | runner_up_score: float | None = None |
| 303 | scheduled_next_mode: str | None = None |
| 304 | unresolved_questions: list[str] = field(default_factory=list) |
| 305 | signal_summary: list[str] = field(default_factory=list) |
| 306 | evidence_summary: list[str] = field(default_factory=list) |
| 307 | evidence_provenance: list[EvidenceProvenance] = field(default_factory=list) |
| 308 | verification_observations: list[VerificationObservation] = field(default_factory=list) |
| 309 | clarify_stage: str | None = None |
| 310 | clarify_pressure_kind: str | None = None |
| 311 | pressure_pass_complete: bool = False |
| 312 | missing_readiness_gates: list[str] = field(default_factory=list) |
| 313 | policy_stage: str | None = None |
| 314 | policy_outcome: str | None = None |
| 315 | prompt_format: str | None = None |
| 316 | prompt_sections: list[str] = field(default_factory=list) |
| 317 | artifact_paths: list[str] = field(default_factory=list) |
| 318 | |
| 319 | def to_dict(self) -> dict[str, Any]: |
| 320 | return { |
| 321 | "timestamp": self.timestamp, |
| 322 | "kind": self.kind, |
| 323 | "mode": self.mode, |
| 324 | "reason_code": self.reason_code, |
| 325 | "summary": self.summary, |
| 326 | "decision_kind": self.decision_kind, |
| 327 | "route_score": self.route_score, |
| 328 | "runner_up_mode": self.runner_up_mode, |
| 329 | "runner_up_score": self.runner_up_score, |
| 330 | "scheduled_next_mode": self.scheduled_next_mode, |
| 331 | "unresolved_questions": list(self.unresolved_questions), |
| 332 | "signal_summary": list(self.signal_summary), |
| 333 | "evidence_summary": list(self.evidence_summary), |
| 334 | "evidence_provenance": [ |
| 335 | item.to_dict() for item in self.evidence_provenance |
| 336 | ], |
| 337 | "verification_observations": [ |
| 338 | item.to_dict() for item in self.verification_observations |
| 339 | ], |
| 340 | "clarify_stage": self.clarify_stage, |
| 341 | "clarify_pressure_kind": self.clarify_pressure_kind, |
| 342 | "pressure_pass_complete": self.pressure_pass_complete, |
| 343 | "missing_readiness_gates": list(self.missing_readiness_gates), |
| 344 | "policy_stage": self.policy_stage, |
| 345 | "policy_outcome": self.policy_outcome, |
| 346 | "prompt_format": self.prompt_format, |
| 347 | "prompt_sections": list(self.prompt_sections), |
| 348 | "artifact_paths": list(self.artifact_paths), |
| 349 | } |
| 350 | |
| 351 | @classmethod |
| 352 | def from_dict(cls, data: dict[str, Any]) -> WorkflowTimelineEntry: |
| 353 | return cls( |
| 354 | timestamp=str(data.get("timestamp", "")), |
| 355 | kind=str(data.get("kind", WorkflowTimelineEntryKind.ROUTE.value)), |
| 356 | mode=str(data.get("mode", WorkflowMode.EXECUTE.value)), |
| 357 | reason_code=str(data.get("reason_code", "")), |
| 358 | summary=str(data.get("summary", "")), |
| 359 | decision_kind=_optional_text(data.get("decision_kind")), |
| 360 | route_score=_optional_float(data.get("route_score")), |
| 361 | runner_up_mode=_optional_text(data.get("runner_up_mode")), |
| 362 | runner_up_score=_optional_float(data.get("runner_up_score")), |
| 363 | scheduled_next_mode=_optional_text(data.get("scheduled_next_mode")), |
| 364 | unresolved_questions=_string_list(data.get("unresolved_questions")), |
| 365 | signal_summary=_string_list(data.get("signal_summary")), |
| 366 | evidence_summary=_string_list(data.get("evidence_summary")), |
| 367 | evidence_provenance=normalize_evidence_provenance( |
| 368 | data.get("evidence_provenance") |
| 369 | ), |
| 370 | verification_observations=normalize_verification_observations( |
| 371 | data.get("verification_observations") |
| 372 | ), |
| 373 | clarify_stage=_optional_text(data.get("clarify_stage")), |
| 374 | clarify_pressure_kind=_optional_text(data.get("clarify_pressure_kind")), |
| 375 | pressure_pass_complete=bool(data.get("pressure_pass_complete", False)), |
| 376 | missing_readiness_gates=_string_list(data.get("missing_readiness_gates")), |
| 377 | policy_stage=_optional_text(data.get("policy_stage")), |
| 378 | policy_outcome=_optional_text(data.get("policy_outcome")), |
| 379 | prompt_format=_optional_text(data.get("prompt_format")), |
| 380 | prompt_sections=_string_list(data.get("prompt_sections")), |
| 381 | artifact_paths=_string_list(data.get("artifact_paths")), |
| 382 | ) |
| 383 | |
| 384 | @classmethod |
| 385 | def from_decision( |
| 386 | cls, |
| 387 | decision: ModeDecision, |
| 388 | *, |
| 389 | kind: WorkflowTimelineEntryKind, |
| 390 | prompt_format: str | None = None, |
| 391 | prompt_sections: list[str] | None = None, |
| 392 | artifact_paths: list[str] | None = None, |
| 393 | ) -> WorkflowTimelineEntry: |
| 394 | summary = f"{decision.mode.value}: {decision.reason_summary}" |
| 395 | return cls( |
| 396 | timestamp=_utc_now(), |
| 397 | kind=kind.value, |
| 398 | mode=decision.mode.value, |
| 399 | reason_code=decision.reason_code, |
| 400 | summary=summary, |
| 401 | decision_kind=decision.decision_kind.value, |
| 402 | route_score=decision.route_score, |
| 403 | runner_up_mode=( |
| 404 | decision.runner_up_mode.value |
| 405 | if decision.runner_up_mode is not None |
| 406 | else None |
| 407 | ), |
| 408 | runner_up_score=decision.runner_up_score, |
| 409 | scheduled_next_mode=( |
| 410 | decision.scheduled_next_mode.value |
| 411 | if decision.scheduled_next_mode is not None |
| 412 | else None |
| 413 | ), |
| 414 | unresolved_questions=list(decision.unresolved_questions), |
| 415 | signal_summary=list(decision.signal_summary), |
| 416 | evidence_summary=list(decision.evidence_summary), |
| 417 | evidence_provenance=[], |
| 418 | clarify_stage=decision.clarify_stage, |
| 419 | clarify_pressure_kind=decision.clarify_pressure_kind, |
| 420 | pressure_pass_complete=decision.pressure_pass_complete, |
| 421 | missing_readiness_gates=list(decision.missing_readiness_gates), |
| 422 | prompt_format=prompt_format, |
| 423 | prompt_sections=list(prompt_sections or []), |
| 424 | artifact_paths=list(artifact_paths or []), |
| 425 | ) |
| 426 | |
| 427 | @classmethod |
| 428 | def accountability( |
| 429 | cls, |
| 430 | *, |
| 431 | kind: WorkflowTimelineEntryKind, |
| 432 | mode: WorkflowMode | str, |
| 433 | reason_code: str, |
| 434 | summary: str, |
| 435 | policy_stage: str | None = None, |
| 436 | policy_outcome: str | None = None, |
| 437 | decision_kind: WorkflowDecisionKind | str | None = WorkflowDecisionKind.FORCED, |
| 438 | prompt_format: str | None = None, |
| 439 | prompt_sections: list[str] | None = None, |
| 440 | signal_summary: list[str] | None = None, |
| 441 | evidence_summary: list[str] | None = None, |
| 442 | evidence_provenance: list[EvidenceProvenance] | None = None, |
| 443 | verification_observations: list[VerificationObservation] | None = None, |
| 444 | artifact_paths: list[str] | None = None, |
| 445 | ) -> WorkflowTimelineEntry: |
| 446 | """Build one typed non-routing accountability entry.""" |
| 447 | |
| 448 | resolved_mode = mode.value if isinstance(mode, WorkflowMode) else str(mode) |
| 449 | if isinstance(decision_kind, WorkflowDecisionKind): |
| 450 | resolved_decision_kind = decision_kind.value |
| 451 | elif decision_kind is None: |
| 452 | resolved_decision_kind = None |
| 453 | else: |
| 454 | resolved_decision_kind = str(decision_kind) |
| 455 | resolved_provenance = list(evidence_provenance or []) |
| 456 | resolved_observations = list(verification_observations or []) |
| 457 | resolved_evidence_summary = list( |
| 458 | evidence_summary or summarize_evidence_provenance(resolved_provenance) |
| 459 | ) |
| 460 | if not resolved_evidence_summary and resolved_observations: |
| 461 | resolved_evidence_summary = summarize_verification_observations( |
| 462 | resolved_observations |
| 463 | ) |
| 464 | return cls( |
| 465 | timestamp=_utc_now(), |
| 466 | kind=kind.value, |
| 467 | mode=resolved_mode, |
| 468 | reason_code=reason_code, |
| 469 | summary=summary, |
| 470 | decision_kind=resolved_decision_kind, |
| 471 | signal_summary=list(signal_summary or []), |
| 472 | evidence_summary=resolved_evidence_summary, |
| 473 | evidence_provenance=resolved_provenance, |
| 474 | verification_observations=resolved_observations, |
| 475 | policy_stage=policy_stage, |
| 476 | policy_outcome=policy_outcome, |
| 477 | prompt_format=prompt_format, |
| 478 | prompt_sections=list(prompt_sections or []), |
| 479 | artifact_paths=list(artifact_paths or []), |
| 480 | ) |
| 481 | |
| 482 | |
| 483 | class WorkflowPolicy: |
| 484 | """Scored workflow-policy engine for route and clarify decisions.""" |
| 485 | |
| 486 | clarify_threshold = 0.55 |
| 487 | plan_threshold = 0.45 |
| 488 | |
| 489 | def __init__(self, signal_extractor: WorkflowSignalExtractor | None = None) -> None: |
| 490 | self.signal_extractor = signal_extractor or WorkflowSignalExtractor() |
| 491 | |
| 492 | def route( |
| 493 | self, |
| 494 | task: str, |
| 495 | *, |
| 496 | requested_mode: WorkflowMode | None = None, |
| 497 | has_brief: bool = False, |
| 498 | has_plan: bool = False, |
| 499 | allow_clarify: bool = True, |
| 500 | verification_pressure: bool = False, |
| 501 | mutating_history: bool = False, |
| 502 | stale_plan: bool = False, |
| 503 | unresolved_questions: list[str] | None = None, |
| 504 | timeline: list[WorkflowTimelineEntry] | None = None, |
| 505 | ) -> ModeDecision: |
| 506 | signals = self.signal_extractor.extract_route_signals( |
| 507 | task, |
| 508 | requested_mode=requested_mode.value if requested_mode is not None else None, |
| 509 | has_brief=has_brief, |
| 510 | has_plan=has_plan, |
| 511 | allow_clarify=allow_clarify, |
| 512 | verification_pressure=verification_pressure, |
| 513 | mutating_history=mutating_history, |
| 514 | stale_plan=stale_plan, |
| 515 | unresolved_questions=unresolved_questions, |
| 516 | timeline=timeline, |
| 517 | ) |
| 518 | return self.route_from_signals(signals) |
| 519 | |
| 520 | def route_from_signals(self, signals: WorkflowSignalPacket) -> ModeDecision: |
| 521 | """Route from a typed workflow-signal packet.""" |
| 522 | |
| 523 | requested_mode = WorkflowMode.from_str(signals.requested_mode) |
| 524 | if requested_mode is not None: |
| 525 | return ModeDecision( |
| 526 | mode=requested_mode, |
| 527 | reason_code="explicit_request", |
| 528 | reason_summary=f"explicit {requested_mode.value} request", |
| 529 | decision_kind=WorkflowDecisionKind.REQUESTED, |
| 530 | route_score=1.0, |
| 531 | scheduled_next_mode=( |
| 532 | WorkflowMode.EXECUTE |
| 533 | if requested_mode in {WorkflowMode.CLARIFY, WorkflowMode.PLAN} |
| 534 | else None |
| 535 | ), |
| 536 | signal_summary=list(signals.signal_summary), |
| 537 | ) |
| 538 | |
| 539 | if signals.stale_artifact_pressure > 0: |
| 540 | return ModeDecision( |
| 541 | mode=WorkflowMode.PLAN, |
| 542 | reason_code="stale_plan_artifacts", |
| 543 | reason_summary="existing plan artifacts no longer match the task state", |
| 544 | decision_kind=WorkflowDecisionKind.REENTRY, |
| 545 | route_score=0.95, |
| 546 | runner_up_mode=WorkflowMode.EXECUTE, |
| 547 | runner_up_score=0.6, |
| 548 | scheduled_next_mode=WorkflowMode.EXECUTE, |
| 549 | unresolved_questions=list(signals.unresolved_questions), |
| 550 | pressure_summary=[ |
| 551 | "plan refresh pressure: stale artifacts require a refreshed plan", |
| 552 | "execute pressure: continue directly with the stale artifacts", |
| 553 | ], |
| 554 | signal_summary=list(signals.signal_summary), |
| 555 | ) |
| 556 | |
| 557 | if signals.has_plan: |
| 558 | return ModeDecision( |
| 559 | mode=WorkflowMode.EXECUTE, |
| 560 | reason_code="existing_plan_artifacts", |
| 561 | reason_summary="reusing existing plan artifacts", |
| 562 | decision_kind=WorkflowDecisionKind.ARTIFACT_REUSE, |
| 563 | route_score=0.9, |
| 564 | runner_up_mode=WorkflowMode.PLAN, |
| 565 | runner_up_score=0.45, |
| 566 | unresolved_questions=list(signals.unresolved_questions), |
| 567 | pressure_summary=[ |
| 568 | "execute pressure: persisted plan artifacts already exist", |
| 569 | "plan pressure: a plan refresh is available but not required", |
| 570 | ], |
| 571 | signal_summary=list(signals.signal_summary), |
| 572 | ) |
| 573 | |
| 574 | ambiguity = signals.ambiguity_score |
| 575 | complexity = signals.complexity_score |
| 576 | |
| 577 | clarify_pressure = ambiguity |
| 578 | if signals.allow_clarify and not signals.has_brief: |
| 579 | clarify_pressure += 0.15 |
| 580 | if signals.unresolved_questions: |
| 581 | clarify_pressure += min(0.12, 0.04 * len(signals.unresolved_questions)) |
| 582 | if complexity < 0.55: |
| 583 | clarify_pressure += 0.05 |
| 584 | if signals.recent_clarify_count and signals.unresolved_questions: |
| 585 | clarify_pressure += 0.04 |
| 586 | if not signals.allow_clarify: |
| 587 | clarify_pressure = 0.0 |
| 588 | |
| 589 | plan_pressure = complexity |
| 590 | plan_pressure += signals.verification_pressure |
| 591 | plan_pressure += signals.mutation_pressure |
| 592 | if signals.has_brief: |
| 593 | plan_pressure += 0.06 |
| 594 | if signals.unresolved_questions: |
| 595 | plan_pressure += 0.06 |
| 596 | if signals.recent_reentry_count: |
| 597 | plan_pressure += 0.06 |
| 598 | if signals.recent_plan_refresh_count: |
| 599 | plan_pressure += 0.04 |
| 600 | |
| 601 | execute_pressure = 0.35 |
| 602 | if signals.has_brief: |
| 603 | execute_pressure += 0.14 |
| 604 | if ambiguity < 0.35: |
| 605 | execute_pressure += 0.16 |
| 606 | if complexity < 0.45: |
| 607 | execute_pressure += 0.12 |
| 608 | if not signals.unresolved_questions: |
| 609 | execute_pressure += 0.05 |
| 610 | if signals.recent_verify_skip_count and not signals.verification_pressure: |
| 611 | execute_pressure += 0.03 |
| 612 | |
| 613 | scores = { |
| 614 | WorkflowMode.CLARIFY: round(min(clarify_pressure, 1.0), 3), |
| 615 | WorkflowMode.PLAN: round(min(plan_pressure, 1.0), 3), |
| 616 | WorkflowMode.EXECUTE: round(min(execute_pressure, 1.0), 3), |
| 617 | } |
| 618 | ordered = sorted(scores.items(), key=lambda item: item[1], reverse=True) |
| 619 | winner, winner_score = ordered[0] |
| 620 | runner_up, runner_up_score = ordered[1] |
| 621 | |
| 622 | pressure_summary = [ |
| 623 | f"{mode.value} pressure={score:.2f}" |
| 624 | for mode, score in ordered |
| 625 | ] |
| 626 | |
| 627 | if ( |
| 628 | winner == WorkflowMode.CLARIFY |
| 629 | and winner_score >= self.clarify_threshold |
| 630 | and signals.allow_clarify |
| 631 | ): |
| 632 | return ModeDecision( |
| 633 | mode=WorkflowMode.CLARIFY, |
| 634 | reason_code="task_is_ambiguous", |
| 635 | reason_summary="workflow pressure favors clarification before execution", |
| 636 | ambiguity_score=ambiguity, |
| 637 | complexity_score=complexity, |
| 638 | route_score=winner_score, |
| 639 | runner_up_mode=runner_up, |
| 640 | runner_up_score=runner_up_score, |
| 641 | scheduled_next_mode=WorkflowMode.EXECUTE, |
| 642 | unresolved_questions=list(signals.unresolved_questions), |
| 643 | pressure_summary=pressure_summary, |
| 644 | signal_summary=list(signals.signal_summary), |
| 645 | ) |
| 646 | |
| 647 | if winner == WorkflowMode.PLAN and winner_score >= self.plan_threshold: |
| 648 | reason_code = ( |
| 649 | "verification_pressure_requires_plan" |
| 650 | if signals.verification_pressure |
| 651 | else "task_is_complex" |
| 652 | ) |
| 653 | reason_summary = ( |
| 654 | "verification pressure and task complexity favor a persisted plan" |
| 655 | if signals.verification_pressure |
| 656 | else "workflow pressure favors a persisted plan before execution" |
| 657 | ) |
| 658 | return ModeDecision( |
| 659 | mode=WorkflowMode.PLAN, |
| 660 | reason_code=reason_code, |
| 661 | reason_summary=reason_summary, |
| 662 | ambiguity_score=ambiguity, |
| 663 | complexity_score=complexity, |
| 664 | route_score=winner_score, |
| 665 | runner_up_mode=runner_up, |
| 666 | runner_up_score=runner_up_score, |
| 667 | scheduled_next_mode=WorkflowMode.EXECUTE, |
| 668 | unresolved_questions=list(signals.unresolved_questions), |
| 669 | pressure_summary=pressure_summary, |
| 670 | signal_summary=list(signals.signal_summary), |
| 671 | ) |
| 672 | |
| 673 | return ModeDecision( |
| 674 | mode=WorkflowMode.EXECUTE, |
| 675 | reason_code="task_is_concrete", |
| 676 | reason_summary="workflow pressure favors direct execution", |
| 677 | ambiguity_score=ambiguity, |
| 678 | complexity_score=complexity, |
| 679 | route_score=winner_score, |
| 680 | runner_up_mode=runner_up, |
| 681 | runner_up_score=runner_up_score, |
| 682 | unresolved_questions=list(signals.unresolved_questions), |
| 683 | pressure_summary=pressure_summary, |
| 684 | signal_summary=list(signals.signal_summary), |
| 685 | ) |
| 686 | |
| 687 | def review_clarify( |
| 688 | self, |
| 689 | *, |
| 690 | task: str, |
| 691 | answer: str, |
| 692 | snapshot: ClarifySnapshot, |
| 693 | round_index: int, |
| 694 | max_rounds: int, |
| 695 | pressure_pass_complete: bool = False, |
| 696 | ) -> ClarifyReview: |
| 697 | """Determine whether clarify should continue for another round.""" |
| 698 | |
| 699 | assessment = assess_clarify_snapshot( |
| 700 | task=task, |
| 701 | answer=answer, |
| 702 | snapshot=snapshot, |
| 703 | round_index=round_index, |
| 704 | pressure_pass_complete=pressure_pass_complete, |
| 705 | ) |
| 706 | unresolved = list(assessment.unresolved_questions) |
| 707 | focus_slot = assessment.focus_slot.value if assessment.focus_slot else None |
| 708 | focus_label = describe_clarify_slot(assessment.focus_slot) |
| 709 | pressure_kind = ( |
| 710 | assessment.pressure_kind.value if assessment.pressure_kind is not None else None |
| 711 | ) |
| 712 | pressure_label = describe_clarify_pressure_kind(assessment.pressure_kind) |
| 713 | readiness_gates = list(assessment.missing_readiness_gates) |
| 714 | |
| 715 | if unresolved and round_index < max_rounds: |
| 716 | if assessment.pressure_kind is not None: |
| 717 | return ClarifyReview( |
| 718 | should_continue=True, |
| 719 | reason_code="clarify_pressure_pass_required", |
| 720 | reason_summary=( |
| 721 | "clarify still needs a " |
| 722 | f"{pressure_label} pass around {focus_label}" |
| 723 | ), |
| 724 | unresolved_questions=unresolved, |
| 725 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], |
| 726 | focus_slot=focus_slot, |
| 727 | stage=assessment.stage.value, |
| 728 | pressure_kind=pressure_kind, |
| 729 | pressure_pass_complete=assessment.pressure_pass_complete, |
| 730 | missing_readiness_gates=readiness_gates, |
| 731 | ) |
| 732 | |
| 733 | return ClarifyReview( |
| 734 | should_continue=True, |
| 735 | reason_code="clarify_follow_up_needed", |
| 736 | reason_summary=( |
| 737 | "clarify pressure remains high around " |
| 738 | f"{focus_label} after the latest answer" |
| 739 | ), |
| 740 | unresolved_questions=unresolved, |
| 741 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], |
| 742 | focus_slot=focus_slot, |
| 743 | stage=assessment.stage.value, |
| 744 | pressure_kind=pressure_kind, |
| 745 | pressure_pass_complete=assessment.pressure_pass_complete, |
| 746 | missing_readiness_gates=readiness_gates, |
| 747 | ) |
| 748 | |
| 749 | if unresolved: |
| 750 | if not assessment.pressure_pass_complete and round_index >= 2: |
| 751 | return ClarifyReview( |
| 752 | should_continue=False, |
| 753 | reason_code="clarify_budget_exhausted_without_pressure_pass", |
| 754 | reason_summary=( |
| 755 | "clarify budget exhausted before Loader completed a " |
| 756 | "bounded pressure pass" |
| 757 | ), |
| 758 | unresolved_questions=unresolved, |
| 759 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], |
| 760 | focus_slot=focus_slot, |
| 761 | stage=assessment.stage.value, |
| 762 | pressure_kind=pressure_kind, |
| 763 | pressure_pass_complete=assessment.pressure_pass_complete, |
| 764 | missing_readiness_gates=readiness_gates, |
| 765 | ) |
| 766 | |
| 767 | return ClarifyReview( |
| 768 | should_continue=False, |
| 769 | reason_code="clarify_budget_exhausted", |
| 770 | reason_summary="clarify budget exhausted; carrying unresolved questions forward", |
| 771 | unresolved_questions=unresolved, |
| 772 | unresolved_slots=[slot.value for slot in assessment.unresolved_slots], |
| 773 | focus_slot=focus_slot, |
| 774 | stage=assessment.stage.value, |
| 775 | pressure_kind=pressure_kind, |
| 776 | pressure_pass_complete=assessment.pressure_pass_complete, |
| 777 | missing_readiness_gates=readiness_gates, |
| 778 | ) |
| 779 | |
| 780 | return ClarifyReview( |
| 781 | should_continue=False, |
| 782 | reason_code="clarify_complete", |
| 783 | reason_summary=( |
| 784 | "clarify gathered enough boundaries and completed a bounded pressure pass" |
| 785 | if assessment.pressure_pass_complete |
| 786 | else "clarify gathered enough boundaries to proceed" |
| 787 | ), |
| 788 | unresolved_questions=[], |
| 789 | unresolved_slots=[], |
| 790 | focus_slot=None, |
| 791 | stage=assessment.stage.value, |
| 792 | pressure_kind=pressure_kind, |
| 793 | pressure_pass_complete=assessment.pressure_pass_complete, |
| 794 | missing_readiness_gates=readiness_gates, |
| 795 | ) |
| 796 | |
| 797 | def assess_artifact_freshness( |
| 798 | self, |
| 799 | *, |
| 800 | implementation_text: str | None, |
| 801 | verification_text: str | None, |
| 802 | touched_files: list[str], |
| 803 | ) -> ArtifactFreshness: |
| 804 | """Detect whether persisted plan artifacts have drifted from execution.""" |
| 805 | |
| 806 | if not implementation_text and not verification_text: |
| 807 | return ArtifactFreshness() |
| 808 | |
| 809 | combined = f"{implementation_text or ''}\n{verification_text or ''}".lower() |
| 810 | reasons: list[str] = [] |
| 811 | unexpected_paths: list[str] = [] |
| 812 | for file_path in touched_files: |
| 813 | name = Path(file_path).name.strip() |
| 814 | if name and name.lower() not in combined: |
| 815 | unexpected_paths.append(name) |
| 816 | |
| 817 | if unexpected_paths: |
| 818 | reasons.append( |
| 819 | "Touched files outside the current plan: " |
| 820 | + ", ".join(dict.fromkeys(unexpected_paths)) |
| 821 | ) |
| 822 | |
| 823 | return ArtifactFreshness( |
| 824 | stale_plan=bool(reasons), |
| 825 | reasons=reasons, |
| 826 | ) |
| 827 | |
| 828 | def _ambiguity_score(self, task: str) -> float: |
| 829 | lowered = task.lower() |
| 830 | words = re.findall(r"\w+", lowered) |
| 831 | score = 0.0 |
| 832 | |
| 833 | if ( |
| 834 | "--clarify" in lowered |
| 835 | or "don't assume" in lowered |
| 836 | or "do not assume" in lowered |
| 837 | or "not sure" in lowered |
| 838 | or "figure out" in lowered |
| 839 | or "interview me" in lowered |
| 840 | or "ask me" in lowered |
| 841 | or lowered.startswith("clarify ") |
| 842 | ): |
| 843 | score += 0.65 |
| 844 | |
| 845 | if any( |
| 846 | phrase in lowered |
| 847 | for phrase in ( |
| 848 | "something", |
| 849 | "somehow", |
| 850 | "better", |
| 851 | "improve", |
| 852 | "fix this", |
| 853 | "make it", |
| 854 | "more like", |
| 855 | "feels more like", |
| 856 | ) |
| 857 | ): |
| 858 | score += 0.2 |
| 859 | |
| 860 | if not _has_concrete_anchor(task): |
| 861 | score += 0.2 |
| 862 | |
| 863 | if len(words) <= 12 and any( |
| 864 | verb in lowered |
| 865 | for verb in ("build", "add", "improve", "refactor", "implement") |
| 866 | ): |
| 867 | score += 0.15 |
| 868 | |
| 869 | return min(score, 1.0) |
| 870 | |
| 871 | def _complexity_score(self, task: str) -> float: |
| 872 | lowered = task.lower() |
| 873 | words = re.findall(r"\w+", lowered) |
| 874 | score = 0.0 |
| 875 | |
| 876 | if len(words) >= 18: |
| 877 | score += 0.2 |
| 878 | if len(words) >= 30: |
| 879 | score += 0.15 |
| 880 | |
| 881 | if any( |
| 882 | phrase in lowered |
| 883 | for phrase in ( |
| 884 | "refactor", |
| 885 | "architecture", |
| 886 | "migrate", |
| 887 | "persistent", |
| 888 | "workflow", |
| 889 | "deep dive", |
| 890 | "report", |
| 891 | "implementation plan", |
| 892 | "verification plan", |
| 893 | ) |
| 894 | ): |
| 895 | score += 0.3 |
| 896 | |
| 897 | if lowered.count(" and ") >= 2 or lowered.count(",") >= 2: |
| 898 | score += 0.15 |
| 899 | |
| 900 | if _has_concrete_anchor(task): |
| 901 | score += 0.1 |
| 902 | |
| 903 | return min(score, 1.0) |
| 904 | |
| 905 | |
| 906 | class ModeRouter(WorkflowPolicy): |
| 907 | """Backward-compatible alias for the workflow policy router.""" |
| 908 | |
| 909 | |
| 910 | def _has_concrete_anchor(task: str) -> bool: |
| 911 | return bool( |
| 912 | re.search(r"[./_\\-]", task) |
| 913 | or re.search(r"`[^`]+`", task) |
| 914 | or any(token in task.lower() for token in ("test", "file", "function", "class")) |
| 915 | ) |
| 916 | |
| 917 | |
| 918 | def _utc_now() -> str: |
| 919 | return datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ") |
| 920 | |
| 921 | |
| 922 | def _optional_text(value: Any) -> str | None: |
| 923 | if value is None: |
| 924 | return None |
| 925 | text = str(value).strip() |
| 926 | return text or None |
| 927 | |
| 928 | |
| 929 | def _optional_float(value: Any) -> float | None: |
| 930 | if value is None: |
| 931 | return None |
| 932 | return float(value) |
| 933 | |
| 934 | |
| 935 | def _string_list(value: Any) -> list[str]: |
| 936 | if not isinstance(value, list): |
| 937 | return [] |
| 938 | return [str(item) for item in value if str(item).strip()] |