Add typed workflow signal extraction
- SHA
82a0ca7faa669e27386a80e93191ea14ec673abd- Parents
-
1aaccad - Tree
a0cc95f
82a0ca7
82a0ca7faa669e27386a80e93191ea14ec673abd1aaccad
a0cc95f| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/conversation.py
|
35 | 22 |
| M |
src/loader/runtime/workflow.py
|
3 | 0 |
| M |
src/loader/runtime/workflow_policy.py
|
71 | 25 |
| A |
src/loader/runtime/workflow_signals.py
|
221 | 0 |
| M |
tests/test_workflow_policy.py
|
20 | 0 |
| A |
tests/test_workflow_signals.py
|
55 | 0 |
src/loader/runtime/conversation.pymodified@@ -35,6 +35,7 @@ from .workflow import ( | |||
| 35 | WorkflowDecisionKind, | 35 | WorkflowDecisionKind, |
| 36 | WorkflowMode, | 36 | WorkflowMode, |
| 37 | WorkflowPolicy, | 37 | WorkflowPolicy, |
| 38 | + WorkflowSignalExtractor, | ||
| 38 | WorkflowTimelineEntry, | 39 | WorkflowTimelineEntry, |
| 39 | WorkflowTimelineEntryKind, | 40 | WorkflowTimelineEntryKind, |
| 40 | build_execute_bridge, | 41 | build_execute_bridge, |
@@ -54,7 +55,8 @@ class ConversationRuntime: | |||
| 54 | self.tracer = RuntimeTracer() | 55 | self.tracer = RuntimeTracer() |
| 55 | self.executor: ToolExecutor | None = None | 56 | self.executor: ToolExecutor | None = None |
| 56 | self.dod_store = DefinitionOfDoneStore(agent.project_root) | 57 | self.dod_store = DefinitionOfDoneStore(agent.project_root) |
| 57 | - self.workflow_policy = WorkflowPolicy() | 58 | + self.workflow_signals = WorkflowSignalExtractor() |
| 59 | + self.workflow_policy = WorkflowPolicy(self.workflow_signals) | ||
| 58 | self.artifact_store = WorkflowArtifactStore(agent.project_root) | 60 | self.artifact_store = WorkflowArtifactStore(agent.project_root) |
| 59 | self.turn_requester = AssistantTurnRequester(agent, self.tracer) | 61 | self.turn_requester = AssistantTurnRequester(agent, self.tracer) |
| 60 | self.tool_batches = ToolBatchRunner(agent, self.dod_store) | 62 | self.tool_batches = ToolBatchRunner(agent, self.dod_store) |
@@ -488,12 +490,15 @@ class ConversationRuntime: | |||
| 488 | requested_mode: str | None, | 490 | requested_mode: str | None, |
| 489 | ) -> str: | 491 | ) -> str: |
| 490 | requested = WorkflowMode.from_str(requested_mode) | 492 | requested = WorkflowMode.from_str(requested_mode) |
| 491 | - decision = self.workflow_policy.route( | 493 | + decision = self.workflow_policy.route_from_signals( |
| 492 | - task, | 494 | + self.workflow_signals.extract_route_signals( |
| 493 | - requested_mode=requested, | 495 | + task, |
| 494 | - has_brief=self._artifact_exists(dod.clarify_brief), | 496 | + requested_mode=requested.value if requested is not None else None, |
| 495 | - has_plan=self._artifact_exists(dod.implementation_plan) | 497 | + has_brief=self._artifact_exists(dod.clarify_brief), |
| 496 | - and self._artifact_exists(dod.verification_plan), | 498 | + has_plan=self._artifact_exists(dod.implementation_plan) |
| 499 | + and self._artifact_exists(dod.verification_plan), | ||
| 500 | + timeline=self.agent.session.workflow_timeline, | ||
| 501 | + ) | ||
| 497 | ) | 502 | ) |
| 498 | await self._set_workflow_mode( | 503 | await self._set_workflow_mode( |
| 499 | decision, | 504 | decision, |
@@ -516,13 +521,16 @@ class ConversationRuntime: | |||
| 516 | summary=summary, | 521 | summary=summary, |
| 517 | on_user_question=on_user_question, | 522 | on_user_question=on_user_question, |
| 518 | ) | 523 | ) |
| 519 | - decision = self.workflow_policy.route( | 524 | + decision = self.workflow_policy.route_from_signals( |
| 520 | - task, | 525 | + self.workflow_signals.extract_route_signals( |
| 521 | - has_brief=self._artifact_exists(dod.clarify_brief), | 526 | + task, |
| 522 | - has_plan=self._artifact_exists(dod.implementation_plan) | 527 | + has_brief=self._artifact_exists(dod.clarify_brief), |
| 523 | - and self._artifact_exists(dod.verification_plan), | 528 | + has_plan=self._artifact_exists(dod.implementation_plan) |
| 524 | - allow_clarify=False, | 529 | + and self._artifact_exists(dod.verification_plan), |
| 525 | - unresolved_questions=clarify_review.unresolved_questions, | 530 | + allow_clarify=False, |
| 531 | + unresolved_questions=clarify_review.unresolved_questions, | ||
| 532 | + timeline=self.agent.session.workflow_timeline, | ||
| 533 | + ) | ||
| 526 | ) | 534 | ) |
| 527 | await self._set_workflow_mode( | 535 | await self._set_workflow_mode( |
| 528 | decision.with_context( | 536 | decision.with_context( |
@@ -1096,14 +1104,19 @@ class ConversationRuntime: | |||
| 1096 | if not freshness.stale_plan: | 1104 | if not freshness.stale_plan: |
| 1097 | return False | 1105 | return False |
| 1098 | 1106 | ||
| 1099 | - decision = self.workflow_policy.route( | 1107 | + decision = self.workflow_policy.route_from_signals( |
| 1100 | - task, | 1108 | + self.workflow_signals.extract_route_signals( |
| 1101 | - has_brief=self._artifact_exists(dod.clarify_brief), | 1109 | + task, |
| 1102 | - has_plan=True, | 1110 | + has_brief=self._artifact_exists(dod.clarify_brief), |
| 1103 | - allow_clarify=False, | 1111 | + has_plan=True, |
| 1104 | - stale_plan=True, | 1112 | + allow_clarify=False, |
| 1105 | - verification_pressure=bool(dod.retry_count or dod.last_verification_result == "failed"), | 1113 | + stale_plan=True, |
| 1106 | - unresolved_questions=freshness.reasons, | 1114 | + verification_pressure=bool( |
| 1115 | + dod.retry_count or dod.last_verification_result == "failed" | ||
| 1116 | + ), | ||
| 1117 | + unresolved_questions=freshness.reasons, | ||
| 1118 | + timeline=self.agent.session.workflow_timeline, | ||
| 1119 | + ) | ||
| 1107 | ) | 1120 | ) |
| 1108 | await self._set_workflow_mode( | 1121 | await self._set_workflow_mode( |
| 1109 | decision, | 1122 | decision, |
src/loader/runtime/workflow.pymodified@@ -19,6 +19,7 @@ from .workflow_policy import ( | |||
| 19 | WorkflowTimelineEntry, | 19 | WorkflowTimelineEntry, |
| 20 | WorkflowTimelineEntryKind, | 20 | WorkflowTimelineEntryKind, |
| 21 | ) | 21 | ) |
| 22 | +from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket | ||
| 22 | 23 | ||
| 23 | __all__ = [ | 24 | __all__ = [ |
| 24 | "ArtifactFreshness", | 25 | "ArtifactFreshness", |
@@ -32,6 +33,8 @@ __all__ = [ | |||
| 32 | "WorkflowDecisionKind", | 33 | "WorkflowDecisionKind", |
| 33 | "WorkflowMode", | 34 | "WorkflowMode", |
| 34 | "WorkflowPolicy", | 35 | "WorkflowPolicy", |
| 36 | + "WorkflowSignalExtractor", | ||
| 37 | + "WorkflowSignalPacket", | ||
| 35 | "WorkflowTimelineEntry", | 38 | "WorkflowTimelineEntry", |
| 36 | "WorkflowTimelineEntryKind", | 39 | "WorkflowTimelineEntryKind", |
| 37 | "build_execute_bridge", | 40 | "build_execute_bridge", |
src/loader/runtime/workflow_policy.pymodified@@ -9,6 +9,8 @@ from enum import StrEnum | |||
| 9 | from pathlib import Path | 9 | from pathlib import Path |
| 10 | from typing import Any | 10 | from typing import Any |
| 11 | 11 | ||
| 12 | +from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket | ||
| 13 | + | ||
| 12 | 14 | ||
| 13 | class WorkflowMode(StrEnum): | 15 | class WorkflowMode(StrEnum): |
| 14 | """High-level runtime modes for one Loader task turn.""" | 16 | """High-level runtime modes for one Loader task turn.""" |
@@ -68,6 +70,7 @@ class ModeDecision: | |||
| 68 | scheduled_next_mode: WorkflowMode | None = None | 70 | scheduled_next_mode: WorkflowMode | None = None |
| 69 | unresolved_questions: list[str] = field(default_factory=list) | 71 | unresolved_questions: list[str] = field(default_factory=list) |
| 70 | pressure_summary: list[str] = field(default_factory=list) | 72 | pressure_summary: list[str] = field(default_factory=list) |
| 73 | + signal_summary: list[str] = field(default_factory=list) | ||
| 71 | 74 | ||
| 72 | @property | 75 | @property |
| 73 | def reason(self) -> str: | 76 | def reason(self) -> str: |
@@ -89,6 +92,7 @@ class ModeDecision: | |||
| 89 | scheduled_next_mode: WorkflowMode | None = None, | 92 | scheduled_next_mode: WorkflowMode | None = None, |
| 90 | unresolved_questions: list[str] | None = None, | 93 | unresolved_questions: list[str] | None = None, |
| 91 | pressure_summary: list[str] | None = None, | 94 | pressure_summary: list[str] | None = None, |
| 95 | + signal_summary: list[str] | None = None, | ||
| 92 | ) -> ModeDecision: | 96 | ) -> ModeDecision: |
| 93 | """Build a non-router workflow decision for handoffs and reentry.""" | 97 | """Build a non-router workflow decision for handoffs and reentry.""" |
| 94 | 98 | ||
@@ -105,6 +109,7 @@ class ModeDecision: | |||
| 105 | scheduled_next_mode=scheduled_next_mode, | 109 | scheduled_next_mode=scheduled_next_mode, |
| 106 | unresolved_questions=list(unresolved_questions or []), | 110 | unresolved_questions=list(unresolved_questions or []), |
| 107 | pressure_summary=list(pressure_summary or []), | 111 | pressure_summary=list(pressure_summary or []), |
| 112 | + signal_summary=list(signal_summary or []), | ||
| 108 | ) | 113 | ) |
| 109 | 114 | ||
| 110 | def with_context( | 115 | def with_context( |
@@ -119,6 +124,7 @@ class ModeDecision: | |||
| 119 | scheduled_next_mode: WorkflowMode | None = None, | 124 | scheduled_next_mode: WorkflowMode | None = None, |
| 120 | unresolved_questions: list[str] | None = None, | 125 | unresolved_questions: list[str] | None = None, |
| 121 | pressure_summary: list[str] | None = None, | 126 | pressure_summary: list[str] | None = None, |
| 127 | + signal_summary: list[str] | None = None, | ||
| 122 | ) -> ModeDecision: | 128 | ) -> ModeDecision: |
| 123 | """Return a copy with updated contextual routing metadata.""" | 129 | """Return a copy with updated contextual routing metadata.""" |
| 124 | 130 | ||
@@ -149,6 +155,9 @@ class ModeDecision: | |||
| 149 | if pressure_summary is None | 155 | if pressure_summary is None |
| 150 | else pressure_summary | 156 | else pressure_summary |
| 151 | ), | 157 | ), |
| 158 | + signal_summary=list( | ||
| 159 | + self.signal_summary if signal_summary is None else signal_summary | ||
| 160 | + ), | ||
| 152 | ) | 161 | ) |
| 153 | 162 | ||
| 154 | 163 | ||
@@ -190,6 +199,7 @@ class WorkflowTimelineEntry: | |||
| 190 | runner_up_score: float | None = None | 199 | runner_up_score: float | None = None |
| 191 | scheduled_next_mode: str | None = None | 200 | scheduled_next_mode: str | None = None |
| 192 | unresolved_questions: list[str] = field(default_factory=list) | 201 | unresolved_questions: list[str] = field(default_factory=list) |
| 202 | + signal_summary: list[str] = field(default_factory=list) | ||
| 193 | prompt_format: str | None = None | 203 | prompt_format: str | None = None |
| 194 | prompt_sections: list[str] = field(default_factory=list) | 204 | prompt_sections: list[str] = field(default_factory=list) |
| 195 | artifact_paths: list[str] = field(default_factory=list) | 205 | artifact_paths: list[str] = field(default_factory=list) |
@@ -207,6 +217,7 @@ class WorkflowTimelineEntry: | |||
| 207 | "runner_up_score": self.runner_up_score, | 217 | "runner_up_score": self.runner_up_score, |
| 208 | "scheduled_next_mode": self.scheduled_next_mode, | 218 | "scheduled_next_mode": self.scheduled_next_mode, |
| 209 | "unresolved_questions": list(self.unresolved_questions), | 219 | "unresolved_questions": list(self.unresolved_questions), |
| 220 | + "signal_summary": list(self.signal_summary), | ||
| 210 | "prompt_format": self.prompt_format, | 221 | "prompt_format": self.prompt_format, |
| 211 | "prompt_sections": list(self.prompt_sections), | 222 | "prompt_sections": list(self.prompt_sections), |
| 212 | "artifact_paths": list(self.artifact_paths), | 223 | "artifact_paths": list(self.artifact_paths), |
@@ -226,6 +237,7 @@ class WorkflowTimelineEntry: | |||
| 226 | runner_up_score=_optional_float(data.get("runner_up_score")), | 237 | runner_up_score=_optional_float(data.get("runner_up_score")), |
| 227 | scheduled_next_mode=_optional_text(data.get("scheduled_next_mode")), | 238 | scheduled_next_mode=_optional_text(data.get("scheduled_next_mode")), |
| 228 | unresolved_questions=_string_list(data.get("unresolved_questions")), | 239 | unresolved_questions=_string_list(data.get("unresolved_questions")), |
| 240 | + signal_summary=_string_list(data.get("signal_summary")), | ||
| 229 | prompt_format=_optional_text(data.get("prompt_format")), | 241 | prompt_format=_optional_text(data.get("prompt_format")), |
| 230 | prompt_sections=_string_list(data.get("prompt_sections")), | 242 | prompt_sections=_string_list(data.get("prompt_sections")), |
| 231 | artifact_paths=_string_list(data.get("artifact_paths")), | 243 | artifact_paths=_string_list(data.get("artifact_paths")), |
@@ -262,6 +274,7 @@ class WorkflowTimelineEntry: | |||
| 262 | else None | 274 | else None |
| 263 | ), | 275 | ), |
| 264 | unresolved_questions=list(decision.unresolved_questions), | 276 | unresolved_questions=list(decision.unresolved_questions), |
| 277 | + signal_summary=list(decision.signal_summary), | ||
| 265 | prompt_format=prompt_format, | 278 | prompt_format=prompt_format, |
| 266 | prompt_sections=list(prompt_sections or []), | 279 | prompt_sections=list(prompt_sections or []), |
| 267 | artifact_paths=list(artifact_paths or []), | 280 | artifact_paths=list(artifact_paths or []), |
@@ -274,6 +287,9 @@ class WorkflowPolicy: | |||
| 274 | clarify_threshold = 0.55 | 287 | clarify_threshold = 0.55 |
| 275 | plan_threshold = 0.45 | 288 | plan_threshold = 0.45 |
| 276 | 289 | ||
| 290 | + def __init__(self, signal_extractor: WorkflowSignalExtractor | None = None) -> None: | ||
| 291 | + self.signal_extractor = signal_extractor or WorkflowSignalExtractor() | ||
| 292 | + | ||
| 277 | def route( | 293 | def route( |
| 278 | self, | 294 | self, |
| 279 | task: str, | 295 | task: str, |
@@ -286,8 +302,26 @@ class WorkflowPolicy: | |||
| 286 | mutating_history: bool = False, | 302 | mutating_history: bool = False, |
| 287 | stale_plan: bool = False, | 303 | stale_plan: bool = False, |
| 288 | unresolved_questions: list[str] | None = None, | 304 | unresolved_questions: list[str] | None = None, |
| 305 | + timeline: list[WorkflowTimelineEntry] | None = None, | ||
| 289 | ) -> ModeDecision: | 306 | ) -> ModeDecision: |
| 290 | - unresolved_questions = list(unresolved_questions or []) | 307 | + signals = self.signal_extractor.extract_route_signals( |
| 308 | + task, | ||
| 309 | + requested_mode=requested_mode.value if requested_mode is not None else None, | ||
| 310 | + has_brief=has_brief, | ||
| 311 | + has_plan=has_plan, | ||
| 312 | + allow_clarify=allow_clarify, | ||
| 313 | + verification_pressure=verification_pressure, | ||
| 314 | + mutating_history=mutating_history, | ||
| 315 | + stale_plan=stale_plan, | ||
| 316 | + unresolved_questions=unresolved_questions, | ||
| 317 | + timeline=timeline, | ||
| 318 | + ) | ||
| 319 | + return self.route_from_signals(signals) | ||
| 320 | + | ||
| 321 | + def route_from_signals(self, signals: WorkflowSignalPacket) -> ModeDecision: | ||
| 322 | + """Route from a typed workflow-signal packet.""" | ||
| 323 | + | ||
| 324 | + requested_mode = WorkflowMode.from_str(signals.requested_mode) | ||
| 291 | if requested_mode is not None: | 325 | if requested_mode is not None: |
| 292 | return ModeDecision( | 326 | return ModeDecision( |
| 293 | mode=requested_mode, | 327 | mode=requested_mode, |
@@ -300,9 +334,10 @@ class WorkflowPolicy: | |||
| 300 | if requested_mode in {WorkflowMode.CLARIFY, WorkflowMode.PLAN} | 334 | if requested_mode in {WorkflowMode.CLARIFY, WorkflowMode.PLAN} |
| 301 | else None | 335 | else None |
| 302 | ), | 336 | ), |
| 337 | + signal_summary=list(signals.signal_summary), | ||
| 303 | ) | 338 | ) |
| 304 | 339 | ||
| 305 | - if stale_plan: | 340 | + if signals.stale_artifact_pressure > 0: |
| 306 | return ModeDecision( | 341 | return ModeDecision( |
| 307 | mode=WorkflowMode.PLAN, | 342 | mode=WorkflowMode.PLAN, |
| 308 | reason_code="stale_plan_artifacts", | 343 | reason_code="stale_plan_artifacts", |
@@ -312,14 +347,15 @@ class WorkflowPolicy: | |||
| 312 | runner_up_mode=WorkflowMode.EXECUTE, | 347 | runner_up_mode=WorkflowMode.EXECUTE, |
| 313 | runner_up_score=0.6, | 348 | runner_up_score=0.6, |
| 314 | scheduled_next_mode=WorkflowMode.EXECUTE, | 349 | scheduled_next_mode=WorkflowMode.EXECUTE, |
| 315 | - unresolved_questions=unresolved_questions, | 350 | + unresolved_questions=list(signals.unresolved_questions), |
| 316 | pressure_summary=[ | 351 | pressure_summary=[ |
| 317 | "plan refresh pressure: stale artifacts require a refreshed plan", | 352 | "plan refresh pressure: stale artifacts require a refreshed plan", |
| 318 | "execute pressure: continue directly with the stale artifacts", | 353 | "execute pressure: continue directly with the stale artifacts", |
| 319 | ], | 354 | ], |
| 355 | + signal_summary=list(signals.signal_summary), | ||
| 320 | ) | 356 | ) |
| 321 | 357 | ||
| 322 | - if has_plan: | 358 | + if signals.has_plan: |
| 323 | return ModeDecision( | 359 | return ModeDecision( |
| 324 | mode=WorkflowMode.EXECUTE, | 360 | mode=WorkflowMode.EXECUTE, |
| 325 | reason_code="existing_plan_artifacts", | 361 | reason_code="existing_plan_artifacts", |
@@ -328,45 +364,52 @@ class WorkflowPolicy: | |||
| 328 | route_score=0.9, | 364 | route_score=0.9, |
| 329 | runner_up_mode=WorkflowMode.PLAN, | 365 | runner_up_mode=WorkflowMode.PLAN, |
| 330 | runner_up_score=0.45, | 366 | runner_up_score=0.45, |
| 331 | - unresolved_questions=unresolved_questions, | 367 | + unresolved_questions=list(signals.unresolved_questions), |
| 332 | pressure_summary=[ | 368 | pressure_summary=[ |
| 333 | "execute pressure: persisted plan artifacts already exist", | 369 | "execute pressure: persisted plan artifacts already exist", |
| 334 | "plan pressure: a plan refresh is available but not required", | 370 | "plan pressure: a plan refresh is available but not required", |
| 335 | ], | 371 | ], |
| 372 | + signal_summary=list(signals.signal_summary), | ||
| 336 | ) | 373 | ) |
| 337 | 374 | ||
| 338 | - ambiguity = self._ambiguity_score(task) | 375 | + ambiguity = signals.ambiguity_score |
| 339 | - complexity = self._complexity_score(task) | 376 | + complexity = signals.complexity_score |
| 340 | 377 | ||
| 341 | clarify_pressure = ambiguity | 378 | clarify_pressure = ambiguity |
| 342 | - if allow_clarify and not has_brief: | 379 | + if signals.allow_clarify and not signals.has_brief: |
| 343 | clarify_pressure += 0.15 | 380 | clarify_pressure += 0.15 |
| 344 | - if unresolved_questions: | 381 | + if signals.unresolved_questions: |
| 345 | - clarify_pressure += 0.12 | 382 | + clarify_pressure += min(0.12, 0.04 * len(signals.unresolved_questions)) |
| 346 | if complexity < 0.55: | 383 | if complexity < 0.55: |
| 347 | clarify_pressure += 0.05 | 384 | clarify_pressure += 0.05 |
| 348 | - if not allow_clarify: | 385 | + if signals.recent_clarify_count and signals.unresolved_questions: |
| 386 | + clarify_pressure += 0.04 | ||
| 387 | + if not signals.allow_clarify: | ||
| 349 | clarify_pressure = 0.0 | 388 | clarify_pressure = 0.0 |
| 350 | 389 | ||
| 351 | plan_pressure = complexity | 390 | plan_pressure = complexity |
| 352 | - if verification_pressure: | 391 | + plan_pressure += signals.verification_pressure |
| 353 | - plan_pressure += 0.12 | 392 | + plan_pressure += signals.mutation_pressure |
| 354 | - if mutating_history: | 393 | + if signals.has_brief: |
| 355 | - plan_pressure += 0.08 | ||
| 356 | - if has_brief: | ||
| 357 | plan_pressure += 0.06 | 394 | plan_pressure += 0.06 |
| 358 | - if unresolved_questions: | 395 | + if signals.unresolved_questions: |
| 359 | plan_pressure += 0.06 | 396 | plan_pressure += 0.06 |
| 397 | + if signals.recent_reentry_count: | ||
| 398 | + plan_pressure += 0.06 | ||
| 399 | + if signals.recent_plan_refresh_count: | ||
| 400 | + plan_pressure += 0.04 | ||
| 360 | 401 | ||
| 361 | execute_pressure = 0.35 | 402 | execute_pressure = 0.35 |
| 362 | - if has_brief: | 403 | + if signals.has_brief: |
| 363 | execute_pressure += 0.14 | 404 | execute_pressure += 0.14 |
| 364 | if ambiguity < 0.35: | 405 | if ambiguity < 0.35: |
| 365 | execute_pressure += 0.16 | 406 | execute_pressure += 0.16 |
| 366 | if complexity < 0.45: | 407 | if complexity < 0.45: |
| 367 | execute_pressure += 0.12 | 408 | execute_pressure += 0.12 |
| 368 | - if not unresolved_questions: | 409 | + if not signals.unresolved_questions: |
| 369 | execute_pressure += 0.05 | 410 | execute_pressure += 0.05 |
| 411 | + if signals.recent_verify_skip_count and not signals.verification_pressure: | ||
| 412 | + execute_pressure += 0.03 | ||
| 370 | 413 | ||
| 371 | scores = { | 414 | scores = { |
| 372 | WorkflowMode.CLARIFY: round(min(clarify_pressure, 1.0), 3), | 415 | WorkflowMode.CLARIFY: round(min(clarify_pressure, 1.0), 3), |
@@ -385,7 +428,7 @@ class WorkflowPolicy: | |||
| 385 | if ( | 428 | if ( |
| 386 | winner == WorkflowMode.CLARIFY | 429 | winner == WorkflowMode.CLARIFY |
| 387 | and winner_score >= self.clarify_threshold | 430 | and winner_score >= self.clarify_threshold |
| 388 | - and allow_clarify | 431 | + and signals.allow_clarify |
| 389 | ): | 432 | ): |
| 390 | return ModeDecision( | 433 | return ModeDecision( |
| 391 | mode=WorkflowMode.CLARIFY, | 434 | mode=WorkflowMode.CLARIFY, |
@@ -397,19 +440,20 @@ class WorkflowPolicy: | |||
| 397 | runner_up_mode=runner_up, | 440 | runner_up_mode=runner_up, |
| 398 | runner_up_score=runner_up_score, | 441 | runner_up_score=runner_up_score, |
| 399 | scheduled_next_mode=WorkflowMode.EXECUTE, | 442 | scheduled_next_mode=WorkflowMode.EXECUTE, |
| 400 | - unresolved_questions=unresolved_questions, | 443 | + unresolved_questions=list(signals.unresolved_questions), |
| 401 | pressure_summary=pressure_summary, | 444 | pressure_summary=pressure_summary, |
| 445 | + signal_summary=list(signals.signal_summary), | ||
| 402 | ) | 446 | ) |
| 403 | 447 | ||
| 404 | if winner == WorkflowMode.PLAN and winner_score >= self.plan_threshold: | 448 | if winner == WorkflowMode.PLAN and winner_score >= self.plan_threshold: |
| 405 | reason_code = ( | 449 | reason_code = ( |
| 406 | "verification_pressure_requires_plan" | 450 | "verification_pressure_requires_plan" |
| 407 | - if verification_pressure | 451 | + if signals.verification_pressure |
| 408 | else "task_is_complex" | 452 | else "task_is_complex" |
| 409 | ) | 453 | ) |
| 410 | reason_summary = ( | 454 | reason_summary = ( |
| 411 | "verification pressure and task complexity favor a persisted plan" | 455 | "verification pressure and task complexity favor a persisted plan" |
| 412 | - if verification_pressure | 456 | + if signals.verification_pressure |
| 413 | else "workflow pressure favors a persisted plan before execution" | 457 | else "workflow pressure favors a persisted plan before execution" |
| 414 | ) | 458 | ) |
| 415 | return ModeDecision( | 459 | return ModeDecision( |
@@ -422,8 +466,9 @@ class WorkflowPolicy: | |||
| 422 | runner_up_mode=runner_up, | 466 | runner_up_mode=runner_up, |
| 423 | runner_up_score=runner_up_score, | 467 | runner_up_score=runner_up_score, |
| 424 | scheduled_next_mode=WorkflowMode.EXECUTE, | 468 | scheduled_next_mode=WorkflowMode.EXECUTE, |
| 425 | - unresolved_questions=unresolved_questions, | 469 | + unresolved_questions=list(signals.unresolved_questions), |
| 426 | pressure_summary=pressure_summary, | 470 | pressure_summary=pressure_summary, |
| 471 | + signal_summary=list(signals.signal_summary), | ||
| 427 | ) | 472 | ) |
| 428 | 473 | ||
| 429 | return ModeDecision( | 474 | return ModeDecision( |
@@ -435,8 +480,9 @@ class WorkflowPolicy: | |||
| 435 | route_score=winner_score, | 480 | route_score=winner_score, |
| 436 | runner_up_mode=runner_up, | 481 | runner_up_mode=runner_up, |
| 437 | runner_up_score=runner_up_score, | 482 | runner_up_score=runner_up_score, |
| 438 | - unresolved_questions=unresolved_questions, | 483 | + unresolved_questions=list(signals.unresolved_questions), |
| 439 | pressure_summary=pressure_summary, | 484 | pressure_summary=pressure_summary, |
| 485 | + signal_summary=list(signals.signal_summary), | ||
| 440 | ) | 486 | ) |
| 441 | 487 | ||
| 442 | def review_clarify( | 488 | def review_clarify( |
src/loader/runtime/workflow_signals.pyadded@@ -0,0 +1,221 @@ | |||
| 1 | +"""Typed workflow-signal extraction for runtime policy decisions.""" | ||
| 2 | + | ||
| 3 | +from __future__ import annotations | ||
| 4 | + | ||
| 5 | +import re | ||
| 6 | +from dataclasses import dataclass, field | ||
| 7 | +from typing import TYPE_CHECKING | ||
| 8 | + | ||
| 9 | +if TYPE_CHECKING: | ||
| 10 | + from .workflow_policy import WorkflowTimelineEntry | ||
| 11 | + | ||
| 12 | + | ||
| 13 | +@dataclass(slots=True) | ||
| 14 | +class WorkflowSignalPacket: | ||
| 15 | + """Typed route context consumed by workflow policy.""" | ||
| 16 | + | ||
| 17 | + task: str | ||
| 18 | + requested_mode: str | None = None | ||
| 19 | + has_brief: bool = False | ||
| 20 | + has_plan: bool = False | ||
| 21 | + allow_clarify: bool = True | ||
| 22 | + ambiguity_score: float = 0.0 | ||
| 23 | + complexity_score: float = 0.0 | ||
| 24 | + verification_pressure: float = 0.0 | ||
| 25 | + mutation_pressure: float = 0.0 | ||
| 26 | + artifact_reuse_pressure: float = 0.0 | ||
| 27 | + stale_artifact_pressure: float = 0.0 | ||
| 28 | + unresolved_questions: list[str] = field(default_factory=list) | ||
| 29 | + recent_clarify_count: int = 0 | ||
| 30 | + recent_reentry_count: int = 0 | ||
| 31 | + recent_plan_refresh_count: int = 0 | ||
| 32 | + recent_verify_skip_count: int = 0 | ||
| 33 | + signal_summary: list[str] = field(default_factory=list) | ||
| 34 | + | ||
| 35 | + | ||
| 36 | +class WorkflowSignalExtractor: | ||
| 37 | + """Build typed workflow-signal packets from runtime state.""" | ||
| 38 | + | ||
| 39 | + def extract_route_signals( | ||
| 40 | + self, | ||
| 41 | + task: str, | ||
| 42 | + *, | ||
| 43 | + requested_mode: str | None = None, | ||
| 44 | + has_brief: bool = False, | ||
| 45 | + has_plan: bool = False, | ||
| 46 | + allow_clarify: bool = True, | ||
| 47 | + verification_pressure: bool = False, | ||
| 48 | + mutating_history: bool = False, | ||
| 49 | + stale_plan: bool = False, | ||
| 50 | + unresolved_questions: list[str] | None = None, | ||
| 51 | + timeline: list[WorkflowTimelineEntry] | None = None, | ||
| 52 | + ) -> WorkflowSignalPacket: | ||
| 53 | + """Derive workflow signals from task state and recent timeline context.""" | ||
| 54 | + | ||
| 55 | + unresolved_questions = list(unresolved_questions or []) | ||
| 56 | + recent_timeline = list(timeline or [])[-6:] | ||
| 57 | + recent_clarify_count = sum( | ||
| 58 | + 1 | ||
| 59 | + for entry in recent_timeline | ||
| 60 | + if entry.mode == "clarify" or entry.kind.startswith("clarify") | ||
| 61 | + ) | ||
| 62 | + recent_reentry_count = sum( | ||
| 63 | + 1 | ||
| 64 | + for entry in recent_timeline | ||
| 65 | + if entry.kind == "reentry" or entry.decision_kind == "reentry" | ||
| 66 | + ) | ||
| 67 | + recent_plan_refresh_count = sum( | ||
| 68 | + 1 | ||
| 69 | + for entry in recent_timeline | ||
| 70 | + if entry.kind == "plan_refresh" or "plan_refresh" in entry.reason_code | ||
| 71 | + ) | ||
| 72 | + recent_verify_skip_count = sum( | ||
| 73 | + 1 | ||
| 74 | + for entry in recent_timeline | ||
| 75 | + if entry.kind == "verify_skip" | ||
| 76 | + ) | ||
| 77 | + ambiguity_score = self._ambiguity_score(task) | ||
| 78 | + complexity_score = self._complexity_score(task) | ||
| 79 | + verification_signal = 0.18 if verification_pressure else 0.0 | ||
| 80 | + mutation_signal = 0.12 if mutating_history else 0.0 | ||
| 81 | + artifact_reuse_signal = 0.2 if has_plan else 0.0 | ||
| 82 | + stale_artifact_signal = 0.45 if stale_plan else 0.0 | ||
| 83 | + | ||
| 84 | + signal_summary: list[str] = [ | ||
| 85 | + f"ambiguity={ambiguity_score:.2f}", | ||
| 86 | + f"complexity={complexity_score:.2f}", | ||
| 87 | + ] | ||
| 88 | + if requested_mode: | ||
| 89 | + signal_summary.append(f"requested_mode={requested_mode}") | ||
| 90 | + if has_brief: | ||
| 91 | + signal_summary.append("clarify_brief=available") | ||
| 92 | + if has_plan: | ||
| 93 | + signal_summary.append("plan_artifacts=available") | ||
| 94 | + if stale_plan: | ||
| 95 | + signal_summary.append("plan_artifacts=stale") | ||
| 96 | + if unresolved_questions: | ||
| 97 | + signal_summary.append( | ||
| 98 | + f"open_questions={min(len(unresolved_questions), 9)}" | ||
| 99 | + ) | ||
| 100 | + if verification_pressure: | ||
| 101 | + signal_summary.append("verification_pressure=active") | ||
| 102 | + if mutating_history: | ||
| 103 | + signal_summary.append("mutation_pressure=active") | ||
| 104 | + if recent_clarify_count: | ||
| 105 | + signal_summary.append(f"recent_clarify={recent_clarify_count}") | ||
| 106 | + if recent_reentry_count: | ||
| 107 | + signal_summary.append(f"recent_reentry={recent_reentry_count}") | ||
| 108 | + if recent_plan_refresh_count: | ||
| 109 | + signal_summary.append(f"recent_plan_refresh={recent_plan_refresh_count}") | ||
| 110 | + if recent_verify_skip_count: | ||
| 111 | + signal_summary.append(f"recent_verify_skip={recent_verify_skip_count}") | ||
| 112 | + | ||
| 113 | + return WorkflowSignalPacket( | ||
| 114 | + task=task, | ||
| 115 | + requested_mode=requested_mode, | ||
| 116 | + has_brief=has_brief, | ||
| 117 | + has_plan=has_plan, | ||
| 118 | + allow_clarify=allow_clarify, | ||
| 119 | + ambiguity_score=ambiguity_score, | ||
| 120 | + complexity_score=complexity_score, | ||
| 121 | + verification_pressure=verification_signal, | ||
| 122 | + mutation_pressure=mutation_signal, | ||
| 123 | + artifact_reuse_pressure=artifact_reuse_signal, | ||
| 124 | + stale_artifact_pressure=stale_artifact_signal, | ||
| 125 | + unresolved_questions=unresolved_questions, | ||
| 126 | + recent_clarify_count=recent_clarify_count, | ||
| 127 | + recent_reentry_count=recent_reentry_count, | ||
| 128 | + recent_plan_refresh_count=recent_plan_refresh_count, | ||
| 129 | + recent_verify_skip_count=recent_verify_skip_count, | ||
| 130 | + signal_summary=signal_summary, | ||
| 131 | + ) | ||
| 132 | + | ||
| 133 | + @staticmethod | ||
| 134 | + def _ambiguity_score(task: str) -> float: | ||
| 135 | + lowered = task.lower() | ||
| 136 | + words = re.findall(r"\w+", lowered) | ||
| 137 | + score = 0.0 | ||
| 138 | + | ||
| 139 | + if ( | ||
| 140 | + "--clarify" in lowered | ||
| 141 | + or "don't assume" in lowered | ||
| 142 | + or "do not assume" in lowered | ||
| 143 | + or "not sure" in lowered | ||
| 144 | + or "figure out" in lowered | ||
| 145 | + or "interview me" in lowered | ||
| 146 | + or "ask me" in lowered | ||
| 147 | + or lowered.startswith("clarify ") | ||
| 148 | + ): | ||
| 149 | + score += 0.65 | ||
| 150 | + | ||
| 151 | + if any( | ||
| 152 | + phrase in lowered | ||
| 153 | + for phrase in ( | ||
| 154 | + "something", | ||
| 155 | + "somehow", | ||
| 156 | + "better", | ||
| 157 | + "improve", | ||
| 158 | + "fix this", | ||
| 159 | + "make it", | ||
| 160 | + "more like", | ||
| 161 | + "feels more like", | ||
| 162 | + ) | ||
| 163 | + ): | ||
| 164 | + score += 0.2 | ||
| 165 | + | ||
| 166 | + if not _has_concrete_anchor(task): | ||
| 167 | + score += 0.2 | ||
| 168 | + | ||
| 169 | + if len(words) <= 12 and any( | ||
| 170 | + verb in lowered | ||
| 171 | + for verb in ("build", "add", "improve", "refactor", "implement") | ||
| 172 | + ): | ||
| 173 | + score += 0.15 | ||
| 174 | + | ||
| 175 | + return round(min(score, 1.0), 3) | ||
| 176 | + | ||
| 177 | + @staticmethod | ||
| 178 | + def _complexity_score(task: str) -> float: | ||
| 179 | + lowered = task.lower() | ||
| 180 | + words = re.findall(r"\w+", lowered) | ||
| 181 | + score = 0.0 | ||
| 182 | + | ||
| 183 | + if len(words) >= 18: | ||
| 184 | + score += 0.2 | ||
| 185 | + if len(words) >= 30: | ||
| 186 | + score += 0.15 | ||
| 187 | + | ||
| 188 | + if any( | ||
| 189 | + phrase in lowered | ||
| 190 | + for phrase in ( | ||
| 191 | + "refactor", | ||
| 192 | + "architecture", | ||
| 193 | + "migrate", | ||
| 194 | + "persistent", | ||
| 195 | + "workflow", | ||
| 196 | + "deep dive", | ||
| 197 | + "report", | ||
| 198 | + "implementation plan", | ||
| 199 | + "verification plan", | ||
| 200 | + ) | ||
| 201 | + ): | ||
| 202 | + score += 0.3 | ||
| 203 | + | ||
| 204 | + if lowered.count(" and ") >= 2 or lowered.count(",") >= 2: | ||
| 205 | + score += 0.15 | ||
| 206 | + | ||
| 207 | + if _has_concrete_anchor(task): | ||
| 208 | + score += 0.1 | ||
| 209 | + | ||
| 210 | + return round(min(score, 1.0), 3) | ||
| 211 | + | ||
| 212 | + | ||
| 213 | +def _has_concrete_anchor(task: str) -> bool: | ||
| 214 | + return bool( | ||
| 215 | + re.search(r"[./_\\-]", task) | ||
| 216 | + or re.search(r"`[^`]+`", task) | ||
| 217 | + or any( | ||
| 218 | + token in task.lower() | ||
| 219 | + for token in ("test", "file", "function", "class") | ||
| 220 | + ) | ||
| 221 | + ) | ||
tests/test_workflow_policy.pymodified@@ -8,6 +8,7 @@ from loader.runtime.workflow import ( | |||
| 8 | WorkflowTimelineEntry, | 8 | WorkflowTimelineEntry, |
| 9 | WorkflowTimelineEntryKind, | 9 | WorkflowTimelineEntryKind, |
| 10 | ) | 10 | ) |
| 11 | +from loader.runtime.workflow_signals import WorkflowSignalPacket | ||
| 11 | 12 | ||
| 12 | 13 | ||
| 13 | def test_workflow_policy_reports_winner_and_runner_up() -> None: | 14 | def test_workflow_policy_reports_winner_and_runner_up() -> None: |
@@ -20,6 +21,24 @@ def test_workflow_policy_reports_winner_and_runner_up() -> None: | |||
| 20 | assert decision.runner_up_mode is not None | 21 | assert decision.runner_up_mode is not None |
| 21 | assert decision.runner_up_score > 0 | 22 | assert decision.runner_up_score > 0 |
| 22 | assert decision.pressure_summary | 23 | assert decision.pressure_summary |
| 24 | + assert decision.signal_summary | ||
| 25 | + | ||
| 26 | + | ||
| 27 | +def test_workflow_policy_routes_from_typed_signal_packet() -> None: | ||
| 28 | + policy = WorkflowPolicy() | ||
| 29 | + | ||
| 30 | + decision = policy.route_from_signals( | ||
| 31 | + WorkflowSignalPacket( | ||
| 32 | + task="Keep improving Loader.", | ||
| 33 | + ambiguity_score=0.62, | ||
| 34 | + complexity_score=0.28, | ||
| 35 | + allow_clarify=True, | ||
| 36 | + signal_summary=["ambiguity=0.62", "complexity=0.28"], | ||
| 37 | + ) | ||
| 38 | + ) | ||
| 39 | + | ||
| 40 | + assert decision.mode == WorkflowMode.CLARIFY | ||
| 41 | + assert decision.signal_summary == ["ambiguity=0.62", "complexity=0.28"] | ||
| 23 | 42 | ||
| 24 | 43 | ||
| 25 | def test_workflow_policy_prefers_plan_refresh_for_stale_plan() -> None: | 44 | def test_workflow_policy_prefers_plan_refresh_for_stale_plan() -> None: |
@@ -79,6 +98,7 @@ def test_workflow_timeline_entry_round_trips() -> None: | |||
| 79 | runner_up_score=0.66, | 98 | runner_up_score=0.66, |
| 80 | scheduled_next_mode="execute", | 99 | scheduled_next_mode="execute", |
| 81 | unresolved_questions=["Scope is still broad."], | 100 | unresolved_questions=["Scope is still broad."], |
| 101 | + signal_summary=["ambiguity=0.20", "complexity=0.81"], | ||
| 82 | prompt_format="native", | 102 | prompt_format="native", |
| 83 | prompt_sections=["Runtime Config", "Workflow Context"], | 103 | prompt_sections=["Runtime Config", "Workflow Context"], |
| 84 | artifact_paths=["/tmp/implementation.md"], | 104 | artifact_paths=["/tmp/implementation.md"], |
tests/test_workflow_signals.pyadded@@ -0,0 +1,55 @@ | |||
| 1 | +"""Tests for typed workflow-signal extraction.""" | ||
| 2 | + | ||
| 3 | +from __future__ import annotations | ||
| 4 | + | ||
| 5 | +from loader.runtime.workflow import ( | ||
| 6 | + WorkflowSignalExtractor, | ||
| 7 | + WorkflowTimelineEntry, | ||
| 8 | +) | ||
| 9 | + | ||
| 10 | + | ||
| 11 | +def test_workflow_signal_extractor_captures_recent_timeline_pressure() -> None: | ||
| 12 | + extractor = WorkflowSignalExtractor() | ||
| 13 | + timeline = [ | ||
| 14 | + WorkflowTimelineEntry( | ||
| 15 | + timestamp="2026-04-07T12:00:00Z", | ||
| 16 | + kind="clarify_continue", | ||
| 17 | + mode="clarify", | ||
| 18 | + reason_code="clarify_follow_up_needed", | ||
| 19 | + summary="clarify: clarify pressure remains high", | ||
| 20 | + decision_kind="forced", | ||
| 21 | + ), | ||
| 22 | + WorkflowTimelineEntry( | ||
| 23 | + timestamp="2026-04-07T12:01:00Z", | ||
| 24 | + kind="reentry", | ||
| 25 | + mode="execute", | ||
| 26 | + reason_code="verification_failed_reentry", | ||
| 27 | + summary="execute: verification failed; returning to execute", | ||
| 28 | + decision_kind="reentry", | ||
| 29 | + ), | ||
| 30 | + WorkflowTimelineEntry( | ||
| 31 | + timestamp="2026-04-07T12:02:00Z", | ||
| 32 | + kind="verify_skip", | ||
| 33 | + mode="verify", | ||
| 34 | + reason_code="verification_not_required", | ||
| 35 | + summary="verify: verification skipped", | ||
| 36 | + decision_kind="forced", | ||
| 37 | + ), | ||
| 38 | + ] | ||
| 39 | + | ||
| 40 | + signals = extractor.extract_route_signals( | ||
| 41 | + "Improve Loader so it feels more like claw-code.", | ||
| 42 | + has_brief=True, | ||
| 43 | + unresolved_questions=["Scope is still broad."], | ||
| 44 | + timeline=timeline, | ||
| 45 | + ) | ||
| 46 | + | ||
| 47 | + assert signals.ambiguity_score > 0 | ||
| 48 | + assert signals.has_brief is True | ||
| 49 | + assert signals.recent_clarify_count == 1 | ||
| 50 | + assert signals.recent_reentry_count == 1 | ||
| 51 | + assert signals.recent_verify_skip_count == 1 | ||
| 52 | + assert "clarify_brief=available" in signals.signal_summary | ||
| 53 | + assert "open_questions=1" in signals.signal_summary | ||
| 54 | + assert "recent_reentry=1" in signals.signal_summary | ||
| 55 | + | ||