Add validated turn state machine and workflow metadata
- SHA: 2f03a4d0c7747003aeaad1cde144e844376117cf (short: 2f03a4d)
- Parents: 203d232
- Tree: 8e95248

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/conversation.py | 71 | 15 |
| M | src/loader/runtime/events.py | 7 | 0 |
| M | src/loader/runtime/finalization.py | 22 | 6 |
| M | src/loader/runtime/inspection.py | 35 | 0 |
| M | src/loader/runtime/phases.py | 141 | 7 |
| M | src/loader/runtime/session.py | 145 | 1 |
| M | src/loader/runtime/workflow.py | 96 | 9 |
| M | tests/test_inspection.py | 30 | 0 |
| M | tests/test_runtime_phases.py | 21 | 0 |
| M | tests/test_session_state.py | 22 | 0 |
| A | tests/test_turn_state_machine.py | 66 | 0 |
| M | tests/test_workflow_runtime.py | 14 | 1 |
src/loader/runtime/conversation.py modified
@@ -20,16 +20,18 @@ from .events import AgentEvent, TurnSummary | ||
| 20 | 20 | from .executor import ToolExecutor |
| 21 | 21 | from .finalization import TurnFinalizer, merge_usage |
| 22 | 22 | from .hooks import build_default_tool_hooks |
| 23 | -from .phases import TurnPhase, TurnPhaseTracker | |
| 23 | +from .phases import TurnPhase, TurnPhaseTracker, TurnTransitionKind | |
| 24 | 24 | from .repair import ResponseRepairer |
| 25 | 25 | from .tool_batches import ToolBatchRunner |
| 26 | 26 | from .tracing import RuntimeTracer |
| 27 | 27 | from .workflow import ( |
| 28 | 28 | VERIFICATION_SEPARATOR, |
| 29 | 29 | ClarifyBrief, |
| 30 | + ModeDecision, | |
| 30 | 31 | ModeRouter, |
| 31 | 32 | PlanningArtifacts, |
| 32 | 33 | WorkflowArtifactStore, |
| 34 | + WorkflowDecisionKind, | |
| 33 | 35 | WorkflowMode, |
| 34 | 36 | build_execute_bridge, |
| 35 | 37 | sync_todos_to_definition_of_done, |
@@ -77,6 +79,7 @@ class ConversationRuntime: | ||
| 77 | 79 | TurnPhase.PREPARE, |
| 78 | 80 | emit, |
| 79 | 81 | detail="Preparing runtime state", |
| 82 | + reason_code="prepare_runtime", | |
| 80 | 83 | ) |
| 81 | 84 | await self._prepare_runtime_capabilities() |
| 82 | 85 | |
@@ -174,6 +177,7 @@ class ConversationRuntime: | ||
| 174 | 177 | TurnPhase.ASSISTANT, |
| 175 | 178 | emit, |
| 176 | 179 | detail="Requesting assistant response", |
| 180 | + reason_code="request_assistant_response", | |
| 177 | 181 | ) |
| 178 | 182 | await emit(AgentEvent(type="thinking")) |
| 179 | 183 | assistant_turn = await self.turn_requester.request_turn( |
@@ -192,6 +196,8 @@ class ConversationRuntime: | ||
| 192 | 196 | TurnPhase.REPAIR, |
| 193 | 197 | emit, |
| 194 | 198 | detail="Repairing empty assistant response", |
| 199 | + reason_code="repair_empty_response", | |
| 200 | + kind=TurnTransitionKind.RETRY, | |
| 195 | 201 | ) |
| 196 | 202 | empty_retry_count += 1 |
| 197 | 203 | empty_decision = self.repairer.handle_empty_response( |
@@ -232,6 +238,8 @@ class ConversationRuntime: | ||
| 232 | 238 | TurnPhase.REPAIR, |
| 233 | 239 | emit, |
| 234 | 240 | detail="Repairing raw-text tool fallback", |
| 241 | + reason_code="repair_raw_text_tool_fallback", | |
| 242 | + kind=TurnTransitionKind.REROUTE, | |
| 235 | 243 | ) |
| 236 | 244 | await emit(AgentEvent(type="clear_stream")) |
| 237 | 245 | |
@@ -261,6 +269,7 @@ class ConversationRuntime: | ||
| 261 | 269 | TurnPhase.TOOLS, |
| 262 | 270 | emit, |
| 263 | 271 | detail="Executing tool batch", |
| 272 | + reason_code="execute_tool_batch", | |
| 264 | 273 | ) |
| 265 | 274 | assistant_message = Message( |
| 266 | 275 | role=Role.ASSISTANT, |
@@ -291,7 +300,12 @@ class ConversationRuntime: | ||
| 291 | 300 | actions_taken.extend(batch_result.actions_taken) |
| 292 | 301 | consecutive_errors = batch_result.consecutive_errors |
| 293 | 302 | if batch_result.halted: |
| 294 | - return await self._finalize_turn(summary, emit) | |
| 303 | + return await self._finalize_turn( | |
| 304 | + summary, | |
| 305 | + emit, | |
| 306 | + reason_code="tool_batch_halted", | |
| 307 | + reason_summary="Finalizing after halted tool batch", | |
| 308 | + ) | |
| 295 | 309 | |
| 296 | 310 | continue |
| 297 | 311 | |
@@ -305,6 +319,8 @@ class ConversationRuntime: | ||
| 305 | 319 | TurnPhase.REPAIR, |
| 306 | 320 | emit, |
| 307 | 321 | detail="Repairing fake tool narration", |
| 322 | + reason_code="repair_fake_tool_narration", | |
| 323 | + kind=TurnTransitionKind.REROUTE, | |
| 308 | 324 | ) |
| 309 | 325 | self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content)) |
| 310 | 326 | self.agent.session.append(Message(role=Role.USER, content=repair_message)) |
@@ -321,6 +337,8 @@ class ConversationRuntime: | ||
| 321 | 337 | TurnPhase.REPAIR, |
| 322 | 338 | emit, |
| 323 | 339 | detail="Repairing execution deflection", |
| 340 | + reason_code="repair_execution_deflection", | |
| 341 | + kind=TurnTransitionKind.REROUTE, | |
| 324 | 342 | ) |
| 325 | 343 | self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content)) |
| 326 | 344 | self.agent.session.append( |
@@ -334,6 +352,7 @@ class ConversationRuntime: | ||
| 334 | 352 | TurnPhase.CRITIQUE, |
| 335 | 353 | emit, |
| 336 | 354 | detail="Evaluating self-critique", |
| 355 | + reason_code="evaluate_self_critique", | |
| 337 | 356 | ) |
| 338 | 357 | critique_decision = await self.completion_policy.maybe_self_critique( |
| 339 | 358 | content=content, |
@@ -348,6 +367,7 @@ class ConversationRuntime: | ||
| 348 | 367 | TurnPhase.COMPLETION, |
| 349 | 368 | emit, |
| 350 | 369 | detail="Checking completion policy", |
| 370 | + reason_code="completion_gate", | |
| 351 | 371 | ) |
| 352 | 372 | text_loop_decision = await self.completion_policy.maybe_stop_for_text_loop( |
| 353 | 373 | content=content, |
@@ -355,7 +375,12 @@ class ConversationRuntime: | ||
| 355 | 375 | summary=summary, |
| 356 | 376 | ) |
| 357 | 377 | if text_loop_decision.should_stop: |
| 358 | - return await self._finalize_turn(summary, emit) | |
| 378 | + return await self._finalize_turn( | |
| 379 | + summary, | |
| 380 | + emit, | |
| 381 | + reason_code="text_loop_bailout", | |
| 382 | + reason_summary="Finalizing after text-loop bailout", | |
| 383 | + ) | |
| 359 | 384 | |
| 360 | 385 | self.agent.safeguards.record_response(content) |
| 361 | 386 | effective_task = original_task or task |
@@ -411,17 +436,27 @@ class ConversationRuntime: | ||
| 411 | 436 | await emit(AgentEvent(type="response", content=final_response)) |
| 412 | 437 | break |
| 413 | 438 | |
| 414 | - return await self._finalize_turn(summary, emit) | |
| 439 | + return await self._finalize_turn( | |
| 440 | + summary, | |
| 441 | + emit, | |
| 442 | + reason_code="turn_complete", | |
| 443 | + reason_summary="Finalizing completed turn", | |
| 444 | + ) | |
| 415 | 445 | |
| 416 | 446 | async def _finalize_turn( |
| 417 | 447 | self, |
| 418 | 448 | summary: TurnSummary, |
| 419 | 449 | emit: EventSink, |
| 450 | + *, | |
| 451 | + reason_code: str, | |
| 452 | + reason_summary: str, | |
| 420 | 453 | ) -> TurnSummary: |
| 421 | 454 | await self.phase_tracker.enter( |
| 422 | 455 | TurnPhase.FINALIZE, |
| 423 | 456 | emit, |
| 424 | - detail="Finalizing turn summary", | |
| 457 | + detail=reason_summary, | |
| 458 | + reason_code=reason_code, | |
| 459 | + kind=TurnTransitionKind.TERMINAL, | |
| 425 | 460 | ) |
| 426 | 461 | final_summary = self.finalizer.finalize_summary(summary) |
| 427 | 462 | self.phase_tracker.clear() |
@@ -447,11 +482,10 @@ class ConversationRuntime: | ||
| 447 | 482 | and self._artifact_exists(dod.verification_plan), |
| 448 | 483 | ) |
| 449 | 484 | await self._set_workflow_mode( |
| 450 | - decision.mode, | |
| 485 | + decision, | |
| 451 | 486 | dod=dod, |
| 452 | 487 | emit=emit, |
| 453 | 488 | summary=summary, |
| 454 | - reason=decision.reason, | |
| 455 | 489 | ) |
| 456 | 490 | |
| 457 | 491 | if decision.mode == WorkflowMode.CLARIFY: |
@@ -470,11 +504,14 @@ class ConversationRuntime: | ||
| 470 | 504 | allow_clarify=False, |
| 471 | 505 | ) |
| 472 | 506 | await self._set_workflow_mode( |
| 473 | - decision.mode, | |
| 507 | + decision.with_context( | |
| 508 | + reason_code=f"post_clarify_{decision.reason_code}", | |
| 509 | + reason_summary=f"clarify handoff: {decision.reason_summary}", | |
| 510 | + decision_kind=WorkflowDecisionKind.HANDOFF, | |
| 511 | + ), | |
| 474 | 512 | dod=dod, |
| 475 | 513 | emit=emit, |
| 476 | 514 | summary=summary, |
| 477 | - reason=f"clarify handoff: {decision.reason}", | |
| 478 | 515 | ) |
| 479 | 516 | |
| 480 | 517 | if decision.mode == WorkflowMode.PLAN: |
@@ -487,11 +524,15 @@ class ConversationRuntime: | ||
| 487 | 524 | on_user_question=on_user_question, |
| 488 | 525 | ) |
| 489 | 526 | await self._set_workflow_mode( |
| 490 | - WorkflowMode.EXECUTE, | |
| 527 | + ModeDecision.transition( | |
| 528 | + WorkflowMode.EXECUTE, | |
| 529 | + reason_code="plan_artifacts_created", | |
| 530 | + reason_summary="plan artifacts created; switching to execute", | |
| 531 | + decision_kind=WorkflowDecisionKind.HANDOFF, | |
| 532 | + ), | |
| 491 | 533 | dod=dod, |
| 492 | 534 | emit=emit, |
| 493 | 535 | summary=summary, |
| 494 | - reason="plan artifacts created; switching to execute", | |
| 495 | 536 | ) |
| 496 | 537 | |
| 497 | 538 | bridge = build_execute_bridge( |
@@ -518,25 +559,40 @@ class ConversationRuntime: | ||
| 518 | 559 | |
| 519 | 560 | async def _set_workflow_mode( |
| 520 | 561 | self, |
| 521 | - mode: WorkflowMode, | |
| 562 | + decision: ModeDecision, | |
| 522 | 563 | *, |
| 523 | 564 | dod: DefinitionOfDone, |
| 524 | 565 | emit: EventSink, |
| 525 | 566 | summary: TurnSummary, |
| 526 | - reason: str, | |
| 527 | 567 | ) -> None: |
| 568 | + mode = decision.mode | |
| 528 | 569 | self.agent.set_workflow_mode(mode.value) |
| 529 | - self.agent.session.update_runtime_state(workflow_mode=mode.value) | |
| 570 | + self.agent.session.update_runtime_state( | |
| 571 | + workflow_mode=mode.value, | |
| 572 | + workflow_reason_code=decision.reason_code, | |
| 573 | + workflow_reason_summary=decision.reason_summary, | |
| 574 | + workflow_decision_kind=decision.decision_kind.value, | |
| 575 | + workflow_ambiguity_score=decision.ambiguity_score, | |
| 576 | + workflow_complexity_score=decision.complexity_score, | |
| 577 | + workflow_scheduled_next_mode=( | |
| 578 | + decision.scheduled_next_mode.value | |
| 579 | + if decision.scheduled_next_mode is not None | |
| 580 | + else None | |
| 581 | + ), | |
| 582 | + ) | |
| 530 | 583 | dod.current_mode = mode.value |
| 531 | 584 | if not dod.mode_history or dod.mode_history[-1] != mode.value: |
| 532 | 585 | dod.mode_history.append(mode.value) |
| 533 | 586 | summary.workflow_mode = mode.value |
| 587 | + summary.workflow_reason_code = decision.reason_code | |
| 588 | + summary.workflow_reason_summary = decision.reason_summary | |
| 589 | + summary.workflow_decision_kind = decision.decision_kind.value | |
| 534 | 590 | summary.definition_of_done = dod |
| 535 | 591 | self.dod_store.save(dod) |
| 536 | 592 | await emit( |
| 537 | 593 | AgentEvent( |
| 538 | 594 | type="workflow_mode", |
| 539 | - content=f"Workflow: {mode.value} ({reason})", | |
| 595 | + content=f"Workflow: {mode.value} ({decision.reason_summary})", | |
| 540 | 596 | workflow_mode=mode.value, |
| 541 | 597 | definition_of_done=dod, |
| 542 | 598 | ) |
src/loader/runtime/events.py modified
@@ -40,6 +40,9 @@ class AgentEvent: | ||
| 40 | 40 | last_verification_result: str | None = None |
| 41 | 41 | workflow_mode: str | None = None |
| 42 | 42 | turn_phase: str | None = None |
| 43 | + transition_kind: str | None = None | |
| 44 | + transition_summary: str | None = None | |
| 45 | + transition_reason_code: str | None = None | |
| 43 | 46 | artifact_kind: str | None = None |
| 44 | 47 | artifact_path: str | None = None |
| 45 | 48 | |
@@ -69,4 +72,8 @@ class TurnSummary: | ||
| 69 | 72 | trace: list[RuntimeTraceEvent] = field(default_factory=list) |
| 70 | 73 | definition_of_done: DefinitionOfDone | None = None |
| 71 | 74 | workflow_mode: str | None = None |
| 75 | + workflow_reason_code: str | None = None | |
| 76 | + workflow_reason_summary: str | None = None | |
| 77 | + workflow_decision_kind: str | None = None | |
| 78 | + last_turn_transition_summary: str | None = None | |
| 72 | 79 | session_id: str | None = None |
src/loader/runtime/finalization.py modified
@@ -19,11 +19,16 @@ from .executor import ToolExecutor | ||
| 19 | 19 | from .memory import MemoryStore |
| 20 | 20 | from .session import normalize_usage |
| 21 | 21 | from .tracing import RuntimeTracer |
| 22 | -from .workflow import WorkflowMode, extract_verification_commands_from_markdown | |
| 22 | +from .workflow import ( | |
| 23 | + ModeDecision, | |
| 24 | + WorkflowDecisionKind, | |
| 25 | + WorkflowMode, | |
| 26 | + extract_verification_commands_from_markdown, | |
| 27 | +) | |
| 23 | 28 | |
| 24 | 29 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 25 | 30 | WorkflowSetter = Callable[ |
| 26 | - [WorkflowMode, DefinitionOfDone, EventSink, TurnSummary, str], | |
| 31 | + [ModeDecision, DefinitionOfDone, EventSink, TurnSummary], | |
| 27 | 32 | Awaitable[None], |
| 28 | 33 | ] |
| 29 | 34 | |
@@ -123,11 +128,15 @@ class TurnFinalizer: | ||
| 123 | 128 | ) |
| 124 | 129 | |
| 125 | 130 | await self.set_workflow_mode( |
| 126 | - WorkflowMode.VERIFY, | |
| 131 | + ModeDecision.transition( | |
| 132 | + WorkflowMode.VERIFY, | |
| 133 | + reason_code="definition_of_done_requires_verification", | |
| 134 | + reason_summary="definition-of-done gate requires verification", | |
| 135 | + decision_kind=WorkflowDecisionKind.HANDOFF, | |
| 136 | + ), | |
| 127 | 137 | dod=dod, |
| 128 | 138 | emit=emit, |
| 129 | 139 | summary=summary, |
| 130 | - reason="definition-of-done gate requires verification", | |
| 131 | 140 | ) |
| 132 | 141 | verification_passed = await self.verify_definition_of_done( |
| 133 | 142 | dod=dod, |
@@ -184,11 +193,15 @@ class TurnFinalizer: | ||
| 184 | 193 | self.dod_store.save(dod) |
| 185 | 194 | await self.emit_dod_status(emit, dod) |
| 186 | 195 | await self.set_workflow_mode( |
| 187 | - WorkflowMode.EXECUTE, | |
| 196 | + ModeDecision.transition( | |
| 197 | + WorkflowMode.EXECUTE, | |
| 198 | + reason_code="verification_failed_reentry", | |
| 199 | + reason_summary="verification failed; returning to execute for fixes", | |
| 200 | + decision_kind=WorkflowDecisionKind.REENTRY, | |
| 201 | + ), | |
| 188 | 202 | dod=dod, |
| 189 | 203 | emit=emit, |
| 190 | 204 | summary=summary, |
| 191 | - reason="verification failed; returning to execute for fixes", | |
| 192 | 205 | ) |
| 193 | 206 | failure_prompt = ( |
| 194 | 207 | "[DEFINITION OF DONE CHECK FAILED]\n" |
@@ -284,6 +297,9 @@ class TurnFinalizer: | ||
| 284 | 297 | iterations=summary.iterations, |
| 285 | 298 | ) |
| 286 | 299 | summary.session_id = self.agent.session.session_id |
| 300 | + summary.last_turn_transition_summary = ( | |
| 301 | + self.agent.session.last_turn_transition_summary | |
| 302 | + ) | |
| 287 | 303 | if summary.definition_of_done and summary.definition_of_done.status == "done": |
| 288 | 304 | MemoryStore(self.agent.project_root).capture_definition_of_done( |
| 289 | 305 | build_verification_summary(summary.definition_of_done.evidence) |
src/loader/runtime/inspection.py modified
@@ -151,7 +151,16 @@ class StatusSnapshot: | ||
| 151 | 151 | capability_profile: CapabilityProfile |
| 152 | 152 | active_session_id: str | None |
| 153 | 153 | workflow_mode: str |
| 154 | + workflow_reason_code: str | None | |
| 155 | + workflow_reason_summary: str | None | |
| 156 | + workflow_decision_kind: str | None | |
| 157 | + workflow_ambiguity_score: float | None | |
| 158 | + workflow_complexity_score: float | None | |
| 159 | + workflow_scheduled_next_mode: str | None | |
| 154 | 160 | active_turn_phase: str | None |
| 161 | + last_turn_transition_summary: str | None | |
| 162 | + last_turn_transition_kind: str | None | |
| 163 | + last_turn_transition_reason_code: str | None | |
| 155 | 164 | permission_mode: str |
| 156 | 165 | permission_prompting_enabled: bool |
| 157 | 166 | permission_rule_counts: dict[str, int] |
@@ -180,12 +189,16 @@ class SessionSummary: | ||
| 180 | 189 | updated_at: str |
| 181 | 190 | message_count: int |
| 182 | 191 | workflow_mode: str |
| 192 | + workflow_reason_code: str | None | |
| 193 | + workflow_reason_summary: str | None | |
| 194 | + workflow_decision_kind: str | None | |
| 183 | 195 | permission_mode: str |
| 184 | 196 | permission_prompting_enabled: bool |
| 185 | 197 | permission_rule_counts: dict[str, int] |
| 186 | 198 | permission_rules_source: str | None |
| 187 | 199 | prompt_format: str | None |
| 188 | 200 | active_turn_phase: str | None |
| 201 | + last_turn_transition_summary: str | None | |
| 189 | 202 | current_task: str | None |
| 190 | 203 | active_dod_path: str | None |
| 191 | 204 | dod_status: str | None |
@@ -319,7 +332,16 @@ def collect_status_snapshot( | ||
| 319 | 332 | capability_profile=capability_profile, |
| 320 | 333 | active_session_id=None, |
| 321 | 334 | workflow_mode="execute", |
| 335 | + workflow_reason_code=None, | |
| 336 | + workflow_reason_summary=None, | |
| 337 | + workflow_decision_kind=None, | |
| 338 | + workflow_ambiguity_score=None, | |
| 339 | + workflow_complexity_score=None, | |
| 340 | + workflow_scheduled_next_mode=None, | |
| 322 | 341 | active_turn_phase=None, |
| 342 | + last_turn_transition_summary=None, | |
| 343 | + last_turn_transition_kind=None, | |
| 344 | + last_turn_transition_reason_code=None, | |
| 323 | 345 | permission_mode=default_permission_mode, |
| 324 | 346 | permission_prompting_enabled=( |
| 325 | 347 | _coerce_permission_mode(permission_mode) == PermissionMode.PROMPT |
@@ -365,7 +387,16 @@ def collect_status_snapshot( | ||
| 365 | 387 | capability_profile=capability_profile, |
| 366 | 388 | active_session_id=snapshot.session_id, |
| 367 | 389 | workflow_mode=snapshot.workflow_mode, |
| 390 | + workflow_reason_code=snapshot.workflow_reason_code, | |
| 391 | + workflow_reason_summary=snapshot.workflow_reason_summary, | |
| 392 | + workflow_decision_kind=snapshot.workflow_decision_kind, | |
| 393 | + workflow_ambiguity_score=snapshot.workflow_ambiguity_score, | |
| 394 | + workflow_complexity_score=snapshot.workflow_complexity_score, | |
| 395 | + workflow_scheduled_next_mode=snapshot.workflow_scheduled_next_mode, | |
| 368 | 396 | active_turn_phase=snapshot.active_turn_phase, |
| 397 | + last_turn_transition_summary=snapshot.last_turn_transition_summary, | |
| 398 | + last_turn_transition_kind=snapshot.last_turn_transition_kind, | |
| 399 | + last_turn_transition_reason_code=snapshot.last_turn_transition_reason_code, | |
| 369 | 400 | permission_mode=snapshot.permission_mode or default_permission_mode, |
| 370 | 401 | permission_prompting_enabled=permission_prompting_enabled, |
| 371 | 402 | permission_rule_counts=permission_rule_counts, |
@@ -410,6 +441,9 @@ def list_session_summaries(project_root: Path | str | None = None) -> list[Sessi | ||
| 410 | 441 | updated_at=snapshot.updated_at, |
| 411 | 442 | message_count=len(snapshot.messages), |
| 412 | 443 | workflow_mode=snapshot.workflow_mode, |
| 444 | + workflow_reason_code=snapshot.workflow_reason_code, | |
| 445 | + workflow_reason_summary=snapshot.workflow_reason_summary, | |
| 446 | + workflow_decision_kind=snapshot.workflow_decision_kind, | |
| 413 | 447 | permission_mode=snapshot.permission_mode, |
| 414 | 448 | permission_prompting_enabled=( |
| 415 | 449 | snapshot.permission_prompting_enabled |
@@ -419,6 +453,7 @@ def list_session_summaries(project_root: Path | str | None = None) -> list[Sessi | ||
| 419 | 453 | permission_rules_source=snapshot.permission_rules_source, |
| 420 | 454 | prompt_format=snapshot.prompt_format, |
| 421 | 455 | active_turn_phase=snapshot.active_turn_phase, |
| 456 | + last_turn_transition_summary=snapshot.last_turn_transition_summary, | |
| 422 | 457 | current_task=snapshot.current_task, |
| 423 | 458 | active_dod_path=snapshot.active_dod_path, |
| 424 | 459 | dod_status=dod.status if dod else None, |
src/loader/runtime/phases.py modified
@@ -3,6 +3,7 @@ | ||
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | 5 | from collections.abc import Awaitable, Callable |
| 6 | +from dataclasses import dataclass | |
| 6 | 7 | from enum import StrEnum |
| 7 | 8 | |
| 8 | 9 | from .events import AgentEvent |
@@ -23,13 +24,123 @@ class TurnPhase(StrEnum): | ||
| 23 | 24 | FINALIZE = "finalize" |
| 24 | 25 | |
| 25 | 26 | |
| 27 | +class TurnTransitionKind(StrEnum): | |
| 28 | + """Classification for why one turn-state transition occurred.""" | |
| 29 | + | |
| 30 | + NORMAL = "normal" | |
| 31 | + RETRY = "retry" | |
| 32 | + REROUTE = "reroute" | |
| 33 | + RECOVERY = "recovery" | |
| 34 | + TERMINAL = "terminal" | |
| 35 | + | |
| 36 | + | |
| 37 | +@dataclass(slots=True) | |
| 38 | +class TurnTransition: | |
| 39 | + """One validated turn-state transition.""" | |
| 40 | + | |
| 41 | + from_phase: str | None | |
| 42 | + to_phase: str | |
| 43 | + reason_code: str | |
| 44 | + reason_summary: str | |
| 45 | + kind: TurnTransitionKind | |
| 46 | + | |
| 47 | + @property | |
| 48 | + def summary(self) -> str: | |
| 49 | + source = self.from_phase or "start" | |
| 50 | + return ( | |
| 51 | + f"{source} -> {self.to_phase} " | |
| 52 | + f"[{self.kind.value}] {self.reason_summary}" | |
| 53 | + ) | |
| 54 | + | |
| 55 | + | |
| 56 | +class TurnStateMachine: | |
| 57 | + """Validate allowed turn-state transitions.""" | |
| 58 | + | |
| 59 | + _ALLOWED_TRANSITIONS: dict[str | None, set[str]] = { | |
| 60 | + None: {TurnPhase.PREPARE.value}, | |
| 61 | + TurnPhase.PREPARE.value: { | |
| 62 | + TurnPhase.ASSISTANT.value, | |
| 63 | + TurnPhase.FINALIZE.value, | |
| 64 | + }, | |
| 65 | + TurnPhase.ASSISTANT.value: { | |
| 66 | + TurnPhase.REPAIR.value, | |
| 67 | + TurnPhase.TOOLS.value, | |
| 68 | + TurnPhase.CRITIQUE.value, | |
| 69 | + TurnPhase.COMPLETION.value, | |
| 70 | + TurnPhase.FINALIZE.value, | |
| 71 | + }, | |
| 72 | + TurnPhase.REPAIR.value: { | |
| 73 | + TurnPhase.ASSISTANT.value, | |
| 74 | + TurnPhase.TOOLS.value, | |
| 75 | + TurnPhase.COMPLETION.value, | |
| 76 | + TurnPhase.FINALIZE.value, | |
| 77 | + }, | |
| 78 | + TurnPhase.TOOLS.value: { | |
| 79 | + TurnPhase.ASSISTANT.value, | |
| 80 | + TurnPhase.CRITIQUE.value, | |
| 81 | + TurnPhase.COMPLETION.value, | |
| 82 | + TurnPhase.FINALIZE.value, | |
| 83 | + }, | |
| 84 | + TurnPhase.CRITIQUE.value: { | |
| 85 | + TurnPhase.ASSISTANT.value, | |
| 86 | + TurnPhase.COMPLETION.value, | |
| 87 | + TurnPhase.FINALIZE.value, | |
| 88 | + }, | |
| 89 | + TurnPhase.COMPLETION.value: { | |
| 90 | + TurnPhase.ASSISTANT.value, | |
| 91 | + TurnPhase.FINALIZE.value, | |
| 92 | + }, | |
| 93 | + TurnPhase.FINALIZE.value: set(), | |
| 94 | + } | |
| 95 | + | |
| 96 | + def __init__(self) -> None: | |
| 97 | + self.current_phase: str | None = None | |
| 98 | + self.last_transition: TurnTransition | None = None | |
| 99 | + | |
| 100 | + def transition( | |
| 101 | + self, | |
| 102 | + phase: TurnPhase, | |
| 103 | + *, | |
| 104 | + reason_code: str, | |
| 105 | + reason_summary: str, | |
| 106 | + kind: TurnTransitionKind = TurnTransitionKind.NORMAL, | |
| 107 | + ) -> TurnTransition | None: | |
| 108 | + """Validate and record a transition to the target phase.""" | |
| 109 | + | |
| 110 | + if phase.value == self.current_phase: | |
| 111 | + return None | |
| 112 | + | |
| 113 | + allowed = self._ALLOWED_TRANSITIONS.get(self.current_phase, set()) | |
| 114 | + if phase.value not in allowed: | |
| 115 | + raise ValueError( | |
| 116 | + "Invalid turn-state transition: " | |
| 117 | + f"{self.current_phase or 'start'} -> {phase.value}" | |
| 118 | + ) | |
| 119 | + | |
| 120 | + transition = TurnTransition( | |
| 121 | + from_phase=self.current_phase, | |
| 122 | + to_phase=phase.value, | |
| 123 | + reason_code=reason_code, | |
| 124 | + reason_summary=reason_summary, | |
| 125 | + kind=kind, | |
| 126 | + ) | |
| 127 | + self.current_phase = phase.value | |
| 128 | + self.last_transition = transition | |
| 129 | + return transition | |
| 130 | + | |
| 131 | + def clear(self) -> None: | |
| 132 | + """Reset the active phase after a turn completes.""" | |
| 133 | + | |
| 134 | + self.current_phase = None | |
| 135 | + | |
| 136 | + | |
| 26 | 137 | class TurnPhaseTracker: |
| 27 | 138 | """Persist and emit turn-phase transitions.""" |
| 28 | 139 | |
| 29 | 140 | def __init__(self, agent, tracer: RuntimeTracer) -> None: |
| 30 | 141 | self.agent = agent |
| 31 | 142 | self.tracer = tracer |
| 32 | - self.current_phase: str | None = None | |
| 143 | + self.state_machine = TurnStateMachine() | |
| 33 | 144 | |
| 34 | 145 | async def enter( |
| 35 | 146 | self, |
@@ -37,25 +148,48 @@ class TurnPhaseTracker: | ||
| 37 | 148 | emit: EventSink, |
| 38 | 149 | *, |
| 39 | 150 | detail: str | None = None, |
| 151 | + reason_code: str | None = None, | |
| 152 | + kind: TurnTransitionKind = TurnTransitionKind.NORMAL, | |
| 40 | 153 | ) -> None: |
| 41 | 154 | """Move the runtime into a named phase and emit the transition.""" |
| 42 | 155 | |
| 43 | - if phase.value == self.current_phase: | |
| 156 | + summary = detail or f"Phase: {phase.value}" | |
| 157 | + transition = self.state_machine.transition( | |
| 158 | + phase, | |
| 159 | + reason_code=reason_code or phase.value, | |
| 160 | + reason_summary=summary, | |
| 161 | + kind=kind, | |
| 162 | + ) | |
| 163 | + if transition is None: | |
| 44 | 164 | return |
| 45 | 165 | |
| 46 | - self.current_phase = phase.value | |
| 47 | - self.agent.session.update_runtime_state(active_turn_phase=phase.value) | |
| 48 | - self.tracer.record("turn.phase_changed", phase=phase.value, detail=detail) | |
| 166 | + self.agent.session.update_runtime_state( | |
| 167 | + active_turn_phase=phase.value, | |
| 168 | + last_turn_transition_summary=transition.summary, | |
| 169 | + last_turn_transition_kind=transition.kind.value, | |
| 170 | + last_turn_transition_reason_code=transition.reason_code, | |
| 171 | + ) | |
| 172 | + self.tracer.record( | |
| 173 | + "turn.phase_changed", | |
| 174 | + phase=phase.value, | |
| 175 | + detail=summary, | |
| 176 | + from_phase=transition.from_phase, | |
| 177 | + transition_kind=transition.kind.value, | |
| 178 | + reason_code=transition.reason_code, | |
| 179 | + ) | |
| 49 | 180 | await emit( |
| 50 | 181 | AgentEvent( |
| 51 | 182 | type="turn_phase", |
| 52 | - content=detail or f"Phase: {phase.value}", | |
| 183 | + content=transition.summary, | |
| 53 | 184 | turn_phase=phase.value, |
| 185 | + transition_kind=transition.kind.value, | |
| 186 | + transition_summary=transition.summary, | |
| 187 | + transition_reason_code=transition.reason_code, | |
| 54 | 188 | ) |
| 55 | 189 | ) |
| 56 | 190 | |
| 57 | 191 | def clear(self) -> None: |
| 58 | 192 | """Clear the persisted active phase when the turn finishes.""" |
| 59 | 193 | |
| 60 | - self.current_phase = None | |
| 194 | + self.state_machine.clear() | |
| 61 | 195 | self.agent.session.update_runtime_state(active_turn_phase=None) |
src/loader/runtime/session.py modified
@@ -19,7 +19,7 @@ from .compaction import ( | ||
| 19 | 19 | estimate_message_tokens, |
| 20 | 20 | ) |
| 21 | 21 | |
| 22 | -SESSION_VERSION = 3 | |
| 22 | +SESSION_VERSION = 4 | |
| 23 | 23 | DEFAULT_ROTATE_AFTER_BYTES = 256 * 1024 |
| 24 | 24 | MAX_ROTATED_FILES = 3 |
| 25 | 25 | _UNSET = object() |
@@ -84,6 +84,23 @@ def normalize_prompt_sections(value: Any) -> list[str]: | ||
| 84 | 84 | return [str(item) for item in value if str(item).strip()] |
| 85 | 85 | |
| 86 | 86 | |
| 87 | +def normalize_optional_text(value: Any) -> str | None: | |
| 88 | + """Coerce persisted optional text fields.""" | |
| 89 | + | |
| 90 | + if value is None: | |
| 91 | + return None | |
| 92 | + text = str(value).strip() | |
| 93 | + return text or None | |
| 94 | + | |
| 95 | + | |
| 96 | +def normalize_optional_float(value: Any) -> float | None: | |
| 97 | + """Coerce persisted numeric workflow scores.""" | |
| 98 | + | |
| 99 | + if value is None: | |
| 100 | + return None | |
| 101 | + return float(value) | |
| 102 | + | |
| 103 | + | |
| 87 | 104 | @dataclass(slots=True) |
| 88 | 105 | class SessionCompaction: |
| 89 | 106 | """Metadata describing the latest transcript compaction.""" |
@@ -135,6 +152,15 @@ class SessionSnapshot: | ||
| 135 | 152 | prompt_format: str | None = None |
| 136 | 153 | prompt_sections: list[str] = field(default_factory=list) |
| 137 | 154 | active_turn_phase: str | None = None |
| 155 | + workflow_reason_code: str | None = None | |
| 156 | + workflow_reason_summary: str | None = None | |
| 157 | + workflow_decision_kind: str | None = None | |
| 158 | + workflow_ambiguity_score: float | None = None | |
| 159 | + workflow_complexity_score: float | None = None | |
| 160 | + workflow_scheduled_next_mode: str | None = None | |
| 161 | + last_turn_transition_summary: str | None = None | |
| 162 | + last_turn_transition_kind: str | None = None | |
| 163 | + last_turn_transition_reason_code: str | None = None | |
| 138 | 164 | compaction: SessionCompaction | None = None |
| 139 | 165 | version: int = SESSION_VERSION |
| 140 | 166 | |
@@ -156,6 +182,15 @@ class SessionSnapshot: | ||
| 156 | 182 | "prompt_format": self.prompt_format, |
| 157 | 183 | "prompt_sections": list(self.prompt_sections), |
| 158 | 184 | "active_turn_phase": self.active_turn_phase, |
| 185 | + "workflow_reason_code": self.workflow_reason_code, | |
| 186 | + "workflow_reason_summary": self.workflow_reason_summary, | |
| 187 | + "workflow_decision_kind": self.workflow_decision_kind, | |
| 188 | + "workflow_ambiguity_score": self.workflow_ambiguity_score, | |
| 189 | + "workflow_complexity_score": self.workflow_complexity_score, | |
| 190 | + "workflow_scheduled_next_mode": self.workflow_scheduled_next_mode, | |
| 191 | + "last_turn_transition_summary": self.last_turn_transition_summary, | |
| 192 | + "last_turn_transition_kind": self.last_turn_transition_kind, | |
| 193 | + "last_turn_transition_reason_code": self.last_turn_transition_reason_code, | |
| 159 | 194 | "compaction": self.compaction.to_dict() if self.compaction else None, |
| 160 | 195 | } |
| 161 | 196 | |
@@ -185,6 +220,33 @@ class SessionSnapshot: | ||
| 185 | 220 | prompt_format=data.get("prompt_format"), |
| 186 | 221 | prompt_sections=normalize_prompt_sections(data.get("prompt_sections")), |
| 187 | 222 | active_turn_phase=data.get("active_turn_phase"), |
| 223 | + workflow_reason_code=normalize_optional_text( | |
| 224 | + data.get("workflow_reason_code") | |
| 225 | + ), | |
| 226 | + workflow_reason_summary=normalize_optional_text( | |
| 227 | + data.get("workflow_reason_summary") | |
| 228 | + ), | |
| 229 | + workflow_decision_kind=normalize_optional_text( | |
| 230 | + data.get("workflow_decision_kind") | |
| 231 | + ), | |
| 232 | + workflow_ambiguity_score=normalize_optional_float( | |
| 233 | + data.get("workflow_ambiguity_score") | |
| 234 | + ), | |
| 235 | + workflow_complexity_score=normalize_optional_float( | |
| 236 | + data.get("workflow_complexity_score") | |
| 237 | + ), | |
| 238 | + workflow_scheduled_next_mode=normalize_optional_text( | |
| 239 | + data.get("workflow_scheduled_next_mode") | |
| 240 | + ), | |
| 241 | + last_turn_transition_summary=normalize_optional_text( | |
| 242 | + data.get("last_turn_transition_summary") | |
| 243 | + ), | |
| 244 | + last_turn_transition_kind=normalize_optional_text( | |
| 245 | + data.get("last_turn_transition_kind") | |
| 246 | + ), | |
| 247 | + last_turn_transition_reason_code=normalize_optional_text( | |
| 248 | + data.get("last_turn_transition_reason_code") | |
| 249 | + ), | |
| 188 | 250 | compaction=( |
| 189 | 251 | SessionCompaction.from_dict(data["compaction"]) |
| 190 | 252 | if data.get("compaction") |
@@ -317,6 +379,15 @@ class ConversationSession: | ||
| 317 | 379 | prompt_format: str | None = None |
| 318 | 380 | prompt_sections: list[str] = field(default_factory=list) |
| 319 | 381 | active_turn_phase: str | None = None |
| 382 | + workflow_reason_code: str | None = None | |
| 383 | + workflow_reason_summary: str | None = None | |
| 384 | + workflow_decision_kind: str | None = None | |
| 385 | + workflow_ambiguity_score: float | None = None | |
| 386 | + workflow_complexity_score: float | None = None | |
| 387 | + workflow_scheduled_next_mode: str | None = None | |
| 388 | + last_turn_transition_summary: str | None = None | |
| 389 | + last_turn_transition_kind: str | None = None | |
| 390 | + last_turn_transition_reason_code: str | None = None | |
| 320 | 391 | compaction: SessionCompaction | None = None |
| 321 | 392 | rotate_after_bytes: int = DEFAULT_ROTATE_AFTER_BYTES |
| 322 | 393 | max_rotated_files: int = MAX_ROTATED_FILES |
@@ -366,6 +437,16 @@ class ConversationSession: | ||
| 366 | 437 | self.active_dod_path = None |
| 367 | 438 | self.current_task = None |
| 368 | 439 | self.workflow_mode = "execute" |
| 440 | + self.workflow_reason_code = None | |
| 441 | + self.workflow_reason_summary = None | |
| 442 | + self.workflow_decision_kind = None | |
| 443 | + self.workflow_ambiguity_score = None | |
| 444 | + self.workflow_complexity_score = None | |
| 445 | + self.workflow_scheduled_next_mode = None | |
| 446 | + self.active_turn_phase = None | |
| 447 | + self.last_turn_transition_summary = None | |
| 448 | + self.last_turn_transition_kind = None | |
| 449 | + self.last_turn_transition_reason_code = None | |
| 369 | 450 | self.compaction = None |
| 370 | 451 | self.usage_totals = {} |
| 371 | 452 | self.touch() |
@@ -389,6 +470,15 @@ class ConversationSession: | ||
| 389 | 470 | prompt_format: str | None = None, |
| 390 | 471 | prompt_sections: list[str] | None = None, |
| 391 | 472 | active_turn_phase: str | None | object = _UNSET, |
| 473 | + workflow_reason_code: str | None | object = _UNSET, | |
| 474 | + workflow_reason_summary: str | None | object = _UNSET, | |
| 475 | + workflow_decision_kind: str | None | object = _UNSET, | |
| 476 | + workflow_ambiguity_score: float | None | object = _UNSET, | |
| 477 | + workflow_complexity_score: float | None | object = _UNSET, | |
| 478 | + workflow_scheduled_next_mode: str | None | object = _UNSET, | |
| 479 | + last_turn_transition_summary: str | None | object = _UNSET, | |
| 480 | + last_turn_transition_kind: str | None | object = _UNSET, | |
| 481 | + last_turn_transition_reason_code: str | None | object = _UNSET, | |
| 392 | 482 | ) -> None: |
| 393 | 483 | """Update persisted runtime state that lives beside the messages.""" |
| 394 | 484 | |
@@ -414,6 +504,40 @@ class ConversationSession: | ||
| 414 | 504 | self.prompt_sections = normalize_prompt_sections(prompt_sections) |
| 415 | 505 | if active_turn_phase is not _UNSET: |
| 416 | 506 | self.active_turn_phase = active_turn_phase |
| 507 | + if workflow_reason_code is not _UNSET: | |
| 508 | + self.workflow_reason_code = normalize_optional_text(workflow_reason_code) | |
| 509 | + if workflow_reason_summary is not _UNSET: | |
| 510 | + self.workflow_reason_summary = normalize_optional_text( | |
| 511 | + workflow_reason_summary | |
| 512 | + ) | |
| 513 | + if workflow_decision_kind is not _UNSET: | |
| 514 | + self.workflow_decision_kind = normalize_optional_text( | |
| 515 | + workflow_decision_kind | |
| 516 | + ) | |
| 517 | + if workflow_ambiguity_score is not _UNSET: | |
| 518 | + self.workflow_ambiguity_score = normalize_optional_float( | |
| 519 | + workflow_ambiguity_score | |
| 520 | + ) | |
| 521 | + if workflow_complexity_score is not _UNSET: | |
| 522 | + self.workflow_complexity_score = normalize_optional_float( | |
| 523 | + workflow_complexity_score | |
| 524 | + ) | |
| 525 | + if workflow_scheduled_next_mode is not _UNSET: | |
| 526 | + self.workflow_scheduled_next_mode = normalize_optional_text( | |
| 527 | + workflow_scheduled_next_mode | |
| 528 | + ) | |
| 529 | + if last_turn_transition_summary is not _UNSET: | |
| 530 | + self.last_turn_transition_summary = normalize_optional_text( | |
| 531 | + last_turn_transition_summary | |
| 532 | + ) | |
| 533 | + if last_turn_transition_kind is not _UNSET: | |
| 534 | + self.last_turn_transition_kind = normalize_optional_text( | |
| 535 | + last_turn_transition_kind | |
| 536 | + ) | |
| 537 | + if last_turn_transition_reason_code is not _UNSET: | |
| 538 | + self.last_turn_transition_reason_code = normalize_optional_text( | |
| 539 | + last_turn_transition_reason_code | |
| 540 | + ) | |
| 417 | 541 | self.touch() |
| 418 | 542 | self.persist() |
| 419 | 543 | |
@@ -487,6 +611,15 @@ class ConversationSession: | ||
| 487 | 611 | prompt_format=self.prompt_format, |
| 488 | 612 | prompt_sections=list(self.prompt_sections), |
| 489 | 613 | active_turn_phase=self.active_turn_phase, |
| 614 | + workflow_reason_code=self.workflow_reason_code, | |
| 615 | + workflow_reason_summary=self.workflow_reason_summary, | |
| 616 | + workflow_decision_kind=self.workflow_decision_kind, | |
| 617 | + workflow_ambiguity_score=self.workflow_ambiguity_score, | |
| 618 | + workflow_complexity_score=self.workflow_complexity_score, | |
| 619 | + workflow_scheduled_next_mode=self.workflow_scheduled_next_mode, | |
| 620 | + last_turn_transition_summary=self.last_turn_transition_summary, | |
| 621 | + last_turn_transition_kind=self.last_turn_transition_kind, | |
| 622 | + last_turn_transition_reason_code=self.last_turn_transition_reason_code, | |
| 490 | 623 | compaction=self.compaction, |
| 491 | 624 | ) |
| 492 | 625 | return self.store.save(snapshot) |
@@ -535,6 +668,17 @@ class ConversationSession: | ||
| 535 | 668 | instance.prompt_format = snapshot.prompt_format |
| 536 | 669 | instance.prompt_sections = list(snapshot.prompt_sections) |
| 537 | 670 | instance.active_turn_phase = snapshot.active_turn_phase |
| 671 | + instance.workflow_reason_code = snapshot.workflow_reason_code | |
| 672 | + instance.workflow_reason_summary = snapshot.workflow_reason_summary | |
| 673 | + instance.workflow_decision_kind = snapshot.workflow_decision_kind | |
| 674 | + instance.workflow_ambiguity_score = snapshot.workflow_ambiguity_score | |
| 675 | + instance.workflow_complexity_score = snapshot.workflow_complexity_score | |
| 676 | + instance.workflow_scheduled_next_mode = snapshot.workflow_scheduled_next_mode | |
| 677 | + instance.last_turn_transition_summary = snapshot.last_turn_transition_summary | |
| 678 | + instance.last_turn_transition_kind = snapshot.last_turn_transition_kind | |
| 679 | + instance.last_turn_transition_reason_code = ( | |
| 680 | + snapshot.last_turn_transition_reason_code | |
| 681 | + ) | |
| 538 | 682 | instance.compaction = snapshot.compaction |
| 539 | 683 | instance.rotate_after_bytes = rotate_after_bytes |
| 540 | 684 | instance.max_rotated_files = max_rotated_files |
src/loader/runtime/workflow.pymodified@@ -54,14 +54,76 @@ class WorkflowMode(StrEnum): | ||
| 54 | 54 | raise ValueError(f"Unknown workflow mode: {value}") |
| 55 | 55 | |
| 56 | 56 | |
| 57 | +class WorkflowDecisionKind(StrEnum): | |
| 58 | + """Classification for why a workflow mode was selected.""" | |
| 59 | + | |
| 60 | + INITIAL_ROUTE = "initial_route" | |
| 61 | + REQUESTED = "requested" | |
| 62 | + ARTIFACT_REUSE = "artifact_reuse" | |
| 63 | + HANDOFF = "handoff" | |
| 64 | + REENTRY = "reentry" | |
| 65 | + FORCED = "forced" | |
| 66 | + | |
| 67 | + | |
| 57 | 68 | @dataclass(slots=True) |
| 58 | 69 | class ModeDecision: |
| 59 | 70 | """Router output for the entry point of a task turn.""" |
| 60 | 71 | |
| 61 | 72 | mode: WorkflowMode |
| 62 | - reason: str | |
| 73 | + reason_code: str | |
| 74 | + reason_summary: str | |
| 75 | + decision_kind: WorkflowDecisionKind = WorkflowDecisionKind.INITIAL_ROUTE | |
| 63 | 76 | ambiguity_score: float = 0.0 |
| 64 | 77 | complexity_score: float = 0.0 |
| 78 | + scheduled_next_mode: WorkflowMode | None = None | |
| 79 | + | |
| 80 | + @property | |
| 81 | + def reason(self) -> str: | |
| 82 | + return self.reason_summary | |
| 83 | + | |
| 84 | + @classmethod | |
| 85 | + def transition( | |
| 86 | + cls, | |
| 87 | + mode: WorkflowMode, | |
| 88 | + *, | |
| 89 | + reason_code: str, | |
| 90 | + reason_summary: str, | |
| 91 | + decision_kind: WorkflowDecisionKind = WorkflowDecisionKind.HANDOFF, | |
| 92 | + ambiguity_score: float = 0.0, | |
| 93 | + complexity_score: float = 0.0, | |
| 94 | + scheduled_next_mode: WorkflowMode | None = None, | |
| 95 | + ) -> ModeDecision: | |
| 96 | + """Build a non-router workflow decision for handoffs and reentry.""" | |
| 97 | + | |
| 98 | + return cls( | |
| 99 | + mode=mode, | |
| 100 | + reason_code=reason_code, | |
| 101 | + reason_summary=reason_summary, | |
| 102 | + decision_kind=decision_kind, | |
| 103 | + ambiguity_score=ambiguity_score, | |
| 104 | + complexity_score=complexity_score, | |
| 105 | + scheduled_next_mode=scheduled_next_mode, | |
| 106 | + ) | |
| 107 | + | |
| 108 | + def with_context( | |
| 109 | + self, | |
| 110 | + *, | |
| 111 | + reason_code: str | None = None, | |
| 112 | + reason_summary: str | None = None, | |
| 113 | + decision_kind: WorkflowDecisionKind | None = None, | |
| 114 | + scheduled_next_mode: WorkflowMode | None = None, | |
| 115 | + ) -> ModeDecision: | |
| 116 | + """Return a copy with updated contextual routing metadata.""" | |
| 117 | + | |
| 118 | + return ModeDecision( | |
| 119 | + mode=self.mode, | |
| 120 | + reason_code=reason_code or self.reason_code, | |
| 121 | + reason_summary=reason_summary or self.reason_summary, | |
| 122 | + decision_kind=decision_kind or self.decision_kind, | |
| 123 | + ambiguity_score=self.ambiguity_score, | |
| 124 | + complexity_score=self.complexity_score, | |
| 125 | + scheduled_next_mode=scheduled_next_mode, | |
| 126 | + ) | |
| 65 | 127 | |
| 66 | 128 | |
| 67 | 129 | @dataclass(slots=True) |
@@ -145,7 +207,9 @@ class ClarifyBrief: | ||
| 145 | 207 | if not self.likely_touchpoints: |
| 146 | 208 | self.likely_touchpoints = ["Identify exact files during planning or execution."] |
| 147 | 209 | if not self.assumptions: |
| 148 | - self.assumptions = ["Unspecified details stay unchanged unless evidence says otherwise."] | |
| 210 | + self.assumptions = [ | |
| 211 | + "Unspecified details stay unchanged unless evidence says otherwise.", | |
| 212 | + ] | |
| 149 | 213 | if not self.acceptance_criteria: |
| 150 | 214 | self.acceptance_criteria = list( |
| 151 | 215 | dict.fromkeys(self.desired_outcome + self.in_scope[:2]) |
@@ -326,13 +390,17 @@ class ModeRouter: | ||
| 326 | 390 | if requested_mode is not None: |
| 327 | 391 | return ModeDecision( |
| 328 | 392 | mode=requested_mode, |
| 329 | - reason=f"explicit {requested_mode.value} request", | |
| 393 | + reason_code="explicit_request", | |
| 394 | + reason_summary=f"explicit {requested_mode.value} request", | |
| 395 | + decision_kind=WorkflowDecisionKind.REQUESTED, | |
| 330 | 396 | ) |
| 331 | 397 | |
| 332 | 398 | if has_plan: |
| 333 | 399 | return ModeDecision( |
| 334 | 400 | mode=WorkflowMode.EXECUTE, |
| 335 | - reason="reusing existing plan artifacts", | |
| 401 | + reason_code="existing_plan_artifacts", | |
| 402 | + reason_summary="reusing existing plan artifacts", | |
| 403 | + decision_kind=WorkflowDecisionKind.ARTIFACT_REUSE, | |
| 336 | 404 | ) |
| 337 | 405 | |
| 338 | 406 | ambiguity = self._ambiguity_score(task) |
@@ -341,22 +409,29 @@ class ModeRouter: | ||
| 341 | 409 | if allow_clarify and not has_brief and ambiguity >= self.clarify_threshold: |
| 342 | 410 | return ModeDecision( |
| 343 | 411 | mode=WorkflowMode.CLARIFY, |
| 344 | - reason="prompt is broad or missing boundaries", | |
| 412 | + reason_code="task_is_ambiguous", | |
| 413 | + reason_summary="prompt is broad or missing boundaries", | |
| 345 | 414 | ambiguity_score=ambiguity, |
| 346 | 415 | complexity_score=complexity, |
| 416 | + scheduled_next_mode=WorkflowMode.EXECUTE, | |
| 347 | 417 | ) |
| 348 | 418 | |
| 349 | 419 | if complexity >= self.plan_threshold: |
| 350 | 420 | return ModeDecision( |
| 351 | 421 | mode=WorkflowMode.PLAN, |
| 352 | - reason="task looks complex enough to benefit from a persisted plan", | |
| 422 | + reason_code="task_is_complex", | |
| 423 | + reason_summary=( | |
| 424 | + "task looks complex enough to benefit from a persisted plan" | |
| 425 | + ), | |
| 353 | 426 | ambiguity_score=ambiguity, |
| 354 | 427 | complexity_score=complexity, |
| 428 | + scheduled_next_mode=WorkflowMode.EXECUTE, | |
| 355 | 429 | ) |
| 356 | 430 | |
| 357 | 431 | return ModeDecision( |
| 358 | 432 | mode=WorkflowMode.EXECUTE, |
| 359 | - reason="task appears concrete enough for direct execution", | |
| 433 | + reason_code="task_is_concrete", | |
| 434 | + reason_summary="task appears concrete enough for direct execution", | |
| 360 | 435 | ambiguity_score=ambiguity, |
| 361 | 436 | complexity_score=complexity, |
| 362 | 437 | ) |
@@ -470,10 +545,22 @@ def sync_todos_to_definition_of_done( | ||
| 470 | 545 | """Reflect todo state into DoD pending/completed items.""" |
| 471 | 546 | |
| 472 | 547 | special_pending = [ |
| 473 | - item for item in dod.pending_items if item in {"Complete the requested work", "Collect verification evidence"} | |
| 548 | + item | |
| 549 | + for item in dod.pending_items | |
| 550 | + if item | |
| 551 | + in { | |
| 552 | + "Complete the requested work", | |
| 553 | + "Collect verification evidence", | |
| 554 | + } | |
| 474 | 555 | ] |
| 475 | 556 | special_completed = [ |
| 476 | - item for item in dod.completed_items if item in {"Complete the requested work", "Collect verification evidence"} | |
| 557 | + item | |
| 558 | + for item in dod.completed_items | |
| 559 | + if item | |
| 560 | + in { | |
| 561 | + "Complete the requested work", | |
| 562 | + "Collect verification evidence", | |
| 563 | + } | |
| 477 | 564 | ] |
| 478 | 565 | |
| 479 | 566 | pending: list[str] = [] |
tests/test_inspection.pymodified@@ -105,6 +105,16 @@ def _persist_session_with_dod(temp_dir: Path) -> tuple[str, str]: | ||
| 105 | 105 | permission_rules_source=str(temp_dir / ".loader" / "permission-rules.json"), |
| 106 | 106 | prompt_format="native", |
| 107 | 107 | prompt_sections=["Runtime Config", "Workflow Context", "Mode Guidance"], |
| 108 | + workflow_reason_code="verification_failed_reentry", | |
| 109 | + workflow_reason_summary="verification failed; returning to execute for fixes", | |
| 110 | + workflow_decision_kind="reentry", | |
| 111 | + workflow_ambiguity_score=0.1, | |
| 112 | + workflow_complexity_score=0.7, | |
| 113 | + workflow_scheduled_next_mode="verify", | |
| 114 | + active_turn_phase="completion", | |
| 115 | + last_turn_transition_summary="completion -> finalize [terminal] Finalizing completed turn", | |
| 116 | + last_turn_transition_kind="terminal", | |
| 117 | + last_turn_transition_reason_code="turn_complete", | |
| 108 | 118 | ) |
| 109 | 119 | SessionStore(temp_dir).save(snapshot) |
| 110 | 120 | return snapshot.session_id, str(dod_path) |
@@ -226,6 +236,16 @@ def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> | ||
| 226 | 236 | "Workflow Context", |
| 227 | 237 | "Mode Guidance", |
| 228 | 238 | ] |
| 239 | + assert snapshot.workflow_reason_code == "verification_failed_reentry" | |
| 240 | + assert snapshot.workflow_reason_summary == ( | |
| 241 | + "verification failed; returning to execute for fixes" | |
| 242 | + ) | |
| 243 | + assert snapshot.workflow_decision_kind == "reentry" | |
| 244 | + assert snapshot.workflow_scheduled_next_mode == "verify" | |
| 245 | + assert snapshot.active_turn_phase == "completion" | |
| 246 | + assert snapshot.last_turn_transition_summary == ( | |
| 247 | + "completion -> finalize [terminal] Finalizing completed turn" | |
| 248 | + ) | |
| 229 | 249 | |
| 230 | 250 | assert len(sessions) == 1 |
| 231 | 251 | assert sessions[0].session_id == session_id |
@@ -237,6 +257,14 @@ def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> | ||
| 237 | 257 | temp_dir / ".loader" / "permission-rules.json" |
| 238 | 258 | ) |
| 239 | 259 | assert sessions[0].prompt_format == "native" |
| 260 | + assert sessions[0].workflow_reason_code == "verification_failed_reentry" | |
| 261 | + assert sessions[0].workflow_reason_summary == ( | |
| 262 | + "verification failed; returning to execute for fixes" | |
| 263 | + ) | |
| 264 | + assert sessions[0].workflow_decision_kind == "reentry" | |
| 265 | + assert sessions[0].last_turn_transition_summary == ( | |
| 266 | + "completion -> finalize [terminal] Finalizing completed turn" | |
| 267 | + ) | |
| 240 | 268 | |
| 241 | 269 | assert detail.snapshot.session_id == session_id |
| 242 | 270 | assert detail.is_current is True |
@@ -245,6 +273,8 @@ def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> | ||
| 245 | 273 | assert detail.snapshot.permission_rules_source == str( |
| 246 | 274 | temp_dir / ".loader" / "permission-rules.json" |
| 247 | 275 | ) |
| 276 | + assert detail.snapshot.workflow_reason_code == "verification_failed_reentry" | |
| 277 | + assert detail.snapshot.last_turn_transition_reason_code == "turn_complete" | |
| 248 | 278 | |
| 249 | 279 | |
| 250 | 280 | def test_status_and_session_commands_render_persisted_state( |
tests/test_runtime_phases.pymodified@@ -25,6 +25,10 @@ def _turn_phases(run) -> list[str]: | ||
| 25 | 25 | ] |
| 26 | 26 | |
| 27 | 27 | |
| 28 | +def _turn_phase_events(run) -> list: | |
| 29 | + return [event for event in run.events if event.type == "turn_phase"] | |
| 30 | + | |
| 31 | + | |
| 28 | 32 | @pytest.mark.asyncio |
| 29 | 33 | async def test_empty_output_enters_repair_phase(temp_dir: Path) -> None: |
| 30 | 34 | backend = ScriptedBackend( |
@@ -42,10 +46,22 @@ async def test_empty_output_enters_repair_phase(temp_dir: Path) -> None: | ||
| 42 | 46 | ) |
| 43 | 47 | |
| 44 | 48 | phases = _turn_phases(run) |
| 49 | + repair_event = next( | |
| 50 | + event | |
| 51 | + for event in _turn_phase_events(run) | |
| 52 | + if event.turn_phase == "repair" | |
| 53 | + ) | |
| 45 | 54 | assert "repair" in phases |
| 46 | 55 | assert phases[:3] == ["prepare", "assistant", "repair"] |
| 47 | 56 | assert phases[-2:] == ["completion", "finalize"] |
| 57 | + assert repair_event.transition_kind == "retry" | |
| 58 | + assert repair_event.transition_reason_code == "repair_empty_response" | |
| 59 | + assert run.agent.last_turn_summary is not None | |
| 60 | + assert run.agent.last_turn_summary.last_turn_transition_summary == ( | |
| 61 | + "completion -> finalize [terminal] Finalizing completed turn" | |
| 62 | + ) | |
| 48 | 63 | assert run.agent.session.active_turn_phase is None |
| 64 | + assert run.agent.session.last_turn_transition_reason_code == "turn_complete" | |
| 49 | 65 | |
| 50 | 66 | |
| 51 | 67 | @pytest.mark.asyncio |
@@ -97,4 +113,9 @@ async def test_completion_nudge_and_tool_batch_emit_named_phases( | ||
| 97 | 113 | assert "tools" in phases |
| 98 | 114 | assert phases[0] == "prepare" |
| 99 | 115 | assert phases[-1] == "finalize" |
| 116 | + assert run.agent.last_turn_summary is not None | |
| 117 | + assert run.agent.last_turn_summary.last_turn_transition_summary == ( | |
| 118 | + "completion -> finalize [terminal] Finalizing completed turn" | |
| 119 | + ) | |
| 120 | + assert run.agent.session.last_turn_transition_reason_code == "turn_complete" | |
| 100 | 121 | assert any(event.type == "completion_check" for event in run.events) |
tests/test_session_state.pymodified@@ -148,6 +148,15 @@ def test_session_persists_permission_policy_metadata(temp_dir: Path) -> None: | ||
| 148 | 148 | permission_rules_source=str(temp_dir / ".loader" / "permission-rules.json"), |
| 149 | 149 | prompt_format="native", |
| 150 | 150 | prompt_sections=["Runtime Config", "Workflow Context", "Project Context"], |
| 151 | + workflow_reason_code="task_is_complex", | |
| 152 | + workflow_reason_summary="task looks complex enough to benefit from a persisted plan", | |
| 153 | + workflow_decision_kind="initial_route", | |
| 154 | + workflow_ambiguity_score=0.2, | |
| 155 | + workflow_complexity_score=0.6, | |
| 156 | + workflow_scheduled_next_mode="execute", | |
| 157 | + last_turn_transition_summary="completion -> finalize [terminal] Finalizing completed turn", | |
| 158 | + last_turn_transition_kind="terminal", | |
| 159 | + last_turn_transition_reason_code="turn_complete", | |
| 151 | 160 | ) |
| 152 | 161 | |
| 153 | 162 | reloaded = ConversationSession.load( |
@@ -170,6 +179,19 @@ def test_session_persists_permission_policy_metadata(temp_dir: Path) -> None: | ||
| 170 | 179 | "Workflow Context", |
| 171 | 180 | "Project Context", |
| 172 | 181 | ] |
| 182 | + assert reloaded.workflow_reason_code == "task_is_complex" | |
| 183 | + assert reloaded.workflow_reason_summary == ( | |
| 184 | + "task looks complex enough to benefit from a persisted plan" | |
| 185 | + ) | |
| 186 | + assert reloaded.workflow_decision_kind == "initial_route" | |
| 187 | + assert reloaded.workflow_ambiguity_score == pytest.approx(0.2) | |
| 188 | + assert reloaded.workflow_complexity_score == pytest.approx(0.6) | |
| 189 | + assert reloaded.workflow_scheduled_next_mode == "execute" | |
| 190 | + assert reloaded.last_turn_transition_summary == ( | |
| 191 | + "completion -> finalize [terminal] Finalizing completed turn" | |
| 192 | + ) | |
| 193 | + assert reloaded.last_turn_transition_kind == "terminal" | |
| 194 | + assert reloaded.last_turn_transition_reason_code == "turn_complete" | |
| 173 | 195 | |
| 174 | 196 | |
| 175 | 197 | @pytest.mark.asyncio |
tests/test_turn_state_machine.pyadded@@ -0,0 +1,66 @@ | ||
| 1 | +"""Tests for the validated runtime turn state machine.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import pytest | |
| 6 | + | |
| 7 | +from loader.runtime.phases import ( | |
| 8 | + TurnPhase, | |
| 9 | + TurnStateMachine, | |
| 10 | + TurnTransitionKind, | |
| 11 | +) | |
| 12 | + | |
| 13 | + | |
| 14 | +def test_turn_state_machine_accepts_valid_transitions() -> None: | |
| 15 | + machine = TurnStateMachine() | |
| 16 | + | |
| 17 | + prepare = machine.transition( | |
| 18 | + TurnPhase.PREPARE, | |
| 19 | + reason_code="prepare_runtime", | |
| 20 | + reason_summary="Preparing runtime state", | |
| 21 | + ) | |
| 22 | + assistant = machine.transition( | |
| 23 | + TurnPhase.ASSISTANT, | |
| 24 | + reason_code="request_assistant_response", | |
| 25 | + reason_summary="Requesting assistant response", | |
| 26 | + ) | |
| 27 | + tools = machine.transition( | |
| 28 | + TurnPhase.TOOLS, | |
| 29 | + reason_code="execute_tool_batch", | |
| 30 | + reason_summary="Executing tool batch", | |
| 31 | + ) | |
| 32 | + finalize = machine.transition( | |
| 33 | + TurnPhase.FINALIZE, | |
| 34 | + reason_code="turn_complete", | |
| 35 | + reason_summary="Finalizing completed turn", | |
| 36 | + kind=TurnTransitionKind.TERMINAL, | |
| 37 | + ) | |
| 38 | + | |
| 39 | + assert prepare is not None | |
| 40 | + assert prepare.from_phase is None | |
| 41 | + assert prepare.to_phase == "prepare" | |
| 42 | + assert assistant is not None | |
| 43 | + assert assistant.from_phase == "prepare" | |
| 44 | + assert tools is not None | |
| 45 | + assert tools.from_phase == "assistant" | |
| 46 | + assert finalize is not None | |
| 47 | + assert finalize.kind is TurnTransitionKind.TERMINAL | |
| 48 | + assert machine.current_phase == "finalize" | |
| 49 | + assert machine.last_transition == finalize | |
| 50 | + assert finalize.summary == "tools -> finalize [terminal] Finalizing completed turn" | |
| 51 | + | |
| 52 | + | |
| 53 | +def test_turn_state_machine_rejects_invalid_transitions() -> None: | |
| 54 | + machine = TurnStateMachine() | |
| 55 | + machine.transition( | |
| 56 | + TurnPhase.PREPARE, | |
| 57 | + reason_code="prepare_runtime", | |
| 58 | + reason_summary="Preparing runtime state", | |
| 59 | + ) | |
| 60 | + | |
| 61 | + with pytest.raises(ValueError, match="prepare -> tools"): | |
| 62 | + machine.transition( | |
| 63 | + TurnPhase.TOOLS, | |
| 64 | + reason_code="execute_tool_batch", | |
| 65 | + reason_summary="Executing tool batch", | |
| 66 | + ) | |
tests/test_workflow_runtime.pymodified@@ -50,7 +50,10 @@ async def test_ambiguous_prompt_routes_to_clarify_and_persists_brief( | ||
| 50 | 50 | id="ask-1", |
| 51 | 51 | name="AskUserQuestion", |
| 52 | 52 | arguments={ |
| 53 | - "question": "What should stay out of scope for this Loader improvement?", | |
| 53 | + "question": ( | |
| 54 | + "What should stay out of scope for this Loader " | |
| 55 | + "improvement?" | |
| 56 | + ), | |
| 54 | 57 | }, |
| 55 | 58 | ) |
| 56 | 59 | ], |
@@ -112,6 +115,10 @@ async def test_ambiguous_prompt_routes_to_clarify_and_persists_brief( | ||
| 112 | 115 | assert Path(dod.clarify_brief).exists() |
| 113 | 116 | assert "runtime behavior" in dod.acceptance_criteria[0].lower() |
| 114 | 117 | assert "## Clarify Mode" in backend.invocations[0].messages[0].content |
| 118 | + assert run.agent.last_turn_summary is not None | |
| 119 | + assert run.agent.last_turn_summary.workflow_mode == "execute" | |
| 120 | + assert run.agent.last_turn_summary.workflow_reason_code == "post_clarify_task_is_concrete" | |
| 121 | + assert run.agent.last_turn_summary.workflow_decision_kind == "handoff" | |
| 115 | 122 | |
| 116 | 123 | |
| 117 | 124 | @pytest.mark.asyncio |
@@ -186,6 +193,12 @@ async def test_complex_prompt_routes_to_plan_and_uses_verification_artifact( | ||
| 186 | 193 | assert Path(dod.verification_plan).exists() |
| 187 | 194 | assert dod.verification_commands == [f"test -f {target}"] |
| 188 | 195 | assert "## Plan Mode" in backend.invocations[0].messages[0].content |
| 196 | + assert run.agent.last_turn_summary is not None | |
| 197 | + assert run.agent.last_turn_summary.workflow_mode == "verify" | |
| 198 | + assert run.agent.last_turn_summary.workflow_reason_code == ( | |
| 199 | + "definition_of_done_requires_verification" | |
| 200 | + ) | |
| 201 | + assert run.agent.last_turn_summary.workflow_decision_kind == "handoff" | |
| 189 | 202 | verify_calls = [ |
| 190 | 203 | event |
| 191 | 204 | for event in run.events |