| 1 | """Definition-of-done gating and turn finalization for the runtime.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from collections.abc import Awaitable, Callable |
| 6 | from dataclasses import dataclass, field |
| 7 | from datetime import UTC, datetime |
| 8 | from pathlib import Path |
| 9 | |
| 10 | from ..llm.base import Message, Role, ToolCall |
| 11 | from .context import RuntimeContext |
| 12 | from .dod import ( |
| 13 | DefinitionOfDone, |
| 14 | DefinitionOfDoneStore, |
| 15 | VerificationEvidence, |
| 16 | build_verification_summary, |
| 17 | derive_verification_commands, |
| 18 | ensure_active_verification_attempt, |
| 19 | ) |
| 20 | from .events import AgentEvent, TurnSummary |
| 21 | from .evidence_provenance import ( |
| 22 | EvidenceProvenance, |
| 23 | EvidenceProvenanceStatus, |
| 24 | summarize_evidence_provenance, |
| 25 | ) |
| 26 | from .executor import ToolExecutor |
| 27 | from .logging import get_runtime_logger |
| 28 | from .memory import MemoryStore |
| 29 | from .policy_timeline import append_verification_timeline_entry |
| 30 | from .session import normalize_usage |
| 31 | from .tracing import RuntimeTracer |
| 32 | from .verification_observations import ( |
| 33 | VerificationObservation, |
| 34 | VerificationObservationStatus, |
| 35 | ) |
| 36 | from .workflow import ( |
| 37 | ModeDecision, |
| 38 | WorkflowDecisionKind, |
| 39 | WorkflowMode, |
| 40 | WorkflowTimelineEntry, |
| 41 | WorkflowTimelineEntryKind, |
| 42 | extract_verification_commands_from_markdown, |
| 43 | ) |
| 44 | |
# Async callback that receives every AgentEvent emitted while a turn is gated
# and finalized.
EventSink = Callable[[AgentEvent], Awaitable[None]]

# Async callback used to switch the workflow mode. TurnFinalizer invokes it as
# ``set_workflow_mode(decision, dod=..., emit=..., summary=...)``.
WorkflowSetter = Callable[
    [ModeDecision, DefinitionOfDone, EventSink, TurnSummary],
    Awaitable[None],
]
| 50 | |
| 51 | |
@dataclass
class CompletionGateResult:
    """Outcome of the definition-of-done completion gate.

    The gate either accepts/stops the turn (``should_continue`` is False and
    ``final_response`` carries the text to return) or asks the runtime to keep
    going (``should_continue`` is True and ``final_response`` is empty).
    """

    # True when the gate appended a follow-up message and the turn should loop.
    should_continue: bool
    # Machine-readable code naming the branch the gate took.
    reason_code: str
    # Human-readable explanation matching ``reason_code``.
    reason_summary: str
    # Response text to surface; empty string when the turn continues.
    final_response: str
    # Provenance entries backing the decision (fresh list per instance).
    evidence_provenance: list[EvidenceProvenance] = field(default_factory=list)
    # Verification observations backing the decision (fresh list per instance).
    verification_observations: list[VerificationObservation] = field(
        default_factory=list
    )
| 62 | |
| 63 | |
| 64 | class TurnFinalizer: |
| 65 | """Owns DoD verification, status emission, and turn finalization.""" |
| 66 | |
| 67 | def __init__( |
| 68 | self, |
| 69 | context: RuntimeContext, |
| 70 | tracer: RuntimeTracer, |
| 71 | dod_store: DefinitionOfDoneStore, |
| 72 | set_workflow_mode: WorkflowSetter, |
| 73 | ) -> None: |
| 74 | self.context = context |
| 75 | self.tracer = tracer |
| 76 | self.dod_store = dod_store |
| 77 | self.set_workflow_mode = set_workflow_mode |
| 78 | |
| 79 | @property |
| 80 | def _prompt_format(self) -> str | None: |
| 81 | return self.context.prompt_format |
| 82 | |
| 83 | @property |
| 84 | def _prompt_sections(self) -> list[str]: |
| 85 | return list(self.context.prompt_sections) |
| 86 | |
| 87 | async def run_definition_of_done_gate( |
| 88 | self, |
| 89 | *, |
| 90 | dod: DefinitionOfDone, |
| 91 | candidate_response: str, |
| 92 | emit: EventSink, |
| 93 | summary: TurnSummary, |
| 94 | executor: ToolExecutor, |
| 95 | ) -> CompletionGateResult: |
| 96 | """Gate completion on DoD state and verification evidence.""" |
| 97 | |
| 98 | implementation_item = "Complete the requested work" |
| 99 | if implementation_item in dod.pending_items: |
| 100 | dod.pending_items.remove(implementation_item) |
| 101 | dod.completed_items.append(implementation_item) |
| 102 | |
| 103 | tracked_pending_items = [ |
| 104 | item for item in dod.pending_items if item != "Collect verification evidence" |
| 105 | ] |
| 106 | |
| 107 | mutating_paths = [path for path in dod.touched_files if path] |
| 108 | requires_verification = bool(mutating_paths or dod.mutating_actions) |
| 109 | rlog = get_runtime_logger() |
| 110 | rlog.completion_check( |
| 111 | "dod_gate", |
| 112 | "requires_verification" if requires_verification else "no_verification", |
| 113 | reason=f"files={mutating_paths[:3]}, actions={len(dod.mutating_actions)}" |
| 114 | if requires_verification else None, |
| 115 | ) |
| 116 | if tracked_pending_items and not requires_verification: |
| 117 | pending_provenance = [ |
| 118 | EvidenceProvenance( |
| 119 | category="tracked_work", |
| 120 | source="dod.pending_items", |
| 121 | summary=f"tracked work item still pending: {item}", |
| 122 | status=EvidenceProvenanceStatus.MISSING.value, |
| 123 | subject=item, |
| 124 | ) |
| 125 | for item in tracked_pending_items |
| 126 | ] |
| 127 | pending_text = "\n".join(f"- {item}" for item in tracked_pending_items) |
| 128 | self.dod_store.save(dod) |
| 129 | await self.emit_dod_status(emit, dod) |
| 130 | self.context.session.append( |
| 131 | Message( |
| 132 | role=Role.USER, |
| 133 | content=( |
| 134 | "[PENDING WORK REMAINS]\n" |
| 135 | "The tracked work items are not complete yet:\n" |
| 136 | f"{pending_text}\n\n" |
| 137 | "Continue the task, and update TodoWrite as you make progress." |
| 138 | ), |
| 139 | ) |
| 140 | ) |
| 141 | return CompletionGateResult( |
| 142 | should_continue=True, |
| 143 | reason_code="pending_items_continue", |
| 144 | reason_summary="continued because tracked work items still remained incomplete", |
| 145 | final_response="", |
| 146 | evidence_provenance=pending_provenance, |
| 147 | ) |
| 148 | |
| 149 | if not requires_verification: |
| 150 | skip_provenance = [ |
| 151 | EvidenceProvenance( |
| 152 | category="verification", |
| 153 | source="dod.mutating_actions", |
| 154 | summary="verification was skipped because no mutating work required checks", |
| 155 | status=EvidenceProvenanceStatus.CONTEXT.value, |
| 156 | ) |
| 157 | ] |
| 158 | skip_observations = [ |
| 159 | VerificationObservation( |
| 160 | status=VerificationObservationStatus.SKIPPED.value, |
| 161 | summary=( |
| 162 | "verification was skipped because no mutating work " |
| 163 | "required checks" |
| 164 | ), |
| 165 | ) |
| 166 | ] |
| 167 | dod.status = "done" |
| 168 | dod.last_verification_result = "skipped" |
| 169 | summary.verification_status = "skipped" |
| 170 | summary.definition_of_done = dod |
| 171 | self.context.session.append_workflow_timeline_entry( |
| 172 | WorkflowTimelineEntry( |
| 173 | timestamp=datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ"), |
| 174 | kind=WorkflowTimelineEntryKind.VERIFY_SKIP.value, |
| 175 | mode=self.context.workflow_mode, |
| 176 | reason_code="verification_not_required", |
| 177 | summary="verification skipped because the turn made no mutating changes", |
| 178 | decision_kind=WorkflowDecisionKind.FORCED.value, |
| 179 | prompt_format=self._prompt_format, |
| 180 | prompt_sections=self._prompt_sections, |
| 181 | evidence_summary=summarize_evidence_provenance(skip_provenance), |
| 182 | evidence_provenance=skip_provenance, |
| 183 | verification_observations=skip_observations, |
| 184 | ) |
| 185 | ) |
| 186 | summary.workflow_timeline = list(self.context.session.workflow_timeline) |
| 187 | self.dod_store.save(dod) |
| 188 | await self.emit_dod_status(emit, dod) |
| 189 | return CompletionGateResult( |
| 190 | should_continue=False, |
| 191 | reason_code="non_mutating_response_accepted", |
| 192 | reason_summary=( |
| 193 | "accepted the response because no mutating work required " |
| 194 | "verification" |
| 195 | ), |
| 196 | final_response=candidate_response, |
| 197 | evidence_provenance=skip_provenance, |
| 198 | verification_observations=skip_observations, |
| 199 | ) |
| 200 | |
| 201 | verify_item = "Collect verification evidence" |
| 202 | if verify_item not in dod.pending_items and verify_item not in dod.completed_items: |
| 203 | dod.pending_items.append(verify_item) |
| 204 | |
| 205 | if ( |
| 206 | not dod.verification_commands |
| 207 | and dod.verification_plan |
| 208 | and Path(dod.verification_plan).exists() |
| 209 | ): |
| 210 | dod.verification_commands = extract_verification_commands_from_markdown( |
| 211 | Path(dod.verification_plan).read_text() |
| 212 | ) |
| 213 | |
| 214 | if not dod.verification_commands: |
| 215 | dod.verification_commands = derive_verification_commands( |
| 216 | dod, |
| 217 | project_root=self.context.project_root, |
| 218 | task_statement=dod.task_statement, |
| 219 | ) |
| 220 | |
| 221 | await self.set_workflow_mode( |
| 222 | ModeDecision.transition( |
| 223 | WorkflowMode.VERIFY, |
| 224 | reason_code="definition_of_done_requires_verification", |
| 225 | reason_summary="definition-of-done gate requires verification", |
| 226 | decision_kind=WorkflowDecisionKind.HANDOFF, |
| 227 | ), |
| 228 | dod=dod, |
| 229 | emit=emit, |
| 230 | summary=summary, |
| 231 | ) |
| 232 | if dod.verification_commands: |
| 233 | attempt = ensure_active_verification_attempt(dod) |
| 234 | dod.last_verification_result = VerificationObservationStatus.PENDING.value |
| 235 | self.dod_store.save(dod) |
| 236 | append_verification_timeline_entry( |
| 237 | self.context, |
| 238 | summary, |
| 239 | reason_code="verification_pending", |
| 240 | reason_summary=( |
| 241 | "verification is pending for the active command set" |
| 242 | ), |
| 243 | evidence_summary=[ |
| 244 | f"verification command pending: {command}" |
| 245 | for command in dod.verification_commands[:2] |
| 246 | ], |
| 247 | evidence_provenance=_pending_verification_provenance(dod), |
| 248 | verification_observations=_pending_verification_observations( |
| 249 | dod, |
| 250 | attempt_id=attempt.attempt_id, |
| 251 | attempt_number=attempt.attempt_number, |
| 252 | ), |
| 253 | ) |
| 254 | verification_passed = await self.verify_definition_of_done( |
| 255 | dod=dod, |
| 256 | emit=emit, |
| 257 | summary=summary, |
| 258 | executor=executor, |
| 259 | ) |
| 260 | verification_observations = _verification_result_observations( |
| 261 | dod, |
| 262 | passed=verification_passed, |
| 263 | attempt_id=dod.active_verification_attempt_id, |
| 264 | attempt_number=dod.active_verification_attempt_number, |
| 265 | ) |
| 266 | if verification_passed: |
| 267 | passed_provenance = _verification_result_provenance(dod, passed=True) |
| 268 | if verify_item in dod.pending_items: |
| 269 | dod.pending_items.remove(verify_item) |
| 270 | if verify_item not in dod.completed_items: |
| 271 | dod.completed_items.append(verify_item) |
| 272 | for pending in list(dod.pending_items): |
| 273 | if pending not in dod.completed_items: |
| 274 | dod.completed_items.append(pending) |
| 275 | dod.pending_items = [] |
| 276 | dod.status = "done" |
| 277 | dod.last_verification_result = "passed" |
| 278 | dod.confidence = "high" |
| 279 | summary.verification_status = "passed" |
| 280 | summary.definition_of_done = dod |
| 281 | self.dod_store.save(dod) |
| 282 | await self.emit_dod_status(emit, dod) |
| 283 | verified_response = candidate_response |
| 284 | verification_summary = build_verification_summary(dod.evidence) |
| 285 | if verification_summary not in verified_response: |
| 286 | verified_response = f"{candidate_response.rstrip()}\n\n{verification_summary}" |
| 287 | return CompletionGateResult( |
| 288 | should_continue=False, |
| 289 | reason_code="verification_passed", |
| 290 | reason_summary="accepted the response after verification evidence passed", |
| 291 | final_response=verified_response, |
| 292 | evidence_provenance=passed_provenance, |
| 293 | verification_observations=verification_observations, |
| 294 | ) |
| 295 | |
| 296 | dod.last_verification_result = "failed" |
| 297 | summary.verification_status = "failed" |
| 298 | summary.definition_of_done = dod |
| 299 | failed_provenance = _verification_result_provenance(dod, passed=False) |
| 300 | if dod.retry_count >= dod.retry_budget: |
| 301 | dod.status = "failed" |
| 302 | dod.confidence = "low" |
| 303 | self.dod_store.save(dod) |
| 304 | await self.emit_dod_status(emit, dod) |
| 305 | failure_summary = build_verification_summary(dod.evidence) |
| 306 | exhausted_response = ( |
| 307 | "I couldn't verify that the task is complete within the retry budget.\n\n" |
| 308 | f"{failure_summary}" |
| 309 | ) |
| 310 | return CompletionGateResult( |
| 311 | should_continue=False, |
| 312 | reason_code="verification_retry_budget_exhausted", |
| 313 | reason_summary="stopped after verification retry budget was exhausted", |
| 314 | final_response=exhausted_response, |
| 315 | evidence_provenance=failed_provenance, |
| 316 | verification_observations=verification_observations, |
| 317 | ) |
| 318 | |
| 319 | dod.retry_count += 1 |
| 320 | dod.status = "fixing" |
| 321 | dod.confidence = "medium" |
| 322 | self.dod_store.save(dod) |
| 323 | await self.emit_dod_status(emit, dod) |
| 324 | await self.set_workflow_mode( |
| 325 | ModeDecision.transition( |
| 326 | WorkflowMode.EXECUTE, |
| 327 | reason_code="verification_failed_reentry", |
| 328 | reason_summary="verification failed; returning to execute for fixes", |
| 329 | decision_kind=WorkflowDecisionKind.REENTRY, |
| 330 | ), |
| 331 | dod=dod, |
| 332 | emit=emit, |
| 333 | summary=summary, |
| 334 | ) |
| 335 | failure_prompt = ( |
| 336 | "[DEFINITION OF DONE CHECK FAILED]\n" |
| 337 | f"Task: {dod.task_statement}\n" |
| 338 | f"Attempt: {dod.retry_count}/{dod.retry_budget}\n" |
| 339 | f"Pending items: {', '.join(dod.pending_items)}\n\n" |
| 340 | f"{build_verification_summary(dod.evidence)}\n\n" |
| 341 | "Fix the failures above, then finish the task again." |
| 342 | ) |
| 343 | self.context.session.append(Message(role=Role.USER, content=failure_prompt)) |
| 344 | return CompletionGateResult( |
| 345 | should_continue=True, |
| 346 | reason_code="verification_failed_reentry", |
| 347 | reason_summary=( |
| 348 | "continued after verification failed and the runtime re-entered " |
| 349 | "execute mode" |
| 350 | ), |
| 351 | final_response="", |
| 352 | evidence_provenance=failed_provenance, |
| 353 | verification_observations=verification_observations, |
| 354 | ) |
| 355 | |
| 356 | async def verify_definition_of_done( |
| 357 | self, |
| 358 | *, |
| 359 | dod: DefinitionOfDone, |
| 360 | emit: EventSink, |
| 361 | summary: TurnSummary, |
| 362 | executor: ToolExecutor, |
| 363 | ) -> bool: |
| 364 | """Collect verification evidence for one DoD.""" |
| 365 | |
| 366 | dod.status = "verifying" |
| 367 | self.dod_store.save(dod) |
| 368 | await self.emit_dod_status(emit, dod) |
| 369 | attempt = ensure_active_verification_attempt(dod) |
| 370 | |
| 371 | if not dod.verification_commands: |
| 372 | missing_provenance = _missing_verification_provenance() |
| 373 | missing_observations = _missing_verification_observations( |
| 374 | attempt_id=attempt.attempt_id, |
| 375 | attempt_number=attempt.attempt_number, |
| 376 | ) |
| 377 | append_verification_timeline_entry( |
| 378 | self.context, |
| 379 | summary, |
| 380 | reason_code="verification_commands_missing", |
| 381 | reason_summary="verification commands were still missing at execution time", |
| 382 | evidence_provenance=missing_provenance, |
| 383 | verification_observations=missing_observations, |
| 384 | ) |
| 385 | summary.verification_status = "failed" |
| 386 | return False |
| 387 | |
| 388 | dod.evidence = [] |
| 389 | all_passed = True |
| 390 | for index, command in enumerate(dod.verification_commands, start=1): |
| 391 | verification_call = ToolCall( |
| 392 | id=f"verify-{summary.iterations}-{index}", |
| 393 | name="bash", |
| 394 | arguments={"command": command, "cwd": str(self.context.project_root)}, |
| 395 | ) |
| 396 | await emit( |
| 397 | AgentEvent( |
| 398 | type="tool_call", |
| 399 | tool_name=verification_call.name, |
| 400 | tool_args=verification_call.arguments, |
| 401 | phase="verification", |
| 402 | ) |
| 403 | ) |
| 404 | outcome = await executor.execute_tool_call( |
| 405 | verification_call, |
| 406 | on_confirmation=None, |
| 407 | emit_confirmation=None, |
| 408 | source="verification", |
| 409 | skip_duplicate_check=True, |
| 410 | record_action=False, |
| 411 | skip_confirmation=True, |
| 412 | ) |
| 413 | await emit( |
| 414 | AgentEvent( |
| 415 | type="tool_result", |
| 416 | content=outcome.event_content, |
| 417 | tool_name=verification_call.name, |
| 418 | is_error=outcome.is_error, |
| 419 | phase="verification", |
| 420 | ) |
| 421 | ) |
| 422 | |
| 423 | metadata = outcome.registry_result.metadata if outcome.registry_result else {} |
| 424 | evidence = VerificationEvidence( |
| 425 | command=command, |
| 426 | passed=not outcome.is_error, |
| 427 | exit_code=metadata.get("exit_code"), |
| 428 | stdout=str(metadata.get("stdout", "")), |
| 429 | stderr=str(metadata.get("stderr", "")), |
| 430 | output=outcome.result_output, |
| 431 | kind=_classify_verification_kind(command), |
| 432 | ) |
| 433 | dod.evidence.append(evidence) |
| 434 | observation = _verification_observation_from_evidence( |
| 435 | evidence, |
| 436 | attempt_id=attempt.attempt_id, |
| 437 | attempt_number=attempt.attempt_number, |
| 438 | ) |
| 439 | provenance = _verification_provenance_from_evidence(evidence) |
| 440 | append_verification_timeline_entry( |
| 441 | self.context, |
| 442 | summary, |
| 443 | reason_code=( |
| 444 | "verification_command_passed" |
| 445 | if evidence.passed |
| 446 | else "verification_command_failed" |
| 447 | ), |
| 448 | reason_summary=( |
| 449 | f"verification passed for `{command}`" |
| 450 | if evidence.passed |
| 451 | else f"verification failed for `{command}`" |
| 452 | ), |
| 453 | evidence_provenance=provenance, |
| 454 | verification_observations=[observation], |
| 455 | ) |
| 456 | all_passed = all_passed and evidence.passed |
| 457 | summary.tool_result_messages.append(outcome.message) |
| 458 | self.context.session.append(outcome.message) |
| 459 | |
| 460 | self.dod_store.save(dod) |
| 461 | summary.verification_status = "passed" if all_passed else "failed" |
| 462 | return all_passed |
| 463 | |
| 464 | def finalize_summary(self, summary: TurnSummary) -> TurnSummary: |
| 465 | """Finalize usage, memory capture, and trace data for one turn.""" |
| 466 | |
| 467 | summary.usage["tool_calls"] = len(summary.tool_result_messages) |
| 468 | summary.usage["iterations"] = summary.iterations |
| 469 | summary.cumulative_usage = self.context.session.record_turn_usage( |
| 470 | summary.usage, |
| 471 | tool_calls=len(summary.tool_result_messages), |
| 472 | iterations=summary.iterations, |
| 473 | ) |
| 474 | summary.session_id = self.context.session.session_id |
| 475 | summary.completion_decision_code = getattr( |
| 476 | self.context.session, |
| 477 | "last_completion_decision_code", |
| 478 | None, |
| 479 | ) |
| 480 | summary.completion_decision_summary = getattr( |
| 481 | self.context.session, |
| 482 | "last_completion_decision_summary", |
| 483 | None, |
| 484 | ) |
| 485 | summary.completion_trace = list( |
| 486 | getattr(self.context.session, "completion_trace", []) |
| 487 | ) |
| 488 | summary.last_turn_transition_summary = ( |
| 489 | getattr(self.context.session, "last_turn_transition_summary", None) |
| 490 | ) |
| 491 | summary.workflow_timeline = list( |
| 492 | getattr(self.context.session, "workflow_timeline", []) |
| 493 | ) |
| 494 | if summary.definition_of_done and summary.definition_of_done.status == "done": |
| 495 | MemoryStore(self.context.project_root).capture_definition_of_done( |
| 496 | build_verification_summary(summary.definition_of_done.evidence) |
| 497 | ) |
| 498 | summary.trace = list(self.tracer.events) |
| 499 | return summary |
| 500 | |
| 501 | async def emit_dod_status(self, emit: EventSink, dod: DefinitionOfDone) -> None: |
| 502 | """Emit the latest definition-of-done status.""" |
| 503 | |
| 504 | self.dod_store.save(dod) |
| 505 | await emit( |
| 506 | AgentEvent( |
| 507 | type="dod_status", |
| 508 | content=( |
| 509 | f"DoD: {dod.status} " |
| 510 | f"({len(dod.pending_items)} pending" |
| 511 | + ( |
| 512 | f", last verification: {dod.last_verification_result}" |
| 513 | if dod.last_verification_result |
| 514 | else "" |
| 515 | ) |
| 516 | + ")" |
| 517 | ), |
| 518 | dod_status=dod.status, |
| 519 | pending_items_count=len(dod.pending_items), |
| 520 | last_verification_result=dod.last_verification_result, |
| 521 | definition_of_done=dod, |
| 522 | ) |
| 523 | ) |
| 524 | |
| 525 | |
def _verification_result_provenance(
    dod: DefinitionOfDone,
    *,
    passed: bool,
) -> list[EvidenceProvenance]:
    """Build provenance entries describing the overall verification outcome.

    Evidence whose ``passed`` flag matches ``passed`` becomes SUPPORTS (pass)
    or CONTRADICTS (fail) entries. On failure, declared commands without any
    recorded evidence are appended as MISSING. When no evidence matches, the
    declared commands alone are used: as outcome entries on success, or as
    MISSING entries on failure, with one generic MISSING entry as a last
    resort.
    """

    if passed:
        outcome_status = EvidenceProvenanceStatus.SUPPORTS.value
    else:
        outcome_status = EvidenceProvenanceStatus.CONTRADICTS.value

    def outcome_summary(command: str) -> str:
        if passed:
            return f"verification passed for `{command}`"
        return f"verification failed for `{command}`"

    def missing_entry(command: str) -> EvidenceProvenance:
        return EvidenceProvenance(
            category="verification",
            source="dod.verification_commands",
            summary=(
                "verification did not produce an observed result for "
                f"`{command}`"
            ),
            status=EvidenceProvenanceStatus.MISSING.value,
            subject=command,
        )

    from_evidence = [
        EvidenceProvenance(
            category="verification",
            source="dod.evidence",
            summary=outcome_summary(item.command or "verification"),
            status=outcome_status,
            subject=item.command or "verification",
            detail=_verification_detail(item),
        )
        for item in dod.evidence
        if item.passed == passed
    ]

    if from_evidence:
        if passed:
            return from_evidence
        # Any command with recorded evidence (pass or fail) counts as observed.
        seen = {item.command for item in dod.evidence if item.command}
        unobserved = [
            missing_entry(command)
            for command in dod.verification_commands
            if command and command not in seen
        ]
        return from_evidence + unobserved

    if passed:
        return [
            EvidenceProvenance(
                category="verification",
                source="dod.verification_commands",
                summary=outcome_summary(command),
                status=outcome_status,
                subject=command,
            )
            for command in dod.verification_commands
            if command
        ]

    unobserved = [
        missing_entry(command) for command in dod.verification_commands if command
    ]
    if unobserved:
        return unobserved
    return [
        EvidenceProvenance(
            category="verification",
            source="dod.verification_commands",
            summary="verification commands were still missing at execution time",
            status=EvidenceProvenanceStatus.MISSING.value,
        )
    ]
| 623 | |
| 624 | |
def _verification_result_observations(
    dod: DefinitionOfDone,
    *,
    passed: bool,
    attempt_id: str | None,
    attempt_number: int | None,
) -> list[VerificationObservation]:
    """Build observations describing the overall verification outcome.

    Evidence whose ``passed`` flag matches ``passed`` becomes PASSED/FAILED
    observations. On failure, declared commands with no recorded evidence at
    all are appended as MISSING; if nothing can be reported, one generic
    MISSING observation is returned.

    Args:
        dod: Definition of done holding ``evidence`` and
            ``verification_commands``.
        passed: Overall verification outcome being described.
        attempt_id: Verification attempt identifier stamped on each entry.
        attempt_number: Verification attempt number stamped on each entry.
    """

    target_status = (
        VerificationObservationStatus.PASSED.value
        if passed
        else VerificationObservationStatus.FAILED.value
    )
    entries: list[VerificationObservation] = [
        VerificationObservation(
            status=target_status,
            summary=(
                f"verification passed for `{evidence.command or 'verification'}`"
                if passed
                else f"verification failed for `{evidence.command or 'verification'}`"
            ),
            command=evidence.command or None,
            kind=evidence.kind,
            exit_code=evidence.exit_code,
            detail=_verification_detail(evidence),
            attempt_id=attempt_id,
            attempt_number=attempt_number,
        )
        for evidence in dod.evidence
        if evidence.passed == passed
    ]

    if passed:
        return entries

    # Every command that produced evidence counts as observed, including the
    # ones that passed during a failed run. Only commands with no evidence at
    # all may be reported as MISSING; this matches how
    # _verification_result_provenance decides which commands are unobserved.
    observed_commands = {
        evidence.command for evidence in dod.evidence if evidence.command
    }
    entries.extend(
        VerificationObservation(
            status=VerificationObservationStatus.MISSING.value,
            summary=f"verification did not produce an observed result for `{command}`",
            command=command,
            kind=_classify_verification_kind(command),
            attempt_id=attempt_id,
            attempt_number=attempt_number,
        )
        for command in dod.verification_commands
        if command and command not in observed_commands
    )

    if entries:
        return entries

    return [
        VerificationObservation(
            status=VerificationObservationStatus.MISSING.value,
            summary="verification commands were still missing at execution time",
            attempt_id=attempt_id,
            attempt_number=attempt_number,
        )
    ]
| 689 | |
| 690 | |
def _verification_observation_from_evidence(
    evidence: VerificationEvidence,
    *,
    attempt_id: str | None,
    attempt_number: int | None,
) -> VerificationObservation:
    """Convert one evidence record into a PASSED or FAILED observation."""

    label = evidence.command or "verification"
    if evidence.passed:
        status = VerificationObservationStatus.PASSED.value
        headline = f"verification passed for `{label}`"
    else:
        status = VerificationObservationStatus.FAILED.value
        headline = f"verification failed for `{label}`"

    return VerificationObservation(
        status=status,
        summary=headline,
        # An empty command is reported as None rather than as the label.
        command=evidence.command or None,
        kind=evidence.kind,
        exit_code=evidence.exit_code,
        detail=_verification_detail(evidence),
        attempt_id=attempt_id,
        attempt_number=attempt_number,
    )
| 716 | |
| 717 | |
def _verification_provenance_from_evidence(
    evidence: VerificationEvidence,
) -> list[EvidenceProvenance]:
    """Wrap one evidence record in a single-item provenance list.

    Passing evidence is marked SUPPORTS; failing evidence CONTRADICTS.
    """

    label = evidence.command or "verification"
    if evidence.passed:
        headline = f"verification passed for `{label}`"
        status = EvidenceProvenanceStatus.SUPPORTS.value
    else:
        headline = f"verification failed for `{label}`"
        status = EvidenceProvenanceStatus.CONTRADICTS.value

    entry = EvidenceProvenance(
        category="verification",
        source="dod.evidence",
        summary=headline,
        status=status,
        subject=label,
        detail=_verification_detail(evidence),
    )
    return [entry]
| 740 | |
| 741 | |
def _missing_verification_observations(
    *,
    attempt_id: str | None,
    attempt_number: int | None,
) -> list[VerificationObservation]:
    """Return the single MISSING observation used when no commands exist."""

    placeholder = VerificationObservation(
        status=VerificationObservationStatus.MISSING.value,
        summary="verification commands were still missing at execution time",
        attempt_id=attempt_id,
        attempt_number=attempt_number,
    )
    return [placeholder]
| 755 | |
| 756 | |
def _missing_verification_provenance() -> list[EvidenceProvenance]:
    """Return the single MISSING provenance entry used when no commands exist."""

    placeholder = EvidenceProvenance(
        category="verification",
        source="dod.verification_commands",
        summary="verification commands were still missing at execution time",
        status=EvidenceProvenanceStatus.MISSING.value,
    )
    return [placeholder]
| 766 | |
| 767 | |
def _pending_verification_observations(
    dod: DefinitionOfDone,
    *,
    attempt_id: str | None,
    attempt_number: int | None,
) -> list[VerificationObservation]:
    """Return one PENDING observation per declared verification command."""

    return [
        VerificationObservation(
            status=VerificationObservationStatus.PENDING.value,
            summary=f"verification pending for `{queued}`",
            command=queued,
            attempt_id=attempt_id,
            attempt_number=attempt_number,
        )
        for queued in dod.verification_commands
    ]
| 786 | |
| 787 | |
def _pending_verification_provenance(
    dod: DefinitionOfDone,
) -> list[EvidenceProvenance]:
    """Return one MISSING provenance entry per declared verification command.

    Pending commands have no evidence yet, so each is recorded as MISSING.
    """

    return [
        EvidenceProvenance(
            category="verification",
            source="dod.verification_commands",
            summary=f"verification command pending: {queued}",
            status=EvidenceProvenanceStatus.MISSING.value,
            subject=queued,
        )
        for queued in dod.verification_commands
    ]
| 803 | |
| 804 | |
| 805 | def _verification_detail(evidence: VerificationEvidence) -> str | None: |
| 806 | for candidate in (evidence.stdout, evidence.stderr, evidence.output): |
| 807 | text = str(candidate).strip() |
| 808 | if text: |
| 809 | return text.splitlines()[0] |
| 810 | return None |
| 811 | |
| 812 | |
| 813 | def _classify_verification_kind(command: str) -> str: |
| 814 | """Classify the verification command into a summary kind.""" |
| 815 | |
| 816 | command_lower = command.lower() |
| 817 | if "lint" in command_lower or "ruff" in command_lower: |
| 818 | return "lint" |
| 819 | if "type" in command_lower or "mypy" in command_lower or "py_compile" in command_lower: |
| 820 | return "typecheck" |
| 821 | if "test" in command_lower or "pytest" in command_lower: |
| 822 | return "test" |
| 823 | if "build" in command_lower: |
| 824 | return "build" |
| 825 | return "runtime" |
| 826 | |
| 827 | |
def merge_usage(target: dict[str, int], update: dict[str, int]) -> None:
    """Add normalized usage counters from ``update`` into ``target`` in place.

    ``update`` is passed through ``normalize_usage`` first; keys absent from
    ``target`` start at zero.
    """

    normalized = normalize_usage(update)
    for counter, amount in normalized.items():
        running_total = target.get(counter, 0)
        target[counter] = running_total + amount