Add typed evidence provenance core
- SHA: ad2304e9eac54db4448433967bf510cdca73066a (short: ad2304e)
- Parents: 2671488
- Tree: 36ab49b

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/completion_trace.py | 15 | 1 |
| A | src/loader/runtime/evidence_provenance.py | 93 | 0 |
| M | src/loader/runtime/policy_timeline.py | 3 | 0 |
| M | src/loader/runtime/workflow_policy.py | 19 | 1 |
| A | tests/test_evidence_provenance.py | 79 | 0 |
src/loader/runtime/completion_trace.py (modified)
@@ -5,6 +5,11 @@ from __future__ import annotations
| 5 | 5 | from dataclasses import dataclass, field |
| 6 | 6 | from typing import Any |
| 7 | 7 | |
| 8 | +from .evidence_provenance import ( | |
| 9 | + EvidenceProvenance, | |
| 10 | + normalize_evidence_provenance, | |
| 11 | + summarize_evidence_provenance, | |
| 12 | +) | |
| 8 | 13 | from .workflow_policy import WorkflowTimelineEntry |
| 9 | 14 | |
| 10 | 15 | |
@@ -17,6 +22,7 @@ class CompletionTraceEntry: | ||
| 17 | 22 | decision_code: str |
| 18 | 23 | decision_summary: str |
| 19 | 24 | evidence_summary: list[str] = field(default_factory=list) |
| 25 | + evidence_provenance: list[EvidenceProvenance] = field(default_factory=list) | |
| 20 | 26 | |
| 21 | 27 | def to_dict(self) -> dict[str, str]: |
| 22 | 28 | """Serialize the entry into persisted session state.""" |
@@ -27,6 +33,7 @@ class CompletionTraceEntry: | ||
| 27 | 33 | "decision_code": self.decision_code, |
| 28 | 34 | "decision_summary": self.decision_summary, |
| 29 | 35 | "evidence_summary": list(self.evidence_summary), |
| 36 | + "evidence_provenance": [item.to_dict() for item in self.evidence_provenance], | |
| 30 | 37 | } |
| 31 | 38 | |
| 32 | 39 | @classmethod |
@@ -43,6 +50,9 @@ class CompletionTraceEntry: | ||
| 43 | 50 | for item in data.get("evidence_summary", []) |
| 44 | 51 | if str(item).strip() |
| 45 | 52 | ], |
| 53 | + evidence_provenance=normalize_evidence_provenance( | |
| 54 | + data.get("evidence_provenance") | |
| 55 | + ), | |
| 46 | 56 | ) |
| 47 | 57 | |
| 48 | 58 | |
@@ -125,7 +135,11 @@ def _completion_trace_entry_from_timeline_entry( | ||
| 125 | 135 | outcome=entry.policy_outcome or _completion_outcome_from_kind(entry.kind), |
| 126 | 136 | decision_code=entry.reason_code, |
| 127 | 137 | decision_summary=summary, |
| 128 | - evidence_summary=list(entry.evidence_summary), | |
| 138 | + evidence_summary=list( | |
| 139 | + entry.evidence_summary | |
| 140 | + or summarize_evidence_provenance(entry.evidence_provenance) | |
| 141 | + ), | |
| 142 | + evidence_provenance=list(entry.evidence_provenance), | |
| 129 | 143 | ) |
| 130 | 144 | |
| 131 | 145 | |
src/loader/runtime/evidence_provenance.py (added)
@@ -0,0 +1,93 @@
| 1 | +"""Typed evidence provenance carried through runtime policy decisions.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from dataclasses import dataclass | |
| 6 | +from enum import StrEnum | |
| 7 | +from typing import Any | |
| 8 | + | |
| 9 | + | |
| 10 | +class EvidenceProvenanceStatus(StrEnum): | |
| 11 | + """How one evidence item relates to a runtime decision.""" | |
| 12 | + | |
| 13 | + SUPPORTS = "supports" | |
| 14 | + MISSING = "missing" | |
| 15 | + CONTRADICTS = "contradicts" | |
| 16 | + CONTEXT = "context" | |
| 17 | + | |
| 18 | + | |
@dataclass(slots=True)
class EvidenceProvenance:
    """One typed piece of evidence behind a completion or verification decision.

    ``status`` is kept as a plain string and defaults to the ``CONTEXT``
    status value. ``subject`` and ``detail`` are optional and may be ``None``.
    """

    category: str
    source: str
    summary: str
    status: str = EvidenceProvenanceStatus.CONTEXT.value
    subject: str | None = None
    detail: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Serialize one provenance item for persisted runtime state.

        Returns:
            A dict with the keys ``category``, ``source``, ``summary``,
            ``status``, ``subject`` and ``detail``.
        """

        return {
            "category": self.category,
            "source": self.source,
            "summary": self.summary,
            "status": self.status,
            "subject": self.subject,
            "detail": self.detail,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvidenceProvenance:
        """Load one persisted provenance item.

        A key that is absent and a key whose value is ``None`` are handled
        the same way, so a persisted ``null`` never becomes the literal
        string ``"None"``.

        Args:
            data: Mapping produced by :meth:`to_dict` (or a partial one).

        Returns:
            A new entry. Missing text fields become ``""``; a missing or
            blank ``status`` falls back to the ``CONTEXT`` status value.
        """

        def _text(key: str) -> str:
            # ``dict.get(key, default)`` only uses the default for absent
            # keys, so an explicit None has to be handled separately.
            raw = data.get(key)
            return "" if raw is None else str(raw)

        status = _text("status").strip() or EvidenceProvenanceStatus.CONTEXT.value
        return cls(
            category=_text("category"),
            source=_text("source"),
            summary=_text("summary"),
            status=status,
            subject=_optional_text(data.get("subject")),
            detail=_optional_text(data.get("detail")),
        )

    def render_summary(self) -> str:
        """Render one concise human-facing summary.

        Returns:
            The ``summary`` field unchanged.
        """

        return self.summary
| 59 | + | |
| 60 | + | |
def normalize_evidence_provenance(value: Any) -> list[EvidenceProvenance]:
    """Turn a raw persisted payload into a list of typed provenance entries.

    Args:
        value: Arbitrary deserialized data.

    Returns:
        One entry per dict found in ``value`` when ``value`` is a list, in
        the original order. Non-dict members are skipped, and any non-list
        ``value`` yields an empty list.
    """

    if isinstance(value, list):
        return [
            EvidenceProvenance.from_dict(raw)
            for raw in value
            if isinstance(raw, dict)
        ]
    return []
| 71 | + | |
| 72 | + | |
def summarize_evidence_provenance(
    entries: list[EvidenceProvenance],
    *,
    max_items: int | None = None,
) -> list[str]:
    """Project typed provenance into concise evidence-summary strings.

    Each entry's rendered summary is stripped; blank summaries and repeats
    of an earlier summary are dropped, and first-seen order is kept.

    Args:
        entries: Provenance items to summarize.
        max_items: Upper bound on the number of summaries returned.
            ``None`` means no limit; zero or a negative value yields an
            empty list.

    Returns:
        The distinct, non-blank summaries, at most ``max_items`` of them.
    """

    if max_items is not None and max_items <= 0:
        return []

    summaries: list[str] = []
    seen: set[str] = set()
    for entry in entries:
        text = entry.render_summary().strip()
        if not text or text in seen:
            continue
        seen.add(text)
        summaries.append(text)
        # The cap counts emitted summaries, so blank or duplicate entries
        # early in the input do not use up the budget.
        if max_items is not None and len(summaries) >= max_items:
            break
    return summaries
| 87 | + | |
| 88 | + | |
| 89 | +def _optional_text(value: Any) -> str | None: | |
| 90 | + if value is None: | |
| 91 | + return None | |
| 92 | + text = str(value).strip() | |
| 93 | + return text or None | |
src/loader/runtime/policy_timeline.py (modified)
@@ -4,6 +4,7 @@ from __future__ import annotations
| 4 | 4 | |
| 5 | 5 | from .context import RuntimeContext |
| 6 | 6 | from .events import TurnSummary |
| 7 | +from .evidence_provenance import EvidenceProvenance | |
| 7 | 8 | from .workflow_policy import ( |
| 8 | 9 | WorkflowDecisionKind, |
| 9 | 10 | WorkflowTimelineEntry, |
@@ -22,6 +23,7 @@ def append_policy_timeline_entry( | ||
| 22 | 23 | policy_outcome: str | None = None, |
| 23 | 24 | decision_kind: WorkflowDecisionKind | str | None = WorkflowDecisionKind.FORCED, |
| 24 | 25 | evidence_summary: list[str] | None = None, |
| 26 | + evidence_provenance: list[EvidenceProvenance] | None = None, | |
| 25 | 27 | ) -> WorkflowTimelineEntry: |
| 26 | 28 | """Append one typed completion/repair accountability event.""" |
| 27 | 29 | |
@@ -36,6 +38,7 @@ def append_policy_timeline_entry( | ||
| 36 | 38 | prompt_format=context.prompt_format, |
| 37 | 39 | prompt_sections=context.prompt_sections, |
| 38 | 40 | evidence_summary=evidence_summary, |
| 41 | + evidence_provenance=evidence_provenance, | |
| 39 | 42 | ) |
| 40 | 43 | context.session.append_workflow_timeline_entry(entry) |
| 41 | 44 | summary.workflow_timeline = list(context.session.workflow_timeline) |
src/loader/runtime/workflow_policy.py (modified)
@@ -15,6 +15,11 @@ from .clarify_strategy import (
| 15 | 15 | describe_clarify_pressure_kind, |
| 16 | 16 | describe_clarify_slot, |
| 17 | 17 | ) |
| 18 | +from .evidence_provenance import ( | |
| 19 | + EvidenceProvenance, | |
| 20 | + normalize_evidence_provenance, | |
| 21 | + summarize_evidence_provenance, | |
| 22 | +) | |
| 18 | 23 | from .workflow_signals import WorkflowSignalExtractor, WorkflowSignalPacket |
| 19 | 24 | |
| 20 | 25 | |
@@ -293,6 +298,7 @@ class WorkflowTimelineEntry: | ||
| 293 | 298 | unresolved_questions: list[str] = field(default_factory=list) |
| 294 | 299 | signal_summary: list[str] = field(default_factory=list) |
| 295 | 300 | evidence_summary: list[str] = field(default_factory=list) |
| 301 | + evidence_provenance: list[EvidenceProvenance] = field(default_factory=list) | |
| 296 | 302 | clarify_stage: str | None = None |
| 297 | 303 | clarify_pressure_kind: str | None = None |
| 298 | 304 | pressure_pass_complete: bool = False |
@@ -318,6 +324,9 @@ class WorkflowTimelineEntry: | ||
| 318 | 324 | "unresolved_questions": list(self.unresolved_questions), |
| 319 | 325 | "signal_summary": list(self.signal_summary), |
| 320 | 326 | "evidence_summary": list(self.evidence_summary), |
| 327 | + "evidence_provenance": [ | |
| 328 | + item.to_dict() for item in self.evidence_provenance | |
| 329 | + ], | |
| 321 | 330 | "clarify_stage": self.clarify_stage, |
| 322 | 331 | "clarify_pressure_kind": self.clarify_pressure_kind, |
| 323 | 332 | "pressure_pass_complete": self.pressure_pass_complete, |
@@ -345,6 +354,9 @@ class WorkflowTimelineEntry: | ||
| 345 | 354 | unresolved_questions=_string_list(data.get("unresolved_questions")), |
| 346 | 355 | signal_summary=_string_list(data.get("signal_summary")), |
| 347 | 356 | evidence_summary=_string_list(data.get("evidence_summary")), |
| 357 | + evidence_provenance=normalize_evidence_provenance( | |
| 358 | + data.get("evidence_provenance") | |
| 359 | + ), | |
| 348 | 360 | clarify_stage=_optional_text(data.get("clarify_stage")), |
| 349 | 361 | clarify_pressure_kind=_optional_text(data.get("clarify_pressure_kind")), |
| 350 | 362 | pressure_pass_complete=bool(data.get("pressure_pass_complete", False)), |
@@ -389,6 +401,7 @@ class WorkflowTimelineEntry: | ||
| 389 | 401 | unresolved_questions=list(decision.unresolved_questions), |
| 390 | 402 | signal_summary=list(decision.signal_summary), |
| 391 | 403 | evidence_summary=list(decision.evidence_summary), |
| 404 | + evidence_provenance=[], | |
| 392 | 405 | clarify_stage=decision.clarify_stage, |
| 393 | 406 | clarify_pressure_kind=decision.clarify_pressure_kind, |
| 394 | 407 | pressure_pass_complete=decision.pressure_pass_complete, |
@@ -413,6 +426,7 @@ class WorkflowTimelineEntry: | ||
| 413 | 426 | prompt_sections: list[str] | None = None, |
| 414 | 427 | signal_summary: list[str] | None = None, |
| 415 | 428 | evidence_summary: list[str] | None = None, |
| 429 | + evidence_provenance: list[EvidenceProvenance] | None = None, | |
| 416 | 430 | artifact_paths: list[str] | None = None, |
| 417 | 431 | ) -> WorkflowTimelineEntry: |
| 418 | 432 | """Build one typed non-routing accountability entry.""" |
@@ -424,6 +438,7 @@ class WorkflowTimelineEntry: | ||
| 424 | 438 | resolved_decision_kind = None |
| 425 | 439 | else: |
| 426 | 440 | resolved_decision_kind = str(decision_kind) |
| 441 | + resolved_provenance = list(evidence_provenance or []) | |
| 427 | 442 | return cls( |
| 428 | 443 | timestamp=_utc_now(), |
| 429 | 444 | kind=kind.value, |
@@ -432,7 +447,10 @@ class WorkflowTimelineEntry: | ||
| 432 | 447 | summary=summary, |
| 433 | 448 | decision_kind=resolved_decision_kind, |
| 434 | 449 | signal_summary=list(signal_summary or []), |
| 435 | - evidence_summary=list(evidence_summary or []), | |
| 450 | + evidence_summary=list( | |
| 451 | + evidence_summary or summarize_evidence_provenance(resolved_provenance) | |
| 452 | + ), | |
| 453 | + evidence_provenance=resolved_provenance, | |
| 436 | 454 | policy_stage=policy_stage, |
| 437 | 455 | policy_outcome=policy_outcome, |
| 438 | 456 | prompt_format=prompt_format, |
tests/test_evidence_provenance.py (added)
@@ -0,0 +1,79 @@
| 1 | +"""Tests for typed evidence provenance on policy timelines and traces.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from loader.runtime.completion_trace import completion_trace_from_workflow_timeline | |
| 6 | +from loader.runtime.evidence_provenance import ( | |
| 7 | + EvidenceProvenance, | |
| 8 | + EvidenceProvenanceStatus, | |
| 9 | +) | |
| 10 | +from loader.runtime.workflow_policy import WorkflowTimelineEntry, WorkflowTimelineEntryKind | |
| 11 | + | |
| 12 | + | |
def test_workflow_timeline_entry_derives_evidence_summary_from_provenance() -> None:
    """An accountability entry given only provenance exposes its summaries."""

    missing_text = "verification evidence was still missing for `pytest -q`"
    missing_verification = EvidenceProvenance(
        category="verification",
        source="dod.evidence",
        summary=missing_text,
        status=EvidenceProvenanceStatus.MISSING.value,
        subject="pytest -q",
    )

    # No explicit evidence_summary is passed, so it has to come from provenance.
    entry = WorkflowTimelineEntry.accountability(
        kind=WorkflowTimelineEntryKind.COMPLETION_FINALIZE,
        mode="execute",
        reason_code="continuation_budget_exhausted",
        summary="completion: stopped because follow-through evidence was still missing",
        policy_stage="continuation_check",
        policy_outcome="finalize",
        evidence_provenance=[missing_verification],
    )

    assert entry.evidence_summary == [missing_text]
    assert entry.evidence_provenance[0].status == EvidenceProvenanceStatus.MISSING.value
| 36 | + | |
| 37 | + | |
def test_completion_trace_projection_preserves_evidence_provenance() -> None:
    """Projecting a timeline into a completion trace keeps typed provenance."""

    reason = "continuation_budget_exhausted"
    missing_text = "verification evidence was still missing for `pytest -q`"
    supporting_text = "recorded work already showed the requested edit happened"

    provenance = [
        EvidenceProvenance(
            category="verification",
            source="dod.evidence",
            summary=missing_text,
            status=EvidenceProvenanceStatus.MISSING.value,
            subject="pytest -q",
        ),
        EvidenceProvenance(
            category="action",
            source="actions_taken",
            summary=supporting_text,
            status=EvidenceProvenanceStatus.SUPPORTS.value,
        ),
    ]
    finalize_entry = WorkflowTimelineEntry.accountability(
        kind=WorkflowTimelineEntryKind.COMPLETION_FINALIZE,
        mode="execute",
        reason_code=reason,
        summary="completion: stopped because follow-through evidence was still missing",
        policy_stage="continuation_check",
        policy_outcome="finalize",
        evidence_provenance=provenance,
    )

    trace = completion_trace_from_workflow_timeline(
        [finalize_entry],
        last_decision_code=reason,
    )

    assert len(trace) == 1
    assert trace[0].decision_code == reason
    assert trace[0].evidence_summary == [missing_text, supporting_text]
    statuses = [item.status for item in trace[0].evidence_provenance]
    assert statuses == [
        EvidenceProvenanceStatus.MISSING.value,
        EvidenceProvenanceStatus.SUPPORTS.value,
    ]