| 1 |
"""Parse a sway JSON report into harvest candidates. |
| 2 |
|
| 3 |
Sway emits reports with this shape (see |
| 4 |
``sway/src/dlm_sway/suite/report.py``): |
| 5 |
|
| 6 |
.. code-block:: json |
| 7 |
|
| 8 |
{ |
| 9 |
"schema_version": 1, |
| 10 |
"sway_version": "...", |
| 11 |
"base_model_id": "...", |
| 12 |
"adapter_id": "...", |
| 13 |
"probes": [ |
| 14 |
{ |
| 15 |
"name": "...", |
| 16 |
"kind": "...", |
| 17 |
"verdict": "pass" | "fail" | "warn" | "skip" | "error", |
| 18 |
"score": 0.0, |
| 19 |
"evidence": {...}, |
| 20 |
"message": "...", |
| 21 |
... |
| 22 |
} |
| 23 |
] |
| 24 |
} |
| 25 |
|
| 26 |
The harvest pull path filters for ``verdict == "fail"`` and lifts
out ``evidence.prompt`` + ``evidence.reference`` as the Q/A pair for
the next retrain. A failing probe without both fields raises
:class:`NoReferenceError` under strict mode (default); under
``strict=False`` it is skipped with a warning log line instead.
| 31 |
|
| 32 |
``evidence.confidence`` (optional, 0-1) gates candidates via the |
| 33 |
caller's ``--min-confidence``. Absent confidence is treated as 1.0 |
| 34 |
— the probe itself already failed, which is our signal. |
| 35 |
""" |
| 36 |
|
| 37 |
from __future__ import annotations |
| 38 |
|
| 39 |
import json |
| 40 |
import logging |
| 41 |
from dataclasses import dataclass |
| 42 |
from pathlib import Path |
| 43 |
from typing import Any, Final |
| 44 |
|
| 45 |
from dlm.harvest.errors import MalformedSwayReportError, NoReferenceError |
| 46 |
|
| 47 |
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# Sway's JSON schema version we know how to parse. A higher version
# in a report triggers a refusal with a clear pointer — sway's schema
# is stable but not fixed forever. Reports at or below this version
# are accepted.
_SUPPORTED_SWAY_SCHEMA: Final[int] = 1
| 53 |
|
| 54 |
|
| 55 |
@dataclass(frozen=True) |
| 56 |
class HarvestCandidate: |
| 57 |
"""One failing probe ready to become a `!probe`-tagged section. |
| 58 |
|
| 59 |
Attributes |
| 60 |
---------- |
| 61 |
prompt: |
| 62 |
The question text. Becomes the `### Q` body. |
| 63 |
reference: |
| 64 |
The expected answer. Becomes the `### A` body. |
| 65 |
confidence: |
| 66 |
0-1 weight sway assigned to this probe's reference, when |
| 67 |
present. Defaults to 1.0 when the report doesn't carry it. |
| 68 |
probe_name: |
| 69 |
Human-readable probe name from the sway spec. Used for the |
| 70 |
harvest tag so users can trace a synthesized section back to |
| 71 |
its probe origin. |
| 72 |
probe_kind: |
| 73 |
Probe discriminator (``section_internalization`` etc.). |
| 74 |
source_adapter_version: |
| 75 |
The adapter revision sway was scoring when it failed, if |
| 76 |
`adapter_id` carries one. Informational; the harvest |
| 77 |
itself doesn't need it. |
| 78 |
""" |
| 79 |
|
| 80 |
prompt: str |
| 81 |
reference: str |
| 82 |
confidence: float |
| 83 |
probe_name: str |
| 84 |
probe_kind: str |
| 85 |
source_adapter_version: str | None |
| 86 |
|
| 87 |
|
| 88 |
def read_sway_report(
    path: Path | str,
    *,
    strict: bool = True,
    min_confidence: float = 0.0,
) -> list[HarvestCandidate]:
    """Parse a sway JSON report at `path` into harvest candidates.

    Only probes whose ``verdict`` is exactly ``"fail"`` are considered.
    Entries in ``probes`` that are not JSON objects are skipped with a
    warning. Candidates are returned in report order.

    Parameters
    ----------
    path:
        Path to the sway JSON report.
    strict:
        If True (default), raise :class:`NoReferenceError` when a
        failing probe lacks a ``prompt`` / ``reference`` pair. If
        False, log a warning and skip the probe.
    min_confidence:
        Minimum ``evidence.confidence`` for a candidate to survive.
        Default 0.0 accepts all. Dropped candidates are logged at
        INFO level.

    Raises
    ------
    MalformedSwayReportError:
        File unreadable (I/O failure or not valid UTF-8), not JSON,
        not a JSON object, missing an integer ``schema_version`` or a
        ``probes`` array, or carries a newer ``schema_version`` than
        this reader supports.
    NoReferenceError:
        Strict mode + at least one failing probe lacks a reference.
    """
    report_path = Path(path)
    try:
        raw = report_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError; without
        # catching it a non-UTF-8 file would leak a raw decode error
        # instead of the documented MalformedSwayReportError.
        raise MalformedSwayReportError(f"cannot read sway report at {report_path}: {exc}") from exc

    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise MalformedSwayReportError(
            f"sway report at {report_path} is not valid JSON: {exc}"
        ) from exc

    if not isinstance(payload, dict):
        raise MalformedSwayReportError(
            f"sway report at {report_path} must be a JSON object; got {type(payload).__name__}"
        )

    schema_version = payload.get("schema_version")
    # bool is a subclass of int, so JSON `true` / `false` must be
    # rejected explicitly or `true` would be read as version 1.
    if isinstance(schema_version, bool) or not isinstance(schema_version, int):
        raise MalformedSwayReportError(
            f"sway report at {report_path} missing integer `schema_version`"
        )
    if schema_version > _SUPPORTED_SWAY_SCHEMA:
        raise MalformedSwayReportError(
            f"sway report schema_version={schema_version} is newer than this "
            f"reader supports ({_SUPPORTED_SWAY_SCHEMA}); bump the sway pin "
            "in `dlm.lock` after verifying harvest still round-trips"
        )

    probes = payload.get("probes")
    if not isinstance(probes, list):
        raise MalformedSwayReportError(f"sway report at {report_path} missing `probes` array")

    # The report-level adapter id is stamped onto every candidate; an
    # absent, empty, or non-string value is recorded as None.
    adapter_id = payload.get("adapter_id")
    source_adapter_version: str | None = None
    if isinstance(adapter_id, str) and adapter_id:
        source_adapter_version = adapter_id

    candidates: list[HarvestCandidate] = []
    for idx, probe in enumerate(probes):
        if not isinstance(probe, dict):
            _LOG.warning(
                "sway report %s: probe index %d is not an object; skipping",
                report_path,
                idx,
            )
            continue
        if probe.get("verdict") != "fail":
            continue
        try:
            candidate = _probe_to_candidate(
                probe,
                source_adapter_version=source_adapter_version,
            )
        except NoReferenceError:
            if strict:
                raise
            _LOG.warning(
                "sway report %s: probe %r failed but carries no "
                "reference; skipping (use --strict to fail)",
                report_path,
                probe.get("name", "<unnamed>"),
            )
            continue
        if candidate.confidence < min_confidence:
            _LOG.info(
                "harvest: skipping %r (confidence=%.2f < %.2f)",
                candidate.probe_name,
                candidate.confidence,
                min_confidence,
            )
            continue
        candidates.append(candidate)

    return candidates
| 192 |
|
| 193 |
|
| 194 |
def _probe_to_candidate(
    probe: dict[str, Any],
    *,
    source_adapter_version: str | None,
) -> HarvestCandidate:
    """Lift one failing probe into a `HarvestCandidate`.

    Parameters
    ----------
    probe:
        One entry of the report's ``probes`` array. The caller is
        expected to have already checked that it is a failing probe;
        the verdict is not re-checked here.
    source_adapter_version:
        Value copied verbatim onto the returned candidate.

    Returns
    -------
    HarvestCandidate
        Prompt and reference are whitespace-stripped. Confidence is
        clamped to ``[0.0, 1.0]`` and falls back to 1.0 when absent or
        not convertible to a float.

    Raises
    ------
    NoReferenceError:
        The evidence is not an object, or doesn't carry both a
        non-blank string prompt and a non-blank string reference —
        that probe cannot be round-tripped into a supervised Q/A row.
    """
    name = str(probe.get("name") or "<unnamed>")
    kind = str(probe.get("kind") or "")
    # Missing / null / otherwise falsy evidence is treated as empty, so
    # it surfaces below as a missing prompt.
    evidence = probe.get("evidence") or {}
    if not isinstance(evidence, dict):
        raise NoReferenceError(f"probe {name!r}: evidence is not an object; cannot harvest")

    prompt_raw = evidence.get("prompt")
    reference_raw = evidence.get("reference")
    if not isinstance(prompt_raw, str) or not prompt_raw.strip():
        raise NoReferenceError(f"probe {name!r}: evidence.prompt missing or non-string")
    if not isinstance(reference_raw, str) or not reference_raw.strip():
        raise NoReferenceError(f"probe {name!r}: evidence.reference missing or non-string")

    confidence_raw = evidence.get("confidence", 1.0)
    try:
        confidence = float(confidence_raw)
    except OverflowError:
        # An integer too large for a double (JSON integer literals are
        # parsed as arbitrary-precision ints). Saturate by sign so it
        # clamps the same way a +/-inf float would, instead of crashing
        # the whole harvest on one odd probe.
        confidence = 1.0 if confidence_raw > 0 else 0.0
    except (TypeError, ValueError):
        # Null or otherwise unparseable confidence: the probe already
        # failed, so treat it like an absent value.
        confidence = 1.0
    # Clamp into [0, 1]. NaN also ends up as 1.0 here, because every
    # comparison against NaN is false and min() keeps its first argument.
    confidence = max(0.0, min(1.0, confidence))

    return HarvestCandidate(
        prompt=prompt_raw.strip(),
        reference=reference_raw.strip(),
        confidence=confidence,
        probe_name=name,
        probe_kind=kind,
        source_adapter_version=source_adapter_version,
    )