Python · 7642 bytes Raw Blame History
1 """Parse a sway JSON report into harvest candidates.
2
3 Sway emits reports with this shape (see
4 ``sway/src/dlm_sway/suite/report.py``):
5
6 .. code-block:: json
7
8 {
9 "schema_version": 1,
10 "sway_version": "...",
11 "base_model_id": "...",
12 "adapter_id": "...",
13 "probes": [
14 {
15 "name": "...",
16 "kind": "...",
17 "verdict": "pass" | "fail" | "warn" | "skip" | "error",
18 "score": 0.0,
19 "evidence": {...},
20 "message": "...",
21 ...
22 }
23 ]
24 }
25
26 The harvest pull path filters for ``verdict == "fail"`` and lifts
27 out ``evidence.prompt`` + ``evidence.reference`` as the Q/A pair for
the next retrain. Probes without both fields raise
:class:`NoReferenceError` under strict mode (default) or are skipped
with a log line under ``strict=False``.
31
32 ``evidence.confidence`` (optional, 0-1) gates candidates via the
33 caller's ``--min-confidence``. Absent confidence is treated as 1.0
34 — the probe itself already failed, which is our signal.
35 """
36
37 from __future__ import annotations
38
39 import json
40 import logging
41 from dataclasses import dataclass
42 from pathlib import Path
43 from typing import Any, Final
44
45 from dlm.harvest.errors import MalformedSwayReportError, NoReferenceError
46
# Module-scoped logger; records are attributed to this module's dotted name.
_LOG = logging.getLogger(__name__)

# Highest sway report ``schema_version`` this reader understands. Reports
# declaring a newer version are refused with an explanatory error rather
# than parsed on a best-effort basis: sway's schema is stable, but it is
# not guaranteed to stay frozen.
_SUPPORTED_SWAY_SCHEMA: Final[int] = 1
53
54
@dataclass(frozen=True)
class HarvestCandidate:
    """A failing sway probe, lifted into a form ready for a `!probe` section.

    Instances are immutable. Field order is part of the constructor
    signature, so it must not be rearranged.

    Attributes
    ----------
    prompt:
        Question text; rendered as the body of `### Q`.
    reference:
        Expected answer; rendered as the body of `### A`.
    confidence:
        Weight in the range 0-1 that sway attached to the probe's
        reference. When the report omits it, 1.0 is used.
    probe_name:
        The probe's human-readable name from the sway spec. It feeds
        the harvest tag, letting users trace a synthesized section back
        to the probe that produced it.
    probe_kind:
        The probe discriminator (``section_internalization`` etc.).
    source_adapter_version:
        Adapter revision sway was scoring at the time of the failure,
        when `adapter_id` carries one; otherwise ``None``. Purely
        informational: harvesting does not depend on it.
    """

    prompt: str
    reference: str
    confidence: float
    probe_name: str
    probe_kind: str
    source_adapter_version: str | None
86
87
def read_sway_report(
    path: Path | str,
    *,
    strict: bool = True,
    min_confidence: float = 0.0,
) -> list[HarvestCandidate]:
    """Parse a sway JSON report at `path` into harvest candidates.

    Only probes whose ``verdict`` equals ``"fail"`` are considered. Each
    one is converted with :func:`_probe_to_candidate`; candidates whose
    confidence is below `min_confidence` are dropped with an info log.

    Parameters
    ----------
    path:
        Path to the sway JSON report.
    strict:
        If True (default), raise :class:`NoReferenceError` when a
        failing probe lacks a ``prompt`` / ``reference`` pair. If
        False, log a warning and skip the probe.
    min_confidence:
        Minimum ``evidence.confidence`` for a candidate to survive.
        Default 0.0 accepts all.

    Returns
    -------
    list[HarvestCandidate]
        Surviving candidates, in the order their probes appear in the
        report. Empty when no probe failed or none survived filtering.

    Raises
    ------
    MalformedSwayReportError:
        File unreadable (I/O error or not valid UTF-8), not JSON, not a
        JSON object, missing an integer ``schema_version`` or a
        ``probes`` array, or carries a newer ``schema_version`` than
        this reader supports.
    NoReferenceError:
        Strict mode + at least one failing probe lacks a reference.
    """
    report_path = Path(path)
    try:
        raw = report_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError; without it a
        # non-UTF-8 file would escape as a raw decode error instead of the
        # documented MalformedSwayReportError.
        raise MalformedSwayReportError(f"cannot read sway report at {report_path}: {exc}") from exc

    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise MalformedSwayReportError(
            f"sway report at {report_path} is not valid JSON: {exc}"
        ) from exc

    if not isinstance(payload, dict):
        raise MalformedSwayReportError(
            f"sway report at {report_path} must be a JSON object; got {type(payload).__name__}"
        )

    schema_version = payload.get("schema_version")
    # bool is a subclass of int, so JSON `true`/`false` must be rejected
    # explicitly; otherwise `"schema_version": true` would pass as version 1.
    if isinstance(schema_version, bool) or not isinstance(schema_version, int):
        raise MalformedSwayReportError(
            f"sway report at {report_path} missing integer `schema_version`"
        )
    if schema_version > _SUPPORTED_SWAY_SCHEMA:
        raise MalformedSwayReportError(
            f"sway report schema_version={schema_version} is newer than this "
            f"reader supports ({_SUPPORTED_SWAY_SCHEMA}); bump the sway pin "
            "in `dlm.lock` after verifying harvest still round-trips"
        )

    probes = payload.get("probes")
    if not isinstance(probes, list):
        raise MalformedSwayReportError(f"sway report at {report_path} missing `probes` array")

    # The adapter id is passed through verbatim as the "version" when it is
    # a non-empty string; anything else is recorded as unknown (None).
    adapter_id = payload.get("adapter_id")
    source_adapter_version: str | None = None
    if isinstance(adapter_id, str) and adapter_id:
        source_adapter_version = adapter_id

    candidates: list[HarvestCandidate] = []
    for idx, probe in enumerate(probes):
        if not isinstance(probe, dict):
            # A malformed entry is tolerated (even in strict mode) so one
            # bad element doesn't discard the rest of the report.
            _LOG.warning(
                "sway report %s: probe index %d is not an object; skipping",
                report_path,
                idx,
            )
            continue
        if probe.get("verdict") != "fail":
            continue
        try:
            candidate = _probe_to_candidate(
                probe,
                source_adapter_version=source_adapter_version,
            )
        except NoReferenceError:
            if strict:
                raise
            _LOG.warning(
                "sway report %s: probe %r failed but carries no "
                "reference; skipping (use --strict to fail)",
                report_path,
                probe.get("name", "<unnamed>"),
            )
            continue
        if candidate.confidence < min_confidence:
            _LOG.info(
                "harvest: skipping %r (confidence=%.2f < %.2f)",
                candidate.probe_name,
                candidate.confidence,
                min_confidence,
            )
            continue
        candidates.append(candidate)

    return candidates
192
193
def _probe_to_candidate(
    probe: dict[str, Any],
    *,
    source_adapter_version: str | None,
) -> HarvestCandidate:
    """Lift one failing probe into a `HarvestCandidate`.

    Parameters
    ----------
    probe:
        One element of the report's ``probes`` array, already known to
        be a dict.
    source_adapter_version:
        Value copied unchanged onto the resulting candidate.

    Returns
    -------
    HarvestCandidate
        Prompt and reference are whitespace-stripped; confidence is
        clamped to the range 0-1.

    Raises
    ------
    NoReferenceError:
        The evidence is not an object, or doesn't carry both a
        non-blank string prompt and a non-blank string reference; that
        probe cannot be round-tripped into a supervised Q/A row.
    """
    # Missing/empty name or kind falls back to a placeholder / empty string.
    name = str(probe.get("name") or "<unnamed>")
    kind = str(probe.get("kind") or "")
    # A missing or falsy `evidence` is treated as an empty object, which
    # then fails the prompt check below.
    evidence = probe.get("evidence") or {}
    if not isinstance(evidence, dict):
        raise NoReferenceError(f"probe {name!r}: evidence is not an object; cannot harvest")

    prompt_raw = evidence.get("prompt")
    reference_raw = evidence.get("reference")
    if not isinstance(prompt_raw, str) or not prompt_raw.strip():
        raise NoReferenceError(f"probe {name!r}: evidence.prompt missing or non-string")
    if not isinstance(reference_raw, str) or not reference_raw.strip():
        raise NoReferenceError(f"probe {name!r}: evidence.reference missing or non-string")

    confidence_raw = evidence.get("confidence", 1.0)
    try:
        confidence = float(confidence_raw)
    except (TypeError, ValueError, OverflowError):
        # Unusable confidence (null, non-numeric, or an integer too large
        # for a float, which raises OverflowError) is treated like an
        # absent one: the probe already failed, which is the signal.
        confidence = 1.0
    # Clamp into [0, 1]. A NaN also lands on 1.0 here, because
    # `min(1.0, nan)` keeps its first argument.
    confidence = max(0.0, min(1.0, confidence))

    return HarvestCandidate(
        prompt=prompt_raw.strip(),
        reference=reference_raw.strip(),
        confidence=confidence,
        probe_name=name,
        probe_kind=kind,
        source_adapter_version=source_adapter_version,
    )