documentlanguagemodel Public

Watch 0 Fork 0 Star 0

Python · 7642 bytes Raw Blame History

  
        1
        """Parse a sway JSON report into harvest candidates.
      
        2
        
        3
        Sway emits reports with this shape (see
      
        4
        ``sway/src/dlm_sway/suite/report.py``):
      
        5
        
        6
        .. code-block:: json
      
        7
        
        8
            {
      
        9
              "schema_version": 1,
      
        10
              "sway_version": "...",
      
        11
              "base_model_id": "...",
      
        12
              "adapter_id": "...",
      
        13
              "probes": [
      
        14
                {
      
        15
                  "name": "...",
      
        16
                  "kind": "...",
      
        17
                  "verdict": "pass" | "fail" | "warn" | "skip" | "error",
      
        18
                  "score": 0.0,
      
        19
                  "evidence": {...},
      
        20
                  "message": "...",
      
        21
                  ...
      
        22
                }
      
        23
              ]
      
        24
            }
      
        25
        
        26
        The harvest pull path filters for ``verdict == "fail"`` and lifts
      
        27
        out ``evidence.prompt`` + ``evidence.reference`` as the Q/A pair for
      
        28
        the next retrain. Probes without both fields raise
      
        29
        :class:`NoReferenceError` under strict mode (default) or are skipped
      
        30
        with a log line under ``strict=False``.
      
        31
        
        32
        ``evidence.confidence`` (optional, 0-1) gates candidates via the
      
        33
        caller's ``--min-confidence``. Absent confidence is treated as 1.0
      
        34
        — the probe itself already failed, which is our signal.
      
        35
        """
      
        36
        
        37
        from __future__ import annotations
      
        38
        
        39
        import json
      
        40
        import logging
      
        41
        from dataclasses import dataclass
      
        42
        from pathlib import Path
      
        43
        from typing import Any, Final
      
        44
        
        45
        from dlm.harvest.errors import MalformedSwayReportError, NoReferenceError
      
        46
        
        47
        # Module-level logger used for skip/warning messages while parsing reports.
        _LOG = logging.getLogger(__name__)

        # Sway's JSON schema version we know how to parse. A higher version
        # in a report triggers a refusal with a clear pointer — sway's schema
        # is stable but not fixed forever.
        _SUPPORTED_SWAY_SCHEMA: Final[int] = 1
      
        53
        
        54
        
        55
        @dataclass(frozen=True)
        class HarvestCandidate:
            """One failing probe ready to become a `!probe`-tagged section.

            Instances are immutable (``frozen=True``).

            Attributes
            ----------
            prompt:
                The question text. Becomes the `### Q` body.
            reference:
                The expected answer. Becomes the `### A` body.
            confidence:
                0-1 weight sway assigned to this probe's reference, when
                present. Defaults to 1.0 when the report doesn't carry it.
            probe_name:
                Human-readable probe name from the sway spec. Used for the
                harvest tag so users can trace a synthesized section back to
                its probe origin.
            probe_kind:
                Probe discriminator (``section_internalization`` etc.).
            source_adapter_version:
                The adapter revision sway was scoring when it failed, if
                `adapter_id` carries one. Informational; the harvest
                itself doesn't need it.
            """

            prompt: str
            reference: str
            confidence: float
            probe_name: str
            probe_kind: str
            source_adapter_version: str | None
      
        86
        
        87
        
        88
        def read_sway_report(
            path: Path | str,
            *,
            strict: bool = True,
            min_confidence: float = 0.0,
        ) -> list[HarvestCandidate]:
            """Parse a sway JSON report at `path` into harvest candidates.

            Only probes whose ``verdict`` is ``"fail"`` are considered. Entries
            in ``probes`` that are not JSON objects are logged and skipped.

            Parameters
            ----------
            path:
                Path to the sway JSON report.
            strict:
                If True (default), raise :class:`NoReferenceError` when a
                failing probe lacks a ``prompt`` / ``reference`` pair. If
                False, log a warning and skip the probe.
            min_confidence:
                Minimum ``evidence.confidence`` for a candidate to survive.
                Default 0.0 accepts all.

            Returns
            -------
            list[HarvestCandidate]
                One candidate per surviving failing probe, in report order.

            Raises
            ------
            MalformedSwayReportError:
                File unreadable (including not valid UTF-8), not JSON, missing
                required keys, or carries a newer ``schema_version`` than this
                reader supports.
            NoReferenceError:
                Strict mode + at least one failing probe lacks a reference.
            """
            report_path = Path(path)
            try:
                raw = report_path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as exc:
                # A file that is not valid UTF-8 raises UnicodeDecodeError, which
                # is a ValueError rather than an OSError; report it as the
                # documented "unreadable" error instead of letting it escape.
                raise MalformedSwayReportError(f"cannot read sway report at {report_path}: {exc}") from exc

            try:
                payload = json.loads(raw)
            except json.JSONDecodeError as exc:
                raise MalformedSwayReportError(
                    f"sway report at {report_path} is not valid JSON: {exc}"
                ) from exc

            if not isinstance(payload, dict):
                raise MalformedSwayReportError(
                    f"sway report at {report_path} must be a JSON object; got {type(payload).__name__}"
                )

            schema_version = payload.get("schema_version")
            # bool is a subclass of int, so JSON true/false must be rejected
            # explicitly or it would pass as a schema version.
            if isinstance(schema_version, bool) or not isinstance(schema_version, int):
                raise MalformedSwayReportError(
                    f"sway report at {report_path} missing integer `schema_version`"
                )
            if schema_version > _SUPPORTED_SWAY_SCHEMA:
                raise MalformedSwayReportError(
                    f"sway report schema_version={schema_version} is newer than this "
                    f"reader supports ({_SUPPORTED_SWAY_SCHEMA}); bump the sway pin "
                    "in `dlm.lock` after verifying harvest still round-trips"
                )

            probes = payload.get("probes")
            if not isinstance(probes, list):
                raise MalformedSwayReportError(f"sway report at {report_path} missing `probes` array")

            # The report-level adapter id is attached to every candidate; anything
            # other than a non-empty string is recorded as None.
            adapter_id = payload.get("adapter_id")
            source_adapter_version: str | None = None
            if isinstance(adapter_id, str) and adapter_id:
                source_adapter_version = adapter_id

            candidates: list[HarvestCandidate] = []
            for idx, probe in enumerate(probes):
                if not isinstance(probe, dict):
                    _LOG.warning(
                        "sway report %s: probe index %d is not an object; skipping",
                        report_path,
                        idx,
                    )
                    continue
                if probe.get("verdict") != "fail":
                    continue
                try:
                    candidate = _probe_to_candidate(
                        probe,
                        source_adapter_version=source_adapter_version,
                    )
                except NoReferenceError:
                    if strict:
                        raise
                    _LOG.warning(
                        "sway report %s: probe %r failed but carries no "
                        "reference; skipping (use --strict to fail)",
                        report_path,
                        # Same fallback as the candidate path, so a null or
                        # empty name is logged as '<unnamed>' rather than None.
                        probe.get("name") or "<unnamed>",
                    )
                    continue
                if candidate.confidence < min_confidence:
                    _LOG.info(
                        "harvest: skipping %r (confidence=%.2f < %.2f)",
                        candidate.probe_name,
                        candidate.confidence,
                        min_confidence,
                    )
                    continue
                candidates.append(candidate)

            return candidates
      
        192
        
        193
        
        194
        def _probe_to_candidate(
            probe: dict[str, Any],
            *,
            source_adapter_version: str | None,
        ) -> HarvestCandidate:
            """Lift one failing probe into a `HarvestCandidate`.

            Parameters
            ----------
            probe:
                One entry of the report's ``probes`` array.
            source_adapter_version:
                Copied unchanged onto the returned candidate.

            Returns
            -------
            HarvestCandidate
                Prompt and reference are whitespace-stripped; confidence is
                clamped to the 0-1 range.

            Raises :class:`NoReferenceError` when the evidence doesn't
            carry both a prompt and a reference — that probe cannot be
            round-tripped into a supervised Q/A row.
            """
            name = str(probe.get("name") or "<unnamed>")
            kind = str(probe.get("kind") or "")
            # A missing or falsy evidence value is treated as an empty object, so
            # it fails the prompt check below rather than the type check.
            evidence = probe.get("evidence") or {}
            if not isinstance(evidence, dict):
                raise NoReferenceError(f"probe {name!r}: evidence is not an object; cannot harvest")

            prompt_raw = evidence.get("prompt")
            reference_raw = evidence.get("reference")
            if not isinstance(prompt_raw, str) or not prompt_raw.strip():
                raise NoReferenceError(f"probe {name!r}: evidence.prompt missing or non-string")
            if not isinstance(reference_raw, str) or not reference_raw.strip():
                raise NoReferenceError(f"probe {name!r}: evidence.reference missing or non-string")

            confidence_raw = evidence.get("confidence", 1.0)
            try:
                confidence = float(confidence_raw)
            except (TypeError, ValueError, OverflowError):
                # Unusable confidence falls back to the "absent" default of 1.0.
                # OverflowError covers an integer too large to convert to a
                # float, which would otherwise abort the whole harvest.
                confidence = 1.0
            confidence = max(0.0, min(1.0, confidence))

            return HarvestCandidate(
                prompt=prompt_raw.strip(),
                reference=reference_raw.strip(),
                confidence=confidence,
                probe_name=name,
                probe_kind=kind,
                source_adapter_version=source_adapter_version,
            )

1	"""Parse a sway JSON report into harvest candidates.
2
3	Sway emits reports with this shape (see
4	``sway/src/dlm_sway/suite/report.py``):
5
6	.. code-block:: json
7
8	{
9	"schema_version": 1,
10	"sway_version": "...",
11	"base_model_id": "...",
12	"adapter_id": "...",
13	"probes": [
14	{
15	"name": "...",
16	"kind": "...",
17	"verdict": "pass" | "fail" | "warn" | "skip" | "error",
18	"score": 0.0,
19	"evidence": {...},
20	"message": "...",
21	...
22	}
23	]
24	}
25
26	The harvest pull path filters for ``verdict == "fail"`` and lifts
27	out ``evidence.prompt`` + ``evidence.reference`` as the Q/A pair for
28	the next retrain. Probes without both fields raise
29	:class:`NoReferenceError` under strict mode (default) or are skipped
30	with a log line under ``strict=False``.
31
32	``evidence.confidence`` (optional, 0-1) gates candidates via the
33	caller's ``--min-confidence``. Absent confidence is treated as 1.0
34	— the probe itself already failed, which is our signal.
35	"""
36
37	from __future__ import annotations
38
39	import json
40	import logging
41	from dataclasses import dataclass
42	from pathlib import Path
43	from typing import Any, Final
44
45	from dlm.harvest.errors import MalformedSwayReportError, NoReferenceError
46
# Module-level logger used for skip/warning messages while parsing reports.
_LOG = logging.getLogger(__name__)

# Sway's JSON schema version we know how to parse. A higher version
# in a report triggers a refusal with a clear pointer — sway's schema
# is stable but not fixed forever.
_SUPPORTED_SWAY_SCHEMA: Final[int] = 1
53
54
@dataclass(frozen=True)
class HarvestCandidate:
    """One failing probe ready to become a `!probe`-tagged section.

    Instances are immutable (``frozen=True``).

    Attributes
    ----------
    prompt:
        The question text. Becomes the `### Q` body.
    reference:
        The expected answer. Becomes the `### A` body.
    confidence:
        0-1 weight sway assigned to this probe's reference, when
        present. Defaults to 1.0 when the report doesn't carry it.
    probe_name:
        Human-readable probe name from the sway spec. Used for the
        harvest tag so users can trace a synthesized section back to
        its probe origin.
    probe_kind:
        Probe discriminator (``section_internalization`` etc.).
    source_adapter_version:
        The adapter revision sway was scoring when it failed, if
        `adapter_id` carries one. Informational; the harvest
        itself doesn't need it.
    """

    prompt: str
    reference: str
    confidence: float
    probe_name: str
    probe_kind: str
    source_adapter_version: str | None
86
87
def read_sway_report(
    path: Path | str,
    *,
    strict: bool = True,
    min_confidence: float = 0.0,
) -> list[HarvestCandidate]:
    """Parse a sway JSON report at `path` into harvest candidates.

    Only probes whose ``verdict`` is ``"fail"`` are considered. Entries
    in ``probes`` that are not JSON objects are logged and skipped.

    Parameters
    ----------
    path:
        Path to the sway JSON report.
    strict:
        If True (default), raise :class:`NoReferenceError` when a
        failing probe lacks a ``prompt`` / ``reference`` pair. If
        False, log a warning and skip the probe.
    min_confidence:
        Minimum ``evidence.confidence`` for a candidate to survive.
        Default 0.0 accepts all.

    Returns
    -------
    list[HarvestCandidate]
        One candidate per surviving failing probe, in report order.

    Raises
    ------
    MalformedSwayReportError:
        File unreadable (including not valid UTF-8), not JSON, missing
        required keys, or carries a newer ``schema_version`` than this
        reader supports.
    NoReferenceError:
        Strict mode + at least one failing probe lacks a reference.
    """
    report_path = Path(path)
    try:
        raw = report_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # A file that is not valid UTF-8 raises UnicodeDecodeError, which
        # is a ValueError rather than an OSError; report it as the
        # documented "unreadable" error instead of letting it escape.
        raise MalformedSwayReportError(f"cannot read sway report at {report_path}: {exc}") from exc

    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise MalformedSwayReportError(
            f"sway report at {report_path} is not valid JSON: {exc}"
        ) from exc

    if not isinstance(payload, dict):
        raise MalformedSwayReportError(
            f"sway report at {report_path} must be a JSON object; got {type(payload).__name__}"
        )

    schema_version = payload.get("schema_version")
    # bool is a subclass of int, so JSON true/false must be rejected
    # explicitly or it would pass as a schema version.
    if isinstance(schema_version, bool) or not isinstance(schema_version, int):
        raise MalformedSwayReportError(
            f"sway report at {report_path} missing integer `schema_version`"
        )
    if schema_version > _SUPPORTED_SWAY_SCHEMA:
        raise MalformedSwayReportError(
            f"sway report schema_version={schema_version} is newer than this "
            f"reader supports ({_SUPPORTED_SWAY_SCHEMA}); bump the sway pin "
            "in `dlm.lock` after verifying harvest still round-trips"
        )

    probes = payload.get("probes")
    if not isinstance(probes, list):
        raise MalformedSwayReportError(f"sway report at {report_path} missing `probes` array")

    # The report-level adapter id is attached to every candidate; anything
    # other than a non-empty string is recorded as None.
    adapter_id = payload.get("adapter_id")
    source_adapter_version: str | None = None
    if isinstance(adapter_id, str) and adapter_id:
        source_adapter_version = adapter_id

    candidates: list[HarvestCandidate] = []
    for idx, probe in enumerate(probes):
        if not isinstance(probe, dict):
            _LOG.warning(
                "sway report %s: probe index %d is not an object; skipping",
                report_path,
                idx,
            )
            continue
        if probe.get("verdict") != "fail":
            continue
        try:
            candidate = _probe_to_candidate(
                probe,
                source_adapter_version=source_adapter_version,
            )
        except NoReferenceError:
            if strict:
                raise
            _LOG.warning(
                "sway report %s: probe %r failed but carries no "
                "reference; skipping (use --strict to fail)",
                report_path,
                # Same fallback as the candidate path, so a null or
                # empty name is logged as '<unnamed>' rather than None.
                probe.get("name") or "<unnamed>",
            )
            continue
        if candidate.confidence < min_confidence:
            _LOG.info(
                "harvest: skipping %r (confidence=%.2f < %.2f)",
                candidate.probe_name,
                candidate.confidence,
                min_confidence,
            )
            continue
        candidates.append(candidate)

    return candidates
192
193
def _probe_to_candidate(
    probe: dict[str, Any],
    *,
    source_adapter_version: str | None,
) -> HarvestCandidate:
    """Lift one failing probe into a `HarvestCandidate`.

    Parameters
    ----------
    probe:
        One entry of the report's ``probes`` array.
    source_adapter_version:
        Copied unchanged onto the returned candidate.

    Returns
    -------
    HarvestCandidate
        Prompt and reference are whitespace-stripped; confidence is
        clamped to the 0-1 range.

    Raises :class:`NoReferenceError` when the evidence doesn't
    carry both a prompt and a reference — that probe cannot be
    round-tripped into a supervised Q/A row.
    """
    name = str(probe.get("name") or "<unnamed>")
    kind = str(probe.get("kind") or "")
    # A missing or falsy evidence value is treated as an empty object, so
    # it fails the prompt check below rather than the type check.
    evidence = probe.get("evidence") or {}
    if not isinstance(evidence, dict):
        raise NoReferenceError(f"probe {name!r}: evidence is not an object; cannot harvest")

    prompt_raw = evidence.get("prompt")
    reference_raw = evidence.get("reference")
    if not isinstance(prompt_raw, str) or not prompt_raw.strip():
        raise NoReferenceError(f"probe {name!r}: evidence.prompt missing or non-string")
    if not isinstance(reference_raw, str) or not reference_raw.strip():
        raise NoReferenceError(f"probe {name!r}: evidence.reference missing or non-string")

    confidence_raw = evidence.get("confidence", 1.0)
    try:
        confidence = float(confidence_raw)
    except (TypeError, ValueError, OverflowError):
        # Unusable confidence falls back to the "absent" default of 1.0.
        # OverflowError covers an integer too large to convert to a
        # float, which would otherwise abort the whole harvest.
        confidence = 1.0
    confidence = max(0.0, min(1.0, confidence))

    return HarvestCandidate(
        prompt=prompt_raw.strip(),
        reference=reference_raw.strip(),
        confidence=confidence,
        probe_name=name,
        probe_kind=kind,
        source_adapter_version=source_adapter_version,
    )