JSON · 2340 bytes Raw Blame History
1 {
2 "determinism": {
3 "class": "best_effort",
4 "notes": [
5 "CPU-only backend: strict determinism depends on BLAS impl"
6 ],
7 "seed": 0
8 },
9 "null_stats": {},
10 "probes": [
11 {
12 "base_value": null,
13 "ci_95": [
14 0.008465445404266456,
15 0.02977853737438285
16 ],
17 "evidence": {
18 "divergence_kind": "js",
19 "max": 0.03615893521221293,
20 "num_prompts": 4,
21 "per_prompt": [
22 0.010637343860892606,
23 0.006726111759252678,
24 0.013683446339307789,
25 0.03615893521221293
26 ],
27 "raw_ci_95": [
28 0.008465445404266456,
29 0.02977853737438285
30 ],
31 "weight": 1.0,
32 "z_by_rank": null
33 },
34 "ft_value": null,
35 "kind": "delta_kl",
36 "message": "mean js=0.0168 (\u2265 0.0) (no calibration for delta_kl)",
37 "name": "dk_golden",
38 "raw": 0.0168014592929165,
39 "score": 0.024239382001588428,
40 "verdict": "pass",
41 "z_score": null
42 },
43 {
44 "base_value": null,
45 "ci_95": [
46 0.0,
47 0.0
48 ],
49 "evidence": {
50 "fraction_regressed": 0.0,
51 "mean_delta_nats": -0.03518710732460022,
52 "raw_ci_95": [
53 0.0,
54 0.0
55 ],
56 "regressed_count": 0,
57 "regression_nats_threshold": 1.0,
58 "total_items": 20,
59 "weight": 1.0,
60 "worst_offenders": [],
61 "z_by_rank": null
62 },
63 "ft_value": -0.03518710732460022,
64 "kind": "calibration_drift",
65 "message": "0/20 items regressed >1.0 nats (frac=0.0%), mean_delta=-0.035 nats/tok (no calibration for calibration_drift)",
66 "name": "cal_golden",
67 "raw": 0.0,
68 "score": 0.8572834380467733,
69 "verdict": "pass",
70 "z_score": null
71 }
72 ],
73 "schema_version": 1,
74 "score": {
75 "band": "partial",
76 "components": {
77 "ablation": 0.0,
78 "adherence": 0.024239382001588428,
79 "attribution": 0.0,
80 "baseline": 0.0,
81 "calibration": 0.8572834380467733
82 },
83 "findings": [
84 "adherence score is 0.02 \u2014 below the noise threshold"
85 ],
86 "overall": 0.3574570044196624,
87 "weights": {
88 "ablation": 0.15,
89 "adherence": 0.3,
90 "attribution": 0.35,
91 "baseline": 0.0,
92 "calibration": 0.2
93 }
94 },
95 "spec_path": "<memory>"
96 }