| 1 | { |
| 2 | "adapter_id": "/fixture/runs/adapter/v0003", |
| 3 | "backend_stats": {}, |
| 4 | "base_model_id": "HuggingFaceTB/SmolLM2-135M-Instruct", |
| 5 | "determinism": { |
| 6 | "class": "best_effort", |
| 7 | "notes": [ |
| 8 | "CPU-only backend: strict determinism depends on BLAS impl" |
| 9 | ], |
| 10 | "seed": 0 |
| 11 | }, |
| 12 | "finished_at": "2026-01-01T12:00:02.500000+00:00", |
| 13 | "null_stats": { |
| 14 | "delta_kl": { |
| 15 | "mean": 0.01, |
| 16 | "n": 3.0, |
| 17 | "std": 0.005 |
| 18 | } |
| 19 | }, |
| 20 | "probes": [ |
| 21 | { |
| 22 | "base_value": null, |
| 23 | "ci_95": [ |
| 24 | 0.412, |
| 25 | 0.497 |
| 26 | ], |
| 27 | "duration_s": 0.123, |
| 28 | "evidence": { |
| 29 | "divergence_kind": "js", |
| 30 | "num_prompts": 4, |
| 31 | "weight": 1.0 |
| 32 | }, |
| 33 | "ft_value": null, |
| 34 | "kind": "delta_kl", |
| 35 | "message": "mean js=0.4560, z=+5.12\u03c3 vs null", |
| 36 | "name": "dk", |
| 37 | "raw": 0.456, |
| 38 | "score": 0.87, |
| 39 | "verdict": "pass", |
| 40 | "z_score": 5.12 |
| 41 | }, |
| 42 | { |
| 43 | "base_value": null, |
| 44 | "ci_95": null, |
| 45 | "duration_s": 0.456, |
| 46 | "evidence": { |
| 47 | "num_sections": 4, |
| 48 | "passing_frac": 0.25, |
| 49 | "weight": 1.0 |
| 50 | }, |
| 51 | "ft_value": null, |
| 52 | "kind": "section_internalization", |
| 53 | "message": "1/4 sections cleared effective_sis\u22650.05", |
| 54 | "name": "sis", |
| 55 | "raw": 0.012, |
| 56 | "score": 0.3, |
| 57 | "verdict": "fail", |
| 58 | "z_score": 0.5 |
| 59 | }, |
| 60 | { |
| 61 | "base_value": null, |
| 62 | "ci_95": null, |
| 63 | "duration_s": 0.001, |
| 64 | "evidence": {}, |
| 65 | "ft_value": null, |
| 66 | "kind": "leakage", |
| 67 | "message": "no PROSE sections to test for leakage", |
| 68 | "name": "lk", |
| 69 | "raw": null, |
| 70 | "score": null, |
| 71 | "verdict": "skip", |
| 72 | "z_score": null |
| 73 | }, |
| 74 | { |
| 75 | "base_value": null, |
| 76 | "ci_95": null, |
| 77 | "duration_s": 0.0, |
| 78 | "evidence": {}, |
| 79 | "ft_value": null, |
| 80 | "kind": "adapter_ablation", |
| 81 | "message": "backend does not implement ScalableDifferentialBackend", |
| 82 | "name": "ablation", |
| 83 | "raw": null, |
| 84 | "score": null, |
| 85 | "verdict": "error", |
| 86 | "z_score": null |
| 87 | } |
| 88 | ], |
| 89 | "schema_version": 1, |
| 90 | "score": { |
| 91 | "band": "healthy", |
| 92 | "components": { |
| 93 | "ablation": 0.0, |
| 94 | "adherence": 0.87, |
| 95 | "attribution": 0.3, |
| 96 | "baseline": 1.0, |
| 97 | "calibration": 0.5 |
| 98 | }, |
| 99 | "findings": [ |
| 100 | "sis (section_internalization) failed: 1/4 sections cleared effective_sis\u22650.05", |
| 101 | "ablation score is 0.00 \u2014 below the noise threshold" |
| 102 | ], |
| 103 | "overall": 0.65, |
| 104 | "weights": { |
| 105 | "ablation": 0.15, |
| 106 | "adherence": 0.3, |
| 107 | "attribution": 0.35, |
| 108 | "baseline": 0.0, |
| 109 | "calibration": 0.2 |
| 110 | } |
| 111 | }, |
| 112 | "spec_path": "/fixture/sway.yaml", |
| 113 | "started_at": "2026-01-01T12:00:00+00:00", |
| 114 | "sway_version": "0.1.0.dev0", |
| 115 | "wall_seconds": 2.5 |
| 116 | } |