tenseleyflow/sway / e0e16ed

Browse files

tests/golden: expected_linux.json from dispatch run 24746071833 (S18.8)

Authored by espadonne
SHA
e0e16ed241c90790a3446d117635f9f10bb193f4
Parents
27af978
Tree
5ddb874

1 changed file

StatusFile+-
A tests/golden/expected_linux.json 96 0
tests/golden/expected_linux.jsonadded
@@ -0,0 +1,96 @@
1
+{
2
+  "determinism": {
3
+    "class": "best_effort",
4
+    "notes": [
5
+      "CPU-only backend: strict determinism depends on BLAS impl"
6
+    ],
7
+    "seed": 0
8
+  },
9
+  "null_stats": {},
10
+  "probes": [
11
+    {
12
+      "base_value": null,
13
+      "ci_95": [
14
+        0.008465445404266456,
15
+        0.02977853737438285
16
+      ],
17
+      "evidence": {
18
+        "divergence_kind": "js",
19
+        "max": 0.03615893521221293,
20
+        "num_prompts": 4,
21
+        "per_prompt": [
22
+          0.010637343860892606,
23
+          0.006726111759252678,
24
+          0.013683446339307789,
25
+          0.03615893521221293
26
+        ],
27
+        "raw_ci_95": [
28
+          0.008465445404266456,
29
+          0.02977853737438285
30
+        ],
31
+        "weight": 1.0,
32
+        "z_by_rank": null
33
+      },
34
+      "ft_value": null,
35
+      "kind": "delta_kl",
36
+      "message": "mean js=0.0168 (\u2265 0.0) (no calibration for delta_kl)",
37
+      "name": "dk_golden",
38
+      "raw": 0.0168014592929165,
39
+      "score": 0.024239382001588428,
40
+      "verdict": "pass",
41
+      "z_score": null
42
+    },
43
+    {
44
+      "base_value": null,
45
+      "ci_95": [
46
+        0.0,
47
+        0.0
48
+      ],
49
+      "evidence": {
50
+        "fraction_regressed": 0.0,
51
+        "mean_delta_nats": -0.03518710732460022,
52
+        "raw_ci_95": [
53
+          0.0,
54
+          0.0
55
+        ],
56
+        "regressed_count": 0,
57
+        "regression_nats_threshold": 1.0,
58
+        "total_items": 20,
59
+        "weight": 1.0,
60
+        "worst_offenders": [],
61
+        "z_by_rank": null
62
+      },
63
+      "ft_value": -0.03518710732460022,
64
+      "kind": "calibration_drift",
65
+      "message": "0/20 items regressed >1.0 nats (frac=0.0%), mean_delta=-0.035 nats/tok (no calibration for calibration_drift)",
66
+      "name": "cal_golden",
67
+      "raw": 0.0,
68
+      "score": 0.8572834380467733,
69
+      "verdict": "pass",
70
+      "z_score": null
71
+    }
72
+  ],
73
+  "schema_version": 1,
74
+  "score": {
75
+    "band": "partial",
76
+    "components": {
77
+      "ablation": 0.0,
78
+      "adherence": 0.024239382001588428,
79
+      "attribution": 0.0,
80
+      "baseline": 0.0,
81
+      "calibration": 0.8572834380467733
82
+    },
83
+    "findings": [
84
+      "adherence score is 0.02 \u2014 below the noise threshold"
85
+    ],
86
+    "overall": 0.3574570044196624,
87
+    "weights": {
88
+      "ablation": 0.15,
89
+      "adherence": 0.3,
90
+      "attribution": 0.35,
91
+      "baseline": 0.0,
92
+      "calibration": 0.2
93
+    }
94
+  },
95
+  "spec_path": "<memory>"
96
+}