`e0e16ed`

tests/golden: expected_linux.json from dispatch run 24746071833 (S18.8)

Authored by

espadonne 3 weeks ago

SHA: e0e16ed241c90790a3446d117635f9f10bb193f4
Parents: 27af978
Tree: 5ddb874

1 changed file

Status	File	+	-
A	`tests/golden/expected_linux.json`	96	0

tests/golden/expected_linux.json added

 +{
 +  "determinism": {
 +    "class": "best_effort",
 +    "notes": [
 +      "CPU-only backend: strict determinism depends on BLAS impl"
 +    ],
 +    "seed": 0
 +  },
 +  "null_stats": {},
 +  "probes": [
 +    {
 +      "base_value": null,
 +      "ci_95": [
 +        0.008465445404266456,
 +        0.02977853737438285
 +      ],
 +      "evidence": {
 +        "divergence_kind": "js",
 +        "max": 0.03615893521221293,
 +        "num_prompts": 4,
 +        "per_prompt": [
 +          0.010637343860892606,
 +          0.006726111759252678,
 +          0.013683446339307789,
 +          0.03615893521221293
 +        ],
 +        "raw_ci_95": [
 +          0.008465445404266456,
 +          0.02977853737438285
 +        ],
 +        "weight": 1.0,
 +        "z_by_rank": null
 +      },
 +      "ft_value": null,
 +      "kind": "delta_kl",
 +      "message": "mean js=0.0168 (\u2265 0.0) (no calibration for delta_kl)",
 +      "name": "dk_golden",
 +      "raw": 0.0168014592929165,
 +      "score": 0.024239382001588428,
 +      "verdict": "pass",
 +      "z_score": null
 +    },
 +    {
 +      "base_value": null,
 +      "ci_95": [
 +        0.0,
 +        0.0
 +      ],
 +      "evidence": {
 +        "fraction_regressed": 0.0,
 +        "mean_delta_nats": -0.03518710732460022,
 +        "raw_ci_95": [
 +          0.0,
 +          0.0
 +        ],
 +        "regressed_count": 0,
 +        "regression_nats_threshold": 1.0,
 +        "total_items": 20,
 +        "weight": 1.0,
 +        "worst_offenders": [],
 +        "z_by_rank": null
 +      },
 +      "ft_value": -0.03518710732460022,
 +      "kind": "calibration_drift",
 +      "message": "0/20 items regressed >1.0 nats (frac=0.0%), mean_delta=-0.035 nats/tok (no calibration for calibration_drift)",
 +      "name": "cal_golden",
 +      "raw": 0.0,
 +      "score": 0.8572834380467733,
 +      "verdict": "pass",
 +      "z_score": null
 +    }
 +  ],
 +  "schema_version": 1,
 +  "score": {
 +    "band": "partial",
 +    "components": {
 +      "ablation": 0.0,
 +      "adherence": 0.024239382001588428,
 +      "attribution": 0.0,
 +      "baseline": 0.0,
 +      "calibration": 0.8572834380467733
 +    },
 +    "findings": [
 +      "adherence score is 0.02 \u2014 below the noise threshold"
 +    ],
 +    "overall": 0.3574570044196624,
 +    "weights": {
 +      "ablation": 0.15,
 +      "adherence": 0.3,
 +      "attribution": 0.35,
 +      "baseline": 0.0,
 +      "calibration": 0.2
 +    }
 +  },
 +  "spec_path": "<memory>"
 +}