`ee4f548`

tests/golden: expected_darwin.json — locally-generated pin (S18.5)

Authored by

espadonne 3 weeks ago

SHA: ee4f54843a142679c8871ea3c170bb54a42146c5
Parents: f230789
Tree: 39561c8

1 changed file

Status	File	+	-
A	`tests/golden/expected_darwin.json`	96	0

tests/golden/expected_darwin.json added

 +{
 +  "determinism": {
 +    "class": "best_effort",
 +    "notes": [
 +      "MPS: bit-identical across runs is best-effort"
 +    ],
 +    "seed": 0
 +  },
 +  "null_stats": {},
 +  "probes": [
 +    {
 +      "base_value": null,
 +      "ci_95": [
 +        0.00845183397355073,
 +        0.02976923823248102
 +      ],
 +      "evidence": {
 +        "divergence_kind": "js",
 +        "max": 0.036150663063872254,
 +        "num_prompts": 4,
 +        "per_prompt": [
 +          0.01062496373830732,
 +          0.006711906030413639,
 +          0.013671617802962006,
 +          0.036150663063872254
 +        ],
 +        "raw_ci_95": [
 +          0.00845183397355073,
 +          0.02976923823248102
 +        ],
 +        "weight": 1.0,
 +        "z_by_rank": null
 +      },
 +      "ft_value": null,
 +      "kind": "delta_kl",
 +      "message": "mean js=0.0168 (\u2265 0.0) (no calibration for delta_kl)",
 +      "name": "dk_golden",
 +      "raw": 0.016789787658888805,
 +      "score": 0.0242225433930576,
 +      "verdict": "pass",
 +      "z_score": null
 +    },
 +    {
 +      "base_value": null,
 +      "ci_95": [
 +        0.0,
 +        0.0
 +      ],
 +      "evidence": {
 +        "fraction_regressed": 0.0,
 +        "mean_delta_nats": -0.03518791794776917,
 +        "raw_ci_95": [
 +          0.0,
 +          0.0
 +        ],
 +        "regressed_count": 0,
 +        "regression_nats_threshold": 1.0,
 +        "total_items": 20,
 +        "weight": 1.0,
 +        "worst_offenders": [],
 +        "z_by_rank": null
 +      },
 +      "ft_value": -0.03518791794776917,
 +      "kind": "calibration_drift",
 +      "message": "0/20 items regressed >1.0 nats (frac=0.0%), mean_delta=-0.035 nats/tok (no calibration for calibration_drift)",
 +      "name": "cal_golden",
 +      "raw": 0.0,
 +      "score": 0.8572832218805948,
 +      "verdict": "pass",
 +      "z_score": null
 +    }
 +  ],
 +  "schema_version": 1,
 +  "score": {
 +    "band": "partial",
 +    "components": {
 +      "ablation": 0.0,
 +      "adherence": 0.0242225433930576,
 +      "attribution": 0.0,
 +      "baseline": 0.0,
 +      "calibration": 0.8572832218805948
 +    },
 +    "findings": [
 +      "adherence score is 0.02 \u2014 below the noise threshold"
 +    ],
 +    "overall": 0.3574468147880725,
 +    "weights": {
 +      "ablation": 0.15,
 +      "adherence": 0.3,
 +      "attribution": 0.35,
 +      "baseline": 0.0,
 +      "calibration": 0.2
 +    }
 +  },
 +  "spec_path": "<memory>"
 +}