Python · 1235 bytes Raw Blame History
1 """Eval harness — val loss, perplexity, probes, retention, early stopping.
2
3 Heavy imports are deferred to the boundaries that need them.
4 """
5
6 from __future__ import annotations
7
8 from dlm.eval.early_stop import EarlyStopConfig, build_callback, was_early_stopped
9 from dlm.eval.errors import EvalError, ProbeFormatError, RetentionSliceError
10 from dlm.eval.perplexity import perplexity
11 from dlm.eval.probes import Probe, extract_probes
12 from dlm.eval.retention import RetentionSlice, build_retention_slice, retention_delta
13 from dlm.eval.summary import (
14 ProbeOutput,
15 SourceProvenanceRecord,
16 TrainingSummary,
17 load_summary,
18 save_summary,
19 summary_path_for,
20 )
21 from dlm.eval.val_loss import eval_metrics_from_eval_pred, summarize_eval_state
22
# Explicit public surface of this package: each string below is a name bound
# by one of the ``dlm.eval.*`` imports earlier in this module, so a wildcard
# import of the package exposes exactly these names.
__all__ = [
    # Capitalised names, in alphabetical order.
    "EarlyStopConfig",
    "EvalError",
    "Probe",
    "ProbeFormatError",
    "ProbeOutput",
    "RetentionSlice",
    "RetentionSliceError",
    "SourceProvenanceRecord",
    "TrainingSummary",
    # Lowercase names, in alphabetical order.
    "build_callback",
    "build_retention_slice",
    "eval_metrics_from_eval_pred",
    "extract_probes",
    "load_summary",
    "perplexity",
    "retention_delta",
    "save_summary",
    "summarize_eval_state",
    "summary_path_for",
    "was_early_stopped",
]