1 """Perplexity utility.
2
3 Perplexity of a held-out validation split is `exp(cross_entropy_loss)`.
4 Lower is better; a language model that assigns probability 1 to every
5 token has loss 0 and perplexity 1. On small documents the numbers are
6 noisy — the val set is rarely large enough for a stable PPL — but the
7 *trend* across eval steps is the signal.
8
9 Pulled out as its own module so `dlm metrics` can import it without
10 pulling in torch / transformers.
11 """

from __future__ import annotations

import math


def perplexity(loss: float) -> float:
    """Return `exp(loss)`, or `math.inf` for non-finite / negative inputs.

    A NaN loss would propagate through `math.exp` as NaN, and a large
    finite loss overflows; we surface `math.inf` for every bad input so
    log / metric pipelines get a sortable sentinel rather than a bad float.
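
    Examples:

    >>> perplexity(0.0)
    1.0
    >>> round(perplexity(1.0), 3)
    2.718
    >>> perplexity(float("nan"))
    inf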
24 """
    if not math.isfinite(loss) or loss < 0.0:
        return math.inf
    try:
        return math.exp(loss)
    except OverflowError:
        return math.inf
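

if __name__ == "__main__":
    # Illustrative demo (hypothetical values): a uniform model over V
    # tokens has cross-entropy ln(V) and perplexity exactly V, so these
    # serve as a quick sanity check of the formula.
    for vocab in (2, 4, 32_000):
        loss = math.log(vocab)
        print(f"uniform over {vocab:>6} tokens: loss={loss:.3f} "
              f"ppl={perplexity(loss):.1f}")
    # The sentinel keeps a bad loss sortable instead of NaN-poisoning logs.
    print(f"nan loss -> ppl={perplexity(float('nan'))}")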