"""Perplexity utility.

Perplexity of a held-out validation split is `exp(cross_entropy_loss)`,
where the loss is the mean per-token cross-entropy in nats. Lower is
better; a language model that assigns probability 1 to every token has
loss 0 and perplexity 1. On small documents the numbers are noisy (the
val set is rarely large enough for a stable PPL), but the *trend*
across eval steps is the signal.

Pulled out as its own module so `dlm metrics` can import it without
pulling in torch / transformers.
"""

from __future__ import annotations

import math


def perplexity(loss: float) -> float:
    """Return `exp(loss)`, or `math.inf` for non-finite / negative inputs.

    A non-finite loss (NaN / inf) would cause `math.exp` to overflow or
    return NaN; we surface `math.inf` so log / metric pipelines have a
    sortable sentinel rather than a bad float.
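
    Illustrative doctest-style examples:

    >>> perplexity(0.0)
    1.0
    >>> perplexity(float("nan"))
    inf
    >>> perplexity(1000.0)  # exp(1000) overflows a float; clamped
    inf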
    """
    if not math.isfinite(loss) or loss < 0.0:
        return math.inf
    try:
        return math.exp(loss)
    except OverflowError:
        return math.inf
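

# Illustrative usage sketch, not part of the original module: how a metrics
# loop might turn a mean per-token cross-entropy into a reported perplexity.
# The `val_losses` values below are made-up stand-ins, not real eval data.
if __name__ == "__main__":
    val_losses = [2.31, 2.05, 1.98]  # hypothetical per-eval-step mean CE (nats)
    for step, loss in enumerate(val_losses):
        print(f"eval step {step}: loss={loss:.3f} ppl={perplexity(loss):.2f}")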