Python · 4223 bytes Raw Blame History
1 """Audio waveform resampling.
2
3 Opt-in helper driven by `training.audio.auto_resample=True`. Off the
4 happy path the preprocessor + collator still refuse on SR mismatch
5 (v11 contract); `auto_resample=True` flips those hard errors to a
6 resample-on-decode call through `resample`.
7
8 Two backends, preferred in order:
9
10 1. **soxr** — libsoxr bindings (`soxr.resample`). High-quality
11 polyphase resampler, written in C, ~10× faster than scipy on
12 typical audio. Optional because it requires the libsoxr native
13 library (ships as a wheel on most platforms).
2. **scipy.signal.resample_poly** — fallback using scipy's polyphase
   implementation; needs no native library beyond scipy itself.
   Always available when scipy is installed. Slightly lower quality
   than soxr but still high.
17
18 If neither is importable, `resample` raises `AudioResampleUnavailable`
19 with an actionable install hint. Callers should surface this at
20 plan-resolve time rather than letting a training loop crash mid-run.
21 """
22
23 from __future__ import annotations
24
25 from collections.abc import Callable
26 from typing import Any
27
28 import numpy as np
29
30 from dlm.data.errors import DataError
31
# Call shape shared by the resampler backends in this module: invoked as
# `backend(waveform, src_sr=..., dst_sr=...)` and returning an ndarray.
_Backend = Callable[..., np.ndarray]
33
34
class AudioResampleUnavailable(DataError):  # noqa: N818 — mirrors DataError sibling naming
    """No resampling backend could be imported for `training.audio.auto_resample=True`.

    Raised by `_pick_backend` when both the soxr and the scipy.signal
    imports fail. The message lists the remedies: ``pip install soxr``
    (preferred, libsoxr bindings) or ``pip install scipy`` (fallback).
    The error is raised while the backend is being selected, i.e. before
    any samples are handed to a resampler.
    """
42
43
def resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
    """Return `waveform` resampled from `src_sr` → `dst_sr`.

    Intended for 1-D float32 mono arrays (the shape is not checked
    here). Both sample rates must be positive; this is validated first,
    so an invalid pair is rejected even when the two rates are equal.
    If `src_sr == dst_sr` the input object is returned unchanged (no
    copy). On a rate change the call routes through soxr if importable,
    else scipy.signal.resample_poly, and the backend returns a
    contiguous float32 array. There is no silent fallback beyond that:
    if neither backend is available the call raises rather than
    returning the un-resampled waveform (that would train on the wrong
    sample rate, a silent correctness bug).

    Raises `ValueError` when either rate is zero or negative, and
    `AudioResampleUnavailable` (from `_pick_backend`) when a rate change
    is needed but neither soxr nor scipy can be imported.
    """
    # Validate before the equal-rate shortcut: otherwise a bogus pair such
    # as src_sr == dst_sr == 0 would slip through as a "no-op" resample.
    if src_sr <= 0 or dst_sr <= 0:
        raise ValueError(
            f"resample: sample rates must be positive, got src_sr={src_sr} dst_sr={dst_sr}"
        )
    if src_sr == dst_sr:
        return waveform

    backend = _pick_backend()
    return backend(waveform, src_sr=src_sr, dst_sr=dst_sr)
63
64
def _pick_backend() -> _Backend:
    """Return the resampler for the first backend that imports cleanly.

    soxr is tried first; scipy.signal is only probed when soxr is
    missing. The real imports happen here, so a missing dependency is
    reported when the backend is chosen rather than from inside a
    wrapper that fails later. Raises `AudioResampleUnavailable` when
    neither import succeeds.
    """
    selected: _Backend | None = None

    try:
        import soxr  # noqa: F401
    except ImportError:
        pass
    else:
        selected = _soxr_resample

    # Only fall through to scipy when soxr was not importable.
    if selected is None:
        try:
            import scipy.signal  # noqa: F401
        except ImportError:
            pass
        else:
            selected = _scipy_resample

    if selected is not None:
        return selected

    install_hint = (
        "training.audio.auto_resample=True requires either soxr or scipy; "
        "install one of: `pip install soxr` (recommended) or "
        "`pip install scipy`. Until then re-encode the audio files to "
        "the base's pinned rate manually with `ffmpeg -i <in> -ar <sr> <out>`."
    )
    raise AudioResampleUnavailable(install_hint)
92
93
def _soxr_resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
    """Resample through `soxr.resample` at the "HQ" quality setting.

    soxr is imported at call time, so the module loads without it. The
    result is returned as a C-contiguous float32 array.
    """
    import soxr

    return np.ascontiguousarray(
        soxr.resample(waveform, src_sr, dst_sr, quality="HQ"),
        dtype=np.float32,
    )
100
101
def _scipy_resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
    """Resample through `scipy.signal.resample_poly`.

    The two rates are divided by their greatest common divisor so the
    polyphase filter runs with the smallest equivalent up/down factors
    (e.g. 48000 → 16000 becomes up=1, down=3). The result is returned as
    a C-contiguous float32 array.
    """
    import math

    from scipy import signal

    common = math.gcd(src_sr, dst_sr)
    # Upsample by the reduced destination rate, downsample by the reduced source rate.
    up_factor, down_factor = dst_sr // common, src_sr // common
    resampled: Any = signal.resample_poly(waveform, up_factor, down_factor)
    return np.ascontiguousarray(resampled, dtype=np.float32)