"""Audio waveform resampling.

Opt-in helper driven by `training.audio.auto_resample=True`. Off the
happy path the preprocessor + collator still refuse on SR mismatch
(v11 contract); `auto_resample=True` flips those hard errors to a
resample-on-decode call through `resample`.

Two backends, preferred in order:

1. **soxr** — libsoxr bindings (`soxr.resample`). High-quality
   polyphase resampler, written in C, ~10× faster than scipy on
   typical audio. Optional because it requires the libsoxr native
   library (ships as a wheel on most platforms).
2. **scipy.signal.resample_poly** — pure-Python fallback using
   scipy's polyphase implementation. Always available when scipy is
   installed. Slightly lower quality than soxr but still high.

If neither is importable, `resample` raises `AudioResampleUnavailable`
with an actionable install hint. Callers should surface this at
plan-resolve time rather than letting a training loop crash mid-run.
"""

from __future__ import annotations

from collections.abc import Callable
from typing import Any

import numpy as np

from dlm.data.errors import DataError

# Type of a resampling backend. The `...` leaves the parameter list
# unchecked; the backends defined in this module are called as
# `backend(waveform, src_sr=..., dst_sr=...)` and return an ndarray.
_Backend = Callable[..., np.ndarray]


class AudioResampleUnavailable(DataError):  # noqa: N818 — mirrors DataError sibling naming
    """No resampling backend is importable for `training.audio.auto_resample=True`.

    Raised by the backend probe (`_pick_backend`) when both the
    ``soxr`` and the ``scipy.signal`` imports fail. The fix is to
    install one of the two: ``pip install soxr`` (preferred) or
    ``pip install scipy`` (fallback).
    """


def resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
    """Return `waveform` resampled from `src_sr` → `dst_sr`.

    Input is expected to be a 1-D float32 mono array. Both sample rates
    are validated first, so a non-positive rate raises `ValueError`
    even when `src_sr == dst_sr` (e.g. an "unknown" rate of 0 on both
    sides is rejected instead of being passed through silently).

    With valid, equal rates the input object itself is returned (no
    copy, no dtype conversion). On a rate change the call is routed
    through the backend chosen by `_pick_backend`: soxr if importable,
    else scipy.signal.resample_poly. Both backends return a
    C-contiguous float32 array. There is no silent fallback beyond
    that — if neither backend is available, `_pick_backend` raises
    `AudioResampleUnavailable` rather than handing back the
    un-resampled waveform (that would train on the wrong sample rate,
    a silent correctness bug).
    """
    # Validate before the identity shortcut so bad rates cannot slip
    # through just because they happen to be equal.
    if src_sr <= 0 or dst_sr <= 0:
        raise ValueError(
            f"resample: sample rates must be positive, got src_sr={src_sr} dst_sr={dst_sr}"
        )
    if src_sr == dst_sr:
        return waveform

    backend = _pick_backend()
    return backend(waveform, src_sr=src_sr, dst_sr=dst_sr)


def _pick_backend() -> _Backend:
    """Return the first resampler whose dependency imports cleanly.

    soxr is tried first; scipy.signal is only probed when the soxr
    import fails. The probe performs the real import instead of
    handing back a wrapper that would fail on first use, so a missing
    dependency is reported here, at pick time. When both imports fail,
    `AudioResampleUnavailable` is raised with install instructions.
    """
    chosen: _Backend | None = None

    try:
        import soxr  # noqa: F401
    except ImportError:
        try:
            import scipy.signal  # noqa: F401
        except ImportError:
            pass
        else:
            chosen = _scipy_resample
    else:
        chosen = _soxr_resample

    if chosen is not None:
        return chosen

    # Raised outside the `except` blocks so the error carries no
    # chained ImportError context.
    raise AudioResampleUnavailable(
        "training.audio.auto_resample=True requires either soxr or scipy; "
        "install one of: `pip install soxr` (recommended) or "
        "`pip install scipy`. Until then re-encode the audio files to "
        "the base's pinned rate manually with `ffmpeg -i <in> -ar <sr> <out>`."
    )


def _soxr_resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
    """Resample through ``soxr.resample`` using the "HQ" quality preset.

    soxr is imported at call time, so the module itself loads without
    it. The result is handed back as a C-contiguous float32 array.
    """
    import soxr

    return np.ascontiguousarray(
        soxr.resample(waveform, src_sr, dst_sr, quality="HQ"),
        dtype=np.float32,
    )


def _scipy_resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray: |
| 103 |
"""scipy.signal.resample_poly fallback. |
| 104 |
|
| 105 |
Reduces (src_sr, dst_sr) to their coprime pair so the polyphase |
| 106 |
filter uses the minimal up/down factors. scipy handles any |
| 107 |
integer ratio; non-integer ratios reduce the same way. |
| 108 |
""" |
| 109 |
from math import gcd |
| 110 |
|
| 111 |
from scipy.signal import resample_poly |
| 112 |
|
| 113 |
divisor = gcd(src_sr, dst_sr) |
| 114 |
up = dst_sr // divisor |
| 115 |
down = src_sr // divisor |
| 116 |
out: Any = resample_poly(waveform, up=up, down=down) |
| 117 |
return np.ascontiguousarray(out, dtype=np.float32) |