documentlanguagemodel Public

Watch 0 Fork 0 Star 0

Python · 4223 bytes Raw Blame History

  
        1
        """Audio waveform resampling.
      
        2
        
        3
        Opt-in helper driven by `training.audio.auto_resample=True`. Off the
      
        4
        happy path the preprocessor + collator still refuse on SR mismatch
      
        5
        (v11 contract); `auto_resample=True` flips those hard errors to a
      
        6
        resample-on-decode call through `resample`.
      
        7
        
        8
        Two backends, preferred in order:
      
        9
        
        10
        1. **soxr** — libsoxr bindings (`soxr.resample`). High-quality
      
        11
           polyphase resampler, written in C, ~10× faster than scipy on
      
        12
           typical audio. Optional because it requires the libsoxr native
      
        13
           library (ships as a wheel on most platforms).
      
        14
        2. **scipy.signal.resample_poly** — fallback (no libsoxr needed) using
      
        15
           scipy's polyphase implementation. Always available when scipy is
      
        16
           installed. Slightly lower quality than soxr but still high.
      
        17
        
        18
        If neither is importable, `resample` raises `AudioResampleUnavailable`
      
        19
        with an actionable install hint. Callers should surface this at
      
        20
        plan-resolve time rather than letting a training loop crash mid-run.
      
        21
        """
      
        22
        
        23
        from __future__ import annotations
      
        24
        
        25
        from collections.abc import Callable
      
        26
        from typing import Any
      
        27
        
        28
        import numpy as np
      
        29
        
        30
        from dlm.data.errors import DataError
      
        31
        
        32
        _Backend = Callable[..., np.ndarray]  # resampler callable, invoked as backend(waveform, src_sr=..., dst_sr=...)
      
        33
        
        34
        
        35
        class AudioResampleUnavailable(DataError):  # noqa: N818 — mirrors DataError sibling naming
            """Raised when `training.audio.auto_resample=True` has no resampling backend.

            Both candidate imports — soxr and scipy — failed. The remedy is to
            install either one: ``pip install soxr`` (preferred; libsoxr
            bindings) or ``pip install scipy`` (fallback). The error is raised
            while the backend is being probed, before any waveform is handed
            to a resampler.
            """
      
        42
        
        43
        
        44
        def resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
            """Return `waveform` resampled from `src_sr` to `dst_sr`.

            The contract is 1-D float32 mono in, 1-D float32 mono out. Sample
            rates are validated before anything else, so a non-positive rate
            is rejected even when both rates happen to be equal. When the
            (valid) rates match, the input array itself is returned without a
            copy. Otherwise the call is routed through the backend chosen by
            `_pick_backend` (soxr if importable, else scipy).

            There is deliberately no fallback that returns the un-resampled
            waveform: training on audio at the wrong sample rate would be a
            silent correctness bug, so a missing backend raises instead.

            Args:
                waveform: Samples to resample; handed unchanged to the backend.
                src_sr: Sample rate of `waveform`. Must be positive.
                dst_sr: Target sample rate. Must be positive.

            Returns:
                `waveform` itself when the rates are equal, otherwise the
                array produced by the selected backend.

            Raises:
                ValueError: If `src_sr` or `dst_sr` is zero or negative.
                AudioResampleUnavailable: Propagated from `_pick_backend` when
                    neither soxr nor scipy can be imported.
            """
            # Validate first: the equal-rate shortcut below must not let a
            # nonsensical pair such as (0, 0) or (-1, -1) pass silently.
            if src_sr <= 0 or dst_sr <= 0:
                raise ValueError(
                    f"resample: sample rates must be positive, got src_sr={src_sr} dst_sr={dst_sr}"
                )
            if src_sr == dst_sr:
                return waveform

            backend = _pick_backend()
            return backend(waveform, src_sr=src_sr, dst_sr=dst_sr)
      
        63
        
        64
        
        65
        def _pick_backend() -> _Backend:
            """Return the resampler for the first backend that imports cleanly.

            soxr is tried first, then scipy.signal. Each probe performs the
            real import instead of handing back a wrapper that would only fail
            when called, so a missing dependency shows up here, at
            backend-pick time.

            Raises:
                AudioResampleUnavailable: If both imports raise ImportError.
            """
            # Preferred backend: libsoxr bindings.
            try:
                import soxr  # noqa: F401

                return _soxr_resample
            except ImportError:
                pass

            # Fallback backend: scipy's polyphase resampler.
            try:
                import scipy.signal  # noqa: F401

                return _scipy_resample
            except ImportError:
                pass

            install_hint = (
                "training.audio.auto_resample=True requires either soxr or scipy; "
                "install one of: `pip install soxr` (recommended) or "
                "`pip install scipy`. Until then re-encode the audio files to "
                "the base's pinned rate manually with `ffmpeg -i <in> -ar <sr> <out>`."
            )
            raise AudioResampleUnavailable(install_hint)
      
        92
        
        93
        
        94
        def _soxr_resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
            """Resample through `soxr.resample` using the "HQ" quality preset.

            soxr is imported lazily so the module stays importable without it.
            Whatever soxr returns is normalised to a C-contiguous float32
            array before being handed back.
            """
            from soxr import resample as soxr_resample

            resampled: Any = soxr_resample(waveform, src_sr, dst_sr, quality="HQ")
            as_float32 = np.ascontiguousarray(resampled, dtype=np.float32)
            return as_float32
      
        100
        
        101
        
        102
        def _scipy_resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
      
        103
            """scipy.signal.resample_poly fallback.
      
        104
        
        105
            Reduces (src_sr, dst_sr) to their coprime pair so the polyphase
      
        106
            filter uses the minimal up/down factors. scipy handles any
      
        107
            integer ratio; non-integer ratios reduce the same way.
      
        108
            """
      
        109
            from math import gcd
      
        110
        
        111
            from scipy.signal import resample_poly
      
        112
        
        113
            divisor = gcd(src_sr, dst_sr)
      
        114
            up = dst_sr // divisor
      
        115
            down = src_sr // divisor
      
        116
            out: Any = resample_poly(waveform, up=up, down=down)
      
        117
            return np.ascontiguousarray(out, dtype=np.float32)

1	"""Audio waveform resampling.
2
3	Opt-in helper driven by `training.audio.auto_resample=True`. Off the
4	happy path the preprocessor + collator still refuse on SR mismatch
5	(v11 contract); `auto_resample=True` flips those hard errors to a
6	resample-on-decode call through `resample`.
7
8	Two backends, preferred in order:
9
10	1. soxr — libsoxr bindings (`soxr.resample`). High-quality
11	polyphase resampler, written in C, ~10× faster than scipy on
12	typical audio. Optional because it requires the libsoxr native
13	library (ships as a wheel on most platforms).
14	2. scipy.signal.resample_poly — fallback (no libsoxr needed) using
15	scipy's polyphase implementation. Always available when scipy is
16	installed. Slightly lower quality than soxr but still high.
17
18	If neither is importable, `resample` raises `AudioResampleUnavailable`
19	with an actionable install hint. Callers should surface this at
20	plan-resolve time rather than letting a training loop crash mid-run.
21	"""
22
23	from __future__ import annotations
24
25	from collections.abc import Callable
26	from typing import Any
27
28	import numpy as np
29
30	from dlm.data.errors import DataError
31
32	_Backend = Callable[..., np.ndarray]  # resampler callable, invoked as backend(waveform, src_sr=..., dst_sr=...)
33
34
class AudioResampleUnavailable(DataError):  # noqa: N818 — mirrors DataError sibling naming
    """Raised when `training.audio.auto_resample=True` has no resampling backend.

    Both candidate imports — soxr and scipy — failed. The remedy is to
    install either one: ``pip install soxr`` (preferred; libsoxr
    bindings) or ``pip install scipy`` (fallback). The error is raised
    while the backend is being probed, before any waveform is handed
    to a resampler.
    """
42
43
def resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
    """Return `waveform` resampled from `src_sr` to `dst_sr`.

    The contract is 1-D float32 mono in, 1-D float32 mono out. Sample
    rates are validated before anything else, so a non-positive rate
    is rejected even when both rates happen to be equal. When the
    (valid) rates match, the input array itself is returned without a
    copy. Otherwise the call is routed through the backend chosen by
    `_pick_backend` (soxr if importable, else scipy).

    There is deliberately no fallback that returns the un-resampled
    waveform: training on audio at the wrong sample rate would be a
    silent correctness bug, so a missing backend raises instead.

    Args:
        waveform: Samples to resample; handed unchanged to the backend.
        src_sr: Sample rate of `waveform`. Must be positive.
        dst_sr: Target sample rate. Must be positive.

    Returns:
        `waveform` itself when the rates are equal, otherwise the
        array produced by the selected backend.

    Raises:
        ValueError: If `src_sr` or `dst_sr` is zero or negative.
        AudioResampleUnavailable: Propagated from `_pick_backend` when
            neither soxr nor scipy can be imported.
    """
    # Validate first: the equal-rate shortcut below must not let a
    # nonsensical pair such as (0, 0) or (-1, -1) pass silently.
    if src_sr <= 0 or dst_sr <= 0:
        raise ValueError(
            f"resample: sample rates must be positive, got src_sr={src_sr} dst_sr={dst_sr}"
        )
    if src_sr == dst_sr:
        return waveform

    backend = _pick_backend()
    return backend(waveform, src_sr=src_sr, dst_sr=dst_sr)
63
64
def _pick_backend() -> _Backend:
    """Return the resampler for the first backend that imports cleanly.

    soxr is tried first, then scipy.signal. Each probe performs the
    real import instead of handing back a wrapper that would only fail
    when called, so a missing dependency shows up here, at
    backend-pick time.

    Raises:
        AudioResampleUnavailable: If both imports raise ImportError.
    """
    # Preferred backend: libsoxr bindings.
    try:
        import soxr  # noqa: F401

        return _soxr_resample
    except ImportError:
        pass

    # Fallback backend: scipy's polyphase resampler.
    try:
        import scipy.signal  # noqa: F401

        return _scipy_resample
    except ImportError:
        pass

    install_hint = (
        "training.audio.auto_resample=True requires either soxr or scipy; "
        "install one of: `pip install soxr` (recommended) or "
        "`pip install scipy`. Until then re-encode the audio files to "
        "the base's pinned rate manually with `ffmpeg -i <in> -ar <sr> <out>`."
    )
    raise AudioResampleUnavailable(install_hint)
92
93
def _soxr_resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
    """Resample through `soxr.resample` using the "HQ" quality preset.

    soxr is imported lazily so the module stays importable without it.
    Whatever soxr returns is normalised to a C-contiguous float32
    array before being handed back.
    """
    from soxr import resample as soxr_resample

    resampled: Any = soxr_resample(waveform, src_sr, dst_sr, quality="HQ")
    as_float32 = np.ascontiguousarray(resampled, dtype=np.float32)
    return as_float32
100
101
102	def _scipy_resample(waveform: np.ndarray, *, src_sr: int, dst_sr: int) -> np.ndarray:
103	"""scipy.signal.resample_poly fallback.
104
105	Reduces (src_sr, dst_sr) to their coprime pair so the polyphase
106	filter uses the minimal up/down factors. scipy handles any
107	integer ratio; non-integer ratios reduce the same way.
108	"""
109	from math import gcd
110
111	from scipy.signal import resample_poly
112
113	divisor = gcd(src_sr, dst_sr)
114	up = dst_sr // divisor
115	down = src_sr // divisor
116	out: Any = resample_poly(waveform, up=up, down=down)
117	return np.ascontiguousarray(out, dtype=np.float32)