tenseleyflow/documentlanguagemodel / 1c7561e


Cover inference edge branches

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA: 1c7561eba8ff40bbde71f0be01f69cabf84f8d6a
Parents: 62c3366
Tree: ad20a71

5 changed files

Status  File                                                  Added  Removed
A       tests/unit/inference/test_audio_generate.py            137        0
M       tests/unit/inference/test_backend_select.py             35        0
M       tests/unit/inference/test_gate.py                        36        0
M       tests/unit/inference/test_mlx_adapter_conversion.py     14        0
A       tests/unit/inference/test_mlx_backend.py               125        0
tests/unit/inference/test_audio_generate.py (added)
@@ -0,0 +1,137 @@
+"""Audio inference helpers — prompt shaping, waveform loading, generation."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from types import ModuleType
+
+import numpy as np
+import pytest
+import torch
+
+from dlm.inference.audio_generate import format_audio_prompt, generate_audio, load_audios
+
+
+class TestFormatAudioPrompt:
+    def test_respects_user_placed_audio_token(self) -> None:
+        prompt = "Please compare <audio> and explain."
+        assert format_audio_prompt(prompt, audio_token="<audio>", num_audios=2) == prompt
+
+    def test_prepends_one_token_per_audio(self) -> None:
+        assert (
+            format_audio_prompt("describe", audio_token="<audio>", num_audios=2)
+            == "<audio><audio>\ndescribe"
+        )
+
+    def test_empty_prompt_emits_tokens_only(self) -> None:
+        assert (
+            format_audio_prompt("", audio_token="<audio>", num_audios=3) == "<audio><audio><audio>"
+        )
+
+
+class TestLoadAudios:
+    def test_missing_file_raises(self, tmp_path: Path) -> None:
+        with pytest.raises(FileNotFoundError, match="audio not found"):
+            load_audios([tmp_path / "missing.wav"], target_sample_rate=16_000)
+
+    def test_downmixes_stereo_to_mono(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        path = tmp_path / "stereo.wav"
+        path.write_bytes(b"stub")
+
+        fake_sf = ModuleType("soundfile")
+        fake_sf.read = lambda _path, dtype, always_2d: (
+            np.array([[1.0, 3.0], [5.0, 7.0]], dtype=np.float32),
+            16_000,
+        )
+        monkeypatch.setitem(sys.modules, "soundfile", fake_sf)
+
+        [waveform] = load_audios([path], target_sample_rate=16_000)
+        assert waveform.dtype == np.float32
+        assert waveform.tolist() == pytest.approx([2.0, 6.0])
+
+    def test_sample_rate_mismatch_refused_without_auto_resample(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        path = tmp_path / "native.wav"
+        path.write_bytes(b"stub")
+
+        fake_sf = ModuleType("soundfile")
+        fake_sf.read = lambda _path, dtype, always_2d: (np.array([1.0], dtype=np.float32), 22_050)
+        monkeypatch.setitem(sys.modules, "soundfile", fake_sf)
+
+        with pytest.raises(ValueError, match="does not match pinned 16000 Hz"):
+            load_audios([path], target_sample_rate=16_000, auto_resample=False)
+
+    def test_sample_rate_mismatch_resamples_when_enabled(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        path = tmp_path / "native.wav"
+        path.write_bytes(b"stub")
+
+        fake_sf = ModuleType("soundfile")
+        fake_sf.read = lambda _path, dtype, always_2d: (
+            np.array([1.0, 2.0], dtype=np.float32),
+            22_050,
+        )
+        monkeypatch.setitem(sys.modules, "soundfile", fake_sf)
+        monkeypatch.setattr(
+            "dlm.data.audio_resample.resample",
+            lambda mono, src_sr, dst_sr: np.array([9.0, 8.0], dtype=np.float32),
+        )
+
+        [waveform] = load_audios([path], target_sample_rate=16_000, auto_resample=True)
+        assert waveform.tolist() == pytest.approx([9.0, 8.0])
+
+
+class _Inputs(dict[str, torch.Tensor]):
+    def to(self, device: object) -> _Inputs:
+        return self
+
+
+class TestGenerateAudio:
+    def test_generate_audio_decodes_response_only_tokens(self) -> None:
+        class _Tokenizer:
+            pad_token_id = 99
+
+            def decode(self, tokens: torch.Tensor, skip_special_tokens: bool = True) -> str:
+                assert tokens.tolist() == [4, 5]
+                return "transcript"
+
+        class _Processor:
+            def __init__(self) -> None:
+                self.tokenizer = _Tokenizer()
+
+            def __call__(
+                self,
+                *,
+                audios: list[np.ndarray],
+                text: str,
+                sampling_rate: int,
+                return_tensors: str,
+            ) -> _Inputs:
+                assert len(audios) == 1
+                assert text == "<audio>\nwhat happened?"
+                assert sampling_rate == 16_000
+                return _Inputs({"input_ids": torch.tensor([[1, 2, 3]])})
+
+        class _Model:
+            device = torch.device("cpu")
+
+            def generate(self, **kwargs: object) -> torch.Tensor:
+                assert kwargs["pad_token_id"] == 99
+                return torch.tensor([[1, 2, 3, 4, 5]])
+
+        out = generate_audio(
+            _Model(),
+            _Processor(),
+            "what happened?",
+            [np.array([1.0], dtype=np.float32)],
+            audio_token="<audio>",
+            sample_rate=16_000,
+            max_new_tokens=2,
+            temperature=0.0,
+        )
+        assert out == "transcript"
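Review note: the prompt-shaping contract pinned by TestFormatAudioPrompt is small enough to sketch. This is a reconstruction from the assertions above, not the committed dlm.inference.audio_generate source; the keyword-only signature is an assumption:

def format_audio_prompt(prompt: str, *, audio_token: str, num_audios: int) -> str:
    # A user-placed token wins; never double-insert.
    if audio_token in prompt:
        return prompt
    tokens = audio_token * num_audios  # one token per clip, in order
    # An empty prompt yields tokens only, with no trailing newline.
    return f"{tokens}\n{prompt}" if prompt else tokens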
tests/unit/inference/test_backend_select.py (modified)
@@ -9,6 +9,7 @@ import pytest
 from dlm.inference.backends.select import (
     UnsupportedBackendError,
     build_backend,
+    is_apple_silicon,
     select_backend,
 )
 
@@ -77,6 +78,12 @@ class TestBuildBackend:
         backend = build_backend("pytorch", MagicMock())
         assert isinstance(backend, PyTorchBackend)
 
+    def test_mlx_returns_mlx_backend(self) -> None:
+        from dlm.inference.backends.mlx_backend import MlxBackend
+
+        backend = build_backend("mlx", MagicMock())
+        assert isinstance(backend, MlxBackend)
+
     def test_unknown_backend_raises(self) -> None:
         with pytest.raises(ValueError, match="unknown backend"):
             build_backend("haskell", MagicMock())  # type: ignore[arg-type]
@@ -94,3 +101,31 @@ class TestMlxAvailableDoesNotImportMlx:
         ):
             assert sel.mlx_available() is False
             m_find.assert_not_called()
+
+    def test_mlx_available_checks_both_packages_on_apple_silicon(self) -> None:
+        from dlm.inference.backends import select as sel
+
+        with (
+            patch.object(sel, "is_apple_silicon", return_value=True),
+            patch.object(
+                sel.importlib.util, "find_spec", side_effect=[object(), object()]
+            ) as m_find,
+        ):
+            assert sel.mlx_available() is True
+            assert m_find.call_count == 2
+
+
+class TestPlatformHelper:
+    def test_is_apple_silicon_true_only_for_darwin_arm64(self) -> None:
+        with (
+            patch("dlm.inference.backends.select.sys.platform", "darwin"),
+            patch("dlm.inference.backends.select.platform.machine", return_value="arm64"),
+        ):
+            assert is_apple_silicon() is True
+
+    def test_is_apple_silicon_false_for_other_hosts(self) -> None:
+        with (
+            patch("dlm.inference.backends.select.sys.platform", "linux"),
+            patch("dlm.inference.backends.select.platform.machine", return_value="x86_64"),
+        ):
+            assert is_apple_silicon() is False
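Review note: taken together, these tests pin a lazy availability probe: mlx is never imported, only find_spec lookups run, and only on Apple silicon. A minimal sketch consistent with the patch targets above; the two package names probed are a guess, since the tests only require that exactly two specs are checked:

import importlib.util
import platform
import sys

def is_apple_silicon() -> bool:
    # True only for macOS on arm64 hardware.
    return sys.platform == "darwin" and platform.machine() == "arm64"

def mlx_available() -> bool:
    # Cheap spec lookups only, and only where MLX can actually run.
    if not is_apple_silicon():
        return False
    return all(
        importlib.util.find_spec(name) is not None for name in ("mlx", "mlx_lm")
    )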
tests/unit/inference/test_gate.py (modified)
@@ -88,6 +88,26 @@ class _StubBaseModel:
     forward = __call__
 
 
+class _NoMaskTokenizer(_StubTokenizer):
+    def __call__(
+        self,
+        prompt: str,
+        *,
+        return_tensors: str = "pt",
+        truncation: bool = True,
+        max_length: int = 512,
+    ) -> dict[str, object]:
+        import torch
+
+        ids = torch.randint(0, 100, (1, self._seq_len))
+        return {"input_ids": ids}
+
+
+class _NoParamBaseModel(_StubBaseModel):
+    def parameters(self):  # type: ignore[no-untyped-def]
+        return iter(())
+
+
 def _train_gate_on_store(
     tmp_path: Path,
     *,
@@ -158,6 +178,22 @@ class TestEmbedPrompt:
         e2 = embed_prompt(prompt="hello world", tokenizer=tokenizer, base_model=model)
         assert not torch.allclose(e1, e2)
 
+    def test_falls_back_to_cpu_when_model_has_no_parameters(self) -> None:
+        embedding = embed_prompt(
+            prompt="hello",
+            tokenizer=_StubTokenizer(),
+            base_model=_NoParamBaseModel(hidden_dim=8),
+        )
+        assert embedding.shape == (8,)
+
+    def test_mean_pools_without_attention_mask(self) -> None:
+        embedding = embed_prompt(
+            prompt="hello",
+            tokenizer=_NoMaskTokenizer(),
+            base_model=_StubBaseModel(hidden_dim=8),
+        )
+        assert embedding.shape == (8,)
+
 
 class TestLoadGateHandle:
     def test_uniform_handle_from_cold_start(self, tmp_path: Path) -> None:
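Review note: the two new edge branches in embed_prompt are a device fallback (a parameterless model offers no device hint, so run on CPU) and a pooling fallback (no attention mask, so mean-pool the whole sequence). A sketch of those branches as the stubs imply them; the output attribute and shapes are assumptions, not the committed function body:

import torch

def embed_prompt(*, prompt, tokenizer, base_model) -> torch.Tensor:
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    # Branch 1: no parameters -> no device hint -> fall back to CPU.
    first_param = next(iter(base_model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    hidden = base_model(input_ids=inputs["input_ids"].to(device)).last_hidden_state
    mask = inputs.get("attention_mask")
    if mask is None:
        # Branch 2: no mask -> plain mean over the sequence axis.
        return hidden.mean(dim=1).squeeze(0)
    mask = mask.to(device).unsqueeze(-1).to(hidden.dtype)
    return ((hidden * mask).sum(dim=1) / mask.sum(dim=1)).squeeze(0)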
tests/unit/inference/test_mlx_adapter_conversion.py (modified)
@@ -77,3 +77,17 @@ class TestMapAllKeys:
         ]
         with pytest.raises(MlxConversionError, match="map to the same"):
             map_all_keys(collision)
+
+
+class TestBuildMlxAdapterConfig:
+    def test_non_positive_layer_count_rejected(self) -> None:
+        from dlm.inference.mlx_adapter import build_mlx_adapter_config
+
+        with pytest.raises(MlxConversionError, match="expected >=1"):
+            build_mlx_adapter_config(
+                {
+                    "r": 8,
+                    "target_modules": ["q_proj"],
+                },
+                0,
+            )
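Review note: this test only pins the guard clause, not the happy path. A sketch of the validation it exercises; the shape of the returned payload is a guess at an mlx_lm-style adapter config and is not confirmed by this commit:

from dlm.inference.mlx_adapter import MlxConversionError

def build_mlx_adapter_config(peft_config: dict, num_layers: int) -> dict:
    # Reject layer counts that cannot describe a real model.
    if num_layers < 1:
        raise MlxConversionError(f"got num_layers={num_layers}, expected >=1")
    return {
        "num_layers": num_layers,
        "lora_parameters": {
            "rank": peft_config["r"],
            "keys": peft_config["target_modules"],
        },
    }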
tests/unit/inference/test_mlx_backend.py (added)
@@ -0,0 +1,125 @@
+"""MLX backend helpers and lightweight backend-path coverage."""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+from types import ModuleType, SimpleNamespace
+
+import pytest
+
+from dlm.base_models import BASE_MODELS
+from dlm.inference.backends.mlx_backend import MlxBackend, _resolve_base_num_hidden_layers
+from dlm.inference.errors import AdapterNotFoundError
+from dlm.inference.mlx_adapter import MlxConversionError
+
+
+class TestResolveBaseNumHiddenLayers:
+    def test_prefers_transformers_auto_config(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.setattr(
+            "transformers.AutoConfig.from_pretrained",
+            lambda hf_id, local_files_only=True: SimpleNamespace(num_hidden_layers=24),
+        )
+        assert _resolve_base_num_hidden_layers("org/demo") == 24
+
+    def test_falls_back_to_cached_config_json(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        snapshot = tmp_path / "snapshots" / ("a" * 40)
+        snapshot.mkdir(parents=True)
+        (snapshot / "config.json").write_text(
+            json.dumps({"num_hidden_layers": 18}), encoding="utf-8"
+        )
+        monkeypatch.setattr(
+            "transformers.AutoConfig.from_pretrained",
+            lambda hf_id, local_files_only=True: SimpleNamespace(num_hidden_layers=None),
+        )
+        monkeypatch.setattr("huggingface_hub.snapshot_download", lambda **kwargs: str(snapshot))
+        assert _resolve_base_num_hidden_layers("org/demo") == 18
+
+    def test_cache_lookup_errors_raise_conversion_error(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        monkeypatch.setattr(
+            "transformers.AutoConfig.from_pretrained",
+            lambda hf_id, local_files_only=True: (_ for _ in ()).throw(RuntimeError("boom")),
+        )
+        monkeypatch.setattr(
+            "huggingface_hub.snapshot_download",
+            lambda **kwargs: (_ for _ in ()).throw(OSError("missing")),
+        )
+        with pytest.raises(MlxConversionError, match="could not resolve num_hidden_layers"):
+            _resolve_base_num_hidden_layers("org/demo")
+
+    def test_missing_num_hidden_layers_raises_conversion_error(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        snapshot = tmp_path / "snapshots" / ("a" * 40)
+        snapshot.mkdir(parents=True)
+        (snapshot / "config.json").write_text("{}", encoding="utf-8")
+        monkeypatch.setattr(
+            "transformers.AutoConfig.from_pretrained",
+            lambda hf_id, local_files_only=True: SimpleNamespace(num_hidden_layers=None),
+        )
+        monkeypatch.setattr("huggingface_hub.snapshot_download", lambda **kwargs: str(snapshot))
+        with pytest.raises(MlxConversionError, match="has no usable num_hidden_layers"):
+            _resolve_base_num_hidden_layers("org/demo")
+
+
+class TestMlxBackend:
+    def test_generate_before_load_raises(self) -> None:
+        backend = MlxBackend(SimpleNamespace())
+        with pytest.raises(RuntimeError, match="before load"):
+            backend.generate("hello")
+
+    def test_load_missing_adapter_raises(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        backend = MlxBackend(SimpleNamespace())
+        monkeypatch.setattr(
+            "dlm.inference.loader.resolve_adapter_path",
+            lambda store, adapter_name=None: tmp_path / "missing",
+        )
+        with pytest.raises(AdapterNotFoundError, match="does not exist"):
+            backend.load(BASE_MODELS["smollm2-135m"], SimpleNamespace(root=tmp_path))
+
+    def test_load_generate_and_unload_happy_path(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        adapter_dir = tmp_path / "adapter"
+        adapter_dir.mkdir()
+        staged_dir = tmp_path / "staged"
+
+        backend = MlxBackend(SimpleNamespace())
+        monkeypatch.setattr(
+            "dlm.inference.loader.resolve_adapter_path",
+            lambda store, adapter_name=None: adapter_dir,
+        )
+        monkeypatch.setattr(
+            "dlm.inference.backends.mlx_backend.stage_mlx_adapter_dir",
+            lambda peft_adapter_dir, dst_dir, *, base_hf_id: staged_dir,
+        )
+
+        fake_mlx = ModuleType("mlx_lm")
+        fake_mlx.load = lambda hf_id, adapter_path: ("model", "tokenizer")
+        fake_mlx.generate = lambda model, tokenizer, *, prompt, max_tokens, sampler, verbose: (
+            "mlx output"
+        )
+        fake_sample_utils = ModuleType("mlx_lm.sample_utils")
+        fake_sample_utils.make_sampler = lambda temp, top_p, top_k: {
+            "temp": temp,
+            "top_p": top_p,
+            "top_k": top_k,
+        }
+        monkeypatch.setitem(sys.modules, "mlx_lm", fake_mlx)
+        monkeypatch.setitem(sys.modules, "mlx_lm.sample_utils", fake_sample_utils)
+
+        backend.load(BASE_MODELS["smollm2-135m"], SimpleNamespace(root=tmp_path))
+        assert backend.generate(
+            "prompt", max_new_tokens=4, temperature=0.5, top_p=0.9, top_k=12
+        ) == ("mlx output")
+        backend.unload()
+        assert backend._workdir is None
+        assert backend._model is None
+        assert backend._tokenizer is None
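Review note: the most intricate logic covered here is the layer-count resolver's fallback chain: transformers AutoConfig first, then the cached config.json from the local Hugging Face snapshot, with every failure funneled into MlxConversionError. A sketch consistent with the four tests; the error messages match the pytest.raises patterns above, but the real helper's structure is inferred, not copied:

import json
from pathlib import Path

from dlm.inference.mlx_adapter import MlxConversionError

def _resolve_base_num_hidden_layers(hf_id: str) -> int:
    import transformers

    # Preferred path: offline AutoConfig lookup.
    try:
        config = transformers.AutoConfig.from_pretrained(hf_id, local_files_only=True)
        if getattr(config, "num_hidden_layers", None) is not None:
            return int(config.num_hidden_layers)
    except Exception:
        pass  # fall through to the raw cache

    # Fallback: read config.json straight out of the local HF snapshot.
    try:
        import huggingface_hub

        snapshot = huggingface_hub.snapshot_download(repo_id=hf_id, local_files_only=True)
        raw = json.loads((Path(snapshot) / "config.json").read_text(encoding="utf-8"))
    except OSError as exc:
        raise MlxConversionError(
            f"could not resolve num_hidden_layers for {hf_id}"
        ) from exc

    if raw.get("num_hidden_layers") is None:
        raise MlxConversionError(f"{hf_id} has no usable num_hidden_layers")
    return int(raw["num_hidden_layers"])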