test: add smoke, telemetry-gate, and io.text suites
- SHA: 52f787783c654b405ab3da6b435a717b0a2042a4 (52f7877)
- Parents: d2d152a
- Tree: ddc2dab

| Status | File | + | - |
|---|---|---|---|
| A | tests/__init__.py | 0 | 0 |
| A | tests/e2e/__init__.py | 0 | 0 |
| A | tests/integration/__init__.py | 0 | 0 |
| A | tests/test_smoke.py | 65 | 0 |
| A | tests/unit/__init__.py | 0 | 0 |
| A | tests/unit/test_cli_telemetry.py | 60 | 0 |
| A | tests/unit/test_io_text.py | 99 | 0 |
tests/__init__.py added
tests/e2e/__init__.py added
tests/integration/__init__.py added
tests/test_smoke.py added
@@ -0,0 +1,65 @@ | |||
| 1 | +"""Smoke tests: the package imports and the CLI boots.""" | ||
| 2 | + | ||
| 3 | +from __future__ import annotations | ||
| 4 | + | ||
| 5 | +import subprocess | ||
| 6 | +import sys | ||
| 7 | + | ||
| 8 | +from typer.testing import CliRunner | ||
| 9 | + | ||
| 10 | +import dlm | ||
| 11 | +from dlm.cli.app import app | ||
| 12 | + | ||
| 13 | + | ||
| 14 | +def test_package_version_is_set() -> None: | ||
| 15 | + assert dlm.__version__ | ||
| 16 | + assert dlm.__version__ != "0.0.0+unknown", ( | ||
| 17 | + "package must be installed via uv sync / pip install for version lookup" | ||
| 18 | + ) | ||
| 19 | + | ||
| 20 | + | ||
| 21 | +def test_cli_version_flag() -> None: | ||
| 22 | + runner = CliRunner() | ||
| 23 | + result = runner.invoke(app, ["--version"]) | ||
| 24 | + assert result.exit_code == 0, result.output | ||
| 25 | + assert result.output.strip() == f"dlm {dlm.__version__}" | ||
| 26 | + | ||
| 27 | + | ||
| 28 | +def test_cli_help_lists_all_v1_subcommands() -> None: | ||
| 29 | + runner = CliRunner() | ||
| 30 | + result = runner.invoke(app, ["--help"]) | ||
| 31 | + assert result.exit_code == 0 | ||
| 32 | + expected = { | ||
| 33 | + "init", | ||
| 34 | + "train", | ||
| 35 | + "prompt", | ||
| 36 | + "export", | ||
| 37 | + "pack", | ||
| 38 | + "unpack", | ||
| 39 | + "doctor", | ||
| 40 | + "show", | ||
| 41 | + "migrate", | ||
| 42 | + } | ||
| 43 | + for name in expected: | ||
| 44 | + assert name in result.output, f"`dlm --help` missing subcommand {name!r}" | ||
| 45 | + | ||
| 46 | + | ||
| 47 | +def test_cli_subcommand_stub_raises_notimplementederror() -> None: | ||
| 48 | + runner = CliRunner() | ||
| 49 | + # catch_exceptions=True makes the runner record the error on result.exception. | ||
| 50 | + result = runner.invoke(app, ["train", "nonexistent.dlm"], catch_exceptions=True) | ||
| 51 | + assert result.exit_code != 0 | ||
| 52 | + assert isinstance(result.exception, NotImplementedError) | ||
| 53 | + assert "Sprint 09" in str(result.exception) | ||
| 54 | + | ||
| 55 | + | ||
| 56 | +def test_python_module_entrypoint_runs() -> None: | ||
| 57 | + """`python -m dlm --version` works (packaging sanity).""" | ||
| 58 | + result = subprocess.run( | ||
| 59 | + [sys.executable, "-m", "dlm", "--version"], | ||
| 60 | + check=True, | ||
| 61 | + capture_output=True, | ||
| 62 | + text=True, | ||
| 63 | + timeout=10, | ||
| 64 | + ) | ||
| 65 | + assert result.stdout.strip() == f"dlm {dlm.__version__}" | ||
tests/unit/__init__.py added
tests/unit/test_cli_telemetry.py added
@@ -0,0 +1,60 @@ | |||
| 1 | +"""Audit F13: the CLI entry point must set telemetry-off env vars before any | ||
| 2 | +downstream imports. We test this by spawning a fresh subprocess (so we get a | ||
| 3 | +clean env) and asserting the vars are set after `import dlm.cli.app` completes. | ||
| 4 | +""" | ||
| 5 | + | ||
| 6 | +from __future__ import annotations | ||
| 7 | + | ||
| 8 | +import subprocess | ||
| 9 | +import sys | ||
| 10 | +import textwrap | ||
| 11 | + | ||
| 12 | + | ||
| 13 | +def test_cli_entry_forces_telemetry_off_env_vars() -> None: | ||
| 14 | + probe = textwrap.dedent( | ||
| 15 | + """\ | ||
| 16 | + import os | ||
| 17 | + # Intentionally unset — we want to see if `import dlm.cli.app` sets them. | ||
| 18 | + for v in ("HF_HUB_DISABLE_TELEMETRY", "DO_NOT_TRACK", | ||
| 19 | + "TRANSFORMERS_NO_ADVISORY_WARNINGS"): | ||
| 20 | + os.environ.pop(v, None) | ||
| 21 | + import dlm.cli.app # noqa: F401 | ||
| 22 | + assert os.environ["HF_HUB_DISABLE_TELEMETRY"] == "1" | ||
| 23 | + assert os.environ["DO_NOT_TRACK"] == "1" | ||
| 24 | + assert os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] == "1" | ||
| 25 | + print("ok") | ||
| 26 | + """ | ||
| 27 | + ) | ||
| 28 | + result = subprocess.run( | ||
| 29 | + [sys.executable, "-c", probe], | ||
| 30 | + check=False, | ||
| 31 | + capture_output=True, | ||
| 32 | + text=True, | ||
| 33 | + timeout=10, | ||
| 34 | + ) | ||
| 35 | + assert result.returncode == 0, result.stderr | ||
| 36 | + assert result.stdout.strip() == "ok" | ||
| 37 | + | ||
| 38 | + | ||
| 39 | +def test_user_preset_telemetry_vars_are_respected() -> None: | ||
| 40 | + """If a user has explicitly set one of these vars to "0", we must NOT | ||
| 41 | + overwrite them — `setdefault` semantics. | ||
| 42 | + """ | ||
| 43 | + probe = textwrap.dedent( | ||
| 44 | + """\ | ||
| 45 | + import os | ||
| 46 | + os.environ["DO_NOT_TRACK"] = "0" | ||
| 47 | + import dlm.cli.app # noqa: F401 | ||
| 48 | + assert os.environ["DO_NOT_TRACK"] == "0" | ||
| 49 | + print("ok") | ||
| 50 | + """ | ||
| 51 | + ) | ||
| 52 | + result = subprocess.run( | ||
| 53 | + [sys.executable, "-c", probe], | ||
| 54 | + check=False, | ||
| 55 | + capture_output=True, | ||
| 56 | + text=True, | ||
| 57 | + timeout=10, | ||
| 58 | + ) | ||
| 59 | + assert result.returncode == 0, result.stderr | ||
| 60 | + assert result.stdout.strip() == "ok" | ||
tests/unit/test_io_text.py added
@@ -0,0 +1,99 @@ | |||
| 1 | +"""Audit F15: UTF-8 strict, BOM strip, CRLF normalization.""" | ||
| 2 | + | ||
| 3 | +from __future__ import annotations | ||
| 4 | + | ||
| 5 | +import logging | ||
| 6 | +from pathlib import Path | ||
| 7 | + | ||
| 8 | +import pytest | ||
| 9 | + | ||
| 10 | +from dlm.io.text import ( | ||
| 11 | + DlmEncodingError, | ||
| 12 | + normalize_for_hashing, | ||
| 13 | + read_text, | ||
| 14 | + read_text_str, | ||
| 15 | + write_text, | ||
| 16 | +) | ||
| 17 | + | ||
| 18 | + | ||
| 19 | +class TestReadText: | ||
| 20 | + def test_utf8_roundtrip(self, tmp_path: Path) -> None: | ||
| 21 | + p = tmp_path / "sample.txt" | ||
| 22 | + p.write_bytes(b"hello, world\n") | ||
| 23 | + assert read_text(p) == "hello, world\n" | ||
| 24 | + | ||
| 25 | + def test_bom_is_stripped_and_warned( | ||
| 26 | + self, tmp_path: Path, caplog: pytest.LogCaptureFixture | ||
| 27 | + ) -> None: | ||
| 28 | + p = tmp_path / "bom.txt" | ||
| 29 | + p.write_bytes(b"\xef\xbb\xbfhello\n") | ||
| 30 | + with caplog.at_level(logging.WARNING, logger="dlm.io.text"): | ||
| 31 | + text = read_text(p) | ||
| 32 | + assert text == "hello\n" | ||
| 33 | + assert any("BOM" in rec.message for rec in caplog.records) | ||
| 34 | + | ||
| 35 | + def test_crlf_is_normalized(self, tmp_path: Path) -> None: | ||
| 36 | + p = tmp_path / "windows.txt" | ||
| 37 | + p.write_bytes(b"line1\r\nline2\r\n") | ||
| 38 | + assert read_text(p) == "line1\nline2\n" | ||
| 39 | + | ||
| 40 | + def test_lone_cr_is_normalized(self, tmp_path: Path) -> None: | ||
| 41 | + p = tmp_path / "classic_mac.txt" | ||
| 42 | + p.write_bytes(b"line1\rline2\r") | ||
| 43 | + assert read_text(p) == "line1\nline2\n" | ||
| 44 | + | ||
| 45 | + def test_mixed_endings_normalized(self, tmp_path: Path) -> None: | ||
| 46 | + p = tmp_path / "mixed.txt" | ||
| 47 | + p.write_bytes(b"a\r\nb\nc\r") | ||
| 48 | + assert read_text(p) == "a\nb\nc\n" | ||
| 49 | + | ||
| 50 | + def test_invalid_utf8_raises_with_offset(self, tmp_path: Path) -> None: | ||
| 51 | + p = tmp_path / "bad.txt" | ||
| 52 | + # \xff is never valid as a start byte in UTF-8 | ||
| 53 | + p.write_bytes(b"ok\xffbad") | ||
| 54 | + with pytest.raises(DlmEncodingError) as exc_info: | ||
| 55 | + read_text(p) | ||
| 56 | + assert exc_info.value.byte_offset == 2 | ||
| 57 | + assert exc_info.value.path == p | ||
| 58 | + | ||
| 59 | + | ||
| 60 | +class TestReadTextStr: | ||
| 61 | + def test_bytes_roundtrip(self) -> None: | ||
| 62 | + assert read_text_str(b"hello\n") == "hello\n" | ||
| 63 | + | ||
| 64 | + def test_invalid_raises_with_source(self) -> None: | ||
| 65 | + with pytest.raises(DlmEncodingError) as exc_info: | ||
| 66 | + read_text_str(b"\xff", source="fixture:broken") | ||
| 67 | + assert "fixture:broken" in str(exc_info.value) | ||
| 68 | + | ||
| 69 | + | ||
| 70 | +class TestWriteText: | ||
| 71 | + def test_writes_utf8_lf_no_bom(self, tmp_path: Path) -> None: | ||
| 72 | + p = tmp_path / "out.txt" | ||
| 73 | + write_text(p, "line1\r\nline2\r\n") | ||
| 74 | + raw = p.read_bytes() | ||
| 75 | + assert raw == b"line1\nline2\n" | ||
| 76 | + | ||
| 77 | + def test_write_is_atomic_leaves_no_tmp(self, tmp_path: Path) -> None: | ||
| 78 | + p = tmp_path / "out.txt" | ||
| 79 | + write_text(p, "content\n") | ||
| 80 | + siblings = list(tmp_path.iterdir()) | ||
| 81 | + assert siblings == [p], "write_text must clean up temp files" | ||
| 82 | + | ||
| 83 | + | ||
| 84 | +class TestNormalizeForHashing: | ||
| 85 | + def test_bom_stripped(self) -> None: | ||
| 86 | + assert normalize_for_hashing("\ufeffhello") == "hello" | ||
| 87 | + | ||
| 88 | + def test_crlf_normalized(self) -> None: | ||
| 89 | + assert normalize_for_hashing("a\r\nb\rc") == "a\nb\nc" | ||
| 90 | + | ||
| 91 | + def test_hash_identity_across_platforms(self) -> None: | ||
| 92 | + """Windows-style CRLF and Unix-style LF with same content must hash identically. | ||
| 93 | + | ||
| 94 | + This is the core Sprint 03 / audit F15 contract: section IDs are stable | ||
| 95 | + under line-ending edits. | ||
| 96 | + """ | ||
| 97 | + unix = "header\n\nbody line 1\nbody line 2\n" | ||
| 98 | + windows = "header\r\n\r\nbody line 1\r\nbody line 2\r\n" | ||
| 99 | + assert normalize_for_hashing(unix) == normalize_for_hashing(windows) | ||