test: add smoke, telemetry-gate, and io.text suites
- SHA: 52f7877 (52f787783c654b405ab3da6b435a717b0a2042a4)
- Parents: d2d152a
- Tree: ddc2dab

| Status | File | + | - |
|---|---|---|---|
| A | tests/__init__.py | 0 | 0 |
| A | tests/e2e/__init__.py | 0 | 0 |
| A | tests/integration/__init__.py | 0 | 0 |
| A | tests/test_smoke.py | 65 | 0 |
| A | tests/unit/__init__.py | 0 | 0 |
| A | tests/unit/test_cli_telemetry.py | 60 | 0 |
| A | tests/unit/test_io_text.py | 99 | 0 |

tests/__init__.py added
tests/e2e/__init__.py added
tests/integration/__init__.py added
tests/test_smoke.py added
@@ -0,0 +1,65 @@ | ||
| 1 | +"""Smoke tests: the package imports and the CLI boots.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import subprocess | |
| 6 | +import sys | |
| 7 | + | |
| 8 | +from typer.testing import CliRunner | |
| 9 | + | |
| 10 | +import dlm | |
| 11 | +from dlm.cli.app import app | |
| 12 | + | |
| 13 | + | |
| 14 | +def test_package_version_is_set() -> None: | |
| 15 | + assert dlm.__version__ | |
| 16 | + assert dlm.__version__ != "0.0.0+unknown", ( | |
| 17 | + "package must be installed via uv sync / pip install for version lookup" | |
| 18 | + ) | |
| 19 | + | |
| 20 | + | |
| 21 | +def test_cli_version_flag() -> None: | |
| 22 | + runner = CliRunner() | |
| 23 | + result = runner.invoke(app, ["--version"]) | |
| 24 | + assert result.exit_code == 0, result.output | |
| 25 | + assert result.output.strip() == f"dlm {dlm.__version__}" | |
| 26 | + | |
| 27 | + | |
| 28 | +def test_cli_help_lists_all_v1_subcommands() -> None: | |
| 29 | + runner = CliRunner() | |
| 30 | + result = runner.invoke(app, ["--help"]) | |
| 31 | + assert result.exit_code == 0 | |
| 32 | + expected = { | |
| 33 | + "init", | |
| 34 | + "train", | |
| 35 | + "prompt", | |
| 36 | + "export", | |
| 37 | + "pack", | |
| 38 | + "unpack", | |
| 39 | + "doctor", | |
| 40 | + "show", | |
| 41 | + "migrate", | |
| 42 | + } | |
| 43 | + for name in expected: | |
| 44 | + assert name in result.output, f"`dlm --help` missing subcommand {name!r}" | |
| 45 | + | |
| 46 | + | |
| 47 | +def test_cli_subcommand_stub_raises_notimplementederror() -> None: | |
| 48 | + runner = CliRunner() | |
| 49 | + # Typer surfaces the exception; catch_exceptions=True makes the runner store it on result.exception. | |
| 50 | + result = runner.invoke(app, ["train", "nonexistent.dlm"], catch_exceptions=True) | |
| 51 | + assert result.exit_code != 0 | |
| 52 | + assert isinstance(result.exception, NotImplementedError) | |
| 53 | + assert "Sprint 09" in str(result.exception) | |
| 54 | + | |
| 55 | + | |
| 56 | +def test_python_module_entrypoint_runs() -> None: | |
| 57 | + """`python -m dlm --version` works (packaging sanity).""" | |
| 58 | + result = subprocess.run( | |
| 59 | + [sys.executable, "-m", "dlm", "--version"], | |
| 60 | + check=True, | |
| 61 | + capture_output=True, | |
| 62 | + text=True, | |
| 63 | + timeout=10, | |
| 64 | + ) | |
| 65 | + assert result.stdout.strip() == f"dlm {dlm.__version__}" | |

tests/unit/__init__.py added
tests/unit/test_cli_telemetry.py added
@@ -0,0 +1,60 @@ | ||
| 1 | +"""Audit F13: the CLI entry point must set telemetry-off env vars before any | |
| 2 | +downstream imports. We test this by spawning a fresh subprocess (so we get a | |
| 3 | +clean env) and asserting the vars are set after `import dlm.cli.app` runs. | |
| 4 | +""" | |
| 5 | + | |
| 6 | +from __future__ import annotations | |
| 7 | + | |
| 8 | +import subprocess | |
| 9 | +import sys | |
| 10 | +import textwrap | |
| 11 | + | |
| 12 | + | |
| 13 | +def test_cli_entry_forces_telemetry_off_env_vars() -> None: | |
| 14 | + probe = textwrap.dedent( | |
| 15 | + """\ | |
| 16 | + import os | |
| 17 | + # Intentionally unset — we want to see if `import dlm.cli.app` sets them. | |
| 18 | + for v in ("HF_HUB_DISABLE_TELEMETRY", "DO_NOT_TRACK", | |
| 19 | + "TRANSFORMERS_NO_ADVISORY_WARNINGS"): | |
| 20 | + os.environ.pop(v, None) | |
| 21 | + import dlm.cli.app # noqa: F401 | |
| 22 | + assert os.environ["HF_HUB_DISABLE_TELEMETRY"] == "1" | |
| 23 | + assert os.environ["DO_NOT_TRACK"] == "1" | |
| 24 | + assert os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] == "1" | |
| 25 | + print("ok") | |
| 26 | + """ | |
| 27 | + ) | |
| 28 | + result = subprocess.run( | |
| 29 | + [sys.executable, "-c", probe], | |
| 30 | + check=False, | |
| 31 | + capture_output=True, | |
| 32 | + text=True, | |
| 33 | + timeout=10, | |
| 34 | + ) | |
| 35 | + assert result.returncode == 0, result.stderr | |
| 36 | + assert result.stdout.strip() == "ok" | |
| 37 | + | |
| 38 | + | |
| 39 | +def test_user_preset_telemetry_vars_are_respected() -> None: | |
| 40 | + """If a user has explicitly set one of these vars to "0", we must NOT | |
| 41 | + overwrite them — `setdefault` semantics. | |
| 42 | + """ | |
| 43 | + probe = textwrap.dedent( | |
| 44 | + """\ | |
| 45 | + import os | |
| 46 | + os.environ["DO_NOT_TRACK"] = "0" | |
| 47 | + import dlm.cli.app # noqa: F401 | |
| 48 | + assert os.environ["DO_NOT_TRACK"] == "0" | |
| 49 | + print("ok") | |
| 50 | + """ | |
| 51 | + ) | |
| 52 | + result = subprocess.run( | |
| 53 | + [sys.executable, "-c", probe], | |
| 54 | + check=False, | |
| 55 | + capture_output=True, | |
| 56 | + text=True, | |
| 57 | + timeout=10, | |
| 58 | + ) | |
| 59 | + assert result.returncode == 0, result.stderr | |
| 60 | + assert result.stdout.strip() == "ok" | |

tests/unit/test_io_text.py added
@@ -0,0 +1,99 @@ | ||
| 1 | +"""Audit F15: UTF-8 strict, BOM strip, CRLF normalization.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import logging | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 8 | +import pytest | |
| 9 | + | |
| 10 | +from dlm.io.text import ( | |
| 11 | + DlmEncodingError, | |
| 12 | + normalize_for_hashing, | |
| 13 | + read_text, | |
| 14 | + read_text_str, | |
| 15 | + write_text, | |
| 16 | +) | |
| 17 | + | |
| 18 | + | |
| 19 | +class TestReadText: | |
| 20 | + def test_utf8_roundtrip(self, tmp_path: Path) -> None: | |
| 21 | + p = tmp_path / "sample.txt" | |
| 22 | + p.write_bytes(b"hello, world\n") | |
| 23 | + assert read_text(p) == "hello, world\n" | |
| 24 | + | |
| 25 | + def test_bom_is_stripped_and_warned( | |
| 26 | + self, tmp_path: Path, caplog: pytest.LogCaptureFixture | |
| 27 | + ) -> None: | |
| 28 | + p = tmp_path / "bom.txt" | |
| 29 | + p.write_bytes(b"\xef\xbb\xbfhello\n") | |
| 30 | + with caplog.at_level(logging.WARNING, logger="dlm.io.text"): | |
| 31 | + text = read_text(p) | |
| 32 | + assert text == "hello\n" | |
| 33 | + assert any("BOM" in rec.message for rec in caplog.records) | |
| 34 | + | |
| 35 | + def test_crlf_is_normalized(self, tmp_path: Path) -> None: | |
| 36 | + p = tmp_path / "windows.txt" | |
| 37 | + p.write_bytes(b"line1\r\nline2\r\n") | |
| 38 | + assert read_text(p) == "line1\nline2\n" | |
| 39 | + | |
| 40 | + def test_lone_cr_is_normalized(self, tmp_path: Path) -> None: | |
| 41 | + p = tmp_path / "classic_mac.txt" | |
| 42 | + p.write_bytes(b"line1\rline2\r") | |
| 43 | + assert read_text(p) == "line1\nline2\n" | |
| 44 | + | |
| 45 | + def test_mixed_endings_normalized(self, tmp_path: Path) -> None: | |
| 46 | + p = tmp_path / "mixed.txt" | |
| 47 | + p.write_bytes(b"a\r\nb\nc\r") | |
| 48 | + assert read_text(p) == "a\nb\nc\n" | |
| 49 | + | |
| 50 | + def test_invalid_utf8_raises_with_offset(self, tmp_path: Path) -> None: | |
| 51 | + p = tmp_path / "bad.txt" | |
| 52 | + # \xff is never valid as a start byte in UTF-8 | |
| 53 | + p.write_bytes(b"ok\xffbad") | |
| 54 | + with pytest.raises(DlmEncodingError) as exc_info: | |
| 55 | + read_text(p) | |
| 56 | + assert exc_info.value.byte_offset == 2 | |
| 57 | + assert exc_info.value.path == p | |
| 58 | + | |
| 59 | + | |
| 60 | +class TestReadTextStr: | |
| 61 | + def test_bytes_roundtrip(self) -> None: | |
| 62 | + assert read_text_str(b"hello\n") == "hello\n" | |
| 63 | + | |
| 64 | + def test_invalid_raises_with_source(self) -> None: | |
| 65 | + with pytest.raises(DlmEncodingError) as exc_info: | |
| 66 | + read_text_str(b"\xff", source="fixture:broken") | |
| 67 | + assert "fixture:broken" in str(exc_info.value) | |
| 68 | + | |
| 69 | + | |
| 70 | +class TestWriteText: | |
| 71 | + def test_writes_utf8_lf_no_bom(self, tmp_path: Path) -> None: | |
| 72 | + p = tmp_path / "out.txt" | |
| 73 | + write_text(p, "line1\r\nline2\r\n") | |
| 74 | + raw = p.read_bytes() | |
| 75 | + assert raw == b"line1\nline2\n" | |
| 76 | + | |
| 77 | + def test_write_is_atomic_leaves_no_tmp(self, tmp_path: Path) -> None: | |
| 78 | + p = tmp_path / "out.txt" | |
| 79 | + write_text(p, "content\n") | |
| 80 | + siblings = list(tmp_path.iterdir()) | |
| 81 | + assert siblings == [p], "write_text must clean up temp files" | |
| 82 | + | |
| 83 | + | |
| 84 | +class TestNormalizeForHashing: | |
| 85 | + def test_bom_stripped(self) -> None: | |
| 86 | + assert normalize_for_hashing("\ufeffhello") == "hello" | |
| 87 | + | |
| 88 | + def test_crlf_normalized(self) -> None: | |
| 89 | + assert normalize_for_hashing("a\r\nb\rc") == "a\nb\nc" | |
| 90 | + | |
| 91 | + def test_hash_identity_across_platforms(self) -> None: | |
| 92 | + """Windows-style CRLF and Unix-style LF with the same content must hash identically. | |
| 93 | + | |
| 94 | + This is the core Sprint 03 / audit F15 contract: section IDs are stable | |
| 95 | + under line-ending edits. | |
| 96 | + """ | |
| 97 | + unix = "header\n\nbody line 1\nbody line 2\n" | |
| 98 | + windows = "header\r\n\r\nbody line 1\r\nbody line 2\r\n" | |
| 99 | + assert normalize_for_hashing(unix) == normalize_for_hashing(windows) | |