tenseleyflow/sway / 98ad941

Browse files

rename CLI + source references to sway; keep dlm-sway as the PyPI wheel name

Authored by espadonne
SHA
98ad9417c94e1bbeb97cf5e553878d7953513f69
Parents
3890e1e
Tree
02c7f68

13 changed files

Status File + -
A .gitignore 28 0
M README.md 28 16
M pyproject.toml 4 3
M src/dlm_sway/__init__.py 5 1
M src/dlm_sway/backends/hf.py 4 3
M src/dlm_sway/cli/__init__.py 1 1
M src/dlm_sway/cli/app.py 4 4
M src/dlm_sway/cli/commands.py 5 5
M src/dlm_sway/core/errors.py 2 2
M src/dlm_sway/suite/report.py 4 4
M src/dlm_sway/suite/spec.py 2 2
M tests/unit/test_cli.py 4 4
M tests/unit/test_suite_score_report.py 2 2
.gitignoreadded
@@ -0,0 +1,28 @@
1
+# Python
2
+__pycache__/
3
+*.py[cod]
4
+*$py.class
5
+*.so
6
+
7
+# Virtual envs
8
+.venv/
9
+venv/
10
+.env
11
+
12
+# Tooling caches
13
+.mypy_cache/
14
+.pytest_cache/
15
+.ruff_cache/
16
+.coverage
17
+htmlcov/
18
+
19
+# Build artifacts
20
+build/
21
+dist/
22
+*.egg-info/
23
+
24
+# Editor
25
+.idea/
26
+.vscode/
27
+*.swp
28
+.DS_Store
README.mdmodified
@@ -1,4 +1,4 @@
1
-# dlm-sway
1
+# sway
22
 
33
 Differential testing for fine-tuned causal language models.
44
 
@@ -6,10 +6,16 @@ Differential testing for fine-tuned causal language models.
66
 in a meaningful way, or is the model just defaulting to the pretrained
77
 base?*
88
 
9
-`dlm-sway` gives you a trustworthy, reproducible answer with eleven
9
+`sway` gives you a trustworthy, reproducible answer with eleven
1010
 purpose-built primitives, each z-scored against a null-adapter baseline.
1111
 No LLM judges. No external APIs. Deterministic on CPU where possible.
1212
 
13
+> **Naming note.** The source repo and CLI entry point are both `sway`.
14
+> The PyPI wheel is named `dlm-sway` because `sway` is already taken on
15
+> PyPI by an unrelated project. `pip install dlm-sway` installs the
16
+> `sway` command — a distribution name that differs from the command or
17
+> import name is common on PyPI (e.g. `pyyaml` → `import yaml`).
18
+
1319
 ## Install
1420
 
1521
 ```bash
@@ -22,7 +28,7 @@ pip install "dlm-sway[dlm]" # auto-generate tests from a .dlm file
2228
 ## 90-second smoke test
2329
 
2430
 ```bash
25
-dlm-sway check path/to/adapter --base HuggingFaceTB/SmolLM2-135M-Instruct
31
+sway check path/to/adapter --base HuggingFaceTB/SmolLM2-135M-Instruct
2632
 ```
2733
 
2834
 Outputs a verdict in under a minute on CPU for small models: *your
@@ -39,17 +45,18 @@ models:
3945
   ft:   {kind: hf, base: "HuggingFaceTB/SmolLM2-135M-Instruct",
4046
          adapter: "./runs/adapter/v0003"}
4147
 suite:
42
-  - {name: knows_concept, kind: dir,
43
-     prompt: "The Dunning-Kruger effect describes",
44
-     target: " a cognitive bias where",
45
-     distractor: " a programming language"}
46
-  - {name: no_reversion, kind: adapter_revert, paraphrases: 4}
48
+  - {name: null_baseline,       kind: null_adapter, runs: 3}
49
+  - {name: doc_divergence,      kind: delta_kl,
50
+     prompts: ["The key insight is", "An important rule"]}
4751
   - {name: section_attribution, kind: section_internalization}
52
+  - {name: no_leakage,          kind: leakage}
53
+  - {name: ablation_shape,      kind: adapter_ablation,
54
+     prompts: ["Tell me more about"]}
4855
 ```
4956
 
5057
 ```bash
51
-dlm-sway run sway.yaml              # full report to terminal + JSON
52
-dlm-sway gate sway.yaml --junit     # CI-friendly; non-zero on fail
58
+sway run sway.yaml              # full report to terminal + JSON
59
+sway gate sway.yaml --junit     # CI-friendly; non-zero on fail
5360
 ```
5461
 
5562
 ## Why it exists
@@ -60,8 +67,7 @@ user-authored document. The right question is *"did the adapter actually
6067
 move the model toward what I wrote?"* — and existing tools answer this
6168
 poorly.
6269
 
63
-`dlm-sway` answers it directly via eleven primitives across four
64
-categories:
70
+`sway` answers it directly via eleven primitives across four categories:
6571
 
6672
 | Category      | Primitives                                            |
6773
 |---------------|-------------------------------------------------------|
@@ -77,16 +83,22 @@ response. A degenerate one shows a step function or an overshoot-then-
7783
 crash. Nobody else does this because nobody else gets this close to the
7884
 adapter math.
7985
 
86
+**The calibration.** Every numeric probe z-scores its raw metric against
87
+a null-adapter baseline — a same-structure LoRA with random-init weights.
88
+"Your adapter's KL is 4.2σ above noise" is a far stronger claim than a
89
+fixed threshold. The null-adapter calibration requires a backend that
90
+implements `NullCalibratedBackend` (the HF backend does).
91
+
8092
 ## The `.dlm` integration
8193
 
8294
 If you trained your adapter via the [DocumentLanguageModel
83
-project](https://github.com/tenseleyFlow/DocumentLanguageModel), sway
84
-can auto-generate a test suite from your document's sections:
95
+project](https://github.com/tenseleyFlow/DocumentLanguageModel), `sway`
96
+auto-generates a test suite from your document's sections:
8597
 
8698
 ```bash
8799
 pip install "dlm-sway[hf,dlm]"
88
-dlm-sway autogen path/to/doc.dlm -o sway.yaml
89
-dlm-sway run sway.yaml
100
+sway autogen path/to/doc.dlm -o sway.yaml
101
+sway run sway.yaml
90102
 ```
91103
 
92104
 Per-section attribution tells you *which* parts of your document
pyproject.tomlmodified
@@ -87,11 +87,12 @@ all = [
8787
 ]
8888
 
8989
 [project.scripts]
90
-dlm-sway = "dlm_sway.cli.app:main"
90
+sway = "dlm_sway.cli.app:main"
9191
 
9292
 [project.urls]
93
-Homepage = "https://github.com/tenseleyFlow/DocumentLanguageModel"
94
-Issues = "https://github.com/tenseleyFlow/DocumentLanguageModel/issues"
93
+Homepage = "https://github.com/tenseleyFlow/sway"
94
+Issues = "https://github.com/tenseleyFlow/sway/issues"
95
+"Related project" = "https://github.com/tenseleyFlow/DocumentLanguageModel"
9596
 
9697
 [dependency-groups]
9798
 dev = [
src/dlm_sway/__init__.pymodified
@@ -1,4 +1,8 @@
1
-"""dlm-sway — differential testing for fine-tuned causal language models."""
1
+"""sway — differential testing for fine-tuned causal language models.
2
+
3
+Published on PyPI as ``dlm-sway`` (the short name is taken); the CLI
4
+entry point and source repo are ``sway``.
5
+"""
26
 
37
 from __future__ import annotations
48
 
src/dlm_sway/backends/hf.pymodified
@@ -256,9 +256,10 @@ class HuggingFaceDifferentialBackend:
256256
     def as_base(self) -> Iterator[_HFView]:
257257
         self._enter("base")
258258
         try:
259
-            # peft.PeftModel.disable_adapter is a context manager; mypy
260
-            # mis-reads it as a Tensor on this transformers version.
261
-            with self._peft_model.disable_adapter():  # type: ignore[operator]
259
+            # peft.PeftModel.disable_adapter is a context manager; newer
260
+            # transformers builds ship stubs that mis-type it as a Tensor,
261
+            # so we warn-only there (see hf backend mypy overrides).
262
+            with self._peft_model.disable_adapter():
262263
                 yield self._make_view("base")
263264
         finally:
264265
             self._exit()
src/dlm_sway/cli/__init__.pymodified
@@ -1,1 +1,1 @@
1
-"""Command-line interface (entry point: ``dlm-sway``)."""
1
+"""Command-line interface (entry point: ``sway``)."""
src/dlm_sway/cli/app.pymodified
@@ -1,7 +1,7 @@
1
-"""dlm-sway CLI entry point.
1
+"""sway CLI entry point.
22
 
33
 ``pip install dlm-sway`` installs this module's :func:`main` as the
4
-``dlm-sway`` console script. Every subcommand is a thin wrapper around a
4
+``sway`` console script. Every subcommand is a thin wrapper around a
55
 library-level function so the CLI surface mirrors what programmatic
66
 callers get.
77
 """
@@ -14,7 +14,7 @@ from dlm_sway import __version__
1414
 from dlm_sway.cli import commands
1515
 
1616
 app = typer.Typer(
17
-    name="dlm-sway",
17
+    name="sway",
1818
     no_args_is_help=True,
1919
     add_completion=False,
2020
     help="Differential testing for fine-tuned causal language models.",
@@ -23,7 +23,7 @@ app = typer.Typer(
2323
 
2424
 def _version_callback(value: bool) -> None:
2525
     if value:
26
-        typer.echo(f"dlm-sway {__version__}")
26
+        typer.echo(f"sway {__version__}")
2727
         raise typer.Exit()
2828
 
2929
 
src/dlm_sway/cli/commands.pymodified
@@ -1,4 +1,4 @@
1
-"""Command implementations for the ``dlm-sway`` CLI.
1
+"""Command implementations for the ``sway`` CLI.
22
 
33
 Each function here is wired to a subcommand in :mod:`dlm_sway.cli.app`.
44
 Commands deliberately do as little as possible themselves — the real
@@ -212,7 +212,7 @@ def autogen_cmd(
212212
         typer.Option("--out", "-o", help="Where to write the generated sway.yaml."),
213213
     ] = Path("sway.yaml"),
214214
 ) -> None:
215
-    """Generate a sway.yaml from a .dlm file (requires dlm-sway[dlm])."""
215
+    """Generate a sway.yaml from a .dlm file (requires the ``dlm-sway[dlm]`` extra)."""
216216
     import importlib
217217
 
218218
     try:
@@ -237,7 +237,7 @@ def autogen_cmd(
237237
 def doctor_cmd() -> None:
238238
     """Print backend availability and version info."""
239239
     console = Console()
240
-    console.print(f"[bold]dlm-sway[/bold] {__version__}")
240
+    console.print(f"[bold]sway[/bold] {__version__}")
241241
     console.print(f"  python:    {sys.version.split()[0]}")
242242
     console.print(f"  platform:  {sys.platform}")
243243
     console.print()
@@ -360,7 +360,7 @@ def _probe_import(name: str) -> str:
360360
 def _render_markdown_from_json(raw: dict[str, Any]) -> str:
361361
     score: dict[str, Any] = raw.get("score", {})
362362
     lines: list[str] = [
363
-        "# dlm-sway report",
363
+        "# sway report",
364364
         "",
365365
         f"**Overall:** {score.get('overall', 0.0):.2f} (`{score.get('band', '?')}`)  ",
366366
         f"**Base:** `{raw.get('base_model_id', '?')}`  ",
@@ -384,7 +384,7 @@ def _render_junit_from_json(raw: dict[str, Any]) -> str:
384384
     import xml.etree.ElementTree as ET
385385
 
386386
     probes: list[dict[str, Any]] = raw.get("probes", [])
387
-    testsuite = ET.Element("testsuite", {"name": "dlm-sway", "tests": str(len(probes))})
387
+    testsuite = ET.Element("testsuite", {"name": "sway", "tests": str(len(probes))})
388388
     for p in probes:
389389
         tc = ET.SubElement(testsuite, "testcase", {"classname": p["kind"], "name": p["name"]})
390390
         if p["verdict"] == "fail":
src/dlm_sway/core/errors.pymodified
@@ -1,4 +1,4 @@
1
-"""Exception hierarchy for dlm-sway.
1
+"""Exception hierarchy for sway.
22
 
33
 Every error sway raises inherits from :class:`SwayError` so callers can
44
 catch the whole family with a single ``except``. Subclasses carry enough
@@ -11,7 +11,7 @@ from __future__ import annotations
1111
 
1212
 
1313
 class SwayError(Exception):
14
-    """Root of the dlm-sway exception hierarchy."""
14
+    """Root of the sway exception hierarchy."""
1515
 
1616
 
1717
 class SpecValidationError(SwayError):
src/dlm_sway/suite/report.pymodified
@@ -8,7 +8,7 @@ JSON is the machine-readable source of truth — same fields as the
88
 :class:`SuiteResult` dataclass but flattened for easy downstream parsing
99
 (dashboards, diff tools, history tracking).
1010
 
11
-JUnit XML exists to drop into CI pipelines so ``dlm-sway gate``
11
+JUnit XML exists to drop into CI pipelines so ``sway gate``
1212
 integrates with existing test dashboards with no extra glue.
1313
 """
1414
 
@@ -40,7 +40,7 @@ def to_terminal(suite: SuiteResult, score: SwayScore, *, console: Console | None
4040
     c = console or Console()
4141
 
4242
     header = Text.assemble(
43
-        ("dlm-sway report — ", "bold"),
43
+        ("sway report — ", "bold"),
4444
         (suite.base_model_id, "cyan"),
4545
         ("  vs  ", "dim"),
4646
         (_adapter_label(suite.adapter_id), "cyan"),
@@ -153,7 +153,7 @@ def to_junit(suite: SuiteResult, score: SwayScore) -> str:
153153
     testsuite = ET.Element(
154154
         "testsuite",
155155
         {
156
-            "name": "dlm-sway",
156
+            "name": "sway",
157157
             "tests": str(len(suite.probes)),
158158
             "failures": str(sum(1 for p in suite.probes if p.verdict == Verdict.FAIL)),
159159
             "errors": str(sum(1 for p in suite.probes if p.verdict == Verdict.ERROR)),
@@ -187,7 +187,7 @@ def to_junit(suite: SuiteResult, score: SwayScore) -> str:
187187
 def to_markdown(suite: SuiteResult, score: SwayScore) -> str:
188188
     """A portable, CI-friendly markdown report."""
189189
     buf = StringIO()
190
-    buf.write("# dlm-sway report\n\n")
190
+    buf.write("# sway report\n\n")
191191
     buf.write(f"**Overall:** {score.overall:.2f} (`{score.band}`)  \n")
192192
     buf.write(f"**Base:** `{suite.base_model_id}`  \n")
193193
     buf.write(f"**Adapter:** `{_adapter_label(suite.adapter_id)}`  \n")
src/dlm_sway/suite/spec.pymodified
@@ -38,7 +38,7 @@ class SuiteDefaults(BaseModel):
3838
     instead of toggling on one. More memory-heavy; only useful when a
3939
     backend can't do in-place toggling."""
4040
     coverage_threshold: Annotated[float, Field(ge=0.0, le=1.0)] = 0.6
41
-    """Minimum composite score for ``dlm-sway gate`` to pass."""
41
+    """Minimum composite score for ``sway gate`` to pass."""
4242
 
4343
 
4444
 class SwaySpec(BaseModel):
@@ -58,7 +58,7 @@ class SwaySpec(BaseModel):
5858
     """Optional path to a ``.dlm`` file. When present, the runner asks
5959
     :mod:`dlm_sway.integrations.dlm.resolver` for typed sections and
6060
     hands them to probes via :attr:`RunContext.sections`. Auto-populated
61
-    by ``dlm-sway autogen``."""
61
+    by ``sway autogen``."""
6262
 
6363
     def check_version(self) -> None:
6464
         """Raise ``ValueError`` if the spec version is unsupported.
tests/unit/test_cli.pymodified
@@ -1,4 +1,4 @@
1
-"""Smoke tests for the dlm-sway CLI.
1
+"""Smoke tests for the sway CLI.
22
 
33
 We avoid exercising backends (they need real models) and instead test
44
 arg parsing, error paths, and the read-only commands (``doctor``,
@@ -18,7 +18,7 @@ from dlm_sway.cli.app import app
1818
 def test_version_exits_zero() -> None:
1919
     result = CliRunner().invoke(app, ["--version"])
2020
     assert result.exit_code == 0
21
-    assert "dlm-sway" in result.stdout
21
+    assert "sway" in result.stdout
2222
 
2323
 
2424
 def test_help_lists_all_commands() -> None:
@@ -32,7 +32,7 @@ def test_doctor_runs(capsys) -> None: # type: ignore[no-untyped-def]
3232
     result = CliRunner().invoke(app, ["doctor"])
3333
     assert result.exit_code == 0
3434
     # Rich applies color codes by default; assert the bare product name appears.
35
-    assert "dlm-sway" in result.stdout
35
+    assert "sway" in result.stdout
3636
     assert "backends" in result.stdout
3737
 
3838
 
@@ -69,7 +69,7 @@ def test_report_from_json(tmp_path: Path) -> None:
6969
 
7070
     md = CliRunner().invoke(app, ["report", str(path), "--format", "md"])
7171
     assert md.exit_code == 0
72
-    assert "dlm-sway report" in md.stdout
72
+    assert "sway report" in md.stdout
7373
 
7474
     junit = CliRunner().invoke(app, ["report", str(path), "--format", "junit"])
7575
     assert junit.exit_code == 0
tests/unit/test_suite_score_report.pymodified
@@ -165,7 +165,7 @@ class TestMarkdown:
165165
         )
166166
         s = score.compute(suite)
167167
         md = report.to_markdown(suite, s)
168
-        assert "dlm-sway report" in md
168
+        assert "sway report" in md
169169
         assert "| p1 | `__score_adherence`" in md
170170
 
171171
 
@@ -204,7 +204,7 @@ class TestTerminal:
204204
         console = Console(file=buf, force_terminal=False, width=120)
205205
         report.to_terminal(suite, s, console=console)
206206
         out = buf.getvalue()
207
-        assert "dlm-sway report" in out
207
+        assert "sway report" in out
208208
         assert "overall:" in out
209209
         assert "p1" in out
210210
         assert "p2" in out