tenseleyflow/sway / c5ec164

Browse files

sway(bridge): resolver — .dlm → sway Section + adapter path

Authored by espadonne
SHA
c5ec164228950bf66d8ceefc0b04c165bda0a081
Parents
ceff0ea
Tree
626d86a

1 changed file

StatusFile+-
A src/dlm_sway/integrations/dlm/resolver.py 206 0
src/dlm_sway/integrations/dlm/resolver.pyadded
@@ -0,0 +1,206 @@
1
+"""Resolve a ``.dlm`` file to the artifacts sway needs.
2
+
3
+Imports ``dlm.*`` — requires the ``dlm-sway[dlm]`` extra. Everything
4
+outside this package is oblivious to dlm's internal shape; the bridge
5
+is the only place that knows, e.g., that a dlm section carries a
6
+``kind`` field named ``type`` or that adapters live at
7
+``adapter/versions/vNNNN/``.
8
+"""
9
+
10
+from __future__ import annotations
11
+
12
+import hashlib
13
+from dataclasses import dataclass
14
+from pathlib import Path
15
+
16
+from dlm_sway.core.errors import SwayError
17
+from dlm_sway.core.sections import (
18
+    Section,
19
+    SectionKind,
20
+    SectionPreference,
21
+    SectionProbe,
22
+)
23
+
24
+
25
+@dataclass(frozen=True, slots=True)
26
+class DlmHandle:
27
+    """Everything the sway bridge pulls out of a ``.dlm`` file.
28
+
29
+    Attributes
30
+    ----------
31
+    dlm_id:
32
+        Stable identifier from the frontmatter.
33
+    base_model:
34
+        Either a HF id (``qwen2.5-1.5b``) or an ``hf:org/name`` escape
35
+        hatch, taken verbatim from the frontmatter.
36
+    adapter_path:
37
+        Directory containing the current trained PEFT adapter (resolved
38
+        via dlm's own ``StorePath.for_dlm``). ``None`` if the document
39
+        hasn't been trained yet.
40
+    sections:
41
+        Typed sections ready for sway's probes.
42
+    doc_text:
43
+        Concatenated raw content of all sections. Used by probes that
44
+        need a whole-document stylistic reference (C1).
45
+    """
46
+
47
+    dlm_id: str
48
+    base_model: str
49
+    adapter_path: Path | None
50
+    sections: tuple[Section, ...]
51
+    doc_text: str
52
+
53
+
54
def resolve_dlm(dlm_path: Path) -> DlmHandle:
    """Parse ``dlm_path`` and return a :class:`DlmHandle`.

    Parameters
    ----------
    dlm_path:
        Location of the ``.dlm`` document to resolve.

    Returns
    -------
    DlmHandle
        The frontmatter identifiers, the translated sections, their
        concatenated text, and the current adapter directory.
        ``adapter_path`` is ``None`` when no trained adapter can be found
        on disk — a missing adapter is *not* an error.

    Raises
    ------
    SwayError
        If the ``dlm`` package is not installed, or if the file cannot be
        parsed / lacks the expected frontmatter fields.
    """
    try:
        from dlm.doc.parser import parse_file as dlm_parse_file
    except ImportError as exc:
        raise SwayError("dlm package not installed — run: pip install 'dlm-sway[dlm]'") from exc

    # Everything outside the bridge should only ever see SwayError, so any
    # failure while reading the document is wrapped (with the cause chained).
    try:
        parsed = dlm_parse_file(dlm_path)
        fm = parsed.frontmatter
        dlm_id = fm.dlm_id
        base_model = fm.base_model
        raw_sections = tuple(parsed.sections)
    except SwayError:
        raise
    except Exception as exc:  # noqa: BLE001 — unknown dlm exception shapes
        raise SwayError(f"could not parse dlm file {dlm_path}: {exc}") from exc

    sections = tuple(_translate_section(s) for s in raw_sections)
    doc_text = "\n\n".join(s.content for s in sections)

    return DlmHandle(
        dlm_id=dlm_id,
        base_model=base_model,
        adapter_path=_resolve_adapter_path(dlm_id),
        sections=sections,
        doc_text=doc_text,
    )
80
+
81
+
82
+def _resolve_adapter_path(dlm_id: str) -> Path | None:
83
+    """Locate the current adapter directory for ``dlm_id``.
84
+
85
+    Uses dlm's ``StorePath`` helper if available, else falls back to
86
+    the canonical ``~/.dlm/store/<dlm_id>/adapter/current.txt`` pointer.
87
+    Returns ``None`` if no adapter has been trained yet.
88
+    """
89
+    try:
90
+        from dlm.store.paths import StorePath
91
+
92
+        _store_path_cls: object | None = StorePath
93
+    except ImportError:
94
+        _store_path_cls = None
95
+
96
+    if _store_path_cls is not None:
97
+        try:
98
+            store = _store_path_cls.for_dlm(dlm_id)  # type: ignore[attr-defined]
99
+        except Exception:  # noqa: BLE001 — unknown dlm exception shapes
100
+            return None
101
+        try:
102
+            resolved = store.resolve_current_adapter()
103
+        except (AttributeError, FileNotFoundError):
104
+            resolved = None
105
+        if resolved is not None and resolved.exists():
106
+            return Path(resolved)
107
+
108
+    # Manual fallback in case the dlm API evolves.
109
+    import os
110
+
111
+    home = Path(os.environ.get("DLM_HOME", "~/.dlm")).expanduser()
112
+    current_file = home / "store" / dlm_id / "adapter" / "current.txt"
113
+    if current_file.exists():
114
+        pointer = current_file.read_text(encoding="utf-8").strip()
115
+        candidate = (current_file.parent / pointer).resolve()
116
+        if candidate.exists():
117
+            return candidate
118
+    return None
119
+
120
+
121
def _translate_section(dlm_section: object) -> Section:
    """Adapt a ``dlm.doc.sections.Section`` to sway's section type.

    Field access is deliberately defensive (``getattr`` with fallbacks) so
    that a minor dlm refactor degrades gracefully instead of silently
    misreading section content.

    Parameters
    ----------
    dlm_section:
        A section object produced by dlm's parser.

    Returns
    -------
    Section
        The sway section. ``probes`` is populated only for instruction
        sections and ``preferences`` only for preference sections; the id
        falls back to a hash of the content when dlm supplies none.
    """
    # Prefer ``kind``; fall back to ``type`` for dlm versions that expose
    # the section kind under that name.
    kind_raw = getattr(dlm_section, "kind", None)
    if kind_raw is None:
        kind_raw = getattr(dlm_section, "type", None)
    kind = _normalize_kind(kind_raw)

    # A missing or ``None`` content must become "", not the string "None".
    raw_content = getattr(dlm_section, "content", None)
    content = "" if raw_content is None else str(raw_content)

    section_id = str(
        getattr(dlm_section, "section_id", None)
        or getattr(dlm_section, "id", None)
        or _content_hash(content)
    )
    tag = getattr(dlm_section, "tag", None)

    probes: tuple[SectionProbe, ...] = ()
    preferences: tuple[SectionPreference, ...] = ()
    if kind == "instruction":
        probes = tuple(_extract_instruction_probes(dlm_section))
    elif kind == "preference":
        preferences = tuple(_extract_preference_triples(dlm_section))

    return Section(
        id=section_id,
        kind=kind,
        content=content,
        probes=probes,
        preferences=preferences,
        # Only a real string is a usable tag; anything else is dropped.
        tag=tag if isinstance(tag, str) else None,
    )
154
+
155
+
156
+def _normalize_kind(raw: object) -> SectionKind:
157
+    """Map dlm's SectionType/str to sway's lowercase kind."""
158
+    if raw is None:
159
+        return "prose"
160
+    value = str(raw).lower()
161
+    # dlm uses uppercase StrEnum values like "PROSE"; normalize.
162
+    if value.endswith("prose") or "prose" in value:
163
+        return "prose"
164
+    if "instruction" in value:
165
+        return "instruction"
166
+    if "preference" in value:
167
+        return "preference"
168
+    return "prose"
169
+
170
+
171
+def _extract_instruction_probes(dlm_section: object) -> list[SectionProbe]:
172
+    """Pull (Q, A) pairs out of a dlm INSTRUCTION section.
173
+
174
+    dlm's Section carries its parsed Q/A as ``probes`` or ``qa`` depending
175
+    on version. We read the first non-empty one and build
176
+    :class:`SectionProbe` records defensively.
177
+    """
178
+    raw_probes = getattr(dlm_section, "probes", None) or getattr(dlm_section, "qa", None)
179
+    if not raw_probes:
180
+        return []
181
+    out: list[SectionProbe] = []
182
+    for rp in raw_probes:
183
+        q = str(getattr(rp, "prompt", getattr(rp, "question", "")))
184
+        a = str(getattr(rp, "gold", getattr(rp, "answer", "")))
185
+        if q and a:
186
+            out.append(SectionProbe(prompt=q, gold=a))
187
+    return out
188
+
189
+
190
+def _extract_preference_triples(dlm_section: object) -> list[SectionPreference]:
191
+    """Pull (prompt, chosen, rejected) triples out of a dlm PREFERENCE section."""
192
+    raw = getattr(dlm_section, "preferences", None) or getattr(dlm_section, "triples", None)
193
+    if not raw:
194
+        return []
195
+    out: list[SectionPreference] = []
196
+    for r in raw:
197
+        p = str(getattr(r, "prompt", ""))
198
+        c = str(getattr(r, "chosen", ""))
199
+        rej = str(getattr(r, "rejected", ""))
200
+        if p and c and rej:
201
+            out.append(SectionPreference(prompt=p, chosen=c, rejected=rej))
202
+    return out
203
+
204
+
205
+def _content_hash(content: str) -> str:
206
+    return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]