tenseleyflow/sway / 31ea572

Browse files

sway(core): Section / SectionProbe / SectionPreference dataclasses

Authored by espadonne
SHA
31ea572226e9b77ec58fa67b3d90fb6c17309ecd
Parents
6537793
Tree
b39c1f0

2 changed files

Status File + -
A src/dlm_sway/core/sections.py 76 0
A tests/unit/test_sections.py 35 0
src/dlm_sway/core/sections.py added
@@ -0,0 +1,76 @@
1
+"""Minimal section contract for attribution probes.
2
+
3
+The flagship B1 ``section_internalization`` probe needs *structured*
4
+input — a section has an id, a kind, content text, and possibly some
5
+Q/A pairs or chosen/rejected triples. sway defines this shape here so
6
+the probes stay oblivious to the upstream (``.dlm`` parser, custom
7
+loaders, synthetic test fixtures).
8
+
9
+Field names are aligned with :mod:`dlm.doc.sections` but this module
10
+does not import ``dlm`` — the bridge at
11
+:mod:`dlm_sway.integrations.dlm` does the adaptation.
12
+"""
13
+
14
+from __future__ import annotations
15
+
16
+from dataclasses import dataclass, field
17
+from typing import Literal
18
+
19
+SectionKind = Literal["prose", "instruction", "preference"]
20
+
21
+
22
+@dataclass(frozen=True, slots=True)
23
+class SectionProbe:
24
+    """A ``(prompt, gold)`` pair lifted from an INSTRUCTION section."""
25
+
26
+    prompt: str
27
+    gold: str
28
+
29
+
30
+@dataclass(frozen=True, slots=True)
31
+class SectionPreference:
32
+    """A ``(prompt, chosen, rejected)`` triple from a PREFERENCE section."""
33
+
34
+    prompt: str
35
+    chosen: str
36
+    rejected: str
37
+
38
+
39
+@dataclass(frozen=True, slots=True)
40
+class Section:
41
+    """One typed chunk of a training document.
42
+
43
+    Attributes
44
+    ----------
45
+    id:
46
+        Content-addressed identifier. ``.dlm`` uses a 16-hex-char
47
+        sha256 prefix; sway doesn't enforce a format.
48
+    kind:
49
+        Discriminator for which of :attr:`probes` /
50
+        :attr:`preferences` / :attr:`content` is the primary signal.
51
+    content:
52
+        Raw section text. Always populated; used by the rolling-PPL
53
+        path for PROSE sections.
54
+    probes:
55
+        For INSTRUCTION: parsed Q/A pairs. Empty tuple for others.
56
+    preferences:
57
+        For PREFERENCE: parsed chosen/rejected triples. Empty otherwise.
58
+    tag:
59
+        Optional free-form label for the section (e.g., "intro",
60
+        "api-reference"). Surfaces in per-section reports.
61
+    """
62
+
63
+    id: str
64
+    kind: SectionKind
65
+    content: str
66
+    probes: tuple[SectionProbe, ...] = field(default_factory=tuple)
67
+    preferences: tuple[SectionPreference, ...] = field(default_factory=tuple)
68
+    tag: str | None = None
69
+
70
+
71
def filter_kinds(
    sections: tuple[Section, ...], kinds: tuple[SectionKind, ...]
) -> tuple[Section, ...]:
    """Return only sections whose ``kind`` matches one of ``kinds``.

    The relative order of ``sections`` is preserved. An empty ``kinds``
    yields an empty tuple.

    Parameters
    ----------
    sections:
        Sections to filter.
    kinds:
        Kinds to keep. A single bare string is accepted and treated as
        a one-element collection.

    Returns
    -------
    tuple
        The matching sections, in their original order.
    """
    # ``set("prose")`` would explode the string into characters and silently
    # match nothing, so treat a lone string as exactly one kind.
    allowed = {kinds} if isinstance(kinds, str) else set(kinds)
    return tuple(section for section in sections if section.kind in allowed)
tests/unit/test_sections.py added
@@ -0,0 +1,35 @@
1
+"""Tests for :mod:`dlm_sway.core.sections`."""
2
+
3
+from __future__ import annotations
4
+
5
+from dlm_sway.core.sections import (
6
+    Section,
7
+    SectionPreference,
8
+    SectionProbe,
9
+    filter_kinds,
10
+)
11
+
12
+
13
def test_default_field_types() -> None:
    """A Section built from required fields only gets empty/None defaults."""
    section = Section(id="abc", kind="prose", content="hello world")
    assert section.probes == ()
    assert section.preferences == ()
    assert section.tag is None
18
+
19
+
20
def test_filter_kinds() -> None:
    """Filtering on ``("prose",)`` keeps exactly the one prose section."""
    prose = Section(id="a", kind="prose", content="x")
    instruction = Section(id="b", kind="instruction", content="y")
    preference = Section(id="c", kind="preference", content="z")
    sections = (prose, instruction, preference)

    only_prose = filter_kinds(sections, ("prose",))

    assert len(only_prose) == 1
    assert only_prose[0].id == "a"
29
+
30
+
31
def test_section_probe_and_preference() -> None:
    """Probe and preference records expose the values they were built with."""
    probe = SectionProbe(prompt="Q", gold="A")
    assert probe.prompt == "Q"

    preference = SectionPreference(prompt="P", chosen="good", rejected="bad")
    assert preference.chosen == "good"