Python · 2294 bytes Raw Blame History
1 """v1 → v2 migrator: `training.dpo` → `training.preference`.
2
3 v1 (flat):
4
5 training:
6 dpo:
7 enabled: true
8 beta: 0.1
9 loss_type: sigmoid
10 learning_rate: 5e-6
11 num_epochs: 1
12 reference: pre_dpo_adapter
13
14 v2 (method-switched + grouped hyperparams):
15
16 training:
17 preference:
18 enabled: true
19 method: dpo
20 hyperparams:
21 beta: 0.1
22 alpha: 0.1 # ORPO default; DPO ignores
23 learning_rate: 5e-6
24 num_epochs: 1
25 loss_type: sigmoid
26 reference: pre_adapter # renamed from pre_dpo_adapter
27
28 Docs that never set `dpo.*` keep the default `PreferenceConfig()` — the
29 migrator only rewrites the block when it's present. Idempotent: a doc
30 with no `training.dpo` passes through with only the top-level dict (and
31 the `training` dict, when present) shallow-copied.
32 """
33
34 from __future__ import annotations
35
36 from typing import Any, cast
37
38
def migrate(raw: dict[str, object]) -> dict[str, object]:
    """Rewrite a v1 ``training.dpo`` block as a v2 ``training.preference`` block.

    The input is never mutated: the top-level mapping and, when it is a
    dict, the ``training`` mapping are shallow-copied before any change.

    Outcomes, by the shape of ``raw``:

    * ``training`` missing or not a dict: a shallow copy of ``raw``.
    * ``training.dpo`` missing or ``None``: the copy, with any ``dpo`` key
      removed from ``training``.
    * ``training.dpo`` present but not a dict: the value is moved as-is to
      ``training.preference``.
    * ``training.dpo`` is a dict: it is replaced by ``training.preference``
      carrying ``method: "dpo"``, plus whichever of the recognised v1
      keys were set. ``beta``, ``learning_rate`` and ``num_epochs`` are
      grouped under ``hyperparams``; ``enabled``, ``loss_type`` and
      ``reference`` stay at the ``preference`` level. Keys other than
      these six are not carried over.

    Args:
        raw: The parsed v1 document.

    Returns:
        A new top-level dict in the v2 layout.
    """
    migrated = dict(raw)
    training_section = migrated.get("training")
    if not isinstance(training_section, dict):
        return migrated

    # Copy the training section without the legacy key; every remaining
    # path installs this copy, so attach it to the result up front.
    new_training = {
        name: value for name, value in training_section.items() if name != "dpo"
    }
    migrated["training"] = new_training

    legacy = training_section.get("dpo")
    if legacy is None:
        return migrated

    if not isinstance(legacy, dict):
        # Malformed value: hand it over under the new key untouched so
        # the downstream schema validation can report it, instead of the
        # migrator quietly discarding what the user wrote.
        new_training["preference"] = legacy
        return migrated

    legacy_map = cast(dict[str, Any], legacy)
    preference: dict[str, object] = {"method": "dpo"}

    if "enabled" in legacy_map:
        preference["enabled"] = legacy_map["enabled"]

    # Only emit the `hyperparams` group when at least one member was set.
    grouped = {
        name: legacy_map[name]
        for name in ("beta", "learning_rate", "num_epochs")
        if name in legacy_map
    }
    if grouped:
        preference["hyperparams"] = grouped

    if "loss_type" in legacy_map:
        preference["loss_type"] = legacy_map["loss_type"]

    if "reference" in legacy_map:
        reference = legacy_map["reference"]
        # The v1 spelling `pre_dpo_adapter` became `pre_adapter` in v2;
        # any other value is forwarded unchanged.
        if reference == "pre_dpo_adapter":
            reference = "pre_adapter"
        preference["reference"] = reference

    new_training["preference"] = preference
    return migrated