@@ -0,0 +1,33 @@ |
| 1 | +{"determinism_class": "best_effort", "determinism_notes": ["MPS determinism is best-effort; loss curves are close but not bit-identical."], "pinned_versions": [["bitsandbytes", "0.49.2"], ["peft", "0.19.1"], ["torch", "2.11.0"], ["transformers", "5.5.4"], ["trl", "1.2.0"]], "plan": {"attn_implementation": "sdpa", "effective_batch_size": 16, "est_peak_vram_gb": 7.65, "est_step_seconds": 4.64, "grad_accum": 1, "gradient_checkpointing": false, "micro_batch_size": 16, "precision": "fp32", "quant_compute_dtype": null, "reason": "precision=fp32, attn=sdpa, qlora=off", "use_qlora": false, "world_size": 1}, "run_id": 1, "seed": 42, "type": "banner"} |
| 2 | +{"new": ["1f7383bc187c6d75", "20e6e5b9e3cf1023", "ccba98c6985ce1f7", "099218769319ed17"], "removed": [], "timestamp": "2026-04-26T06:25:16", "type": "delta", "unchanged": []} |
| 3 | +{"type": "step", "step": 0, "loss": 5.234, "lr": 0.0001, "grad_norm": 0.523, "tokens_per_sec": 1234.5, "timestamp": "2026-04-26T06:25:17"} |
| 4 | +{"type": "step", "step": 1, "loss": 4.892, "lr": 0.0001, "grad_norm": 0.487, "tokens_per_sec": 1245.1, "timestamp": "2026-04-26T06:25:18"} |
| 5 | +{"type": "step", "step": 2, "loss": 4.512, "lr": 0.0001, "grad_norm": 0.451, "tokens_per_sec": 1238.7, "timestamp": "2026-04-26T06:25:19"} |
| 6 | +{"type": "step", "step": 3, "loss": 4.187, "lr": 0.0001, "grad_norm": 0.428, "tokens_per_sec": 1241.2, "timestamp": "2026-04-26T06:25:20"} |
| 7 | +{"type": "step", "step": 4, "loss": 3.901, "lr": 0.0001, "grad_norm": 0.412, "tokens_per_sec": 1247.8, "timestamp": "2026-04-26T06:25:21"} |
| 8 | +{"type": "step", "step": 5, "loss": 3.654, "lr": 0.0001, "grad_norm": 0.398, "tokens_per_sec": 1239.4, "timestamp": "2026-04-26T06:25:22"} |
| 9 | +{"type": "step", "step": 6, "loss": 3.421, "lr": 0.0001, "grad_norm": 0.385, "tokens_per_sec": 1242.6, "timestamp": "2026-04-26T06:25:23"} |
| 10 | +{"type": "step", "step": 7, "loss": 3.218, "lr": 0.0001, "grad_norm": 0.371, "tokens_per_sec": 1244.9, "timestamp": "2026-04-26T06:25:24"} |
| 11 | +{"type": "step", "step": 8, "loss": 3.034, "lr": 0.0001, "grad_norm": 0.358, "tokens_per_sec": 1240.1, "timestamp": "2026-04-26T06:25:25"} |
| 12 | +{"type": "step", "step": 9, "loss": 2.871, "lr": 0.0001, "grad_norm": 0.346, "tokens_per_sec": 1243.5, "timestamp": "2026-04-26T06:25:26"} |
| 13 | +{"type": "step", "step": 10, "loss": 2.726, "lr": 0.0001, "grad_norm": 0.334, "tokens_per_sec": 1245.7, "timestamp": "2026-04-26T06:25:27"} |
| 14 | +{"type": "step", "step": 11, "loss": 2.598, "lr": 0.0001, "grad_norm": 0.322, "tokens_per_sec": 1241.9, "timestamp": "2026-04-26T06:25:28"} |
| 15 | +{"type": "step", "step": 12, "loss": 2.485, "lr": 0.0001, "grad_norm": 0.311, "tokens_per_sec": 1244.3, "timestamp": "2026-04-26T06:25:29"} |
| 16 | +{"type": "step", "step": 13, "loss": 2.387, "lr": 0.0001, "grad_norm": 0.301, "tokens_per_sec": 1242.5, "timestamp": "2026-04-26T06:25:30"} |
| 17 | +{"type": "step", "step": 14, "loss": 2.302, "lr": 0.0001, "grad_norm": 0.291, "tokens_per_sec": 1244.8, "timestamp": "2026-04-26T06:25:31"} |
| 18 | +{"type": "step", "step": 15, "loss": 2.228, "lr": 0.0001, "grad_norm": 0.281, "tokens_per_sec": 1241.2, "timestamp": "2026-04-26T06:25:32"} |
| 19 | +{"type": "step", "step": 16, "loss": 2.165, "lr": 0.0001, "grad_norm": 0.272, "tokens_per_sec": 1243.6, "timestamp": "2026-04-26T06:25:33"} |
| 20 | +{"type": "step", "step": 17, "loss": 2.111, "lr": 0.0001, "grad_norm": 0.263, "tokens_per_sec": 1245.0, "timestamp": "2026-04-26T06:25:34"} |
| 21 | +{"type": "step", "step": 18, "loss": 2.066, "lr": 0.0001, "grad_norm": 0.255, "tokens_per_sec": 1242.9, "timestamp": "2026-04-26T06:25:35"} |
| 22 | +{"type": "step", "step": 19, "loss": 2.029, "lr": 0.0001, "grad_norm": 0.247, "tokens_per_sec": 1244.4, "timestamp": "2026-04-26T06:25:36"} |
| 23 | +{"type": "step", "step": 20, "loss": 1.998, "lr": 0.0001, "grad_norm": 0.239, "tokens_per_sec": 1243.1, "timestamp": "2026-04-26T06:25:37"} |
| 24 | +{"type": "step", "step": 21, "loss": 1.974, "lr": 0.0001, "grad_norm": 0.232, "tokens_per_sec": 1245.5, "timestamp": "2026-04-26T06:25:38"} |
| 25 | +{"type": "step", "step": 22, "loss": 1.955, "lr": 0.0001, "grad_norm": 0.225, "tokens_per_sec": 1242.7, "timestamp": "2026-04-26T06:25:39"} |
| 26 | +{"type": "step", "step": 23, "loss": 1.940, "lr": 0.0001, "grad_norm": 0.218, "tokens_per_sec": 1244.0, "timestamp": "2026-04-26T06:25:40"} |
| 27 | +{"type": "step", "step": 24, "loss": 1.929, "lr": 0.0001, "grad_norm": 0.211, "tokens_per_sec": 1243.8, "timestamp": "2026-04-26T06:25:41"} |
| 28 | +{"type": "step", "step": 25, "loss": 1.921, "lr": 0.0001, "grad_norm": 0.205, "tokens_per_sec": 1242.4, "timestamp": "2026-04-26T06:25:42"} |
| 29 | +{"type": "step", "step": 26, "loss": 1.916, "lr": 0.0001, "grad_norm": 0.199, "tokens_per_sec": 1244.7, "timestamp": "2026-04-26T06:25:43"} |
| 30 | +{"type": "step", "step": 27, "loss": 1.913, "lr": 0.0001, "grad_norm": 0.193, "tokens_per_sec": 1243.3, "timestamp": "2026-04-26T06:25:44"} |
| 31 | +{"type": "step", "step": 28, "loss": 1.911, "lr": 0.0001, "grad_norm": 0.187, "tokens_per_sec": 1245.2, "timestamp": "2026-04-26T06:25:45"} |
| 32 | +{"type": "step", "step": 29, "loss": 1.911, "lr": 0.0001, "grad_norm": 0.182, "tokens_per_sec": 1243.0, "timestamp": "2026-04-26T06:25:46"} |
| 33 | +{"adapter_version": 1, "early_stopped": false, "elapsed_seconds": 30.5, "run_id": 1, "steps": 30, "summary_path": "/tmp/sway-fixture/logs/train-000001-20260426T062514.summary.json", "timestamp": "2026-04-26T06:25:47", "type": "run_complete"} |