//! Machine IR — low-level representation between SSA IR and ARM64 assembly.
//!
//! Uses virtual registers (VReg) that are assigned to physical registers by
//! the register allocator; vregs that don't receive a physical register are
//! spilled to stack slots.

/// Virtual register identifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct VRegId(pub u32);

/// Virtual register with type class.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VReg {
    pub id: VRegId,
    pub class: RegClass,
}

/// Register class — determines which physical registers can hold this value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegClass {
    /// 64-bit general purpose (X0-X28; x18 is reserved, x29/x30 are FP/LR).
    Gp64,
    /// 32-bit general purpose (W0-W28).
    Gp32,
    /// FP/SIMD double (D0-D31).
    Fp64,
    /// FP/SIMD single (S0-S31).
    Fp32,
    /// 128-bit NEON vector (Q0-Q31). Covers 4×f32, 2×f64, 4×i32,
    /// 2×i64, etc. — every shape in `IrType::Vector`. Codegen
    /// shares the same physical bank as Fp32/Fp64 (the V registers
    /// are the 128-bit form of D/S), so the regalloc assigns them
    /// from the same pool but at 128-bit width.
    V128,
}
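
// A minimal sketch (not part of the backend): the spill-slot size each
// class plausibly needs, matching the 4 → 8 → 16 alignment ladder in
// `StackFrame::alloc_local` below. The helper name `spill_size` is
// hypothetical, for illustration only.
#[cfg(test)]
mod reg_class_sketch {
    use super::*;

    fn spill_size(class: RegClass) -> u32 {
        match class {
            RegClass::Gp32 | RegClass::Fp32 => 4,
            RegClass::Gp64 | RegClass::Fp64 => 8,
            // Q-register spills need a full 16-byte slot.
            RegClass::V128 => 16,
        }
    }

    #[test]
    fn v128_spills_are_16_bytes() {
        assert_eq!(spill_size(RegClass::V128), 16);
    }
}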

/// ARM64 opcodes that we emit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArmOpcode {
    // ---- Integer arithmetic ----
    AddReg,  // ADD Xd, Xn, Xm
    AddsReg, // ADDS Xd, Xn, Xm (sets flags)
    AdcReg,  // ADC Xd, Xn, Xm
    AddImm,  // ADD Xd, Xn, #imm
    SubReg,  // SUB Xd, Xn, Xm
    SubsReg, // SUBS Xd, Xn, Xm (sets flags)
    SbcReg,  // SBC Xd, Xn, Xm
    SubImm,  // SUB Xd, Xn, #imm
    Mul,     // MUL Xd, Xn, Xm
    Sdiv,    // SDIV Xd, Xn, Xm
    Madd,    // MADD Xd, Xn, Xm, Xa (Xa + Xn*Xm; produced by mul-add peephole)
    Msub,    // MSUB Xd, Xn, Xm, Xa (Xa - Xn*Xm; for imod and the mul-sub peephole)
    Neg,     // NEG Xd, Xm (alias: SUB Xd, XZR, Xm)

    // ---- Logic ----
    AndReg,
    OrrReg,
    EorReg,
    OrnReg, // MVN is ORN Xd, XZR, Xm

    // ---- Shifts ----
    LslReg,
    LsrReg,
    AsrReg,

    // ---- Bit manipulation ----
    Mvn,  // MVN Xd, Xm (bitwise NOT, alias: ORN Xd, XZR, Xm)
    Clz,  // CLZ Xd, Xn (count leading zeros)
    Rbit, // RBIT Xd, Xn (reverse bits)

    // ---- Comparison & select ----
    CmpReg,   // CMP Xn, Xm (alias: SUBS XZR, Xn, Xm)
    CmpImm,   // CMP Xn, #imm
    Cset,     // CSET Xd, cond
    CselReg,  // CSEL Xd, Xn, Xm, cond
    FCmpReg,  // FCMP Dn, Dm
    FCset,    // CSET Xd, cond (after FCMP)
    FcselReg, // FCSEL Dd, Dn, Dm, cond

    // ---- Float arithmetic ----
    FaddS,
    FaddD,
    FsubS,
    FsubD,
    FmulS,
    FmulD,
    FdivS,
    FdivD,
    FnegS,
    FnegD,
    FabsS,
    FabsD,
    FsqrtS,
    FsqrtD,
    // Fused multiply-add/subtract (FMADD/FMSUB/FNMSUB).
    // 3-source: dest = Sa ± Sn*Sm. The FMSUB/FNMSUB variants are
    // declared after the NEON block below.
    FmaddS,
    FmaddD, // FMADD: dest = Sa + Sn*Sm

    // ---- NEON SIMD vector arithmetic (Sprint 12 Stage 2) ----
    //
    // Each opcode encodes the lane shape so emit/encoding stays
    // table-driven. Naming convention: `<Op><LaneCount><LaneType>`.
    // Examples: `FaddV4S` is "fadd Vd.4s, Vn.4s, Vm.4s", `FmlaV2D`
    // is "fmla Vd.2d, Vn.2d, Vm.2d".
    //
    // Operands across this family: dest VReg of class V128 plus the
    // expected source operands for that op. Lane shape is implicit
    // in the opcode; emit just dispatches. (A construction sketch
    // for this family follows the enum.)
    AddV4S, // ADD Vd.4s, Vn.4s, Vm.4s (integer)
    AddV2D, // ADD Vd.2d, Vn.2d, Vm.2d
    SubV4S,
    SubV2D,
    MulV4S, // MUL Vd.4s, Vn.4s, Vm.4s (integer; 2D not in NEON)
    NegV4S,
    NegV2D,
    FaddV4S, // FADD Vd.4s, Vn.4s, Vm.4s
    FaddV2D, // FADD Vd.2d, Vn.2d, Vm.2d
    FsubV4S,
    FsubV2D,
    FmulV4S,
    FmulV2D,
    FdivV4S,
    FdivV2D,
    FnegV4S,
    FnegV2D,
    FabsV4S,
    FabsV2D,
    FsqrtV4S,
    FsqrtV2D,
    FmlaV4S, // FMLA Vd.4s, Vn.4s, Vm.4s (Vd += Vn*Vm)
    FmlaV2D,
    /// BSL Vd.16B, Vn.16B, Vm.16B — bit select. Per-bit:
    /// `Vd[i] = Vd[i] ? Vn[i] : Vm[i]`. Vd is destructive
    /// (input mask + output). Used to lower VSelect.
    BslV16B,
    /// Vector compare (per-lane all-ones / all-zeros result).
    FcmgtV4S,
    FcmgtV2D,
    FcmgeV4S,
    FcmgeV2D,
    FcmeqV4S,
    FcmeqV2D,
    CmgtV4S,
    CmgeV4S,
    CmeqV4S,
    FminV4S,
    FminV2D,
    FmaxV4S,
    FmaxV2D,
    SminV4S, // SMIN (signed integer)
    SmaxV4S,
    UminV4S,
    UmaxV4S,

    // Cross-lane reductions
    FaddpV2S, // FADDP Sd, Vn.2s (pair-add → scalar; 2-lane f32)
    /// `FADDP.4S Vd, Vn, Vm` — 3-operand pairwise add over four
    /// f32 lanes. For cross-lane f32 sum reduction we use this with
    /// `Vn = Vm = v_src`, then follow with FaddpV2S to fold the
    /// remaining two lanes (NEON has no `faddv.4s`).
    FaddpV4S,
    FaddpV2D, // FADDP Dd, Vn.2d (pair-add → scalar; 2-lane f64)
    Faddv4S,  // across-lane f32 sum → scalar (no native faddv.4s; cf. FaddpV4S above)
    Sminv4S, // SMINV Sd, Vn.4s
    Smaxv4S,
    /// `FMAXV.4S Sd, Vn` — across-lane f32 max reduction → scalar.
    FmaxvV4S,
    /// `FMINV.4S Sd, Vn` — across-lane f32 min reduction → scalar.
    FminvV4S,
    /// `FMAXP.2D Dd, Vn` — pairwise f64 max reduction (2 lanes → scalar).
    /// NEON has no `fmaxv.2d`; for two f64 lanes the pairwise form is
    /// the across-lane reduction.
    FmaxpV2DScalar,
    /// `FMINP.2D Dd, Vn` — pairwise f64 min reduction (2 lanes → scalar).
    FminpV2DScalar,
    /// `ADDP.2D Vd, Vn, Vm` — pairwise integer add over two i64 lanes.
    /// Used for i64 cross-lane reduction: `addp.2d v_dst, v_src, v_src`
    /// puts the sum of the two lanes in v_dst[0].
    AddpV2D,
    Uminv4S,
    Umaxv4S,
    Addv4S, // integer cross-lane add over 4×i32

    // Lane move / broadcast
    DupGen4S, // DUP Vd.4s, Wn (broadcast scalar to 4 lanes)
    DupGen2D, // DUP Vd.2d, Xn
    DupEl4S,  // DUP Vd.4s, Vn.s[0] (broadcast lane 0 to 4 lanes)
    DupEl2D,
    Ins4S,    // INS Vd.s[lane], Wn (insert scalar into one lane)
    Ins2D,
    Umov4S,   // UMOV Wd, Vn.s[lane] (extract lane to scalar)
    Umov2D,
    FmovEl4S, // FMOV Sd, Vn.s[lane] (extract f32 lane)
    FmovEl2D,

    // Vector load/store (128-bit Q register)
    LdrQ, // LDR Qt, [Xn, #imm]
    StrQ, // STR Qt, [Xn, #imm]
    /// `mov.16b vN, vM` — 128-bit register-to-register copy.
    /// Used by regalloc when moving a V128 vreg between physical
    /// regs; FmovReg only handles the low 64 bits and would corrupt
    /// the upper lanes of a V128.
    Mov16B,

    // ---- Float fused multiply-subtract (scalar; see the FMADD note above) ----
    FmsubS,
    FmsubD,  // FMSUB: dest = Sa - Sn*Sm
    FnmsubS,
    FnmsubD, // FNMSUB: dest = Sn*Sm - Sa

    // ---- Conversions ----
    ScvtfSW,  // SCVTF Sd, Wn (i32 → f32)
    ScvtfDW,  // SCVTF Dd, Wn (i32 → f64)
    ScvtfSX,  // SCVTF Sd, Xn (i64 → f32)
    ScvtfDX,  // SCVTF Dd, Xn (i64 → f64)
    FcvtzsWS, // FCVTZS Wd, Sn (f32 → i32)
    FcvtzsWD, // FCVTZS Wd, Dn (f64 → i32)
    FcvtzsXS, // FCVTZS Xd, Sn (f32 → i64)
    FcvtzsXD, // FCVTZS Xd, Dn (f64 → i64)
    FcvtSD,   // FCVT Sd, Dn (f64 → f32)
    FcvtDS,   // FCVT Dd, Sn (f32 → f64)

    // ---- Move ----
    Movz,    // MOVZ Xd, #imm16, LSL #shift
    Movk,    // MOVK Xd, #imm16, LSL #shift
    Movn,    // MOVN Xd, #imm16, LSL #shift
    MovReg,  // MOV Xd, Xm (alias: ORR Xd, XZR, Xm)
    FmovReg, // FMOV Dd, Dm

    // ---- Memory ----
    StrImm,   // STR Xt, [Xn, #imm]
    LdrImm,   // LDR Xt, [Xn, #imm]
    StrhImm,  // STRH Wt, [Xn, #imm] (store 16-bit half)
    LdrshImm, // LDRSH Wt, [Xn, #imm] (load 16-bit half, sign-extended)
    StrbImm,  // STRB Wt, [Xn, #imm] (store 8-bit byte)
    LdrsbImm, // LDRSB Wt, [Xn, #imm] (load 8-bit byte, sign-extended)
    StrFpImm, // STR Dt, [Xn, #imm] (float store)
    LdrFpImm, // LDR Dt, [Xn, #imm] (float load)
    // Register-offset loads/stores: address = base + index << shift.
    // Operands: [dest, base, idx, Imm(shift)]. Shift ∈ {0,1,2,3}.
    // Sprint 05: emitted by `scaled_addressing_fusion` from a
    // Movz+Mul+AddReg+Ldr/Str sequence when elem_size ∈ {1,2,4,8}.
    LdrReg,   // LDR Xt|Wt, [Xn, Xm, lsl #shift]
    StrReg,   // STR Xt|Wt, [Xn, Xm, lsl #shift]
    LdrFpReg, // LDR Dt|St, [Xn, Xm, lsl #shift]
    StrFpReg, // STR Dt|St, [Xn, Xm, lsl #shift]
    StpPre,    // STP Xt1, Xt2, [Xn, #imm]! (pre-index)
    LdpPost,   // LDP Xt1, Xt2, [Xn], #imm (post-index)
    StpOffset, // STP Xt1, Xt2, [Xn, #imm] (signed offset, no writeback)
    LdpOffset, // LDP Xt1, Xt2, [Xn, #imm] (signed offset, no writeback)
    AdrpLdr,   // ADRP + LDR sequence (load value from PC-relative address)
    AdrpAdd,   // ADRP + ADD sequence (compute PC-relative address)

    // ---- Branch ----
    B,     // B label
    BCond, // B.cond label
    // Compare-and-branch (single-instruction zero check). Operands:
    //   [VReg|PhysReg of register to test, BlockRef target]
    // Width inferred from the test register's class (Gp32 → cbz w; Gp64 → cbz x).
    // ±1MB range (19-bit signed × 4), same as BCond — relaxed identically.
    Cbz,
    Cbnz,
    // Test-bit-and-branch. Operands:
    //   [VReg|PhysReg of test reg, Imm(bit_index 0..63), BlockRef target]
    // ±32KB range (14-bit signed × 4), tighter than BCond — needs its own relax bound.
    Tbz,
    Tbnz,
    Bl,  // BL label (call)
    Blr, // BLR reg (indirect call)
    Ret, // RET

    // ---- Extend ----
    Sxtw, // SXTW Xd, Wn (sign-extend 32→64)
    Sxth, // SXTH Wd|Xd, Wn (sign-extend 16→32 or 16→64)
    Sxtb, // SXTB Wd|Xd, Wn (sign-extend 8→32 or 8→64)

    // ---- Special ----
    Nop,
    Brk, // BRK #imm16 (debug trap)
}
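
// A minimal sketch (not part of the backend): how a lowering pass might
// build the FADDP.4S + FADDP.2S pair described in the cross-lane
// reduction comments above. The operand layout (sources in `operands`,
// dest in `def`) is an assumption for illustration; `v_src`/`v_tmp`/
// `s_dst` are hypothetical names.
#[cfg(test)]
mod opcode_construction_sketch {
    use super::*;

    #[test]
    fn f32_sum_reduction_pair() {
        let mut mf = MachineFunction::new("reduce".into());
        let v_src = mf.new_vreg(RegClass::V128);
        let v_tmp = mf.new_vreg(RegClass::V128);
        let s_dst = mf.new_vreg(RegClass::Fp32);
        let entry = mf.blocks[0].id;
        // faddp.4s v_tmp, v_src, v_src — fold 4 lanes into 2 partial sums.
        mf.block_mut(entry).insts.push(MachineInst {
            opcode: ArmOpcode::FaddpV4S,
            operands: vec![MachineOperand::VReg(v_src), MachineOperand::VReg(v_src)],
            def: Some(v_tmp),
        });
        // faddp s_dst, v_tmp.2s — fold the remaining 2 lanes to a scalar.
        mf.block_mut(entry).insts.push(MachineInst {
            opcode: ArmOpcode::FaddpV2S,
            operands: vec![MachineOperand::VReg(v_tmp)],
            def: Some(s_dst),
        });
        assert_eq!(mf.block(entry).insts.len(), 2);
    }
}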

/// ARM64 condition codes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArmCond {
    Eq, // equal
    Ne, // not equal
    Hs, // unsigned >=
    Lo, // unsigned <
    Mi, // negative
    Pl, // positive or zero
    Hi, // unsigned >
    Ls, // unsigned <=
    Ge, // signed >=
    Lt, // signed <
    Gt, // signed >
    Le, // signed <=
}

impl ArmCond {
    /// The condition that takes the opposite branch — used by the
    /// branch-relaxation pass when expanding a far `B.cond` into a
    /// short `B.{!cond}` over an unconditional `B`. The pairs are
    /// EQ/NE, HS/LO, MI/PL, HI/LS, GE/LT, GT/LE; involution is
    /// guaranteed (`c.inverse().inverse() == c`).
    pub fn inverse(self) -> ArmCond {
        match self {
            ArmCond::Eq => ArmCond::Ne,
            ArmCond::Ne => ArmCond::Eq,
            ArmCond::Hs => ArmCond::Lo,
            ArmCond::Lo => ArmCond::Hs,
            ArmCond::Mi => ArmCond::Pl,
            ArmCond::Pl => ArmCond::Mi,
            ArmCond::Hi => ArmCond::Ls,
            ArmCond::Ls => ArmCond::Hi,
            ArmCond::Ge => ArmCond::Lt,
            ArmCond::Lt => ArmCond::Ge,
            ArmCond::Gt => ArmCond::Le,
            ArmCond::Le => ArmCond::Gt,
        }
    }
}
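
// A small sketch checking the involution guarantee documented on
// `inverse`: applying it twice returns the original condition, which is
// what branch relaxation relies on when it flips a far `B.cond`.
#[cfg(test)]
mod cond_inverse_sketch {
    use super::*;

    #[test]
    fn inverse_is_an_involution() {
        use ArmCond::*;
        for c in [Eq, Ne, Hs, Lo, Mi, Pl, Hi, Ls, Ge, Lt, Gt, Le] {
            assert_eq!(c.inverse().inverse(), c);
        }
    }
}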

/// A machine operand.
#[derive(Debug, Clone, PartialEq)]
pub enum MachineOperand {
    /// Virtual register.
    VReg(VRegId),
    /// Physical register (post-allocation or fixed registers like SP, FP, LR).
    PhysReg(PhysReg),
    /// Immediate value.
    Imm(i64),
    /// Stack frame slot (offset from FP).
    FrameSlot(i32),
    /// Condition code.
    Cond(ArmCond),
    /// Reference to a machine block (branch target).
    BlockRef(MBlockId),
    /// External symbol name (for BL to functions).
    Extern(String),
    /// Module-level global by name. Used by ADRP+ADD for SAVE'd
    /// locals and module variables, where the operand resolves to
    /// `_globalname@PAGE` / `_globalname@PAGEOFF` at emit time.
    GlobalLabel(String),
    /// Constant pool entry index.
    ConstPool(u32),
    /// Shift amount for MOVZ/MOVK (0, 16, 32, or 48).
    Shift(u8),
}
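
// A minimal sketch (not part of the backend): materializing the 64-bit
// constant 0x0001_0001 with the MOVZ/MOVK opcodes and the `Shift`
// operand above. The operand layout (sources in `operands`, dest in
// `def`) is an assumption for illustration.
#[cfg(test)]
mod movz_movk_sketch {
    use super::*;

    #[test]
    fn materialize_wide_constant() {
        let mut mf = MachineFunction::new("consts".into());
        let dst = mf.new_vreg(RegClass::Gp64);
        let entry = mf.blocks[0].id;
        // movz dst, #0x0001            (bits 0..16)
        mf.block_mut(entry).insts.push(MachineInst {
            opcode: ArmOpcode::Movz,
            operands: vec![MachineOperand::Imm(0x0001), MachineOperand::Shift(0)],
            def: Some(dst),
        });
        // movk dst, #0x0001, lsl #16   (bits 16..32)
        mf.block_mut(entry).insts.push(MachineInst {
            opcode: ArmOpcode::Movk,
            operands: vec![MachineOperand::Imm(0x0001), MachineOperand::Shift(16)],
            def: Some(dst),
        });
        assert_eq!(mf.block(entry).insts.len(), 2);
    }
}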

/// Physical register reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PhysReg {
    /// 64-bit general purpose register (X0-X30).
    Gp(u8),
    /// 32-bit general purpose register (W0-W30).
    Gp32(u8),
    /// 64-bit FP/SIMD register (D0-D31).
    Fp(u8),
    /// 32-bit FP/SIMD register (S0-S31).
    Fp32(u8),
    /// Stack pointer.
    Sp,
    /// Zero register (64-bit context).
    Xzr,
    /// Zero register (32-bit context).
    Wzr,
}

impl PhysReg {
    /// Frame pointer (x29).
    pub const FP: PhysReg = PhysReg::Gp(29);
    /// Link register (x30).
    pub const LR: PhysReg = PhysReg::Gp(30);
}

/// Machine block identifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct MBlockId(pub u32);

/// A machine instruction.
#[derive(Debug, Clone)]
pub struct MachineInst {
    pub opcode: ArmOpcode,
    pub operands: Vec<MachineOperand>,
    /// Virtual register defined by this instruction (if any).
    pub def: Option<VRegId>,
}

/// A machine basic block.
#[derive(Debug, Clone)]
pub struct MachineBlock {
    pub id: MBlockId,
    pub label: String,
    pub insts: Vec<MachineInst>,
}

impl MachineBlock {
    pub fn new(id: MBlockId, label: String) -> Self {
        Self {
            id,
            label,
            insts: Vec::new(),
        }
    }
}

/// Constant pool entry.
#[derive(Debug, Clone)]
pub enum ConstPoolEntry {
    F32(f32),
    F64(f64),
    I64(i64),
    Bytes(Vec<u8>),
}

/// Stack frame layout.
#[derive(Debug, Clone)]
pub struct StackFrame {
    /// Slots for local variables (offset from FP and size; offsets are negative).
    pub locals: Vec<FrameSlot>,
    /// Total frame size in bytes (16-byte aligned).
    pub size: u32,
    /// Offset of the next available local slot.
    next_offset: i32,
    /// Maximum outgoing stack argument area reserved at the bottom of the frame.
    outgoing_arg_size: u32,
}

/// A stack frame slot.
#[derive(Debug, Clone)]
pub struct FrameSlot {
    pub offset: i32, // negative offset from FP
    pub size: u32,   // size in bytes
}

impl StackFrame {
    pub fn new() -> Self {
        // Apple ARM64 frame layout:
        //   FP points at saved FP/LR (top of frame).
        //   Locals are at negative offsets from FP.
        //   SP is at the bottom of the frame.
        //
        //   [FP+0]  = saved x29
        //   [FP+8]  = saved x30
        //   [FP-8]  = first local
        //   [FP-16] = second local
        //   ...
        //   [SP]    = bottom of frame
        //
        // Prologue: sub sp, sp, #FRAME_SIZE
        //           stp x29, x30, [sp, #FRAME_SIZE - 16]
        //           add x29, sp, #FRAME_SIZE - 16
        // Epilogue: ldp x29, x30, [sp, #FRAME_SIZE - 16]
        //           add sp, sp, #FRAME_SIZE
        //           ret
        Self {
            locals: Vec::new(),
            size: 16,
            next_offset: 0,
            outgoing_arg_size: 0,
        }
    }
}

impl Default for StackFrame {
    fn default() -> Self {
        Self::new()
    }
}

impl StackFrame {
    /// Allocate a local variable slot. Returns a negative offset from FP.
    /// Locals grow downward from FP (an 8-byte first local lands at [FP-8]).
    ///
    /// Alignment ladders 4 → 8 → 16 by size. The 16 case matters for
    /// 128-bit NEON vector spills — Apple Silicon's `LDR Q` / `STR Q`
    /// require 16-byte alignment; capping alignment at 8 bytes would
    /// silently yield misaligned addresses that can fault.
    pub fn alloc_local(&mut self, size: u32) -> i32 {
        let align = if size >= 16 {
            16i32
        } else if size >= 8 {
            8
        } else {
            4
        };
        self.next_offset += size as i32;
        self.next_offset = (self.next_offset + align - 1) & !(align - 1);
        let offset = -self.next_offset; // negative from FP
        self.locals.push(FrameSlot { offset, size });
        self.recompute_size();
        offset
    }

    /// Frame size = 16 (FP+LR) + locals + outgoing args, 16-byte aligned.
    fn recompute_size(&mut self) {
        let raw = 16 + self.next_offset as u32 + self.outgoing_arg_size;
        self.size = (raw + 15) & !15;
    }

    /// Reserve the maximum outgoing stack argument area this function needs.
    pub fn reserve_outgoing_args(&mut self, size: u32) {
        if size > self.outgoing_arg_size {
            self.outgoing_arg_size = size;
            self.recompute_size();
        }
    }
}
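
// A quick sketch exercising the alignment ladder above: a 16-byte
// (V128-sized) local must land on a 16-byte boundary even after the
// running offset has been misaligned, and the total frame size stays
// 16-byte aligned throughout.
#[cfg(test)]
mod frame_alignment_sketch {
    use super::*;

    #[test]
    fn v128_local_is_16_byte_aligned() {
        let mut frame = StackFrame::new();
        frame.alloc_local(4); // misalign the running offset first
        let off = frame.alloc_local(16);
        assert_eq!(off % 16, 0, "Q-register slot at unaligned offset {}", off);
        assert_eq!(frame.size % 16, 0);
    }
}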

/// A machine function — the codegen output for one IR function.
#[derive(Debug, Clone)]
pub struct MachineFunction {
    pub name: String,
    pub blocks: Vec<MachineBlock>,
    pub frame: StackFrame,
    pub vregs: Vec<VReg>,
    pub const_pool: Vec<ConstPoolEntry>,
    pub internal_only: bool,
    next_vreg: u32,
    next_block: u32,
}

impl MachineFunction {
    pub fn new(name: String) -> Self {
        let entry = MachineBlock::new(MBlockId(0), format!("_{}", name));
        Self {
            name,
            blocks: vec![entry],
            frame: StackFrame::new(),
            vregs: Vec::new(),
            const_pool: Vec::new(),
            internal_only: false,
            next_vreg: 0,
            next_block: 1,
        }
    }

    /// Allocate a new virtual register.
    pub fn new_vreg(&mut self, class: RegClass) -> VRegId {
        let id = VRegId(self.next_vreg);
        self.next_vreg += 1;
        self.vregs.push(VReg { id, class });
        id
    }

    /// Create a new machine block.
    pub fn new_block(&mut self, label: &str) -> MBlockId {
        let id = MBlockId(self.next_block);
        self.next_block += 1;
        self.blocks.push(MachineBlock::new(id, label.into()));
        id
    }

    /// Allocate a fresh block-id without inserting a block. The
    /// caller is responsible for placing the block at the right
    /// position in `self.blocks`. Used by passes that need physical
    /// block adjacency (e.g. branch relaxation, which inserts a
    /// skip block immediately after the source block).
    pub fn next_block_id(&mut self) -> u32 {
        let id = self.next_block;
        self.next_block += 1;
        id
    }

    /// Get a block by ID.
    pub fn block(&self, id: MBlockId) -> &MachineBlock {
        self.blocks
            .iter()
            .find(|b| b.id == id)
            .expect("machine block not found")
    }

    /// Get a mutable block by ID.
    pub fn block_mut(&mut self, id: MBlockId) -> &mut MachineBlock {
        self.blocks
            .iter_mut()
            .find(|b| b.id == id)
            .expect("machine block not found")
    }

    /// Add a constant pool entry, return its index.
    pub fn add_const(&mut self, entry: ConstPoolEntry) -> u32 {
        let idx = self.const_pool.len() as u32;
        self.const_pool.push(entry);
        idx
    }

    /// Allocate a local stack slot.
    pub fn alloc_local(&mut self, size: u32) -> i32 {
        self.frame.alloc_local(size)
    }

    /// Reserve outgoing stack argument space for calls made by this function.
    pub fn reserve_outgoing_args(&mut self, size: u32) {
        self.frame.reserve_outgoing_args(size)
    }
}
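
// An end-to-end usage sketch: build a function that adds two vregs and
// returns. This exercises only the block/vreg/instruction plumbing in
// this file; real lowering would also wire up the ABI and register
// allocation. The operand layout in `MachineInst` is an assumption.
#[cfg(test)]
mod machine_function_sketch {
    use super::*;

    #[test]
    fn build_add_function() {
        let mut mf = MachineFunction::new("add2".into());
        let a = mf.new_vreg(RegClass::Gp64);
        let b = mf.new_vreg(RegClass::Gp64);
        let sum = mf.new_vreg(RegClass::Gp64);
        let entry = mf.blocks[0].id;
        // add sum, a, b
        mf.block_mut(entry).insts.push(MachineInst {
            opcode: ArmOpcode::AddReg,
            operands: vec![MachineOperand::VReg(a), MachineOperand::VReg(b)],
            def: Some(sum),
        });
        // ret
        mf.block_mut(entry).insts.push(MachineInst {
            opcode: ArmOpcode::Ret,
            operands: vec![],
            def: None,
        });
        assert_eq!(mf.block(entry).label, "_add2");
        assert_eq!(mf.block(entry).insts.len(), 2);
    }
}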

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn stack_frame_alignment() {
        let mut frame = StackFrame::new();
        frame.alloc_local(4); // i32
        assert_eq!(
            frame.size % 16,
            0,
            "frame size {} not 16-byte aligned",
            frame.size
        );

        frame.alloc_local(8); // i64
        assert_eq!(frame.size % 16, 0);

        frame.alloc_local(1); // bool
        assert_eq!(frame.size % 16, 0);
    }

    #[test]
    fn stack_slots_dont_overlap() {
        let mut frame = StackFrame::new();
        let off1 = frame.alloc_local(4);
        let off2 = frame.alloc_local(4);
        let off3 = frame.alloc_local(8);
        assert_ne!(off1, off2);
        assert_ne!(off2, off3);
        // All offsets should be negative (below FP).
        assert!(off1 < 0);
        assert!(off2 < 0);
        assert!(off3 < 0);
        // No overlap: each slot's range is [offset, offset+size).
        assert!(off2 + 4 <= off1 || off1 + 4 <= off2);
    }

    #[test]
    fn vreg_allocation() {
        let mut mf = MachineFunction::new("test".into());
        let v0 = mf.new_vreg(RegClass::Gp64);
        let v1 = mf.new_vreg(RegClass::Fp64);
        assert_eq!(v0, VRegId(0));
        assert_eq!(v1, VRegId(1));
        assert_eq!(mf.vregs.len(), 2);
        assert_eq!(mf.vregs[0].class, RegClass::Gp64);
        assert_eq!(mf.vregs[1].class, RegClass::Fp64);
    }

    #[test]
    fn const_pool() {
        let mut mf = MachineFunction::new("test".into());
        let idx0 = mf.add_const(ConstPoolEntry::F64(3.14));
        let idx1 = mf.add_const(ConstPoolEntry::F32(2.0));
        assert_eq!(idx0, 0);
        assert_eq!(idx1, 1);
    }

    #[test]
    fn frame_size_starts_at_16() {
        let frame = StackFrame::new();
        assert_eq!(frame.size, 16); // just FP+LR
    }

    #[test]
    fn reserve_outgoing_args_grows_frame() {
        let mut frame = StackFrame::new();
        frame.alloc_local(8);
        let before = frame.size;
        frame.reserve_outgoing_args(16);
        assert!(frame.size >= before + 16);
        assert_eq!(frame.size % 16, 0);
    }
}