Rust · 72410 bytes Raw Blame History
1 //! Assembly text emission — converts Machine IR to ARM64 assembly text.
2 //!
3 //! Produces output compatible with both afs-as and Apple's system assembler.
4
5 use super::mir::*;
6 use std::fmt::Write;
7
/// Split an `i128` into its (low, high) 64-bit words, reinterpreting
/// the value's two's-complement bits (no sign extension surprises).
fn split_i128_words(value: i128) -> (u64, u64) {
    let raw = value as u128;
    let low = raw as u64;
    let high = (raw >> 64) as u64;
    (low, high)
}
12
/// Append an `i128` literal to `out` as two `.quad` data words,
/// low word first (little-endian layout in memory).
fn emit_i128_words(out: &mut String, value: i128) {
    let raw = value as u128;
    for word in [raw as u64, (raw >> 64) as u64] {
        writeln!(out, " .quad 0x{:016x}", word).unwrap();
    }
}
18
/// Append a `.byte` directive listing every value in `bytes`,
/// comma-separated. Emits nothing at all for an empty slice.
fn emit_byte_values(out: &mut String, bytes: &[u8]) {
    if bytes.is_empty() {
        return;
    }
    let mut list = String::new();
    for (i, b) in bytes.iter().enumerate() {
        if i > 0 {
            list.push_str(", ");
        }
        write!(list, "{}", b).unwrap();
    }
    writeln!(out, " .byte {}", list).unwrap();
}
30
/// Pick a log2 alignment for a byte array based on its size:
/// up to 8-byte (p2align 3) for arrays of 8+ bytes, scaling down
/// for smaller arrays so tiny globals don't waste padding.
fn byte_array_align_log2(byte_count: u64) -> u8 {
    match byte_count {
        0..=1 => 0,
        2..=3 => 1,
        4..=7 => 2,
        _ => 3,
    }
}
42
/// Emit module-level globals as a `.section __DATA,__data` block.
/// Each global gets a label and a directive matching its type
/// (`.long`, `.quad`, `.single`, `.double`, etc.) plus the
/// initializer value. Zero-initialized globals still emit an
/// explicit zero so the symbol resolves at link time.
///
/// Array-typed globals: the IR type is `Array<i8, byte_size>` so
/// the element count isn't directly recoverable from the type.
/// The caller must use `IntArray`/`FloatArray` initializers that
/// carry the element count explicitly. Zero-initialized arrays
/// fall back to `.space byte_size`.
///
/// Module globals (`afs_mod_*` and `afs_common_*`) are emitted as
/// `.globl` so other translation units can reference them via USE.
/// Non-module globals (SAVE-promoted locals) stay `.private_extern`
/// to prevent cross-TU collisions (audit Maj-1).
pub fn emit_globals(globals: &[crate::ir::inst::Global]) -> String {
    use crate::ir::inst::GlobalInit;
    use crate::ir::types::{FloatWidth, IntWidth, IrType};

    let mut out = String::new();
    if globals.is_empty() {
        // No section header for an empty global list.
        return out;
    }

    writeln!(out, ".section __DATA,__data").unwrap();
    for g in globals {
        // Mach-O symbol convention: add a leading underscore unless the
        // name already carries one.
        let symbol = if g.name.starts_with('_') {
            g.name.clone()
        } else {
            format!("_{}", g.name)
        };
        // Module globals need external linkage for multi-file.
        let is_module_global = g.name.starts_with("afs_mod_") || g.name.starts_with("afs_common_");
        if is_module_global {
            writeln!(out, ".globl {}", symbol).unwrap();
        } else {
            writeln!(out, ".private_extern {}", symbol).unwrap();
        }

        // Array globals carry `Array<elem_ty, count>`. Pick the
        // directive from the element type so `.long` / `.quad` /
        // `.single` / `.double` all work correctly.
        if let IrType::Array(elem_ty, count) = &g.ty {
            let (align, directive, _elem_bytes, is_float) = match elem_ty.as_ref() {
                IrType::Int(IntWidth::I8) | IrType::Bool => {
                    // Byte arrays scale their alignment down with size.
                    (byte_array_align_log2(*count), ".byte", 1, false)
                }
                IrType::Int(IntWidth::I16) => (1, ".short", 2, false),
                IrType::Int(IntWidth::I32) => (2, ".long", 4, false),
                IrType::Int(IntWidth::I64) => (3, ".quad", 8, false),
                // i128 has no single data directive; elements are split
                // into `.quad` pairs below.
                IrType::Int(IntWidth::I128) => (4, ".quad", 16, false),
                IrType::Float(FloatWidth::F32) => (2, ".single", 4, true),
                IrType::Float(FloatWidth::F64) => (3, ".double", 8, true),
                _ => (3, ".quad", 8, false),
            };
            if align > 0 {
                writeln!(out, ".p2align {}", align).unwrap();
            }
            writeln!(out, "{}:", symbol).unwrap();
            match &g.initializer {
                // i128 elements: two little-endian .quad words each.
                // This arm must precede the generic IntArray arm so the
                // 16-byte split takes priority over the `.quad` directive.
                Some(GlobalInit::IntArray(vs))
                    if matches!(elem_ty.as_ref(), IrType::Int(IntWidth::I128)) =>
                {
                    for v in vs {
                        emit_i128_words(&mut out, *v);
                    }
                }
                Some(GlobalInit::IntArray(vs)) if !is_float => {
                    for v in vs {
                        writeln!(out, " {} {}", directive, v).unwrap();
                    }
                }
                Some(GlobalInit::FloatArray(vs)) if is_float => {
                    for v in vs {
                        writeln!(out, " {} {}", directive, v).unwrap();
                    }
                }
                Some(GlobalInit::String(bytes)) => {
                    emit_byte_values(&mut out, bytes);
                    // Pad a short string initializer out to the array's
                    // full byte size so the storage is fully defined.
                    let total_bytes = g.ty.size_bytes() as usize;
                    if bytes.len() < total_bytes {
                        writeln!(out, " .space {}", total_bytes - bytes.len()).unwrap();
                    }
                }
                _ => {
                    // Nested arrays (for example arrays of byte-packed derived
                    // values) don't have a scalar element directive. Emit their
                    // zero-initialized storage using the full IR type size
                    // instead of falling back to a bogus ".quad * count" size.
                    let byte_size = g.ty.size_bytes();
                    writeln!(out, " .space {}", byte_size).unwrap();
                }
            }
            continue;
        }

        // Scalar i128: 16-byte aligned, emitted as a lo/hi .quad pair.
        if matches!(g.ty, IrType::Int(IntWidth::I128)) {
            writeln!(out, ".p2align 4").unwrap();
            writeln!(out, "{}:", symbol).unwrap();
            match &g.initializer {
                Some(GlobalInit::Int(v)) => emit_i128_words(&mut out, *v),
                Some(GlobalInit::Zero) | None => emit_i128_words(&mut out, 0),
                // Non-integer initializer on an i128 global: reserve
                // zeroed storage rather than emitting a wrong value.
                _ => writeln!(out, " .space 16").unwrap(),
            }
            continue;
        }

        // Scalar globals: pick alignment + storage directive.
        // Audit Med-5: NaN/Inf must round-trip portably across
        // assemblers. Apple's `as` accepts `.single NaN` but GNU
        // binutils does not. Emit non-finite floats as their
        // bit-pattern via `.long` / `.quad` so the same .s file
        // assembles cleanly on both.
        let is_nonfinite_float = matches!(
            (&g.ty, &g.initializer),
            (IrType::Float(_), Some(GlobalInit::Float(v))) if !v.is_finite()
        );
        let (align, directive, default_zero) = if is_nonfinite_float {
            match &g.ty {
                IrType::Float(FloatWidth::F32) => (2, ".long", "0"),
                _ => (3, ".quad", "0"),
            }
        } else {
            match &g.ty {
                IrType::Int(IntWidth::I8) | IrType::Bool => (0, ".byte", "0"),
                IrType::Int(IntWidth::I16) => (1, ".short", "0"),
                IrType::Int(IntWidth::I32) => (2, ".long", "0"),
                IrType::Int(IntWidth::I64) => (3, ".quad", "0"),
                IrType::Float(FloatWidth::F32) => (2, ".single", "0.0"),
                IrType::Float(FloatWidth::F64) => (3, ".double", "0.0"),
                _ => (3, ".quad", "0"), // pointers and aggregates: 8-byte slot
            }
        };
        if align > 0 {
            writeln!(out, ".p2align {}", align).unwrap();
        }
        writeln!(out, "{}:", symbol).unwrap();
        let value = match &g.initializer {
            Some(GlobalInit::Int(v)) => v.to_string(),
            Some(GlobalInit::Float(v)) => {
                if v.is_finite() {
                    format!("{}", v)
                } else {
                    // Bit-pattern emission for NaN / ±Inf.
                    match &g.ty {
                        IrType::Float(FloatWidth::F32) => {
                            format!("0x{:08x}", (*v as f32).to_bits())
                        }
                        _ => format!("0x{:016x}", v.to_bits()),
                    }
                }
            }
            Some(GlobalInit::Zero) | None => default_zero.into(),
            // Single-character string on a byte-sized global: take the
            // first byte (0 if the string is empty).
            Some(GlobalInit::String(bytes))
                if matches!(g.ty, IrType::Int(IntWidth::I8) | IrType::Bool) =>
            {
                bytes.first().copied().unwrap_or(0).to_string()
            }
            Some(GlobalInit::String(_)) => default_zero.into(),
            Some(GlobalInit::IntArray(_)) | Some(GlobalInit::FloatArray(_)) => {
                // Array initializer on a scalar-typed global —
                // shouldn't happen, but emit zero as a safe fallback.
                default_zero.into()
            }
        };
        writeln!(out, " {} {}", directive, value).unwrap();
    }
    out
}
213
/// Emit a machine function as ARM64 assembly text.
///
/// Output layout: linkage directive (`.globl` or `.private_extern`),
/// `.p2align 2` (4-byte instruction alignment), the `_name:` label,
/// each block's label and instructions, then — if the function has
/// one — its constant pool in a `.section __DATA,__const` block.
pub fn emit_function(mf: &MachineFunction) -> String {
    let mut out = String::new();

    // Function directive.
    if mf.internal_only {
        writeln!(out, ".private_extern _{}", mf.name).unwrap();
    } else {
        writeln!(out, ".globl _{}", mf.name).unwrap();
    }
    writeln!(out, ".p2align 2").unwrap();
    writeln!(out, "_{}:", mf.name).unwrap();

    for block in &mf.blocks {
        // Don't re-emit entry label (it's the function label).
        if block.id != MBlockId(0) {
            writeln!(out, "{}:", block.label).unwrap();
        }

        for inst in &block.insts {
            // emit_inst may return multiple newline-joined lines for
            // expanded sequences; each starts at the same indent.
            writeln!(out, " {}", emit_inst(inst, mf)).unwrap();
        }
    }

    // Constant pool.
    if !mf.const_pool.is_empty() {
        writeln!(out).unwrap();
        writeln!(out, ".section __DATA,__const").unwrap();
        for (i, entry) in mf.const_pool.iter().enumerate() {
            // Pool labels are derived from the function name + index so
            // they are unique per translation unit.
            let label = const_pool_label(&mf.name, i as u32);
            match entry {
                ConstPoolEntry::F32(v) => {
                    writeln!(out, ".p2align 2").unwrap();
                    writeln!(out, "{}:", label).unwrap();
                    // Emit as hex integer to avoid decimal expansion issues
                    // with large/small floats that the assembler can't parse.
                    writeln!(out, " .long 0x{:08x}", v.to_bits()).unwrap();
                }
                ConstPoolEntry::F64(v) => {
                    writeln!(out, ".p2align 3").unwrap();
                    writeln!(out, "{}:", label).unwrap();
                    writeln!(out, " .quad 0x{:016x}", v.to_bits()).unwrap();
                }
                ConstPoolEntry::I64(v) => {
                    writeln!(out, ".p2align 3").unwrap();
                    writeln!(out, "{}:", label).unwrap();
                    writeln!(out, " .quad {}", v).unwrap();
                }
                ConstPoolEntry::Bytes(b) => {
                    writeln!(out, ".p2align 3").unwrap();
                    writeln!(out, "{}:", label).unwrap();
                    write!(out, " .ascii \"").unwrap();
                    // Escape everything `.ascii` can't carry verbatim:
                    // backslash, quote, newline, tab get named escapes;
                    // other non-printable bytes become \xNN.
                    for &byte in b {
                        match byte {
                            b'\\' => write!(out, "\\\\").unwrap(),
                            b'"' => write!(out, "\\\"").unwrap(),
                            b'\n' => write!(out, "\\n").unwrap(),
                            b'\t' => write!(out, "\\t").unwrap(),
                            b if b.is_ascii_graphic() || b == b' ' => {
                                write!(out, "{}", b as char).unwrap();
                            }
                            b => write!(out, "\\x{:02x}", b).unwrap(),
                        }
                    }
                    writeln!(out, "\"").unwrap();
                }
            }
        }
    }

    out
}
286
/// Format `OP sp, sp, #N` (or `add x29, sp, #N`), falling back
/// to a synthesized movz/movk sequence via the AAPCS64 scratch
/// register x16 (IP0) when N exceeds the 12-bit immediate range.
/// x16 is free in the prologue/epilogue per AAPCS64 — it has no
/// caller-saved value at function entry and can be clobbered
/// before/after the FP/LR save.
///
/// Audit6 BLOCKING-5 (related to BLOCKING-4): functions whose
/// frame size exceeds 4095 bytes used to emit raw
/// `sub sp, sp, #4144` and the assembler rejected the
/// immediate. This came up after audit6 BLOCKING-4 added
/// per-allocate descriptor buffers, but it's a latent bug that
/// any large-frame function would hit.
///
/// Fix over the previous version: the old code materialized only
/// the low two 16-bit chunks of N, silently mis-encoding values
/// with bits above 32 and negative values. Now the full 64-bit
/// magnitude goes through movz/movk (zero chunks after the first
/// are elided), and negative N flips add↔sub so the arithmetic
/// stays correct. Output is byte-identical for the previously
/// working range (0..=4095 and positive 32-bit values).
fn fmt_sp_imm(op: &str, dest: &str, base: &str, n: i64) -> String {
    if (0..=4095).contains(&n) {
        return format!("{} {}, {}, #{}", op, dest, base, n);
    }
    // Negative immediates: materialize |n| and flip the operation
    // (sub by -n == add by |n|), since movz/movk build an unsigned value.
    let magnitude = n.unsigned_abs();
    let eff_op = if n < 0 {
        match op {
            "add" => "sub",
            "sub" => "add",
            other => other, // callers only pass add/sub; leave others alone
        }
    } else {
        op
    };
    // Synthesize the full immediate in x16 then use the register form.
    let mut mov = String::new();
    for shift in [0u32, 16, 32, 48] {
        let chunk = (magnitude >> shift) & 0xFFFF;
        // Skip zero chunks once the register has been seeded.
        if chunk == 0 && !mov.is_empty() {
            continue;
        }
        if mov.is_empty() {
            mov = format!("movz x16, #{}", chunk);
        } else {
            write!(mov, "\n movk x16, #{}, lsl #{}", chunk, shift).unwrap();
        }
    }
    format!("{}\n {} {}, {}, x16", mov, eff_op, dest, base)
}
314
315 fn fmt_stack_alloc(frame_size: i64) -> String {
316 // Apple Silicon uses large guard pages, so jumping the stack pointer
317 // down by a huge frame in one shot can skip the guard and fault on the
318 // first real touch. Probe the stack one chunk at a time for large
319 // frames to keep growth fault-safe.
320 const STACK_PROBE_STRIDE: i64 = 16 * 1024;
321
322 if frame_size <= STACK_PROBE_STRIDE {
323 return fmt_sp_imm("sub", "sp", "sp", frame_size);
324 }
325
326 let mut lines = Vec::new();
327 let mut remaining = frame_size;
328 while remaining > 0 {
329 let step = remaining.min(STACK_PROBE_STRIDE);
330 lines.push(fmt_sp_imm("sub", "sp", "sp", step));
331 lines.push("str xzr, [sp]".to_string());
332 remaining -= step;
333 }
334 lines.join("\n ")
335 }
336
/// Materialize an arbitrary 64-bit constant into `reg` using a
/// movz seed plus movk patches, one 16-bit chunk per instruction.
/// Zero chunks after the seed are skipped; `value == 0` collapses
/// to a single `movz reg, #0`.
fn fmt_u64_imm(reg: &str, value: u64) -> String {
    let mut text = String::new();
    for shift in [0u32, 16, 32, 48] {
        let chunk = ((value >> shift) & 0xFFFF) as u16;
        if text.is_empty() {
            // First chunk always seeds the register (even if zero).
            text = format!("movz {}, #{}", reg, chunk);
        } else if chunk != 0 {
            write!(text, "\n movk {}, #{}, lsl #{}", reg, chunk, shift).unwrap();
        }
    }
    text
}
356
357 fn fmt_addr_with_offset(dest: &str, base: &str, offset: i64, scratch: &str) -> String {
358 if offset == 0 {
359 return format!("mov {}, {}", dest, base);
360 }
361
362 if (0..=4095).contains(&offset) {
363 return format!("add {}, {}, #{}", dest, base, offset);
364 }
365 if (-4095..=-1).contains(&offset) {
366 return format!("sub {}, {}, #{}", dest, base, -offset);
367 }
368
369 let imm = fmt_u64_imm(scratch, offset.unsigned_abs());
370 let op = if offset.is_negative() { "sub" } else { "add" };
371 format!("{}\n {} {}, {}, {}", imm, op, dest, base, scratch)
372 }
373
/// Emit a single machine instruction as assembly text. Public so the
/// branch-relaxation pass can count emit-time instruction bytes
/// directly rather than re-deriving each opcode's expansion rules.
///
/// Thin wrapper over the private `emit_inst`; expansions that lower to
/// several real instructions come back as newline-separated lines.
pub fn emit_inst_text(inst: &MachineInst, mf: &MachineFunction) -> String {
    emit_inst(inst, mf)
}
380
381 /// Emit a single machine instruction as assembly text.
382 fn emit_inst(inst: &MachineInst, mf: &MachineFunction) -> String {
383 match inst.opcode {
384 ArmOpcode::AddReg => format!(
385 "add {}, {}, {}",
386 op_str(&inst.operands[0]),
387 op_str(&inst.operands[1]),
388 op_str(&inst.operands[2])
389 ),
390 ArmOpcode::AddsReg => format!(
391 "adds {}, {}, {}",
392 op_str(&inst.operands[0]),
393 op_str(&inst.operands[1]),
394 op_str(&inst.operands[2])
395 ),
396 ArmOpcode::AdcReg => format!(
397 "adc {}, {}, {}",
398 op_str(&inst.operands[0]),
399 op_str(&inst.operands[1]),
400 op_str(&inst.operands[2])
401 ),
402 ArmOpcode::AddImm => {
403 let dest = op_str(&inst.operands[0]);
404 let base = op_str(&inst.operands[1]);
405 let imm: i64 = match &inst.operands[2] {
406 MachineOperand::FrameSlot(off) => *off as i64,
407 MachineOperand::Imm(-1) => {
408 // Sentinel: prologue FP setup → frame_size - 16
409 mf.frame.size.saturating_sub(16) as i64
410 }
411 MachineOperand::Imm(v) => *v,
412 _ => return format!("add {}, {}, {}", dest, base, op_str(&inst.operands[2])),
413 };
414 // Both `add x29, sp, #N` (FP setup) and `add Xd, Xn, #N`
415 // need the > 4095 fallback. Use the same scratch
416 // synthesis since x16 is safe in the prologue.
417 fmt_sp_imm("add", &dest, &base, imm)
418 }
419 ArmOpcode::SubReg => format!(
420 "sub {}, {}, {}",
421 op_str(&inst.operands[0]),
422 op_str(&inst.operands[1]),
423 op_str(&inst.operands[2])
424 ),
425 ArmOpcode::SubsReg => format!(
426 "subs {}, {}, {}",
427 op_str(&inst.operands[0]),
428 op_str(&inst.operands[1]),
429 op_str(&inst.operands[2])
430 ),
431 ArmOpcode::SbcReg => format!(
432 "sbc {}, {}, {}",
433 op_str(&inst.operands[0]),
434 op_str(&inst.operands[1]),
435 op_str(&inst.operands[2])
436 ),
437 ArmOpcode::SubImm => {
438 let imm: i64 = match &inst.operands[2] {
439 MachineOperand::Imm(-1) => {
440 // Sentinel: epilogue SP restore → frame_size - 16
441 mf.frame.size.saturating_sub(16) as i64
442 }
443 MachineOperand::Imm(v) => *v,
444 _ => 0,
445 };
446 let dest = op_str(&inst.operands[0]);
447 let base = op_str(&inst.operands[1]);
448 fmt_sp_imm("sub", &dest, &base, imm)
449 }
450 ArmOpcode::Mul => format!(
451 "mul {}, {}, {}",
452 op_str(&inst.operands[0]),
453 op_str(&inst.operands[1]),
454 op_str(&inst.operands[2])
455 ),
456 ArmOpcode::Sdiv => format!(
457 "sdiv {}, {}, {}",
458 op_str(&inst.operands[0]),
459 op_str(&inst.operands[1]),
460 op_str(&inst.operands[2])
461 ),
462 ArmOpcode::Madd => format!(
463 "madd {}, {}, {}, {}",
464 op_str(&inst.operands[0]),
465 op_str(&inst.operands[1]),
466 op_str(&inst.operands[2]),
467 op_str(&inst.operands[3])
468 ),
469 ArmOpcode::Msub => format!(
470 "msub {}, {}, {}, {}",
471 op_str(&inst.operands[0]),
472 op_str(&inst.operands[1]),
473 op_str(&inst.operands[2]),
474 op_str(&inst.operands[3])
475 ),
476 ArmOpcode::Neg => format!(
477 "neg {}, {}",
478 op_str(&inst.operands[0]),
479 op_str(&inst.operands[1])
480 ),
481
482 ArmOpcode::AndReg => format!(
483 "and {}, {}, {}",
484 op_str(&inst.operands[0]),
485 op_str(&inst.operands[1]),
486 op_str(&inst.operands[2])
487 ),
488 ArmOpcode::OrrReg => format!(
489 "orr {}, {}, {}",
490 op_str(&inst.operands[0]),
491 op_str(&inst.operands[1]),
492 op_str(&inst.operands[2])
493 ),
494 ArmOpcode::EorReg => format!(
495 "eor {}, {}, {}",
496 op_str(&inst.operands[0]),
497 op_str(&inst.operands[1]),
498 op_str(&inst.operands[2])
499 ),
500 ArmOpcode::OrnReg => format!(
501 "orn {}, {}, {}",
502 op_str(&inst.operands[0]),
503 op_str(&inst.operands[1]),
504 op_str(&inst.operands[2])
505 ),
506 ArmOpcode::LslReg => format!(
507 "lsl {}, {}, {}",
508 op_str(&inst.operands[0]),
509 op_str(&inst.operands[1]),
510 op_str(&inst.operands[2])
511 ),
512 ArmOpcode::LsrReg => format!(
513 "lsr {}, {}, {}",
514 op_str(&inst.operands[0]),
515 op_str(&inst.operands[1]),
516 op_str(&inst.operands[2])
517 ),
518 ArmOpcode::AsrReg => format!(
519 "asr {}, {}, {}",
520 op_str(&inst.operands[0]),
521 op_str(&inst.operands[1]),
522 op_str(&inst.operands[2])
523 ),
524
525 ArmOpcode::Mvn => format!(
526 "mvn {}, {}",
527 op_str(&inst.operands[0]),
528 op_str(&inst.operands[1])
529 ),
530 ArmOpcode::Clz => format!(
531 "clz {}, {}",
532 op_str(&inst.operands[0]),
533 op_str(&inst.operands[1])
534 ),
535 ArmOpcode::Rbit => format!(
536 "rbit {}, {}",
537 op_str(&inst.operands[0]),
538 op_str(&inst.operands[1])
539 ),
540
541 ArmOpcode::CmpReg => format!(
542 "cmp {}, {}",
543 op_str(&inst.operands[0]),
544 op_str(&inst.operands[1])
545 ),
546 ArmOpcode::CmpImm => format!(
547 "cmp {}, #{}",
548 op_str(&inst.operands[0]),
549 if let MachineOperand::Imm(v) = &inst.operands[1] {
550 *v
551 } else {
552 0
553 }
554 ),
555 ArmOpcode::Cset | ArmOpcode::FCset => {
556 let cond = if let MachineOperand::Cond(c) = &inst.operands[1] {
557 cond_str(*c)
558 } else {
559 "eq"
560 };
561 format!("cset {}, {}", op_str(&inst.operands[0]), cond)
562 }
563 ArmOpcode::CselReg => {
564 let cond = if let MachineOperand::Cond(c) = &inst.operands[3] {
565 cond_str(*c)
566 } else {
567 "eq"
568 };
569 format!(
570 "csel {}, {}, {}, {}",
571 op_str(&inst.operands[0]),
572 op_str(&inst.operands[1]),
573 op_str(&inst.operands[2]),
574 cond
575 )
576 }
577 ArmOpcode::FCmpReg => format!(
578 "fcmp {}, {}",
579 op_str(&inst.operands[0]),
580 op_str(&inst.operands[1])
581 ),
582 ArmOpcode::FcselReg => {
583 let cond = if let MachineOperand::Cond(c) = &inst.operands[3] {
584 cond_str(*c)
585 } else {
586 "eq"
587 };
588 format!(
589 "fcsel {}, {}, {}, {}",
590 op_str(&inst.operands[0]),
591 op_str(&inst.operands[1]),
592 op_str(&inst.operands[2]),
593 cond
594 )
595 }
596
597 ArmOpcode::FaddS | ArmOpcode::FaddD => format!(
598 "fadd {}, {}, {}",
599 op_str(&inst.operands[0]),
600 op_str(&inst.operands[1]),
601 op_str(&inst.operands[2])
602 ),
603 ArmOpcode::FsubS | ArmOpcode::FsubD => format!(
604 "fsub {}, {}, {}",
605 op_str(&inst.operands[0]),
606 op_str(&inst.operands[1]),
607 op_str(&inst.operands[2])
608 ),
609 ArmOpcode::FmulS | ArmOpcode::FmulD => format!(
610 "fmul {}, {}, {}",
611 op_str(&inst.operands[0]),
612 op_str(&inst.operands[1]),
613 op_str(&inst.operands[2])
614 ),
615 ArmOpcode::FdivS | ArmOpcode::FdivD => format!(
616 "fdiv {}, {}, {}",
617 op_str(&inst.operands[0]),
618 op_str(&inst.operands[1]),
619 op_str(&inst.operands[2])
620 ),
621 ArmOpcode::FnegS | ArmOpcode::FnegD => format!(
622 "fneg {}, {}",
623 op_str(&inst.operands[0]),
624 op_str(&inst.operands[1])
625 ),
626 ArmOpcode::FabsS | ArmOpcode::FabsD => format!(
627 "fabs {}, {}",
628 op_str(&inst.operands[0]),
629 op_str(&inst.operands[1])
630 ),
631 ArmOpcode::FsqrtS | ArmOpcode::FsqrtD => format!(
632 "fsqrt {}, {}",
633 op_str(&inst.operands[0]),
634 op_str(&inst.operands[1])
635 ),
636 // Fused multiply-add/subtract: 4-operand (dest, Sn, Sm, Sa).
637 // FMADD Sd, Sn, Sm, Sa → Sd = Sa + Sn*Sm
638 // FMSUB Sd, Sn, Sm, Sa → Sd = Sa - Sn*Sm
639 // FNMSUB Sd, Sn, Sm, Sa → Sd = Sn*Sm - Sa
640 ArmOpcode::FmaddS | ArmOpcode::FmaddD => format!(
641 "fmadd {}, {}, {}, {}",
642 op_str(&inst.operands[0]),
643 op_str(&inst.operands[1]),
644 op_str(&inst.operands[2]),
645 op_str(&inst.operands[3])
646 ),
647 ArmOpcode::FmsubS | ArmOpcode::FmsubD => format!(
648 "fmsub {}, {}, {}, {}",
649 op_str(&inst.operands[0]),
650 op_str(&inst.operands[1]),
651 op_str(&inst.operands[2]),
652 op_str(&inst.operands[3])
653 ),
654 ArmOpcode::FnmsubS | ArmOpcode::FnmsubD => format!(
655 "fnmsub {}, {}, {}, {}",
656 op_str(&inst.operands[0]),
657 op_str(&inst.operands[1]),
658 op_str(&inst.operands[2]),
659 op_str(&inst.operands[3])
660 ),
661
662 ArmOpcode::ScvtfSW | ArmOpcode::ScvtfDW | ArmOpcode::ScvtfSX | ArmOpcode::ScvtfDX => {
663 format!(
664 "scvtf {}, {}",
665 op_str(&inst.operands[0]),
666 op_str(&inst.operands[1])
667 )
668 }
669 ArmOpcode::FcvtzsWS | ArmOpcode::FcvtzsWD | ArmOpcode::FcvtzsXS | ArmOpcode::FcvtzsXD => {
670 format!(
671 "fcvtzs {}, {}",
672 op_str(&inst.operands[0]),
673 op_str(&inst.operands[1])
674 )
675 }
676 ArmOpcode::FcvtSD => format!(
677 "fcvt {}, {}",
678 fp_reg_str(&inst.operands[0], false),
679 fp_reg_str(&inst.operands[1], true)
680 ),
681 ArmOpcode::FcvtDS => format!(
682 "fcvt {}, {}",
683 fp_reg_str(&inst.operands[0], true),
684 fp_reg_str(&inst.operands[1], false)
685 ),
686
687 ArmOpcode::Movz => {
688 let imm = if let MachineOperand::Imm(v) = &inst.operands[1] {
689 *v
690 } else {
691 0
692 };
693 let shift = if let MachineOperand::Shift(s) = &inst.operands[2] {
694 *s
695 } else {
696 0
697 };
698 if shift == 0 {
699 format!("movz {}, #{}", op_str(&inst.operands[0]), imm)
700 } else {
701 format!(
702 "movz {}, #{}, lsl #{}",
703 op_str(&inst.operands[0]),
704 imm,
705 shift
706 )
707 }
708 }
709 ArmOpcode::Movk => {
710 let imm = if let MachineOperand::Imm(v) = &inst.operands[1] {
711 *v
712 } else {
713 0
714 };
715 let shift = if let MachineOperand::Shift(s) = &inst.operands[2] {
716 *s
717 } else {
718 0
719 };
720 format!(
721 "movk {}, #{}, lsl #{}",
722 op_str(&inst.operands[0]),
723 imm,
724 shift
725 )
726 }
727 ArmOpcode::Movn => {
728 let imm = if let MachineOperand::Imm(v) = &inst.operands[1] {
729 *v
730 } else {
731 0
732 };
733 let shift = if let MachineOperand::Shift(s) = &inst.operands[2] {
734 *s
735 } else {
736 0
737 };
738 format!(
739 "movn {}, #{}, lsl #{}",
740 op_str(&inst.operands[0]),
741 imm,
742 shift
743 )
744 }
745 ArmOpcode::MovReg => {
746 let dest = op_str(&inst.operands[0]);
747 let src = op_str(&inst.operands[1]);
748 // Handle width mismatch: w→x extend or x→w truncate.
749 let dest_is_x = dest.starts_with('x');
750 let dest_is_w = dest.starts_with('w');
751 let src_is_w = src.starts_with('w');
752 let src_is_x = src.starts_with('x');
753 // Cross-register-class move: AArch64 `mov` only encodes GP↔GP
754 // (and FP↔FP via FmovReg). When register-allocation hands us
755 // a MovReg straddling classes, emit `fmov` which transfers
756 // bits between an integer GPR and an SIMD/FP register.
757 let dest_is_gp = dest_is_x || dest_is_w;
758 let src_is_gp = src_is_x || src_is_w;
759 let dest_is_fp = dest.starts_with('s') || dest.starts_with('d');
760 let src_is_fp = src.starts_with('s') || src.starts_with('d');
761 if dest_is_gp && src_is_fp {
762 // GPR ← FPR: pick GPR width to match FPR (s→w, d→x).
763 let gp = if src.starts_with('d') {
764 if dest_is_x {
765 dest.clone()
766 } else {
767 format!("x{}", &dest[1..])
768 }
769 } else {
770 if dest_is_w {
771 dest.clone()
772 } else {
773 format!("w{}", &dest[1..])
774 }
775 };
776 return format!("fmov {}, {}", gp, src);
777 }
778 if dest_is_fp && src_is_gp {
779 let gp = if dest.starts_with('d') {
780 if src_is_x {
781 src.clone()
782 } else {
783 format!("x{}", &src[1..])
784 }
785 } else {
786 if src_is_w {
787 src.clone()
788 } else {
789 format!("w{}", &src[1..])
790 }
791 };
792 return format!("fmov {}, {}", dest, gp);
793 }
794 if dest_is_x && src_is_w {
795 // Zero-extend 32→64: use uxtw.
796 format!("uxtw {}, {}", dest, src)
797 } else if dest_is_w && src_is_x {
798 // Truncate 64→32 by reading the source register through its
799 // 32-bit view. `mov wN, xM` is not a valid AArch64 encoding.
800 format!("mov {}, w{}", dest, &src[1..])
801 } else {
802 format!("mov {}, {}", dest, src)
803 }
804 }
805 ArmOpcode::FmovReg => format!(
806 "fmov {}, {}",
807 op_str(&inst.operands[0]),
808 op_str(&inst.operands[1])
809 ),
810 ArmOpcode::Mov16B => format!(
811 "mov.16b {}, {}",
812 v_reg_bare(&inst.operands[0]),
813 v_reg_bare(&inst.operands[1]),
814 ),
815 ArmOpcode::AddpV2D => format!(
816 "addp.2d {}, {}, {}",
817 v_reg_bare(&inst.operands[0]),
818 v_reg_bare(&inst.operands[1]),
819 v_reg_bare(&inst.operands[2]),
820 ),
821 ArmOpcode::FaddpV4S => format!(
822 "faddp.4s {}, {}, {}",
823 v_reg_bare(&inst.operands[0]),
824 v_reg_bare(&inst.operands[1]),
825 v_reg_bare(&inst.operands[2]),
826 ),
827
828 ArmOpcode::LdrImm | ArmOpcode::LdrFpImm | ArmOpcode::LdrsbImm | ArmOpcode::LdrshImm => {
829 let dest = op_str(&inst.operands[0]);
830 let base = op_str(&inst.operands[1]);
831 let offset_val = match &inst.operands[2] {
832 MachineOperand::FrameSlot(off) => *off as i64,
833 MachineOperand::Imm(v) => *v,
834 _ => 0,
835 };
836 // Pick the mnemonic by opcode. LDRSB / LDRSH expect a
837 // Wt destination (sign-extended into the lower 32 bits);
838 // the dest operand is already a Gp32 vreg in those
839 // cases, so the formatted register name is `w_`.
840 let mnemonic = match inst.opcode {
841 ArmOpcode::LdrsbImm => "ldrsb",
842 ArmOpcode::LdrshImm => "ldrsh",
843 _ => "ldr",
844 };
845 if (-256..=255).contains(&offset_val) {
846 format!("{} {}, [{}, #{}]", mnemonic, dest, base, offset_val)
847 } else {
848 // Large offset: compute address in x8, then load.
849 format!(
850 "{}\n {} {}, [x8]",
851 fmt_addr_with_offset("x8", &base, offset_val, "x16"),
852 mnemonic,
853 dest
854 )
855 }
856 }
857 ArmOpcode::StrImm | ArmOpcode::StrFpImm | ArmOpcode::StrbImm | ArmOpcode::StrhImm => {
858 let src = op_str(&inst.operands[0]);
859 let base = op_str(&inst.operands[1]);
860 let offset_val = match &inst.operands[2] {
861 MachineOperand::FrameSlot(off) => *off as i64,
862 MachineOperand::Imm(v) => *v,
863 _ => 0,
864 };
865 let mnemonic = match inst.opcode {
866 ArmOpcode::StrbImm => "strb",
867 ArmOpcode::StrhImm => "strh",
868 _ => "str",
869 };
870 if (-256..=255).contains(&offset_val) {
871 format!("{} {}, [{}, #{}]", mnemonic, src, base, offset_val)
872 } else {
873 // Large offset: compute address in x8, then store.
874 format!(
875 "{}\n {} {}, [x8]",
876 fmt_addr_with_offset("x8", &base, offset_val, "x16"),
877 mnemonic,
878 src
879 )
880 }
881 }
882 // Sprint 05: scaled-register-offset addressing. Operands are
883 // [dest, base, idx, Imm(shift)]. Shift 0 elides the `, lsl
884 // #0` suffix per the assembler convention.
885 ArmOpcode::LdrReg | ArmOpcode::LdrFpReg | ArmOpcode::StrReg | ArmOpcode::StrFpReg => {
886 let dest = op_str(&inst.operands[0]);
887 let base = op_str(&inst.operands[1]);
888 let idx = op_str(&inst.operands[2]);
889 let shift = match &inst.operands[3] {
890 MachineOperand::Imm(v) => *v,
891 _ => 0,
892 };
893 let mnemonic = match inst.opcode {
894 ArmOpcode::LdrReg | ArmOpcode::LdrFpReg => "ldr",
895 ArmOpcode::StrReg | ArmOpcode::StrFpReg => "str",
896 _ => unreachable!(),
897 };
898 if shift == 0 {
899 format!("{} {}, [{}, {}]", mnemonic, dest, base, idx)
900 } else {
901 format!("{} {}, [{}, {}, lsl #{}]", mnemonic, dest, base, idx, shift)
902 }
903 }
904
905 ArmOpcode::StpPre => {
906 let frame_size = mf.frame.size as i64;
907 let stp_offset = frame_size - 16;
908 // The `sub sp, sp, #N` portion handles N > 4095 via
909 // x16 synthesis (audit6 BLOCKING-5 root cause), and
910 // probes very large frames so macOS guard pages aren't
911 // skipped in one jump. The `stp ... [sp, #stp_offset]`
912 // form is also bounded
913 // (signed 7-bit immediate * 8 = ±504 byte range), so
914 // we fall back to two `str` instructions when over.
915 // For very large frames (stp_offset > 32760, the
916 // signed 12-bit max for 64-bit ldr/str unsigned imm),
917 // we'd need a register-form load/store — not yet
918 // exercised in any test, so the panic catches it.
919 let sub_sp = fmt_stack_alloc(frame_size);
920 if stp_offset <= 504 {
921 format!("{}\n stp x29, x30, [sp, #{}]", sub_sp, stp_offset)
922 } else if stp_offset <= 32760 {
923 format!(
924 "{}\n str x29, [sp, #{}]\n str x30, [sp, #{}]",
925 sub_sp,
926 stp_offset,
927 stp_offset + 8
928 )
929 } else {
930 // Frame too large for any ldr/str unsigned immediate.
931 // Synthesize the address in x9 (caller-saved scratch)
932 // then use register-offset str.
933 let x9_addr = fmt_sp_imm("add", "x9", "sp", stp_offset);
934 format!(
935 "{}\n {}\n str x29, [x9]\n str x30, [x9, #8]",
936 sub_sp, x9_addr
937 )
938 }
939 }
940 ArmOpcode::LdpPost => {
941 let frame_size = mf.frame.size as i64;
942 let ldp_offset = frame_size - 16;
943 let add_sp = fmt_sp_imm("add", "sp", "sp", frame_size);
944 if ldp_offset <= 504 {
945 format!("ldp x29, x30, [sp, #{}]\n {}", ldp_offset, add_sp)
946 } else if ldp_offset <= 32760 {
947 format!(
948 "ldr x29, [sp, #{}]\n ldr x30, [sp, #{}]\n {}",
949 ldp_offset,
950 ldp_offset + 8,
951 add_sp
952 )
953 } else {
954 // Frame too large for unsigned immediate ldr.
955 // Synthesize address in x9 then restore with register-offset ldr.
956 let x9_addr = fmt_sp_imm("add", "x9", "sp", ldp_offset);
957 format!(
958 "{}\n ldr x29, [x9]\n ldr x30, [x9, #8]\n {}",
959 x9_addr, add_sp
960 )
961 }
962 }
963
964 // Non-preindex STP/LDP for callee-save pairs.
965 // Operands: [src1/dst1, src2/dst2, base, imm].
966 ArmOpcode::StpOffset => {
967 let r1 = op_str(&inst.operands[0]);
968 let r2 = op_str(&inst.operands[1]);
969 let base = op_str(&inst.operands[2]);
970 let off = match &inst.operands[3] {
971 MachineOperand::Imm(v) => *v,
972 MachineOperand::FrameSlot(v) => *v as i64,
973 _ => 0,
974 };
975 // STP signed-offset range: 7-bit signed × 8 → [-512, 504].
976 // Fall back to two individual STR instructions if out of range.
977 if (-512..=504).contains(&off) {
978 format!("stp {}, {}, [{}, #{}]", r1, r2, base, off)
979 } else {
980 format!(
981 "{}\n str {}, [x9]\n str {}, [x9, #8]",
982 fmt_addr_with_offset("x9", &base, off, "x16"),
983 r1,
984 r2
985 )
986 }
987 }
988 ArmOpcode::LdpOffset => {
989 let r1 = op_str(&inst.operands[0]);
990 let r2 = op_str(&inst.operands[1]);
991 let base = op_str(&inst.operands[2]);
992 let off = match &inst.operands[3] {
993 MachineOperand::Imm(v) => *v,
994 MachineOperand::FrameSlot(v) => *v as i64,
995 _ => 0,
996 };
997 // LDP signed-offset range: 7-bit signed × 8 → [-512, 504].
998 // Fall back to two individual LDR instructions if out of range.
999 if (-512..=504).contains(&off) {
1000 format!("ldp {}, {}, [{}, #{}]", r1, r2, base, off)
1001 } else {
1002 format!(
1003 "{}\n ldr {}, [x9]\n ldr {}, [x9, #8]",
1004 fmt_addr_with_offset("x9", &base, off, "x16"),
1005 r1,
1006 r2
1007 )
1008 }
1009 }
1010
1011 ArmOpcode::AdrpLdr => {
1012 if let MachineOperand::ConstPool(idx) = &inst.operands[1] {
1013 let label = const_pool_label(&mf.name, *idx);
1014 let dest = op_str(&inst.operands[0]);
1015 // ADRP requires a GP register. If dest is FP (s/d), use x8 as scratch.
1016 let is_fp = dest.starts_with('s') || dest.starts_with('d');
1017 if is_fp {
1018 format!(
1019 "adrp x8, {1}@PAGE\n ldr {0}, [x8, {1}@PAGEOFF]",
1020 dest, label
1021 )
1022 } else {
1023 format!(
1024 "adrp {0}, {1}@PAGE\n ldr {0}, [{0}, {1}@PAGEOFF]",
1025 dest, label
1026 )
1027 }
1028 } else {
1029 "nop ; bad adrp+ldr".into()
1030 }
1031 }
1032 ArmOpcode::AdrpAdd => {
1033 let dest = op_str(&inst.operands[0]);
1034 match &inst.operands[1] {
1035 MachineOperand::ConstPool(idx) => {
1036 let label = const_pool_label(&mf.name, *idx);
1037 format!(
1038 "adrp {0}, {1}@PAGE\n add {0}, {0}, {1}@PAGEOFF",
1039 dest, label
1040 )
1041 }
1042 MachineOperand::GlobalLabel(name) => {
1043 // Mach-O convention: globals get an underscore prefix.
1044 let sym = if name.starts_with('_') {
1045 name.clone()
1046 } else {
1047 format!("_{}", name)
1048 };
1049 format!(
1050 "adrp {0}, {1}@PAGE\n add {0}, {0}, {1}@PAGEOFF",
1051 dest, sym
1052 )
1053 }
1054 _ => "nop ; bad adrp+add".into(),
1055 }
1056 }
1057
1058 ArmOpcode::B => {
1059 match &inst.operands[0] {
1060 MachineOperand::BlockRef(id) => format!("b {}", mf.block(*id).label),
1061 // Tail call to an external symbol (TCO): B _callee
1062 MachineOperand::Extern(name) => {
1063 if name.starts_with('_') {
1064 format!("b {}", name)
1065 } else {
1066 format!("b _{}", name)
1067 }
1068 }
1069 _ => "b ???".into(),
1070 }
1071 }
1072 ArmOpcode::BCond => {
1073 let cond = if let MachineOperand::Cond(c) = &inst.operands[0] {
1074 cond_str(*c)
1075 } else {
1076 "eq"
1077 };
1078 let target = if let MachineOperand::BlockRef(id) = &inst.operands[1] {
1079 mf.block(*id).label.clone()
1080 } else {
1081 "???".into()
1082 };
1083 format!("b.{} {}", cond, target)
1084 }
1085 ArmOpcode::Cbz | ArmOpcode::Cbnz => {
1086 let mnemonic = match inst.opcode {
1087 ArmOpcode::Cbz => "cbz",
1088 _ => "cbnz",
1089 };
1090 let target = if let MachineOperand::BlockRef(id) = &inst.operands[1] {
1091 mf.block(*id).label.clone()
1092 } else {
1093 "???".into()
1094 };
1095 format!("{} {}, {}", mnemonic, op_str(&inst.operands[0]), target)
1096 }
1097 ArmOpcode::Tbz | ArmOpcode::Tbnz => {
1098 let mnemonic = match inst.opcode {
1099 ArmOpcode::Tbz => "tbz",
1100 _ => "tbnz",
1101 };
1102 let bit = if let MachineOperand::Imm(v) = &inst.operands[1] {
1103 *v
1104 } else {
1105 0
1106 };
1107 let target = if let MachineOperand::BlockRef(id) = &inst.operands[2] {
1108 mf.block(*id).label.clone()
1109 } else {
1110 "???".into()
1111 };
1112 format!(
1113 "{} {}, #{}, {}",
1114 mnemonic,
1115 op_str(&inst.operands[0]),
1116 bit,
1117 target
1118 )
1119 }
1120 ArmOpcode::Bl => {
1121 if let MachineOperand::Extern(name) = &inst.operands[0] {
1122 // Mach-O convention: C symbols get a _ prefix.
1123 if name.starts_with('_') {
1124 format!("bl {}", name) // already prefixed
1125 } else {
1126 format!("bl _{}", name) // add Mach-O prefix
1127 }
1128 } else {
1129 "bl ???".into()
1130 }
1131 }
1132 ArmOpcode::Blr => format!("blr {}", op_str(&inst.operands[0])),
1133 ArmOpcode::Sxtw => format!(
1134 "sxtw {}, {}",
1135 op_str(&inst.operands[0]),
1136 op_str(&inst.operands[1])
1137 ),
1138 ArmOpcode::Sxth => format!(
1139 "sxth {}, {}",
1140 op_str(&inst.operands[0]),
1141 op_str(&inst.operands[1])
1142 ),
1143 ArmOpcode::Sxtb => format!(
1144 "sxtb {}, {}",
1145 op_str(&inst.operands[0]),
1146 op_str(&inst.operands[1])
1147 ),
1148 ArmOpcode::Ret => "ret".into(),
1149 ArmOpcode::Nop => "nop".into(),
1150 ArmOpcode::Brk => {
1151 let imm = if let MachineOperand::Imm(v) = &inst.operands[0] {
1152 *v
1153 } else {
1154 1
1155 };
1156 format!("brk #{}", imm)
1157 }
1158
1159 // ---- NEON SIMD vector ops (Sprint 12 Stage 2) ----
1160 //
1161 // Each op forwards to a small helper so the lane-shape suffix
1162 // (.4s / .2d / .s[n] / .d[n]) lives in one place.
1163 ArmOpcode::AddV4S => fmt_vbinop(inst, "add", "4s"),
1164 ArmOpcode::AddV2D => fmt_vbinop(inst, "add", "2d"),
1165 ArmOpcode::SubV4S => fmt_vbinop(inst, "sub", "4s"),
1166 ArmOpcode::SubV2D => fmt_vbinop(inst, "sub", "2d"),
1167 ArmOpcode::MulV4S => fmt_vbinop(inst, "mul", "4s"),
1168 ArmOpcode::NegV4S => fmt_vunop(inst, "neg", "4s"),
1169 ArmOpcode::NegV2D => fmt_vunop(inst, "neg", "2d"),
1170 ArmOpcode::FaddV4S => fmt_vbinop(inst, "fadd", "4s"),
1171 ArmOpcode::FaddV2D => fmt_vbinop(inst, "fadd", "2d"),
1172 ArmOpcode::FsubV4S => fmt_vbinop(inst, "fsub", "4s"),
1173 ArmOpcode::FsubV2D => fmt_vbinop(inst, "fsub", "2d"),
1174 ArmOpcode::FmulV4S => fmt_vbinop(inst, "fmul", "4s"),
1175 ArmOpcode::FmulV2D => fmt_vbinop(inst, "fmul", "2d"),
1176 ArmOpcode::FdivV4S => fmt_vbinop(inst, "fdiv", "4s"),
1177 ArmOpcode::FdivV2D => fmt_vbinop(inst, "fdiv", "2d"),
1178 ArmOpcode::FnegV4S => fmt_vunop(inst, "fneg", "4s"),
1179 ArmOpcode::FnegV2D => fmt_vunop(inst, "fneg", "2d"),
1180 ArmOpcode::FabsV4S => fmt_vunop(inst, "fabs", "4s"),
1181 ArmOpcode::FabsV2D => fmt_vunop(inst, "fabs", "2d"),
1182 ArmOpcode::FsqrtV4S => fmt_vunop(inst, "fsqrt", "4s"),
1183 ArmOpcode::FsqrtV2D => fmt_vunop(inst, "fsqrt", "2d"),
1184 ArmOpcode::BslV16B => fmt_vbinop(inst, "bsl", "16b"),
1185 ArmOpcode::FcmgtV4S => fmt_vbinop(inst, "fcmgt", "4s"),
1186 ArmOpcode::FcmgtV2D => fmt_vbinop(inst, "fcmgt", "2d"),
1187 ArmOpcode::FcmgeV4S => fmt_vbinop(inst, "fcmge", "4s"),
1188 ArmOpcode::FcmgeV2D => fmt_vbinop(inst, "fcmge", "2d"),
1189 ArmOpcode::FcmeqV4S => fmt_vbinop(inst, "fcmeq", "4s"),
1190 ArmOpcode::FcmeqV2D => fmt_vbinop(inst, "fcmeq", "2d"),
1191 ArmOpcode::CmgtV4S => fmt_vbinop(inst, "cmgt", "4s"),
1192 ArmOpcode::CmgeV4S => fmt_vbinop(inst, "cmge", "4s"),
1193 ArmOpcode::CmeqV4S => fmt_vbinop(inst, "cmeq", "4s"),
1194 ArmOpcode::FmlaV4S => fmt_vbinop(inst, "fmla", "4s"),
1195 ArmOpcode::FmlaV2D => fmt_vbinop(inst, "fmla", "2d"),
1196 ArmOpcode::FminV4S => fmt_vbinop(inst, "fmin", "4s"),
1197 ArmOpcode::FminV2D => fmt_vbinop(inst, "fmin", "2d"),
1198 ArmOpcode::FmaxV4S => fmt_vbinop(inst, "fmax", "4s"),
1199 ArmOpcode::FmaxV2D => fmt_vbinop(inst, "fmax", "2d"),
1200 ArmOpcode::SminV4S => fmt_vbinop(inst, "smin", "4s"),
1201 ArmOpcode::SmaxV4S => fmt_vbinop(inst, "smax", "4s"),
1202 ArmOpcode::UminV4S => fmt_vbinop(inst, "umin", "4s"),
1203 ArmOpcode::UmaxV4S => fmt_vbinop(inst, "umax", "4s"),
1204
1205 // afs-as dialect: cross-lane reductions encode the shape in
1206 // the mnemonic suffix; the destination is a scalar `s/d` and
1207 // the source is the bare vector register.
1208 ArmOpcode::FaddpV2S => format!(
1209 "faddp.2s {}, {}",
1210 fp32_scalar(&inst.operands[0]),
1211 v_reg_bare(&inst.operands[1]),
1212 ),
1213 ArmOpcode::FaddpV2D => format!(
1214 "faddp.2d {}, {}",
1215 fp64_scalar(&inst.operands[0]),
1216 v_reg_bare(&inst.operands[1]),
1217 ),
1218 ArmOpcode::Faddv4S => format!(
1219 "faddv.4s {}, {}",
1220 fp32_scalar(&inst.operands[0]),
1221 v_reg_bare(&inst.operands[1]),
1222 ),
1223 ArmOpcode::Sminv4S => format!(
1224 "sminv.4s {}, {}",
1225 fp32_scalar(&inst.operands[0]),
1226 v_reg_bare(&inst.operands[1]),
1227 ),
1228 ArmOpcode::Smaxv4S => format!(
1229 "smaxv.4s {}, {}",
1230 fp32_scalar(&inst.operands[0]),
1231 v_reg_bare(&inst.operands[1]),
1232 ),
1233 ArmOpcode::FmaxvV4S => format!(
1234 "fmaxv.4s {}, {}",
1235 fp32_scalar(&inst.operands[0]),
1236 v_reg_bare(&inst.operands[1]),
1237 ),
1238 ArmOpcode::FminvV4S => format!(
1239 "fminv.4s {}, {}",
1240 fp32_scalar(&inst.operands[0]),
1241 v_reg_bare(&inst.operands[1]),
1242 ),
1243 ArmOpcode::FmaxpV2DScalar => format!(
1244 "fmaxp.2d {}, {}",
1245 fp64_scalar(&inst.operands[0]),
1246 v_reg_bare(&inst.operands[1]),
1247 ),
1248 ArmOpcode::FminpV2DScalar => format!(
1249 "fminp.2d {}, {}",
1250 fp64_scalar(&inst.operands[0]),
1251 v_reg_bare(&inst.operands[1]),
1252 ),
1253 ArmOpcode::Uminv4S => format!(
1254 "uminv.4s {}, {}",
1255 fp32_scalar(&inst.operands[0]),
1256 v_reg_bare(&inst.operands[1]),
1257 ),
1258 ArmOpcode::Umaxv4S => format!(
1259 "umaxv.4s {}, {}",
1260 fp32_scalar(&inst.operands[0]),
1261 v_reg_bare(&inst.operands[1]),
1262 ),
1263 ArmOpcode::Addv4S => format!(
1264 "addv.4s {}, {}",
1265 fp32_scalar(&inst.operands[0]),
1266 v_reg_bare(&inst.operands[1]),
1267 ),
1268
1269 ArmOpcode::DupGen4S => format!(
1270 "dup.4s {}, {}",
1271 v_reg_bare(&inst.operands[0]),
1272 op_str(&inst.operands[1]),
1273 ),
1274 ArmOpcode::DupGen2D => format!(
1275 "dup.2d {}, {}",
1276 v_reg_bare(&inst.operands[0]),
1277 op_str(&inst.operands[1]),
1278 ),
1279 ArmOpcode::DupEl4S => format!(
1280 "dup.4s {}, {}",
1281 v_reg_bare(&inst.operands[0]),
1282 v_lane_bare(&inst.operands[1], "s", 0),
1283 ),
1284 ArmOpcode::DupEl2D => format!(
1285 "dup.2d {}, {}",
1286 v_reg_bare(&inst.operands[0]),
1287 v_lane_bare(&inst.operands[1], "d", 0),
1288 ),
1289 ArmOpcode::Ins4S => {
1290 let lane = imm_u8(&inst.operands[1]);
1291 format!(
1292 "ins.s {}, {}",
1293 v_lane_bare(&inst.operands[0], "s", lane),
1294 op_str(&inst.operands[2]),
1295 )
1296 }
1297 ArmOpcode::Ins2D => {
1298 let lane = imm_u8(&inst.operands[1]);
1299 format!(
1300 "ins.d {}, {}",
1301 v_lane_bare(&inst.operands[0], "d", lane),
1302 op_str(&inst.operands[2]),
1303 )
1304 }
1305 ArmOpcode::Umov4S => {
1306 let lane = imm_u8(&inst.operands[2]);
1307 format!(
1308 "umov.s {}, {}",
1309 op_str(&inst.operands[0]),
1310 v_lane_bare(&inst.operands[1], "s", lane),
1311 )
1312 }
1313 ArmOpcode::Umov2D => {
1314 let lane = imm_u8(&inst.operands[2]);
1315 format!(
1316 "umov.d {}, {}",
1317 op_str(&inst.operands[0]),
1318 v_lane_bare(&inst.operands[1], "d", lane),
1319 )
1320 }
1321 ArmOpcode::FmovEl4S => {
1322 let lane = imm_u8(&inst.operands[2]);
1323 format!(
1324 "mov.s {}, {}",
1325 fp32_scalar(&inst.operands[0]),
1326 v_lane_bare(&inst.operands[1], "s", lane),
1327 )
1328 }
1329 ArmOpcode::FmovEl2D => {
1330 let lane = imm_u8(&inst.operands[2]);
1331 format!(
1332 "mov.d {}, {}",
1333 fp64_scalar(&inst.operands[0]),
1334 v_lane_bare(&inst.operands[1], "d", lane),
1335 )
1336 }
1337
1338 ArmOpcode::LdrQ => format!(
1339 "ldr {}, [{}, {}]",
1340 q_reg(&inst.operands[0]),
1341 op_str(&inst.operands[1]),
1342 op_str(&inst.operands[2]),
1343 ),
1344 ArmOpcode::StrQ => format!(
1345 "str {}, [{}, {}]",
1346 q_reg(&inst.operands[0]),
1347 op_str(&inst.operands[1]),
1348 op_str(&inst.operands[2]),
1349 ),
1350 }
1351 }
1352
1353 // ---- NEON formatting helpers ----
1354
1355 fn v_reg(op: &MachineOperand, shape: &str) -> String {
1356 match op {
1357 MachineOperand::VReg(id) => format!("v{}.{}", id.0, shape),
1358 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1359 format!("v{}.{}", n, shape)
1360 }
1361 _ => format!("{}.{}", op_str(op), shape),
1362 }
1363 }
1364
1365 fn q_reg(op: &MachineOperand) -> String {
1366 match op {
1367 MachineOperand::VReg(id) => format!("q{}", id.0),
1368 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1369 format!("q{}", n)
1370 }
1371 _ => format!("q{}", op_str(op)),
1372 }
1373 }
1374
1375 fn v_lane(op: &MachineOperand, lane_ty: &str, lane: u8) -> String {
1376 match op {
1377 MachineOperand::VReg(id) => format!("v{}.{}[{}]", id.0, lane_ty, lane),
1378 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1379 format!("v{}.{}[{}]", n, lane_ty, lane)
1380 }
1381 _ => format!("v{}.{}[{}]", op_str(op), lane_ty, lane),
1382 }
1383 }
1384
1385 fn fp32_scalar(op: &MachineOperand) -> String {
1386 match op {
1387 MachineOperand::VReg(id) => format!("s{}", id.0),
1388 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1389 format!("s{}", n)
1390 }
1391 _ => op_str(op),
1392 }
1393 }
1394
1395 fn fp64_scalar(op: &MachineOperand) -> String {
1396 match op {
1397 MachineOperand::VReg(id) => format!("d{}", id.0),
1398 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1399 format!("d{}", n)
1400 }
1401 _ => op_str(op),
1402 }
1403 }
1404
1405 fn imm_u8(op: &MachineOperand) -> u8 {
1406 if let MachineOperand::Imm(v) = op {
1407 *v as u8
1408 } else {
1409 0
1410 }
1411 }
1412
1413 fn fmt_vbinop(inst: &MachineInst, mnemonic: &str, shape: &str) -> String {
1414 // afs-as dialect: shape suffix is part of the mnemonic, operand
1415 // registers are bare (`fadd.4s v0, v1, v2`). Encodes to the same
1416 // bytes as the Apple/GNU `fadd v0.4s, v1.4s, v2.4s` form.
1417 format!(
1418 "{}.{} {}, {}, {}",
1419 mnemonic,
1420 shape,
1421 v_reg_bare(&inst.operands[0]),
1422 v_reg_bare(&inst.operands[1]),
1423 v_reg_bare(&inst.operands[2]),
1424 )
1425 }
1426
1427 fn fmt_vunop(inst: &MachineInst, mnemonic: &str, shape: &str) -> String {
1428 format!(
1429 "{}.{} {}, {}",
1430 mnemonic,
1431 shape,
1432 v_reg_bare(&inst.operands[0]),
1433 v_reg_bare(&inst.operands[1]),
1434 )
1435 }
1436
1437 fn v_reg_bare(op: &MachineOperand) -> String {
1438 match op {
1439 MachineOperand::VReg(id) => format!("v{}", id.0),
1440 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1441 format!("v{}", n)
1442 }
1443 _ => op_str(op),
1444 }
1445 }
1446
1447 fn v_lane_bare(op: &MachineOperand, _lane_ty: &str, lane: u8) -> String {
1448 // afs-as dialect for `umov.s w3, v0[2]` — bare reg with `[lane]`
1449 // suffix; the element-size width is encoded into the mnemonic
1450 // (`umov.s` / `umov.d`).
1451 match op {
1452 MachineOperand::VReg(id) => format!("v{}[{}]", id.0, lane),
1453 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1454 format!("v{}[{}]", n, lane)
1455 }
1456 _ => format!("{}[{}]", op_str(op), lane),
1457 }
1458 }
1459
1460 /// Format a machine operand as assembly text.
1461 fn op_str(op: &MachineOperand) -> String {
1462 match op {
1463 MachineOperand::VReg(id) => format!("v{}", id.0), // placeholder until regalloc
1464 MachineOperand::PhysReg(PhysReg::Sp) => "sp".into(),
1465 MachineOperand::PhysReg(PhysReg::Xzr) => "xzr".into(),
1466 MachineOperand::PhysReg(PhysReg::Wzr) => "wzr".into(),
1467 MachineOperand::PhysReg(PhysReg::Gp(n)) => format!("x{}", n),
1468 MachineOperand::PhysReg(PhysReg::Gp32(n)) => format!("w{}", n),
1469 MachineOperand::PhysReg(PhysReg::Fp(n)) => format!("d{}", n),
1470 MachineOperand::PhysReg(PhysReg::Fp32(n)) => format!("s{}", n),
1471 MachineOperand::Imm(v) => format!("#{}", v),
1472 MachineOperand::FrameSlot(off) => format!("[fp, #{}]", off),
1473 MachineOperand::Cond(c) => cond_str(*c).into(),
1474 MachineOperand::BlockRef(id) => format!("bb{}", id.0),
1475 MachineOperand::Extern(name) => name.clone(),
1476 MachineOperand::GlobalLabel(name) => {
1477 if name.starts_with('_') {
1478 name.clone()
1479 } else {
1480 format!("_{}", name)
1481 }
1482 }
1483 MachineOperand::ConstPool(idx) => format!("cp{}", idx),
1484 MachineOperand::Shift(s) => format!("lsl #{}", s),
1485 }
1486 }
1487
1488 fn fp_reg_str(op: &MachineOperand, is_f64: bool) -> String {
1489 match op {
1490 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1491 if is_f64 {
1492 format!("d{}", n)
1493 } else {
1494 format!("s{}", n)
1495 }
1496 }
1497 _ => op_str(op),
1498 }
1499 }
1500
1501 fn cond_str(c: ArmCond) -> &'static str {
1502 match c {
1503 ArmCond::Eq => "eq",
1504 ArmCond::Ne => "ne",
1505 ArmCond::Hs => "hs",
1506 ArmCond::Lo => "lo",
1507 ArmCond::Mi => "mi",
1508 ArmCond::Pl => "pl",
1509 ArmCond::Hi => "hi",
1510 ArmCond::Ls => "ls",
1511 ArmCond::Ge => "ge",
1512 ArmCond::Lt => "lt",
1513 ArmCond::Gt => "gt",
1514 ArmCond::Le => "le",
1515 }
1516 }
1517
1518 /// Generate a constant pool label.
/// Generate a per-function constant-pool label, e.g. `__main_cp0`.
fn const_pool_label(func: &str, idx: u32) -> String {
    let mut label = String::with_capacity(func.len() + 8);
    label.push_str("__");
    label.push_str(func);
    label.push_str("_cp");
    label.push_str(&idx.to_string());
    label
}
1522
// Unit tests for the emitter: prologue/epilogue shape, global data
// directives, addressing fallbacks, and the NEON text dialect.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::codegen::isel::select_function;
    use crate::ir::builder::FuncBuilder;
    use crate::ir::inst::*;
    use crate::ir::types::*;

    // Build a single void function named "test" via the IR builder,
    // run instruction selection, and return the emitted assembly text.
    fn emit_simple(build: impl FnOnce(&mut FuncBuilder)) -> String {
        let mut func = Function::new("test".into(), vec![], IrType::Void);
        {
            let mut b = FuncBuilder::new(&mut func);
            build(&mut b);
        }
        let mf = select_function(&func);
        emit_function(&mf)
    }

    #[test]
    fn emit_prologue_epilogue() {
        let asm = emit_simple(|b| b.ret_void());
        assert!(
            asm.contains("sub sp, sp,"),
            "missing frame allocation: {}",
            asm
        );
        assert!(
            asm.contains("stp x29, x30, [sp,"),
            "missing prologue save: {}",
            asm
        );
        assert!(
            asm.contains("ldp x29, x30, [sp,"),
            "missing epilogue restore: {}",
            asm
        );
        assert!(
            asm.contains("add sp, sp,"),
            "missing frame deallocation: {}",
            asm
        );
        assert!(asm.contains("ret"), "missing ret: {}", asm);
    }

    #[test]
    fn emit_integer_add() {
        let asm = emit_simple(|b| {
            let x = b.const_i32(10);
            let y = b.const_i32(20);
            let _z = b.iadd(x, y);
            b.ret_void();
        });
        assert!(asm.contains("add "), "missing add: {}", asm);
    }

    #[test]
    fn emit_function_label() {
        let asm = emit_simple(|b| b.ret_void());
        assert!(asm.contains(".globl _test"), "missing .globl: {}", asm);
        assert!(asm.contains("_test:"), "missing function label: {}", asm);
    }

    /// Verify that functions with frame sizes > 4095 use x16 scratch
    /// synthesis for the `sub sp, sp, #N` prologue and `add sp, sp, #N`
    /// epilogue rather than an out-of-range immediate.
    #[test]
    fn emit_large_frame_prologue() {
        // 700 allocas of i64 = 700 * 8 = 5600 bytes, well over 4095.
        let asm = emit_simple(|b| {
            for _ in 0..700 {
                let _ = b.alloca(IrType::Int(IntWidth::I64));
            }
            b.ret_void();
        });
        // The 12-bit immediate max is 4095, so the emitter must
        // synthesize the frame size via x16.
        assert!(
            asm.contains("movz x16,"),
            "large frame should use x16 synthesis: {}",
            asm
        );
        assert!(
            asm.contains("sub sp, sp, x16"),
            "large frame sub should use register form: {}",
            asm
        );
        assert!(
            asm.contains("add sp, sp, x16"),
            "large frame add should use register form: {}",
            asm
        );
        // Must NOT contain a raw "sub sp, sp, #5" that exceeds 4095.
        assert!(
            !asm.contains("sub sp, sp, #5"),
            "should not emit out-of-range immediate: {}",
            asm
        );
    }

    // 3000 * 8 = 24000 bytes of frame: big enough to cross the guard
    // page, so the emitter must touch each chunk while growing sp.
    #[test]
    fn emit_huge_frame_with_stack_probes() {
        let asm = emit_simple(|b| {
            for _ in 0..3000 {
                let _ = b.alloca(IrType::Int(IntWidth::I64));
            }
            b.ret_void();
        });
        assert!(
            asm.contains("str xzr, [sp]"),
            "huge frame should probe each chunk: {}",
            asm
        );
    }

    #[test]
    fn emit_branch() {
        let asm = emit_simple(|b| {
            let cond = b.const_bool(true);
            let bb_t = b.create_block("then");
            let bb_f = b.create_block("else");
            b.cond_branch(cond, bb_t, vec![], bb_f, vec![]);
            b.set_block(bb_t);
            b.ret_void();
            b.set_block(bb_f);
            b.ret_void();
        });
        assert!(asm.contains("b.ne"), "missing conditional branch: {}", asm);
        assert!(asm.contains("then_"), "missing then label: {}", asm);
        assert!(asm.contains("else_"), "missing else label: {}", asm);
    }

    // 2^64 has a zero low word and a high word of 1 — pins the
    // little-endian low/high `.quad` ordering for i128 globals.
    #[test]
    fn emit_i128_scalar_global_as_two_quads() {
        let asm = emit_globals(&[Global {
            name: "big".into(),
            ty: IrType::Int(IntWidth::I128),
            initializer: Some(GlobalInit::Int(18_446_744_073_709_551_616i128)),
        }]);

        assert!(
            asm.contains(".section __DATA,__data"),
            "missing data section:\n{}",
            asm
        );
        assert!(
            asm.contains(".private_extern _big"),
            "missing global symbol:\n{}",
            asm
        );
        assert!(
            asm.contains(".p2align 4"),
            "i128 globals need 16-byte alignment:\n{}",
            asm
        );
        assert_eq!(
            asm.matches(".quad").count(),
            2,
            "scalar i128 should emit two quads:\n{}",
            asm
        );
        assert!(
            asm.contains(".quad 0x0000000000000000\n .quad 0x0000000000000001"),
            "scalar i128 should emit low/high 64-bit words in memory order:\n{}",
            asm
        );
    }

    #[test]
    fn emit_i128_array_global_as_word_pairs() {
        let asm = emit_globals(&[Global {
            name: "arr".into(),
            ty: IrType::Array(Box::new(IrType::Int(IntWidth::I128)), 2),
            initializer: Some(GlobalInit::IntArray(vec![1, -1])),
        }]);

        assert_eq!(
            asm.matches(".quad").count(),
            4,
            "two i128 elements should emit four quads:\n{}",
            asm
        );
        assert!(
            asm.contains(".quad 0x0000000000000001\n .quad 0x0000000000000000"),
            "positive i128 array element should preserve low/high word order:\n{}",
            asm
        );
        assert!(
            asm.contains(".quad 0xffffffffffffffff\n .quad 0xffffffffffffffff"),
            "negative i128 array element should preserve two's-complement words:\n{}",
            asm
        );
    }

    #[test]
    fn emit_byte_array_global_uses_natural_alignment() {
        let asm = emit_globals(&[Global {
            name: "history".into(),
            ty: IrType::Array(Box::new(IrType::Int(IntWidth::I8)), 400),
            initializer: Some(GlobalInit::Zero),
        }]);

        assert!(
            asm.contains(".p2align 3\n_history:"),
            "byte-array globals that model descriptors/derived storage need 8-byte alignment:\n{}",
            asm
        );
    }

    #[test]
    fn emit_nested_byte_array_global_uses_full_storage_size() {
        let asm = emit_globals(&[Global {
            name: "command_cache".into(),
            ty: IrType::Array(
                Box::new(IrType::Array(Box::new(IrType::Int(IntWidth::I8)), 264)),
                4,
            ),
            initializer: Some(GlobalInit::Zero),
        }]);

        assert!(
            asm.contains("_command_cache:\n .space 1056"),
            "nested byte-array globals should reserve their full storage size:\n{}",
            asm
        );
    }

    // A MovReg with a w-register destination and an x-register source
    // must render both sides through the 32-bit view.
    #[test]
    fn emit_mov_reg_truncates_x_source_through_w_view() {
        let mf = MachineFunction::new("test".into());
        let inst = MachineInst {
            opcode: ArmOpcode::MovReg,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Gp32(21)),
                MachineOperand::PhysReg(PhysReg::Gp(20)),
            ],
            def: None,
        };

        assert_eq!(emit_inst(&inst, &mf), "mov w21, w20");
    }

    // FCVT mixes widths by design: the opcode (SD/DS), not the operand
    // register class, decides the printed s/d prefixes.
    #[test]
    fn emit_fcvt_uses_fp_register_widths() {
        let mf = MachineFunction::new("test".into());
        let to_single = MachineInst {
            opcode: ArmOpcode::FcvtSD,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Fp(0)),
                MachineOperand::PhysReg(PhysReg::Fp(1)),
            ],
            def: None,
        };
        let to_double = MachineInst {
            opcode: ArmOpcode::FcvtDS,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Fp32(2)),
                MachineOperand::PhysReg(PhysReg::Fp32(3)),
            ],
            def: None,
        };

        assert_eq!(emit_inst(&to_single, &mf), "fcvt s0, d1");
        assert_eq!(emit_inst(&to_double, &mf), "fcvt d2, s3");
    }

    // -544 is just past the STP/LDP signed-offset floor of -512, so
    // both must synthesize the address into x9 instead of emitting a
    // raw out-of-range offset.
    #[test]
    fn emit_large_negative_pair_offsets_use_scratch_addressing() {
        let mf = MachineFunction::new("test".into());
        let stp = MachineInst {
            opcode: ArmOpcode::StpOffset,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Gp(0)),
                MachineOperand::PhysReg(PhysReg::Gp(1)),
                MachineOperand::PhysReg(PhysReg::FP),
                MachineOperand::Imm(-544),
            ],
            def: None,
        };
        let ldp = MachineInst {
            opcode: ArmOpcode::LdpOffset,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Gp(2)),
                MachineOperand::PhysReg(PhysReg::Gp(3)),
                MachineOperand::PhysReg(PhysReg::FP),
                MachineOperand::Imm(-544),
            ],
            def: None,
        };

        let stp_asm = emit_inst(&stp, &mf);
        let ldp_asm = emit_inst(&ldp, &mf);
        assert!(
            stp_asm.contains("sub x9, x29, #544"),
            "large negative stp offset should synthesize address: {}",
            stp_asm
        );
        assert!(
            ldp_asm.contains("sub x9, x29, #544"),
            "large negative ldp offset should synthesize address: {}",
            ldp_asm
        );
        assert!(
            !stp_asm.contains("[x29, #-544]"),
            "stp should not emit out-of-range raw offset: {}",
            stp_asm
        );
        assert!(
            !ldp_asm.contains("[x29, #-544]"),
            "ldp should not emit out-of-range raw offset: {}",
            ldp_asm
        );
    }

    #[test]
    fn emit_internal_only_function_as_private_extern() {
        let mut mf = MachineFunction::new("helper".into());
        mf.internal_only = true;

        let asm = emit_function(&mf);

        assert!(
            asm.contains(".private_extern _helper"),
            "internal-only functions should not be emitted as globals:\n{}",
            asm
        );
        assert!(
            !asm.contains(".globl _helper"),
            "internal-only functions should not keep external linkage:\n{}",
            asm
        );
    }

    // ---- NEON SIMD emit smoke tests (Sprint 12 Stage 2) ----
    //
    // The vectorizer doesn't generate any of these yet, but the emit
    // formatters can be exercised directly by hand-building a
    // MachineInst and feeding it through `emit_inst`. These tests
    // pin the assembly text form so future codegen wiring has a
    // golden reference.

    use crate::codegen::mir::{ArmOpcode, MachineFunction, MachineInst, MachineOperand, RegClass};

    // Wrap a single hand-built instruction in a throwaway function
    // and return its emitted text.
    fn emit_one(opcode: ArmOpcode, operands: Vec<MachineOperand>) -> String {
        let mut mf = MachineFunction::new("t".into());
        mf.new_block("entry");
        let inst = MachineInst {
            opcode,
            operands,
            def: None,
        };
        emit_inst(&inst, &mf)
    }

    #[test]
    fn emit_fadd_v_4s_form() {
        let mut mf = MachineFunction::new("t".into());
        let v0 = mf.new_vreg(RegClass::V128);
        let v1 = mf.new_vreg(RegClass::V128);
        let v2 = mf.new_vreg(RegClass::V128);
        let asm = emit_one(
            ArmOpcode::FaddV4S,
            vec![
                MachineOperand::VReg(v0),
                MachineOperand::VReg(v1),
                MachineOperand::VReg(v2),
            ],
        );
        let _ = mf;
        // afs-as dialect: shape suffix on mnemonic, bare regs.
        assert_eq!(asm, "fadd.4s v0, v1, v2");
    }

    #[test]
    fn emit_fadd_v_2d_form() {
        let asm = emit_one(
            ArmOpcode::FaddV2D,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(1)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(2)),
            ],
        );
        assert_eq!(asm, "fadd.2d v0, v1, v2");
    }

    #[test]
    fn emit_fmla_v_4s_form() {
        let asm = emit_one(
            ArmOpcode::FmlaV4S,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(1)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(2)),
            ],
        );
        assert_eq!(asm, "fmla.4s v0, v1, v2");
    }

    // Cross-lane reductions: scalar destination, bare vector source.
    #[test]
    fn emit_addv_4s_reduction_form() {
        let asm = emit_one(
            ArmOpcode::Addv4S,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(1)),
            ],
        );
        assert_eq!(asm, "addv.4s s0, v1");
    }

    #[test]
    fn emit_dup_gen_4s_broadcasts_w_register() {
        let asm = emit_one(
            ArmOpcode::DupGen4S,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::PhysReg(crate::codegen::mir::PhysReg::Gp32(2)),
            ],
        );
        assert_eq!(asm, "dup.4s v0, w2");
    }

    #[test]
    fn emit_dup_el_4s_broadcasts_fp_lane_zero() {
        // Splatting an Fp32 scalar (which lives in v2's lane 0) into
        // a 4×f32 vector uses the lane-dup form. The gp form
        // `dup.4s v0, s2` is rejected by the assembler. afs-as
        // dialect: bare `vN[L]` (no `.s` suffix), with the lane
        // element width encoded into the `dup.4s` mnemonic.
        let asm = emit_one(
            ArmOpcode::DupEl4S,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(2)),
            ],
        );
        assert_eq!(asm, "dup.4s v0, v2[0]");
    }

    #[test]
    fn emit_dup_el_2d_broadcasts_fp_lane_zero() {
        let asm = emit_one(
            ArmOpcode::DupEl2D,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(2)),
            ],
        );
        assert_eq!(asm, "dup.2d v0, v2[0]");
    }

    #[test]
    fn emit_ldr_q_form() {
        let asm = emit_one(
            ArmOpcode::LdrQ,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::PhysReg(crate::codegen::mir::PhysReg::Gp(1)),
                MachineOperand::Imm(16),
            ],
        );
        assert_eq!(asm, "ldr q0, [x1, #16]");
    }

    #[test]
    fn emit_str_q_form() {
        let asm = emit_one(
            ArmOpcode::StrQ,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::PhysReg(crate::codegen::mir::PhysReg::Gp(1)),
                MachineOperand::Imm(0),
            ],
        );
        assert_eq!(asm, "str q0, [x1, #0]");
    }

    #[test]
    fn emit_umov_extracts_lane() {
        let asm = emit_one(
            ArmOpcode::Umov4S,
            vec![
                MachineOperand::PhysReg(crate::codegen::mir::PhysReg::Gp32(3)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::Imm(2),
            ],
        );
        assert_eq!(asm, "umov.s w3, v0[2]");
    }
}
2013