| 1 | //! Assembly text emission — converts Machine IR to ARM64 assembly text. |
| 2 | //! |
| 3 | //! Produces output compatible with both afs-as and Apple's system assembler. |
| 4 | |
| 5 | use super::mir::*; |
| 6 | use std::fmt::Write; |
| 7 | |
/// Split an `i128` into its (low, high) 64-bit halves, treating the
/// value as a raw two's-complement bit pattern.
fn split_i128_words(value: i128) -> (u64, u64) {
    let raw = value as u128;
    let low = raw as u64;
    let high = (raw >> 64) as u64;
    (low, high)
}

/// Write an `i128` as two `.quad` directives, low word first, matching
/// AArch64's little-endian in-memory layout for 128-bit integers.
fn emit_i128_words(out: &mut String, value: i128) {
    let (low, high) = split_i128_words(value);
    for word in [low, high] {
        writeln!(out, " .quad 0x{:016x}", word).unwrap();
    }
}
| 18 | |
/// Emit a comma-separated `.byte` directive for `bytes`. An empty
/// slice emits nothing at all (a bare `.byte` would be malformed).
fn emit_byte_values(out: &mut String, bytes: &[u8]) {
    if bytes.is_empty() {
        return;
    }
    write!(out, " .byte ").unwrap();
    for (i, b) in bytes.iter().enumerate() {
        if i > 0 {
            out.push_str(", ");
        }
        write!(out, "{}", b).unwrap();
    }
    out.push('\n');
}
| 30 | |
/// Pick a log2 alignment for a byte array: the largest power of two
/// (capped at 8) not exceeding the array size.
fn byte_array_align_log2(byte_count: u64) -> u8 {
    match byte_count {
        0..=1 => 0,
        2..=3 => 1,
        4..=7 => 2,
        _ => 3,
    }
}
| 42 | |
/// Emit module-level globals as a `.section __DATA,__data` block.
/// Each global gets a label and a directive matching its type
/// (`.long`, `.quad`, `.single`, `.double`, etc.) plus the
/// initializer value. Zero-initialized globals still emit an
/// explicit zero so the symbol resolves at link time.
///
/// Array-typed globals: the IR type is `Array<i8, byte_size>` so
/// the element count isn't directly recoverable from the type.
/// The caller must use `IntArray`/`FloatArray` initializers that
/// carry the element count explicitly. Zero-initialized arrays
/// fall back to `.space byte_size`.
///
/// Module globals (`afs_mod_*` and `afs_common_*`) are emitted as
/// `.globl` so other translation units can reference them via USE.
/// Non-module globals (SAVE-promoted locals) stay `.private_extern`
/// to prevent cross-TU collisions (audit Maj-1).
pub fn emit_globals(globals: &[crate::ir::inst::Global]) -> String {
    use crate::ir::inst::GlobalInit;
    use crate::ir::types::{FloatWidth, IntWidth, IrType};

    let mut out = String::new();
    // No globals: emit nothing, not even the section header.
    if globals.is_empty() {
        return out;
    }

    writeln!(out, ".section __DATA,__data").unwrap();
    for g in globals {
        // Mach-O symbols carry a leading underscore; don't double-prefix
        // a name that already starts with one.
        let symbol = if g.name.starts_with('_') {
            g.name.clone()
        } else {
            format!("_{}", g.name)
        };
        // Module globals need external linkage for multi-file.
        let is_module_global = g.name.starts_with("afs_mod_") || g.name.starts_with("afs_common_");
        if is_module_global {
            writeln!(out, ".globl {}", symbol).unwrap();
        } else {
            writeln!(out, ".private_extern {}", symbol).unwrap();
        }

        // Array globals carry `Array<elem_ty, count>`. Pick the
        // directive from the element type so `.long` / `.quad` /
        // `.single` / `.double` all work correctly.
        if let IrType::Array(elem_ty, count) = &g.ty {
            let (align, directive, _elem_bytes, is_float) = match elem_ty.as_ref() {
                IrType::Int(IntWidth::I8) | IrType::Bool => {
                    (byte_array_align_log2(*count), ".byte", 1, false)
                }
                IrType::Int(IntWidth::I16) => (1, ".short", 2, false),
                IrType::Int(IntWidth::I32) => (2, ".long", 4, false),
                IrType::Int(IntWidth::I64) => (3, ".quad", 8, false),
                // i128 elements are handled specially below as lo/hi
                // `.quad` pairs; `.quad` here is only the fallthrough.
                IrType::Int(IntWidth::I128) => (4, ".quad", 16, false),
                IrType::Float(FloatWidth::F32) => (2, ".single", 4, true),
                IrType::Float(FloatWidth::F64) => (3, ".double", 8, true),
                _ => (3, ".quad", 8, false),
            };
            // `.p2align 0` would be a no-op; skip it for byte alignment.
            if align > 0 {
                writeln!(out, ".p2align {}", align).unwrap();
            }
            writeln!(out, "{}:", symbol).unwrap();
            match &g.initializer {
                // 128-bit elements: each value expands to two `.quad`s.
                Some(GlobalInit::IntArray(vs))
                    if matches!(elem_ty.as_ref(), IrType::Int(IntWidth::I128)) =>
                {
                    for v in vs {
                        emit_i128_words(&mut out, *v);
                    }
                }
                Some(GlobalInit::IntArray(vs)) if !is_float => {
                    for v in vs {
                        writeln!(out, " {} {}", directive, v).unwrap();
                    }
                }
                Some(GlobalInit::FloatArray(vs)) if is_float => {
                    for v in vs {
                        writeln!(out, " {} {}", directive, v).unwrap();
                    }
                }
                Some(GlobalInit::String(bytes)) => {
                    emit_byte_values(&mut out, bytes);
                    // Pad short string initializers out to the declared
                    // array size with zero bytes.
                    let total_bytes = g.ty.size_bytes() as usize;
                    if bytes.len() < total_bytes {
                        writeln!(out, " .space {}", total_bytes - bytes.len()).unwrap();
                    }
                }
                _ => {
                    // Nested arrays (for example arrays of byte-packed derived
                    // values) don't have a scalar element directive. Emit their
                    // zero-initialized storage using the full IR type size
                    // instead of falling back to a bogus ".quad * count" size.
                    let byte_size = g.ty.size_bytes();
                    writeln!(out, " .space {}", byte_size).unwrap();
                }
            }
            continue;
        }

        // Scalar i128: 16-byte aligned, emitted as a lo/hi quad pair.
        if matches!(g.ty, IrType::Int(IntWidth::I128)) {
            writeln!(out, ".p2align 4").unwrap();
            writeln!(out, "{}:", symbol).unwrap();
            match &g.initializer {
                Some(GlobalInit::Int(v)) => emit_i128_words(&mut out, *v),
                Some(GlobalInit::Zero) | None => emit_i128_words(&mut out, 0),
                _ => writeln!(out, " .space 16").unwrap(),
            }
            continue;
        }

        // Scalar globals: pick alignment + storage directive.
        // Audit Med-5: NaN/Inf must round-trip portably across
        // assemblers. Apple's `as` accepts `.single NaN` but GNU
        // binutils does not. Emit non-finite floats as their
        // bit-pattern via `.long` / `.quad` so the same .s file
        // assembles cleanly on both.
        let is_nonfinite_float = matches!(
            (&g.ty, &g.initializer),
            (IrType::Float(_), Some(GlobalInit::Float(v))) if !v.is_finite()
        );
        let (align, directive, default_zero) = if is_nonfinite_float {
            match &g.ty {
                IrType::Float(FloatWidth::F32) => (2, ".long", "0"),
                _ => (3, ".quad", "0"),
            }
        } else {
            match &g.ty {
                IrType::Int(IntWidth::I8) | IrType::Bool => (0, ".byte", "0"),
                IrType::Int(IntWidth::I16) => (1, ".short", "0"),
                IrType::Int(IntWidth::I32) => (2, ".long", "0"),
                IrType::Int(IntWidth::I64) => (3, ".quad", "0"),
                IrType::Float(FloatWidth::F32) => (2, ".single", "0.0"),
                IrType::Float(FloatWidth::F64) => (3, ".double", "0.0"),
                _ => (3, ".quad", "0"), // pointers and aggregates: 8-byte slot
            }
        };
        if align > 0 {
            writeln!(out, ".p2align {}", align).unwrap();
        }
        writeln!(out, "{}:", symbol).unwrap();
        let value = match &g.initializer {
            Some(GlobalInit::Int(v)) => v.to_string(),
            Some(GlobalInit::Float(v)) => {
                if v.is_finite() {
                    format!("{}", v)
                } else {
                    // Bit-pattern emission for NaN / ±Inf.
                    match &g.ty {
                        IrType::Float(FloatWidth::F32) => {
                            // f64→f32 cast preserves NaN-ness and infinity
                            // sign, so the narrowed bits stay non-finite.
                            format!("0x{:08x}", (*v as f32).to_bits())
                        }
                        _ => format!("0x{:016x}", v.to_bits()),
                    }
                }
            }
            Some(GlobalInit::Zero) | None => default_zero.into(),
            // Byte-sized scalar with a "string" initializer: store the
            // first byte's value (empty string falls back to 0).
            Some(GlobalInit::String(bytes))
                if matches!(g.ty, IrType::Int(IntWidth::I8) | IrType::Bool) =>
            {
                bytes.first().copied().unwrap_or(0).to_string()
            }
            Some(GlobalInit::String(_)) => default_zero.into(),
            Some(GlobalInit::IntArray(_)) | Some(GlobalInit::FloatArray(_)) => {
                // Array initializer on a scalar-typed global —
                // shouldn't happen, but emit zero as a safe fallback.
                default_zero.into()
            }
        };
        writeln!(out, " {} {}", directive, value).unwrap();
    }
    out
}
| 213 | |
| 214 | /// Emit a machine function as ARM64 assembly text. |
| 215 | pub fn emit_function(mf: &MachineFunction) -> String { |
| 216 | let mut out = String::new(); |
| 217 | |
| 218 | // Function directive. |
| 219 | if mf.internal_only { |
| 220 | writeln!(out, ".private_extern _{}", mf.name).unwrap(); |
| 221 | } else { |
| 222 | writeln!(out, ".globl _{}", mf.name).unwrap(); |
| 223 | } |
| 224 | writeln!(out, ".p2align 2").unwrap(); |
| 225 | writeln!(out, "_{}:", mf.name).unwrap(); |
| 226 | |
| 227 | for block in &mf.blocks { |
| 228 | // Don't re-emit entry label (it's the function label). |
| 229 | if block.id != MBlockId(0) { |
| 230 | writeln!(out, "{}:", block.label).unwrap(); |
| 231 | } |
| 232 | |
| 233 | for inst in &block.insts { |
| 234 | writeln!(out, " {}", emit_inst(inst, mf)).unwrap(); |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | // Constant pool. |
| 239 | if !mf.const_pool.is_empty() { |
| 240 | writeln!(out).unwrap(); |
| 241 | writeln!(out, ".section __DATA,__const").unwrap(); |
| 242 | for (i, entry) in mf.const_pool.iter().enumerate() { |
| 243 | let label = const_pool_label(&mf.name, i as u32); |
| 244 | match entry { |
| 245 | ConstPoolEntry::F32(v) => { |
| 246 | writeln!(out, ".p2align 2").unwrap(); |
| 247 | writeln!(out, "{}:", label).unwrap(); |
| 248 | // Emit as hex integer to avoid decimal expansion issues |
| 249 | // with large/small floats that the assembler can't parse. |
| 250 | writeln!(out, " .long 0x{:08x}", v.to_bits()).unwrap(); |
| 251 | } |
| 252 | ConstPoolEntry::F64(v) => { |
| 253 | writeln!(out, ".p2align 3").unwrap(); |
| 254 | writeln!(out, "{}:", label).unwrap(); |
| 255 | writeln!(out, " .quad 0x{:016x}", v.to_bits()).unwrap(); |
| 256 | } |
| 257 | ConstPoolEntry::I64(v) => { |
| 258 | writeln!(out, ".p2align 3").unwrap(); |
| 259 | writeln!(out, "{}:", label).unwrap(); |
| 260 | writeln!(out, " .quad {}", v).unwrap(); |
| 261 | } |
| 262 | ConstPoolEntry::Bytes(b) => { |
| 263 | writeln!(out, ".p2align 3").unwrap(); |
| 264 | writeln!(out, "{}:", label).unwrap(); |
| 265 | write!(out, " .ascii \"").unwrap(); |
| 266 | for &byte in b { |
| 267 | match byte { |
| 268 | b'\\' => write!(out, "\\\\").unwrap(), |
| 269 | b'"' => write!(out, "\\\"").unwrap(), |
| 270 | b'\n' => write!(out, "\\n").unwrap(), |
| 271 | b'\t' => write!(out, "\\t").unwrap(), |
| 272 | b if b.is_ascii_graphic() || b == b' ' => { |
| 273 | write!(out, "{}", b as char).unwrap(); |
| 274 | } |
| 275 | b => write!(out, "\\x{:02x}", b).unwrap(), |
| 276 | } |
| 277 | } |
| 278 | writeln!(out, "\"").unwrap(); |
| 279 | } |
| 280 | } |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | out |
| 285 | } |
| 286 | |
/// Format `OP sp, sp, #N` (or `add x29, sp, #N`), falling back
/// to a synthesized sequence via the AAPCS64 scratch register
/// x16 (IP0) when N exceeds the 12-bit immediate range. x16 is
/// free in the prologue/epilogue per AAPCS64 — it has no
/// caller-saved value at function entry and can be clobbered
/// before/after the FP/LR save.
///
/// Audit6 BLOCKING-5 (related to BLOCKING-4): functions whose
/// frame size exceeds 4095 bytes used to emit raw
/// `sub sp, sp, #4144` and the assembler rejected the
/// immediate. This came up after audit6 BLOCKING-4 added
/// per-allocate descriptor buffers, but it's a latent bug that
/// any large-frame function would hit.
///
/// Fix over the previous revision: a negative N used to be
/// masked into 16-bit chunks of its two's-complement low 32
/// bits, synthesizing a large *positive* x16 (e.g. N = -8 became
/// 0xFFFFFFF8), and any chunk above bit 31 was dropped. Negative
/// offsets now flip the add/sub direction and synthesize |N|,
/// and all four 16-bit chunks of the magnitude are emitted.
/// Output is byte-identical for all previously-valid inputs
/// (non-negative N that fits in 32 bits).
fn fmt_sp_imm(op: &str, dest: &str, base: &str, n: i64) -> String {
    // Fast path: the immediate fits the 12-bit unsigned field.
    if (0..=4095).contains(&n) {
        return format!("{} {}, {}, #{}", op, dest, base, n);
    }
    // Negative offsets: flip the direction so a positive magnitude
    // can be encoded (sub #-8 is not a thing; add/sub are swapped).
    let flipped = if op == "sub" { "add" } else { "sub" };
    if (-4095..=-1).contains(&n) {
        return format!("{} {}, {}, #{}", flipped, dest, base, -n);
    }
    // Synthesize |n| in x16 via movz/movk, then use the register
    // form. Zero chunks after the first are skipped; the first
    // chunk is always a movz so x16's old contents are cleared.
    let magnitude = n.unsigned_abs();
    let mut mov: Vec<String> = Vec::new();
    for shift in [0u32, 16, 32, 48] {
        let chunk = (magnitude >> shift) & 0xFFFF;
        if chunk == 0 && !mov.is_empty() {
            continue;
        }
        if mov.is_empty() {
            mov.push(format!("movz x16, #{}", chunk));
        } else {
            mov.push(format!("movk x16, #{}, lsl #{}", chunk, shift));
        }
    }
    let eff_op = if n < 0 { flipped } else { op };
    format!("{}\n {} {}, {}, x16", mov.join("\n "), eff_op, dest, base)
}
| 314 | |
| 315 | fn fmt_stack_alloc(frame_size: i64) -> String { |
| 316 | // Apple Silicon uses large guard pages, so jumping the stack pointer |
| 317 | // down by a huge frame in one shot can skip the guard and fault on the |
| 318 | // first real touch. Probe the stack one chunk at a time for large |
| 319 | // frames to keep growth fault-safe. |
| 320 | const STACK_PROBE_STRIDE: i64 = 16 * 1024; |
| 321 | |
| 322 | if frame_size <= STACK_PROBE_STRIDE { |
| 323 | return fmt_sp_imm("sub", "sp", "sp", frame_size); |
| 324 | } |
| 325 | |
| 326 | let mut lines = Vec::new(); |
| 327 | let mut remaining = frame_size; |
| 328 | while remaining > 0 { |
| 329 | let step = remaining.min(STACK_PROBE_STRIDE); |
| 330 | lines.push(fmt_sp_imm("sub", "sp", "sp", step)); |
| 331 | lines.push("str xzr, [sp]".to_string()); |
| 332 | remaining -= step; |
| 333 | } |
| 334 | lines.join("\n ") |
| 335 | } |
| 336 | |
/// Materialize an arbitrary 64-bit constant in `reg` as a movz/movk
/// chain, skipping zero chunks after the initial movz.
fn fmt_u64_imm(reg: &str, value: u64) -> String {
    let mut lines: Vec<String> = Vec::new();
    for shift in (0u32..64).step_by(16) {
        let chunk = ((value >> shift) & 0xFFFF) as u16;
        if lines.is_empty() {
            // The first instruction is always a movz — even for a
            // zero chunk — so the register's old contents are cleared.
            lines.push(format!("movz {}, #{}", reg, chunk));
        } else if chunk != 0 {
            lines.push(format!("movk {}, #{}, lsl #{}", reg, chunk, shift));
        }
    }
    lines.join("\n ")
}
| 356 | |
| 357 | fn fmt_addr_with_offset(dest: &str, base: &str, offset: i64, scratch: &str) -> String { |
| 358 | if offset == 0 { |
| 359 | return format!("mov {}, {}", dest, base); |
| 360 | } |
| 361 | |
| 362 | if (0..=4095).contains(&offset) { |
| 363 | return format!("add {}, {}, #{}", dest, base, offset); |
| 364 | } |
| 365 | if (-4095..=-1).contains(&offset) { |
| 366 | return format!("sub {}, {}, #{}", dest, base, -offset); |
| 367 | } |
| 368 | |
| 369 | let imm = fmt_u64_imm(scratch, offset.unsigned_abs()); |
| 370 | let op = if offset.is_negative() { "sub" } else { "add" }; |
| 371 | format!("{}\n {} {}, {}, {}", imm, op, dest, base, scratch) |
| 372 | } |
| 373 | |
/// Emit a single machine instruction as assembly text. Public so the
/// branch-relaxation pass can count emit-time instruction bytes
/// directly rather than re-deriving each opcode's expansion rules.
///
/// Thin public wrapper over the private [`emit_inst`]; the returned
/// string may span multiple lines (joined with "\n ") when an opcode
/// expands to a synthesized sequence.
pub fn emit_inst_text(inst: &MachineInst, mf: &MachineFunction) -> String {
    emit_inst(inst, mf)
}
| 380 | |
| 381 | /// Emit a single machine instruction as assembly text. |
| 382 | fn emit_inst(inst: &MachineInst, mf: &MachineFunction) -> String { |
| 383 | match inst.opcode { |
| 384 | ArmOpcode::AddReg => format!( |
| 385 | "add {}, {}, {}", |
| 386 | op_str(&inst.operands[0]), |
| 387 | op_str(&inst.operands[1]), |
| 388 | op_str(&inst.operands[2]) |
| 389 | ), |
| 390 | ArmOpcode::AddsReg => format!( |
| 391 | "adds {}, {}, {}", |
| 392 | op_str(&inst.operands[0]), |
| 393 | op_str(&inst.operands[1]), |
| 394 | op_str(&inst.operands[2]) |
| 395 | ), |
| 396 | ArmOpcode::AdcReg => format!( |
| 397 | "adc {}, {}, {}", |
| 398 | op_str(&inst.operands[0]), |
| 399 | op_str(&inst.operands[1]), |
| 400 | op_str(&inst.operands[2]) |
| 401 | ), |
| 402 | ArmOpcode::AddImm => { |
| 403 | let dest = op_str(&inst.operands[0]); |
| 404 | let base = op_str(&inst.operands[1]); |
| 405 | let imm: i64 = match &inst.operands[2] { |
| 406 | MachineOperand::FrameSlot(off) => *off as i64, |
| 407 | MachineOperand::Imm(-1) => { |
| 408 | // Sentinel: prologue FP setup → frame_size - 16 |
| 409 | mf.frame.size.saturating_sub(16) as i64 |
| 410 | } |
| 411 | MachineOperand::Imm(v) => *v, |
| 412 | _ => return format!("add {}, {}, {}", dest, base, op_str(&inst.operands[2])), |
| 413 | }; |
| 414 | // Both `add x29, sp, #N` (FP setup) and `add Xd, Xn, #N` |
| 415 | // need the > 4095 fallback. Use the same scratch |
| 416 | // synthesis since x16 is safe in the prologue. |
| 417 | fmt_sp_imm("add", &dest, &base, imm) |
| 418 | } |
| 419 | ArmOpcode::SubReg => format!( |
| 420 | "sub {}, {}, {}", |
| 421 | op_str(&inst.operands[0]), |
| 422 | op_str(&inst.operands[1]), |
| 423 | op_str(&inst.operands[2]) |
| 424 | ), |
| 425 | ArmOpcode::SubsReg => format!( |
| 426 | "subs {}, {}, {}", |
| 427 | op_str(&inst.operands[0]), |
| 428 | op_str(&inst.operands[1]), |
| 429 | op_str(&inst.operands[2]) |
| 430 | ), |
| 431 | ArmOpcode::SbcReg => format!( |
| 432 | "sbc {}, {}, {}", |
| 433 | op_str(&inst.operands[0]), |
| 434 | op_str(&inst.operands[1]), |
| 435 | op_str(&inst.operands[2]) |
| 436 | ), |
| 437 | ArmOpcode::SubImm => { |
| 438 | let imm: i64 = match &inst.operands[2] { |
| 439 | MachineOperand::Imm(-1) => { |
| 440 | // Sentinel: epilogue SP restore → frame_size - 16 |
| 441 | mf.frame.size.saturating_sub(16) as i64 |
| 442 | } |
| 443 | MachineOperand::Imm(v) => *v, |
| 444 | _ => 0, |
| 445 | }; |
| 446 | let dest = op_str(&inst.operands[0]); |
| 447 | let base = op_str(&inst.operands[1]); |
| 448 | fmt_sp_imm("sub", &dest, &base, imm) |
| 449 | } |
| 450 | ArmOpcode::Mul => format!( |
| 451 | "mul {}, {}, {}", |
| 452 | op_str(&inst.operands[0]), |
| 453 | op_str(&inst.operands[1]), |
| 454 | op_str(&inst.operands[2]) |
| 455 | ), |
| 456 | ArmOpcode::Sdiv => format!( |
| 457 | "sdiv {}, {}, {}", |
| 458 | op_str(&inst.operands[0]), |
| 459 | op_str(&inst.operands[1]), |
| 460 | op_str(&inst.operands[2]) |
| 461 | ), |
| 462 | ArmOpcode::Madd => format!( |
| 463 | "madd {}, {}, {}, {}", |
| 464 | op_str(&inst.operands[0]), |
| 465 | op_str(&inst.operands[1]), |
| 466 | op_str(&inst.operands[2]), |
| 467 | op_str(&inst.operands[3]) |
| 468 | ), |
| 469 | ArmOpcode::Msub => format!( |
| 470 | "msub {}, {}, {}, {}", |
| 471 | op_str(&inst.operands[0]), |
| 472 | op_str(&inst.operands[1]), |
| 473 | op_str(&inst.operands[2]), |
| 474 | op_str(&inst.operands[3]) |
| 475 | ), |
| 476 | ArmOpcode::Neg => format!( |
| 477 | "neg {}, {}", |
| 478 | op_str(&inst.operands[0]), |
| 479 | op_str(&inst.operands[1]) |
| 480 | ), |
| 481 | |
| 482 | ArmOpcode::AndReg => format!( |
| 483 | "and {}, {}, {}", |
| 484 | op_str(&inst.operands[0]), |
| 485 | op_str(&inst.operands[1]), |
| 486 | op_str(&inst.operands[2]) |
| 487 | ), |
| 488 | ArmOpcode::OrrReg => format!( |
| 489 | "orr {}, {}, {}", |
| 490 | op_str(&inst.operands[0]), |
| 491 | op_str(&inst.operands[1]), |
| 492 | op_str(&inst.operands[2]) |
| 493 | ), |
| 494 | ArmOpcode::EorReg => format!( |
| 495 | "eor {}, {}, {}", |
| 496 | op_str(&inst.operands[0]), |
| 497 | op_str(&inst.operands[1]), |
| 498 | op_str(&inst.operands[2]) |
| 499 | ), |
| 500 | ArmOpcode::OrnReg => format!( |
| 501 | "orn {}, {}, {}", |
| 502 | op_str(&inst.operands[0]), |
| 503 | op_str(&inst.operands[1]), |
| 504 | op_str(&inst.operands[2]) |
| 505 | ), |
| 506 | ArmOpcode::LslReg => format!( |
| 507 | "lsl {}, {}, {}", |
| 508 | op_str(&inst.operands[0]), |
| 509 | op_str(&inst.operands[1]), |
| 510 | op_str(&inst.operands[2]) |
| 511 | ), |
| 512 | ArmOpcode::LsrReg => format!( |
| 513 | "lsr {}, {}, {}", |
| 514 | op_str(&inst.operands[0]), |
| 515 | op_str(&inst.operands[1]), |
| 516 | op_str(&inst.operands[2]) |
| 517 | ), |
| 518 | ArmOpcode::AsrReg => format!( |
| 519 | "asr {}, {}, {}", |
| 520 | op_str(&inst.operands[0]), |
| 521 | op_str(&inst.operands[1]), |
| 522 | op_str(&inst.operands[2]) |
| 523 | ), |
| 524 | |
| 525 | ArmOpcode::Mvn => format!( |
| 526 | "mvn {}, {}", |
| 527 | op_str(&inst.operands[0]), |
| 528 | op_str(&inst.operands[1]) |
| 529 | ), |
| 530 | ArmOpcode::Clz => format!( |
| 531 | "clz {}, {}", |
| 532 | op_str(&inst.operands[0]), |
| 533 | op_str(&inst.operands[1]) |
| 534 | ), |
| 535 | ArmOpcode::Rbit => format!( |
| 536 | "rbit {}, {}", |
| 537 | op_str(&inst.operands[0]), |
| 538 | op_str(&inst.operands[1]) |
| 539 | ), |
| 540 | |
| 541 | ArmOpcode::CmpReg => format!( |
| 542 | "cmp {}, {}", |
| 543 | op_str(&inst.operands[0]), |
| 544 | op_str(&inst.operands[1]) |
| 545 | ), |
| 546 | ArmOpcode::CmpImm => format!( |
| 547 | "cmp {}, #{}", |
| 548 | op_str(&inst.operands[0]), |
| 549 | if let MachineOperand::Imm(v) = &inst.operands[1] { |
| 550 | *v |
| 551 | } else { |
| 552 | 0 |
| 553 | } |
| 554 | ), |
| 555 | ArmOpcode::Cset | ArmOpcode::FCset => { |
| 556 | let cond = if let MachineOperand::Cond(c) = &inst.operands[1] { |
| 557 | cond_str(*c) |
| 558 | } else { |
| 559 | "eq" |
| 560 | }; |
| 561 | format!("cset {}, {}", op_str(&inst.operands[0]), cond) |
| 562 | } |
| 563 | ArmOpcode::CselReg => { |
| 564 | let cond = if let MachineOperand::Cond(c) = &inst.operands[3] { |
| 565 | cond_str(*c) |
| 566 | } else { |
| 567 | "eq" |
| 568 | }; |
| 569 | format!( |
| 570 | "csel {}, {}, {}, {}", |
| 571 | op_str(&inst.operands[0]), |
| 572 | op_str(&inst.operands[1]), |
| 573 | op_str(&inst.operands[2]), |
| 574 | cond |
| 575 | ) |
| 576 | } |
| 577 | ArmOpcode::FCmpReg => format!( |
| 578 | "fcmp {}, {}", |
| 579 | op_str(&inst.operands[0]), |
| 580 | op_str(&inst.operands[1]) |
| 581 | ), |
| 582 | ArmOpcode::FcselReg => { |
| 583 | let cond = if let MachineOperand::Cond(c) = &inst.operands[3] { |
| 584 | cond_str(*c) |
| 585 | } else { |
| 586 | "eq" |
| 587 | }; |
| 588 | format!( |
| 589 | "fcsel {}, {}, {}, {}", |
| 590 | op_str(&inst.operands[0]), |
| 591 | op_str(&inst.operands[1]), |
| 592 | op_str(&inst.operands[2]), |
| 593 | cond |
| 594 | ) |
| 595 | } |
| 596 | |
| 597 | ArmOpcode::FaddS | ArmOpcode::FaddD => format!( |
| 598 | "fadd {}, {}, {}", |
| 599 | op_str(&inst.operands[0]), |
| 600 | op_str(&inst.operands[1]), |
| 601 | op_str(&inst.operands[2]) |
| 602 | ), |
| 603 | ArmOpcode::FsubS | ArmOpcode::FsubD => format!( |
| 604 | "fsub {}, {}, {}", |
| 605 | op_str(&inst.operands[0]), |
| 606 | op_str(&inst.operands[1]), |
| 607 | op_str(&inst.operands[2]) |
| 608 | ), |
| 609 | ArmOpcode::FmulS | ArmOpcode::FmulD => format!( |
| 610 | "fmul {}, {}, {}", |
| 611 | op_str(&inst.operands[0]), |
| 612 | op_str(&inst.operands[1]), |
| 613 | op_str(&inst.operands[2]) |
| 614 | ), |
| 615 | ArmOpcode::FdivS | ArmOpcode::FdivD => format!( |
| 616 | "fdiv {}, {}, {}", |
| 617 | op_str(&inst.operands[0]), |
| 618 | op_str(&inst.operands[1]), |
| 619 | op_str(&inst.operands[2]) |
| 620 | ), |
| 621 | ArmOpcode::FnegS | ArmOpcode::FnegD => format!( |
| 622 | "fneg {}, {}", |
| 623 | op_str(&inst.operands[0]), |
| 624 | op_str(&inst.operands[1]) |
| 625 | ), |
| 626 | ArmOpcode::FabsS | ArmOpcode::FabsD => format!( |
| 627 | "fabs {}, {}", |
| 628 | op_str(&inst.operands[0]), |
| 629 | op_str(&inst.operands[1]) |
| 630 | ), |
| 631 | ArmOpcode::FsqrtS | ArmOpcode::FsqrtD => format!( |
| 632 | "fsqrt {}, {}", |
| 633 | op_str(&inst.operands[0]), |
| 634 | op_str(&inst.operands[1]) |
| 635 | ), |
| 636 | // Fused multiply-add/subtract: 4-operand (dest, Sn, Sm, Sa). |
| 637 | // FMADD Sd, Sn, Sm, Sa → Sd = Sa + Sn*Sm |
| 638 | // FMSUB Sd, Sn, Sm, Sa → Sd = Sa - Sn*Sm |
| 639 | // FNMSUB Sd, Sn, Sm, Sa → Sd = Sn*Sm - Sa |
| 640 | ArmOpcode::FmaddS | ArmOpcode::FmaddD => format!( |
| 641 | "fmadd {}, {}, {}, {}", |
| 642 | op_str(&inst.operands[0]), |
| 643 | op_str(&inst.operands[1]), |
| 644 | op_str(&inst.operands[2]), |
| 645 | op_str(&inst.operands[3]) |
| 646 | ), |
| 647 | ArmOpcode::FmsubS | ArmOpcode::FmsubD => format!( |
| 648 | "fmsub {}, {}, {}, {}", |
| 649 | op_str(&inst.operands[0]), |
| 650 | op_str(&inst.operands[1]), |
| 651 | op_str(&inst.operands[2]), |
| 652 | op_str(&inst.operands[3]) |
| 653 | ), |
| 654 | ArmOpcode::FnmsubS | ArmOpcode::FnmsubD => format!( |
| 655 | "fnmsub {}, {}, {}, {}", |
| 656 | op_str(&inst.operands[0]), |
| 657 | op_str(&inst.operands[1]), |
| 658 | op_str(&inst.operands[2]), |
| 659 | op_str(&inst.operands[3]) |
| 660 | ), |
| 661 | |
| 662 | ArmOpcode::ScvtfSW | ArmOpcode::ScvtfDW | ArmOpcode::ScvtfSX | ArmOpcode::ScvtfDX => { |
| 663 | format!( |
| 664 | "scvtf {}, {}", |
| 665 | op_str(&inst.operands[0]), |
| 666 | op_str(&inst.operands[1]) |
| 667 | ) |
| 668 | } |
| 669 | ArmOpcode::FcvtzsWS | ArmOpcode::FcvtzsWD | ArmOpcode::FcvtzsXS | ArmOpcode::FcvtzsXD => { |
| 670 | format!( |
| 671 | "fcvtzs {}, {}", |
| 672 | op_str(&inst.operands[0]), |
| 673 | op_str(&inst.operands[1]) |
| 674 | ) |
| 675 | } |
| 676 | ArmOpcode::FcvtSD => format!( |
| 677 | "fcvt {}, {}", |
| 678 | fp_reg_str(&inst.operands[0], false), |
| 679 | fp_reg_str(&inst.operands[1], true) |
| 680 | ), |
| 681 | ArmOpcode::FcvtDS => format!( |
| 682 | "fcvt {}, {}", |
| 683 | fp_reg_str(&inst.operands[0], true), |
| 684 | fp_reg_str(&inst.operands[1], false) |
| 685 | ), |
| 686 | |
| 687 | ArmOpcode::Movz => { |
| 688 | let imm = if let MachineOperand::Imm(v) = &inst.operands[1] { |
| 689 | *v |
| 690 | } else { |
| 691 | 0 |
| 692 | }; |
| 693 | let shift = if let MachineOperand::Shift(s) = &inst.operands[2] { |
| 694 | *s |
| 695 | } else { |
| 696 | 0 |
| 697 | }; |
| 698 | if shift == 0 { |
| 699 | format!("movz {}, #{}", op_str(&inst.operands[0]), imm) |
| 700 | } else { |
| 701 | format!( |
| 702 | "movz {}, #{}, lsl #{}", |
| 703 | op_str(&inst.operands[0]), |
| 704 | imm, |
| 705 | shift |
| 706 | ) |
| 707 | } |
| 708 | } |
| 709 | ArmOpcode::Movk => { |
| 710 | let imm = if let MachineOperand::Imm(v) = &inst.operands[1] { |
| 711 | *v |
| 712 | } else { |
| 713 | 0 |
| 714 | }; |
| 715 | let shift = if let MachineOperand::Shift(s) = &inst.operands[2] { |
| 716 | *s |
| 717 | } else { |
| 718 | 0 |
| 719 | }; |
| 720 | format!( |
| 721 | "movk {}, #{}, lsl #{}", |
| 722 | op_str(&inst.operands[0]), |
| 723 | imm, |
| 724 | shift |
| 725 | ) |
| 726 | } |
| 727 | ArmOpcode::Movn => { |
| 728 | let imm = if let MachineOperand::Imm(v) = &inst.operands[1] { |
| 729 | *v |
| 730 | } else { |
| 731 | 0 |
| 732 | }; |
| 733 | let shift = if let MachineOperand::Shift(s) = &inst.operands[2] { |
| 734 | *s |
| 735 | } else { |
| 736 | 0 |
| 737 | }; |
| 738 | format!( |
| 739 | "movn {}, #{}, lsl #{}", |
| 740 | op_str(&inst.operands[0]), |
| 741 | imm, |
| 742 | shift |
| 743 | ) |
| 744 | } |
| 745 | ArmOpcode::MovReg => { |
| 746 | let dest = op_str(&inst.operands[0]); |
| 747 | let src = op_str(&inst.operands[1]); |
| 748 | // Handle width mismatch: w→x extend or x→w truncate. |
| 749 | let dest_is_x = dest.starts_with('x'); |
| 750 | let dest_is_w = dest.starts_with('w'); |
| 751 | let src_is_w = src.starts_with('w'); |
| 752 | let src_is_x = src.starts_with('x'); |
| 753 | // Cross-register-class move: AArch64 `mov` only encodes GP↔GP |
| 754 | // (and FP↔FP via FmovReg). When register-allocation hands us |
| 755 | // a MovReg straddling classes, emit `fmov` which transfers |
| 756 | // bits between an integer GPR and an SIMD/FP register. |
| 757 | let dest_is_gp = dest_is_x || dest_is_w; |
| 758 | let src_is_gp = src_is_x || src_is_w; |
| 759 | let dest_is_fp = dest.starts_with('s') || dest.starts_with('d'); |
| 760 | let src_is_fp = src.starts_with('s') || src.starts_with('d'); |
| 761 | if dest_is_gp && src_is_fp { |
| 762 | // GPR ← FPR: pick GPR width to match FPR (s→w, d→x). |
| 763 | let gp = if src.starts_with('d') { |
| 764 | if dest_is_x { |
| 765 | dest.clone() |
| 766 | } else { |
| 767 | format!("x{}", &dest[1..]) |
| 768 | } |
| 769 | } else { |
| 770 | if dest_is_w { |
| 771 | dest.clone() |
| 772 | } else { |
| 773 | format!("w{}", &dest[1..]) |
| 774 | } |
| 775 | }; |
| 776 | return format!("fmov {}, {}", gp, src); |
| 777 | } |
| 778 | if dest_is_fp && src_is_gp { |
| 779 | let gp = if dest.starts_with('d') { |
| 780 | if src_is_x { |
| 781 | src.clone() |
| 782 | } else { |
| 783 | format!("x{}", &src[1..]) |
| 784 | } |
| 785 | } else { |
| 786 | if src_is_w { |
| 787 | src.clone() |
| 788 | } else { |
| 789 | format!("w{}", &src[1..]) |
| 790 | } |
| 791 | }; |
| 792 | return format!("fmov {}, {}", dest, gp); |
| 793 | } |
| 794 | if dest_is_x && src_is_w { |
| 795 | // Zero-extend 32→64: use uxtw. |
| 796 | format!("uxtw {}, {}", dest, src) |
| 797 | } else if dest_is_w && src_is_x { |
| 798 | // Truncate 64→32 by reading the source register through its |
| 799 | // 32-bit view. `mov wN, xM` is not a valid AArch64 encoding. |
| 800 | format!("mov {}, w{}", dest, &src[1..]) |
| 801 | } else { |
| 802 | format!("mov {}, {}", dest, src) |
| 803 | } |
| 804 | } |
| 805 | ArmOpcode::FmovReg => format!( |
| 806 | "fmov {}, {}", |
| 807 | op_str(&inst.operands[0]), |
| 808 | op_str(&inst.operands[1]) |
| 809 | ), |
| 810 | ArmOpcode::Mov16B => format!( |
| 811 | "mov.16b {}, {}", |
| 812 | v_reg_bare(&inst.operands[0]), |
| 813 | v_reg_bare(&inst.operands[1]), |
| 814 | ), |
| 815 | ArmOpcode::AddpV2D => format!( |
| 816 | "addp.2d {}, {}, {}", |
| 817 | v_reg_bare(&inst.operands[0]), |
| 818 | v_reg_bare(&inst.operands[1]), |
| 819 | v_reg_bare(&inst.operands[2]), |
| 820 | ), |
| 821 | ArmOpcode::FaddpV4S => format!( |
| 822 | "faddp.4s {}, {}, {}", |
| 823 | v_reg_bare(&inst.operands[0]), |
| 824 | v_reg_bare(&inst.operands[1]), |
| 825 | v_reg_bare(&inst.operands[2]), |
| 826 | ), |
| 827 | |
| 828 | ArmOpcode::LdrImm | ArmOpcode::LdrFpImm | ArmOpcode::LdrsbImm | ArmOpcode::LdrshImm => { |
| 829 | let dest = op_str(&inst.operands[0]); |
| 830 | let base = op_str(&inst.operands[1]); |
| 831 | let offset_val = match &inst.operands[2] { |
| 832 | MachineOperand::FrameSlot(off) => *off as i64, |
| 833 | MachineOperand::Imm(v) => *v, |
| 834 | _ => 0, |
| 835 | }; |
| 836 | // Pick the mnemonic by opcode. LDRSB / LDRSH expect a |
| 837 | // Wt destination (sign-extended into the lower 32 bits); |
| 838 | // the dest operand is already a Gp32 vreg in those |
| 839 | // cases, so the formatted register name is `w_`. |
| 840 | let mnemonic = match inst.opcode { |
| 841 | ArmOpcode::LdrsbImm => "ldrsb", |
| 842 | ArmOpcode::LdrshImm => "ldrsh", |
| 843 | _ => "ldr", |
| 844 | }; |
| 845 | if (-256..=255).contains(&offset_val) { |
| 846 | format!("{} {}, [{}, #{}]", mnemonic, dest, base, offset_val) |
| 847 | } else { |
| 848 | // Large offset: compute address in x8, then load. |
| 849 | format!( |
| 850 | "{}\n {} {}, [x8]", |
| 851 | fmt_addr_with_offset("x8", &base, offset_val, "x16"), |
| 852 | mnemonic, |
| 853 | dest |
| 854 | ) |
| 855 | } |
| 856 | } |
| 857 | ArmOpcode::StrImm | ArmOpcode::StrFpImm | ArmOpcode::StrbImm | ArmOpcode::StrhImm => { |
| 858 | let src = op_str(&inst.operands[0]); |
| 859 | let base = op_str(&inst.operands[1]); |
| 860 | let offset_val = match &inst.operands[2] { |
| 861 | MachineOperand::FrameSlot(off) => *off as i64, |
| 862 | MachineOperand::Imm(v) => *v, |
| 863 | _ => 0, |
| 864 | }; |
| 865 | let mnemonic = match inst.opcode { |
| 866 | ArmOpcode::StrbImm => "strb", |
| 867 | ArmOpcode::StrhImm => "strh", |
| 868 | _ => "str", |
| 869 | }; |
| 870 | if (-256..=255).contains(&offset_val) { |
| 871 | format!("{} {}, [{}, #{}]", mnemonic, src, base, offset_val) |
| 872 | } else { |
| 873 | // Large offset: compute address in x8, then store. |
| 874 | format!( |
| 875 | "{}\n {} {}, [x8]", |
| 876 | fmt_addr_with_offset("x8", &base, offset_val, "x16"), |
| 877 | mnemonic, |
| 878 | src |
| 879 | ) |
| 880 | } |
| 881 | } |
| 882 | // Sprint 05: scaled-register-offset addressing. Operands are |
| 883 | // [dest, base, idx, Imm(shift)]. Shift 0 elides the `, lsl |
| 884 | // #0` suffix per the assembler convention. |
| 885 | ArmOpcode::LdrReg | ArmOpcode::LdrFpReg | ArmOpcode::StrReg | ArmOpcode::StrFpReg => { |
| 886 | let dest = op_str(&inst.operands[0]); |
| 887 | let base = op_str(&inst.operands[1]); |
| 888 | let idx = op_str(&inst.operands[2]); |
| 889 | let shift = match &inst.operands[3] { |
| 890 | MachineOperand::Imm(v) => *v, |
| 891 | _ => 0, |
| 892 | }; |
| 893 | let mnemonic = match inst.opcode { |
| 894 | ArmOpcode::LdrReg | ArmOpcode::LdrFpReg => "ldr", |
| 895 | ArmOpcode::StrReg | ArmOpcode::StrFpReg => "str", |
| 896 | _ => unreachable!(), |
| 897 | }; |
| 898 | if shift == 0 { |
| 899 | format!("{} {}, [{}, {}]", mnemonic, dest, base, idx) |
| 900 | } else { |
| 901 | format!("{} {}, [{}, {}, lsl #{}]", mnemonic, dest, base, idx, shift) |
| 902 | } |
| 903 | } |
| 904 | |
| 905 | ArmOpcode::StpPre => { |
| 906 | let frame_size = mf.frame.size as i64; |
| 907 | let stp_offset = frame_size - 16; |
| 908 | // The `sub sp, sp, #N` portion handles N > 4095 via |
| 909 | // x16 synthesis (audit6 BLOCKING-5 root cause), and |
| 910 | // probes very large frames so macOS guard pages aren't |
| 911 | // skipped in one jump. The `stp ... [sp, #stp_offset]` |
| 912 | // form is also bounded |
| 913 | // (signed 7-bit immediate * 8 = ±504 byte range), so |
| 914 | // we fall back to two `str` instructions when over. |
| 915 | // For very large frames (stp_offset > 32760, the |
| 916 | // signed 12-bit max for 64-bit ldr/str unsigned imm), |
| 917 | // we'd need a register-form load/store — not yet |
| 918 | // exercised in any test, so the panic catches it. |
| 919 | let sub_sp = fmt_stack_alloc(frame_size); |
| 920 | if stp_offset <= 504 { |
| 921 | format!("{}\n stp x29, x30, [sp, #{}]", sub_sp, stp_offset) |
| 922 | } else if stp_offset <= 32760 { |
| 923 | format!( |
| 924 | "{}\n str x29, [sp, #{}]\n str x30, [sp, #{}]", |
| 925 | sub_sp, |
| 926 | stp_offset, |
| 927 | stp_offset + 8 |
| 928 | ) |
| 929 | } else { |
| 930 | // Frame too large for any ldr/str unsigned immediate. |
| 931 | // Synthesize the address in x9 (caller-saved scratch) |
| 932 | // then use register-offset str. |
| 933 | let x9_addr = fmt_sp_imm("add", "x9", "sp", stp_offset); |
| 934 | format!( |
| 935 | "{}\n {}\n str x29, [x9]\n str x30, [x9, #8]", |
| 936 | sub_sp, x9_addr |
| 937 | ) |
| 938 | } |
| 939 | } |
| 940 | ArmOpcode::LdpPost => { |
| 941 | let frame_size = mf.frame.size as i64; |
| 942 | let ldp_offset = frame_size - 16; |
| 943 | let add_sp = fmt_sp_imm("add", "sp", "sp", frame_size); |
| 944 | if ldp_offset <= 504 { |
| 945 | format!("ldp x29, x30, [sp, #{}]\n {}", ldp_offset, add_sp) |
| 946 | } else if ldp_offset <= 32760 { |
| 947 | format!( |
| 948 | "ldr x29, [sp, #{}]\n ldr x30, [sp, #{}]\n {}", |
| 949 | ldp_offset, |
| 950 | ldp_offset + 8, |
| 951 | add_sp |
| 952 | ) |
| 953 | } else { |
| 954 | // Frame too large for unsigned immediate ldr. |
| 955 | // Synthesize address in x9 then restore with register-offset ldr. |
| 956 | let x9_addr = fmt_sp_imm("add", "x9", "sp", ldp_offset); |
| 957 | format!( |
| 958 | "{}\n ldr x29, [x9]\n ldr x30, [x9, #8]\n {}", |
| 959 | x9_addr, add_sp |
| 960 | ) |
| 961 | } |
| 962 | } |
| 963 | |
| 964 | // Non-preindex STP/LDP for callee-save pairs. |
| 965 | // Operands: [src1/dst1, src2/dst2, base, imm]. |
| 966 | ArmOpcode::StpOffset => { |
| 967 | let r1 = op_str(&inst.operands[0]); |
| 968 | let r2 = op_str(&inst.operands[1]); |
| 969 | let base = op_str(&inst.operands[2]); |
| 970 | let off = match &inst.operands[3] { |
| 971 | MachineOperand::Imm(v) => *v, |
| 972 | MachineOperand::FrameSlot(v) => *v as i64, |
| 973 | _ => 0, |
| 974 | }; |
| 975 | // STP signed-offset range: 7-bit signed × 8 → [-512, 504]. |
| 976 | // Fall back to two individual STR instructions if out of range. |
| 977 | if (-512..=504).contains(&off) { |
| 978 | format!("stp {}, {}, [{}, #{}]", r1, r2, base, off) |
| 979 | } else { |
| 980 | format!( |
| 981 | "{}\n str {}, [x9]\n str {}, [x9, #8]", |
| 982 | fmt_addr_with_offset("x9", &base, off, "x16"), |
| 983 | r1, |
| 984 | r2 |
| 985 | ) |
| 986 | } |
| 987 | } |
| 988 | ArmOpcode::LdpOffset => { |
| 989 | let r1 = op_str(&inst.operands[0]); |
| 990 | let r2 = op_str(&inst.operands[1]); |
| 991 | let base = op_str(&inst.operands[2]); |
| 992 | let off = match &inst.operands[3] { |
| 993 | MachineOperand::Imm(v) => *v, |
| 994 | MachineOperand::FrameSlot(v) => *v as i64, |
| 995 | _ => 0, |
| 996 | }; |
| 997 | // LDP signed-offset range: 7-bit signed × 8 → [-512, 504]. |
| 998 | // Fall back to two individual LDR instructions if out of range. |
| 999 | if (-512..=504).contains(&off) { |
| 1000 | format!("ldp {}, {}, [{}, #{}]", r1, r2, base, off) |
| 1001 | } else { |
| 1002 | format!( |
| 1003 | "{}\n ldr {}, [x9]\n ldr {}, [x9, #8]", |
| 1004 | fmt_addr_with_offset("x9", &base, off, "x16"), |
| 1005 | r1, |
| 1006 | r2 |
| 1007 | ) |
| 1008 | } |
| 1009 | } |
| 1010 | |
| 1011 | ArmOpcode::AdrpLdr => { |
| 1012 | if let MachineOperand::ConstPool(idx) = &inst.operands[1] { |
| 1013 | let label = const_pool_label(&mf.name, *idx); |
| 1014 | let dest = op_str(&inst.operands[0]); |
| 1015 | // ADRP requires a GP register. If dest is FP (s/d), use x8 as scratch. |
| 1016 | let is_fp = dest.starts_with('s') || dest.starts_with('d'); |
| 1017 | if is_fp { |
| 1018 | format!( |
| 1019 | "adrp x8, {1}@PAGE\n ldr {0}, [x8, {1}@PAGEOFF]", |
| 1020 | dest, label |
| 1021 | ) |
| 1022 | } else { |
| 1023 | format!( |
| 1024 | "adrp {0}, {1}@PAGE\n ldr {0}, [{0}, {1}@PAGEOFF]", |
| 1025 | dest, label |
| 1026 | ) |
| 1027 | } |
| 1028 | } else { |
| 1029 | "nop ; bad adrp+ldr".into() |
| 1030 | } |
| 1031 | } |
| 1032 | ArmOpcode::AdrpAdd => { |
| 1033 | let dest = op_str(&inst.operands[0]); |
| 1034 | match &inst.operands[1] { |
| 1035 | MachineOperand::ConstPool(idx) => { |
| 1036 | let label = const_pool_label(&mf.name, *idx); |
| 1037 | format!( |
| 1038 | "adrp {0}, {1}@PAGE\n add {0}, {0}, {1}@PAGEOFF", |
| 1039 | dest, label |
| 1040 | ) |
| 1041 | } |
| 1042 | MachineOperand::GlobalLabel(name) => { |
| 1043 | // Mach-O convention: globals get an underscore prefix. |
| 1044 | let sym = if name.starts_with('_') { |
| 1045 | name.clone() |
| 1046 | } else { |
| 1047 | format!("_{}", name) |
| 1048 | }; |
| 1049 | format!( |
| 1050 | "adrp {0}, {1}@PAGE\n add {0}, {0}, {1}@PAGEOFF", |
| 1051 | dest, sym |
| 1052 | ) |
| 1053 | } |
| 1054 | _ => "nop ; bad adrp+add".into(), |
| 1055 | } |
| 1056 | } |
| 1057 | |
| 1058 | ArmOpcode::B => { |
| 1059 | match &inst.operands[0] { |
| 1060 | MachineOperand::BlockRef(id) => format!("b {}", mf.block(*id).label), |
| 1061 | // Tail call to an external symbol (TCO): B _callee |
| 1062 | MachineOperand::Extern(name) => { |
| 1063 | if name.starts_with('_') { |
| 1064 | format!("b {}", name) |
| 1065 | } else { |
| 1066 | format!("b _{}", name) |
| 1067 | } |
| 1068 | } |
| 1069 | _ => "b ???".into(), |
| 1070 | } |
| 1071 | } |
| 1072 | ArmOpcode::BCond => { |
| 1073 | let cond = if let MachineOperand::Cond(c) = &inst.operands[0] { |
| 1074 | cond_str(*c) |
| 1075 | } else { |
| 1076 | "eq" |
| 1077 | }; |
| 1078 | let target = if let MachineOperand::BlockRef(id) = &inst.operands[1] { |
| 1079 | mf.block(*id).label.clone() |
| 1080 | } else { |
| 1081 | "???".into() |
| 1082 | }; |
| 1083 | format!("b.{} {}", cond, target) |
| 1084 | } |
| 1085 | ArmOpcode::Cbz | ArmOpcode::Cbnz => { |
| 1086 | let mnemonic = match inst.opcode { |
| 1087 | ArmOpcode::Cbz => "cbz", |
| 1088 | _ => "cbnz", |
| 1089 | }; |
| 1090 | let target = if let MachineOperand::BlockRef(id) = &inst.operands[1] { |
| 1091 | mf.block(*id).label.clone() |
| 1092 | } else { |
| 1093 | "???".into() |
| 1094 | }; |
| 1095 | format!("{} {}, {}", mnemonic, op_str(&inst.operands[0]), target) |
| 1096 | } |
| 1097 | ArmOpcode::Tbz | ArmOpcode::Tbnz => { |
| 1098 | let mnemonic = match inst.opcode { |
| 1099 | ArmOpcode::Tbz => "tbz", |
| 1100 | _ => "tbnz", |
| 1101 | }; |
| 1102 | let bit = if let MachineOperand::Imm(v) = &inst.operands[1] { |
| 1103 | *v |
| 1104 | } else { |
| 1105 | 0 |
| 1106 | }; |
| 1107 | let target = if let MachineOperand::BlockRef(id) = &inst.operands[2] { |
| 1108 | mf.block(*id).label.clone() |
| 1109 | } else { |
| 1110 | "???".into() |
| 1111 | }; |
| 1112 | format!( |
| 1113 | "{} {}, #{}, {}", |
| 1114 | mnemonic, |
| 1115 | op_str(&inst.operands[0]), |
| 1116 | bit, |
| 1117 | target |
| 1118 | ) |
| 1119 | } |
| 1120 | ArmOpcode::Bl => { |
| 1121 | if let MachineOperand::Extern(name) = &inst.operands[0] { |
| 1122 | // Mach-O convention: C symbols get a _ prefix. |
| 1123 | if name.starts_with('_') { |
| 1124 | format!("bl {}", name) // already prefixed |
| 1125 | } else { |
| 1126 | format!("bl _{}", name) // add Mach-O prefix |
| 1127 | } |
| 1128 | } else { |
| 1129 | "bl ???".into() |
| 1130 | } |
| 1131 | } |
| 1132 | ArmOpcode::Blr => format!("blr {}", op_str(&inst.operands[0])), |
| 1133 | ArmOpcode::Sxtw => format!( |
| 1134 | "sxtw {}, {}", |
| 1135 | op_str(&inst.operands[0]), |
| 1136 | op_str(&inst.operands[1]) |
| 1137 | ), |
| 1138 | ArmOpcode::Sxth => format!( |
| 1139 | "sxth {}, {}", |
| 1140 | op_str(&inst.operands[0]), |
| 1141 | op_str(&inst.operands[1]) |
| 1142 | ), |
| 1143 | ArmOpcode::Sxtb => format!( |
| 1144 | "sxtb {}, {}", |
| 1145 | op_str(&inst.operands[0]), |
| 1146 | op_str(&inst.operands[1]) |
| 1147 | ), |
| 1148 | ArmOpcode::Ret => "ret".into(), |
| 1149 | ArmOpcode::Nop => "nop".into(), |
| 1150 | ArmOpcode::Brk => { |
| 1151 | let imm = if let MachineOperand::Imm(v) = &inst.operands[0] { |
| 1152 | *v |
| 1153 | } else { |
| 1154 | 1 |
| 1155 | }; |
| 1156 | format!("brk #{}", imm) |
| 1157 | } |
| 1158 | |
| 1159 | // ---- NEON SIMD vector ops (Sprint 12 Stage 2) ---- |
| 1160 | // |
| 1161 | // Each op forwards to a small helper so the lane-shape suffix |
| 1162 | // (.4s / .2d / .s[n] / .d[n]) lives in one place. |
| 1163 | ArmOpcode::AddV4S => fmt_vbinop(inst, "add", "4s"), |
| 1164 | ArmOpcode::AddV2D => fmt_vbinop(inst, "add", "2d"), |
| 1165 | ArmOpcode::SubV4S => fmt_vbinop(inst, "sub", "4s"), |
| 1166 | ArmOpcode::SubV2D => fmt_vbinop(inst, "sub", "2d"), |
| 1167 | ArmOpcode::MulV4S => fmt_vbinop(inst, "mul", "4s"), |
| 1168 | ArmOpcode::NegV4S => fmt_vunop(inst, "neg", "4s"), |
| 1169 | ArmOpcode::NegV2D => fmt_vunop(inst, "neg", "2d"), |
| 1170 | ArmOpcode::FaddV4S => fmt_vbinop(inst, "fadd", "4s"), |
| 1171 | ArmOpcode::FaddV2D => fmt_vbinop(inst, "fadd", "2d"), |
| 1172 | ArmOpcode::FsubV4S => fmt_vbinop(inst, "fsub", "4s"), |
| 1173 | ArmOpcode::FsubV2D => fmt_vbinop(inst, "fsub", "2d"), |
| 1174 | ArmOpcode::FmulV4S => fmt_vbinop(inst, "fmul", "4s"), |
| 1175 | ArmOpcode::FmulV2D => fmt_vbinop(inst, "fmul", "2d"), |
| 1176 | ArmOpcode::FdivV4S => fmt_vbinop(inst, "fdiv", "4s"), |
| 1177 | ArmOpcode::FdivV2D => fmt_vbinop(inst, "fdiv", "2d"), |
| 1178 | ArmOpcode::FnegV4S => fmt_vunop(inst, "fneg", "4s"), |
| 1179 | ArmOpcode::FnegV2D => fmt_vunop(inst, "fneg", "2d"), |
| 1180 | ArmOpcode::FabsV4S => fmt_vunop(inst, "fabs", "4s"), |
| 1181 | ArmOpcode::FabsV2D => fmt_vunop(inst, "fabs", "2d"), |
| 1182 | ArmOpcode::FsqrtV4S => fmt_vunop(inst, "fsqrt", "4s"), |
| 1183 | ArmOpcode::FsqrtV2D => fmt_vunop(inst, "fsqrt", "2d"), |
| 1184 | ArmOpcode::BslV16B => fmt_vbinop(inst, "bsl", "16b"), |
| 1185 | ArmOpcode::FcmgtV4S => fmt_vbinop(inst, "fcmgt", "4s"), |
| 1186 | ArmOpcode::FcmgtV2D => fmt_vbinop(inst, "fcmgt", "2d"), |
| 1187 | ArmOpcode::FcmgeV4S => fmt_vbinop(inst, "fcmge", "4s"), |
| 1188 | ArmOpcode::FcmgeV2D => fmt_vbinop(inst, "fcmge", "2d"), |
| 1189 | ArmOpcode::FcmeqV4S => fmt_vbinop(inst, "fcmeq", "4s"), |
| 1190 | ArmOpcode::FcmeqV2D => fmt_vbinop(inst, "fcmeq", "2d"), |
| 1191 | ArmOpcode::CmgtV4S => fmt_vbinop(inst, "cmgt", "4s"), |
| 1192 | ArmOpcode::CmgeV4S => fmt_vbinop(inst, "cmge", "4s"), |
| 1193 | ArmOpcode::CmeqV4S => fmt_vbinop(inst, "cmeq", "4s"), |
| 1194 | ArmOpcode::FmlaV4S => fmt_vbinop(inst, "fmla", "4s"), |
| 1195 | ArmOpcode::FmlaV2D => fmt_vbinop(inst, "fmla", "2d"), |
| 1196 | ArmOpcode::FminV4S => fmt_vbinop(inst, "fmin", "4s"), |
| 1197 | ArmOpcode::FminV2D => fmt_vbinop(inst, "fmin", "2d"), |
| 1198 | ArmOpcode::FmaxV4S => fmt_vbinop(inst, "fmax", "4s"), |
| 1199 | ArmOpcode::FmaxV2D => fmt_vbinop(inst, "fmax", "2d"), |
| 1200 | ArmOpcode::SminV4S => fmt_vbinop(inst, "smin", "4s"), |
| 1201 | ArmOpcode::SmaxV4S => fmt_vbinop(inst, "smax", "4s"), |
| 1202 | ArmOpcode::UminV4S => fmt_vbinop(inst, "umin", "4s"), |
| 1203 | ArmOpcode::UmaxV4S => fmt_vbinop(inst, "umax", "4s"), |
| 1204 | |
| 1205 | // afs-as dialect: cross-lane reductions encode the shape in |
| 1206 | // the mnemonic suffix; the destination is a scalar `s/d` and |
| 1207 | // the source is the bare vector register. |
| 1208 | ArmOpcode::FaddpV2S => format!( |
| 1209 | "faddp.2s {}, {}", |
| 1210 | fp32_scalar(&inst.operands[0]), |
| 1211 | v_reg_bare(&inst.operands[1]), |
| 1212 | ), |
| 1213 | ArmOpcode::FaddpV2D => format!( |
| 1214 | "faddp.2d {}, {}", |
| 1215 | fp64_scalar(&inst.operands[0]), |
| 1216 | v_reg_bare(&inst.operands[1]), |
| 1217 | ), |
| 1218 | ArmOpcode::Faddv4S => format!( |
| 1219 | "faddv.4s {}, {}", |
| 1220 | fp32_scalar(&inst.operands[0]), |
| 1221 | v_reg_bare(&inst.operands[1]), |
| 1222 | ), |
| 1223 | ArmOpcode::Sminv4S => format!( |
| 1224 | "sminv.4s {}, {}", |
| 1225 | fp32_scalar(&inst.operands[0]), |
| 1226 | v_reg_bare(&inst.operands[1]), |
| 1227 | ), |
| 1228 | ArmOpcode::Smaxv4S => format!( |
| 1229 | "smaxv.4s {}, {}", |
| 1230 | fp32_scalar(&inst.operands[0]), |
| 1231 | v_reg_bare(&inst.operands[1]), |
| 1232 | ), |
| 1233 | ArmOpcode::FmaxvV4S => format!( |
| 1234 | "fmaxv.4s {}, {}", |
| 1235 | fp32_scalar(&inst.operands[0]), |
| 1236 | v_reg_bare(&inst.operands[1]), |
| 1237 | ), |
| 1238 | ArmOpcode::FminvV4S => format!( |
| 1239 | "fminv.4s {}, {}", |
| 1240 | fp32_scalar(&inst.operands[0]), |
| 1241 | v_reg_bare(&inst.operands[1]), |
| 1242 | ), |
| 1243 | ArmOpcode::FmaxpV2DScalar => format!( |
| 1244 | "fmaxp.2d {}, {}", |
| 1245 | fp64_scalar(&inst.operands[0]), |
| 1246 | v_reg_bare(&inst.operands[1]), |
| 1247 | ), |
| 1248 | ArmOpcode::FminpV2DScalar => format!( |
| 1249 | "fminp.2d {}, {}", |
| 1250 | fp64_scalar(&inst.operands[0]), |
| 1251 | v_reg_bare(&inst.operands[1]), |
| 1252 | ), |
| 1253 | ArmOpcode::Uminv4S => format!( |
| 1254 | "uminv.4s {}, {}", |
| 1255 | fp32_scalar(&inst.operands[0]), |
| 1256 | v_reg_bare(&inst.operands[1]), |
| 1257 | ), |
| 1258 | ArmOpcode::Umaxv4S => format!( |
| 1259 | "umaxv.4s {}, {}", |
| 1260 | fp32_scalar(&inst.operands[0]), |
| 1261 | v_reg_bare(&inst.operands[1]), |
| 1262 | ), |
| 1263 | ArmOpcode::Addv4S => format!( |
| 1264 | "addv.4s {}, {}", |
| 1265 | fp32_scalar(&inst.operands[0]), |
| 1266 | v_reg_bare(&inst.operands[1]), |
| 1267 | ), |
| 1268 | |
| 1269 | ArmOpcode::DupGen4S => format!( |
| 1270 | "dup.4s {}, {}", |
| 1271 | v_reg_bare(&inst.operands[0]), |
| 1272 | op_str(&inst.operands[1]), |
| 1273 | ), |
| 1274 | ArmOpcode::DupGen2D => format!( |
| 1275 | "dup.2d {}, {}", |
| 1276 | v_reg_bare(&inst.operands[0]), |
| 1277 | op_str(&inst.operands[1]), |
| 1278 | ), |
| 1279 | ArmOpcode::DupEl4S => format!( |
| 1280 | "dup.4s {}, {}", |
| 1281 | v_reg_bare(&inst.operands[0]), |
| 1282 | v_lane_bare(&inst.operands[1], "s", 0), |
| 1283 | ), |
| 1284 | ArmOpcode::DupEl2D => format!( |
| 1285 | "dup.2d {}, {}", |
| 1286 | v_reg_bare(&inst.operands[0]), |
| 1287 | v_lane_bare(&inst.operands[1], "d", 0), |
| 1288 | ), |
| 1289 | ArmOpcode::Ins4S => { |
| 1290 | let lane = imm_u8(&inst.operands[1]); |
| 1291 | format!( |
| 1292 | "ins.s {}, {}", |
| 1293 | v_lane_bare(&inst.operands[0], "s", lane), |
| 1294 | op_str(&inst.operands[2]), |
| 1295 | ) |
| 1296 | } |
| 1297 | ArmOpcode::Ins2D => { |
| 1298 | let lane = imm_u8(&inst.operands[1]); |
| 1299 | format!( |
| 1300 | "ins.d {}, {}", |
| 1301 | v_lane_bare(&inst.operands[0], "d", lane), |
| 1302 | op_str(&inst.operands[2]), |
| 1303 | ) |
| 1304 | } |
| 1305 | ArmOpcode::Umov4S => { |
| 1306 | let lane = imm_u8(&inst.operands[2]); |
| 1307 | format!( |
| 1308 | "umov.s {}, {}", |
| 1309 | op_str(&inst.operands[0]), |
| 1310 | v_lane_bare(&inst.operands[1], "s", lane), |
| 1311 | ) |
| 1312 | } |
| 1313 | ArmOpcode::Umov2D => { |
| 1314 | let lane = imm_u8(&inst.operands[2]); |
| 1315 | format!( |
| 1316 | "umov.d {}, {}", |
| 1317 | op_str(&inst.operands[0]), |
| 1318 | v_lane_bare(&inst.operands[1], "d", lane), |
| 1319 | ) |
| 1320 | } |
| 1321 | ArmOpcode::FmovEl4S => { |
| 1322 | let lane = imm_u8(&inst.operands[2]); |
| 1323 | format!( |
| 1324 | "mov.s {}, {}", |
| 1325 | fp32_scalar(&inst.operands[0]), |
| 1326 | v_lane_bare(&inst.operands[1], "s", lane), |
| 1327 | ) |
| 1328 | } |
| 1329 | ArmOpcode::FmovEl2D => { |
| 1330 | let lane = imm_u8(&inst.operands[2]); |
| 1331 | format!( |
| 1332 | "mov.d {}, {}", |
| 1333 | fp64_scalar(&inst.operands[0]), |
| 1334 | v_lane_bare(&inst.operands[1], "d", lane), |
| 1335 | ) |
| 1336 | } |
| 1337 | |
| 1338 | ArmOpcode::LdrQ => format!( |
| 1339 | "ldr {}, [{}, {}]", |
| 1340 | q_reg(&inst.operands[0]), |
| 1341 | op_str(&inst.operands[1]), |
| 1342 | op_str(&inst.operands[2]), |
| 1343 | ), |
| 1344 | ArmOpcode::StrQ => format!( |
| 1345 | "str {}, [{}, {}]", |
| 1346 | q_reg(&inst.operands[0]), |
| 1347 | op_str(&inst.operands[1]), |
| 1348 | op_str(&inst.operands[2]), |
| 1349 | ), |
| 1350 | } |
| 1351 | } |
| 1352 | |
| 1353 | // ---- NEON formatting helpers ---- |
| 1354 | |
| 1355 | fn v_reg(op: &MachineOperand, shape: &str) -> String { |
| 1356 | match op { |
| 1357 | MachineOperand::VReg(id) => format!("v{}.{}", id.0, shape), |
| 1358 | MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => { |
| 1359 | format!("v{}.{}", n, shape) |
| 1360 | } |
| 1361 | _ => format!("{}.{}", op_str(op), shape), |
| 1362 | } |
| 1363 | } |
| 1364 | |
| 1365 | fn q_reg(op: &MachineOperand) -> String { |
| 1366 | match op { |
| 1367 | MachineOperand::VReg(id) => format!("q{}", id.0), |
| 1368 | MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => { |
| 1369 | format!("q{}", n) |
| 1370 | } |
| 1371 | _ => format!("q{}", op_str(op)), |
| 1372 | } |
| 1373 | } |
| 1374 | |
| 1375 | fn v_lane(op: &MachineOperand, lane_ty: &str, lane: u8) -> String { |
| 1376 | match op { |
| 1377 | MachineOperand::VReg(id) => format!("v{}.{}[{}]", id.0, lane_ty, lane), |
| 1378 | MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => { |
| 1379 | format!("v{}.{}[{}]", n, lane_ty, lane) |
| 1380 | } |
| 1381 | _ => format!("v{}.{}[{}]", op_str(op), lane_ty, lane), |
| 1382 | } |
| 1383 | } |
| 1384 | |
| 1385 | fn fp32_scalar(op: &MachineOperand) -> String { |
| 1386 | match op { |
| 1387 | MachineOperand::VReg(id) => format!("s{}", id.0), |
| 1388 | MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => { |
| 1389 | format!("s{}", n) |
| 1390 | } |
| 1391 | _ => op_str(op), |
| 1392 | } |
| 1393 | } |
| 1394 | |
| 1395 | fn fp64_scalar(op: &MachineOperand) -> String { |
| 1396 | match op { |
| 1397 | MachineOperand::VReg(id) => format!("d{}", id.0), |
| 1398 | MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => { |
| 1399 | format!("d{}", n) |
| 1400 | } |
| 1401 | _ => op_str(op), |
| 1402 | } |
| 1403 | } |
| 1404 | |
| 1405 | fn imm_u8(op: &MachineOperand) -> u8 { |
| 1406 | if let MachineOperand::Imm(v) = op { |
| 1407 | *v as u8 |
| 1408 | } else { |
| 1409 | 0 |
| 1410 | } |
| 1411 | } |
| 1412 | |
| 1413 | fn fmt_vbinop(inst: &MachineInst, mnemonic: &str, shape: &str) -> String { |
| 1414 | // afs-as dialect: shape suffix is part of the mnemonic, operand |
| 1415 | // registers are bare (`fadd.4s v0, v1, v2`). Encodes to the same |
| 1416 | // bytes as the Apple/GNU `fadd v0.4s, v1.4s, v2.4s` form. |
| 1417 | format!( |
| 1418 | "{}.{} {}, {}, {}", |
| 1419 | mnemonic, |
| 1420 | shape, |
| 1421 | v_reg_bare(&inst.operands[0]), |
| 1422 | v_reg_bare(&inst.operands[1]), |
| 1423 | v_reg_bare(&inst.operands[2]), |
| 1424 | ) |
| 1425 | } |
| 1426 | |
| 1427 | fn fmt_vunop(inst: &MachineInst, mnemonic: &str, shape: &str) -> String { |
| 1428 | format!( |
| 1429 | "{}.{} {}, {}", |
| 1430 | mnemonic, |
| 1431 | shape, |
| 1432 | v_reg_bare(&inst.operands[0]), |
| 1433 | v_reg_bare(&inst.operands[1]), |
| 1434 | ) |
| 1435 | } |
| 1436 | |
| 1437 | fn v_reg_bare(op: &MachineOperand) -> String { |
| 1438 | match op { |
| 1439 | MachineOperand::VReg(id) => format!("v{}", id.0), |
| 1440 | MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => { |
| 1441 | format!("v{}", n) |
| 1442 | } |
| 1443 | _ => op_str(op), |
| 1444 | } |
| 1445 | } |
| 1446 | |
| 1447 | fn v_lane_bare(op: &MachineOperand, _lane_ty: &str, lane: u8) -> String { |
| 1448 | // afs-as dialect for `umov.s w3, v0[2]` — bare reg with `[lane]` |
| 1449 | // suffix; the element-size width is encoded into the mnemonic |
| 1450 | // (`umov.s` / `umov.d`). |
| 1451 | match op { |
| 1452 | MachineOperand::VReg(id) => format!("v{}[{}]", id.0, lane), |
| 1453 | MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => { |
| 1454 | format!("v{}[{}]", n, lane) |
| 1455 | } |
| 1456 | _ => format!("{}[{}]", op_str(op), lane), |
| 1457 | } |
| 1458 | } |
| 1459 | |
| 1460 | /// Format a machine operand as assembly text. |
| 1461 | fn op_str(op: &MachineOperand) -> String { |
| 1462 | match op { |
| 1463 | MachineOperand::VReg(id) => format!("v{}", id.0), // placeholder until regalloc |
| 1464 | MachineOperand::PhysReg(PhysReg::Sp) => "sp".into(), |
| 1465 | MachineOperand::PhysReg(PhysReg::Xzr) => "xzr".into(), |
| 1466 | MachineOperand::PhysReg(PhysReg::Wzr) => "wzr".into(), |
| 1467 | MachineOperand::PhysReg(PhysReg::Gp(n)) => format!("x{}", n), |
| 1468 | MachineOperand::PhysReg(PhysReg::Gp32(n)) => format!("w{}", n), |
| 1469 | MachineOperand::PhysReg(PhysReg::Fp(n)) => format!("d{}", n), |
| 1470 | MachineOperand::PhysReg(PhysReg::Fp32(n)) => format!("s{}", n), |
| 1471 | MachineOperand::Imm(v) => format!("#{}", v), |
| 1472 | MachineOperand::FrameSlot(off) => format!("[fp, #{}]", off), |
| 1473 | MachineOperand::Cond(c) => cond_str(*c).into(), |
| 1474 | MachineOperand::BlockRef(id) => format!("bb{}", id.0), |
| 1475 | MachineOperand::Extern(name) => name.clone(), |
| 1476 | MachineOperand::GlobalLabel(name) => { |
| 1477 | if name.starts_with('_') { |
| 1478 | name.clone() |
| 1479 | } else { |
| 1480 | format!("_{}", name) |
| 1481 | } |
| 1482 | } |
| 1483 | MachineOperand::ConstPool(idx) => format!("cp{}", idx), |
| 1484 | MachineOperand::Shift(s) => format!("lsl #{}", s), |
| 1485 | } |
| 1486 | } |
| 1487 | |
| 1488 | fn fp_reg_str(op: &MachineOperand, is_f64: bool) -> String { |
| 1489 | match op { |
| 1490 | MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => { |
| 1491 | if is_f64 { |
| 1492 | format!("d{}", n) |
| 1493 | } else { |
| 1494 | format!("s{}", n) |
| 1495 | } |
| 1496 | } |
| 1497 | _ => op_str(op), |
| 1498 | } |
| 1499 | } |
| 1500 | |
| 1501 | fn cond_str(c: ArmCond) -> &'static str { |
| 1502 | match c { |
| 1503 | ArmCond::Eq => "eq", |
| 1504 | ArmCond::Ne => "ne", |
| 1505 | ArmCond::Hs => "hs", |
| 1506 | ArmCond::Lo => "lo", |
| 1507 | ArmCond::Mi => "mi", |
| 1508 | ArmCond::Pl => "pl", |
| 1509 | ArmCond::Hi => "hi", |
| 1510 | ArmCond::Ls => "ls", |
| 1511 | ArmCond::Ge => "ge", |
| 1512 | ArmCond::Lt => "lt", |
| 1513 | ArmCond::Gt => "gt", |
| 1514 | ArmCond::Le => "le", |
| 1515 | } |
| 1516 | } |
| 1517 | |
/// Generate a constant pool label, e.g. `__main_cp0` for entry 0
/// of function `main`.
fn const_pool_label(func: &str, idx: u32) -> String {
    let mut label = String::with_capacity(func.len() + 8);
    label.push_str("__");
    label.push_str(func);
    label.push_str("_cp");
    label.push_str(&idx.to_string());
    label
}
| 1522 | |
| 1523 | #[cfg(test)] |
| 1524 | mod tests { |
| 1525 | use super::*; |
| 1526 | use crate::codegen::isel::select_function; |
| 1527 | use crate::ir::builder::FuncBuilder; |
| 1528 | use crate::ir::inst::*; |
| 1529 | use crate::ir::types::*; |
| 1530 | |
| 1531 | fn emit_simple(build: impl FnOnce(&mut FuncBuilder)) -> String { |
| 1532 | let mut func = Function::new("test".into(), vec![], IrType::Void); |
| 1533 | { |
| 1534 | let mut b = FuncBuilder::new(&mut func); |
| 1535 | build(&mut b); |
| 1536 | } |
| 1537 | let mf = select_function(&func); |
| 1538 | emit_function(&mf) |
| 1539 | } |
| 1540 | |
    /// Even a trivial `ret void` function must carry the full frame
    /// sequence: allocate (`sub sp`), save fp/lr (`stp`), restore
    /// (`ldp`), deallocate (`add sp`), then `ret`.
    #[test]
    fn emit_prologue_epilogue() {
        let asm = emit_simple(|b| b.ret_void());
        assert!(
            asm.contains("sub sp, sp,"),
            "missing frame allocation: {}",
            asm
        );
        assert!(
            asm.contains("stp x29, x30, [sp,"),
            "missing prologue save: {}",
            asm
        );
        assert!(
            asm.contains("ldp x29, x30, [sp,"),
            "missing epilogue restore: {}",
            asm
        );
        assert!(
            asm.contains("add sp, sp,"),
            "missing frame deallocation: {}",
            asm
        );
        assert!(asm.contains("ret"), "missing ret: {}", asm);
    }
| 1566 | |
    /// An integer `iadd` in the IR must surface as an `add`
    /// instruction somewhere in the emitted text.
    #[test]
    fn emit_integer_add() {
        let asm = emit_simple(|b| {
            let x = b.const_i32(10);
            let y = b.const_i32(20);
            let _z = b.iadd(x, y);
            b.ret_void();
        });
        assert!(asm.contains("add "), "missing add: {}", asm);
    }
| 1577 | |
    /// Functions get a Mach-O-prefixed label (`_test:`) exported via
    /// `.globl`.
    #[test]
    fn emit_function_label() {
        let asm = emit_simple(|b| b.ret_void());
        assert!(asm.contains(".globl _test"), "missing .globl: {}", asm);
        assert!(asm.contains("_test:"), "missing function label: {}", asm);
    }
| 1584 | |
    /// Verify that functions with frame sizes > 4095 use x16 scratch
    /// synthesis for the `sub sp, sp, #N` prologue and `add sp, sp, #N`
    /// epilogue rather than an out-of-range immediate.
    #[test]
    fn emit_large_frame_prologue() {
        // 700 allocas of i64 = 700 * 8 = 5600 bytes, well over 4095.
        let asm = emit_simple(|b| {
            for _ in 0..700 {
                let _ = b.alloca(IrType::Int(IntWidth::I64));
            }
            b.ret_void();
        });
        // The 12-bit immediate max is 4095, so the emitter must
        // synthesize the frame size via x16.
        assert!(
            asm.contains("movz x16,"),
            "large frame should use x16 synthesis: {}",
            asm
        );
        assert!(
            asm.contains("sub sp, sp, x16"),
            "large frame sub should use register form: {}",
            asm
        );
        assert!(
            asm.contains("add sp, sp, x16"),
            "large frame add should use register form: {}",
            asm
        );
        // Must NOT contain a raw "sub sp, sp, #5" that exceeds 4095.
        // (Prefix match: catches "#5600" or any other 5xxx immediate.)
        assert!(
            !asm.contains("sub sp, sp, #5"),
            "should not emit out-of-range immediate: {}",
            asm
        );
    }
| 1621 | |
    /// A 24 KB frame (3000 × 8 bytes) must be probed chunk-by-chunk
    /// (`str xzr, [sp]`) so macOS guard pages aren't skipped in one
    /// jump.
    #[test]
    fn emit_huge_frame_with_stack_probes() {
        let asm = emit_simple(|b| {
            for _ in 0..3000 {
                let _ = b.alloca(IrType::Int(IntWidth::I64));
            }
            b.ret_void();
        });
        assert!(
            asm.contains("str xzr, [sp]"),
            "huge frame should probe each chunk: {}",
            asm
        );
    }
| 1636 | |
    /// A conditional branch in the IR lowers to `b.<cond>` plus both
    /// target block labels.
    #[test]
    fn emit_branch() {
        let asm = emit_simple(|b| {
            let cond = b.const_bool(true);
            let bb_t = b.create_block("then");
            let bb_f = b.create_block("else");
            b.cond_branch(cond, bb_t, vec![], bb_f, vec![]);
            b.set_block(bb_t);
            b.ret_void();
            b.set_block(bb_f);
            b.ret_void();
        });
        assert!(asm.contains("b.ne"), "missing conditional branch: {}", asm);
        assert!(asm.contains("then_"), "missing then label: {}", asm);
        assert!(asm.contains("else_"), "missing else label: {}", asm);
    }
| 1653 | |
    /// An i128 global is emitted as two little-endian `.quad` words
    /// (low first), 16-byte aligned. The initializer is 2^64, so the
    /// expected words are lo=0, hi=1.
    #[test]
    fn emit_i128_scalar_global_as_two_quads() {
        let asm = emit_globals(&[Global {
            name: "big".into(),
            ty: IrType::Int(IntWidth::I128),
            initializer: Some(GlobalInit::Int(18_446_744_073_709_551_616i128)),
        }]);

        assert!(
            asm.contains(".section __DATA,__data"),
            "missing data section:\n{}",
            asm
        );
        assert!(
            asm.contains(".private_extern _big"),
            "missing global symbol:\n{}",
            asm
        );
        assert!(
            asm.contains(".p2align 4"),
            "i128 globals need 16-byte alignment:\n{}",
            asm
        );
        assert_eq!(
            asm.matches(".quad").count(),
            2,
            "scalar i128 should emit two quads:\n{}",
            asm
        );
        assert!(
            asm.contains(".quad 0x0000000000000000\n    .quad 0x0000000000000001"),
            "scalar i128 should emit low/high 64-bit words in memory order:\n{}",
            asm
        );
    }
| 1689 | |
    /// Each i128 array element contributes a lo/hi `.quad` pair;
    /// -1 must round-trip as two all-ones two's-complement words.
    #[test]
    fn emit_i128_array_global_as_word_pairs() {
        let asm = emit_globals(&[Global {
            name: "arr".into(),
            ty: IrType::Array(Box::new(IrType::Int(IntWidth::I128)), 2),
            initializer: Some(GlobalInit::IntArray(vec![1, -1])),
        }]);

        assert_eq!(
            asm.matches(".quad").count(),
            4,
            "two i128 elements should emit four quads:\n{}",
            asm
        );
        assert!(
            asm.contains(".quad 0x0000000000000001\n    .quad 0x0000000000000000"),
            "positive i128 array element should preserve low/high word order:\n{}",
            asm
        );
        assert!(
            asm.contains(".quad 0xffffffffffffffff\n    .quad 0xffffffffffffffff"),
            "negative i128 array element should preserve two's-complement words:\n{}",
            asm
        );
    }
| 1715 | |
    /// A 400-byte array global must get `.p2align 3` (8-byte
    /// alignment, per `byte_array_align_log2` for sizes >= 8).
    #[test]
    fn emit_byte_array_global_uses_natural_alignment() {
        let asm = emit_globals(&[Global {
            name: "history".into(),
            ty: IrType::Array(Box::new(IrType::Int(IntWidth::I8)), 400),
            initializer: Some(GlobalInit::Zero),
        }]);

        assert!(
            asm.contains(".p2align 3\n_history:"),
            "byte-array globals that model descriptors/derived storage need 8-byte alignment:\n{}",
            asm
        );
    }
| 1730 | |
    /// A zero-initialized nested array (4 × 264 bytes) must reserve
    /// its full flattened size: `.space 1056`.
    #[test]
    fn emit_nested_byte_array_global_uses_full_storage_size() {
        let asm = emit_globals(&[Global {
            name: "command_cache".into(),
            ty: IrType::Array(
                Box::new(IrType::Array(Box::new(IrType::Int(IntWidth::I8)), 264)),
                4,
            ),
            initializer: Some(GlobalInit::Zero),
        }]);

        assert!(
            asm.contains("_command_cache:\n    .space 1056"),
            "nested byte-array globals should reserve their full storage size:\n{}",
            asm
        );
    }
| 1748 | |
    /// A `MovReg` with a 32-bit (Gp32) destination must render the
    /// 64-bit (Gp) source through its `w` view — `mov w21, w20`, not
    /// `mov w21, x20`.
    #[test]
    fn emit_mov_reg_truncates_x_source_through_w_view() {
        let mf = MachineFunction::new("test".into());
        let inst = MachineInst {
            opcode: ArmOpcode::MovReg,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Gp32(21)),
                MachineOperand::PhysReg(PhysReg::Gp(20)),
            ],
            def: None,
        };

        assert_eq!(emit_inst(&inst, &mf), "mov w21, w20");
    }
| 1763 | |
#[test]
fn emit_fcvt_uses_fp_register_widths() {
    // fcvt must print the destination and source with their own register
    // widths (s for single, d for double), not a shared width.
    let mf = MachineFunction::new("test".into());
    let cvt = |opcode: ArmOpcode, dst: PhysReg, src: PhysReg| MachineInst {
        opcode,
        operands: vec![MachineOperand::PhysReg(dst), MachineOperand::PhysReg(src)],
        def: None,
    };

    let to_single = cvt(ArmOpcode::FcvtSD, PhysReg::Fp(0), PhysReg::Fp(1));
    let to_double = cvt(ArmOpcode::FcvtDS, PhysReg::Fp32(2), PhysReg::Fp32(3));

    assert_eq!(emit_inst(&to_single, &mf), "fcvt s0, d1");
    assert_eq!(emit_inst(&to_double, &mf), "fcvt d2, s3");
}
| 1787 | |
#[test]
fn emit_large_negative_pair_offsets_use_scratch_addressing() {
    // stp/ldp signed-offset encoding can't reach -544, so emission must
    // compute the address into the x9 scratch register first.
    let mf = MachineFunction::new("test".into());
    let pair = |opcode: ArmOpcode, lo: u8, hi: u8| MachineInst {
        opcode,
        operands: vec![
            MachineOperand::PhysReg(PhysReg::Gp(lo)),
            MachineOperand::PhysReg(PhysReg::Gp(hi)),
            MachineOperand::PhysReg(PhysReg::FP),
            MachineOperand::Imm(-544),
        ],
        def: None,
    };

    let stp_asm = emit_inst(&pair(ArmOpcode::StpOffset, 0, 1), &mf);
    let ldp_asm = emit_inst(&pair(ArmOpcode::LdpOffset, 2, 3), &mf);

    assert!(
        stp_asm.contains("sub x9, x29, #544"),
        "large negative stp offset should synthesize address: {}",
        stp_asm
    );
    assert!(
        ldp_asm.contains("sub x9, x29, #544"),
        "large negative ldp offset should synthesize address: {}",
        ldp_asm
    );
    assert!(
        !stp_asm.contains("[x29, #-544]"),
        "stp should not emit out-of-range raw offset: {}",
        stp_asm
    );
    assert!(
        !ldp_asm.contains("[x29, #-544]"),
        "ldp should not emit out-of-range raw offset: {}",
        ldp_asm
    );
}
| 1835 | |
#[test]
fn emit_internal_only_function_as_private_extern() {
    // internal_only should downgrade linkage: .private_extern instead of
    // .globl on the Mach-O symbol.
    let mf = {
        let mut f = MachineFunction::new("helper".into());
        f.internal_only = true;
        f
    };

    let asm = emit_function(&mf);

    assert!(
        asm.contains(".private_extern _helper"),
        "internal-only functions should not be emitted as globals:\n{}",
        asm
    );
    assert!(
        !asm.contains(".globl _helper"),
        "internal-only functions should not keep external linkage:\n{}",
        asm
    );
}
| 1854 | |
| 1855 | // ---- NEON SIMD emit smoke tests (Sprint 12 Stage 2) ---- |
| 1856 | // |
| 1857 | // The vectorizer doesn't generate any of these yet, but the emit |
| 1858 | // formatters can be exercised directly by hand-building a |
| 1859 | // MachineInst and feeding it through `emit_inst`. These tests |
| 1860 | // pin the assembly text form so future codegen wiring has a |
| 1861 | // golden reference. |
| 1862 | |
| 1863 | use crate::codegen::mir::{ArmOpcode, MachineFunction, MachineInst, MachineOperand, RegClass}; |
| 1864 | |
| 1865 | fn emit_one(opcode: ArmOpcode, operands: Vec<MachineOperand>) -> String { |
| 1866 | let mut mf = MachineFunction::new("t".into()); |
| 1867 | mf.new_block("entry"); |
| 1868 | let inst = MachineInst { |
| 1869 | opcode, |
| 1870 | operands, |
| 1871 | def: None, |
| 1872 | }; |
| 1873 | emit_inst(&inst, &mf) |
| 1874 | } |
| 1875 | |
#[test]
fn emit_fadd_v_4s_form() {
    // Bug fix: this test previously allocated vregs on a local
    // MachineFunction but then emitted through `emit_one`, which builds
    // its own separate function — the local mf (and the trailing
    // `let _ = mf;`) were dead scaffolding. Emit directly against the
    // function that actually owns the vregs instead.
    let mut mf = MachineFunction::new("t".into());
    mf.new_block("entry");
    let v0 = mf.new_vreg(RegClass::V128);
    let v1 = mf.new_vreg(RegClass::V128);
    let v2 = mf.new_vreg(RegClass::V128);
    let inst = MachineInst {
        opcode: ArmOpcode::FaddV4S,
        operands: vec![
            MachineOperand::VReg(v0),
            MachineOperand::VReg(v1),
            MachineOperand::VReg(v2),
        ],
        def: None,
    };
    // afs-as dialect: shape suffix on mnemonic, bare regs.
    assert_eq!(emit_inst(&inst, &mf), "fadd.4s v0, v1, v2");
}
| 1894 | |
#[test]
fn emit_fadd_v_2d_form() {
    use crate::codegen::mir::VRegId;
    // Same three-operand vector add, 2×f64 shape.
    let (dst, lhs, rhs) = (VRegId(0), VRegId(1), VRegId(2));
    let asm = emit_one(
        ArmOpcode::FaddV2D,
        vec![
            MachineOperand::VReg(dst),
            MachineOperand::VReg(lhs),
            MachineOperand::VReg(rhs),
        ],
    );
    assert_eq!(asm, "fadd.2d v0, v1, v2");
}
| 1907 | |
#[test]
fn emit_fmla_v_4s_form() {
    use crate::codegen::mir::VRegId;
    // Fused multiply-accumulate, 4×f32 shape: v0 += v1 * v2.
    let operands = vec![
        MachineOperand::VReg(VRegId(0)),
        MachineOperand::VReg(VRegId(1)),
        MachineOperand::VReg(VRegId(2)),
    ];
    assert_eq!(emit_one(ArmOpcode::FmlaV4S, operands), "fmla.4s v0, v1, v2");
}
| 1920 | |
#[test]
fn emit_addv_4s_reduction_form() {
    use crate::codegen::mir::VRegId;
    // Horizontal reduction: writes a scalar s register, reads the full
    // source vector.
    let operands = vec![
        MachineOperand::VReg(VRegId(0)),
        MachineOperand::VReg(VRegId(1)),
    ];
    assert_eq!(emit_one(ArmOpcode::Addv4S, operands), "addv.4s s0, v1");
}
| 1932 | |
#[test]
fn emit_dup_gen_4s_broadcasts_w_register() {
    use crate::codegen::mir::VRegId;
    // General-register dup: a 32-bit source prints through its w-view.
    let operands = vec![
        MachineOperand::VReg(VRegId(0)),
        MachineOperand::PhysReg(PhysReg::Gp32(2)),
    ];
    assert_eq!(emit_one(ArmOpcode::DupGen4S, operands), "dup.4s v0, w2");
}
| 1944 | |
#[test]
fn emit_dup_el_4s_broadcasts_fp_lane_zero() {
    use crate::codegen::mir::VRegId;
    // Splatting an Fp32 scalar (which lives in v2's lane 0) into
    // a 4×f32 vector uses the lane-dup form. The gp form
    // `dup.4s v0, s2` is rejected by the assembler. afs-as
    // dialect: bare `vN[L]` (no `.s` suffix), with the lane
    // element width encoded into the `dup.4s` mnemonic.
    let operands = vec![
        MachineOperand::VReg(VRegId(0)),
        MachineOperand::VReg(VRegId(2)),
    ];
    assert_eq!(emit_one(ArmOpcode::DupEl4S, operands), "dup.4s v0, v2[0]");
}
| 1961 | |
#[test]
fn emit_dup_el_2d_broadcasts_fp_lane_zero() {
    use crate::codegen::mir::VRegId;
    // Lane-dup form for the 2×f64 shape; see the 4S case above for why
    // the gp form is not usable here.
    let operands = vec![
        MachineOperand::VReg(VRegId(0)),
        MachineOperand::VReg(VRegId(2)),
    ];
    assert_eq!(emit_one(ArmOpcode::DupEl2D, operands), "dup.2d v0, v2[0]");
}
| 1973 | |
#[test]
fn emit_ldr_q_form() {
    use crate::codegen::mir::VRegId;
    // 128-bit vector load: q destination, base register + immediate offset.
    let operands = vec![
        MachineOperand::VReg(VRegId(0)),
        MachineOperand::PhysReg(PhysReg::Gp(1)),
        MachineOperand::Imm(16),
    ];
    assert_eq!(emit_one(ArmOpcode::LdrQ, operands), "ldr q0, [x1, #16]");
}
| 1986 | |
#[test]
fn emit_str_q_form() {
    use crate::codegen::mir::VRegId;
    // 128-bit vector store; a zero offset is still printed explicitly.
    let operands = vec![
        MachineOperand::VReg(VRegId(0)),
        MachineOperand::PhysReg(PhysReg::Gp(1)),
        MachineOperand::Imm(0),
    ];
    assert_eq!(emit_one(ArmOpcode::StrQ, operands), "str q0, [x1, #0]");
}
| 1999 | |
#[test]
fn emit_umov_extracts_lane() {
    use crate::codegen::mir::VRegId;
    // Lane extraction to a general register: the Imm operand selects the
    // source lane index.
    let operands = vec![
        MachineOperand::PhysReg(PhysReg::Gp32(3)),
        MachineOperand::VReg(VRegId(0)),
        MachineOperand::Imm(2),
    ];
    assert_eq!(emit_one(ArmOpcode::Umov4S, operands), "umov.s w3, v0[2]");
}
| 2012 | } |
| 2013 |