Rust · 72410 bytes Raw Blame History
1 //! Assembly text emission — converts Machine IR to ARM64 assembly text.
2 //!
3 //! Produces output compatible with both afs-as and Apple's system assembler.
4
5 use super::mir::*;
6 use std::fmt::Write;
7
/// Split an `i128` into its (low, high) 64-bit words, reinterpreting
/// the value's two's-complement bits (no sign extension surprises).
fn split_i128_words(value: i128) -> (u64, u64) {
    let raw = value as u128;
    let low = raw as u64;
    let high = (raw >> 64) as u64;
    (low, high)
}
12
/// Append an `i128` literal to `out` as two `.quad` data words,
/// low word first (little-endian layout in memory).
fn emit_i128_words(out: &mut String, value: i128) {
    let raw = value as u128;
    for word in [raw as u64, (raw >> 64) as u64] {
        writeln!(out, " .quad 0x{:016x}", word).unwrap();
    }
}
18
/// Append a `.byte` directive listing every value in `bytes`,
/// comma-separated. Emits nothing at all for an empty slice.
fn emit_byte_values(out: &mut String, bytes: &[u8]) {
    if bytes.is_empty() {
        return;
    }
    let mut list = String::new();
    for (i, b) in bytes.iter().enumerate() {
        if i > 0 {
            list.push_str(", ");
        }
        write!(list, "{}", b).unwrap();
    }
    writeln!(out, " .byte {}", list).unwrap();
}
30
/// Pick a log2 alignment for a byte array based on its size:
/// up to 8-byte (p2align 3) for arrays of 8+ bytes, scaling down
/// for smaller arrays so tiny globals don't waste padding.
fn byte_array_align_log2(byte_count: u64) -> u8 {
    match byte_count {
        0..=1 => 0,
        2..=3 => 1,
        4..=7 => 2,
        _ => 3,
    }
}
42
/// Emit module-level globals as a `.section __DATA,__data` block.
/// Each global gets a label and a directive matching its type
/// (`.long`, `.quad`, `.single`, `.double`, etc.) plus the
/// initializer value. Zero-initialized globals still emit an
/// explicit zero so the symbol resolves at link time.
///
/// Array-typed globals: the IR type is `Array<i8, byte_size>` so
/// the element count isn't directly recoverable from the type.
/// The caller must use `IntArray`/`FloatArray` initializers that
/// carry the element count explicitly. Zero-initialized arrays
/// fall back to `.space byte_size`.
///
/// Module globals (`afs_mod_*` and `afs_common_*`) are emitted as
/// `.globl` so other translation units can reference them via USE.
/// Non-module globals (SAVE-promoted locals) stay `.private_extern`
/// to prevent cross-TU collisions (audit Maj-1).
pub fn emit_globals(globals: &[crate::ir::inst::Global]) -> String {
    use crate::ir::inst::GlobalInit;
    use crate::ir::types::{FloatWidth, IntWidth, IrType};

    let mut out = String::new();
    if globals.is_empty() {
        // No section header for an empty global list.
        return out;
    }

    writeln!(out, ".section __DATA,__data").unwrap();
    for g in globals {
        // Mach-O symbol convention: add a leading underscore unless the
        // name already carries one.
        let symbol = if g.name.starts_with('_') {
            g.name.clone()
        } else {
            format!("_{}", g.name)
        };
        // Module globals need external linkage for multi-file.
        let is_module_global = g.name.starts_with("afs_mod_") || g.name.starts_with("afs_common_");
        if is_module_global {
            writeln!(out, ".globl {}", symbol).unwrap();
        } else {
            writeln!(out, ".private_extern {}", symbol).unwrap();
        }

        // Array globals carry `Array<elem_ty, count>`. Pick the
        // directive from the element type so `.long` / `.quad` /
        // `.single` / `.double` all work correctly.
        if let IrType::Array(elem_ty, count) = &g.ty {
            let (align, directive, _elem_bytes, is_float) = match elem_ty.as_ref() {
                IrType::Int(IntWidth::I8) | IrType::Bool => {
                    // Byte arrays scale their alignment down with size.
                    (byte_array_align_log2(*count), ".byte", 1, false)
                }
                IrType::Int(IntWidth::I16) => (1, ".short", 2, false),
                IrType::Int(IntWidth::I32) => (2, ".long", 4, false),
                IrType::Int(IntWidth::I64) => (3, ".quad", 8, false),
                // i128 has no single data directive; elements are split
                // into `.quad` pairs below.
                IrType::Int(IntWidth::I128) => (4, ".quad", 16, false),
                IrType::Float(FloatWidth::F32) => (2, ".single", 4, true),
                IrType::Float(FloatWidth::F64) => (3, ".double", 8, true),
                _ => (3, ".quad", 8, false),
            };
            if align > 0 {
                writeln!(out, ".p2align {}", align).unwrap();
            }
            writeln!(out, "{}:", symbol).unwrap();
            match &g.initializer {
                // i128 elements: two little-endian .quad words each.
                // This arm must precede the generic IntArray arm so the
                // 16-byte split takes priority over the `.quad` directive.
                Some(GlobalInit::IntArray(vs))
                    if matches!(elem_ty.as_ref(), IrType::Int(IntWidth::I128)) =>
                {
                    for v in vs {
                        emit_i128_words(&mut out, *v);
                    }
                }
                Some(GlobalInit::IntArray(vs)) if !is_float => {
                    for v in vs {
                        writeln!(out, " {} {}", directive, v).unwrap();
                    }
                }
                Some(GlobalInit::FloatArray(vs)) if is_float => {
                    for v in vs {
                        writeln!(out, " {} {}", directive, v).unwrap();
                    }
                }
                Some(GlobalInit::String(bytes)) => {
                    emit_byte_values(&mut out, bytes);
                    // Pad a short string initializer out to the array's
                    // full byte size so the storage is fully defined.
                    let total_bytes = g.ty.size_bytes() as usize;
                    if bytes.len() < total_bytes {
                        writeln!(out, " .space {}", total_bytes - bytes.len()).unwrap();
                    }
                }
                _ => {
                    // Nested arrays (for example arrays of byte-packed derived
                    // values) don't have a scalar element directive. Emit their
                    // zero-initialized storage using the full IR type size
                    // instead of falling back to a bogus ".quad * count" size.
                    let byte_size = g.ty.size_bytes();
                    writeln!(out, " .space {}", byte_size).unwrap();
                }
            }
            continue;
        }

        // Scalar i128: 16-byte aligned, emitted as a lo/hi .quad pair.
        if matches!(g.ty, IrType::Int(IntWidth::I128)) {
            writeln!(out, ".p2align 4").unwrap();
            writeln!(out, "{}:", symbol).unwrap();
            match &g.initializer {
                Some(GlobalInit::Int(v)) => emit_i128_words(&mut out, *v),
                Some(GlobalInit::Zero) | None => emit_i128_words(&mut out, 0),
                // Non-integer initializer on an i128 global: reserve
                // zeroed storage rather than emitting a wrong value.
                _ => writeln!(out, " .space 16").unwrap(),
            }
            continue;
        }

        // Scalar globals: pick alignment + storage directive.
        // Audit Med-5: NaN/Inf must round-trip portably across
        // assemblers. Apple's `as` accepts `.single NaN` but GNU
        // binutils does not. Emit non-finite floats as their
        // bit-pattern via `.long` / `.quad` so the same .s file
        // assembles cleanly on both.
        let is_nonfinite_float = matches!(
            (&g.ty, &g.initializer),
            (IrType::Float(_), Some(GlobalInit::Float(v))) if !v.is_finite()
        );
        let (align, directive, default_zero) = if is_nonfinite_float {
            match &g.ty {
                IrType::Float(FloatWidth::F32) => (2, ".long", "0"),
                _ => (3, ".quad", "0"),
            }
        } else {
            match &g.ty {
                IrType::Int(IntWidth::I8) | IrType::Bool => (0, ".byte", "0"),
                IrType::Int(IntWidth::I16) => (1, ".short", "0"),
                IrType::Int(IntWidth::I32) => (2, ".long", "0"),
                IrType::Int(IntWidth::I64) => (3, ".quad", "0"),
                IrType::Float(FloatWidth::F32) => (2, ".single", "0.0"),
                IrType::Float(FloatWidth::F64) => (3, ".double", "0.0"),
                _ => (3, ".quad", "0"), // pointers and aggregates: 8-byte slot
            }
        };
        if align > 0 {
            writeln!(out, ".p2align {}", align).unwrap();
        }
        writeln!(out, "{}:", symbol).unwrap();
        let value = match &g.initializer {
            Some(GlobalInit::Int(v)) => v.to_string(),
            Some(GlobalInit::Float(v)) => {
                if v.is_finite() {
                    format!("{}", v)
                } else {
                    // Bit-pattern emission for NaN / ±Inf.
                    match &g.ty {
                        IrType::Float(FloatWidth::F32) => {
                            format!("0x{:08x}", (*v as f32).to_bits())
                        }
                        _ => format!("0x{:016x}", v.to_bits()),
                    }
                }
            }
            Some(GlobalInit::Zero) | None => default_zero.into(),
            // Single-character string on a byte-sized global: take the
            // first byte (0 if the string is empty).
            Some(GlobalInit::String(bytes))
                if matches!(g.ty, IrType::Int(IntWidth::I8) | IrType::Bool) =>
            {
                bytes.first().copied().unwrap_or(0).to_string()
            }
            Some(GlobalInit::String(_)) => default_zero.into(),
            Some(GlobalInit::IntArray(_)) | Some(GlobalInit::FloatArray(_)) => {
                // Array initializer on a scalar-typed global —
                // shouldn't happen, but emit zero as a safe fallback.
                default_zero.into()
            }
        };
        writeln!(out, " {} {}", directive, value).unwrap();
    }
    out
}
213
/// Emit a machine function as ARM64 assembly text.
///
/// Output layout: linkage directive (`.globl` or `.private_extern`),
/// `.p2align 2` (4-byte instruction alignment), the `_name:` label,
/// each block's label and instructions, then — if the function has
/// one — its constant pool in a `.section __DATA,__const` block.
pub fn emit_function(mf: &MachineFunction) -> String {
    let mut out = String::new();

    // Function directive.
    if mf.internal_only {
        writeln!(out, ".private_extern _{}", mf.name).unwrap();
    } else {
        writeln!(out, ".globl _{}", mf.name).unwrap();
    }
    writeln!(out, ".p2align 2").unwrap();
    writeln!(out, "_{}:", mf.name).unwrap();

    for block in &mf.blocks {
        // Don't re-emit entry label (it's the function label).
        if block.id != MBlockId(0) {
            writeln!(out, "{}:", block.label).unwrap();
        }

        for inst in &block.insts {
            // emit_inst may return multiple newline-joined lines for
            // expanded sequences; each starts at the same indent.
            writeln!(out, " {}", emit_inst(inst, mf)).unwrap();
        }
    }

    // Constant pool.
    if !mf.const_pool.is_empty() {
        writeln!(out).unwrap();
        writeln!(out, ".section __DATA,__const").unwrap();
        for (i, entry) in mf.const_pool.iter().enumerate() {
            // Pool labels are derived from the function name + index so
            // they are unique per translation unit.
            let label = const_pool_label(&mf.name, i as u32);
            match entry {
                ConstPoolEntry::F32(v) => {
                    writeln!(out, ".p2align 2").unwrap();
                    writeln!(out, "{}:", label).unwrap();
                    // Emit as hex integer to avoid decimal expansion issues
                    // with large/small floats that the assembler can't parse.
                    writeln!(out, " .long 0x{:08x}", v.to_bits()).unwrap();
                }
                ConstPoolEntry::F64(v) => {
                    writeln!(out, ".p2align 3").unwrap();
                    writeln!(out, "{}:", label).unwrap();
                    writeln!(out, " .quad 0x{:016x}", v.to_bits()).unwrap();
                }
                ConstPoolEntry::I64(v) => {
                    writeln!(out, ".p2align 3").unwrap();
                    writeln!(out, "{}:", label).unwrap();
                    writeln!(out, " .quad {}", v).unwrap();
                }
                ConstPoolEntry::Bytes(b) => {
                    writeln!(out, ".p2align 3").unwrap();
                    writeln!(out, "{}:", label).unwrap();
                    write!(out, " .ascii \"").unwrap();
                    // Escape everything `.ascii` can't carry verbatim:
                    // backslash, quote, newline, tab get named escapes;
                    // other non-printable bytes become \xNN.
                    for &byte in b {
                        match byte {
                            b'\\' => write!(out, "\\\\").unwrap(),
                            b'"' => write!(out, "\\\"").unwrap(),
                            b'\n' => write!(out, "\\n").unwrap(),
                            b'\t' => write!(out, "\\t").unwrap(),
                            b if b.is_ascii_graphic() || b == b' ' => {
                                write!(out, "{}", b as char).unwrap();
                            }
                            b => write!(out, "\\x{:02x}", b).unwrap(),
                        }
                    }
                    writeln!(out, "\"").unwrap();
                }
            }
        }
    }

    out
}
286
/// Format `OP sp, sp, #N` (or `add x29, sp, #N`), falling back
/// to a synthesized movz/movk sequence via the AAPCS64 scratch
/// register x16 (IP0) when N exceeds the 12-bit immediate range.
/// x16 is free in the prologue/epilogue per AAPCS64 — it has no
/// caller-saved value at function entry and can be clobbered
/// before/after the FP/LR save.
///
/// Audit6 BLOCKING-5 (related to BLOCKING-4): functions whose
/// frame size exceeds 4095 bytes used to emit raw
/// `sub sp, sp, #4144` and the assembler rejected the
/// immediate. This came up after audit6 BLOCKING-4 added
/// per-allocate descriptor buffers, but it's a latent bug that
/// any large-frame function would hit.
///
/// Fix over the previous version: the old code materialized only
/// the low two 16-bit chunks of N, silently mis-encoding values
/// with bits above 32 and negative values. Now the full 64-bit
/// magnitude goes through movz/movk (zero chunks after the first
/// are elided), and negative N flips add↔sub so the arithmetic
/// stays correct. Output is byte-identical for the previously
/// working range (0..=4095 and positive 32-bit values).
fn fmt_sp_imm(op: &str, dest: &str, base: &str, n: i64) -> String {
    if (0..=4095).contains(&n) {
        return format!("{} {}, {}, #{}", op, dest, base, n);
    }
    // Negative immediates: materialize |n| and flip the operation
    // (sub by -n == add by |n|), since movz/movk build an unsigned value.
    let magnitude = n.unsigned_abs();
    let eff_op = if n < 0 {
        match op {
            "add" => "sub",
            "sub" => "add",
            other => other, // callers only pass add/sub; leave others alone
        }
    } else {
        op
    };
    // Synthesize the full immediate in x16 then use the register form.
    let mut mov = String::new();
    for shift in [0u32, 16, 32, 48] {
        let chunk = (magnitude >> shift) & 0xFFFF;
        // Skip zero chunks once the register has been seeded.
        if chunk == 0 && !mov.is_empty() {
            continue;
        }
        if mov.is_empty() {
            mov = format!("movz x16, #{}", chunk);
        } else {
            write!(mov, "\n movk x16, #{}, lsl #{}", chunk, shift).unwrap();
        }
    }
    format!("{}\n {} {}, {}, x16", mov, eff_op, dest, base)
}
314
315 fn fmt_stack_alloc(frame_size: i64) -> String {
316 // Apple Silicon uses large guard pages, so jumping the stack pointer
317 // down by a huge frame in one shot can skip the guard and fault on the
318 // first real touch. Probe the stack one chunk at a time for large
319 // frames to keep growth fault-safe.
320 const STACK_PROBE_STRIDE: i64 = 16 * 1024;
321
322 if frame_size <= STACK_PROBE_STRIDE {
323 return fmt_sp_imm("sub", "sp", "sp", frame_size);
324 }
325
326 let mut lines = Vec::new();
327 let mut remaining = frame_size;
328 while remaining > 0 {
329 let step = remaining.min(STACK_PROBE_STRIDE);
330 lines.push(fmt_sp_imm("sub", "sp", "sp", step));
331 lines.push("str xzr, [sp]".to_string());
332 remaining -= step;
333 }
334 lines.join("\n ")
335 }
336
/// Materialize an arbitrary 64-bit constant into `reg` using a
/// movz seed plus movk patches, one 16-bit chunk per instruction.
/// Zero chunks after the seed are skipped; `value == 0` collapses
/// to a single `movz reg, #0`.
fn fmt_u64_imm(reg: &str, value: u64) -> String {
    let mut text = String::new();
    for shift in [0u32, 16, 32, 48] {
        let chunk = ((value >> shift) & 0xFFFF) as u16;
        if text.is_empty() {
            // First chunk always seeds the register (even if zero).
            text = format!("movz {}, #{}", reg, chunk);
        } else if chunk != 0 {
            write!(text, "\n movk {}, #{}, lsl #{}", reg, chunk, shift).unwrap();
        }
    }
    text
}
356
357 fn fmt_addr_with_offset(dest: &str, base: &str, offset: i64, scratch: &str) -> String {
358 if offset == 0 {
359 return format!("mov {}, {}", dest, base);
360 }
361
362 if (0..=4095).contains(&offset) {
363 return format!("add {}, {}, #{}", dest, base, offset);
364 }
365 if (-4095..=-1).contains(&offset) {
366 return format!("sub {}, {}, #{}", dest, base, -offset);
367 }
368
369 let imm = fmt_u64_imm(scratch, offset.unsigned_abs());
370 let op = if offset.is_negative() { "sub" } else { "add" };
371 format!("{}\n {} {}, {}, {}", imm, op, dest, base, scratch)
372 }
373
/// Emit a single machine instruction as assembly text. Public so the
/// branch-relaxation pass can count emit-time instruction bytes
/// directly rather than re-deriving each opcode's expansion rules.
///
/// Thin wrapper over the private `emit_inst`; expansions that lower to
/// several real instructions come back as newline-separated lines.
pub fn emit_inst_text(inst: &MachineInst, mf: &MachineFunction) -> String {
    emit_inst(inst, mf)
}
380
381 /// Emit a single machine instruction as assembly text.
382 fn emit_inst(inst: &MachineInst, mf: &MachineFunction) -> String {
383 match inst.opcode {
384 ArmOpcode::AddReg => format!(
385 "add {}, {}, {}",
386 op_str(&inst.operands[0]),
387 op_str(&inst.operands[1]),
388 op_str(&inst.operands[2])
389 ),
390 ArmOpcode::AddsReg => format!(
391 "adds {}, {}, {}",
392 op_str(&inst.operands[0]),
393 op_str(&inst.operands[1]),
394 op_str(&inst.operands[2])
395 ),
396 ArmOpcode::AdcReg => format!(
397 "adc {}, {}, {}",
398 op_str(&inst.operands[0]),
399 op_str(&inst.operands[1]),
400 op_str(&inst.operands[2])
401 ),
402 ArmOpcode::AddImm => {
403 let dest = op_str(&inst.operands[0]);
404 let base = op_str(&inst.operands[1]);
405 let imm: i64 = match &inst.operands[2] {
406 MachineOperand::FrameSlot(off) => *off as i64,
407 MachineOperand::Imm(-1) => {
408 // Sentinel: prologue FP setup → frame_size - 16
409 mf.frame.size.saturating_sub(16) as i64
410 }
411 MachineOperand::Imm(v) => *v,
412 _ => return format!("add {}, {}, {}", dest, base, op_str(&inst.operands[2])),
413 };
414 // Both `add x29, sp, #N` (FP setup) and `add Xd, Xn, #N`
415 // need the > 4095 fallback. Use the same scratch
416 // synthesis since x16 is safe in the prologue.
417 fmt_sp_imm("add", &dest, &base, imm)
418 }
419 ArmOpcode::SubReg => format!(
420 "sub {}, {}, {}",
421 op_str(&inst.operands[0]),
422 op_str(&inst.operands[1]),
423 op_str(&inst.operands[2])
424 ),
425 ArmOpcode::SubsReg => format!(
426 "subs {}, {}, {}",
427 op_str(&inst.operands[0]),
428 op_str(&inst.operands[1]),
429 op_str(&inst.operands[2])
430 ),
431 ArmOpcode::SbcReg => format!(
432 "sbc {}, {}, {}",
433 op_str(&inst.operands[0]),
434 op_str(&inst.operands[1]),
435 op_str(&inst.operands[2])
436 ),
437 ArmOpcode::SubImm => {
438 let imm: i64 = match &inst.operands[2] {
439 MachineOperand::Imm(-1) => {
440 // Sentinel: epilogue SP restore → frame_size - 16
441 mf.frame.size.saturating_sub(16) as i64
442 }
443 MachineOperand::Imm(v) => *v,
444 _ => 0,
445 };
446 let dest = op_str(&inst.operands[0]);
447 let base = op_str(&inst.operands[1]);
448 fmt_sp_imm("sub", &dest, &base, imm)
449 }
450 ArmOpcode::Mul => format!(
451 "mul {}, {}, {}",
452 op_str(&inst.operands[0]),
453 op_str(&inst.operands[1]),
454 op_str(&inst.operands[2])
455 ),
456 ArmOpcode::Sdiv => format!(
457 "sdiv {}, {}, {}",
458 op_str(&inst.operands[0]),
459 op_str(&inst.operands[1]),
460 op_str(&inst.operands[2])
461 ),
462 ArmOpcode::Madd => format!(
463 "madd {}, {}, {}, {}",
464 op_str(&inst.operands[0]),
465 op_str(&inst.operands[1]),
466 op_str(&inst.operands[2]),
467 op_str(&inst.operands[3])
468 ),
469 ArmOpcode::Msub => format!(
470 "msub {}, {}, {}, {}",
471 op_str(&inst.operands[0]),
472 op_str(&inst.operands[1]),
473 op_str(&inst.operands[2]),
474 op_str(&inst.operands[3])
475 ),
476 ArmOpcode::Neg => format!(
477 "neg {}, {}",
478 op_str(&inst.operands[0]),
479 op_str(&inst.operands[1])
480 ),
481
482 ArmOpcode::AndReg => format!(
483 "and {}, {}, {}",
484 op_str(&inst.operands[0]),
485 op_str(&inst.operands[1]),
486 op_str(&inst.operands[2])
487 ),
488 ArmOpcode::OrrReg => format!(
489 "orr {}, {}, {}",
490 op_str(&inst.operands[0]),
491 op_str(&inst.operands[1]),
492 op_str(&inst.operands[2])
493 ),
494 ArmOpcode::EorReg => format!(
495 "eor {}, {}, {}",
496 op_str(&inst.operands[0]),
497 op_str(&inst.operands[1]),
498 op_str(&inst.operands[2])
499 ),
500 ArmOpcode::OrnReg => format!(
501 "orn {}, {}, {}",
502 op_str(&inst.operands[0]),
503 op_str(&inst.operands[1]),
504 op_str(&inst.operands[2])
505 ),
506 ArmOpcode::LslReg => format!(
507 "lsl {}, {}, {}",
508 op_str(&inst.operands[0]),
509 op_str(&inst.operands[1]),
510 op_str(&inst.operands[2])
511 ),
512 ArmOpcode::LsrReg => format!(
513 "lsr {}, {}, {}",
514 op_str(&inst.operands[0]),
515 op_str(&inst.operands[1]),
516 op_str(&inst.operands[2])
517 ),
518 ArmOpcode::AsrReg => format!(
519 "asr {}, {}, {}",
520 op_str(&inst.operands[0]),
521 op_str(&inst.operands[1]),
522 op_str(&inst.operands[2])
523 ),
524
525 ArmOpcode::Mvn => format!(
526 "mvn {}, {}",
527 op_str(&inst.operands[0]),
528 op_str(&inst.operands[1])
529 ),
530 ArmOpcode::Clz => format!(
531 "clz {}, {}",
532 op_str(&inst.operands[0]),
533 op_str(&inst.operands[1])
534 ),
535 ArmOpcode::Rbit => format!(
536 "rbit {}, {}",
537 op_str(&inst.operands[0]),
538 op_str(&inst.operands[1])
539 ),
540
541 ArmOpcode::CmpReg => format!(
542 "cmp {}, {}",
543 op_str(&inst.operands[0]),
544 op_str(&inst.operands[1])
545 ),
546 ArmOpcode::CmpImm => format!(
547 "cmp {}, #{}",
548 op_str(&inst.operands[0]),
549 if let MachineOperand::Imm(v) = &inst.operands[1] {
550 *v
551 } else {
552 0
553 }
554 ),
555 ArmOpcode::Cset | ArmOpcode::FCset => {
556 let cond = if let MachineOperand::Cond(c) = &inst.operands[1] {
557 cond_str(*c)
558 } else {
559 "eq"
560 };
561 format!("cset {}, {}", op_str(&inst.operands[0]), cond)
562 }
563 ArmOpcode::CselReg => {
564 let cond = if let MachineOperand::Cond(c) = &inst.operands[3] {
565 cond_str(*c)
566 } else {
567 "eq"
568 };
569 format!(
570 "csel {}, {}, {}, {}",
571 op_str(&inst.operands[0]),
572 op_str(&inst.operands[1]),
573 op_str(&inst.operands[2]),
574 cond
575 )
576 }
577 ArmOpcode::FCmpReg => format!(
578 "fcmp {}, {}",
579 op_str(&inst.operands[0]),
580 op_str(&inst.operands[1])
581 ),
582 ArmOpcode::FcselReg => {
583 let cond = if let MachineOperand::Cond(c) = &inst.operands[3] {
584 cond_str(*c)
585 } else {
586 "eq"
587 };
588 format!(
589 "fcsel {}, {}, {}, {}",
590 op_str(&inst.operands[0]),
591 op_str(&inst.operands[1]),
592 op_str(&inst.operands[2]),
593 cond
594 )
595 }
596
597 ArmOpcode::FaddS | ArmOpcode::FaddD => format!(
598 "fadd {}, {}, {}",
599 op_str(&inst.operands[0]),
600 op_str(&inst.operands[1]),
601 op_str(&inst.operands[2])
602 ),
603 ArmOpcode::FsubS | ArmOpcode::FsubD => format!(
604 "fsub {}, {}, {}",
605 op_str(&inst.operands[0]),
606 op_str(&inst.operands[1]),
607 op_str(&inst.operands[2])
608 ),
609 ArmOpcode::FmulS | ArmOpcode::FmulD => format!(
610 "fmul {}, {}, {}",
611 op_str(&inst.operands[0]),
612 op_str(&inst.operands[1]),
613 op_str(&inst.operands[2])
614 ),
615 ArmOpcode::FdivS | ArmOpcode::FdivD => format!(
616 "fdiv {}, {}, {}",
617 op_str(&inst.operands[0]),
618 op_str(&inst.operands[1]),
619 op_str(&inst.operands[2])
620 ),
621 ArmOpcode::FnegS | ArmOpcode::FnegD => format!(
622 "fneg {}, {}",
623 op_str(&inst.operands[0]),
624 op_str(&inst.operands[1])
625 ),
626 ArmOpcode::FabsS | ArmOpcode::FabsD => format!(
627 "fabs {}, {}",
628 op_str(&inst.operands[0]),
629 op_str(&inst.operands[1])
630 ),
631 ArmOpcode::FsqrtS | ArmOpcode::FsqrtD => format!(
632 "fsqrt {}, {}",
633 op_str(&inst.operands[0]),
634 op_str(&inst.operands[1])
635 ),
636 // Fused multiply-add/subtract: 4-operand (dest, Sn, Sm, Sa).
637 // FMADD Sd, Sn, Sm, Sa → Sd = Sa + Sn*Sm
638 // FMSUB Sd, Sn, Sm, Sa → Sd = Sa - Sn*Sm
639 // FNMSUB Sd, Sn, Sm, Sa → Sd = Sn*Sm - Sa
640 ArmOpcode::FmaddS | ArmOpcode::FmaddD => format!(
641 "fmadd {}, {}, {}, {}",
642 op_str(&inst.operands[0]),
643 op_str(&inst.operands[1]),
644 op_str(&inst.operands[2]),
645 op_str(&inst.operands[3])
646 ),
647 ArmOpcode::FmsubS | ArmOpcode::FmsubD => format!(
648 "fmsub {}, {}, {}, {}",
649 op_str(&inst.operands[0]),
650 op_str(&inst.operands[1]),
651 op_str(&inst.operands[2]),
652 op_str(&inst.operands[3])
653 ),
654 ArmOpcode::FnmsubS | ArmOpcode::FnmsubD => format!(
655 "fnmsub {}, {}, {}, {}",
656 op_str(&inst.operands[0]),
657 op_str(&inst.operands[1]),
658 op_str(&inst.operands[2]),
659 op_str(&inst.operands[3])
660 ),
661
662 ArmOpcode::ScvtfSW | ArmOpcode::ScvtfDW | ArmOpcode::ScvtfSX | ArmOpcode::ScvtfDX => {
663 format!(
664 "scvtf {}, {}",
665 op_str(&inst.operands[0]),
666 op_str(&inst.operands[1])
667 )
668 }
669 ArmOpcode::FcvtzsWS | ArmOpcode::FcvtzsWD | ArmOpcode::FcvtzsXS | ArmOpcode::FcvtzsXD => {
670 format!(
671 "fcvtzs {}, {}",
672 op_str(&inst.operands[0]),
673 op_str(&inst.operands[1])
674 )
675 }
676 ArmOpcode::FcvtSD => format!(
677 "fcvt {}, {}",
678 fp_reg_str(&inst.operands[0], false),
679 fp_reg_str(&inst.operands[1], true)
680 ),
681 ArmOpcode::FcvtDS => format!(
682 "fcvt {}, {}",
683 fp_reg_str(&inst.operands[0], true),
684 fp_reg_str(&inst.operands[1], false)
685 ),
686
687 ArmOpcode::Movz => {
688 let imm = if let MachineOperand::Imm(v) = &inst.operands[1] {
689 *v
690 } else {
691 0
692 };
693 let shift = if let MachineOperand::Shift(s) = &inst.operands[2] {
694 *s
695 } else {
696 0
697 };
698 if shift == 0 {
699 format!("movz {}, #{}", op_str(&inst.operands[0]), imm)
700 } else {
701 format!(
702 "movz {}, #{}, lsl #{}",
703 op_str(&inst.operands[0]),
704 imm,
705 shift
706 )
707 }
708 }
709 ArmOpcode::Movk => {
710 let imm = if let MachineOperand::Imm(v) = &inst.operands[1] {
711 *v
712 } else {
713 0
714 };
715 let shift = if let MachineOperand::Shift(s) = &inst.operands[2] {
716 *s
717 } else {
718 0
719 };
720 format!(
721 "movk {}, #{}, lsl #{}",
722 op_str(&inst.operands[0]),
723 imm,
724 shift
725 )
726 }
727 ArmOpcode::Movn => {
728 let imm = if let MachineOperand::Imm(v) = &inst.operands[1] {
729 *v
730 } else {
731 0
732 };
733 let shift = if let MachineOperand::Shift(s) = &inst.operands[2] {
734 *s
735 } else {
736 0
737 };
738 format!(
739 "movn {}, #{}, lsl #{}",
740 op_str(&inst.operands[0]),
741 imm,
742 shift
743 )
744 }
745 ArmOpcode::MovReg => {
746 let dest = op_str(&inst.operands[0]);
747 let src = op_str(&inst.operands[1]);
748 // Handle width mismatch: w→x extend or x→w truncate.
749 let dest_is_x = dest.starts_with('x');
750 let dest_is_w = dest.starts_with('w');
751 let src_is_w = src.starts_with('w');
752 let src_is_x = src.starts_with('x');
753 // Cross-register-class move: AArch64 `mov` only encodes GP↔GP
754 // (and FP↔FP via FmovReg). When register-allocation hands us
755 // a MovReg straddling classes, emit `fmov` which transfers
756 // bits between an integer GPR and an SIMD/FP register.
757 let dest_is_gp = dest_is_x || dest_is_w;
758 let src_is_gp = src_is_x || src_is_w;
759 let dest_is_fp = dest.starts_with('s') || dest.starts_with('d');
760 let src_is_fp = src.starts_with('s') || src.starts_with('d');
761 if dest_is_gp && src_is_fp {
762 // GPR ← FPR: pick GPR width to match FPR (s→w, d→x).
763 let gp = if src.starts_with('d') {
764 if dest_is_x {
765 dest.clone()
766 } else {
767 format!("x{}", &dest[1..])
768 }
769 } else {
770 if dest_is_w {
771 dest.clone()
772 } else {
773 format!("w{}", &dest[1..])
774 }
775 };
776 return format!("fmov {}, {}", gp, src);
777 }
778 if dest_is_fp && src_is_gp {
779 let gp = if dest.starts_with('d') {
780 if src_is_x {
781 src.clone()
782 } else {
783 format!("x{}", &src[1..])
784 }
785 } else {
786 if src_is_w {
787 src.clone()
788 } else {
789 format!("w{}", &src[1..])
790 }
791 };
792 return format!("fmov {}, {}", dest, gp);
793 }
794 if dest_is_x && src_is_w {
795 // Zero-extend 32→64: use uxtw.
796 format!("uxtw {}, {}", dest, src)
797 } else if dest_is_w && src_is_x {
798 // Truncate 64→32 by reading the source register through its
799 // 32-bit view. `mov wN, xM` is not a valid AArch64 encoding.
800 format!("mov {}, w{}", dest, &src[1..])
801 } else {
802 format!("mov {}, {}", dest, src)
803 }
804 }
805 ArmOpcode::FmovReg => format!(
806 "fmov {}, {}",
807 op_str(&inst.operands[0]),
808 op_str(&inst.operands[1])
809 ),
810 ArmOpcode::Mov16B => format!(
811 "mov.16b {}, {}",
812 v_reg_bare(&inst.operands[0]),
813 v_reg_bare(&inst.operands[1]),
814 ),
815 ArmOpcode::AddpV2D => format!(
816 "addp.2d {}, {}, {}",
817 v_reg_bare(&inst.operands[0]),
818 v_reg_bare(&inst.operands[1]),
819 v_reg_bare(&inst.operands[2]),
820 ),
821 ArmOpcode::FaddpV4S => format!(
822 "faddp.4s {}, {}, {}",
823 v_reg_bare(&inst.operands[0]),
824 v_reg_bare(&inst.operands[1]),
825 v_reg_bare(&inst.operands[2]),
826 ),
827
828 ArmOpcode::LdrImm | ArmOpcode::LdrFpImm | ArmOpcode::LdrsbImm | ArmOpcode::LdrshImm => {
829 let dest = op_str(&inst.operands[0]);
830 let base = op_str(&inst.operands[1]);
831 let offset_val = match &inst.operands[2] {
832 MachineOperand::FrameSlot(off) => *off as i64,
833 MachineOperand::Imm(v) => *v,
834 _ => 0,
835 };
836 // Pick the mnemonic by opcode. LDRSB / LDRSH expect a
837 // Wt destination (sign-extended into the lower 32 bits);
838 // the dest operand is already a Gp32 vreg in those
839 // cases, so the formatted register name is `w_`.
840 let mnemonic = match inst.opcode {
841 ArmOpcode::LdrsbImm => "ldrsb",
842 ArmOpcode::LdrshImm => "ldrsh",
843 _ => "ldr",
844 };
845 if (-256..=255).contains(&offset_val) {
846 format!("{} {}, [{}, #{}]", mnemonic, dest, base, offset_val)
847 } else {
848 // Large offset: compute address in x8, then load.
849 format!(
850 "{}\n {} {}, [x8]",
851 fmt_addr_with_offset("x8", &base, offset_val, "x16"),
852 mnemonic,
853 dest
854 )
855 }
856 }
857 ArmOpcode::StrImm | ArmOpcode::StrFpImm | ArmOpcode::StrbImm | ArmOpcode::StrhImm => {
858 let src = op_str(&inst.operands[0]);
859 let base = op_str(&inst.operands[1]);
860 let offset_val = match &inst.operands[2] {
861 MachineOperand::FrameSlot(off) => *off as i64,
862 MachineOperand::Imm(v) => *v,
863 _ => 0,
864 };
865 let mnemonic = match inst.opcode {
866 ArmOpcode::StrbImm => "strb",
867 ArmOpcode::StrhImm => "strh",
868 _ => "str",
869 };
870 if (-256..=255).contains(&offset_val) {
871 format!("{} {}, [{}, #{}]", mnemonic, src, base, offset_val)
872 } else {
873 // Large offset: compute address in x8, then store.
874 format!(
875 "{}\n {} {}, [x8]",
876 fmt_addr_with_offset("x8", &base, offset_val, "x16"),
877 mnemonic,
878 src
879 )
880 }
881 }
882 // Sprint 05: scaled-register-offset addressing. Operands are
883 // [dest, base, idx, Imm(shift)]. Shift 0 elides the `, lsl
884 // #0` suffix per the assembler convention.
885 ArmOpcode::LdrReg | ArmOpcode::LdrFpReg | ArmOpcode::StrReg | ArmOpcode::StrFpReg => {
886 let dest = op_str(&inst.operands[0]);
887 let base = op_str(&inst.operands[1]);
888 let idx = op_str(&inst.operands[2]);
889 let shift = match &inst.operands[3] {
890 MachineOperand::Imm(v) => *v,
891 _ => 0,
892 };
893 let mnemonic = match inst.opcode {
894 ArmOpcode::LdrReg | ArmOpcode::LdrFpReg => "ldr",
895 ArmOpcode::StrReg | ArmOpcode::StrFpReg => "str",
896 _ => unreachable!(),
897 };
898 if shift == 0 {
899 format!("{} {}, [{}, {}]", mnemonic, dest, base, idx)
900 } else {
901 format!("{} {}, [{}, {}, lsl #{}]", mnemonic, dest, base, idx, shift)
902 }
903 }
904
905 ArmOpcode::StpPre => {
906 let frame_size = mf.frame.size as i64;
907 let stp_offset = frame_size - 16;
908 // The `sub sp, sp, #N` portion handles N > 4095 via
909 // x16 synthesis (audit6 BLOCKING-5 root cause), and
910 // probes very large frames so macOS guard pages aren't
911 // skipped in one jump. The `stp ... [sp, #stp_offset]`
912 // form is also bounded
913 // (signed 7-bit immediate * 8 = ±504 byte range), so
914 // we fall back to two `str` instructions when over.
915 // For very large frames (stp_offset > 32760, the
916 // signed 12-bit max for 64-bit ldr/str unsigned imm),
917 // we'd need a register-form load/store — not yet
918 // exercised in any test, so the panic catches it.
919 let sub_sp = fmt_stack_alloc(frame_size);
920 if stp_offset <= 504 {
921 format!("{}\n stp x29, x30, [sp, #{}]", sub_sp, stp_offset)
922 } else if stp_offset <= 32760 {
923 format!(
924 "{}\n str x29, [sp, #{}]\n str x30, [sp, #{}]",
925 sub_sp,
926 stp_offset,
927 stp_offset + 8
928 )
929 } else {
930 // Frame too large for any ldr/str unsigned immediate.
931 // Synthesize the address in x9 (caller-saved scratch)
932 // then use register-offset str.
933 let x9_addr = fmt_sp_imm("add", "x9", "sp", stp_offset);
934 format!(
935 "{}\n {}\n str x29, [x9]\n str x30, [x9, #8]",
936 sub_sp, x9_addr
937 )
938 }
939 }
940 ArmOpcode::LdpPost => {
941 let frame_size = mf.frame.size as i64;
942 let ldp_offset = frame_size - 16;
943 let add_sp = fmt_sp_imm("add", "sp", "sp", frame_size);
944 if ldp_offset <= 504 {
945 format!("ldp x29, x30, [sp, #{}]\n {}", ldp_offset, add_sp)
946 } else if ldp_offset <= 32760 {
947 format!(
948 "ldr x29, [sp, #{}]\n ldr x30, [sp, #{}]\n {}",
949 ldp_offset,
950 ldp_offset + 8,
951 add_sp
952 )
953 } else {
954 // Frame too large for unsigned immediate ldr.
955 // Synthesize address in x9 then restore with register-offset ldr.
956 let x9_addr = fmt_sp_imm("add", "x9", "sp", ldp_offset);
957 format!(
958 "{}\n ldr x29, [x9]\n ldr x30, [x9, #8]\n {}",
959 x9_addr, add_sp
960 )
961 }
962 }
963
964 // Non-preindex STP/LDP for callee-save pairs.
965 // Operands: [src1/dst1, src2/dst2, base, imm].
966 ArmOpcode::StpOffset => {
967 let r1 = op_str(&inst.operands[0]);
968 let r2 = op_str(&inst.operands[1]);
969 let base = op_str(&inst.operands[2]);
970 let off = match &inst.operands[3] {
971 MachineOperand::Imm(v) => *v,
972 MachineOperand::FrameSlot(v) => *v as i64,
973 _ => 0,
974 };
975 // STP signed-offset range: 7-bit signed × 8 → [-512, 504].
976 // Fall back to two individual STR instructions if out of range.
977 if (-512..=504).contains(&off) {
978 format!("stp {}, {}, [{}, #{}]", r1, r2, base, off)
979 } else {
980 format!(
981 "{}\n str {}, [x9]\n str {}, [x9, #8]",
982 fmt_addr_with_offset("x9", &base, off, "x16"),
983 r1,
984 r2
985 )
986 }
987 }
988 ArmOpcode::LdpOffset => {
989 let r1 = op_str(&inst.operands[0]);
990 let r2 = op_str(&inst.operands[1]);
991 let base = op_str(&inst.operands[2]);
992 let off = match &inst.operands[3] {
993 MachineOperand::Imm(v) => *v,
994 MachineOperand::FrameSlot(v) => *v as i64,
995 _ => 0,
996 };
997 // LDP signed-offset range: 7-bit signed × 8 → [-512, 504].
998 // Fall back to two individual LDR instructions if out of range.
999 if (-512..=504).contains(&off) {
1000 format!("ldp {}, {}, [{}, #{}]", r1, r2, base, off)
1001 } else {
1002 format!(
1003 "{}\n ldr {}, [x9]\n ldr {}, [x9, #8]",
1004 fmt_addr_with_offset("x9", &base, off, "x16"),
1005 r1,
1006 r2
1007 )
1008 }
1009 }
1010
1011 ArmOpcode::AdrpLdr => {
1012 if let MachineOperand::ConstPool(idx) = &inst.operands[1] {
1013 let label = const_pool_label(&mf.name, *idx);
1014 let dest = op_str(&inst.operands[0]);
1015 // ADRP requires a GP register. If dest is FP (s/d), use x8 as scratch.
1016 let is_fp = dest.starts_with('s') || dest.starts_with('d');
1017 if is_fp {
1018 format!(
1019 "adrp x8, {1}@PAGE\n ldr {0}, [x8, {1}@PAGEOFF]",
1020 dest, label
1021 )
1022 } else {
1023 format!(
1024 "adrp {0}, {1}@PAGE\n ldr {0}, [{0}, {1}@PAGEOFF]",
1025 dest, label
1026 )
1027 }
1028 } else {
1029 "nop ; bad adrp+ldr".into()
1030 }
1031 }
1032 ArmOpcode::AdrpAdd => {
1033 let dest = op_str(&inst.operands[0]);
1034 match &inst.operands[1] {
1035 MachineOperand::ConstPool(idx) => {
1036 let label = const_pool_label(&mf.name, *idx);
1037 format!(
1038 "adrp {0}, {1}@PAGE\n add {0}, {0}, {1}@PAGEOFF",
1039 dest, label
1040 )
1041 }
1042 MachineOperand::GlobalLabel(name) => {
1043 // Mach-O convention: globals get an underscore prefix.
1044 let sym = if name.starts_with('_') {
1045 name.clone()
1046 } else {
1047 format!("_{}", name)
1048 };
1049 format!(
1050 "adrp {0}, {1}@PAGE\n add {0}, {0}, {1}@PAGEOFF",
1051 dest, sym
1052 )
1053 }
1054 _ => "nop ; bad adrp+add".into(),
1055 }
1056 }
1057
1058 ArmOpcode::B => {
1059 match &inst.operands[0] {
1060 MachineOperand::BlockRef(id) => format!("b {}", mf.block(*id).label),
1061 // Tail call to an external symbol (TCO): B _callee
1062 MachineOperand::Extern(name) => {
1063 if name.starts_with('_') {
1064 format!("b {}", name)
1065 } else {
1066 format!("b _{}", name)
1067 }
1068 }
1069 _ => "b ???".into(),
1070 }
1071 }
1072 ArmOpcode::BCond => {
1073 let cond = if let MachineOperand::Cond(c) = &inst.operands[0] {
1074 cond_str(*c)
1075 } else {
1076 "eq"
1077 };
1078 let target = if let MachineOperand::BlockRef(id) = &inst.operands[1] {
1079 mf.block(*id).label.clone()
1080 } else {
1081 "???".into()
1082 };
1083 format!("b.{} {}", cond, target)
1084 }
1085 ArmOpcode::Cbz | ArmOpcode::Cbnz => {
1086 let mnemonic = match inst.opcode {
1087 ArmOpcode::Cbz => "cbz",
1088 _ => "cbnz",
1089 };
1090 let target = if let MachineOperand::BlockRef(id) = &inst.operands[1] {
1091 mf.block(*id).label.clone()
1092 } else {
1093 "???".into()
1094 };
1095 format!("{} {}, {}", mnemonic, op_str(&inst.operands[0]), target)
1096 }
1097 ArmOpcode::Tbz | ArmOpcode::Tbnz => {
1098 let mnemonic = match inst.opcode {
1099 ArmOpcode::Tbz => "tbz",
1100 _ => "tbnz",
1101 };
1102 let bit = if let MachineOperand::Imm(v) = &inst.operands[1] {
1103 *v
1104 } else {
1105 0
1106 };
1107 let target = if let MachineOperand::BlockRef(id) = &inst.operands[2] {
1108 mf.block(*id).label.clone()
1109 } else {
1110 "???".into()
1111 };
1112 format!(
1113 "{} {}, #{}, {}",
1114 mnemonic,
1115 op_str(&inst.operands[0]),
1116 bit,
1117 target
1118 )
1119 }
1120 ArmOpcode::Bl => {
1121 if let MachineOperand::Extern(name) = &inst.operands[0] {
1122 // Mach-O convention: C symbols get a _ prefix.
1123 if name.starts_with('_') {
1124 format!("bl {}", name) // already prefixed
1125 } else {
1126 format!("bl _{}", name) // add Mach-O prefix
1127 }
1128 } else {
1129 "bl ???".into()
1130 }
1131 }
1132 ArmOpcode::Blr => format!("blr {}", op_str(&inst.operands[0])),
1133 ArmOpcode::Sxtw => format!(
1134 "sxtw {}, {}",
1135 op_str(&inst.operands[0]),
1136 op_str(&inst.operands[1])
1137 ),
1138 ArmOpcode::Sxth => format!(
1139 "sxth {}, {}",
1140 op_str(&inst.operands[0]),
1141 op_str(&inst.operands[1])
1142 ),
1143 ArmOpcode::Sxtb => format!(
1144 "sxtb {}, {}",
1145 op_str(&inst.operands[0]),
1146 op_str(&inst.operands[1])
1147 ),
1148 ArmOpcode::Ret => "ret".into(),
1149 ArmOpcode::Nop => "nop".into(),
1150 ArmOpcode::Brk => {
1151 let imm = if let MachineOperand::Imm(v) = &inst.operands[0] {
1152 *v
1153 } else {
1154 1
1155 };
1156 format!("brk #{}", imm)
1157 }
1158
1159 // ---- NEON SIMD vector ops (Sprint 12 Stage 2) ----
1160 //
1161 // Each op forwards to a small helper so the lane-shape suffix
1162 // (.4s / .2d / .s[n] / .d[n]) lives in one place.
1163 ArmOpcode::AddV4S => fmt_vbinop(inst, "add", "4s"),
1164 ArmOpcode::AddV2D => fmt_vbinop(inst, "add", "2d"),
1165 ArmOpcode::SubV4S => fmt_vbinop(inst, "sub", "4s"),
1166 ArmOpcode::SubV2D => fmt_vbinop(inst, "sub", "2d"),
1167 ArmOpcode::MulV4S => fmt_vbinop(inst, "mul", "4s"),
1168 ArmOpcode::NegV4S => fmt_vunop(inst, "neg", "4s"),
1169 ArmOpcode::NegV2D => fmt_vunop(inst, "neg", "2d"),
1170 ArmOpcode::FaddV4S => fmt_vbinop(inst, "fadd", "4s"),
1171 ArmOpcode::FaddV2D => fmt_vbinop(inst, "fadd", "2d"),
1172 ArmOpcode::FsubV4S => fmt_vbinop(inst, "fsub", "4s"),
1173 ArmOpcode::FsubV2D => fmt_vbinop(inst, "fsub", "2d"),
1174 ArmOpcode::FmulV4S => fmt_vbinop(inst, "fmul", "4s"),
1175 ArmOpcode::FmulV2D => fmt_vbinop(inst, "fmul", "2d"),
1176 ArmOpcode::FdivV4S => fmt_vbinop(inst, "fdiv", "4s"),
1177 ArmOpcode::FdivV2D => fmt_vbinop(inst, "fdiv", "2d"),
1178 ArmOpcode::FnegV4S => fmt_vunop(inst, "fneg", "4s"),
1179 ArmOpcode::FnegV2D => fmt_vunop(inst, "fneg", "2d"),
1180 ArmOpcode::FabsV4S => fmt_vunop(inst, "fabs", "4s"),
1181 ArmOpcode::FabsV2D => fmt_vunop(inst, "fabs", "2d"),
1182 ArmOpcode::FsqrtV4S => fmt_vunop(inst, "fsqrt", "4s"),
1183 ArmOpcode::FsqrtV2D => fmt_vunop(inst, "fsqrt", "2d"),
1184 ArmOpcode::BslV16B => fmt_vbinop(inst, "bsl", "16b"),
1185 ArmOpcode::FcmgtV4S => fmt_vbinop(inst, "fcmgt", "4s"),
1186 ArmOpcode::FcmgtV2D => fmt_vbinop(inst, "fcmgt", "2d"),
1187 ArmOpcode::FcmgeV4S => fmt_vbinop(inst, "fcmge", "4s"),
1188 ArmOpcode::FcmgeV2D => fmt_vbinop(inst, "fcmge", "2d"),
1189 ArmOpcode::FcmeqV4S => fmt_vbinop(inst, "fcmeq", "4s"),
1190 ArmOpcode::FcmeqV2D => fmt_vbinop(inst, "fcmeq", "2d"),
1191 ArmOpcode::CmgtV4S => fmt_vbinop(inst, "cmgt", "4s"),
1192 ArmOpcode::CmgeV4S => fmt_vbinop(inst, "cmge", "4s"),
1193 ArmOpcode::CmeqV4S => fmt_vbinop(inst, "cmeq", "4s"),
1194 ArmOpcode::FmlaV4S => fmt_vbinop(inst, "fmla", "4s"),
1195 ArmOpcode::FmlaV2D => fmt_vbinop(inst, "fmla", "2d"),
1196 ArmOpcode::FminV4S => fmt_vbinop(inst, "fmin", "4s"),
1197 ArmOpcode::FminV2D => fmt_vbinop(inst, "fmin", "2d"),
1198 ArmOpcode::FmaxV4S => fmt_vbinop(inst, "fmax", "4s"),
1199 ArmOpcode::FmaxV2D => fmt_vbinop(inst, "fmax", "2d"),
1200 ArmOpcode::SminV4S => fmt_vbinop(inst, "smin", "4s"),
1201 ArmOpcode::SmaxV4S => fmt_vbinop(inst, "smax", "4s"),
1202 ArmOpcode::UminV4S => fmt_vbinop(inst, "umin", "4s"),
1203 ArmOpcode::UmaxV4S => fmt_vbinop(inst, "umax", "4s"),
1204
1205 // afs-as dialect: cross-lane reductions encode the shape in
1206 // the mnemonic suffix; the destination is a scalar `s/d` and
1207 // the source is the bare vector register.
1208 ArmOpcode::FaddpV2S => format!(
1209 "faddp.2s {}, {}",
1210 fp32_scalar(&inst.operands[0]),
1211 v_reg_bare(&inst.operands[1]),
1212 ),
1213 ArmOpcode::FaddpV2D => format!(
1214 "faddp.2d {}, {}",
1215 fp64_scalar(&inst.operands[0]),
1216 v_reg_bare(&inst.operands[1]),
1217 ),
1218 ArmOpcode::Faddv4S => format!(
1219 "faddv.4s {}, {}",
1220 fp32_scalar(&inst.operands[0]),
1221 v_reg_bare(&inst.operands[1]),
1222 ),
1223 ArmOpcode::Sminv4S => format!(
1224 "sminv.4s {}, {}",
1225 fp32_scalar(&inst.operands[0]),
1226 v_reg_bare(&inst.operands[1]),
1227 ),
1228 ArmOpcode::Smaxv4S => format!(
1229 "smaxv.4s {}, {}",
1230 fp32_scalar(&inst.operands[0]),
1231 v_reg_bare(&inst.operands[1]),
1232 ),
1233 ArmOpcode::FmaxvV4S => format!(
1234 "fmaxv.4s {}, {}",
1235 fp32_scalar(&inst.operands[0]),
1236 v_reg_bare(&inst.operands[1]),
1237 ),
1238 ArmOpcode::FminvV4S => format!(
1239 "fminv.4s {}, {}",
1240 fp32_scalar(&inst.operands[0]),
1241 v_reg_bare(&inst.operands[1]),
1242 ),
1243 ArmOpcode::FmaxpV2DScalar => format!(
1244 "fmaxp.2d {}, {}",
1245 fp64_scalar(&inst.operands[0]),
1246 v_reg_bare(&inst.operands[1]),
1247 ),
1248 ArmOpcode::FminpV2DScalar => format!(
1249 "fminp.2d {}, {}",
1250 fp64_scalar(&inst.operands[0]),
1251 v_reg_bare(&inst.operands[1]),
1252 ),
1253 ArmOpcode::Uminv4S => format!(
1254 "uminv.4s {}, {}",
1255 fp32_scalar(&inst.operands[0]),
1256 v_reg_bare(&inst.operands[1]),
1257 ),
1258 ArmOpcode::Umaxv4S => format!(
1259 "umaxv.4s {}, {}",
1260 fp32_scalar(&inst.operands[0]),
1261 v_reg_bare(&inst.operands[1]),
1262 ),
1263 ArmOpcode::Addv4S => format!(
1264 "addv.4s {}, {}",
1265 fp32_scalar(&inst.operands[0]),
1266 v_reg_bare(&inst.operands[1]),
1267 ),
1268
1269 ArmOpcode::DupGen4S => format!(
1270 "dup.4s {}, {}",
1271 v_reg_bare(&inst.operands[0]),
1272 op_str(&inst.operands[1]),
1273 ),
1274 ArmOpcode::DupGen2D => format!(
1275 "dup.2d {}, {}",
1276 v_reg_bare(&inst.operands[0]),
1277 op_str(&inst.operands[1]),
1278 ),
1279 ArmOpcode::DupEl4S => format!(
1280 "dup.4s {}, {}",
1281 v_reg_bare(&inst.operands[0]),
1282 v_lane_bare(&inst.operands[1], "s", 0),
1283 ),
1284 ArmOpcode::DupEl2D => format!(
1285 "dup.2d {}, {}",
1286 v_reg_bare(&inst.operands[0]),
1287 v_lane_bare(&inst.operands[1], "d", 0),
1288 ),
1289 ArmOpcode::Ins4S => {
1290 let lane = imm_u8(&inst.operands[1]);
1291 format!(
1292 "ins.s {}, {}",
1293 v_lane_bare(&inst.operands[0], "s", lane),
1294 op_str(&inst.operands[2]),
1295 )
1296 }
1297 ArmOpcode::Ins2D => {
1298 let lane = imm_u8(&inst.operands[1]);
1299 format!(
1300 "ins.d {}, {}",
1301 v_lane_bare(&inst.operands[0], "d", lane),
1302 op_str(&inst.operands[2]),
1303 )
1304 }
1305 ArmOpcode::Umov4S => {
1306 let lane = imm_u8(&inst.operands[2]);
1307 format!(
1308 "umov.s {}, {}",
1309 op_str(&inst.operands[0]),
1310 v_lane_bare(&inst.operands[1], "s", lane),
1311 )
1312 }
1313 ArmOpcode::Umov2D => {
1314 let lane = imm_u8(&inst.operands[2]);
1315 format!(
1316 "umov.d {}, {}",
1317 op_str(&inst.operands[0]),
1318 v_lane_bare(&inst.operands[1], "d", lane),
1319 )
1320 }
1321 ArmOpcode::FmovEl4S => {
1322 let lane = imm_u8(&inst.operands[2]);
1323 format!(
1324 "mov.s {}, {}",
1325 fp32_scalar(&inst.operands[0]),
1326 v_lane_bare(&inst.operands[1], "s", lane),
1327 )
1328 }
1329 ArmOpcode::FmovEl2D => {
1330 let lane = imm_u8(&inst.operands[2]);
1331 format!(
1332 "mov.d {}, {}",
1333 fp64_scalar(&inst.operands[0]),
1334 v_lane_bare(&inst.operands[1], "d", lane),
1335 )
1336 }
1337
1338 ArmOpcode::LdrQ => format!(
1339 "ldr {}, [{}, {}]",
1340 q_reg(&inst.operands[0]),
1341 op_str(&inst.operands[1]),
1342 op_str(&inst.operands[2]),
1343 ),
1344 ArmOpcode::StrQ => format!(
1345 "str {}, [{}, {}]",
1346 q_reg(&inst.operands[0]),
1347 op_str(&inst.operands[1]),
1348 op_str(&inst.operands[2]),
1349 ),
1350 }
1351 }
1352
1353 // ---- NEON formatting helpers ----
1354
1355 fn v_reg(op: &MachineOperand, shape: &str) -> String {
1356 match op {
1357 MachineOperand::VReg(id) => format!("v{}.{}", id.0, shape),
1358 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1359 format!("v{}.{}", n, shape)
1360 }
1361 _ => format!("{}.{}", op_str(op), shape),
1362 }
1363 }
1364
1365 fn q_reg(op: &MachineOperand) -> String {
1366 match op {
1367 MachineOperand::VReg(id) => format!("q{}", id.0),
1368 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1369 format!("q{}", n)
1370 }
1371 _ => format!("q{}", op_str(op)),
1372 }
1373 }
1374
1375 fn v_lane(op: &MachineOperand, lane_ty: &str, lane: u8) -> String {
1376 match op {
1377 MachineOperand::VReg(id) => format!("v{}.{}[{}]", id.0, lane_ty, lane),
1378 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1379 format!("v{}.{}[{}]", n, lane_ty, lane)
1380 }
1381 _ => format!("v{}.{}[{}]", op_str(op), lane_ty, lane),
1382 }
1383 }
1384
1385 fn fp32_scalar(op: &MachineOperand) -> String {
1386 match op {
1387 MachineOperand::VReg(id) => format!("s{}", id.0),
1388 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1389 format!("s{}", n)
1390 }
1391 _ => op_str(op),
1392 }
1393 }
1394
1395 fn fp64_scalar(op: &MachineOperand) -> String {
1396 match op {
1397 MachineOperand::VReg(id) => format!("d{}", id.0),
1398 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1399 format!("d{}", n)
1400 }
1401 _ => op_str(op),
1402 }
1403 }
1404
1405 fn imm_u8(op: &MachineOperand) -> u8 {
1406 if let MachineOperand::Imm(v) = op {
1407 *v as u8
1408 } else {
1409 0
1410 }
1411 }
1412
1413 fn fmt_vbinop(inst: &MachineInst, mnemonic: &str, shape: &str) -> String {
1414 // afs-as dialect: shape suffix is part of the mnemonic, operand
1415 // registers are bare (`fadd.4s v0, v1, v2`). Encodes to the same
1416 // bytes as the Apple/GNU `fadd v0.4s, v1.4s, v2.4s` form.
1417 format!(
1418 "{}.{} {}, {}, {}",
1419 mnemonic,
1420 shape,
1421 v_reg_bare(&inst.operands[0]),
1422 v_reg_bare(&inst.operands[1]),
1423 v_reg_bare(&inst.operands[2]),
1424 )
1425 }
1426
1427 fn fmt_vunop(inst: &MachineInst, mnemonic: &str, shape: &str) -> String {
1428 format!(
1429 "{}.{} {}, {}",
1430 mnemonic,
1431 shape,
1432 v_reg_bare(&inst.operands[0]),
1433 v_reg_bare(&inst.operands[1]),
1434 )
1435 }
1436
1437 fn v_reg_bare(op: &MachineOperand) -> String {
1438 match op {
1439 MachineOperand::VReg(id) => format!("v{}", id.0),
1440 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1441 format!("v{}", n)
1442 }
1443 _ => op_str(op),
1444 }
1445 }
1446
1447 fn v_lane_bare(op: &MachineOperand, _lane_ty: &str, lane: u8) -> String {
1448 // afs-as dialect for `umov.s w3, v0[2]` — bare reg with `[lane]`
1449 // suffix; the element-size width is encoded into the mnemonic
1450 // (`umov.s` / `umov.d`).
1451 match op {
1452 MachineOperand::VReg(id) => format!("v{}[{}]", id.0, lane),
1453 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1454 format!("v{}[{}]", n, lane)
1455 }
1456 _ => format!("{}[{}]", op_str(op), lane),
1457 }
1458 }
1459
1460 /// Format a machine operand as assembly text.
1461 fn op_str(op: &MachineOperand) -> String {
1462 match op {
1463 MachineOperand::VReg(id) => format!("v{}", id.0), // placeholder until regalloc
1464 MachineOperand::PhysReg(PhysReg::Sp) => "sp".into(),
1465 MachineOperand::PhysReg(PhysReg::Xzr) => "xzr".into(),
1466 MachineOperand::PhysReg(PhysReg::Wzr) => "wzr".into(),
1467 MachineOperand::PhysReg(PhysReg::Gp(n)) => format!("x{}", n),
1468 MachineOperand::PhysReg(PhysReg::Gp32(n)) => format!("w{}", n),
1469 MachineOperand::PhysReg(PhysReg::Fp(n)) => format!("d{}", n),
1470 MachineOperand::PhysReg(PhysReg::Fp32(n)) => format!("s{}", n),
1471 MachineOperand::Imm(v) => format!("#{}", v),
1472 MachineOperand::FrameSlot(off) => format!("[fp, #{}]", off),
1473 MachineOperand::Cond(c) => cond_str(*c).into(),
1474 MachineOperand::BlockRef(id) => format!("bb{}", id.0),
1475 MachineOperand::Extern(name) => name.clone(),
1476 MachineOperand::GlobalLabel(name) => {
1477 if name.starts_with('_') {
1478 name.clone()
1479 } else {
1480 format!("_{}", name)
1481 }
1482 }
1483 MachineOperand::ConstPool(idx) => format!("cp{}", idx),
1484 MachineOperand::Shift(s) => format!("lsl #{}", s),
1485 }
1486 }
1487
1488 fn fp_reg_str(op: &MachineOperand, is_f64: bool) -> String {
1489 match op {
1490 MachineOperand::PhysReg(PhysReg::Fp(n)) | MachineOperand::PhysReg(PhysReg::Fp32(n)) => {
1491 if is_f64 {
1492 format!("d{}", n)
1493 } else {
1494 format!("s{}", n)
1495 }
1496 }
1497 _ => op_str(op),
1498 }
1499 }
1500
1501 fn cond_str(c: ArmCond) -> &'static str {
1502 match c {
1503 ArmCond::Eq => "eq",
1504 ArmCond::Ne => "ne",
1505 ArmCond::Hs => "hs",
1506 ArmCond::Lo => "lo",
1507 ArmCond::Mi => "mi",
1508 ArmCond::Pl => "pl",
1509 ArmCond::Hi => "hi",
1510 ArmCond::Ls => "ls",
1511 ArmCond::Ge => "ge",
1512 ArmCond::Lt => "lt",
1513 ArmCond::Gt => "gt",
1514 ArmCond::Le => "le",
1515 }
1516 }
1517
1518 /// Generate a constant pool label.
/// Generate a per-function constant-pool label, e.g. `__main_cp0`.
fn const_pool_label(func: &str, idx: u32) -> String {
    let mut label = String::with_capacity(func.len() + 8);
    label.push_str("__");
    label.push_str(func);
    label.push_str("_cp");
    label.push_str(&idx.to_string());
    label
}
1522
// Unit tests for the emitter: prologue/epilogue shape, global data
// directives, addressing fallbacks, and the NEON text dialect.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::codegen::isel::select_function;
    use crate::ir::builder::FuncBuilder;
    use crate::ir::inst::*;
    use crate::ir::types::*;

    // Build a single void function named "test" via the IR builder,
    // run instruction selection, and return the emitted assembly text.
    fn emit_simple(build: impl FnOnce(&mut FuncBuilder)) -> String {
        let mut func = Function::new("test".into(), vec![], IrType::Void);
        {
            let mut b = FuncBuilder::new(&mut func);
            build(&mut b);
        }
        let mf = select_function(&func);
        emit_function(&mf)
    }

    #[test]
    fn emit_prologue_epilogue() {
        let asm = emit_simple(|b| b.ret_void());
        assert!(
            asm.contains("sub sp, sp,"),
            "missing frame allocation: {}",
            asm
        );
        assert!(
            asm.contains("stp x29, x30, [sp,"),
            "missing prologue save: {}",
            asm
        );
        assert!(
            asm.contains("ldp x29, x30, [sp,"),
            "missing epilogue restore: {}",
            asm
        );
        assert!(
            asm.contains("add sp, sp,"),
            "missing frame deallocation: {}",
            asm
        );
        assert!(asm.contains("ret"), "missing ret: {}", asm);
    }

    #[test]
    fn emit_integer_add() {
        let asm = emit_simple(|b| {
            let x = b.const_i32(10);
            let y = b.const_i32(20);
            let _z = b.iadd(x, y);
            b.ret_void();
        });
        assert!(asm.contains("add "), "missing add: {}", asm);
    }

    #[test]
    fn emit_function_label() {
        let asm = emit_simple(|b| b.ret_void());
        assert!(asm.contains(".globl _test"), "missing .globl: {}", asm);
        assert!(asm.contains("_test:"), "missing function label: {}", asm);
    }

    /// Verify that functions with frame sizes > 4095 use x16 scratch
    /// synthesis for the `sub sp, sp, #N` prologue and `add sp, sp, #N`
    /// epilogue rather than an out-of-range immediate.
    #[test]
    fn emit_large_frame_prologue() {
        // 700 allocas of i64 = 700 * 8 = 5600 bytes, well over 4095.
        let asm = emit_simple(|b| {
            for _ in 0..700 {
                let _ = b.alloca(IrType::Int(IntWidth::I64));
            }
            b.ret_void();
        });
        // The 12-bit immediate max is 4095, so the emitter must
        // synthesize the frame size via x16.
        assert!(
            asm.contains("movz x16,"),
            "large frame should use x16 synthesis: {}",
            asm
        );
        assert!(
            asm.contains("sub sp, sp, x16"),
            "large frame sub should use register form: {}",
            asm
        );
        assert!(
            asm.contains("add sp, sp, x16"),
            "large frame add should use register form: {}",
            asm
        );
        // Must NOT contain a raw "sub sp, sp, #5" that exceeds 4095.
        assert!(
            !asm.contains("sub sp, sp, #5"),
            "should not emit out-of-range immediate: {}",
            asm
        );
    }

    // 3000 * 8 = 24000 bytes of frame: big enough to cross the guard
    // page, so the emitter must touch each chunk while growing sp.
    #[test]
    fn emit_huge_frame_with_stack_probes() {
        let asm = emit_simple(|b| {
            for _ in 0..3000 {
                let _ = b.alloca(IrType::Int(IntWidth::I64));
            }
            b.ret_void();
        });
        assert!(
            asm.contains("str xzr, [sp]"),
            "huge frame should probe each chunk: {}",
            asm
        );
    }

    #[test]
    fn emit_branch() {
        let asm = emit_simple(|b| {
            let cond = b.const_bool(true);
            let bb_t = b.create_block("then");
            let bb_f = b.create_block("else");
            b.cond_branch(cond, bb_t, vec![], bb_f, vec![]);
            b.set_block(bb_t);
            b.ret_void();
            b.set_block(bb_f);
            b.ret_void();
        });
        assert!(asm.contains("b.ne"), "missing conditional branch: {}", asm);
        assert!(asm.contains("then_"), "missing then label: {}", asm);
        assert!(asm.contains("else_"), "missing else label: {}", asm);
    }

    // 2^64 has a zero low word and a high word of 1 — pins the
    // little-endian low/high `.quad` ordering for i128 globals.
    #[test]
    fn emit_i128_scalar_global_as_two_quads() {
        let asm = emit_globals(&[Global {
            name: "big".into(),
            ty: IrType::Int(IntWidth::I128),
            initializer: Some(GlobalInit::Int(18_446_744_073_709_551_616i128)),
        }]);

        assert!(
            asm.contains(".section __DATA,__data"),
            "missing data section:\n{}",
            asm
        );
        assert!(
            asm.contains(".private_extern _big"),
            "missing global symbol:\n{}",
            asm
        );
        assert!(
            asm.contains(".p2align 4"),
            "i128 globals need 16-byte alignment:\n{}",
            asm
        );
        assert_eq!(
            asm.matches(".quad").count(),
            2,
            "scalar i128 should emit two quads:\n{}",
            asm
        );
        assert!(
            asm.contains(".quad 0x0000000000000000\n .quad 0x0000000000000001"),
            "scalar i128 should emit low/high 64-bit words in memory order:\n{}",
            asm
        );
    }

    #[test]
    fn emit_i128_array_global_as_word_pairs() {
        let asm = emit_globals(&[Global {
            name: "arr".into(),
            ty: IrType::Array(Box::new(IrType::Int(IntWidth::I128)), 2),
            initializer: Some(GlobalInit::IntArray(vec![1, -1])),
        }]);

        assert_eq!(
            asm.matches(".quad").count(),
            4,
            "two i128 elements should emit four quads:\n{}",
            asm
        );
        assert!(
            asm.contains(".quad 0x0000000000000001\n .quad 0x0000000000000000"),
            "positive i128 array element should preserve low/high word order:\n{}",
            asm
        );
        assert!(
            asm.contains(".quad 0xffffffffffffffff\n .quad 0xffffffffffffffff"),
            "negative i128 array element should preserve two's-complement words:\n{}",
            asm
        );
    }

    #[test]
    fn emit_byte_array_global_uses_natural_alignment() {
        let asm = emit_globals(&[Global {
            name: "history".into(),
            ty: IrType::Array(Box::new(IrType::Int(IntWidth::I8)), 400),
            initializer: Some(GlobalInit::Zero),
        }]);

        assert!(
            asm.contains(".p2align 3\n_history:"),
            "byte-array globals that model descriptors/derived storage need 8-byte alignment:\n{}",
            asm
        );
    }

    #[test]
    fn emit_nested_byte_array_global_uses_full_storage_size() {
        let asm = emit_globals(&[Global {
            name: "command_cache".into(),
            ty: IrType::Array(
                Box::new(IrType::Array(Box::new(IrType::Int(IntWidth::I8)), 264)),
                4,
            ),
            initializer: Some(GlobalInit::Zero),
        }]);

        assert!(
            asm.contains("_command_cache:\n .space 1056"),
            "nested byte-array globals should reserve their full storage size:\n{}",
            asm
        );
    }

    // A MovReg with a w-register destination and an x-register source
    // must render both sides through the 32-bit view.
    #[test]
    fn emit_mov_reg_truncates_x_source_through_w_view() {
        let mf = MachineFunction::new("test".into());
        let inst = MachineInst {
            opcode: ArmOpcode::MovReg,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Gp32(21)),
                MachineOperand::PhysReg(PhysReg::Gp(20)),
            ],
            def: None,
        };

        assert_eq!(emit_inst(&inst, &mf), "mov w21, w20");
    }

    // FCVT mixes widths by design: the opcode (SD/DS), not the operand
    // register class, decides the printed s/d prefixes.
    #[test]
    fn emit_fcvt_uses_fp_register_widths() {
        let mf = MachineFunction::new("test".into());
        let to_single = MachineInst {
            opcode: ArmOpcode::FcvtSD,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Fp(0)),
                MachineOperand::PhysReg(PhysReg::Fp(1)),
            ],
            def: None,
        };
        let to_double = MachineInst {
            opcode: ArmOpcode::FcvtDS,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Fp32(2)),
                MachineOperand::PhysReg(PhysReg::Fp32(3)),
            ],
            def: None,
        };

        assert_eq!(emit_inst(&to_single, &mf), "fcvt s0, d1");
        assert_eq!(emit_inst(&to_double, &mf), "fcvt d2, s3");
    }

    // -544 is just past the STP/LDP signed-offset floor of -512, so
    // both must synthesize the address into x9 instead of emitting a
    // raw out-of-range offset.
    #[test]
    fn emit_large_negative_pair_offsets_use_scratch_addressing() {
        let mf = MachineFunction::new("test".into());
        let stp = MachineInst {
            opcode: ArmOpcode::StpOffset,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Gp(0)),
                MachineOperand::PhysReg(PhysReg::Gp(1)),
                MachineOperand::PhysReg(PhysReg::FP),
                MachineOperand::Imm(-544),
            ],
            def: None,
        };
        let ldp = MachineInst {
            opcode: ArmOpcode::LdpOffset,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Gp(2)),
                MachineOperand::PhysReg(PhysReg::Gp(3)),
                MachineOperand::PhysReg(PhysReg::FP),
                MachineOperand::Imm(-544),
            ],
            def: None,
        };

        let stp_asm = emit_inst(&stp, &mf);
        let ldp_asm = emit_inst(&ldp, &mf);
        assert!(
            stp_asm.contains("sub x9, x29, #544"),
            "large negative stp offset should synthesize address: {}",
            stp_asm
        );
        assert!(
            ldp_asm.contains("sub x9, x29, #544"),
            "large negative ldp offset should synthesize address: {}",
            ldp_asm
        );
        assert!(
            !stp_asm.contains("[x29, #-544]"),
            "stp should not emit out-of-range raw offset: {}",
            stp_asm
        );
        assert!(
            !ldp_asm.contains("[x29, #-544]"),
            "ldp should not emit out-of-range raw offset: {}",
            ldp_asm
        );
    }

    #[test]
    fn emit_internal_only_function_as_private_extern() {
        let mut mf = MachineFunction::new("helper".into());
        mf.internal_only = true;

        let asm = emit_function(&mf);

        assert!(
            asm.contains(".private_extern _helper"),
            "internal-only functions should not be emitted as globals:\n{}",
            asm
        );
        assert!(
            !asm.contains(".globl _helper"),
            "internal-only functions should not keep external linkage:\n{}",
            asm
        );
    }

    // ---- NEON SIMD emit smoke tests (Sprint 12 Stage 2) ----
    //
    // The vectorizer doesn't generate any of these yet, but the emit
    // formatters can be exercised directly by hand-building a
    // MachineInst and feeding it through `emit_inst`. These tests
    // pin the assembly text form so future codegen wiring has a
    // golden reference.

    use crate::codegen::mir::{ArmOpcode, MachineFunction, MachineInst, MachineOperand, RegClass};

    // Wrap a single hand-built instruction in a throwaway function
    // and return its emitted text.
    fn emit_one(opcode: ArmOpcode, operands: Vec<MachineOperand>) -> String {
        let mut mf = MachineFunction::new("t".into());
        mf.new_block("entry");
        let inst = MachineInst {
            opcode,
            operands,
            def: None,
        };
        emit_inst(&inst, &mf)
    }

    #[test]
    fn emit_fadd_v_4s_form() {
        let mut mf = MachineFunction::new("t".into());
        let v0 = mf.new_vreg(RegClass::V128);
        let v1 = mf.new_vreg(RegClass::V128);
        let v2 = mf.new_vreg(RegClass::V128);
        let asm = emit_one(
            ArmOpcode::FaddV4S,
            vec![
                MachineOperand::VReg(v0),
                MachineOperand::VReg(v1),
                MachineOperand::VReg(v2),
            ],
        );
        let _ = mf;
        // afs-as dialect: shape suffix on mnemonic, bare regs.
        assert_eq!(asm, "fadd.4s v0, v1, v2");
    }

    #[test]
    fn emit_fadd_v_2d_form() {
        let asm = emit_one(
            ArmOpcode::FaddV2D,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(1)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(2)),
            ],
        );
        assert_eq!(asm, "fadd.2d v0, v1, v2");
    }

    #[test]
    fn emit_fmla_v_4s_form() {
        let asm = emit_one(
            ArmOpcode::FmlaV4S,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(1)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(2)),
            ],
        );
        assert_eq!(asm, "fmla.4s v0, v1, v2");
    }

    // Cross-lane reductions: scalar destination, bare vector source.
    #[test]
    fn emit_addv_4s_reduction_form() {
        let asm = emit_one(
            ArmOpcode::Addv4S,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(1)),
            ],
        );
        assert_eq!(asm, "addv.4s s0, v1");
    }

    #[test]
    fn emit_dup_gen_4s_broadcasts_w_register() {
        let asm = emit_one(
            ArmOpcode::DupGen4S,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::PhysReg(crate::codegen::mir::PhysReg::Gp32(2)),
            ],
        );
        assert_eq!(asm, "dup.4s v0, w2");
    }

    #[test]
    fn emit_dup_el_4s_broadcasts_fp_lane_zero() {
        // Splatting an Fp32 scalar (which lives in v2's lane 0) into
        // a 4×f32 vector uses the lane-dup form. The gp form
        // `dup.4s v0, s2` is rejected by the assembler. afs-as
        // dialect: bare `vN[L]` (no `.s` suffix), with the lane
        // element width encoded into the `dup.4s` mnemonic.
        let asm = emit_one(
            ArmOpcode::DupEl4S,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(2)),
            ],
        );
        assert_eq!(asm, "dup.4s v0, v2[0]");
    }

    #[test]
    fn emit_dup_el_2d_broadcasts_fp_lane_zero() {
        let asm = emit_one(
            ArmOpcode::DupEl2D,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(2)),
            ],
        );
        assert_eq!(asm, "dup.2d v0, v2[0]");
    }

    #[test]
    fn emit_ldr_q_form() {
        let asm = emit_one(
            ArmOpcode::LdrQ,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::PhysReg(crate::codegen::mir::PhysReg::Gp(1)),
                MachineOperand::Imm(16),
            ],
        );
        assert_eq!(asm, "ldr q0, [x1, #16]");
    }

    #[test]
    fn emit_str_q_form() {
        let asm = emit_one(
            ArmOpcode::StrQ,
            vec![
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::PhysReg(crate::codegen::mir::PhysReg::Gp(1)),
                MachineOperand::Imm(0),
            ],
        );
        assert_eq!(asm, "str q0, [x1, #0]");
    }

    #[test]
    fn emit_umov_extracts_lane() {
        let asm = emit_one(
            ArmOpcode::Umov4S,
            vec![
                MachineOperand::PhysReg(crate::codegen::mir::PhysReg::Gp32(3)),
                MachineOperand::VReg(crate::codegen::mir::VRegId(0)),
                MachineOperand::Imm(2),
            ],
        );
        assert_eq!(asm, "umov.s w3, v0[2]");
    }
}
2013