//! Instruction selection — translate SSA IR to Machine IR.
//!
//! Maps each IR instruction to one or more ARM64 machine instructions.
//! Uses virtual registers throughout; physical register assignment
//! happens in the register allocator (Sprint 21).
//!
//! Strategy: naive spill-everything. Every vreg lives on the stack.
//! Load before use, store after def. Correct but slow — optimized later.
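//!
//! A sketch of what that means (vreg and slot numbers invented for
//! illustration, not taken from real output): an IR `%2 = iadd %0, %1`
//! selects to a single `add v2, v0, v1` on vregs here; the allocator
//! later wraps it in frame traffic along the lines of
//! `ldr x8, [fp, #-8]; ldr x9, [fp, #-16]; add x8, x8, x9;
//! str x8, [fp, #-24]`.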

use super::mir::*;
use crate::ir::inst::*;
use crate::ir::types::*;
use std::collections::{HashMap, HashSet};

/// Select machine instructions for an entire IR module.
pub fn select_module(module: &Module) -> Vec<MachineFunction> {
    // Build function name table for resolving Internal call refs.
    let func_names: Vec<String> = module.functions.iter().map(|f| f.name.clone()).collect();
    module
        .functions
        .iter()
        .map(|f| select_function_with_names(f, &func_names))
        .collect()
}

fn select_function_with_names(func: &Function, func_names: &[String]) -> MachineFunction {
    let mut mf = select_function(func);
    // Resolve any Internal call references to actual function names.
    for block in &mut mf.blocks {
        for inst in &mut block.insts {
            if let super::mir::ArmOpcode::Bl = inst.opcode {
                if let Some(super::mir::MachineOperand::Extern(ref mut name)) =
                    inst.operands.first_mut()
                {
                    // Check if this is a placeholder "_func_N" name from isel.
                    if name.starts_with("_func_") {
                        if let Ok(idx) = name[6..].parse::<usize>() {
                            if idx < func_names.len() {
                                *name = func_names[idx].clone();
                            }
                        }
                    }
                }
            }
        }
    }
    mf
}

use super::abi::{classify_abi_arg, AbiArgLoc, AbiArgState};

/// Select machine instructions for one IR function.
pub fn select_function(func: &Function) -> MachineFunction {
    let mut mf = MachineFunction::new(func.name.clone());
    mf.internal_only = func.internal_only;
    let mut ctx = ISelCtx::new();

    // Phase 1: allocate stack slots for all IR alloca instructions.
    for block in &func.blocks {
        for inst in &block.insts {
            if let InstKind::Alloca(ty) = &inst.kind {
                let size = alloca_size(ty);
                let offset = mf.alloc_local(size);
                ctx.alloca_offsets.insert(inst.id, offset);
            }
        }
    }

    // Phase 2: create machine blocks corresponding to IR blocks.
    // Entry block already exists as MBlockId(0).
    //
    // Block labels are prefixed with the function name so two
    // functions in the same .s file don't collide on common names
    // like `do_check_1`. The `L` prefix turns them into local
    // symbols on Apple's assembler.
    ctx.block_map.insert(func.entry, MBlockId(0));
    for block in &func.blocks {
        if block.id != func.entry {
            let label = format!("L{}_{}", mf.name, block.name);
            let mb_id = mf.new_block(&label);
            ctx.block_map.insert(block.id, mb_id);
        }
    }

    enum IncomingParam {
        Narrow(VRegId, RegClass, AbiArgLoc, IrType),
        Wide(i32, AbiArgLoc),
    }

    // Phase 2.5: handle incoming parameters.
    // Create a vreg or a wide stack slot for each param.
    // The physical register save happens after the prologue.
    let mut param_info: Vec<IncomingParam> = Vec::new();
    let mut abi_state = AbiArgState::default();
    for param in &func.params {
        let loc = classify_abi_arg(&param.ty, &mut abi_state);
        if matches!(param.ty, IrType::Int(IntWidth::I128)) {
            let offset = mf.alloc_local(16);
            ctx.wide_value_slots.insert(param.id, offset);
            param_info.push(IncomingParam::Wide(offset, loc));
            continue;
        }
        let class = type_to_reg_class(&param.ty);
        let vreg = mf.new_vreg(class);
        ctx.value_map.insert(param.id, vreg);
        param_info.push(IncomingParam::Narrow(vreg, class, loc, param.ty.clone()));
    }

    // Phase 3: emit prologue in entry block.
    emit_prologue(&mut mf, MBlockId(0));

    // Phase 3.5: move incoming argument registers into param vregs.
    // Dispatch by register class: GP args from x0-x7, FP args from d0-d7.
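    // The wide (i128) moves below stage through x16/x17: AAPCS64
    // designates them as the intra-procedure-call scratch registers
    // (IP0/IP1), so clobbering them between instructions is safe,
    // assuming the Sprint 21 allocator also treats them as reserved.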
    for info in &param_info {
        match info {
            IncomingParam::Wide(offset, AbiArgLoc::GpPair(reg)) => {
                emit_store_phys_i128_pair(
                    &mut mf,
                    MBlockId(0),
                    MachineOperand::PhysReg(PhysReg::FP),
                    *offset as i64,
                    PhysReg::Gp(*reg),
                    PhysReg::Gp(*reg + 1),
                );
            }
            IncomingParam::Wide(offset, AbiArgLoc::Stack(stack_offset)) => {
                emit_load_phys_i128_pair(
                    &mut mf,
                    MBlockId(0),
                    MachineOperand::PhysReg(PhysReg::FP),
                    16 + *stack_offset,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                emit_store_phys_i128_pair(
                    &mut mf,
                    MBlockId(0),
                    MachineOperand::PhysReg(PhysReg::FP),
                    *offset as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
            }
            IncomingParam::Narrow(vreg, RegClass::Fp64, AbiArgLoc::Fp(reg), _) => {
                mf.block_mut(MBlockId(0)).insts.push(MachineInst {
                    opcode: ArmOpcode::FmovReg,
                    operands: vec![
                        MachineOperand::VReg(*vreg),
                        MachineOperand::PhysReg(PhysReg::Fp(*reg)),
                    ],
                    def: Some(*vreg),
                });
            }
            IncomingParam::Narrow(vreg, RegClass::Fp32, AbiArgLoc::Fp32(reg), _) => {
                mf.block_mut(MBlockId(0)).insts.push(MachineInst {
                    opcode: ArmOpcode::FmovReg,
                    operands: vec![
                        MachineOperand::VReg(*vreg),
                        MachineOperand::PhysReg(PhysReg::Fp32(*reg)),
                    ],
                    def: Some(*vreg),
                });
            }
            IncomingParam::Narrow(vreg, RegClass::Gp32, AbiArgLoc::Gp32(reg), _) => {
                mf.block_mut(MBlockId(0)).insts.push(MachineInst {
                    opcode: ArmOpcode::MovReg,
                    operands: vec![
                        MachineOperand::VReg(*vreg),
                        MachineOperand::PhysReg(PhysReg::Gp32(*reg)),
                    ],
                    def: Some(*vreg),
                });
            }
            IncomingParam::Narrow(vreg, _, AbiArgLoc::Gp(reg), _) => {
                mf.block_mut(MBlockId(0)).insts.push(MachineInst {
                    opcode: ArmOpcode::MovReg,
                    operands: vec![
                        MachineOperand::VReg(*vreg),
                        MachineOperand::PhysReg(PhysReg::Gp(*reg)),
                    ],
                    def: Some(*vreg),
                });
            }
            IncomingParam::Narrow(vreg, class, AbiArgLoc::Stack(stack_offset), ty) => {
                emit_load_stack_arg_into_vreg(
                    &mut mf,
                    MBlockId(0),
                    *vreg,
                    *class,
                    ty,
                    16 + *stack_offset,
                );
            }
            IncomingParam::Wide(_, other) => {
                panic!(
                    "isel: unexpected ABI loc {:?} for incoming i128 param",
                    other
                );
            }
            IncomingParam::Narrow(_, class, other, _) => {
                panic!(
                    "isel: unexpected ABI loc {:?} for incoming {:?} param",
                    other, class
                );
            }
        }
    }

    // Phase 4a: allocate vregs for EVERY block parameter AND every
    // instruction result *before* walking any instructions. We need
    // this upfront because:
    //
    // - A branch terminator needs to know the target block's
    //   param vregs to emit "move branch arg → target param"
    //   copies, and the target block may not have been walked yet.
    //
    // - An instruction in block A may reference an SSA value
    //   defined in block B that appears later in `func.blocks`
    //   vec order (perfectly legal under SSA dominance — block B
    //   can dominate block A even if it comes later in the vec).
    //   Without upfront allocation, the lookup fails.
    //
    // Allocation here doesn't emit machine instructions; it just
    // reserves vreg IDs for every IR ValueId so Phase 4b can use
    // `lookup_vreg` without ordering concerns.
    for block in &func.blocks {
        for bp in &block.params {
            if matches!(bp.ty, IrType::Int(IntWidth::I128)) {
                let offset = mf.alloc_local(16);
                ctx.wide_value_slots.insert(bp.id, offset);
                continue;
            }
            let class = type_to_reg_class(&bp.ty);
            let vreg = mf.new_vreg(class);
            ctx.value_map.insert(bp.id, vreg);
        }
        for inst in &block.insts {
            // Allocas already have their backing stack slots from
            // Phase 1, but the SSA value they produce is still a real
            // pointer that later blocks may pass to calls or branch
            // params before the defining block is selected.
            //
            // Reserve the vreg here so forward-dominating alloca uses
            // are safe even when block vec order puts the use before
            // the definition.
            // Void-typed insts (Store, RuntimeCall returning void,
            // etc.) don't produce a usable value.
            if matches!(inst.ty, IrType::Void) {
                continue;
            }
            if matches!(inst.ty, IrType::Int(IntWidth::I128)) {
                let offset = mf.alloc_local(16);
                ctx.wide_value_slots.insert(inst.id, offset);
                continue;
            }
            let class = type_to_reg_class(&inst.ty);
            let vreg = mf.new_vreg(class);
            ctx.value_map.insert(inst.id, vreg);
        }
    }

    // Snapshot just each IR block's params into ctx so
    // `select_terminator` can look them up while we hold a separate
    // &mut MachineFunction borrow. We don't need a full BasicBlock
    // clone — only the param list — so this avoids cloning every
    // instruction in the function for each terminator we visit.
    for block in &func.blocks {
        ctx.block_params.insert(block.id, block.params.clone());
    }

    // Phase 4a.5: identify ICmp/FCmp → Select fusion candidates.
    //
    // An ICmp whose boolean result is used only by a single Select in
    // the same block (with no intervening flag-clobbering instruction)
    // can be fused: we suppress the CSET and pass the CMP flags
    // directly into the CSEL. This turns 4 instructions into 2:
    //
    //   CMP a, b; CSET cond, LE; CMP cond, #0; CSEL dest, tv, fv, NE
    //   → CMP a, b; CSEL dest, tv, fv, LE
    compute_csel_fusion(func, &mut ctx);

    // Phase 4b: select instructions and terminators for each block.
    for block in &func.blocks {
        let mb_id = ctx.block_map[&block.id];

        for inst in &block.insts {
            select_inst(&mut mf, &mut ctx, mb_id, inst, func);
        }

        if let Some(term) = &block.terminator {
            select_terminator(&mut mf, &mut ctx, mb_id, term, block, func);
        }
    }

    mf
}

fn select_call_inst(
    mf: &mut MachineFunction,
    ctx: &mut ISelCtx,
    mb: MBlockId,
    inst: &Inst,
    func: &Function,
) {
    let (label, args, runtime_func, indirect_target) = match &inst.kind {
        InstKind::Call(FuncRef::External(name), args) => {
            (name.clone(), args.as_slice(), None, None)
        }
        InstKind::Call(FuncRef::Internal(idx), args) => {
            (format!("_func_{}", idx), args.as_slice(), None, None)
        }
        InstKind::Call(FuncRef::Indirect(target), args) => {
            (String::new(), args.as_slice(), None, Some(*target))
        }
        InstKind::RuntimeCall(rf, args) => (String::new(), args.as_slice(), Some(rf), None),
        _ => unreachable!(),
    };

    let mut abi_state = AbiArgState::default();
    let mut arg_locs = Vec::with_capacity(args.len());
    for &arg_val in args {
        let arg_ty = func
            .value_type(arg_val)
            .unwrap_or_else(|| panic!("isel: missing type for call arg %{}", arg_val.0));
        arg_locs.push((arg_val, classify_abi_arg(&arg_ty, &mut abi_state), arg_ty));
    }
    let label = runtime_func
        .map(|rf| runtime_func_symbol(rf, &arg_locs))
        .unwrap_or(label);
    if abi_state.stack_offset > 0 {
        mf.reserve_outgoing_args(abi_state.stack_offset as u32);
    }

    let mut pending_reg_arg_moves: Vec<(ArmOpcode, PhysReg, VRegId)> = Vec::new();
    for (arg_val, loc, arg_ty) in arg_locs {
        if matches!(arg_ty, IrType::Int(IntWidth::I128)) {
            let arg_slot = ctx.lookup_wide_slot(arg_val);
            match loc {
                AbiArgLoc::GpPair(reg) => {
                    emit_load_phys_i128_pair(
                        mf,
                        mb,
                        MachineOperand::PhysReg(PhysReg::FP),
                        arg_slot as i64,
                        PhysReg::Gp(reg),
                        PhysReg::Gp(reg + 1),
                    );
                }
                AbiArgLoc::Stack(stack_offset) => {
                    emit_load_phys_i128_pair(
                        mf,
                        mb,
                        MachineOperand::PhysReg(PhysReg::FP),
                        arg_slot as i64,
                        PhysReg::Gp(16),
                        PhysReg::Gp(17),
                    );
                    emit_store_phys_i128_pair(
                        mf,
                        mb,
                        MachineOperand::PhysReg(PhysReg::Sp),
                        stack_offset,
                        PhysReg::Gp(16),
                        PhysReg::Gp(17),
                    );
                }
                other => {
                    panic!("isel: unexpected ABI loc {:?} for outgoing i128 arg", other);
                }
            }
            continue;
        }

        let arg_vreg = ctx.lookup_vreg(arg_val);
        let arg_class = mf.vregs.iter().find(|v| v.id == arg_vreg).map(|v| v.class);
        match (arg_class, loc) {
            (Some(RegClass::Fp64), AbiArgLoc::Fp(reg)) => {
                pending_reg_arg_moves.push((ArmOpcode::FmovReg, PhysReg::Fp(reg), arg_vreg));
            }
            (Some(RegClass::Fp32), AbiArgLoc::Fp32(reg)) => {
                pending_reg_arg_moves.push((ArmOpcode::FmovReg, PhysReg::Fp32(reg), arg_vreg));
            }
            (Some(RegClass::Gp32), AbiArgLoc::Gp32(reg)) => {
                pending_reg_arg_moves.push((ArmOpcode::MovReg, PhysReg::Gp32(reg), arg_vreg));
            }
            (Some(RegClass::Gp64), AbiArgLoc::Gp(reg)) => {
                pending_reg_arg_moves.push((ArmOpcode::MovReg, PhysReg::Gp(reg), arg_vreg));
            }
            (Some(class), AbiArgLoc::Stack(stack_offset)) => {
                emit_store_stack_arg_from_vreg(mf, mb, arg_vreg, class, &arg_ty, stack_offset);
            }
            (Some(class), other) => {
                panic!(
                    "isel: unexpected ABI loc {:?} for outgoing {:?} arg",
                    other, class
                );
            }
            (None, _) => {
                panic!("isel: call arg vreg class missing for %{}", arg_val.0);
            }
        }
    }

    for (opcode, dst, src) in pending_reg_arg_moves {
        mf.block_mut(mb).insts.push(MachineInst {
            opcode,
            operands: vec![MachineOperand::PhysReg(dst), MachineOperand::VReg(src)],
            def: None,
        });
    }

    if let Some(target) = indirect_target {
        mf.block_mut(mb).insts.push(MachineInst {
            opcode: ArmOpcode::Blr,
            operands: vec![MachineOperand::VReg(ctx.lookup_vreg(target))],
            def: None,
        });
    } else {
        mf.block_mut(mb).insts.push(MachineInst {
            opcode: ArmOpcode::Bl,
            operands: vec![MachineOperand::Extern(label)],
            def: None,
        });
    }

    if matches!(inst.ty, IrType::Int(IntWidth::I128)) {
        let dest_slot = ctx.lookup_wide_slot(inst.id);
        emit_store_phys_i128_pair(
            mf,
            mb,
            MachineOperand::PhysReg(PhysReg::FP),
            dest_slot as i64,
            PhysReg::Gp(0),
            PhysReg::Gp(1),
        );
    } else if inst.ty != IrType::Void {
        let class = type_to_reg_class(&inst.ty);
        let dest = ctx.get_vreg(mf, inst.id, class);
        let (src_reg, opcode) = match class {
            RegClass::Fp64 => (PhysReg::Fp(0), ArmOpcode::FmovReg),
            RegClass::Fp32 => (PhysReg::Fp32(0), ArmOpcode::FmovReg),
            RegClass::V128 => (PhysReg::Fp(0), ArmOpcode::FmovReg),
            RegClass::Gp32 => (PhysReg::Gp32(0), ArmOpcode::MovReg),
            RegClass::Gp64 => (PhysReg::Gp(0), ArmOpcode::MovReg),
        };
        mf.block_mut(mb).insts.push(MachineInst {
            opcode,
            operands: vec![MachineOperand::VReg(dest), MachineOperand::PhysReg(src_reg)],
            def: Some(dest),
        });
    } else {
        ctx.get_vreg(mf, inst.id, RegClass::Gp64);
    }
}

/// Instruction selection context.
struct ISelCtx {
    /// IR ValueId → MIR VRegId.
    value_map: HashMap<ValueId, VRegId>,
    /// IR wide scalar ValueId → stack slot offset used as its backing store.
    wide_value_slots: HashMap<ValueId, i32>,
    /// IR BlockId → MIR MBlockId.
    block_map: HashMap<BlockId, MBlockId>,
    /// IR alloca ValueId → stack frame offset.
    alloca_offsets: HashMap<ValueId, i32>,
    /// IR BlockId → its block params. Snapshotted before phase 4b
    /// so terminator selection can read each target's params
    /// without re-borrowing the function while &mut MachineFunction
    /// is held. Cloning just the param vec is dramatically cheaper
    /// than cloning the whole BasicBlock — instructions can be in
    /// the thousands, params are typically 0-3.
    block_params: HashMap<BlockId, Vec<BlockParam>>,
    /// ICmp/FCmp ValueIds that are exclusively consumed by a Select in
    /// the same block with no intervening flag-clobbering instruction.
    /// For these, we suppress CSET during ICmp lowering and use the
    /// flags directly from the CMP in the CSEL.
    select_fused: HashSet<ValueId>,
    /// For each fused ICmp/FCmp, the ARM condition code to use in the
    /// CSEL (determined at the time we suppress the CSET).
    fused_arm_cond: HashMap<ValueId, ArmCond>,
}

impl ISelCtx {
    fn new() -> Self {
        Self {
            value_map: HashMap::new(),
            wide_value_slots: HashMap::new(),
            block_map: HashMap::new(),
            alloca_offsets: HashMap::new(),
            block_params: HashMap::new(),
            select_fused: HashSet::new(),
            fused_arm_cond: HashMap::new(),
        }
    }

    /// Get the vreg for an IR value, or create one if needed.
    /// In debug builds, asserts that an existing mapping has the
    /// same register class as requested — a class mismatch means
    /// Phase 4a (vreg pre-allocation) and Phase 4b (instruction
    /// selection) disagree about a value's type, which would
    /// silently corrupt code.
    fn get_vreg(&mut self, mf: &mut MachineFunction, val: ValueId, class: RegClass) -> VRegId {
        if let Some(&vreg) = self.value_map.get(&val) {
            debug_assert!(
                mf.vregs.iter().find(|v| v.id == vreg).map(|v| v.class) == Some(class),
                "isel: vreg class mismatch for IR value %{} (existing class \
                 differs from requested {:?}) — phase 4a/4b disagreement",
                val.0,
                class,
            );
            return vreg;
        }
        let vreg = mf.new_vreg(class);
        self.value_map.insert(val, vreg);
        vreg
    }

    /// Get the vreg for an IR value, assuming it was already mapped.
    fn lookup_vreg(&self, val: ValueId) -> VRegId {
        *self.value_map.get(&val).unwrap_or_else(|| {
            panic!(
                "isel: unmapped IR value %{} — phase 4a should have allocated \
                 a vreg for every IR value before phase 4b runs. {} values are \
                 currently mapped. This usually means a forward reference, \
                 a missing block param, or a value defined in an unreachable \
                 block.",
                val.0,
                self.value_map.len(),
            )
        })
    }

    /// Get the machine block for an IR block. Unmapped blocks fall
    /// back to the entry block (MBlockId(0)) rather than panicking.
    fn lookup_block(&self, block: BlockId) -> MBlockId {
        *self.block_map.get(&block).unwrap_or(&MBlockId(0))
    }

    fn lookup_wide_slot(&self, val: ValueId) -> i32 {
        *self.wide_value_slots.get(&val).unwrap_or_else(|| {
            panic!(
                "isel: unmapped wide i128 value %{} — phase 4a should have allocated \
                 a backing slot for every supported i128 SSA value before phase 4b runs",
                val.0
            )
        })
    }
}

/// Select machine instructions for a single IR instruction.
fn select_inst(
    mf: &mut MachineFunction,
    ctx: &mut ISelCtx,
    mb: MBlockId,
    inst: &Inst,
    func: &Function,
) {
    if matches!(inst.ty, IrType::Int(IntWidth::I128)) {
        match &inst.kind {
            InstKind::ConstInt(val, IntWidth::I128) => {
                let dest_slot = ctx.lookup_wide_slot(inst.id);
                emit_const_i128_to_phys_pair(mf, mb, *val, PhysReg::Gp(16), PhysReg::Gp(17));
                emit_store_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    dest_slot as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                return;
            }
            InstKind::Undef(_) => {
                let dest_slot = ctx.lookup_wide_slot(inst.id);
                emit_const_i128_to_phys_pair(mf, mb, 0, PhysReg::Gp(16), PhysReg::Gp(17));
                emit_store_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    dest_slot as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                return;
            }
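            // Add/sub round-trip both operands through their
            // FP-relative backing slots. The per-half sequence lives
            // in emit_i128_binop_via_slots; the usual ARM64 pattern
            // is a carry chain (ADDS/ADC for add, SUBS/SBC for sub),
            // but see that helper for the authoritative lowering.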
            InstKind::IAdd(a, b) => {
                emit_i128_binop_via_slots(mf, ctx, mb, I128BinOp::Add, inst.id, *a, *b);
                return;
            }
            InstKind::ISub(a, b) => {
                emit_i128_binop_via_slots(mf, ctx, mb, I128BinOp::Sub, inst.id, *a, *b);
                return;
            }
            InstKind::INeg(a) => {
                let dest_slot = ctx.lookup_wide_slot(inst.id);
                let src_slot = ctx.lookup_wide_slot(*a);
                emit_load_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    src_slot as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                emit_i128_neg(mf, mb, PhysReg::Gp(16), PhysReg::Gp(17));
                emit_store_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    dest_slot as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                return;
            }
            InstKind::Load(addr) => {
                let dest_slot = ctx.lookup_wide_slot(inst.id);
                if let Some(&offset) = ctx.alloca_offsets.get(addr) {
                    emit_load_phys_i128_pair(
                        mf,
                        mb,
                        MachineOperand::PhysReg(PhysReg::FP),
                        offset as i64,
                        PhysReg::Gp(16),
                        PhysReg::Gp(17),
                    );
                } else {
                    let base = ctx.lookup_vreg(*addr);
                    emit_load_phys_i128_pair(
                        mf,
                        mb,
                        MachineOperand::VReg(base),
                        0,
                        PhysReg::Gp(16),
                        PhysReg::Gp(17),
                    );
                }
                emit_store_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    dest_slot as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                return;
            }
            InstKind::Select(cond, tv, fv) => {
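                // Load both arms unconditionally (x16:x17 = true
                // value, x8:x9 = false value) and pick each half
                // with a CSEL; slot loads have no side effects, so
                // evaluating both arms is safe.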
                let arm_cond = if let Some(&fused_cond) = ctx.fused_arm_cond.get(cond) {
                    fused_cond
                } else {
                    let cond_reg = ctx.lookup_vreg(*cond);
                    mf.block_mut(mb).insts.push(MachineInst {
                        opcode: ArmOpcode::CmpImm,
                        operands: vec![MachineOperand::VReg(cond_reg), MachineOperand::Imm(0)],
                        def: None,
                    });
                    ArmCond::Ne
                };
                let dest_slot = ctx.lookup_wide_slot(inst.id);
                let true_slot = ctx.lookup_wide_slot(*tv);
                let false_slot = ctx.lookup_wide_slot(*fv);
                emit_load_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    true_slot as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                emit_load_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    false_slot as i64,
                    PhysReg::Gp(8),
                    PhysReg::Gp(9),
                );
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::CselReg,
                    operands: vec![
                        MachineOperand::PhysReg(PhysReg::Gp(16)),
                        MachineOperand::PhysReg(PhysReg::Gp(16)),
                        MachineOperand::PhysReg(PhysReg::Gp(8)),
                        MachineOperand::Cond(arm_cond),
                    ],
                    def: None,
                });
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::CselReg,
                    operands: vec![
                        MachineOperand::PhysReg(PhysReg::Gp(17)),
                        MachineOperand::PhysReg(PhysReg::Gp(17)),
                        MachineOperand::PhysReg(PhysReg::Gp(9)),
                        MachineOperand::Cond(arm_cond),
                    ],
                    def: None,
                });
                emit_store_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    dest_slot as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                return;
            }
            InstKind::Call(..) => {
                select_call_inst(mf, ctx, mb, inst, func);
                return;
            }
            _ => {
                panic!(
                    "isel: unsupported i128 instruction reached backend despite gating: {:?}",
                    inst.kind
                );
            }
        }
    }

    match &inst.kind {
        // ---- Constants ----
        InstKind::ConstInt(val, width) => {
            let class = int_width_class(width);
            let dest = ctx.get_vreg(mf, inst.id, class);
            emit_const_int(mf, mb, dest, *val, *width);
        }

        InstKind::ConstFloat(val, width) => {
            let class = float_width_class(width);
            let dest = ctx.get_vreg(mf, inst.id, class);
            let cp_idx = match width {
                FloatWidth::F32 => mf.add_const(ConstPoolEntry::F32(*val as f32)),
                FloatWidth::F64 => mf.add_const(ConstPoolEntry::F64(*val)),
            };
            // ADRP + LDR from constant pool.
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::AdrpLdr,
                operands: vec![
                    MachineOperand::VReg(dest),
                    MachineOperand::ConstPool(cp_idx),
                ],
                def: Some(dest),
            });
        }

        InstKind::ConstBool(val) => {
            let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp32);
            emit_const_int(mf, mb, dest, if *val { 1 } else { 0 }, IntWidth::I32);
        }

        InstKind::ConstString(bytes) => {
            let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp64);
            let cp_idx = mf.add_const(ConstPoolEntry::Bytes(bytes.clone()));
            // Use ADRP+ADD to compute the address (not ADRP+LDR which loads the value).
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::AdrpAdd,
                operands: vec![
                    MachineOperand::VReg(dest),
                    MachineOperand::ConstPool(cp_idx),
                ],
                def: Some(dest),
            });
        }

        InstKind::Undef(_) => {
            // Emit a deterministic zero instead of leaving the vreg
            // undefined. A truly undefined vreg lets the register
            // allocator hand us whatever physical register is free,
            // and that register's stale contents leak into reads —
            // which makes optimization-level diffs nondeterministic
            // and turns "undef ⇒ anything" into "undef ⇒ whatever
            // happened to be in x14 at this point in the program."
            //
            // mem2reg synthesizes Undef as the initial value of a
            // promoted slot before any store. The Fortran semantics
            // for reading uninitialized storage are undefined, but
            // a hard zero is at least reproducible across opt
            // levels and friendly to debuggers.
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            match class {
                RegClass::Gp32 => {
                    mf.block_mut(mb).insts.push(MachineInst {
                        opcode: ArmOpcode::MovReg,
                        operands: vec![
                            MachineOperand::VReg(dest),
                            MachineOperand::PhysReg(PhysReg::Wzr),
                        ],
                        def: Some(dest),
                    });
                }
                RegClass::Gp64 => {
                    mf.block_mut(mb).insts.push(MachineInst {
                        opcode: ArmOpcode::MovReg,
                        operands: vec![
                            MachineOperand::VReg(dest),
                            MachineOperand::PhysReg(PhysReg::Xzr),
                        ],
                        def: Some(dest),
                    });
                }
                RegClass::Fp32 => {
                    let cp_idx = mf.add_const(ConstPoolEntry::F32(0.0));
                    mf.block_mut(mb).insts.push(MachineInst {
                        opcode: ArmOpcode::AdrpLdr,
                        operands: vec![
                            MachineOperand::VReg(dest),
                            MachineOperand::ConstPool(cp_idx),
                        ],
                        def: Some(dest),
                    });
                }
                RegClass::Fp64 => {
                    let cp_idx = mf.add_const(ConstPoolEntry::F64(0.0));
                    mf.block_mut(mb).insts.push(MachineInst {
                        opcode: ArmOpcode::AdrpLdr,
                        operands: vec![
                            MachineOperand::VReg(dest),
                            MachineOperand::ConstPool(cp_idx),
                        ],
                        def: Some(dest),
                    });
                }
                RegClass::V128 => {
                    // Sprint 12 Stage 1 reserves the type/instr; no
                    // path produces a V128 Undef yet. Bail rather
                    // than emit a half-baked NEON zero — when the
                    // vectorizer arrives it will have its own
                    // VBroadcast(const 0) lowering.
                    unreachable!("V128 Undef emission not implemented (Sprint 12 Stage 4 work)");
                }
            }
        }

        // ---- Integer arithmetic ----
        InstKind::IAdd(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::AddReg, *a, *b),
        InstKind::ISub(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::SubReg, *a, *b),
        InstKind::IMul(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::Mul, *a, *b),
        InstKind::IDiv(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::Sdiv, *a, *b),
        InstKind::IMod(a, b) => {
            // imod = a - (a / b) * b → SDIV + MSUB
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            let va = ctx.lookup_vreg(*a);
            let vb = ctx.lookup_vreg(*b);
            let tmp = mf.new_vreg(class);
            // tmp = sdiv a, b
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::Sdiv,
                operands: vec![
                    MachineOperand::VReg(tmp),
                    MachineOperand::VReg(va),
                    MachineOperand::VReg(vb),
                ],
                def: Some(tmp),
            });
            // dest = msub tmp, vb, va → va - tmp * vb = a - (a/b)*b
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::Msub,
                operands: vec![
                    MachineOperand::VReg(dest),
                    MachineOperand::VReg(tmp),
                    MachineOperand::VReg(vb),
                    MachineOperand::VReg(va),
                ],
                def: Some(dest),
            });
        }
        InstKind::INeg(a) => {
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            let va = ctx.lookup_vreg(*a);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::Neg,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(va)],
                def: Some(dest),
            });
        }

        // ---- Float arithmetic ----
        InstKind::FAdd(a, b) => emit_float_binop(
            mf,
            ctx,
            mb,
            inst,
            &inst.ty,
            *a,
            *b,
            ArmOpcode::FaddS,
            ArmOpcode::FaddD,
        ),
        InstKind::FSub(a, b) => emit_float_binop(
            mf,
            ctx,
            mb,
            inst,
            &inst.ty,
            *a,
            *b,
            ArmOpcode::FsubS,
            ArmOpcode::FsubD,
        ),
        InstKind::FMul(a, b) => emit_float_binop(
            mf,
            ctx,
            mb,
            inst,
            &inst.ty,
            *a,
            *b,
            ArmOpcode::FmulS,
            ArmOpcode::FmulD,
        ),
        InstKind::FDiv(a, b) => emit_float_binop(
            mf,
            ctx,
            mb,
            inst,
            &inst.ty,
            *a,
            *b,
            ArmOpcode::FdivS,
            ArmOpcode::FdivD,
        ),
        InstKind::FNeg(a) => {
            let (class, opcode) = match &inst.ty {
                IrType::Float(FloatWidth::F32) => (RegClass::Fp32, ArmOpcode::FnegS),
                _ => (RegClass::Fp64, ArmOpcode::FnegD),
            };
            let dest = ctx.get_vreg(mf, inst.id, class);
            let va = ctx.lookup_vreg(*a);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(va)],
                def: Some(dest),
            });
        }
        InstKind::FPow(a, b) => {
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            let va = ctx.lookup_vreg(*a);
            let vb = ctx.lookup_vreg(*b);
            let (func_name, arg0, arg1, ret) = match &inst.ty {
                IrType::Float(FloatWidth::F32) => {
                    ("powf", PhysReg::Fp32(0), PhysReg::Fp32(1), PhysReg::Fp32(0))
                }
                _ => ("pow", PhysReg::Fp(0), PhysReg::Fp(1), PhysReg::Fp(0)),
            };
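            // AAPCS64 passes the float args in v0/v1 and returns the
            // result in v0, so the libm call needs only these
            // register moves around the BL.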
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::FmovReg,
                operands: vec![MachineOperand::PhysReg(arg0), MachineOperand::VReg(va)],
                def: None,
            });
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::FmovReg,
                operands: vec![MachineOperand::PhysReg(arg1), MachineOperand::VReg(vb)],
                def: None,
            });
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::Bl,
                operands: vec![MachineOperand::Extern(func_name.into())],
                def: None,
            });
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::FmovReg,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::PhysReg(ret)],
                def: Some(dest),
            });
        }

        // ---- Comparisons ----
        InstKind::ICmp(op, a, b) => {
            if matches!(func.value_type(*a), Some(IrType::Int(IntWidth::I128)))
                || matches!(func.value_type(*b), Some(IrType::Int(IntWidth::I128)))
            {
                let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp32);
                let lhs_slot = ctx.lookup_wide_slot(*a);
                let rhs_slot = ctx.lookup_wide_slot(*b);
                emit_load_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    lhs_slot as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                emit_load_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    rhs_slot as i64,
                    PhysReg::Gp(8),
                    PhysReg::Gp(9),
                );
                match op {
                    CmpOp::Eq | CmpOp::Ne => {
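                        // Halves compose as:
                        //   a == b ⇔ lo_a == lo_b && hi_a == hi_b (AND the CSETs)
                        //   a != b ⇔ lo_a != lo_b || hi_a != hi_b (ORR the CSETs)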
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::CmpReg,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp(16)),
                                MachineOperand::PhysReg(PhysReg::Gp(8)),
                            ],
                            def: None,
                        });
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::Cset,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp32(10)),
                                MachineOperand::Cond(cmp_to_arm_cond(*op)),
                            ],
                            def: None,
                        });
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::CmpReg,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp(17)),
                                MachineOperand::PhysReg(PhysReg::Gp(9)),
                            ],
                            def: None,
                        });
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::Cset,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp32(11)),
                                MachineOperand::Cond(cmp_to_arm_cond(*op)),
                            ],
                            def: None,
                        });
                        let combine = match op {
                            CmpOp::Eq => ArmOpcode::AndReg,
                            CmpOp::Ne => ArmOpcode::OrrReg,
                            _ => unreachable!(),
                        };
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: combine,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp32(10)),
                                MachineOperand::PhysReg(PhysReg::Gp32(10)),
                                MachineOperand::PhysReg(PhysReg::Gp32(11)),
                            ],
                            def: None,
                        });
                    }
                    CmpOp::Lt | CmpOp::Le | CmpOp::Gt | CmpOp::Ge => {
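                        // Signed ordering composes over halves:
                        //   a < b ⇔ hi_a < hi_b
                        //        || (hi_a == hi_b && lo_a < lo_b unsigned)
                        // i128_ordered_conds supplies hi_cond for the
                        // signed hi compare and lo_cond for the lo
                        // compare; the lo half must be compared
                        // unsigned for the formula to hold.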
                        let (hi_cond, lo_cond) = i128_ordered_conds(*op);
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::CmpReg,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp(17)),
                                MachineOperand::PhysReg(PhysReg::Gp(9)),
                            ],
                            def: None,
                        });
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::Cset,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp32(10)),
                                MachineOperand::Cond(hi_cond),
                            ],
                            def: None,
                        });
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::Cset,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp32(11)),
                                MachineOperand::Cond(ArmCond::Eq),
                            ],
                            def: None,
                        });
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::CmpReg,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp(16)),
                                MachineOperand::PhysReg(PhysReg::Gp(8)),
                            ],
                            def: None,
                        });
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::Cset,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp32(8)),
                                MachineOperand::Cond(lo_cond),
                            ],
                            def: None,
                        });
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::AndReg,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp32(11)),
                                MachineOperand::PhysReg(PhysReg::Gp32(11)),
                                MachineOperand::PhysReg(PhysReg::Gp32(8)),
                            ],
                            def: None,
                        });
                        mf.block_mut(mb).insts.push(MachineInst {
                            opcode: ArmOpcode::OrrReg,
                            operands: vec![
                                MachineOperand::PhysReg(PhysReg::Gp32(10)),
                                MachineOperand::PhysReg(PhysReg::Gp32(10)),
                                MachineOperand::PhysReg(PhysReg::Gp32(11)),
                            ],
                            def: None,
                        });
                    }
                }
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::MovReg,
                    operands: vec![
                        MachineOperand::VReg(dest),
                        MachineOperand::PhysReg(PhysReg::Gp32(10)),
                    ],
                    def: Some(dest),
                });
                return;
            }

            let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp32);
            let va = icmp_operand_vreg(mf, ctx, mb, func, *a, *b);
            let vb = icmp_operand_vreg(mf, ctx, mb, func, *b, *a);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::CmpReg,
                operands: vec![MachineOperand::VReg(va), MachineOperand::VReg(vb)],
                def: None,
            });
            // If this ICmp feeds exclusively into a Select (detected in the
            // pre-pass), suppress CSET. The Select will use the flags directly.
            if !ctx.select_fused.contains(&inst.id) {
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::Cset,
                    operands: vec![
                        MachineOperand::VReg(dest),
                        MachineOperand::Cond(cmp_to_arm_cond(*op)),
                    ],
                    def: Some(dest),
                });
            }
        }
        InstKind::FCmp(op, a, b) => {
            let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp32);
            let va = ctx.lookup_vreg(*a);
            let vb = ctx.lookup_vreg(*b);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::FCmpReg,
                operands: vec![MachineOperand::VReg(va), MachineOperand::VReg(vb)],
                def: None,
            });
            if !ctx.select_fused.contains(&inst.id) {
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::FCset,
                    operands: vec![
                        MachineOperand::VReg(dest),
                        MachineOperand::Cond(fcmp_to_arm_cond(*op)),
                    ],
                    def: Some(dest),
                });
            }
        }

        // ---- Logic ----
        InstKind::And(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::AndReg, *a, *b),
        InstKind::Or(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::OrrReg, *a, *b),
        InstKind::Not(a) => {
            // Logical NOT: CMP src, #0; CSET dest, EQ
            // If src == 0 (false), EQ is true → dest = 1 (true).
            // If src != 0 (true), EQ is false → dest = 0 (false).
            // This correctly handles any truthy value, not just 0/1.
            let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp32);
            let va = ctx.lookup_vreg(*a);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::CmpImm,
                operands: vec![MachineOperand::VReg(va), MachineOperand::Imm(0)],
                def: None,
            });
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::Cset,
                operands: vec![
                    MachineOperand::VReg(dest),
                    MachineOperand::Cond(ArmCond::Eq),
                ],
                def: Some(dest),
            });
        }

        // ---- Select (CSEL) ----
        //
        // Fast path: if the condition was produced by an ICmp/FCmp in the
        // same block with no other users, the pre-pass marked it as fused.
        // We already emitted `CMP a, b` (no CSET), so the flags are live.
        // Use them directly: `CSEL dest, tv, fv, <arm_cond>`.
        //
        // Slow path (unfused): the condition is an arbitrary boolean in a
        // register. Materialize with `CMP cond, #0; CSEL dest, tv, fv, NE`.
        InstKind::Select(cond, tv, fv) => {
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            let true_reg = coerce_select_operand_vreg(mf, ctx, mb, func, *tv, &inst.ty);
            let false_reg = coerce_select_operand_vreg(mf, ctx, mb, func, *fv, &inst.ty);

            let arm_cond = if let Some(&fused_cond) = ctx.fused_arm_cond.get(cond) {
                // Flags already set by the fused CMP — no extra compare needed.
                fused_cond
            } else {
                // Unfused: compare the boolean register against 0.
                let cond_reg = ctx.lookup_vreg(*cond);
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::CmpImm,
                    operands: vec![MachineOperand::VReg(cond_reg), MachineOperand::Imm(0)],
                    def: None,
                });
                ArmCond::Ne
            };

            let opcode = if class == RegClass::Fp32 || class == RegClass::Fp64 {
                ArmOpcode::FcselReg
            } else {
                ArmOpcode::CselReg
            };
            mf.block_mut(mb).insts.push(MachineInst {
                opcode,
                operands: vec![
                    MachineOperand::VReg(dest),
                    MachineOperand::VReg(true_reg),
                    MachineOperand::VReg(false_reg),
                    MachineOperand::Cond(arm_cond),
                ],
                def: Some(dest),
            });
        }

        // ---- Float: fabs, fsqrt ----
        InstKind::FAbs(a) => {
            let src = ctx.lookup_vreg(*a);
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            let opcode = if class == RegClass::Fp64 {
                ArmOpcode::FabsD
            } else {
                ArmOpcode::FabsS
            };
            mf.block_mut(mb).insts.push(MachineInst {
                opcode,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                def: Some(dest),
            });
        }
        InstKind::FSqrt(a) => {
            let src = ctx.lookup_vreg(*a);
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            let opcode = if class == RegClass::Fp64 {
                ArmOpcode::FsqrtD
            } else {
                ArmOpcode::FsqrtS
            };
            mf.block_mut(mb).insts.push(MachineInst {
                opcode,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                def: Some(dest),
            });
        }

        // ---- Bitwise ----
        InstKind::BitAnd(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::AndReg, *a, *b),
        InstKind::BitOr(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::OrrReg, *a, *b),
        InstKind::BitXor(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::EorReg, *a, *b),
        InstKind::BitNot(a) => {
            let src = ctx.lookup_vreg(*a);
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::Mvn,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                def: Some(dest),
            });
        }
        InstKind::Shl(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::LslReg, *a, *b),
        InstKind::LShr(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::LsrReg, *a, *b),
        InstKind::AShr(a, b) => emit_binop(mf, ctx, mb, inst, ArmOpcode::AsrReg, *a, *b),
        InstKind::CountLeadingZeros(a) => {
            let src = ctx.lookup_vreg(*a);
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::Clz,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                def: Some(dest),
            });
        }
        InstKind::CountTrailingZeros(a) => {
            // CTZ = CLZ(RBIT(x))
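            // RBIT reverses the bit order, so the trailing zeros of
            // x become the leading zeros of RBIT(x).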
            let src = ctx.lookup_vreg(*a);
            let class = type_to_reg_class(&inst.ty);
            let tmp = mf.new_vreg(class);
            let dest = ctx.get_vreg(mf, inst.id, class);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::Rbit,
                operands: vec![MachineOperand::VReg(tmp), MachineOperand::VReg(src)],
                def: Some(tmp),
            });
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::Clz,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(tmp)],
                def: Some(dest),
            });
        }
        InstKind::PopCount(a) => {
            // Proper ARM64 popcount is a NEON sequence:
            //   FMOV Vd.8B, Xn; CNT Vd.8B, Vd.8B; ADDV Bd, Vd.8B; FMOV Wd, Sd
            let src = ctx.lookup_vreg(*a);
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            // Placeholder until that sequence (or a runtime call) is
            // wired up: copy src to dest. This is NOT a correct
            // popcount; consumers see the input value unchanged.
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::MovReg,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                def: Some(dest),
            });
        }

        // ---- Conversions ----
        InstKind::IntToFloat(a, fw) => {
            let src = ctx.lookup_vreg(*a);
            let src_class = mf.vregs.iter().find(|v| v.id == src).map(|v| v.class);
            let is_64bit_src = matches!(src_class, Some(RegClass::Gp64));
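            // SCVTF opcode suffixes here: first letter is the float
            // dest width (S = f32, D = f64), second the integer
            // source width (W = 32-bit, X = 64-bit).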
            let (class, opcode) = match (fw, is_64bit_src) {
                (FloatWidth::F32, false) => (RegClass::Fp32, ArmOpcode::ScvtfSW),
                (FloatWidth::F32, true) => (RegClass::Fp32, ArmOpcode::ScvtfSX),
                (FloatWidth::F64, false) => (RegClass::Fp64, ArmOpcode::ScvtfDW),
                (FloatWidth::F64, true) => (RegClass::Fp64, ArmOpcode::ScvtfDX),
            };
            let dest = ctx.get_vreg(mf, inst.id, class);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                def: Some(dest),
            });
        }
        InstKind::FloatToInt(a, iw) => {
            let src = ctx.lookup_vreg(*a);
            let src_class = mf.vregs.iter().find(|v| v.id == src).map(|v| v.class);
            let is_f64_src = matches!(src_class, Some(RegClass::Fp64));
            let is_64bit_dest = matches!(iw, IntWidth::I64);
            let class = int_width_class(iw);
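            // FCVTZS naming mirrors SCVTF: first suffix letter is
            // the integer dest width (W/X), second the float source
            // width (S/D).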
            let opcode = match (is_64bit_dest, is_f64_src) {
                (false, false) => ArmOpcode::FcvtzsWS,
                (false, true) => ArmOpcode::FcvtzsWD,
                (true, false) => ArmOpcode::FcvtzsXS,
                (true, true) => ArmOpcode::FcvtzsXD,
            };
            let dest = ctx.get_vreg(mf, inst.id, class);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                def: Some(dest),
            });
        }
        InstKind::FloatExtend(a, _) => {
            let src = ctx.lookup_vreg(*a);
            let dest = ctx.get_vreg(mf, inst.id, RegClass::Fp64);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::FcvtDS,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                def: Some(dest),
            });
        }
        InstKind::FloatTrunc(a, _) => {
            let src = ctx.lookup_vreg(*a);
            let dest = ctx.get_vreg(mf, inst.id, RegClass::Fp32);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::FcvtSD,
                operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                def: Some(dest),
            });
        }

        // ---- Memory ----
        InstKind::GlobalAddr(name) => {
            // Materialize the address of a module-level global into
            // a Gp64 vreg via ADRP+ADD against `_globalname`. Loads
            // and stores then operate on this pointer the same way
            // they operate on an alloca address.
            let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp64);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode: ArmOpcode::AdrpAdd,
                operands: vec![
                    MachineOperand::VReg(dest),
                    MachineOperand::GlobalLabel(name.clone()),
                ],
                def: Some(dest),
            });
        }

        InstKind::Alloca(_) => {
            // Alloca is handled in Phase 1 (stack slot allocation).
            // The "address" is a frame slot offset. Map the ValueId to the offset.
            if let Some(&offset) = ctx.alloca_offsets.get(&inst.id) {
                let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp64);
                // Materialize address: SUB dest, FP, #abs(offset)
                // Offsets are negative from FP, so we subtract the absolute value.
                let abs_offset = (-offset) as i64;
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::SubImm,
                    operands: vec![
                        MachineOperand::VReg(dest),
                        MachineOperand::PhysReg(PhysReg::FP),
                        MachineOperand::Imm(abs_offset),
                    ],
                    def: Some(dest),
                });
            }
        }

        InstKind::Load(addr) => {
            // Audit CRITICAL-2: dispatch on the IR result type so the
            // load opcode width matches the value, not the pointer.
            // Previously every integer load used `ldr w_, [_]` regardless
            // of width, silently reading 4 bytes for an i8 load.
            let class = type_to_reg_class(&inst.ty);
            let dest = ctx.get_vreg(mf, inst.id, class);
            let opcode = load_opcode_for(&inst.ty, class);
            let (base_op, offset_op) = narrow_load_store_addr(ctx, *addr);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode,
                operands: vec![MachineOperand::VReg(dest), base_op, offset_op],
                def: Some(dest),
            });
        }

        InstKind::Store(val, addr) => {
            if matches!(func.value_type(*val), Some(IrType::Int(IntWidth::I128))) {
                let src_slot = ctx.lookup_wide_slot(*val);
                emit_load_phys_i128_pair(
                    mf,
                    mb,
                    MachineOperand::PhysReg(PhysReg::FP),
                    src_slot as i64,
                    PhysReg::Gp(16),
                    PhysReg::Gp(17),
                );
                if let Some(&offset) = ctx.alloca_offsets.get(addr) {
                    emit_store_phys_i128_pair(
                        mf,
                        mb,
                        MachineOperand::PhysReg(PhysReg::FP),
                        offset as i64,
                        PhysReg::Gp(16),
                        PhysReg::Gp(17),
                    );
                } else {
                    let base = ctx.lookup_vreg(*addr);
                    emit_store_phys_i128_pair(
                        mf,
                        mb,
                        MachineOperand::VReg(base),
                        0,
                        PhysReg::Gp(16),
                        PhysReg::Gp(17),
                    );
                }
                return;
            }

            let val_vreg = ctx.lookup_vreg(*val);
            // Audit CRITICAL-2: dispatch on the *value*'s declared IR
            // type, not the pointer's pointee — byte-level GEPs into
            // derived types and array constructors reuse `Ptr<i8>` as a
            // generic offset cursor, so dispatching by the pointee
            // would silently truncate non-byte stores.
            let val_ty = func.value_type(*val);
            let val_class = mf
                .vregs
                .iter()
                .find(|v| v.id == val_vreg)
                .map(|v| v.class)
                .unwrap_or(RegClass::Gp64);
            let opcode = store_opcode_for(val_ty.as_ref(), val_class);
            let (base_op, offset_op) = narrow_load_store_addr(ctx, *addr);
            mf.block_mut(mb).insts.push(MachineInst {
                opcode,
                operands: vec![MachineOperand::VReg(val_vreg), base_op, offset_op],
                def: None,
            });
        }

        InstKind::GetElementPtr(base, indices) => {
            // GEP: base + index * elem_size
            let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp64);
            let base_src = ctx.lookup_vreg(*base);
            let base_vreg = if mf.vregs.iter().find(|v| v.id == base_src).map(|v| v.class)
                != Some(RegClass::Gp64)
            {
                let widened = mf.new_vreg(RegClass::Gp64);
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::MovReg,
                    operands: vec![
                        MachineOperand::VReg(widened),
                        MachineOperand::VReg(base_src),
                    ],
                    def: Some(widened),
                });
                widened
            } else {
                base_src
            };

            // Determine element size from the GEP result type (Ptr<elem_ty>).
            // Bool occupies 1 byte both in SSA and in `alloca [Bool x N]`
            // storage; the prior 4-byte override here desynced GEP byte
            // strides from `alloca` byte strides, so `arr(i) = .true.` for
            // a stack `logical :: arr(N)` wrote 3 bytes past the slot.
            let elem_size = match &inst.ty {
                IrType::Ptr(inner) => match inner.as_ref() {
                    IrType::Struct(_) => alloca_size(inner) as i64,
                    _ => inner.size_bytes() as i64,
                },
                _ => 4, // fallback
            };

            if let Some(idx) = indices.first() {
                let idx_src = ctx.lookup_vreg(*idx);
                let idx_vreg = if mf.vregs.iter().find(|v| v.id == idx_src).map(|v| v.class)
                    == Some(RegClass::Gp64)
                {
                    idx_src
                } else {
                    let widened = mf.new_vreg(RegClass::Gp64);
                    let opcode = if matches!(func.value_type(*idx), Some(IrType::Bool)) {
                        ArmOpcode::MovReg
                    } else {
                        ArmOpcode::Sxtw
                    };
                    mf.block_mut(mb).insts.push(MachineInst {
                        opcode,
                        operands: vec![
                            MachineOperand::VReg(widened),
                            MachineOperand::VReg(idx_src),
                        ],
                        def: Some(widened),
                    });
                    widened
                };
                let tmp = mf.new_vreg(RegClass::Gp64);
                emit_const_int(mf, mb, tmp, elem_size as i128, IntWidth::I64);
                let scaled = mf.new_vreg(RegClass::Gp64);
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::Mul,
                    operands: vec![
                        MachineOperand::VReg(scaled),
                        MachineOperand::VReg(idx_vreg),
                        MachineOperand::VReg(tmp),
                    ],
                    def: Some(scaled),
                });
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::AddReg,
                    operands: vec![
                        MachineOperand::VReg(dest),
                        MachineOperand::VReg(base_vreg),
                        MachineOperand::VReg(scaled),
                    ],
                    def: Some(dest),
                });
            } else {
                // No indices — just copy the base.
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::MovReg,
                    operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(base_vreg)],
                    def: Some(dest),
                });
            }
        }

        // ---- Calls ----
        InstKind::Call(..) | InstKind::RuntimeCall(..) => {
            select_call_inst(mf, ctx, mb, inst, func);
        }

        // ---- Integer extend/truncate ----
        InstKind::IntExtend(a, _target_width, signed) => {
            let src = ctx.lookup_vreg(*a);
            // Pick the opcode based on the SOURCE width, not the
            // target. ARM64 has distinct SXTB/SXTH/SXTW instructions
            // for 8/16/32-bit sources; using SXTW on anything other
            // than a 32-bit source (or with a non-X dest) yields
            // "invalid operand for instruction" at the assembler.
            let src_ty = func.value_type(*a);
            let src_width = match src_ty.as_ref() {
                Some(IrType::Int(IntWidth::I8)) => 8,
                Some(IrType::Int(IntWidth::I16)) => 16,
                Some(IrType::Int(IntWidth::I32)) | Some(IrType::Bool) => 32,
                Some(IrType::Int(IntWidth::I64)) => 64,
                _ => 32, // conservative default
            };
            let dest_width = match &inst.ty {
                IrType::Int(IntWidth::I8)
                | IrType::Int(IntWidth::I16)
                | IrType::Int(IntWidth::I32)
                | IrType::Bool => 32,
                IrType::Int(IntWidth::I64) => 64,
                _ => 32,
            };
            // Dest register class follows the declared target
            // bit-width, with one exception: SXTW requires an
            // X-register destination, so promote to Gp64 when
            // source is 32 AND target is 64.
            let dest_class = if dest_width == 64 {
                RegClass::Gp64
            } else {
                RegClass::Gp32
            };
            let dest = ctx.get_vreg(mf, inst.id, dest_class);

            if !*signed {
                // Zero-extend: MOV (ARM64 implicitly zero-extends W→X).
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::MovReg,
                    operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                    def: Some(dest),
                });
            } else if src_width >= dest_width {
                // Same-width or wider source (bogus from lowering's
                // perspective but observed in practice when a
                // function-result intrinsic mis-resolves). Emit MOV
                // rather than an illegal SXTW Wd, Wn.
                mf.block_mut(mb).insts.push(MachineInst {
                    opcode: ArmOpcode::MovReg,
                    operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
                    def: Some(dest),
                });
            } else {
                let opcode = match src_width {
                    8 => ArmOpcode::Sxtb,
                    16 => ArmOpcode::Sxth,
                    32 => ArmOpcode::Sxtw,
                    // Unreachable: the src_width >= dest_width branch
                    // above already caught 64-bit sources.
                    _ => ArmOpcode::MovReg,
1637 };
1638 mf.block_mut(mb).insts.push(MachineInst {
1639 opcode,
1640 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
1641 def: Some(dest),
1642 });
1643 }
1644 }
1645
1646 InstKind::IntTrunc(a, _) => {
1647 let src = ctx.lookup_vreg(*a);
1648 let class = type_to_reg_class(&inst.ty);
1649 let dest = ctx.get_vreg(mf, inst.id, class);
1650 // Truncate: just MOV — the 32-bit register naturally truncates.
1651 mf.block_mut(mb).insts.push(MachineInst {
1652 opcode: ArmOpcode::MovReg,
1653 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
1654 def: Some(dest),
1655 });
1656 }
1657
1658 InstKind::PtrToInt(a) => {
1659 // Pointer is already an i64 in a GP register — just mov.
1660 let src = ctx.lookup_vreg(*a);
1661 let class = type_to_reg_class(&inst.ty);
1662 let dest = ctx.get_vreg(mf, inst.id, class);
1663 mf.block_mut(mb).insts.push(MachineInst {
1664 opcode: ArmOpcode::MovReg,
1665 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
1666 def: Some(dest),
1667 });
1668 }
1669
1670 InstKind::IntToPtr(a, _) => {
1671 // Integer already in a GP register — treat as pointer via mov.
1672 let src = ctx.lookup_vreg(*a);
1673 let dest = ctx.get_vreg(mf, inst.id, RegClass::Gp64);
1674 mf.block_mut(mb).insts.push(MachineInst {
1675 opcode: ArmOpcode::MovReg,
1676 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
1677 def: Some(dest),
1678 });
1679 }
1680
1681 // ---- SIMD vector ops (Sprint 12 Stage 2 isel hookup) ----
1682 //
1683 // The vectorizer (Stage 4) is what will start producing
1684 // these. Each arm picks a NEON ArmOpcode based on the result
1685 // vector's lane shape. Mixed-shape ops (e.g. integer 8×i16
1686 // narrow lanes) aren't selected here — Stage 4 will only
1687 // emit the four shapes covered by `VShape`.
1688 InstKind::VAdd(a, b) => emit_vbinop(mf, ctx, mb, inst, *a, *b, |s| match s {
1689 VShape::V4S => ArmOpcode::AddV4S,
1690 VShape::V2D => ArmOpcode::AddV2D,
1691 VShape::F4S => ArmOpcode::FaddV4S,
1692 VShape::F2D => ArmOpcode::FaddV2D,
1693 }),
1694 InstKind::VSub(a, b) => emit_vbinop(mf, ctx, mb, inst, *a, *b, |s| match s {
1695 VShape::V4S => ArmOpcode::SubV4S,
1696 VShape::V2D => ArmOpcode::SubV2D,
1697 VShape::F4S => ArmOpcode::FsubV4S,
1698 VShape::F2D => ArmOpcode::FsubV2D,
1699 }),
1700 InstKind::VMul(a, b) => emit_vbinop(mf, ctx, mb, inst, *a, *b, |s| match s {
1701 VShape::V4S => ArmOpcode::MulV4S,
1702 // NEON has no integer 2D mul — Stage 4 should not request
1703 // it; if it does we fall through to a placeholder.
1704 VShape::V2D => ArmOpcode::Nop,
1705 VShape::F4S => ArmOpcode::FmulV4S,
1706 VShape::F2D => ArmOpcode::FmulV2D,
1707 }),
1708 InstKind::VDiv(a, b) => emit_vbinop(mf, ctx, mb, inst, *a, *b, |s| match s {
1709 // No integer NEON divide — emit a placeholder; the
1710 // vectorizer should refuse to pick V128 lanes for VDiv
1711 // on integer types. Float forms exist.
1712 VShape::V4S | VShape::V2D => ArmOpcode::Nop,
1713 VShape::F4S => ArmOpcode::FdivV4S,
1714 VShape::F2D => ArmOpcode::FdivV2D,
1715 }),
1716 InstKind::VNeg(a) => emit_vunop(mf, ctx, mb, inst, *a, |s| match s {
1717 VShape::V4S => ArmOpcode::NegV4S,
1718 VShape::V2D => ArmOpcode::NegV2D,
1719 VShape::F4S => ArmOpcode::FnegV4S,
1720 VShape::F2D => ArmOpcode::FnegV2D,
1721 }),
1722 InstKind::VAbs(a) => emit_vunop(mf, ctx, mb, inst, *a, |s| match s {
1723 VShape::F4S => ArmOpcode::FabsV4S,
1724 VShape::F2D => ArmOpcode::FabsV2D,
1725 // NEON `abs` exists for integer too but the four-shape
1726 // alias isn't generated yet; placeholder.
1727 VShape::V4S | VShape::V2D => ArmOpcode::Nop,
1728 }),
1729 InstKind::VSqrt(a) => emit_vunop(mf, ctx, mb, inst, *a, |s| match s {
1730 VShape::F4S => ArmOpcode::FsqrtV4S,
1731 VShape::F2D => ArmOpcode::FsqrtV2D,
1732 // sqrt is float-only.
1733 VShape::V4S | VShape::V2D => ArmOpcode::Nop,
1734 }),
1735 InstKind::VFma(a, b, c) => {
1736 // FMLA is dest += a*b. Conventional 3-operand call
1737 // assumes dest is a fresh vreg — emit a copy-from-c
1738 // followed by FMLA. Stage 4 should fold the copy when it
1739 // tracks SSA destinations more carefully.
1740 let shape = match VShape::from_ir(&inst.ty) {
1741 Some(s) if s.is_float() => s,
1742 _ => {
1743 // unsupported shape — placeholder
1744 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
1745 mf.block_mut(mb).insts.push(MachineInst {
1746 opcode: ArmOpcode::Nop,
1747 operands: vec![],
1748 def: Some(dest),
1749 });
1750 return;
1751 }
1752 };
1753 let opcode = match shape {
1754 VShape::F4S => ArmOpcode::FmlaV4S,
1755 VShape::F2D => ArmOpcode::FmlaV2D,
1756 _ => unreachable!(),
1757 };
1758 let va = ctx.lookup_vreg(*a);
1759 let vb = ctx.lookup_vreg(*b);
1760 let vc = ctx.lookup_vreg(*c);
1761 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
1762 // dest = c (init accumulator). Must use Mov16B (mov.16b)
1763 // for V128 — fmov d, d truncates to 64 bits and silently
1764 // drops the upper lanes.
1765 mf.block_mut(mb).insts.push(MachineInst {
1766 opcode: ArmOpcode::Mov16B,
1767 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(vc)],
1768 def: Some(dest),
1769 });
1770 // dest += a * b
1771 mf.block_mut(mb).insts.push(MachineInst {
1772 opcode,
1773 operands: vec![
1774 MachineOperand::VReg(dest),
1775 MachineOperand::VReg(va),
1776 MachineOperand::VReg(vb),
1777 ],
1778 def: Some(dest),
1779 });
1780 }
1781 InstKind::VSelect(mask, t, f) => {
1782 // BSL is destructive: bsl Vd.16b, Vn.16b, Vm.16b → for
1783 // each bit, if Vd then Vn else Vm. So we copy the mask
1784 // into the dest first (mov.16b), then bsl with t/f.
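// Net effect per bit: dest = (mask & t) | (!mask & f).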
1785 let vmask = ctx.lookup_vreg(*mask);
1786 let vt = ctx.lookup_vreg(*t);
1787 let vf = ctx.lookup_vreg(*f);
1788 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
1789 mf.block_mut(mb).insts.push(MachineInst {
1790 opcode: ArmOpcode::Mov16B,
1791 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(vmask)],
1792 def: Some(dest),
1793 });
1794 mf.block_mut(mb).insts.push(MachineInst {
1795 opcode: ArmOpcode::BslV16B,
1796 operands: vec![
1797 MachineOperand::VReg(dest),
1798 MachineOperand::VReg(vt),
1799 MachineOperand::VReg(vf),
1800 ],
1801 def: Some(dest),
1802 });
1803 }
1804 InstKind::VLoad(addr) => {
1805 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
1806 let base = ctx.lookup_vreg(*addr);
1807 mf.block_mut(mb).insts.push(MachineInst {
1808 opcode: ArmOpcode::LdrQ,
1809 operands: vec![
1810 MachineOperand::VReg(dest),
1811 MachineOperand::VReg(base),
1812 MachineOperand::Imm(0),
1813 ],
1814 def: Some(dest),
1815 });
1816 }
1817 InstKind::VStore(val, addr) => {
1818 let v = ctx.lookup_vreg(*val);
1819 let base = ctx.lookup_vreg(*addr);
1820 mf.block_mut(mb).insts.push(MachineInst {
1821 opcode: ArmOpcode::StrQ,
1822 operands: vec![
1823 MachineOperand::VReg(v),
1824 MachineOperand::VReg(base),
1825 MachineOperand::Imm(0),
1826 ],
1827 def: None,
1828 });
1829 }
1830 InstKind::VFCmp(op, a, b) => {
// NEON float compares produce an all-ones / all-zeros mask per
// lane. Eq/Ge/Gt map directly to fcmeq/fcmge/fcmgt; Lt and Le
// reuse fcmgt/fcmge with the operands swapped. Ne has no
// single-instruction NEON form (it would need fcmeq plus an
// invert); the vectorizer doesn't emit Ne, so we don't handle
// it yet.
1836 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
1837 let va = ctx.lookup_vreg(*a);
1838 let vb = ctx.lookup_vreg(*b);
1839 let shape = VShape::from_ir(&inst.ty);
1840 let (opcode, swap) = match (shape, op) {
1841 (Some(VShape::F4S), CmpOp::Gt) => (ArmOpcode::FcmgtV4S, false),
1842 (Some(VShape::F2D), CmpOp::Gt) => (ArmOpcode::FcmgtV2D, false),
1843 (Some(VShape::F4S), CmpOp::Ge) => (ArmOpcode::FcmgeV4S, false),
1844 (Some(VShape::F2D), CmpOp::Ge) => (ArmOpcode::FcmgeV2D, false),
1845 (Some(VShape::F4S), CmpOp::Eq) => (ArmOpcode::FcmeqV4S, false),
1846 (Some(VShape::F2D), CmpOp::Eq) => (ArmOpcode::FcmeqV2D, false),
1847 (Some(VShape::F4S), CmpOp::Lt) => (ArmOpcode::FcmgtV4S, true),
1848 (Some(VShape::F2D), CmpOp::Lt) => (ArmOpcode::FcmgtV2D, true),
1849 (Some(VShape::F4S), CmpOp::Le) => (ArmOpcode::FcmgeV4S, true),
1850 (Some(VShape::F2D), CmpOp::Le) => (ArmOpcode::FcmgeV2D, true),
1851 _ => (ArmOpcode::Nop, false),
1852 };
1853 let (lhs, rhs) = if swap { (vb, va) } else { (va, vb) };
1854 mf.block_mut(mb).insts.push(MachineInst {
1855 opcode,
1856 operands: vec![
1857 MachineOperand::VReg(dest),
1858 MachineOperand::VReg(lhs),
1859 MachineOperand::VReg(rhs),
1860 ],
1861 def: Some(dest),
1862 });
1863 }
1864 InstKind::VICmp(op, a, b) => {
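// Integer lane compares produce the same all-ones / all-zeros
// mask. Only the 4×i32 shape is selected so far; 2×i64
// (cmgt.2d and friends) falls through to the Nop placeholder.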
1865 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
1866 let va = ctx.lookup_vreg(*a);
1867 let vb = ctx.lookup_vreg(*b);
1868 let shape = VShape::from_ir(&inst.ty);
1869 let (opcode, swap) = match (shape, op) {
1870 (Some(VShape::V4S), CmpOp::Gt) => (ArmOpcode::CmgtV4S, false),
1871 (Some(VShape::V4S), CmpOp::Ge) => (ArmOpcode::CmgeV4S, false),
1872 (Some(VShape::V4S), CmpOp::Eq) => (ArmOpcode::CmeqV4S, false),
1873 (Some(VShape::V4S), CmpOp::Lt) => (ArmOpcode::CmgtV4S, true),
1874 (Some(VShape::V4S), CmpOp::Le) => (ArmOpcode::CmgeV4S, true),
1875 _ => (ArmOpcode::Nop, false),
1876 };
1877 let (lhs, rhs) = if swap { (vb, va) } else { (va, vb) };
1878 mf.block_mut(mb).insts.push(MachineInst {
1879 opcode,
1880 operands: vec![
1881 MachineOperand::VReg(dest),
1882 MachineOperand::VReg(lhs),
1883 MachineOperand::VReg(rhs),
1884 ],
1885 def: Some(dest),
1886 });
1887 }
1888 InstKind::VBroadcast(scalar) => {
1889 let s = ctx.lookup_vreg(*scalar);
1890 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
1891 // Float scalars live in S/D registers — splatting from
1892 // those uses the lane-dup form (`dup.4s vN, vM.s[0]`).
1893 // Integer scalars live in W/X registers — splatting from
1894 // those uses the gp-dup form (`dup.4s vN, wM`).
1895 let opcode = match VShape::from_ir(&inst.ty) {
1896 Some(VShape::V4S) => ArmOpcode::DupGen4S,
1897 Some(VShape::V2D) => ArmOpcode::DupGen2D,
1898 Some(VShape::F4S) => ArmOpcode::DupEl4S,
1899 Some(VShape::F2D) => ArmOpcode::DupEl2D,
1900 None => ArmOpcode::Nop,
1901 };
1902 mf.block_mut(mb).insts.push(MachineInst {
1903 opcode,
1904 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(s)],
1905 def: Some(dest),
1906 });
1907 }
1908 InstKind::VReduceSum(v) => {
1909 // Cross-lane sum. The reduction instruction writes its
1910 // 32/64-bit result into the FP register file (sN/dN view
1911 // of vN). For float results that's already what we want;
1912 // for int results we follow up with a `umov.s/.d` move
1913 // from the FP lane back into a GP register.
1914 //
// F4S → faddp.4s + faddp.2s (NEON has no faddv.4s)
// F2D → faddp d_dest, v_src.2d
// int(I32) → addv.4s s_tmp, v_src; umov.s w_dest, v_tmp[0]
// int(I64) → addp.2d v_tmp, v_src, v_src; umov.d x_dest, v_tmp[0]
// (the 4-lane i32 sum wraps at i32 width; the
// caller is expected to sign-extend if it
// wanted i64 semantics — matches scalar IAdd)
1922 let src = ctx.lookup_vreg(*v);
1923 match &inst.ty {
1924 IrType::Float(FloatWidth::F32) => {
1925 // NEON has no `faddv.4s`. Reduce 4 f32 lanes
1926 // with two pairwise adds:
1927 // 1) `faddp.4s v_tmp, v_src, v_src`
1928 // → [a+b, c+d, a+b, c+d]
1929 // 2) `faddp.2s s_dest, v_tmp`
1930 // → (a+b)+(c+d) — the full sum
1931 let tmp = mf.new_vreg(RegClass::V128);
1932 mf.block_mut(mb).insts.push(MachineInst {
1933 opcode: ArmOpcode::FaddpV4S,
1934 operands: vec![
1935 MachineOperand::VReg(tmp),
1936 MachineOperand::VReg(src),
1937 MachineOperand::VReg(src),
1938 ],
1939 def: Some(tmp),
1940 });
1941 let class = type_to_reg_class(&inst.ty);
1942 let dest = ctx.get_vreg(mf, inst.id, class);
1943 mf.block_mut(mb).insts.push(MachineInst {
1944 opcode: ArmOpcode::FaddpV2S,
1945 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(tmp)],
1946 def: Some(dest),
1947 });
1948 }
1949 IrType::Float(FloatWidth::F64) => {
1950 let class = type_to_reg_class(&inst.ty);
1951 let dest = ctx.get_vreg(mf, inst.id, class);
1952 mf.block_mut(mb).insts.push(MachineInst {
1953 opcode: ArmOpcode::FaddpV2D,
1954 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
1955 def: Some(dest),
1956 });
1957 }
1958 IrType::Int(IntWidth::I32) => {
1959 // 4×i32 → scalar via `addv.4s s_tmp, v_src` then
1960 // `umov.s w_dest, v_tmp[0]`.
1961 let tmp = mf.new_vreg(RegClass::V128);
1962 mf.block_mut(mb).insts.push(MachineInst {
1963 opcode: ArmOpcode::Addv4S,
1964 operands: vec![MachineOperand::VReg(tmp), MachineOperand::VReg(src)],
1965 def: Some(tmp),
1966 });
1967 let class = type_to_reg_class(&inst.ty);
1968 let dest = ctx.get_vreg(mf, inst.id, class);
1969 mf.block_mut(mb).insts.push(MachineInst {
1970 opcode: ArmOpcode::Umov4S,
1971 operands: vec![
1972 MachineOperand::VReg(dest),
1973 MachineOperand::VReg(tmp),
1974 MachineOperand::Imm(0),
1975 ],
1976 def: Some(dest),
1977 });
1978 }
1979 IrType::Int(IntWidth::I64) => {
1980 // 2×i64 → scalar via pairwise add (`addp.2d
1981 // v_tmp, v_src, v_src`) then `umov.d x_dest,
1982 // v_tmp[0]`. NEON has no `addv.2d`, so the
1983 // pairwise form is the canonical i64 reduce.
1984 let tmp = mf.new_vreg(RegClass::V128);
1985 mf.block_mut(mb).insts.push(MachineInst {
1986 opcode: ArmOpcode::AddpV2D,
1987 operands: vec![
1988 MachineOperand::VReg(tmp),
1989 MachineOperand::VReg(src),
1990 MachineOperand::VReg(src),
1991 ],
1992 def: Some(tmp),
1993 });
1994 let class = type_to_reg_class(&inst.ty);
1995 let dest = ctx.get_vreg(mf, inst.id, class);
1996 mf.block_mut(mb).insts.push(MachineInst {
1997 opcode: ArmOpcode::Umov2D,
1998 operands: vec![
1999 MachineOperand::VReg(dest),
2000 MachineOperand::VReg(tmp),
2001 MachineOperand::Imm(0),
2002 ],
2003 def: Some(dest),
2004 });
2005 }
2006 IrType::Int(_) => {
2007 let class = type_to_reg_class(&inst.ty);
2008 let dest = ctx.get_vreg(mf, inst.id, class);
2009 mf.block_mut(mb).insts.push(MachineInst {
2010 opcode: ArmOpcode::Nop,
2011 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
2012 def: Some(dest),
2013 });
2014 }
2015 _ => {
2016 let class = type_to_reg_class(&inst.ty);
2017 let dest = ctx.get_vreg(mf, inst.id, class);
2018 mf.block_mut(mb).insts.push(MachineInst {
2019 opcode: ArmOpcode::Nop,
2020 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
2021 def: Some(dest),
2022 });
2023 }
2024 }
2025 }
2026 InstKind::VExtract(v, lane) => {
2027 let src = ctx.lookup_vreg(*v);
2028 let class = type_to_reg_class(&inst.ty);
2029 let dest = ctx.get_vreg(mf, inst.id, class);
2030 let opcode = match &inst.ty {
2031 IrType::Int(IntWidth::I32) => ArmOpcode::Umov4S,
2032 IrType::Int(IntWidth::I64) => ArmOpcode::Umov2D,
2033 IrType::Float(FloatWidth::F32) => ArmOpcode::FmovEl4S,
2034 IrType::Float(FloatWidth::F64) => ArmOpcode::FmovEl2D,
2035 _ => ArmOpcode::Nop,
2036 };
2037 mf.block_mut(mb).insts.push(MachineInst {
2038 opcode,
2039 operands: vec![
2040 MachineOperand::VReg(dest),
2041 MachineOperand::VReg(src),
2042 MachineOperand::Imm(*lane as i64),
2043 ],
2044 def: Some(dest),
2045 });
2046 }
2047
2048 InstKind::VMin(a, b) | InstKind::VMax(a, b) => {
2049 let va = ctx.lookup_vreg(*a);
2050 let vb = ctx.lookup_vreg(*b);
2051 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
2052 let is_max = matches!(inst.kind, InstKind::VMax(..));
2053 let opcode = match (VShape::from_ir(&inst.ty), is_max) {
2054 (Some(VShape::V4S), true) => ArmOpcode::SmaxV4S,
2055 (Some(VShape::V4S), false) => ArmOpcode::SminV4S,
2056 (Some(VShape::F4S), true) => ArmOpcode::FmaxV4S,
2057 (Some(VShape::F4S), false) => ArmOpcode::FminV4S,
2058 (Some(VShape::F2D), true) => ArmOpcode::FmaxV2D,
2059 (Some(VShape::F2D), false) => ArmOpcode::FminV2D,
2060 _ => ArmOpcode::Nop,
2061 };
2062 mf.block_mut(mb).insts.push(MachineInst {
2063 opcode,
2064 operands: vec![
2065 MachineOperand::VReg(dest),
2066 MachineOperand::VReg(va),
2067 MachineOperand::VReg(vb),
2068 ],
2069 def: Some(dest),
2070 });
2071 }
2072 InstKind::VReduceMin(v) | InstKind::VReduceMax(v) => {
2073 let src = ctx.lookup_vreg(*v);
2074 let is_max = matches!(inst.kind, InstKind::VReduceMax(..));
2075 match &inst.ty {
2076 IrType::Int(IntWidth::I32) => {
2077 let tmp = mf.new_vreg(RegClass::V128);
2078 let opcode = if is_max {
2079 ArmOpcode::Smaxv4S
2080 } else {
2081 ArmOpcode::Sminv4S
2082 };
2083 mf.block_mut(mb).insts.push(MachineInst {
2084 opcode,
2085 operands: vec![MachineOperand::VReg(tmp), MachineOperand::VReg(src)],
2086 def: Some(tmp),
2087 });
2088 let class = type_to_reg_class(&inst.ty);
2089 let dest = ctx.get_vreg(mf, inst.id, class);
2090 mf.block_mut(mb).insts.push(MachineInst {
2091 opcode: ArmOpcode::Umov4S,
2092 operands: vec![
2093 MachineOperand::VReg(dest),
2094 MachineOperand::VReg(tmp),
2095 MachineOperand::Imm(0),
2096 ],
2097 def: Some(dest),
2098 });
2099 }
2100 IrType::Float(FloatWidth::F32) => {
2101 // fmaxv.4s / fminv.4s s_dest, v_src
2102 let class = type_to_reg_class(&inst.ty);
2103 let dest = ctx.get_vreg(mf, inst.id, class);
2104 let opcode = if is_max {
2105 ArmOpcode::FmaxvV4S
2106 } else {
2107 ArmOpcode::FminvV4S
2108 };
2109 mf.block_mut(mb).insts.push(MachineInst {
2110 opcode,
2111 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
2112 def: Some(dest),
2113 });
2114 }
2115 IrType::Float(FloatWidth::F64) => {
2116 // NEON has no fmaxv.2d; the pairwise scalar form
2117 // (fmaxp.2d d_dest, v_src) is the across-lane
2118 // reduction for two f64 lanes.
2119 let class = type_to_reg_class(&inst.ty);
2120 let dest = ctx.get_vreg(mf, inst.id, class);
2121 let opcode = if is_max {
2122 ArmOpcode::FmaxpV2DScalar
2123 } else {
2124 ArmOpcode::FminpV2DScalar
2125 };
2126 mf.block_mut(mb).insts.push(MachineInst {
2127 opcode,
2128 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
2129 def: Some(dest),
2130 });
2131 }
2132 _ => {
2133 let class = type_to_reg_class(&inst.ty);
2134 let dest = ctx.get_vreg(mf, inst.id, class);
2135 mf.block_mut(mb).insts.push(MachineInst {
2136 opcode: ArmOpcode::Nop,
2137 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
2138 def: Some(dest),
2139 });
2140 }
2141 }
2142 }
2143
// Remaining: ExtractField, InsertField, and the vector ops
// not selected above (VInsert, VBitcast) — placeholder. Land
// per-op as the vectorizer grows in Stage 4.
2147 _ => {
2148 let class = type_to_reg_class(&inst.ty);
2149 let _dest = ctx.get_vreg(mf, inst.id, class);
2150 mf.block_mut(mb).insts.push(MachineInst {
2151 opcode: ArmOpcode::Nop,
2152 operands: vec![],
2153 def: None,
2154 });
2155 }
2156 }
2157 }
2158
2159 /// Select machine instructions for a terminator.
2160 fn select_terminator(
2161 mf: &mut MachineFunction,
2162 ctx: &mut ISelCtx,
2163 mb: MBlockId,
2164 term: &Terminator,
2165 src_block: &BasicBlock,
2166 func: &Function,
2167 ) {
let _ = src_block; // not needed yet — `term` carries its own args; kept for signature symmetry
2169 match term {
2170 Terminator::Return(None) => {
2171 emit_epilogue(mf, mb);
2172 }
2173 Terminator::Return(Some(val)) => {
2174 if matches!(func.value_type(*val), Some(IrType::Int(IntWidth::I128))) {
2175 let src_slot = ctx.lookup_wide_slot(*val);
2176 emit_load_phys_i128_pair(
2177 mf,
2178 mb,
2179 MachineOperand::PhysReg(PhysReg::FP),
2180 src_slot as i64,
2181 PhysReg::Gp(0),
2182 PhysReg::Gp(1),
2183 );
2184 emit_epilogue(mf, mb);
2185 return;
2186 }
2187 // Move result to X0 (integer) or D0 (float).
2188 let src = ctx.lookup_vreg(*val);
2189 let class = mf.vregs.iter().find(|v| v.id == src).map(|v| v.class);
2190 let (reg, opcode) = match class {
2191 Some(RegClass::Fp64) => (PhysReg::Fp(0), ArmOpcode::FmovReg),
2192 Some(RegClass::Fp32) => (PhysReg::Fp32(0), ArmOpcode::FmovReg),
2193 Some(RegClass::Gp32) => (PhysReg::Gp32(0), ArmOpcode::MovReg),
2194 _ => (PhysReg::Gp(0), ArmOpcode::MovReg),
2195 };
2196 mf.block_mut(mb).insts.push(MachineInst {
2197 opcode,
2198 operands: vec![MachineOperand::PhysReg(reg), MachineOperand::VReg(src)],
2199 def: None,
2200 });
2201 emit_epilogue(mf, mb);
2202 }
2203 Terminator::Branch(dest, args) => {
2204 // Emit parallel copy from each branch arg into the
2205 // target block's corresponding param vreg BEFORE the
2206 // actual branch instruction. Without this, block
2207 // parameters introduced by mem2reg or the lowerer
2208 // would never receive their incoming values at edge
2209 // points, producing infinite loops or stale data.
2210 emit_branch_arg_copies(mf, ctx, mb, *dest, args);
2211 let target = ctx.lookup_block(*dest);
2212 mf.block_mut(mb).insts.push(MachineInst {
2213 opcode: ArmOpcode::B,
2214 operands: vec![MachineOperand::BlockRef(target)],
2215 def: None,
2216 });
2217 }
2218 Terminator::CondBranch {
2219 cond,
2220 true_dest,
2221 true_args,
2222 false_dest,
2223 false_args,
2224 } => {
2225 let cond_vreg = ctx.lookup_vreg(*cond);
2226 let true_mb = ctx.lookup_block(*true_dest);
2227 let false_mb = ctx.lookup_block(*false_dest);
2228
// For a conditional branch, each arm's parallel copies must
// run only on that arm's edge; copies emitted inline before
// the branch would also execute on the other path. Target
// shape:
//
//   CMP  cond, #0
//   B.NE true_target   (true_target = true_dest, or a shim)
//   B    false_target  (likewise)
//
// When an arm carries branch args, we materialize a shim
// machine block that performs that arm's copies and then
// jumps to the real destination, and the branch retargets
// to the shim. Arms with no copies branch directly, so the
// common case stays at two instructions.
2248 mf.block_mut(mb).insts.push(MachineInst {
2249 opcode: ArmOpcode::CmpImm,
2250 operands: vec![MachineOperand::VReg(cond_vreg), MachineOperand::Imm(0)],
2251 def: None,
2252 });
2253
2254 // True arm: if there are branch args to copy, create
2255 // a shim block that does the copies then jumps to the
2256 // true destination. Otherwise, branch directly.
2257 let true_target = if true_args.is_empty() {
2258 true_mb
2259 } else {
2260 // Prefix with the function name so labels stay
2261 // unique across functions in the same .s file. Two
2262 // functions could otherwise both emit `L3_true_shim`.
2263 let label = format!("L{}_{}_true_shim", mf.name, mb.0);
2264 let shim = mf.new_block(&label);
2265 emit_branch_arg_copies(mf, ctx, shim, *true_dest, true_args);
2266 mf.block_mut(shim).insts.push(MachineInst {
2267 opcode: ArmOpcode::B,
2268 operands: vec![MachineOperand::BlockRef(true_mb)],
2269 def: None,
2270 });
2271 shim
2272 };
2273
2274 mf.block_mut(mb).insts.push(MachineInst {
2275 opcode: ArmOpcode::BCond,
2276 operands: vec![
2277 MachineOperand::Cond(ArmCond::Ne),
2278 MachineOperand::BlockRef(true_target),
2279 ],
2280 def: None,
2281 });
2282
2283 // False arm: same treatment.
2284 let false_target = if false_args.is_empty() {
2285 false_mb
2286 } else {
2287 let label = format!("L{}_{}_false_shim", mf.name, mb.0);
2288 let shim = mf.new_block(&label);
2289 emit_branch_arg_copies(mf, ctx, shim, *false_dest, false_args);
2290 mf.block_mut(shim).insts.push(MachineInst {
2291 opcode: ArmOpcode::B,
2292 operands: vec![MachineOperand::BlockRef(false_mb)],
2293 def: None,
2294 });
2295 shim
2296 };
2297 mf.block_mut(mb).insts.push(MachineInst {
2298 opcode: ArmOpcode::B,
2299 operands: vec![MachineOperand::BlockRef(false_target)],
2300 def: None,
2301 });
2302 }
2303 Terminator::Switch {
2304 selector,
2305 cases,
2306 default,
2307 } => {
2308 let sel_vreg = ctx.lookup_vreg(*selector);
2309 let default_mb = ctx.lookup_block(*default);
2310
2311 for (val, dest) in cases {
2312 let dest_mb = ctx.lookup_block(*dest);
2313 // CMP selector, #val; B.EQ case_block
2314 mf.block_mut(mb).insts.push(MachineInst {
2315 opcode: ArmOpcode::CmpImm,
2316 operands: vec![MachineOperand::VReg(sel_vreg), MachineOperand::Imm(*val)],
2317 def: None,
2318 });
2319 mf.block_mut(mb).insts.push(MachineInst {
2320 opcode: ArmOpcode::BCond,
2321 operands: vec![
2322 MachineOperand::Cond(ArmCond::Eq),
2323 MachineOperand::BlockRef(dest_mb),
2324 ],
2325 def: None,
2326 });
2327 }
2328 // Default: unconditional branch.
2329 mf.block_mut(mb).insts.push(MachineInst {
2330 opcode: ArmOpcode::B,
2331 operands: vec![MachineOperand::BlockRef(default_mb)],
2332 def: None,
2333 });
2334 }
2335 Terminator::Unreachable => {
2336 // Debug trap — should never execute. brk #1 triggers SIGTRAP.
2337 mf.block_mut(mb).insts.push(MachineInst {
2338 opcode: ArmOpcode::Brk,
2339 operands: vec![MachineOperand::Imm(1)],
2340 def: None,
2341 });
2342 }
2343 }
2344 }
2345
2346 /// Emit the parallel-copy that materializes branch arguments into
2347 /// the target block's parameter vregs.
2348 ///
2349 /// At an SSA block boundary the IR semantics say "all the new values
2350 /// arrive in the target's params simultaneously." On a register
2351 /// machine that means we have to perform multiple `mov` operations
2352 /// such that none of them clobbers a value still needed by another
2353 /// pending move. The classical solution:
2354 ///
2355 /// 1. Skip identity copies (`dst == src`).
2356 /// 2. Repeatedly find a pending copy whose `dst` is **not** also
2357 /// the `src` of some other pending copy. Such a copy is "safe"
2358 /// — emitting it can't trample anything still needed.
2359 /// 3. If every remaining copy is part of a cycle (no safe copy
2360 /// exists), break the cycle by moving the head of any pending
2361 /// copy through a freshly-allocated scratch vreg, then continue.
2362 ///
/// Cycles arise when block params swap with each other across an
/// edge. The lowerer doesn't currently produce that shape, but
/// mem2reg may once we have more sophisticated reaching-definition
/// flow, so handling it now heads off a future codegen bug.
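///
/// A minimal worked example (hypothetical vregs): an edge that
/// swaps two params gives pending = [(p1 ← p2), (p2 ← p1)].
/// Neither copy is safe, so step 3 routes one source through a
/// scratch vreg:
///
/// ```text
/// mov vT, p2   // break the cycle: stash pending[0]'s source
/// mov p2, p1   // now safe: p2 is no longer a pending source
/// mov p1, vT
/// ```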
2367 fn emit_branch_arg_copies(
2368 mf: &mut MachineFunction,
2369 ctx: &ISelCtx,
2370 mb: MBlockId,
2371 target_block: BlockId,
2372 args: &[ValueId],
2373 ) {
2374 if args.is_empty() {
2375 return;
2376 }
2377
2378 // Look up the target block's param vregs in the same order
2379 // they appear in the IR (which is also the order they were
2380 // allocated in Phase 4a, so the i-th arg corresponds to the
2381 // i-th param).
2382 let target_params = ctx
2383 .block_params
2384 .get(&target_block)
2385 .expect("isel: branch target not in block_params snapshot");
2386 if target_params.len() != args.len() {
2387 // Verifier should reject this — but if it leaks through
2388 // we want a clear panic, not silent corruption.
2389 panic!(
2390 "isel: branch arg count {} ≠ target block param count {}",
2391 args.len(),
2392 target_params.len()
2393 );
2394 }
2395
2396 // Build the pending copy lists. Narrow SSA values move through
2397 // vregs; wide i128 values stay stack-backed and must copy slot to
2398 // slot through a temporary register pair.
2399 let mut pending_narrow: Vec<(VRegId, VRegId)> = Vec::with_capacity(args.len());
2400 let mut pending_wide: Vec<(i32, i32)> = Vec::new();
2401 for (arg, bp) in args.iter().zip(target_params.iter()) {
2402 if matches!(bp.ty, IrType::Int(IntWidth::I128)) {
2403 let dst = ctx.lookup_wide_slot(bp.id);
2404 let src = ctx.lookup_wide_slot(*arg);
2405 if dst != src {
2406 pending_wide.push((dst, src));
2407 }
2408 continue;
2409 }
2410 let dst = ctx.lookup_vreg(bp.id);
2411 let src = ctx.lookup_vreg(*arg);
2412 if dst != src {
2413 pending_narrow.push((dst, src));
2414 }
2415 }
2416
2426 // Helper to choose the right move opcode for a vreg's class.
2427 fn move_opcode_for(class: RegClass) -> ArmOpcode {
2428 match class {
2429 // V128 needs `mov.16b` to copy all 128 bits — `fmov d, d`
2430 // would corrupt the upper lanes. Fp64/Fp32 still use
2431 // `fmov` which is the canonical narrow form.
2432 RegClass::V128 => ArmOpcode::Mov16B,
2433 RegClass::Fp64 | RegClass::Fp32 => ArmOpcode::FmovReg,
2434 RegClass::Gp64 | RegClass::Gp32 => ArmOpcode::MovReg,
2435 }
2436 }
2437
2438 let emit_move = |mf: &mut MachineFunction, mb: MBlockId, dst: VRegId, src: VRegId| {
let class = machine_vreg_class(mf, dst);
2440 let opcode = move_opcode_for(class);
2441 mf.block_mut(mb).insts.push(MachineInst {
2442 opcode,
2443 operands: vec![MachineOperand::VReg(dst), MachineOperand::VReg(src)],
2444 def: Some(dst),
2445 });
2446 };
2447
2448 // Iteratively emit safe narrow moves; break cycles via a scratch
2449 // vreg of the same class.
2450 let mut pending = pending_narrow;
2451 while !pending.is_empty() {
2452 let safe_idx = (0..pending.len()).find(|&i| {
2453 let (d, _) = pending[i];
2454 !pending
2455 .iter()
2456 .enumerate()
2457 .any(|(j, &(_, s))| j != i && s == d)
2458 });
2459
2460 if let Some(idx) = safe_idx {
2461 let (d, s) = pending.remove(idx);
2462 emit_move(mf, mb, d, s);
2463 } else {
2464 let (d, s) = pending[0];
let class = machine_vreg_class(mf, s);
2466 let temp = mf.new_vreg(class);
2467 emit_move(mf, mb, temp, s);
2468 pending[0] = (d, temp);
2469 }
2470 }
2471
2472 // Wide i128 block params stay stack-backed, so the same parallel-copy
2473 // algorithm runs on stack slots instead of vregs.
2474 let mut pending = pending_wide;
2475 let mut scratch_slot: Option<i32> = None;
2476 while !pending.is_empty() {
2477 let safe_idx = (0..pending.len()).find(|&i| {
2478 let (d, _) = pending[i];
2479 !pending
2480 .iter()
2481 .enumerate()
2482 .any(|(j, &(_, s))| j != i && s == d)
2483 });
2484
2485 if let Some(idx) = safe_idx {
2486 let (d, s) = pending.remove(idx);
2487 emit_copy_wide_slot(mf, mb, s, d);
2488 } else {
2489 let (d, s) = pending[0];
2490 let temp = if let Some(slot) = scratch_slot {
2491 slot
2492 } else {
2493 let slot = mf.alloc_local(16);
2494 scratch_slot = Some(slot);
2495 slot
2496 };
2497 emit_copy_wide_slot(mf, mb, s, temp);
2498 pending[0] = (d, temp);
2499 }
2500 }
2501 }
2502
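/// Copy one 16-byte (i128) frame slot to another through the x16/x17
/// scratch register pair.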
2503 fn emit_copy_wide_slot(mf: &mut MachineFunction, mb: MBlockId, src_slot: i32, dst_slot: i32) {
2504 emit_load_phys_i128_pair(
2505 mf,
2506 mb,
2507 MachineOperand::PhysReg(PhysReg::FP),
2508 src_slot as i64,
2509 PhysReg::Gp(16),
2510 PhysReg::Gp(17),
2511 );
2512 emit_store_phys_i128_pair(
2513 mf,
2514 mb,
2515 MachineOperand::PhysReg(PhysReg::FP),
2516 dst_slot as i64,
2517 PhysReg::Gp(16),
2518 PhysReg::Gp(17),
2519 );
2520 }
2521
2522 // ---- Helpers ----
2523
2524 /// Emit function prologue:
2525 /// stp x29, x30, [sp, #-FRAME_SIZE]!
2526 /// add x29, sp, #FRAME_SIZE - 16
2527 /// FP points at the saved FP/LR pair at the top of the frame.
2528 fn emit_prologue(mf: &mut MachineFunction, mb: MBlockId) {
2529 // STP x29, x30, [sp, #-FRAME_SIZE]!
2530 mf.block_mut(mb).insts.push(MachineInst {
2531 opcode: ArmOpcode::StpPre,
2532 operands: vec![
2533 MachineOperand::PhysReg(PhysReg::FP),
2534 MachineOperand::PhysReg(PhysReg::LR),
2535 MachineOperand::PhysReg(PhysReg::Sp),
2536 ],
2537 def: None,
2538 });
2539 // ADD x29, sp, #FRAME_SIZE - 16
2540 // (frame_size - 16 computed during emission when final size is known)
2541 mf.block_mut(mb).insts.push(MachineInst {
2542 opcode: ArmOpcode::AddImm,
2543 operands: vec![
2544 MachineOperand::PhysReg(PhysReg::FP),
2545 MachineOperand::PhysReg(PhysReg::Sp),
2546 MachineOperand::Imm(-1), // sentinel: replaced with frame_size-16 during emit
2547 ],
2548 def: None,
2549 });
2550 }
2551
2552 /// Emit function epilogue:
2553 /// ldp x29, x30, [sp, #FRAME_SIZE-16]
2554 /// add sp, sp, #FRAME_SIZE
2555 /// ret
2556 fn emit_epilogue(mf: &mut MachineFunction, mb: MBlockId) {
2557 // LDP + ADD emitted as a single LdpPost pseudo-op, expanded during emit.
2558 mf.block_mut(mb).insts.push(MachineInst {
2559 opcode: ArmOpcode::LdpPost,
2560 operands: vec![
2561 MachineOperand::PhysReg(PhysReg::FP),
2562 MachineOperand::PhysReg(PhysReg::LR),
2563 MachineOperand::PhysReg(PhysReg::Sp),
2564 ],
2565 def: None,
2566 });
2567 mf.block_mut(mb).insts.push(MachineInst {
2568 opcode: ArmOpcode::Ret,
2569 operands: vec![],
2570 def: None,
2571 });
2572 }
2573
2574 fn split_i128_words(value: i128) -> (u64, u64) {
2575 let bits = value as u128;
2576 (bits as u64, (bits >> 64) as u64)
2577 }
2578
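/// Materialize a 64-bit constant into a physical register with a
/// movz/movk chunk sequence; zero short-circuits to `mov dest, xzr`.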
2579 fn emit_const_u64_phys(mf: &mut MachineFunction, mb: MBlockId, dest: PhysReg, value: u64) {
2580 if value == 0 {
2581 mf.block_mut(mb).insts.push(MachineInst {
2582 opcode: ArmOpcode::MovReg,
2583 operands: vec![
2584 MachineOperand::PhysReg(dest),
2585 MachineOperand::PhysReg(PhysReg::Xzr),
2586 ],
2587 def: None,
2588 });
2589 return;
2590 }
2591
2592 let mut first = true;
2593 for i in 0..4 {
2594 let shift = i * 16;
2595 let chunk = ((value >> shift) & 0xFFFF) as u16;
2596 if chunk != 0 || (first && i == 3) {
2597 let opcode = if first {
2598 ArmOpcode::Movz
2599 } else {
2600 ArmOpcode::Movk
2601 };
2602 mf.block_mut(mb).insts.push(MachineInst {
2603 opcode,
2604 operands: vec![
2605 MachineOperand::PhysReg(dest),
2606 MachineOperand::Imm(chunk as i64),
2607 MachineOperand::Shift(shift as u8),
2608 ],
2609 def: None,
2610 });
2611 first = false;
2612 }
2613 }
2614 }
2615
2616 fn emit_const_i128_to_phys_pair(
2617 mf: &mut MachineFunction,
2618 mb: MBlockId,
2619 value: i128,
2620 lo: PhysReg,
2621 hi: PhysReg,
2622 ) {
2623 let (low_word, high_word) = split_i128_words(value);
2624 emit_const_u64_phys(mf, mb, lo, low_word);
2625 emit_const_u64_phys(mf, mb, hi, high_word);
2626 }
2627
2628 fn emit_store_phys_i128_pair(
2629 mf: &mut MachineFunction,
2630 mb: MBlockId,
2631 base: MachineOperand,
2632 offset: i64,
2633 lo: PhysReg,
2634 hi: PhysReg,
2635 ) {
2636 mf.block_mut(mb).insts.push(MachineInst {
2637 opcode: ArmOpcode::StpOffset,
2638 operands: vec![
2639 MachineOperand::PhysReg(lo),
2640 MachineOperand::PhysReg(hi),
2641 base,
2642 MachineOperand::Imm(offset),
2643 ],
2644 def: None,
2645 });
2646 }
2647
2648 fn emit_load_phys_u64(
2649 mf: &mut MachineFunction,
2650 mb: MBlockId,
2651 base: MachineOperand,
2652 offset: i64,
2653 dest: PhysReg,
2654 ) {
2655 mf.block_mut(mb).insts.push(MachineInst {
2656 opcode: ArmOpcode::LdrImm,
2657 operands: vec![
2658 MachineOperand::PhysReg(dest),
2659 base,
2660 MachineOperand::Imm(offset),
2661 ],
2662 def: None,
2663 });
2664 }
2665
2666 fn emit_load_phys_i128_pair(
2667 mf: &mut MachineFunction,
2668 mb: MBlockId,
2669 base: MachineOperand,
2670 offset: i64,
2671 lo: PhysReg,
2672 hi: PhysReg,
2673 ) {
2674 mf.block_mut(mb).insts.push(MachineInst {
2675 opcode: ArmOpcode::LdpOffset,
2676 operands: vec![
2677 MachineOperand::PhysReg(lo),
2678 MachineOperand::PhysReg(hi),
2679 base,
2680 MachineOperand::Imm(offset),
2681 ],
2682 def: None,
2683 });
2684 }
2685
2686 fn emit_load_stack_arg_into_vreg(
2687 mf: &mut MachineFunction,
2688 mb: MBlockId,
2689 dest: VRegId,
2690 class: RegClass,
2691 ty: &IrType,
2692 offset: i64,
2693 ) {
2694 let opcode = load_opcode_for(ty, class);
2695 mf.block_mut(mb).insts.push(MachineInst {
2696 opcode,
2697 operands: vec![
2698 MachineOperand::VReg(dest),
2699 MachineOperand::PhysReg(PhysReg::FP),
2700 MachineOperand::Imm(offset),
2701 ],
2702 def: Some(dest),
2703 });
2704 }
2705
2706 fn emit_store_stack_arg_from_vreg(
2707 mf: &mut MachineFunction,
2708 mb: MBlockId,
2709 src: VRegId,
2710 class: RegClass,
2711 ty: &IrType,
2712 offset: i64,
2713 ) {
2714 let opcode = store_opcode_for(Some(ty), class);
2715 mf.block_mut(mb).insts.push(MachineInst {
2716 opcode,
2717 operands: vec![
2718 MachineOperand::VReg(src),
2719 MachineOperand::PhysReg(PhysReg::Sp),
2720 MachineOperand::Imm(offset),
2721 ],
2722 def: None,
2723 });
2724 }
2725
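/// Two-limb i128 add: `adds` the low words (setting the carry flag),
/// then `adc` folds the carry into the high words. A sketch of the
/// emitted sequence, using the scratch assignments that
/// `emit_i128_binop_via_slots` passes in (x16/x17 limbs, x8 scratch):
///
/// ```text
/// ldr  x8,  [fp, #rhs]      // rhs low word
/// adds x16, x16, x8         // lo += rhs.lo, sets C
/// ldr  x8,  [fp, #rhs+8]    // rhs high word
/// adc  x17, x17, x8         // hi += rhs.hi + C
/// ```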
2726 fn emit_i128_add_from_slot(
2727 mf: &mut MachineFunction,
2728 mb: MBlockId,
2729 rhs_base: MachineOperand,
2730 rhs_offset: i64,
2731 lo: PhysReg,
2732 hi: PhysReg,
2733 scratch: PhysReg,
2734 ) {
2735 emit_load_phys_u64(mf, mb, rhs_base.clone(), rhs_offset, scratch);
2736 mf.block_mut(mb).insts.push(MachineInst {
2737 opcode: ArmOpcode::AddsReg,
2738 operands: vec![
2739 MachineOperand::PhysReg(lo),
2740 MachineOperand::PhysReg(lo),
2741 MachineOperand::PhysReg(scratch),
2742 ],
2743 def: None,
2744 });
2745 emit_load_phys_u64(mf, mb, rhs_base, rhs_offset + 8, scratch);
2746 mf.block_mut(mb).insts.push(MachineInst {
2747 opcode: ArmOpcode::AdcReg,
2748 operands: vec![
2749 MachineOperand::PhysReg(hi),
2750 MachineOperand::PhysReg(hi),
2751 MachineOperand::PhysReg(scratch),
2752 ],
2753 def: None,
2754 });
2755 }
2756
2757 fn emit_i128_sub_from_slot(
2758 mf: &mut MachineFunction,
2759 mb: MBlockId,
2760 rhs_base: MachineOperand,
2761 rhs_offset: i64,
2762 lo: PhysReg,
2763 hi: PhysReg,
2764 scratch: PhysReg,
2765 ) {
2766 emit_load_phys_u64(mf, mb, rhs_base.clone(), rhs_offset, scratch);
2767 mf.block_mut(mb).insts.push(MachineInst {
2768 opcode: ArmOpcode::SubsReg,
2769 operands: vec![
2770 MachineOperand::PhysReg(lo),
2771 MachineOperand::PhysReg(lo),
2772 MachineOperand::PhysReg(scratch),
2773 ],
2774 def: None,
2775 });
2776 emit_load_phys_u64(mf, mb, rhs_base, rhs_offset + 8, scratch);
2777 mf.block_mut(mb).insts.push(MachineInst {
2778 opcode: ArmOpcode::SbcReg,
2779 operands: vec![
2780 MachineOperand::PhysReg(hi),
2781 MachineOperand::PhysReg(hi),
2782 MachineOperand::PhysReg(scratch),
2783 ],
2784 def: None,
2785 });
2786 }
2787
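/// Two's-complement negate of an i128 limb pair in place:
/// `subs lo, xzr, lo` then `sbc hi, xzr, hi`, i.e. 0 - value with
/// borrow propagation.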
2788 fn emit_i128_neg(mf: &mut MachineFunction, mb: MBlockId, lo: PhysReg, hi: PhysReg) {
2789 mf.block_mut(mb).insts.push(MachineInst {
2790 opcode: ArmOpcode::SubsReg,
2791 operands: vec![
2792 MachineOperand::PhysReg(lo),
2793 MachineOperand::PhysReg(PhysReg::Xzr),
2794 MachineOperand::PhysReg(lo),
2795 ],
2796 def: None,
2797 });
2798 mf.block_mut(mb).insts.push(MachineInst {
2799 opcode: ArmOpcode::SbcReg,
2800 operands: vec![
2801 MachineOperand::PhysReg(hi),
2802 MachineOperand::PhysReg(PhysReg::Xzr),
2803 MachineOperand::PhysReg(hi),
2804 ],
2805 def: None,
2806 });
2807 }
2808
/// Emit a constant integer using a movz/movk sequence.
/// Respects width: 32-bit values are masked to 32 bits and use only shifts 0/16.
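/// A short sketch of the decomposition for a hypothetical value:
///
/// ```text
/// // val = 0x1234_0000_ABCD
/// movz dest, #0xABCD, lsl #0    // first non-zero 16-bit chunk
/// movk dest, #0x1234, lsl #32   // the zero chunk at lsl #16 is skipped
/// ```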
2811 fn emit_const_int(
2812 mf: &mut MachineFunction,
2813 mb: MBlockId,
2814 dest: VRegId,
2815 val: i128,
2816 width: IntWidth,
2817 ) {
2818 debug_assert!(
2819 width != IntWidth::I128,
2820 "backend should reject i128 before isel"
2821 );
2822 let is_32 = matches!(width, IntWidth::I8 | IntWidth::I16 | IntWidth::I32);
2823 // Mask to the appropriate width to prevent sign-extension artifacts.
2824 let uval = if is_32 {
2825 (val as u32) as u64
2826 } else {
2827 val as u64
2828 };
2829 let max_shift = if is_32 { 2 } else { 4 }; // 2 chunks for 32-bit, 4 for 64-bit
2830
2831 if uval == 0 {
2832 let zr = if is_32 { PhysReg::Wzr } else { PhysReg::Xzr };
2833 mf.block_mut(mb).insts.push(MachineInst {
2834 opcode: ArmOpcode::MovReg,
2835 operands: vec![MachineOperand::VReg(dest), MachineOperand::PhysReg(zr)],
2836 def: Some(dest),
2837 });
2838 return;
2839 }
2840
2841 // MOVZ for the first non-zero 16-bit chunk, MOVK for the rest.
2842 let mut first = true;
2843 for i in 0..max_shift {
2844 let shift = i * 16;
2845 let chunk = ((uval >> shift) & 0xFFFF) as u16;
2846 if chunk != 0 || (first && i == max_shift - 1) {
2847 let opcode = if first {
2848 ArmOpcode::Movz
2849 } else {
2850 ArmOpcode::Movk
2851 };
2852 mf.block_mut(mb).insts.push(MachineInst {
2853 opcode,
2854 operands: vec![
2855 MachineOperand::VReg(dest),
2856 MachineOperand::Imm(chunk as i64),
2857 MachineOperand::Shift(shift as u8),
2858 ],
2859 def: Some(dest),
2860 });
2861 first = false;
2862 }
2863 }
2864
2865 if first {
2866 let zr = if is_32 { PhysReg::Wzr } else { PhysReg::Xzr };
2867 mf.block_mut(mb).insts.push(MachineInst {
2868 opcode: ArmOpcode::MovReg,
2869 operands: vec![MachineOperand::VReg(dest), MachineOperand::PhysReg(zr)],
2870 def: Some(dest),
2871 });
2872 }
2873 }
2874
2875 /// Emit a register-register binary op.
2876 fn emit_binop(
2877 mf: &mut MachineFunction,
2878 ctx: &mut ISelCtx,
2879 mb: MBlockId,
2880 inst: &Inst,
2881 opcode: ArmOpcode,
2882 a: ValueId,
2883 b: ValueId,
2884 ) {
2885 let class = type_to_reg_class(&inst.ty);
2886 let dest = ctx.get_vreg(mf, inst.id, class);
2887 let va = ctx.lookup_vreg(a);
2888 let vb = ctx.lookup_vreg(b);
2889 mf.block_mut(mb).insts.push(MachineInst {
2890 opcode,
2891 operands: vec![
2892 MachineOperand::VReg(dest),
2893 MachineOperand::VReg(va),
2894 MachineOperand::VReg(vb),
2895 ],
2896 def: Some(dest),
2897 });
2898 }
2899
2900 /// Emit a NEON vector binary op. The `pick` closure resolves the
2901 /// concrete `ArmOpcode` from the result vector's lane shape — that
2902 /// keeps the per-op InstKind arms one-line.
2903 fn emit_vbinop(
2904 mf: &mut MachineFunction,
2905 ctx: &mut ISelCtx,
2906 mb: MBlockId,
2907 inst: &Inst,
2908 a: ValueId,
2909 b: ValueId,
2910 pick: impl FnOnce(VShape) -> ArmOpcode,
2911 ) {
2912 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
2913 let va = ctx.lookup_vreg(a);
2914 let vb = ctx.lookup_vreg(b);
2915 let opcode = match VShape::from_ir(&inst.ty) {
2916 Some(s) => pick(s),
2917 None => ArmOpcode::Nop,
2918 };
2919 mf.block_mut(mb).insts.push(MachineInst {
2920 opcode,
2921 operands: vec![
2922 MachineOperand::VReg(dest),
2923 MachineOperand::VReg(va),
2924 MachineOperand::VReg(vb),
2925 ],
2926 def: Some(dest),
2927 });
2928 }
2929
2930 /// Emit a NEON vector unary op (one source, one result, both V128).
2931 fn emit_vunop(
2932 mf: &mut MachineFunction,
2933 ctx: &mut ISelCtx,
2934 mb: MBlockId,
2935 inst: &Inst,
2936 a: ValueId,
2937 pick: impl FnOnce(VShape) -> ArmOpcode,
2938 ) {
2939 let dest = ctx.get_vreg(mf, inst.id, RegClass::V128);
2940 let va = ctx.lookup_vreg(a);
2941 let opcode = match VShape::from_ir(&inst.ty) {
2942 Some(s) => pick(s),
2943 None => ArmOpcode::Nop,
2944 };
2945 mf.block_mut(mb).insts.push(MachineInst {
2946 opcode,
2947 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(va)],
2948 def: Some(dest),
2949 });
2950 }
2951
2952 /// Emit a float binary op, selecting single or double precision.
2953 #[allow(clippy::too_many_arguments)]
2954 fn emit_float_binop(
2955 mf: &mut MachineFunction,
2956 ctx: &mut ISelCtx,
2957 mb: MBlockId,
2958 inst: &Inst,
2959 ty: &IrType,
2960 a: ValueId,
2961 b: ValueId,
2962 op_s: ArmOpcode,
2963 op_d: ArmOpcode,
2964 ) {
2965 let (class, opcode) = match ty {
2966 IrType::Float(FloatWidth::F32) => (RegClass::Fp32, op_s),
2967 _ => (RegClass::Fp64, op_d),
2968 };
2969 let dest = ctx.get_vreg(mf, inst.id, class);
2970 let va = ctx.lookup_vreg(a);
2971 let vb = ctx.lookup_vreg(b);
2972 mf.block_mut(mb).insts.push(MachineInst {
2973 opcode,
2974 operands: vec![
2975 MachineOperand::VReg(dest),
2976 MachineOperand::VReg(va),
2977 MachineOperand::VReg(vb),
2978 ],
2979 def: Some(dest),
2980 });
2981 }
2982
/// Pick the load opcode for a value of the given IR type and reg class.
2985 /// Narrow integer types use the sign-extending byte/half loads; floats
2986 /// route to the FP-imm load; everything else falls through to `LdrImm`
2987 /// or `LdrFpImm` per reg class. The reg-class fallback matters when
2988 /// `ty` is a generic pointer or aggregate (e.g., a stack-arg copy that
2989 /// only knows the destination's register kind).
2990 fn load_opcode_for(ty: &IrType, class: RegClass) -> ArmOpcode {
2991 match ty {
2992 IrType::Int(IntWidth::I8) | IrType::Bool => ArmOpcode::LdrsbImm,
2993 IrType::Int(IntWidth::I16) => ArmOpcode::LdrshImm,
2994 IrType::Float(_) => ArmOpcode::LdrFpImm,
2995 _ => match class {
2996 RegClass::Fp64 | RegClass::Fp32 => ArmOpcode::LdrFpImm,
2997 RegClass::V128 => ArmOpcode::LdrQ,
2998 RegClass::Gp32 | RegClass::Gp64 => ArmOpcode::LdrImm,
2999 },
3000 }
3001 }
3002
3003 /// Mirror of `load_opcode_for` for stores. Audit CRITICAL-2: the
3004 /// `ty` here must be the *value's* declared IR type, not the pointer
3005 /// or pointee — byte-level GEPs reuse `ptr<i8>` as a generic offset
3006 /// cursor, so dispatching by pointee width would silently truncate
3007 /// non-byte stores. Pass `None` for `ty` when only the reg class is
3008 /// available; in that case the helper falls through to the class-only
3009 /// branch.
3010 fn store_opcode_for(ty: Option<&IrType>, class: RegClass) -> ArmOpcode {
3011 match ty {
3012 Some(IrType::Int(IntWidth::I8)) | Some(IrType::Bool) => ArmOpcode::StrbImm,
3013 Some(IrType::Int(IntWidth::I16)) => ArmOpcode::StrhImm,
3014 Some(IrType::Float(_)) => ArmOpcode::StrFpImm,
3015 _ => match class {
3016 RegClass::Fp64 | RegClass::Fp32 => ArmOpcode::StrFpImm,
3017 RegClass::V128 => ArmOpcode::StrQ,
3018 RegClass::Gp32 | RegClass::Gp64 => ArmOpcode::StrImm,
3019 },
3020 }
3021 }
3022
3023 /// Resolve an IR address value to the (base, offset) operand pair
3024 /// expected by `LdrImm`/`StrImm`-family instructions. Alloca addresses
3025 /// fold to `(FP, FrameSlot(offset))` so the assembler can pick the
3026 /// final stack-relative form; everything else becomes
3027 /// `(VReg(addr_vreg), Imm(0))`. Used by both narrow-width Load/Store
3028 /// arms in `select_inst`. The wide-i128 paths build their own operand
3029 /// pairs directly because they target the `emit_*_phys_i128_pair`
3030 /// helpers, which take `i64` offsets and only need a base operand.
3031 fn narrow_load_store_addr(
3032 ctx: &ISelCtx,
3033 addr: ValueId,
3034 ) -> (MachineOperand, MachineOperand) {
3035 if let Some(&offset) = ctx.alloca_offsets.get(&addr) {
3036 (
3037 MachineOperand::PhysReg(PhysReg::FP),
3038 MachineOperand::FrameSlot(offset),
3039 )
3040 } else {
3041 let base = ctx.lookup_vreg(addr);
3042 (MachineOperand::VReg(base), MachineOperand::Imm(0))
3043 }
3044 }
3045
3046 /// Operation tag for `emit_i128_binop_via_slots`. Add and Sub share a
3047 /// load-binop-store skeleton that differs only in which intermediate
3048 /// helper does the arithmetic.
3049 #[derive(Clone, Copy)]
3050 enum I128BinOp {
3051 Add,
3052 Sub,
3053 }
3054
3055 /// Lower an i128 IAdd/ISub: load `lhs_id`'s slot into x16/x17, run the
3056 /// matching `emit_i128_<op>_from_slot` against `rhs_id`, then store
/// the result to `dest_id`'s slot. Replaces the near-identical
/// load/op/store blocks in the i128 dispatch (IAdd / ISub).
3059 fn emit_i128_binop_via_slots(
3060 mf: &mut MachineFunction,
3061 ctx: &ISelCtx,
3062 mb: MBlockId,
3063 op: I128BinOp,
3064 dest_id: ValueId,
3065 lhs_id: ValueId,
3066 rhs_id: ValueId,
3067 ) {
3068 let dest_slot = ctx.lookup_wide_slot(dest_id);
3069 let lhs_slot = ctx.lookup_wide_slot(lhs_id);
3070 let rhs_slot = ctx.lookup_wide_slot(rhs_id);
3071 let fp = || MachineOperand::PhysReg(PhysReg::FP);
3072 emit_load_phys_i128_pair(mf, mb, fp(), lhs_slot as i64, PhysReg::Gp(16), PhysReg::Gp(17));
3073 match op {
3074 I128BinOp::Add => emit_i128_add_from_slot(
3075 mf,
3076 mb,
3077 fp(),
3078 rhs_slot as i64,
3079 PhysReg::Gp(16),
3080 PhysReg::Gp(17),
3081 PhysReg::Gp(8),
3082 ),
3083 I128BinOp::Sub => emit_i128_sub_from_slot(
3084 mf,
3085 mb,
3086 fp(),
3087 rhs_slot as i64,
3088 PhysReg::Gp(16),
3089 PhysReg::Gp(17),
3090 PhysReg::Gp(8),
3091 ),
3092 }
3093 emit_store_phys_i128_pair(mf, mb, fp(), dest_slot as i64, PhysReg::Gp(16), PhysReg::Gp(17));
3094 }
3095
/// Map IR type to register class.
fn type_to_reg_class(ty: &IrType) -> RegClass {
3097 match ty {
3098 IrType::Float(FloatWidth::F32) => RegClass::Fp32,
3099 IrType::Float(FloatWidth::F64) => RegClass::Fp64,
3100 IrType::Vector { .. } => RegClass::V128,
3101 IrType::Int(IntWidth::I8)
3102 | IrType::Int(IntWidth::I16)
3103 | IrType::Int(IntWidth::I32)
3104 | IrType::Bool => RegClass::Gp32,
3105 _ => RegClass::Gp64,
3106 }
3107 }
3108
3109 /// Vector lane shape for NEON opcode dispatch.
3110 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
3111 enum VShape {
3112 /// 4 × i32
3113 V4S,
3114 /// 2 × i64
3115 V2D,
3116 /// 4 × f32
3117 F4S,
3118 /// 2 × f64
3119 F2D,
3120 }
3121
3122 impl VShape {
3123 fn from_ir(ty: &IrType) -> Option<Self> {
3124 let (lanes, elem) = ty.vector_shape()?;
3125 match (lanes, elem) {
3126 (4, IrType::Int(IntWidth::I32)) => Some(Self::V4S),
3127 (2, IrType::Int(IntWidth::I64)) => Some(Self::V2D),
3128 (4, IrType::Float(FloatWidth::F32)) => Some(Self::F4S),
3129 (2, IrType::Float(FloatWidth::F64)) => Some(Self::F2D),
3130 _ => None,
3131 }
3132 }
3133
3134 fn is_float(self) -> bool {
3135 matches!(self, Self::F4S | Self::F2D)
3136 }
3137 }
3138
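/// True when either compare operand is an i64 or a pointer, in which
/// case the narrower operand must be widened to Gp64 before the CMP
/// (see `icmp_operand_vreg`).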
3139 fn needs_wide_icmp_operand(ty: Option<&IrType>, other_ty: Option<&IrType>) -> bool {
3140 matches!(
3141 (ty, other_ty),
3142 (
3143 Some(IrType::Int(IntWidth::I64) | IrType::Ptr(_) | IrType::FuncPtr(_)),
3144 Some(_)
3145 ) | (
3146 Some(_),
3147 Some(IrType::Int(IntWidth::I64) | IrType::Ptr(_) | IrType::FuncPtr(_))
3148 )
3149 )
3150 }
3151
3152 fn zero_extend_cmp_type(ty: Option<&IrType>) -> bool {
3153 matches!(ty, Some(IrType::Bool))
3154 }
3155
3156 fn icmp_operand_vreg(
3157 mf: &mut MachineFunction,
3158 ctx: &mut ISelCtx,
3159 mb: MBlockId,
3160 func: &Function,
3161 value: ValueId,
3162 other: ValueId,
3163 ) -> VRegId {
3164 let value_ty = func.value_type(value);
3165 let other_ty = func.value_type(other);
3166 let src = ctx.lookup_vreg(value);
3167
3168 if !needs_wide_icmp_operand(value_ty.as_ref(), other_ty.as_ref()) {
3169 return src;
3170 }
3171
3172 if matches!(
3173 value_ty,
3174 Some(IrType::Int(IntWidth::I64) | IrType::Ptr(_) | IrType::FuncPtr(_))
3175 ) {
3176 return src;
3177 }
3178
3179 let dest = mf.new_vreg(RegClass::Gp64);
3180 let opcode = if zero_extend_cmp_type(value_ty.as_ref()) {
3181 ArmOpcode::MovReg
3182 } else {
3183 ArmOpcode::Sxtw
3184 };
3185 mf.block_mut(mb).insts.push(MachineInst {
3186 opcode,
3187 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
3188 def: Some(dest),
3189 });
3190 dest
3191 }
3192
3193 fn machine_vreg_class(mf: &MachineFunction, vreg: VRegId) -> RegClass {
3194 mf.vregs
3195 .iter()
3196 .find(|r| r.id == vreg)
3197 .map(|r| r.class)
3198 .expect("isel: vreg not registered")
3199 }
3200
3201 fn coerce_select_operand_vreg(
3202 mf: &mut MachineFunction,
3203 ctx: &mut ISelCtx,
3204 mb: MBlockId,
3205 func: &Function,
3206 value: ValueId,
3207 target_ty: &IrType,
3208 ) -> VRegId {
3209 let src = ctx.lookup_vreg(value);
3210 let src_class = machine_vreg_class(mf, src);
3211 let target_class = type_to_reg_class(target_ty);
3212 if src_class == target_class {
3213 return src;
3214 }
3215
3216 let dest = mf.new_vreg(target_class);
3217 let src_ty = func.value_type(value);
3218 let opcode = match (src_class, target_class) {
3219 (RegClass::Gp32, RegClass::Gp64) => {
3220 if matches!(target_ty, IrType::Ptr(_) | IrType::FuncPtr(_))
3221 || zero_extend_cmp_type(src_ty.as_ref())
3222 {
3223 ArmOpcode::MovReg
3224 } else {
3225 match src_ty.as_ref() {
3226 Some(IrType::Int(IntWidth::I8)) => ArmOpcode::Sxtb,
3227 Some(IrType::Int(IntWidth::I16)) => ArmOpcode::Sxth,
3228 Some(IrType::Int(IntWidth::I32)) | Some(IrType::Bool) => ArmOpcode::Sxtw,
3229 _ => ArmOpcode::MovReg,
3230 }
3231 }
3232 }
3233 (RegClass::Gp64, RegClass::Gp32) => ArmOpcode::MovReg,
3234 (RegClass::Fp32, RegClass::Fp64) => ArmOpcode::FcvtDS,
3235 (RegClass::Fp64, RegClass::Fp32) => ArmOpcode::FcvtSD,
3236 _ => ArmOpcode::MovReg,
3237 };
3238
3239 mf.block_mut(mb).insts.push(MachineInst {
3240 opcode,
3241 operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
3242 def: Some(dest),
3243 });
3244 dest
3245 }
3246
3247 fn int_width_class(w: &IntWidth) -> RegClass {
3248 match w {
3249 IntWidth::I64 => RegClass::Gp64,
3250 _ => RegClass::Gp32,
3251 }
3252 }
3253
3254 fn float_width_class(w: &FloatWidth) -> RegClass {
3255 match w {
3256 FloatWidth::F32 => RegClass::Fp32,
3257 FloatWidth::F64 => RegClass::Fp64,
3258 }
3259 }
3260
3261 /// Map IR comparison op to ARM64 condition code (for integer CMP).
3262 /// Pre-scan a function to find ICmp/FCmp → Select fusion candidates.
3263 ///
3264 /// An ICmp/FCmp is a fusion candidate when:
3265 /// 1. Its result is used exactly once in the entire function.
3266 /// 2. That single use is a `Select` instruction in the same block.
3267 /// 3. No intervening instruction between the ICmp and the Select in
3268 /// that block clobbers NZCV flags (another ICmp/FCmp or a Call).
3269 ///
3270 /// For candidates, we suppress CSET during ICmp lowering and store
3271 /// the ARM condition in `ctx.fused_arm_cond` so the Select can pick
3272 /// it up and emit `CSEL dest, tv, fv, <cond>` directly.
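///
/// A sketch of the intended lowering (hypothetical names):
///
/// ```text
/// %c = icmp lt %x, %y      ; sole use is the select below
/// %s = select %c, %a, %b
///
/// cmp  wX, wY              ; sets NZCV once
/// csel wS, wA, wB, lt      ; no cset, no second cmp
/// ```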
3273 fn compute_csel_fusion(func: &Function, ctx: &mut ISelCtx) {
3274 // Build global use counts.
3275 let mut use_count: HashMap<ValueId, u32> = HashMap::new();
3276 for block in &func.blocks {
3277 for inst in &block.insts {
3278 for vid in crate::ir::walk::inst_uses(&inst.kind) {
3279 *use_count.entry(vid).or_insert(0) += 1;
3280 }
3281 }
3282 if let Some(term) = &block.terminator {
3283 for vid in crate::ir::walk::terminator_uses(term) {
3284 *use_count.entry(vid).or_insert(0) += 1;
3285 }
3286 }
3287 }
3288
3289 // Build a map of ValueId → the block that defines it (instruction defs only).
3290 let mut def_block: HashMap<ValueId, BlockId> = HashMap::new();
3291 for block in &func.blocks {
3292 for inst in &block.insts {
3293 def_block.insert(inst.id, block.id);
3294 }
3295 }
3296
3297 // Per-block scan: walk instructions in order, tracking the most
3298 // recent ICmp/FCmp that hasn't been consumed by a Select yet.
3299 // Any flag-clobbering instruction (another ICmp/FCmp, a call)
3300 // resets the pending set.
3301 for block in &func.blocks {
3302 // The most recently emitted CMP that hasn't been consumed.
// An Option suffices: at most one CMP's flags are live at a time.
3304 let mut pending: Option<ValueId> = None;
3305
3306 for inst in &block.insts {
3307 match &inst.kind {
3308 InstKind::ICmp(op, _, _) => {
3309 if crate::ir::walk::inst_uses(&inst.kind)
3310 .into_iter()
3311 .filter_map(|vid| func.value_type(vid))
3312 .any(|ty| matches!(ty, IrType::Int(IntWidth::I128)))
3313 {
3314 pending = None;
3315 ctx.fused_arm_cond.remove(&inst.id);
3316 continue;
3317 }
3318 // New CMP overwrites NZCV — previous pending is no longer valid.
3319 pending = Some(inst.id);
3320 // Temporarily store the arm cond so we can retrieve it when
3321 // we confirm the Select is the sole user.
3322 ctx.fused_arm_cond.insert(inst.id, cmp_to_arm_cond(*op));
3323 }
3324 InstKind::FCmp(op, _, _) => {
3325 pending = Some(inst.id);
3326 ctx.fused_arm_cond.insert(inst.id, fcmp_to_arm_cond(*op));
3327 }
3328 InstKind::Select(cond, _, _) => {
3329 if let Some(p) = pending {
3330 if p == *cond
3331 && use_count.get(cond) == Some(&1)
3332 && def_block.get(cond) == Some(&block.id)
3333 {
3334 // Confirmed: fuse this ICmp into the Select.
3335 ctx.select_fused.insert(*cond);
3336 pending = None;
3337 } else {
3338 // The Select isel for an unfused cond emits
3339 // its own `cmp cond_reg, #0` to set NZCV,
3340 // which clobbers any pending fused ICmp's
3341 // flags. Drop the pending so a later Select
3342 // doesn't try to read stale flags.
3343 pending = None;
3344 }
3345 }
3346 }
3347 // Calls may clobber NZCV (per AAPCS64, flags are not preserved).
3348 InstKind::Call(_, _) | InstKind::RuntimeCall(_, _) => {
3349 pending = None;
3350 }
3351 _ => {}
3352 }
3353 }
3354
// After this scan, fused_arm_cond still holds entries for ICmps
// that turned out NOT to be fused (use_count > 1, or never
// consumed by a Select); the retain below keeps only the
// confirmed ones. Deferring the cleanup until every block has
// been scanned is safe: SSA gives each ValueId a single defining
// block, so a later block can never resurrect a dropped entry.
3362 }
3363
3364 // Remove arm_cond entries for non-fused ICmps.
3365 ctx.fused_arm_cond
3366 .retain(|vid, _| ctx.select_fused.contains(vid));
3367 }
3368
/// Map IR comparison op to ARM64 condition code (for integer CMP).
fn cmp_to_arm_cond(op: CmpOp) -> ArmCond {
3370 match op {
3371 CmpOp::Eq => ArmCond::Eq,
3372 CmpOp::Ne => ArmCond::Ne,
3373 CmpOp::Lt => ArmCond::Lt,
3374 CmpOp::Le => ArmCond::Le,
3375 CmpOp::Gt => ArmCond::Gt,
3376 CmpOp::Ge => ArmCond::Ge,
3377 }
3378 }
3379
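/// Condition pair for an ordered i128 compare: the first condition is
/// applied to the signed high-limb compare, the second (unsigned) to
/// the low limbs when the high limbs are equal.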
3380 fn i128_ordered_conds(op: CmpOp) -> (ArmCond, ArmCond) {
3381 match op {
3382 CmpOp::Lt => (ArmCond::Lt, ArmCond::Lo),
3383 CmpOp::Le => (ArmCond::Lt, ArmCond::Ls),
3384 CmpOp::Gt => (ArmCond::Gt, ArmCond::Hi),
3385 CmpOp::Ge => (ArmCond::Gt, ArmCond::Hs),
3386 _ => panic!("ordered i128 compare requires lt/le/gt/ge, got {:?}", op),
3387 }
3388 }
3389
3390 /// Map IR comparison op to ARM64 condition code (for float FCMP).
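/// After a scalar FCMP the NZCV flags encode: less sets N; equal sets
/// Z and C; greater sets C; unordered sets C and V. MI and LS are
/// therefore the Lt/Le picks that stay false on NaN inputs.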
3391 fn fcmp_to_arm_cond(op: CmpOp) -> ArmCond {
3392 match op {
3393 CmpOp::Eq => ArmCond::Eq,
3394 CmpOp::Ne => ArmCond::Ne,
CmpOp::Lt => ArmCond::Mi, // N set only when the compare is "less"
CmpOp::Le => ArmCond::Ls, // C clear or Z set ⇔ less-or-equal after FCMP
3397 CmpOp::Gt => ArmCond::Gt,
3398 CmpOp::Ge => ArmCond::Ge,
3399 }
3400 }
3401
3402 /// Compute allocation size for an IR type.
3403 fn alloca_size(ty: &IrType) -> u32 {
3404 match ty {
3405 IrType::Void => 0,
3406 IrType::Bool => 4, // use 4 bytes for alignment
3407 IrType::Int(w) => w.bytes(),
3408 IrType::Float(w) => w.bytes(),
3409 IrType::Ptr(_) => 8,
3410 IrType::Array(elem, count) => {
3411 // Stack storage uses ABI-sized elements. Fortran LOGICAL arrays are
3412 // stored as default-kind 4-byte elements, even though Bool SSA
3413 // values themselves remain byte-sized.
3414 let elem_size = match elem.as_ref() {
3415 IrType::Bool => 4,
3416 IrType::Struct(_) => alloca_size(elem),
3417 _ => elem.size_bytes() as u32,
3418 };
3419 elem_size * (*count as u32)
3420 }
3421 IrType::FuncPtr(_) => 8,
3422 IrType::Struct(_) => 8, // placeholder
3423 IrType::Vector { .. } => 16, // 128-bit NEON
3424 }
3425 }
3426
/// Get the C-level symbol name for a runtime function.
3429 /// The emitter adds the Mach-O `_` prefix when emitting assembly.
3430 fn runtime_func_symbol(rf: &RuntimeFunc, args: &[(ValueId, AbiArgLoc, IrType)]) -> String {
3431 match rf {
3432 RuntimeFunc::PrintInt => {
3433 if args
3434 .first()
3435 .is_some_and(|(_, _, ty)| matches!(ty, IrType::Int(IntWidth::I128)))
3436 {
3437 "afs_print_int128".into()
3438 } else if args
3439 .first()
3440 .is_some_and(|(_, _, ty)| matches!(ty, IrType::Int(IntWidth::I64)))
3441 {
3442 "afs_print_int64".into()
3443 } else {
3444 "afs_print_int".into()
3445 }
3446 }
3447 RuntimeFunc::PrintReal => "afs_print_real".into(),
3448 RuntimeFunc::PrintString => "afs_print_string".into(),
3449 RuntimeFunc::PrintLogical => "afs_print_logical".into(),
3450 RuntimeFunc::PrintNewline => "afs_print_newline".into(),
3451 RuntimeFunc::Allocate => "afs_allocate".into(),
3452 RuntimeFunc::Deallocate => "afs_deallocate".into(),
3453 RuntimeFunc::StringConcat => "afs_string_concat".into(),
3454 RuntimeFunc::StringCopy => "afs_string_copy".into(),
3455 RuntimeFunc::StringCompare => "afs_string_compare".into(),
3456 RuntimeFunc::Stop => "afs_stop".into(),
3457 RuntimeFunc::ErrorStop => "afs_error_stop".into(),
3458 RuntimeFunc::CheckBounds => "afs_check_bounds".into(),
3459 }
3460 }
3461
3462 #[cfg(test)]
3463 mod tests {
3464 use super::*;
3465 use crate::ir::builder::FuncBuilder;
3466
3467 fn select_simple(build: impl FnOnce(&mut FuncBuilder)) -> MachineFunction {
3468 let mut func = Function::new("test".into(), vec![], IrType::Void);
3469 {
3470 let mut b = FuncBuilder::new(&mut func);
3471 build(&mut b);
3472 }
3473 select_function(&func)
3474 }
3475
3476 #[test]
3477 fn select_const_int() {
3478 let mf = select_simple(|b| {
3479 b.const_i32(42);
3480 b.ret_void();
3481 });
3482 let insts = &mf.blocks[0].insts;
// Should have: prologue (STP, ADD), MOVZ #42, epilogue (LDP, RET).
3484 assert!(insts.iter().any(|i| i.opcode == ArmOpcode::Movz));
3485 }
3486
3487 #[test]
3488 fn select_iadd() {
3489 let mf = select_simple(|b| {
3490 let x = b.const_i32(10);
3491 let y = b.const_i32(20);
3492 let _z = b.iadd(x, y);
3493 b.ret_void();
3494 });
3495 assert!(mf.blocks[0]
3496 .insts
3497 .iter()
3498 .any(|i| i.opcode == ArmOpcode::AddReg));
3499 }
3500
3501 #[test]
3502 fn select_icmp() {
3503 // ICmp whose result is NOT fed into a Select → CSET must appear.
3504 let mf = select_simple(|b| {
3505 let x = b.const_i32(5);
3506 let y = b.const_i32(10);
3507 let _c = b.icmp(CmpOp::Lt, x, y);
3508 b.ret_void();
3509 });
3510 assert!(mf.blocks[0]
3511 .insts
3512 .iter()
3513 .any(|i| i.opcode == ArmOpcode::CmpReg));
3514 assert!(mf.blocks[0]
3515 .insts
3516 .iter()
3517 .any(|i| i.opcode == ArmOpcode::Cset));
3518 }
3519
3520 #[test]
3521 fn select_i128_icmp_eq_combines_limb_results() {
3522 let mf = select_simple(|b| {
3523 let x = b.const_i128(1);
3524 let y = b.const_i128(1);
3525 let _c = b.icmp(CmpOp::Eq, x, y);
3526 b.ret_void();
3527 });
3528 let insts = &mf.blocks[0].insts;
3529 assert!(
3530 insts
3531 .iter()
3532 .filter(|i| i.opcode == ArmOpcode::CmpReg)
3533 .count()
3534 >= 2
3535 );
3536 assert!(insts.iter().filter(|i| i.opcode == ArmOpcode::Cset).count() >= 2);
3537 assert!(insts.iter().any(|i| i.opcode == ArmOpcode::AndReg));
3538 }
3539
    #[test]
    fn select_i128_icmp_lt_uses_high_signed_and_low_unsigned_conds() {
        let mf = select_simple(|b| {
            let x = b.const_i128(1);
            let y = b.const_i128(2);
            let _c = b.icmp(CmpOp::Lt, x, y);
            b.ret_void();
        });
        let insts = &mf.blocks[0].insts;
        assert!(
            insts
                .iter()
                .filter(|i| i.opcode == ArmOpcode::CmpReg)
                .count()
                >= 2
        );
        assert!(insts.iter().filter(|i| i.opcode == ArmOpcode::Cset).count() >= 3);
        assert!(insts.iter().any(|i| i.opcode == ArmOpcode::AndReg));
        assert!(insts.iter().any(|i| i.opcode == ArmOpcode::OrrReg));
    }

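    // A scalar sketch (not compiler code) of the limb-wise rule the test
    // above pins down: compare the high limbs signed and the low limbs
    // unsigned; the composite must agree with native i128 ordering.
    #[test]
    fn limbwise_lt_sketch_matches_native_i128_ordering() {
        fn lt128(a: i128, b: i128) -> bool {
            let (a_hi, a_lo) = ((a >> 64) as i64, a as u64);
            let (b_hi, b_lo) = ((b >> 64) as i64, b as u64);
            // The high limb decides unless equal; then the low limb is
            // compared as an unsigned quantity.
            a_hi < b_hi || (a_hi == b_hi && a_lo < b_lo)
        }
        let samples = [i128::MIN, -2, -1, 0, 1, 2, i128::MAX];
        for &a in &samples {
            for &b in &samples {
                assert_eq!(lt128(a, b), a < b, "a={}, b={}", a, b);
            }
        }
    }
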
    #[test]
    fn select_i128_uses_pair_csel_ops() {
        let mf = select_simple(|b| {
            let cond = b.const_bool(true);
            let x = b.const_i128(1);
            let y = b.const_i128(2);
            let _s = b.select(cond, x, y);
            b.ret_void();
        });
        let insts = &mf.blocks[0].insts;
        assert!(insts.iter().any(|i| i.opcode == ArmOpcode::CmpImm));
        assert_eq!(
            insts
                .iter()
                .filter(|i| i.opcode == ArmOpcode::CselReg)
                .count(),
            2,
            "wide i128 selects should lower with one CSEL per limb"
        );
    }

    #[test]
    fn select_coerces_mixed_gp_widths_before_csel() {
        let mf = select_simple(|b| {
            let cond = b.const_bool(true);
            let wide = b.const_i64(7);
            let narrow = b.const_i32(-1);
            let _s = b.select(cond, wide, narrow);
            b.ret_void();
        });
        let csel = mf.blocks[0]
            .insts
            .iter()
            .find(|i| i.opcode == ArmOpcode::CselReg)
            .expect("expected CSEL for mixed-width select");
        for operand in csel.operands.iter().take(3) {
            let MachineOperand::VReg(vreg) = operand else {
                continue;
            };
            assert_eq!(
                machine_vreg_class(&mf, *vreg),
                RegClass::Gp64,
                "mixed-width select operands should be coerced to the result width before CSEL"
            );
        }
    }

    #[test]
    fn csel_fusion_eliminates_cset_and_extra_cmp() {
        // ICmp used solely by a Select → CSET and CMP cond, #0 must NOT appear.
        // Only CmpReg + CselReg should be present.
        let mf = select_simple(|b| {
            let x = b.const_i32(5);
            let y = b.const_i32(10);
            let c = b.icmp(CmpOp::Le, x, y); // use_count[c] = 1, only in Select
            let _s = b.select(c, x, y);
            b.ret_void();
        });
        let insts = &mf.blocks[0].insts;
        // Must have a CMP to set flags.
        assert!(
            insts.iter().any(|i| i.opcode == ArmOpcode::CmpReg),
            "expected CmpReg for ICmp"
        );
        // Must have CSEL to select the value.
        assert!(
            insts.iter().any(|i| i.opcode == ArmOpcode::CselReg),
            "expected CselReg for Select"
        );
        // Must NOT have CSET (ICmp boolean materialization is suppressed).
        assert!(
            !insts.iter().any(|i| i.opcode == ArmOpcode::Cset),
            "CSET should be suppressed when ICmp feeds only a Select"
        );
        // Must NOT have a second CmpImm (CMP cond, #0 is suppressed).
        assert!(
            !insts.iter().any(|i| i.opcode == ArmOpcode::CmpImm),
            "CMP cond,#0 should be suppressed when CSEL uses flags directly"
        );
    }

    #[test]
    fn csel_no_fusion_when_icmp_has_multiple_uses() {
        // ICmp used by both a Select and another instruction → CSET is kept.
        let mf = select_simple(|b| {
            let x = b.const_i32(5);
            let y = b.const_i32(10);
            let c = b.icmp(CmpOp::Le, x, y); // use_count[c] = 2
            let _s = b.select(c, x, y);
            // Also use `c` in a logical NOT to force a second use.
            let _n = b.not(c);
            b.ret_void();
        });
        let insts = &mf.blocks[0].insts;
        // CSET must still be emitted because `c` has multiple uses.
        assert!(
            insts.iter().any(|i| i.opcode == ArmOpcode::Cset),
            "CSET should remain when ICmp has multiple uses"
        );
    }

    #[test]
    fn select_fadd() {
        let mf = select_simple(|b| {
            let x = b.const_f64(1.0);
            let y = b.const_f64(2.0);
            let _z = b.fadd(x, y);
            b.ret_void();
        });
        assert!(mf.blocks[0]
            .insts
            .iter()
            .any(|i| i.opcode == ArmOpcode::FaddD));
    }

    #[test]
    fn select_alloca_and_store() {
        let mf = select_simple(|b| {
            let addr = b.alloca(IrType::Int(IntWidth::I32));
            let val = b.const_i32(42);
            b.store(val, addr);
            b.ret_void();
        });
        // Should have SubImm (address materialization from FP) and StrImm.
        assert!(mf.blocks[0]
            .insts
            .iter()
            .any(|i| i.opcode == ArmOpcode::SubImm));
        assert!(mf.blocks[0]
            .insts
            .iter()
            .any(|i| i.opcode == ArmOpcode::StrImm));
    }

    #[test]
    fn select_branch() {
        let mf = select_simple(|b| {
            let cond = b.const_bool(true);
            let bb_t = b.create_block("then");
            let bb_f = b.create_block("else");
            b.cond_branch(cond, bb_t, vec![], bb_f, vec![]);

            b.set_block(bb_t);
            b.ret_void();
            b.set_block(bb_f);
            b.ret_void();
        });
        // Entry block should have CmpImm + BCond + B.
        assert!(mf.blocks[0]
            .insts
            .iter()
            .any(|i| i.opcode == ArmOpcode::BCond));
    }

    #[test]
    fn select_call() {
        let mf = select_simple(|b| {
            b.runtime_call(crate::ir::inst::RuntimeFunc::PrintInt, vec![], IrType::Void);
            b.ret_void();
        });
        assert!(mf.blocks[0].insts.iter().any(|i| i.opcode == ArmOpcode::Bl));
    }

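    // A small companion check on `runtime_func_symbol` itself, grounded
    // directly in the match arms above: with no argument type information,
    // PrintInt falls back to the 32-bit symbol, and fixed-name runtime
    // functions map straight through. No ABI state is involved.
    #[test]
    fn runtime_symbols_dispatch_on_arg_width() {
        use crate::ir::inst::RuntimeFunc;
        assert_eq!(runtime_func_symbol(&RuntimeFunc::PrintInt, &[]), "afs_print_int");
        assert_eq!(runtime_func_symbol(&RuntimeFunc::PrintReal, &[]), "afs_print_real");
    }
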
    #[test]
    fn select_call_arg_from_later_block_alloca_has_preallocated_vreg() {
        let mut func = Function::new("test".into(), vec![], IrType::Void);
        {
            let mut b = FuncBuilder::new(&mut func);
            let use_block = b.create_block("use");
            let def_block = b.create_block("def");

            b.branch(def_block, vec![]);

            b.set_block(use_block);
            let dummy = b.const_i64(7);
            b.call(
                FuncRef::External("_callee".into()),
                vec![dummy],
                IrType::Void,
            );
            b.ret_void();

            b.set_block(def_block);
            let slot = b.alloca(IrType::Ptr(Box::new(IrType::Int(IntWidth::I8))));
            b.call(
                FuncRef::External("_callee".into()),
                vec![slot],
                IrType::Void,
            );
            b.branch(use_block, vec![]);
        }

        let mf = select_function(&func);
        assert!(
            mf.blocks.iter().any(|block| {
                block.insts.iter().any(|inst| {
                    inst.opcode == ArmOpcode::SubImm
                        && matches!(inst.operands.first(), Some(MachineOperand::VReg(_)))
                })
            }),
            "alloca address should materialize into a preallocated vreg",
        );
        assert!(
            mf.blocks
                .iter()
                .flat_map(|block| block.insts.iter())
                .filter(|inst| inst.opcode == ArmOpcode::Bl)
                .count()
                >= 2,
            "both calls should lower successfully without an unmapped alloca arg vreg",
        );
    }

    #[test]
    fn select_i128_runtime_print_uses_wide_symbol_and_pair_regs() {
        let mf = select_simple(|b| {
            let wide = b.const_i128(170141183460469231731687303715884105727i128);
            b.runtime_call(
                crate::ir::inst::RuntimeFunc::PrintInt,
                vec![wide],
                IrType::Void,
            );
            b.ret_void();
        });
        let asm = crate::codegen::emit::emit_function(&mf);
        assert!(
            asm.contains("bl _afs_print_int128"),
            "runtime i128 print should call the wide symbol:\n{}",
            asm
        );
        assert!(
            asm.contains("ldp x0, x1"),
            "runtime i128 print should marshal the value through the pair-register ABI:\n{}",
            asm
        );
    }

    #[test]
    fn prologue_and_epilogue() {
        let mf = select_simple(|b| {
            b.ret_void();
        });
        let insts = &mf.blocks[0].insts;
        assert_eq!(
            insts[0].opcode,
            ArmOpcode::StpPre,
            "first inst should be STP (prologue)"
        );
        assert_eq!(
            insts[1].opcode,
            ArmOpcode::AddImm,
            "second inst should be ADD FP, SP, #offset"
        );
        assert!(
            insts.iter().any(|i| i.opcode == ArmOpcode::Ret),
            "should have RET"
        );
    }

    #[test]
    fn const_zero_uses_zr() {
        let mf = select_simple(|b| {
            b.const_i32(0);
            b.ret_void();
        });
        // const_i32(0) should use MOV dest, WZR (32-bit zero register).
        let insts = &mf.blocks[0].insts;
        let has_mov_zr = insts.iter().any(|i| {
            i.opcode == ArmOpcode::MovReg
                && i.operands.iter().any(|o| {
                    matches!(
                        o,
                        MachineOperand::PhysReg(PhysReg::Xzr)
                            | MachineOperand::PhysReg(PhysReg::Wzr)
                    )
                })
        });
        assert!(has_mov_zr, "const 0 should use MOV from XZR or WZR");
    }

    // ---- Parallel-copy / branch arg copy tests ----
    //
    // The branch arg copy resolver in `emit_branch_arg_copies` handles
    // cross-edge moves into block params. When the source/destination
    // graph contains a cycle, the resolver routes one copy through a
    // scratch vreg. These tests construct minimal IR functions that
    // exercise each topology, run isel, and inspect the resulting move
    // count in the source machine block. A standalone sketch of the
    // cycle-breaking strategy follows below.

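    // A minimal, self-contained sketch of that strategy (hypothetical
    // helper, not the production resolver): `pending` holds (dest, src)
    // vreg ids. Emit any copy whose dest is not also a pending source;
    // when only cycles remain, break one by routing its source through
    // a scratch id.
    fn resolve_parallel_copy_sketch(pending: &mut Vec<(u32, u32)>) -> Vec<(u32, u32)> {
        const SCRATCH: u32 = u32::MAX;
        let mut moves = Vec::new();
        pending.retain(|&(dst, src)| dst != src); // self-moves are free
        while !pending.is_empty() {
            if let Some(i) = (0..pending.len())
                .find(|&i| !pending.iter().any(|&(_, src)| src == pending[i].0))
            {
                // Safe copy: nothing still reads this destination.
                moves.push(pending.remove(i));
            } else {
                // Pure cycle: save one source in the scratch, retarget.
                let (dst, src) = pending[0];
                moves.push((SCRATCH, src));
                pending[0] = (dst, SCRATCH);
            }
        }
        moves
    }

    #[test]
    fn parallel_copy_sketch_matches_expected_move_counts() {
        // 2-cycle → 3 moves; 3-cycle → 4 moves; cycle + tail → 4 moves,
        // mirroring the isel-level expectations in the tests below.
        let mut two_cycle = vec![(0u32, 1u32), (1, 0)];
        assert_eq!(resolve_parallel_copy_sketch(&mut two_cycle).len(), 3);
        let mut three_cycle = vec![(0u32, 1u32), (1, 2), (2, 0)];
        assert_eq!(resolve_parallel_copy_sketch(&mut three_cycle).len(), 4);
        let mut cycle_plus_tail = vec![(0u32, 1u32), (1, 0), (2, 3)];
        assert_eq!(resolve_parallel_copy_sketch(&mut cycle_plus_tail).len(), 4);
    }
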
    /// Helper: count vreg→vreg moves of the given opcode in a block,
    /// excluding moves that target a physical register (those are
    /// epilogue/return marshaling, not parallel copies).
    fn count_vreg_moves(block: &MachineBlock, opcode: ArmOpcode) -> usize {
        block
            .insts
            .iter()
            .filter(|i| i.opcode == opcode)
            .filter(|i| {
                // True parallel copies are VReg → VReg.
                matches!(i.operands.first(), Some(MachineOperand::VReg(_)))
                    && matches!(i.operands.get(1), Some(MachineOperand::VReg(_)))
            })
            .count()
    }

    fn find_block<'a>(mf: &'a MachineFunction, contains: &str) -> &'a MachineBlock {
        mf.blocks
            .iter()
            .find(|b| b.label.contains(contains))
            .unwrap_or_else(|| {
                panic!(
                    "no machine block containing '{}' (have: {:?})",
                    contains,
                    mf.blocks.iter().map(|b| &b.label).collect::<Vec<_>>(),
                )
            })
    }

    #[test]
    fn branch_arg_2_cycle_routes_through_scratch() {
        // body branches to header swapping the two int params:
        //   br header(pb, pa)
        // pending = [(pa,pb), (pb,pa)] — pure 2-cycle, requires:
        //   tmp = pb; pb = pa; pa = tmp (3 moves)
        let mut func = Function::new("test".into(), vec![], IrType::Void);
        {
            let mut b = FuncBuilder::new(&mut func);
            let header = b.create_block("header");
            let pa = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let pb = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let body = b.create_block("body");
            let exit = b.create_block("exit");

            let v0 = b.const_i32(1);
            let v1 = b.const_i32(2);
            b.branch(header, vec![v0, v1]);

            b.set_block(header);
            b.cond_branch(pa, body, vec![], exit, vec![]);

            b.set_block(body);
            b.branch(header, vec![pb, pa]);

            b.set_block(exit);
            b.ret_void();
        }
        let mf = select_function(&func);
        let body_mb = find_block(&mf, "body");
        let moves = count_vreg_moves(body_mb, ArmOpcode::MovReg);
        assert_eq!(
            moves, 3,
            "2-cycle should emit 3 vreg→vreg moves (scratch + 2 swaps), got {}: {:#?}",
            moves, body_mb.insts,
        );
    }

    #[test]
    fn branch_arg_3_cycle_routes_through_scratch() {
        // br header(pb, pc, pa) — rotate three params left.
        // pending = [(pa,pb),(pb,pc),(pc,pa)]
        // Resolution: tmp = pb; pb = pc; pc = pa; pa = tmp (4 moves)
        let mut func = Function::new("test".into(), vec![], IrType::Void);
        {
            let mut b = FuncBuilder::new(&mut func);
            let header = b.create_block("header");
            let pa = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let pb = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let pc = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let body = b.create_block("body");
            let exit = b.create_block("exit");

            let v0 = b.const_i32(1);
            let v1 = b.const_i32(2);
            let v2 = b.const_i32(3);
            b.branch(header, vec![v0, v1, v2]);

            b.set_block(header);
            b.cond_branch(pa, body, vec![], exit, vec![]);

            b.set_block(body);
            b.branch(header, vec![pb, pc, pa]);

            b.set_block(exit);
            b.ret_void();
        }
        let mf = select_function(&func);
        let body_mb = find_block(&mf, "body");
        let moves = count_vreg_moves(body_mb, ArmOpcode::MovReg);
        assert_eq!(
            moves, 4,
            "3-cycle should emit 4 vreg→vreg moves (scratch + 3 rotates), got {}: {:#?}",
            moves, body_mb.insts,
        );
    }

    #[test]
    fn branch_arg_cycle_plus_independent_tail() {
        // 2-cycle on (pa,pb) plus an independent (pc <- v_extra) tail.
        //   br header(pb, pa, v_extra)
        // The tail (pc, v_extra) is always safe and emits as a single
        // move; the 2-cycle adds 3 moves for a total of 4.
        let mut func = Function::new("test".into(), vec![], IrType::Void);
        {
            let mut b = FuncBuilder::new(&mut func);
            let header = b.create_block("header");
            let pa = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let pb = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let _pc = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let body = b.create_block("body");
            let exit = b.create_block("exit");

            let v0 = b.const_i32(1);
            let v1 = b.const_i32(2);
            let v2 = b.const_i32(3);
            b.branch(header, vec![v0, v1, v2]);

            b.set_block(header);
            b.cond_branch(pa, body, vec![], exit, vec![]);

            b.set_block(body);
            // Give pc a value defined in `body` itself so the tail copy
            // stays independent of the pa/pb cycle.
            let v3 = b.const_i32(99);
            b.branch(header, vec![pb, pa, v3]);

            b.set_block(exit);
            b.ret_void();
        }
        let mf = select_function(&func);
        let body_mb = find_block(&mf, "body");
        let moves = count_vreg_moves(body_mb, ArmOpcode::MovReg);
        assert_eq!(
            moves, 4,
            "cycle+tail should emit 4 vreg→vreg moves (3 for cycle + 1 for tail), got {}: {:#?}",
            moves, body_mb.insts,
        );
    }

    #[test]
    fn branch_arg_mixed_gp_fp_classes() {
        // Two int params and two float params, all swapped pairwise.
        // pending splits into a GP 2-cycle and an FP 2-cycle, each of
        // which independently needs a scratch.
        // Expected: 3 GP MovReg + 3 FP FmovReg = 6 total moves.
        let mut func = Function::new("test".into(), vec![], IrType::Void);
        {
            let mut b = FuncBuilder::new(&mut func);
            let header = b.create_block("header");
            let ia = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let ib = b.add_block_param(header, IrType::Int(IntWidth::I32));
            let fa = b.add_block_param(header, IrType::Float(FloatWidth::F64));
            let fb = b.add_block_param(header, IrType::Float(FloatWidth::F64));
            let body = b.create_block("body");
            let exit = b.create_block("exit");

            let v0 = b.const_i32(1);
            let v1 = b.const_i32(2);
            let f0 = b.const_f64(1.0);
            let f1 = b.const_f64(2.0);
            b.branch(header, vec![v0, v1, f0, f1]);

            b.set_block(header);
            b.cond_branch(ia, body, vec![], exit, vec![]);

            b.set_block(body);
            // Swap both pairs: ints (ib, ia) and floats (fb, fa).
            b.branch(header, vec![ib, ia, fb, fa]);

            b.set_block(exit);
            b.ret_void();
        }
        let mf = select_function(&func);
        let body_mb = find_block(&mf, "body");
        let gp_moves = count_vreg_moves(body_mb, ArmOpcode::MovReg);
        let fp_moves = count_vreg_moves(body_mb, ArmOpcode::FmovReg);
        assert_eq!(
            gp_moves, 3,
            "GP 2-cycle should emit 3 MovReg, got {}: {:#?}",
            gp_moves, body_mb.insts,
        );
        assert_eq!(
            fp_moves, 3,
            "FP 2-cycle should emit 3 FmovReg, got {}: {:#?}",
            fp_moves, body_mb.insts,
        );
    }

    #[test]
    fn logical_arrays_use_default_kind_storage_for_stack_slots() {
        // Default-kind logicals occupy 4 bytes each, the same stack
        // footprint as default-kind integers: 3 elements → 12 bytes.
        assert_eq!(alloca_size(&IrType::Array(Box::new(IrType::Bool), 3)), 12);
        assert_eq!(
            alloca_size(&IrType::Array(Box::new(IrType::Int(IntWidth::I32)), 3)),
            12
        );
    }

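    #[test]
    fn vector_allocas_reserve_a_full_neon_quadword() {
        // Straight from the alloca_size table above: any supported vector
        // shape occupies one 128-bit (16-byte) NEON register's worth.
        let ty = IrType::Vector {
            lanes: 4,
            elem: Box::new(IrType::Int(IntWidth::I32)),
        };
        assert_eq!(alloca_size(&ty), 16);
    }
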
    // ---- VShape mapping tests (Sprint 12 Stage 2 isel hookup) ----

    #[test]
    fn vshape_recognizes_4xi32() {
        let ty = IrType::Vector {
            lanes: 4,
            elem: Box::new(IrType::Int(IntWidth::I32)),
        };
        assert_eq!(VShape::from_ir(&ty), Some(VShape::V4S));
        assert!(!VShape::V4S.is_float());
    }

    #[test]
    fn vshape_recognizes_2xf64() {
        let ty = IrType::Vector {
            lanes: 2,
            elem: Box::new(IrType::Float(FloatWidth::F64)),
        };
        assert_eq!(VShape::from_ir(&ty), Some(VShape::F2D));
        assert!(VShape::F2D.is_float());
    }

    #[test]
    fn vshape_rejects_unsupported_shape() {
        // 3 lanes is not a NEON shape; we already verified that
        // verify.rs rejects it. VShape::from_ir simply returns None
        // and the isel arm falls back to Nop.
        let ty = IrType::Vector {
            lanes: 3,
            elem: Box::new(IrType::Int(IntWidth::I32)),
        };
        assert_eq!(VShape::from_ir(&ty), None);
    }

    #[test]
    fn vector_type_to_reg_class_returns_v128() {
        let ty = IrType::Vector {
            lanes: 4,
            elem: Box::new(IrType::Float(FloatWidth::F32)),
        };
        assert_eq!(type_to_reg_class(&ty), RegClass::V128);
    }

    /// End-to-end: build a tiny IR function that adds two 4×f32
    /// vectors and walk it through isel. The resulting MachineFunction
    /// must contain at least one `FaddV4S` opcode.
    #[test]
    fn isel_lowers_vadd_4xf32_to_faddv4s() {
        use crate::codegen::mir::ArmOpcode;

        let v_ty = IrType::Vector {
            lanes: 4,
            elem: Box::new(IrType::Float(FloatWidth::F32)),
        };
        let mut func = Function::new("vadd_test".into(), vec![], IrType::Void);
        {
            let mut b = FuncBuilder::new(&mut func);
            // Pointer operands synthesized via allocas for the
            // smoke test — keeps the body small but exercises the
            // VLoad / VAdd / VStore chain.
            let p_a = b.alloca(v_ty.clone());
            let p_b = b.alloca(v_ty.clone());
            let p_dst = b.alloca(v_ty.clone());
            let va = b.vload(p_a, v_ty.clone());
            let vb = b.vload(p_b, v_ty.clone());
            let vc = b.vadd(va, vb);
            b.vstore(vc, p_dst);
            b.ret_void();
        }

        let mf = select_function(&func);
        let opcodes: Vec<ArmOpcode> = mf
            .blocks
            .iter()
            .flat_map(|b| b.insts.iter())
            .map(|i| i.opcode)
            .collect();
        assert!(
            opcodes.contains(&ArmOpcode::FaddV4S),
            "expected FaddV4S in MIR, got {:?}",
            opcodes
        );
        assert!(
            opcodes.contains(&ArmOpcode::LdrQ),
            "expected LdrQ in MIR, got {:?}",
            opcodes
        );
        assert!(
            opcodes.contains(&ArmOpcode::StrQ),
            "expected StrQ in MIR, got {:?}",
            opcodes
        );
    }

    #[test]
    fn vector_abi_arg_uses_v0_to_v7() {
        // First 8 vector args should land in v0-v7. The 9th should
        // overflow to the stack at the next 16-byte slot.
        let mut state = AbiArgState::default();
        let v_ty = IrType::Vector {
            lanes: 4,
            elem: Box::new(IrType::Float(FloatWidth::F32)),
        };
        for expected in 0u8..8 {
            assert_eq!(
                classify_abi_arg(&v_ty, &mut state),
                AbiArgLoc::V128(expected),
                "vector arg #{} should be v{}",
                expected,
                expected
            );
        }
        // 9th vector arg overflows to stack.
        match classify_abi_arg(&v_ty, &mut state) {
            AbiArgLoc::Stack(_) => {}
            other => panic!("expected Stack overflow, got {:?}", other),
        }
    }

    #[test]
    fn vector_args_share_idx_with_float_args() {
        // AAPCS64: vector and float args draw from the same v0-v7
        // pool. A float arg should bump fp_idx, then a vector arg
        // should land at the next slot.
        let mut state = AbiArgState::default();
        let f_ty = IrType::Float(FloatWidth::F64);
        let v_ty = IrType::Vector {
            lanes: 2,
            elem: Box::new(IrType::Int(IntWidth::I64)),
        };
        assert_eq!(classify_abi_arg(&f_ty, &mut state), AbiArgLoc::Fp(0));
        assert_eq!(classify_abi_arg(&v_ty, &mut state), AbiArgLoc::V128(1));
        assert_eq!(classify_abi_arg(&f_ty, &mut state), AbiArgLoc::Fp(2));
    }

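    // Companion sketch to the two ABI tests above, assuming float args
    // draw down the same eight-slot v0-v7 pool before spilling: eight
    // F64 args should classify as Fp(0)..Fp(7), the ninth as Stack.
    #[test]
    fn float_abi_args_exhaust_v0_to_v7_then_spill() {
        let mut state = AbiArgState::default();
        let f_ty = IrType::Float(FloatWidth::F64);
        for expected in 0u8..8 {
            assert_eq!(classify_abi_arg(&f_ty, &mut state), AbiArgLoc::Fp(expected));
        }
        match classify_abi_arg(&f_ty, &mut state) {
            AbiArgLoc::Stack(_) => {}
            other => panic!("expected Stack overflow for the 9th float arg, got {:?}", other),
        }
    }
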
    #[test]
    fn isel_lowers_vfma_2xf64_to_fmlav2d() {
        use crate::codegen::mir::ArmOpcode;

        let v_ty = IrType::Vector {
            lanes: 2,
            elem: Box::new(IrType::Float(FloatWidth::F64)),
        };
        let mut func = Function::new("vfma_test".into(), vec![], IrType::Void);
        {
            let mut b = FuncBuilder::new(&mut func);
            let p_a = b.alloca(v_ty.clone());
            let p_b = b.alloca(v_ty.clone());
            let p_c = b.alloca(v_ty.clone());
            let va = b.vload(p_a, v_ty.clone());
            let vb = b.vload(p_b, v_ty.clone());
            let vc = b.vload(p_c, v_ty.clone());
            let _ = b.vfma(va, vb, vc);
            b.ret_void();
        }

        let mf = select_function(&func);
        let opcodes: Vec<ArmOpcode> = mf
            .blocks
            .iter()
            .flat_map(|b| b.insts.iter())
            .map(|i| i.opcode)
            .collect();
        assert!(
            opcodes.contains(&ArmOpcode::FmlaV2D),
            "expected FmlaV2D, got {:?}",
            opcodes
        );
    }
}