fortrangoingonforty/armfortas / 2823a78

Browse files

Coerce mixed-width select operands

Authored by espadonne
SHA
2823a78ac21e0508f5435a2afd4ace6a0461f669
Parents
dad5b04
Tree
60272a7

2 changed files

Status File + -
M src/codegen/isel.rs 82 2
M tests/i128_stack_args.rs 20 8
src/codegen/isel.rs modified
@@ -1363,10 +1363,10 @@ fn select_inst(
13631363
         // Slow path (unfused): the condition is an arbitrary boolean in a
13641364
         // register. Materialize with `CMP cond, #0; CSEL dest, tv, fv, NE`.
13651365
         InstKind::Select(cond, tv, fv) => {
1366
-            let true_reg = ctx.lookup_vreg(*tv);
1367
-            let false_reg = ctx.lookup_vreg(*fv);
13681366
             let class = type_to_reg_class(&inst.ty);
13691367
             let dest = ctx.get_vreg(mf, inst.id, class);
1368
+            let true_reg = coerce_select_operand_vreg(mf, ctx, mb, func, *tv, &inst.ty);
1369
+            let false_reg = coerce_select_operand_vreg(mf, ctx, mb, func, *fv, &inst.ty);
13701370
 
13711371
             let arm_cond = if let Some(&fused_cond) = ctx.fused_arm_cond.get(cond) {
13721372
                 // Flags already set by the fused CMP — no extra compare needed.
@@ -2773,6 +2773,60 @@ fn icmp_operand_vreg(
27732773
     dest
27742774
 }
27752775
 
2776
+fn machine_vreg_class(mf: &MachineFunction, vreg: VRegId) -> RegClass {
2777
+    mf.vregs
2778
+        .iter()
2779
+        .find(|r| r.id == vreg)
2780
+        .map(|r| r.class)
2781
+        .expect("isel: vreg not registered")
2782
+}
2783
+
2784
+fn coerce_select_operand_vreg(
2785
+    mf: &mut MachineFunction,
2786
+    ctx: &mut ISelCtx,
2787
+    mb: MBlockId,
2788
+    func: &Function,
2789
+    value: ValueId,
2790
+    target_ty: &IrType,
2791
+) -> VRegId {
2792
+    let src = ctx.lookup_vreg(value);
2793
+    let src_class = machine_vreg_class(mf, src);
2794
+    let target_class = type_to_reg_class(target_ty);
2795
+    if src_class == target_class {
2796
+        return src;
2797
+    }
2798
+
2799
+    let dest = mf.new_vreg(target_class);
2800
+    let src_ty = func.value_type(value);
2801
+    let opcode = match (src_class, target_class) {
2802
+        (RegClass::Gp32, RegClass::Gp64) => {
2803
+            if matches!(target_ty, IrType::Ptr(_) | IrType::FuncPtr(_))
2804
+                || zero_extend_cmp_type(src_ty.as_ref())
2805
+            {
2806
+                ArmOpcode::MovReg
2807
+            } else {
2808
+                match src_ty.as_ref() {
2809
+                    Some(IrType::Int(IntWidth::I8)) => ArmOpcode::Sxtb,
2810
+                    Some(IrType::Int(IntWidth::I16)) => ArmOpcode::Sxth,
2811
+                    Some(IrType::Int(IntWidth::I32)) | Some(IrType::Bool) => ArmOpcode::Sxtw,
2812
+                    _ => ArmOpcode::MovReg,
2813
+                }
2814
+            }
2815
+        }
2816
+        (RegClass::Gp64, RegClass::Gp32) => ArmOpcode::MovReg,
2817
+        (RegClass::Fp32, RegClass::Fp64) => ArmOpcode::FcvtDS,
2818
+        (RegClass::Fp64, RegClass::Fp32) => ArmOpcode::FcvtSD,
2819
+        _ => ArmOpcode::MovReg,
2820
+    };
2821
+
2822
+    mf.block_mut(mb).insts.push(MachineInst {
2823
+        opcode,
2824
+        operands: vec![MachineOperand::VReg(dest), MachineOperand::VReg(src)],
2825
+        def: Some(dest),
2826
+    });
2827
+    dest
2828
+}
2829
+
27762830
 fn int_width_class(w: &IntWidth) -> RegClass {
27772831
     match w {
27782832
         IntWidth::I64 => RegClass::Gp64,
@@ -3103,6 +3157,32 @@ mod tests {
31033157
         );
31043158
     }
31053159
 
3160
+    #[test]
3161
+    fn select_coerces_mixed_gp_widths_before_csel() {
3162
+        let mf = select_simple(|b| {
3163
+            let cond = b.const_bool(true);
3164
+            let wide = b.const_i64(7);
3165
+            let narrow = b.const_i32(-1);
3166
+            let _s = b.select(cond, wide, narrow);
3167
+            b.ret_void();
3168
+        });
3169
+        let csel = mf.blocks[0]
3170
+            .insts
3171
+            .iter()
3172
+            .find(|i| i.opcode == ArmOpcode::CselReg)
3173
+            .expect("expected CSEL for mixed-width select");
3174
+        for operand in csel.operands.iter().take(3) {
3175
+            let MachineOperand::VReg(vreg) = operand else {
3176
+                continue;
3177
+            };
3178
+            assert_eq!(
3179
+                machine_vreg_class(&mf, *vreg),
3180
+                RegClass::Gp64,
3181
+                "mixed-width select operands should be coerced to the result width before CSEL"
3182
+            );
3183
+        }
3184
+    }
3185
+
31063186
     #[test]
31073187
     fn csel_fusion_eliminates_cset_and_extra_cmp() {
31083188
         // ICmp used solely by a Select → CSET and CMP cond, #0 must NOT appear.
tests/i128_stack_args.rs modified
@@ -19,6 +19,18 @@ fn capture_text(request: CaptureRequest, stage: Stage) -> String {
1919
     }
2020
 }
2121
 
22
/// Asserts that `asm` stores the `x0`/`x1` pair to a negative `x29`-relative
/// slot somewhere after the first occurrence of `call_marker`.
///
/// Only the text strictly following the marker is searched, and the frame
/// offset is deliberately left open (the needle ends at `#-`), so the check
/// does not depend on the exact stack layout.
///
/// # Panics
///
/// * when `call_marker` does not occur in `asm`;
/// * when no `stp x0, x1, [x29, #-…` follows the marker — the message is
///   `context` followed by the full assembly text.
fn assert_i128_return_stored_after_call(asm: &str, call_marker: &str, context: &str) {
    let Some((_, after_call)) = asm.split_once(call_marker) else {
        panic!("missing call marker '{}' in:\n{}", call_marker, asm);
    };
    assert!(
        after_call.contains("stp x0, x1, [x29, #-"),
        "{}:\n{}",
        context,
        asm
    );
}
33
+
2234
 #[test]
2335
 fn internal_i128_stack_call_spills_fifth_arg_and_loads_incoming_slot_at_o0() {
2436
     let asm = capture_text(
@@ -45,10 +57,10 @@ fn internal_i128_stack_call_spills_fifth_arg_and_loads_incoming_slot_at_o0() {
4557
         "callee should load the incoming stack-passed integer(16) arg from [x29, #16]:\n{}",
4658
         asm
4759
     );
48
-    assert!(
49
-        asm.contains("stp x0, x1, [x29, #-360]"),
50
-        "caller should still receive the returned integer(16) value in x0/x1 even when args spill to the stack:\n{}",
51
-        asm
60
+    assert_i128_return_stored_after_call(
61
+        &asm,
62
+        "bl _afs_internal_",
63
+        "caller should still receive the returned integer(16) value in x0/x1 even when args spill to the stack",
5264
     );
5365
 }
5466
 
@@ -165,10 +177,10 @@ fn external_i128_stack_call_spills_fifth_arg_and_tracks_symbol_at_o0() {
165177
         "fifth external integer(16) arg should spill to the outgoing stack area:\n{}",
166178
         asm
167179
     );
168
-    assert!(
169
-        asm.contains("stp x0, x1, [x29, #-360]"),
170
-        "external integer(16) stack-call should still receive the returned value in x0/x1:\n{}",
171
-        asm
180
+    assert_i128_return_stored_after_call(
181
+        &asm,
182
+        "bl _add5_ext",
183
+        "external integer(16) stack-call should still receive the returned value in x0/x1",
172184
     );
173185
 }
174186