| 1 | //! Tail call optimization (post-regalloc peephole). |
| 2 | //! |
| 3 | //! After register allocation and callee-save insertion, the machine code for |
| 4 | //! a call in tail position looks like: |
| 5 | //! |
| 6 | //! ```text |
| 7 | //! ; arg setup (MOV xi, …) |
| 8 | //! Bl _callee |
| 9 | //! ; callee-save restores (LdpOffset / LdrImm / LdrFpImm) — zero or more |
| 10 | //! LdpPost x29, x30, [sp], #16 ; epilogue frame restore |
| 11 | //! Ret |
| 12 | //! ``` |
| 13 | //! |
| 14 | //! We convert this to: |
| 15 | //! |
| 16 | //! ```text |
| 17 | //! ; arg setup (unchanged — all spill loads happen before LdpPost) |
| 18 | //! ; callee-save restores (unchanged) |
| 19 | //! LdpPost x29, x30, [sp], #16 |
| 20 | //! B _callee ; tail jump (not BL) |
| 21 | //! ``` |
| 22 | //! |
| 23 | //! Correctness argument |
| 24 | //! -------------------- |
| 25 | //! * Argument registers (x0–x7, d0–d7) are loaded BEFORE the LdpPost, so |
| 26 | //! they hold the correct values when the tail branch executes. |
| 27 | //! * Callee-saved restores (x19–x28, d8–d15) are disjoint from the |
| 28 | //! argument registers; restoring them cannot clobber the args. |
| 29 | //! * LdpPost restores x29 (our FP) and x30 (our LR). After it fires, LR |
| 30 | //! holds our *caller's* return address. When _callee executes its own |
| 31 | //! RET, it returns to *our* caller directly — exactly what TCO requires. |
| 32 | //! * We only recognize this pattern when there are **no instructions between |
| 33 | //! Bl and the callee-restore cluster**. In particular, non-void calls |
| 34 | //! whose return-value register survives coalesce_moves are handled because |
| 35 | //! `coalesce_moves` already eliminated any `MOV x0, x0` self-moves, leaving |
| 36 | //! the Bl immediately adjacent to the callee restores / LdpPost. |
| 37 | //! * Gate: we don't fire on non-void calls where a non-trivial result-capture |
| 38 | //! sequence remains (e.g., `MOV x1, x0`) — those are left alone. |
| 39 | |
| 40 | use super::mir::{ArmOpcode, MachineFunction, MachineInst, MachineOperand, PhysReg}; |
| 41 | use std::collections::{HashMap, HashSet}; |
| 42 | |
| 43 | /// Run tail call optimization on a single machine function. |
| 44 | /// |
| 45 | /// Safe to call at any optimization level; the transformation never changes |
| 46 | /// visible behavior and is always a code-size win (removes one instruction). |
| 47 | pub fn tail_call_opt(mf: &mut MachineFunction) { |
| 48 | for block in &mut mf.blocks { |
| 49 | let n = block.insts.len(); |
| 50 | if n < 2 { |
| 51 | continue; |
| 52 | } |
| 53 | |
| 54 | // Epilogue is always `LdpPost; Ret` at the very end. |
| 55 | if block.insts[n - 1].opcode != ArmOpcode::Ret { |
| 56 | continue; |
| 57 | } |
| 58 | if block.insts[n - 2].opcode != ArmOpcode::LdpPost { |
| 59 | continue; |
| 60 | } |
| 61 | |
| 62 | let ldp_idx = n - 2; |
| 63 | |
| 64 | // Walk backwards from just before LdpPost, skipping callee-save |
| 65 | // restore instructions (LdpOffset, LdrImm, LdrFpImm). Stop when |
| 66 | // we find something that isn't a callee restore. |
| 67 | let mut bl_candidate = ldp_idx; |
| 68 | while bl_candidate > 0 { |
| 69 | bl_candidate -= 1; |
| 70 | match block.insts[bl_candidate].opcode { |
| 71 | ArmOpcode::LdpOffset | ArmOpcode::LdrImm | ArmOpcode::LdrFpImm => { |
| 72 | // Callee-save restore — keep scanning backwards. |
| 73 | } |
| 74 | ArmOpcode::Bl => { |
| 75 | // Found the BL — stop here. |
| 76 | break; |
| 77 | } |
| 78 | _ => { |
| 79 | // Non-callee-restore, non-BL — pattern doesn't match. |
| 80 | bl_candidate = usize::MAX; // sentinel |
| 81 | break; |
| 82 | } |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | // Sentinel or scanned to index 0 without finding Bl. |
| 87 | if bl_candidate == usize::MAX { |
| 88 | continue; |
| 89 | } |
| 90 | if block.insts[bl_candidate].opcode != ArmOpcode::Bl { |
| 91 | continue; |
| 92 | } |
| 93 | |
| 94 | // SAFETY: reject TCO when any argument register (x0–x7) holds a value |
| 95 | // derived from our frame pointer (e.g. a pointer to a stack-allocated |
| 96 | // local / derived-type struct). After the epilogue tears down our |
| 97 | // frame, the callee's prologue reuses that memory; any pointer into it |
| 98 | // becomes dangling. Taint analysis: track GP registers set from |
| 99 | // `sub xN, x29, #M` (alloca) and propagated through MovReg / AddReg / |
| 100 | // AddImm / Mul. If any x0–x7 is tainted, the tail call is unsafe. |
| 101 | if has_frame_derived_arg(&block.insts[..bl_candidate]) { |
| 102 | continue; |
| 103 | } |
| 104 | |
| 105 | // Extract the call target from the Bl operand. |
| 106 | let label = match block.insts[bl_candidate].operands.first() { |
| 107 | Some(MachineOperand::Extern(s)) => s.clone(), |
| 108 | _ => continue, // indirect call or unexpected operand — skip |
| 109 | }; |
| 110 | |
| 111 | // Transform: |
| 112 | // Remove `Bl _label` at bl_candidate. |
| 113 | // Remove `Ret` (last instruction). |
| 114 | // Append `B _label` (tail branch to external symbol). |
| 115 | // |
| 116 | // The callee restores and LdpPost between bl_candidate and ldp_idx |
| 117 | // shift down by 1 (because we removed bl_candidate), but stay in |
| 118 | // the right relative order. |
| 119 | block.insts.remove(bl_candidate); |
| 120 | block.insts.pop(); // remove Ret (was the last instruction) |
| 121 | block.insts.push(MachineInst { |
| 122 | opcode: ArmOpcode::B, |
| 123 | operands: vec![MachineOperand::Extern(label)], |
| 124 | def: None, |
| 125 | }); |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | // --------------------------------------------------------------------------- |
| 130 | // Safety helpers |
| 131 | // --------------------------------------------------------------------------- |
| 132 | |
| 133 | /// Returns true if any GP argument register (x0–x7) contains a frame-derived |
| 134 | /// pointer at the point of the Bl. |
| 135 | /// |
| 136 | /// "Frame-derived" means the register was set — directly or transitively — from |
| 137 | /// a `sub xN, x29, #M` (alloca materialization). |
| 138 | /// |
| 139 | /// The analysis is a forward taint propagation over both registers AND |
| 140 | /// FP-relative stack slots, so it correctly handles the spill/reload pattern: |
| 141 | /// |
| 142 | /// ```text |
| 143 | /// sub x10, x29, #4 ; x10 = frame addr (tainted) |
| 144 | /// str x10, [x29, #-16] ; slot -16 now tainted |
| 145 | /// ... |
| 146 | /// ldr x9, [x29, #-16] ; x9 = frame addr (tainted via slot) |
| 147 | /// mov x0, x9 ; x0 tainted → unsafe TCO |
| 148 | /// ``` |
| 149 | fn has_frame_derived_arg(insts: &[MachineInst]) -> bool { |
| 150 | // GP register numbers whose current value is derived from the frame pointer. |
| 151 | let mut tainted_regs: HashSet<u8> = HashSet::new(); |
| 152 | // FP-relative offsets whose memory contents are frame-derived pointers. |
| 153 | let mut tainted_slots: HashSet<i64> = HashSet::new(); |
| 154 | // GP registers known to hold `fp + offset` or `fp - offset`. |
| 155 | let mut frame_addr_regs: HashMap<u8, i64> = HashMap::new(); |
| 156 | |
| 157 | for inst in insts { |
| 158 | if let Some(dst) = written_gp_reg(inst) { |
| 159 | frame_addr_regs.remove(&dst); |
| 160 | } |
| 161 | match inst.opcode { |
| 162 | // sub xN, x29, #imm → xN holds a frame-relative address. |
| 163 | ArmOpcode::SubImm if op_is_fp(inst, 1) => { |
| 164 | if let Some(n) = op_gp(inst, 0) { |
| 165 | tainted_regs.insert(n); |
| 166 | if let Some(imm) = op_imm(inst, 2) { |
| 167 | frame_addr_regs.insert(n, -imm); |
| 168 | } |
| 169 | } |
| 170 | } |
| 171 | // add xN, x29, #imm → xN holds a frame-relative address. |
| 172 | ArmOpcode::AddImm if op_is_fp(inst, 1) => { |
| 173 | if let Some(n) = op_gp(inst, 0) { |
| 174 | tainted_regs.insert(n); |
| 175 | if let Some(imm) = op_imm(inst, 2) { |
| 176 | frame_addr_regs.insert(n, imm); |
| 177 | } |
| 178 | } |
| 179 | } |
| 180 | // add xN, xM, #imm where xM is a known frame address. |
| 181 | ArmOpcode::AddImm => { |
| 182 | if let (Some(dst), Some(src), Some(imm)) = |
| 183 | (op_gp(inst, 0), op_gp(inst, 1), op_imm(inst, 2)) |
| 184 | { |
| 185 | if let Some(base_off) = frame_addr_regs.get(&src).copied() { |
| 186 | tainted_regs.insert(dst); |
| 187 | frame_addr_regs.insert(dst, base_off + imm); |
| 188 | } else if tainted_regs.contains(&src) { |
| 189 | tainted_regs.insert(dst); |
| 190 | } |
| 191 | } |
| 192 | } |
| 193 | // sub xN, xM, #imm where xM is a known frame address. |
| 194 | ArmOpcode::SubImm => { |
| 195 | if let (Some(dst), Some(src), Some(imm)) = |
| 196 | (op_gp(inst, 0), op_gp(inst, 1), op_imm(inst, 2)) |
| 197 | { |
| 198 | if let Some(base_off) = frame_addr_regs.get(&src).copied() { |
| 199 | tainted_regs.insert(dst); |
| 200 | frame_addr_regs.insert(dst, base_off - imm); |
| 201 | } else if tainted_regs.contains(&src) { |
| 202 | tainted_regs.insert(dst); |
| 203 | } |
| 204 | } |
| 205 | } |
| 206 | // add xN, xM, xP (GEP: propagate taint from either source) |
| 207 | ArmOpcode::AddReg |
| 208 | if op_gp(inst, 1).is_some_and(|n| tainted_regs.contains(&n)) |
| 209 | || op_gp(inst, 2).is_some_and(|n| tainted_regs.contains(&n)) => |
| 210 | { |
| 211 | if let Some(n) = op_gp(inst, 0) { |
| 212 | tainted_regs.insert(n); |
| 213 | } |
| 214 | } |
| 215 | // mov xN, xM (register copy — propagates taint to arg reg) |
| 216 | ArmOpcode::MovReg if op_gp(inst, 1).is_some_and(|n| tainted_regs.contains(&n)) => { |
| 217 | if let Some(n) = op_gp(inst, 0) { |
| 218 | tainted_regs.insert(n); |
| 219 | if let Some(src) = op_gp(inst, 1) { |
| 220 | if let Some(off) = frame_addr_regs.get(&src).copied() { |
| 221 | frame_addr_regs.insert(n, off); |
| 222 | } |
| 223 | } |
| 224 | } |
| 225 | } |
| 226 | // mul xN, xM, xP (index computation in GEP; conservative) |
| 227 | ArmOpcode::Mul |
| 228 | if op_gp(inst, 1).is_some_and(|n| tainted_regs.contains(&n)) |
| 229 | || op_gp(inst, 2).is_some_and(|n| tainted_regs.contains(&n)) => |
| 230 | { |
| 231 | if let Some(n) = op_gp(inst, 0) { |
| 232 | tainted_regs.insert(n); |
| 233 | } |
| 234 | } |
| 235 | // str xN, [x29, #off] — if xN is tainted, the slot becomes tainted. |
| 236 | ArmOpcode::StrImm |
| 237 | if op_gp(inst, 0).is_some_and(|n| tainted_regs.contains(&n)) |
| 238 | && effective_frame_slot_offset(inst, 1, 2, &frame_addr_regs).is_some() => |
| 239 | { |
| 240 | if let Some(off) = effective_frame_slot_offset(inst, 1, 2, &frame_addr_regs) { |
| 241 | tainted_slots.insert(off); |
| 242 | } |
| 243 | } |
| 244 | // ldr xN, [frame] — if the slot is known tainted, xN becomes |
| 245 | // tainted. Also conservatively reject tail calls when any 64-bit |
| 246 | // GP register is reloaded from our frame in the tail block: the |
| 247 | // slot may have been populated in a predecessor with an escaped |
| 248 | // local address, then copied into x0–x7 later in the block. |
| 249 | ArmOpcode::LdrImm => { |
| 250 | if let Some(off) = effective_frame_slot_offset(inst, 1, 2, &frame_addr_regs) { |
| 251 | if let Some(n) = op_gp(inst, 0) { |
| 252 | if tainted_slots.contains(&off) || n <= 30 { |
| 253 | tainted_regs.insert(n); |
| 254 | } |
| 255 | } |
| 256 | } |
| 257 | } |
| 258 | _ => {} |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | // Argument registers are x0–x7 (PhysReg::Gp(0)–Gp(7)). |
| 263 | (0u8..8).any(|n| tainted_regs.contains(&n)) |
| 264 | } |
| 265 | |
| 266 | /// True if operand at `idx` is the frame pointer (x29 = PhysReg::Gp(29)). |
| 267 | #[inline] |
| 268 | fn op_is_fp(inst: &MachineInst, idx: usize) -> bool { |
| 269 | matches!( |
| 270 | inst.operands.get(idx), |
| 271 | Some(MachineOperand::PhysReg(p)) if *p == PhysReg::FP |
| 272 | ) |
| 273 | } |
| 274 | |
| 275 | /// GP register number (0–30) for the PhysReg::Gp operand at `idx`, or None. |
| 276 | #[inline] |
| 277 | fn op_gp(inst: &MachineInst, idx: usize) -> Option<u8> { |
| 278 | match inst.operands.get(idx)? { |
| 279 | MachineOperand::PhysReg(PhysReg::Gp(n)) => Some(*n), |
| 280 | _ => None, |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | /// Integer immediate operand at `idx`, or None. |
| 285 | #[inline] |
| 286 | fn op_imm(inst: &MachineInst, idx: usize) -> Option<i64> { |
| 287 | match inst.operands.get(idx)? { |
| 288 | MachineOperand::Imm(v) => Some(*v), |
| 289 | MachineOperand::FrameSlot(v) => Some(*v as i64), |
| 290 | _ => None, |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | /// Frame-pointer-relative offset for the operand at `idx`, or None. |
| 295 | /// Accepts both `Imm` and `FrameSlot` variants. |
| 296 | #[inline] |
| 297 | fn op_fp_offset(inst: &MachineInst, idx: usize) -> Option<i64> { |
| 298 | match inst.operands.get(idx)? { |
| 299 | MachineOperand::Imm(v) => Some(*v), |
| 300 | MachineOperand::FrameSlot(v) => Some(*v as i64), |
| 301 | _ => None, |
| 302 | } |
| 303 | } |
| 304 | |
| 305 | /// Effective FP-relative offset addressed by `[base, #off]`, where `base` is |
| 306 | /// either FP directly or a GP register previously materialized from FP. |
| 307 | #[inline] |
| 308 | fn effective_frame_slot_offset( |
| 309 | inst: &MachineInst, |
| 310 | base_idx: usize, |
| 311 | off_idx: usize, |
| 312 | frame_addr_regs: &HashMap<u8, i64>, |
| 313 | ) -> Option<i64> { |
| 314 | let off = op_imm(inst, off_idx).unwrap_or(0); |
| 315 | match inst.operands.get(base_idx)? { |
| 316 | MachineOperand::PhysReg(p) if *p == PhysReg::FP => Some(off), |
| 317 | MachineOperand::PhysReg(PhysReg::Gp(n)) => frame_addr_regs.get(n).map(|base| base + off), |
| 318 | _ => None, |
| 319 | } |
| 320 | } |
| 321 | |
| 322 | /// GP register written by this instruction, if operand 0 is a GP destination. |
| 323 | #[inline] |
| 324 | fn written_gp_reg(inst: &MachineInst) -> Option<u8> { |
| 325 | match inst.opcode { |
| 326 | ArmOpcode::AddReg |
| 327 | | ArmOpcode::AddImm |
| 328 | | ArmOpcode::SubReg |
| 329 | | ArmOpcode::SubImm |
| 330 | | ArmOpcode::Mul |
| 331 | | ArmOpcode::MovReg |
| 332 | | ArmOpcode::LdrImm => op_gp(inst, 0), |
| 333 | _ => None, |
| 334 | } |
| 335 | } |
| 336 | |
| 337 | // --------------------------------------------------------------------------- |
| 338 | // Tests |
| 339 | // --------------------------------------------------------------------------- |
| 340 | |
#[cfg(test)]
mod tests {
    //! Unit tests for the TCO peephole: positive matches (with and without
    //! callee-save restores) and the safety gates that must block the rewrite.

    use super::*;
    use crate::codegen::mir::*;

    /// Wrap a list of instructions in a single-block shell (for tests that
    /// only need a `MachineBlock`).
    fn make_block(insts: Vec<MachineInst>) -> MachineBlock {
        MachineBlock {
            label: "test".into(),
            insts,
            id: MBlockId(0),
        }
    }

    /// `bl <label>` — direct call to an external symbol.
    fn bl(label: &str) -> MachineInst {
        MachineInst {
            opcode: ArmOpcode::Bl,
            operands: vec![MachineOperand::Extern(label.into())],
            def: None,
        }
    }

    /// `ldp x29, x30, [sp], #imm` — the epilogue frame-restore.
    fn ldp_post() -> MachineInst {
        MachineInst {
            opcode: ArmOpcode::LdpPost,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::FP),
                MachineOperand::PhysReg(PhysReg::LR),
                MachineOperand::PhysReg(PhysReg::Sp),
            ],
            def: None,
        }
    }

    /// `ret` — function return.
    fn ret() -> MachineInst {
        MachineInst {
            opcode: ArmOpcode::Ret,
            operands: vec![],
            def: None,
        }
    }

    /// `ldr x19, [x29, #-8]` — a callee-saved register restore, which the
    /// backward scan must skip over.
    fn ldr_callee_restore() -> MachineInst {
        MachineInst {
            opcode: ArmOpcode::LdrImm,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Gp(19)),
                MachineOperand::PhysReg(PhysReg::FP),
                MachineOperand::Imm(-8),
            ],
            def: Some(VRegId(19)),
        }
    }

    /// Build a MachineFunction whose blocks contain exactly `blocks[i]`.
    fn build_mf(blocks: Vec<Vec<MachineInst>>) -> MachineFunction {
        let mut mf = MachineFunction::new("test".into());
        // MachineFunction starts with one empty block; overwrite it.
        mf.blocks[0].insts = blocks[0].clone();
        for blk_insts in blocks.into_iter().skip(1) {
            let id = mf.new_block("bb");
            mf.block_mut(id).insts = blk_insts;
        }
        mf
    }

    #[test]
    fn void_tail_call_no_callee_saves() {
        // Pattern: Bl; LdpPost; Ret → LdpPost; B
        let mut mf = build_mf(vec![vec![bl("_foo"), ldp_post(), ret()]]);
        tail_call_opt(&mut mf);
        let insts = &mf.blocks[0].insts;
        // Should now be: LdpPost, B _foo
        assert_eq!(insts.len(), 2);
        assert_eq!(insts[0].opcode, ArmOpcode::LdpPost);
        assert_eq!(insts[1].opcode, ArmOpcode::B);
        assert_eq!(insts[1].operands[0], MachineOperand::Extern("_foo".into()));
    }

    #[test]
    fn void_tail_call_with_callee_restore() {
        // Pattern: Bl; LdrImm(restore); LdpPost; Ret → LdrImm; LdpPost; B
        let mut mf = build_mf(vec![vec![
            bl("_bar"),
            ldr_callee_restore(),
            ldp_post(),
            ret(),
        ]]);
        tail_call_opt(&mut mf);
        let insts = &mf.blocks[0].insts;
        assert_eq!(insts.len(), 3);
        assert_eq!(insts[0].opcode, ArmOpcode::LdrImm);
        assert_eq!(insts[1].opcode, ArmOpcode::LdpPost);
        assert_eq!(insts[2].opcode, ArmOpcode::B);
    }

    #[test]
    fn no_tco_when_non_callee_restore_between_bl_and_ldp() {
        // A non-trivial instruction (e.g., MovReg for result capture) blocks TCO.
        let mov_result = MachineInst {
            opcode: ArmOpcode::MovReg,
            operands: vec![
                MachineOperand::PhysReg(PhysReg::Gp(1)), // x1, not x0
                MachineOperand::PhysReg(PhysReg::Gp(0)), // x0 (result)
            ],
            def: None,
        };
        let mut mf = build_mf(vec![vec![bl("_baz"), mov_result, ldp_post(), ret()]]);
        tail_call_opt(&mut mf);
        let insts = &mf.blocks[0].insts;
        // No transformation — Bl should still be present.
        assert!(
            insts.iter().any(|i| i.opcode == ArmOpcode::Bl),
            "Bl should NOT be removed when result is captured in non-return register"
        );
        assert!(
            insts.iter().any(|i| i.opcode == ArmOpcode::Ret),
            "Ret should still be present"
        );
    }

    #[test]
    fn no_tco_when_not_ending_in_ret() {
        // Block ending in B (not Ret) — no TCO.
        let b_inst = MachineInst {
            opcode: ArmOpcode::B,
            operands: vec![MachineOperand::BlockRef(MBlockId(1))],
            def: None,
        };
        let mut mf = build_mf(vec![vec![bl("_qux"), ldp_post(), b_inst]]);
        tail_call_opt(&mut mf);
        // Should still have Bl (TCO not fired because no Ret).
        assert!(mf.blocks[0].insts.iter().any(|i| i.opcode == ArmOpcode::Bl));
    }

    #[test]
    fn no_tco_when_frame_pointer_is_spilled_through_large_offset_slot() {
        // Repro shape from fortbite O1:
        //   sub  x10, fp, #104
        //   sub  x8, fp, #1936
        //   str  x10, [x8]
        //   sub  x8, fp, #1936
        //   ldr  x22, [x8]
        //   mov  x1, x22
        //   bl   _callee
        //
        // The arg in x1 is a pointer into our frame, just spilled/reloaded
        // through the large-offset materialization form. Tail-branching here
        // would tear down the frame before the callee consumes x1.
        let mut mf = build_mf(vec![vec![
            // sub x10, fp, #104 — materialize a local's address (tainted)
            MachineInst {
                opcode: ArmOpcode::SubImm,
                operands: vec![
                    MachineOperand::PhysReg(PhysReg::Gp(10)),
                    MachineOperand::PhysReg(PhysReg::FP),
                    MachineOperand::Imm(104),
                ],
                def: None,
            },
            // sub x8, fp, #1936 — materialize the spill slot's address
            MachineInst {
                opcode: ArmOpcode::SubImm,
                operands: vec![
                    MachineOperand::PhysReg(PhysReg::Gp(8)),
                    MachineOperand::PhysReg(PhysReg::FP),
                    MachineOperand::Imm(1936),
                ],
                def: None,
            },
            // str x10, [x8] — spill the frame pointer through x8
            MachineInst {
                opcode: ArmOpcode::StrImm,
                operands: vec![
                    MachineOperand::PhysReg(PhysReg::Gp(10)),
                    MachineOperand::PhysReg(PhysReg::Gp(8)),
                    MachineOperand::Imm(0),
                ],
                def: None,
            },
            // sub x8, fp, #1936 — re-materialize the same slot address
            MachineInst {
                opcode: ArmOpcode::SubImm,
                operands: vec![
                    MachineOperand::PhysReg(PhysReg::Gp(8)),
                    MachineOperand::PhysReg(PhysReg::FP),
                    MachineOperand::Imm(1936),
                ],
                def: None,
            },
            // ldr x22, [x8] — reload the spilled frame address
            MachineInst {
                opcode: ArmOpcode::LdrImm,
                operands: vec![
                    MachineOperand::PhysReg(PhysReg::Gp(22)),
                    MachineOperand::PhysReg(PhysReg::Gp(8)),
                    MachineOperand::Imm(0),
                ],
                def: None,
            },
            // mov x1, x22 — copy the frame address into an argument register
            MachineInst {
                opcode: ArmOpcode::MovReg,
                operands: vec![
                    MachineOperand::PhysReg(PhysReg::Gp(1)),
                    MachineOperand::PhysReg(PhysReg::Gp(22)),
                ],
                def: None,
            },
            bl("_callee"),
            ldp_post(),
            ret(),
        ]]);
        tail_call_opt(&mut mf);
        assert!(
            mf.blocks[0].insts.iter().any(|i| i.opcode == ArmOpcode::Bl),
            "tail-call optimization must not erase the call when an arg reloads a spilled frame pointer"
        );
        assert!(
            mf.blocks[0]
                .insts
                .iter()
                .any(|i| i.opcode == ArmOpcode::Ret),
            "tail-call optimization must leave the normal return path intact"
        );
    }

    #[test]
    fn no_tco_when_arg_register_is_reloaded_from_frame_in_tail_block() {
        // The spill happens in block 0; the tail block only sees the reload.
        // The conservative "any GP reload from our frame" rule must fire.
        let mut mf = build_mf(vec![
            vec![
                // sub x10, fp, #104 — local address
                MachineInst {
                    opcode: ArmOpcode::SubImm,
                    operands: vec![
                        MachineOperand::PhysReg(PhysReg::Gp(10)),
                        MachineOperand::PhysReg(PhysReg::FP),
                        MachineOperand::Imm(104),
                    ],
                    def: None,
                },
                // str x10, [fp, #-1936] — escape it to a frame slot
                MachineInst {
                    opcode: ArmOpcode::StrImm,
                    operands: vec![
                        MachineOperand::PhysReg(PhysReg::Gp(10)),
                        MachineOperand::PhysReg(PhysReg::FP),
                        MachineOperand::Imm(-1936),
                    ],
                    def: None,
                },
                // b bb1
                MachineInst {
                    opcode: ArmOpcode::B,
                    operands: vec![MachineOperand::BlockRef(MBlockId(1))],
                    def: None,
                },
            ],
            vec![
                // ldr x1, [fp, #-1936] — reload straight into an arg register
                MachineInst {
                    opcode: ArmOpcode::LdrImm,
                    operands: vec![
                        MachineOperand::PhysReg(PhysReg::Gp(1)),
                        MachineOperand::PhysReg(PhysReg::FP),
                        MachineOperand::Imm(-1936),
                    ],
                    def: None,
                },
                bl("_callee"),
                ldp_post(),
                ret(),
            ],
        ]);
        tail_call_opt(&mut mf);
        assert!(
            mf.blocks[1].insts.iter().any(|i| i.opcode == ArmOpcode::Bl),
            "tail-call optimization must not fire when x1 is reloaded from our frame in the tail block"
        );
        assert!(
            mf.blocks[1].insts.iter().any(|i| i.opcode == ArmOpcode::Ret),
            "tail-call optimization must preserve the return when the arg comes from a frame reload"
        );
    }

    #[test]
    fn no_tco_when_temp_register_reloads_frame_slot_before_copying_to_arg() {
        // Same cross-block escape as above, but the reload lands in a temp
        // (x22) and reaches x1 only via a MovReg — taint must propagate.
        let mut mf = build_mf(vec![
            vec![
                // sub x10, fp, #104 — local address
                MachineInst {
                    opcode: ArmOpcode::SubImm,
                    operands: vec![
                        MachineOperand::PhysReg(PhysReg::Gp(10)),
                        MachineOperand::PhysReg(PhysReg::FP),
                        MachineOperand::Imm(104),
                    ],
                    def: None,
                },
                // str x10, [fp, #-1936] — escape it to a frame slot
                MachineInst {
                    opcode: ArmOpcode::StrImm,
                    operands: vec![
                        MachineOperand::PhysReg(PhysReg::Gp(10)),
                        MachineOperand::PhysReg(PhysReg::FP),
                        MachineOperand::Imm(-1936),
                    ],
                    def: None,
                },
                // b bb1
                MachineInst {
                    opcode: ArmOpcode::B,
                    operands: vec![MachineOperand::BlockRef(MBlockId(1))],
                    def: None,
                },
            ],
            vec![
                // ldr x22, [fp, #-1936] — reload into a temp
                MachineInst {
                    opcode: ArmOpcode::LdrImm,
                    operands: vec![
                        MachineOperand::PhysReg(PhysReg::Gp(22)),
                        MachineOperand::PhysReg(PhysReg::FP),
                        MachineOperand::Imm(-1936),
                    ],
                    def: None,
                },
                // mov x1, x22 — copy the temp into the argument register
                MachineInst {
                    opcode: ArmOpcode::MovReg,
                    operands: vec![
                        MachineOperand::PhysReg(PhysReg::Gp(1)),
                        MachineOperand::PhysReg(PhysReg::Gp(22)),
                    ],
                    def: None,
                },
                bl("_callee"),
                ldp_post(),
                ret(),
            ],
        ]);
        tail_call_opt(&mut mf);
        assert!(
            mf.blocks[1].insts.iter().any(|i| i.opcode == ArmOpcode::Bl),
            "tail-call optimization must not fire when a temp reloads a frame slot before copying it to x1"
        );
        assert!(
            mf.blocks[1].insts.iter().any(|i| i.opcode == ArmOpcode::Ret),
            "tail-call optimization must preserve the return when a frame reload feeds x1 indirectly"
        );
    }
}
| 676 |