Rust · 25621 bytes Raw Blame History
1 //! Tail call optimization (post-regalloc peephole).
2 //!
3 //! After register allocation and callee-save insertion, the machine code for
4 //! a call in tail position looks like:
5 //!
6 //! ```text
7 //! ; arg setup (MOV xi, …)
8 //! Bl _callee
9 //! ; callee-save restores (LdpOffset / LdrImm / LdrFpImm) — zero or more
10 //! LdpPost x29, x30, [sp], #16 ; epilogue frame restore
11 //! Ret
12 //! ```
13 //!
14 //! We convert this to:
15 //!
16 //! ```text
17 //! ; arg setup (unchanged — all spill loads happen before LdpPost)
18 //! ; callee-save restores (unchanged)
19 //! LdpPost x29, x30, [sp], #16
20 //! B _callee ; tail jump (not BL)
21 //! ```
22 //!
23 //! Correctness argument
24 //! --------------------
25 //! * Argument registers (x0–x7, d0–d7) are loaded BEFORE the LdpPost, so
26 //! they hold the correct values when the tail branch executes.
27 //! * Callee-saved restores (x19–x28, d8–d15) are disjoint from the
28 //! argument registers; restoring them cannot clobber the args.
29 //! * LdpPost restores x29 (our FP) and x30 (our LR). After it fires, LR
30 //! holds our *caller's* return address. When _callee executes its own
31 //! RET, it returns to *our* caller directly — exactly what TCO requires.
32 //! * We only recognize this pattern when there are **no instructions between
33 //! Bl and the callee-restore cluster**. In particular, non-void calls
34 //! whose return-value register survives coalesce_moves are handled because
35 //! `coalesce_moves` already eliminated any `MOV x0, x0` self-moves, leaving
36 //! the Bl immediately adjacent to the callee restores / LdpPost.
37 //! * Gate: we don't fire on non-void calls where a non-trivial result-capture
38 //! sequence remains (e.g., `MOV x1, x0`) — those are left alone.
39
40 use super::mir::{ArmOpcode, MachineFunction, MachineInst, MachineOperand, PhysReg};
41 use std::collections::{HashMap, HashSet};
42
43 /// Run tail call optimization on a single machine function.
44 ///
45 /// Safe to call at any optimization level; the transformation never changes
46 /// visible behavior and is always a code-size win (removes one instruction).
47 pub fn tail_call_opt(mf: &mut MachineFunction) {
48 for block in &mut mf.blocks {
49 let n = block.insts.len();
50 if n < 2 {
51 continue;
52 }
53
54 // Epilogue is always `LdpPost; Ret` at the very end.
55 if block.insts[n - 1].opcode != ArmOpcode::Ret {
56 continue;
57 }
58 if block.insts[n - 2].opcode != ArmOpcode::LdpPost {
59 continue;
60 }
61
62 let ldp_idx = n - 2;
63
64 // Walk backwards from just before LdpPost, skipping callee-save
65 // restore instructions (LdpOffset, LdrImm, LdrFpImm). Stop when
66 // we find something that isn't a callee restore.
67 let mut bl_candidate = ldp_idx;
68 while bl_candidate > 0 {
69 bl_candidate -= 1;
70 match block.insts[bl_candidate].opcode {
71 ArmOpcode::LdpOffset | ArmOpcode::LdrImm | ArmOpcode::LdrFpImm => {
72 // Callee-save restore — keep scanning backwards.
73 }
74 ArmOpcode::Bl => {
75 // Found the BL — stop here.
76 break;
77 }
78 _ => {
79 // Non-callee-restore, non-BL — pattern doesn't match.
80 bl_candidate = usize::MAX; // sentinel
81 break;
82 }
83 }
84 }
85
86 // Sentinel or scanned to index 0 without finding Bl.
87 if bl_candidate == usize::MAX {
88 continue;
89 }
90 if block.insts[bl_candidate].opcode != ArmOpcode::Bl {
91 continue;
92 }
93
94 // SAFETY: reject TCO when any argument register (x0–x7) holds a value
95 // derived from our frame pointer (e.g. a pointer to a stack-allocated
96 // local / derived-type struct). After the epilogue tears down our
97 // frame, the callee's prologue reuses that memory; any pointer into it
98 // becomes dangling. Taint analysis: track GP registers set from
99 // `sub xN, x29, #M` (alloca) and propagated through MovReg / AddReg /
100 // AddImm / Mul. If any x0–x7 is tainted, the tail call is unsafe.
101 if has_frame_derived_arg(&block.insts[..bl_candidate]) {
102 continue;
103 }
104
105 // Extract the call target from the Bl operand.
106 let label = match block.insts[bl_candidate].operands.first() {
107 Some(MachineOperand::Extern(s)) => s.clone(),
108 _ => continue, // indirect call or unexpected operand — skip
109 };
110
111 // Transform:
112 // Remove `Bl _label` at bl_candidate.
113 // Remove `Ret` (last instruction).
114 // Append `B _label` (tail branch to external symbol).
115 //
116 // The callee restores and LdpPost between bl_candidate and ldp_idx
117 // shift down by 1 (because we removed bl_candidate), but stay in
118 // the right relative order.
119 block.insts.remove(bl_candidate);
120 block.insts.pop(); // remove Ret (was the last instruction)
121 block.insts.push(MachineInst {
122 opcode: ArmOpcode::B,
123 operands: vec![MachineOperand::Extern(label)],
124 def: None,
125 });
126 }
127 }
128
129 // ---------------------------------------------------------------------------
130 // Safety helpers
131 // ---------------------------------------------------------------------------
132
133 /// Returns true if any GP argument register (x0–x7) contains a frame-derived
134 /// pointer at the point of the Bl.
135 ///
136 /// "Frame-derived" means the register was set — directly or transitively — from
137 /// a `sub xN, x29, #M` (alloca materialization).
138 ///
139 /// The analysis is a forward taint propagation over both registers AND
140 /// FP-relative stack slots, so it correctly handles the spill/reload pattern:
141 ///
142 /// ```text
143 /// sub x10, x29, #4 ; x10 = frame addr (tainted)
144 /// str x10, [x29, #-16] ; slot -16 now tainted
145 /// ...
146 /// ldr x9, [x29, #-16] ; x9 = frame addr (tainted via slot)
147 /// mov x0, x9 ; x0 tainted → unsafe TCO
148 /// ```
149 fn has_frame_derived_arg(insts: &[MachineInst]) -> bool {
150 // GP register numbers whose current value is derived from the frame pointer.
151 let mut tainted_regs: HashSet<u8> = HashSet::new();
152 // FP-relative offsets whose memory contents are frame-derived pointers.
153 let mut tainted_slots: HashSet<i64> = HashSet::new();
154 // GP registers known to hold `fp + offset` or `fp - offset`.
155 let mut frame_addr_regs: HashMap<u8, i64> = HashMap::new();
156
157 for inst in insts {
158 if let Some(dst) = written_gp_reg(inst) {
159 frame_addr_regs.remove(&dst);
160 }
161 match inst.opcode {
162 // sub xN, x29, #imm → xN holds a frame-relative address.
163 ArmOpcode::SubImm if op_is_fp(inst, 1) => {
164 if let Some(n) = op_gp(inst, 0) {
165 tainted_regs.insert(n);
166 if let Some(imm) = op_imm(inst, 2) {
167 frame_addr_regs.insert(n, -imm);
168 }
169 }
170 }
171 // add xN, x29, #imm → xN holds a frame-relative address.
172 ArmOpcode::AddImm if op_is_fp(inst, 1) => {
173 if let Some(n) = op_gp(inst, 0) {
174 tainted_regs.insert(n);
175 if let Some(imm) = op_imm(inst, 2) {
176 frame_addr_regs.insert(n, imm);
177 }
178 }
179 }
180 // add xN, xM, #imm where xM is a known frame address.
181 ArmOpcode::AddImm => {
182 if let (Some(dst), Some(src), Some(imm)) =
183 (op_gp(inst, 0), op_gp(inst, 1), op_imm(inst, 2))
184 {
185 if let Some(base_off) = frame_addr_regs.get(&src).copied() {
186 tainted_regs.insert(dst);
187 frame_addr_regs.insert(dst, base_off + imm);
188 } else if tainted_regs.contains(&src) {
189 tainted_regs.insert(dst);
190 }
191 }
192 }
193 // sub xN, xM, #imm where xM is a known frame address.
194 ArmOpcode::SubImm => {
195 if let (Some(dst), Some(src), Some(imm)) =
196 (op_gp(inst, 0), op_gp(inst, 1), op_imm(inst, 2))
197 {
198 if let Some(base_off) = frame_addr_regs.get(&src).copied() {
199 tainted_regs.insert(dst);
200 frame_addr_regs.insert(dst, base_off - imm);
201 } else if tainted_regs.contains(&src) {
202 tainted_regs.insert(dst);
203 }
204 }
205 }
206 // add xN, xM, xP (GEP: propagate taint from either source)
207 ArmOpcode::AddReg
208 if op_gp(inst, 1).is_some_and(|n| tainted_regs.contains(&n))
209 || op_gp(inst, 2).is_some_and(|n| tainted_regs.contains(&n)) =>
210 {
211 if let Some(n) = op_gp(inst, 0) {
212 tainted_regs.insert(n);
213 }
214 }
215 // mov xN, xM (register copy — propagates taint to arg reg)
216 ArmOpcode::MovReg if op_gp(inst, 1).is_some_and(|n| tainted_regs.contains(&n)) => {
217 if let Some(n) = op_gp(inst, 0) {
218 tainted_regs.insert(n);
219 if let Some(src) = op_gp(inst, 1) {
220 if let Some(off) = frame_addr_regs.get(&src).copied() {
221 frame_addr_regs.insert(n, off);
222 }
223 }
224 }
225 }
226 // mul xN, xM, xP (index computation in GEP; conservative)
227 ArmOpcode::Mul
228 if op_gp(inst, 1).is_some_and(|n| tainted_regs.contains(&n))
229 || op_gp(inst, 2).is_some_and(|n| tainted_regs.contains(&n)) =>
230 {
231 if let Some(n) = op_gp(inst, 0) {
232 tainted_regs.insert(n);
233 }
234 }
235 // str xN, [x29, #off] — if xN is tainted, the slot becomes tainted.
236 ArmOpcode::StrImm
237 if op_gp(inst, 0).is_some_and(|n| tainted_regs.contains(&n))
238 && effective_frame_slot_offset(inst, 1, 2, &frame_addr_regs).is_some() =>
239 {
240 if let Some(off) = effective_frame_slot_offset(inst, 1, 2, &frame_addr_regs) {
241 tainted_slots.insert(off);
242 }
243 }
244 // ldr xN, [frame] — if the slot is known tainted, xN becomes
245 // tainted. Also conservatively reject tail calls when any 64-bit
246 // GP register is reloaded from our frame in the tail block: the
247 // slot may have been populated in a predecessor with an escaped
248 // local address, then copied into x0–x7 later in the block.
249 ArmOpcode::LdrImm => {
250 if let Some(off) = effective_frame_slot_offset(inst, 1, 2, &frame_addr_regs) {
251 if let Some(n) = op_gp(inst, 0) {
252 if tainted_slots.contains(&off) || n <= 30 {
253 tainted_regs.insert(n);
254 }
255 }
256 }
257 }
258 _ => {}
259 }
260 }
261
262 // Argument registers are x0–x7 (PhysReg::Gp(0)–Gp(7)).
263 (0u8..8).any(|n| tainted_regs.contains(&n))
264 }
265
266 /// True if operand at `idx` is the frame pointer (x29 = PhysReg::Gp(29)).
267 #[inline]
268 fn op_is_fp(inst: &MachineInst, idx: usize) -> bool {
269 matches!(
270 inst.operands.get(idx),
271 Some(MachineOperand::PhysReg(p)) if *p == PhysReg::FP
272 )
273 }
274
275 /// GP register number (0–30) for the PhysReg::Gp operand at `idx`, or None.
276 #[inline]
277 fn op_gp(inst: &MachineInst, idx: usize) -> Option<u8> {
278 match inst.operands.get(idx)? {
279 MachineOperand::PhysReg(PhysReg::Gp(n)) => Some(*n),
280 _ => None,
281 }
282 }
283
284 /// Integer immediate operand at `idx`, or None.
285 #[inline]
286 fn op_imm(inst: &MachineInst, idx: usize) -> Option<i64> {
287 match inst.operands.get(idx)? {
288 MachineOperand::Imm(v) => Some(*v),
289 MachineOperand::FrameSlot(v) => Some(*v as i64),
290 _ => None,
291 }
292 }
293
294 /// Frame-pointer-relative offset for the operand at `idx`, or None.
295 /// Accepts both `Imm` and `FrameSlot` variants.
296 #[inline]
297 fn op_fp_offset(inst: &MachineInst, idx: usize) -> Option<i64> {
298 match inst.operands.get(idx)? {
299 MachineOperand::Imm(v) => Some(*v),
300 MachineOperand::FrameSlot(v) => Some(*v as i64),
301 _ => None,
302 }
303 }
304
305 /// Effective FP-relative offset addressed by `[base, #off]`, where `base` is
306 /// either FP directly or a GP register previously materialized from FP.
307 #[inline]
308 fn effective_frame_slot_offset(
309 inst: &MachineInst,
310 base_idx: usize,
311 off_idx: usize,
312 frame_addr_regs: &HashMap<u8, i64>,
313 ) -> Option<i64> {
314 let off = op_imm(inst, off_idx).unwrap_or(0);
315 match inst.operands.get(base_idx)? {
316 MachineOperand::PhysReg(p) if *p == PhysReg::FP => Some(off),
317 MachineOperand::PhysReg(PhysReg::Gp(n)) => frame_addr_regs.get(n).map(|base| base + off),
318 _ => None,
319 }
320 }
321
322 /// GP register written by this instruction, if operand 0 is a GP destination.
323 #[inline]
324 fn written_gp_reg(inst: &MachineInst) -> Option<u8> {
325 match inst.opcode {
326 ArmOpcode::AddReg
327 | ArmOpcode::AddImm
328 | ArmOpcode::SubReg
329 | ArmOpcode::SubImm
330 | ArmOpcode::Mul
331 | ArmOpcode::MovReg
332 | ArmOpcode::LdrImm => op_gp(inst, 0),
333 _ => None,
334 }
335 }
336
337 // ---------------------------------------------------------------------------
338 // Tests
339 // ---------------------------------------------------------------------------
340
341 #[cfg(test)]
342 mod tests {
343 use super::*;
344 use crate::codegen::mir::*;
345
346 fn make_block(insts: Vec<MachineInst>) -> MachineBlock {
347 MachineBlock {
348 label: "test".into(),
349 insts,
350 id: MBlockId(0),
351 }
352 }
353
354 fn bl(label: &str) -> MachineInst {
355 MachineInst {
356 opcode: ArmOpcode::Bl,
357 operands: vec![MachineOperand::Extern(label.into())],
358 def: None,
359 }
360 }
361
362 fn ldp_post() -> MachineInst {
363 MachineInst {
364 opcode: ArmOpcode::LdpPost,
365 operands: vec![
366 MachineOperand::PhysReg(PhysReg::FP),
367 MachineOperand::PhysReg(PhysReg::LR),
368 MachineOperand::PhysReg(PhysReg::Sp),
369 ],
370 def: None,
371 }
372 }
373
374 fn ret() -> MachineInst {
375 MachineInst {
376 opcode: ArmOpcode::Ret,
377 operands: vec![],
378 def: None,
379 }
380 }
381
382 fn ldr_callee_restore() -> MachineInst {
383 MachineInst {
384 opcode: ArmOpcode::LdrImm,
385 operands: vec![
386 MachineOperand::PhysReg(PhysReg::Gp(19)),
387 MachineOperand::PhysReg(PhysReg::FP),
388 MachineOperand::Imm(-8),
389 ],
390 def: Some(VRegId(19)),
391 }
392 }
393
394 fn build_mf(blocks: Vec<Vec<MachineInst>>) -> MachineFunction {
395 let mut mf = MachineFunction::new("test".into());
396 // MachineFunction starts with one empty block; overwrite it.
397 mf.blocks[0].insts = blocks[0].clone();
398 for blk_insts in blocks.into_iter().skip(1) {
399 let id = mf.new_block("bb");
400 mf.block_mut(id).insts = blk_insts;
401 }
402 mf
403 }
404
405 #[test]
406 fn void_tail_call_no_callee_saves() {
407 // Pattern: Bl; LdpPost; Ret → LdpPost; B
408 let mut mf = build_mf(vec![vec![bl("_foo"), ldp_post(), ret()]]);
409 tail_call_opt(&mut mf);
410 let insts = &mf.blocks[0].insts;
411 // Should now be: LdpPost, B _foo
412 assert_eq!(insts.len(), 2);
413 assert_eq!(insts[0].opcode, ArmOpcode::LdpPost);
414 assert_eq!(insts[1].opcode, ArmOpcode::B);
415 assert_eq!(insts[1].operands[0], MachineOperand::Extern("_foo".into()));
416 }
417
418 #[test]
419 fn void_tail_call_with_callee_restore() {
420 // Pattern: Bl; LdrImm(restore); LdpPost; Ret → LdrImm; LdpPost; B
421 let mut mf = build_mf(vec![vec![
422 bl("_bar"),
423 ldr_callee_restore(),
424 ldp_post(),
425 ret(),
426 ]]);
427 tail_call_opt(&mut mf);
428 let insts = &mf.blocks[0].insts;
429 assert_eq!(insts.len(), 3);
430 assert_eq!(insts[0].opcode, ArmOpcode::LdrImm);
431 assert_eq!(insts[1].opcode, ArmOpcode::LdpPost);
432 assert_eq!(insts[2].opcode, ArmOpcode::B);
433 }
434
435 #[test]
436 fn no_tco_when_non_callee_restore_between_bl_and_ldp() {
437 // A non-trivial instruction (e.g., MovReg for result capture) blocks TCO.
438 let mov_result = MachineInst {
439 opcode: ArmOpcode::MovReg,
440 operands: vec![
441 MachineOperand::PhysReg(PhysReg::Gp(1)), // x1, not x0
442 MachineOperand::PhysReg(PhysReg::Gp(0)), // x0 (result)
443 ],
444 def: None,
445 };
446 let mut mf = build_mf(vec![vec![bl("_baz"), mov_result, ldp_post(), ret()]]);
447 tail_call_opt(&mut mf);
448 let insts = &mf.blocks[0].insts;
449 // No transformation — Bl should still be present.
450 assert!(
451 insts.iter().any(|i| i.opcode == ArmOpcode::Bl),
452 "Bl should NOT be removed when result is captured in non-return register"
453 );
454 assert!(
455 insts.iter().any(|i| i.opcode == ArmOpcode::Ret),
456 "Ret should still be present"
457 );
458 }
459
460 #[test]
461 fn no_tco_when_not_ending_in_ret() {
462 // Block ending in B (not Ret) — no TCO.
463 let b_inst = MachineInst {
464 opcode: ArmOpcode::B,
465 operands: vec![MachineOperand::BlockRef(MBlockId(1))],
466 def: None,
467 };
468 let mut mf = build_mf(vec![vec![bl("_qux"), ldp_post(), b_inst]]);
469 tail_call_opt(&mut mf);
470 // Should still have Bl (TCO not fired because no Ret).
471 assert!(mf.blocks[0].insts.iter().any(|i| i.opcode == ArmOpcode::Bl));
472 }
473
474 #[test]
475 fn no_tco_when_frame_pointer_is_spilled_through_large_offset_slot() {
476 // Repro shape from fortbite O1:
477 // sub x10, fp, #104
478 // sub x8, fp, #1936
479 // str x10, [x8]
480 // sub x8, fp, #1936
481 // ldr x22, [x8]
482 // mov x1, x22
483 // bl _callee
484 //
485 // The arg in x1 is a pointer into our frame, just spilled/reloaded
486 // through the large-offset materialization form. Tail-branching here
487 // would tear down the frame before the callee consumes x1.
488 let mut mf = build_mf(vec![vec![
489 MachineInst {
490 opcode: ArmOpcode::SubImm,
491 operands: vec![
492 MachineOperand::PhysReg(PhysReg::Gp(10)),
493 MachineOperand::PhysReg(PhysReg::FP),
494 MachineOperand::Imm(104),
495 ],
496 def: None,
497 },
498 MachineInst {
499 opcode: ArmOpcode::SubImm,
500 operands: vec![
501 MachineOperand::PhysReg(PhysReg::Gp(8)),
502 MachineOperand::PhysReg(PhysReg::FP),
503 MachineOperand::Imm(1936),
504 ],
505 def: None,
506 },
507 MachineInst {
508 opcode: ArmOpcode::StrImm,
509 operands: vec![
510 MachineOperand::PhysReg(PhysReg::Gp(10)),
511 MachineOperand::PhysReg(PhysReg::Gp(8)),
512 MachineOperand::Imm(0),
513 ],
514 def: None,
515 },
516 MachineInst {
517 opcode: ArmOpcode::SubImm,
518 operands: vec![
519 MachineOperand::PhysReg(PhysReg::Gp(8)),
520 MachineOperand::PhysReg(PhysReg::FP),
521 MachineOperand::Imm(1936),
522 ],
523 def: None,
524 },
525 MachineInst {
526 opcode: ArmOpcode::LdrImm,
527 operands: vec![
528 MachineOperand::PhysReg(PhysReg::Gp(22)),
529 MachineOperand::PhysReg(PhysReg::Gp(8)),
530 MachineOperand::Imm(0),
531 ],
532 def: None,
533 },
534 MachineInst {
535 opcode: ArmOpcode::MovReg,
536 operands: vec![
537 MachineOperand::PhysReg(PhysReg::Gp(1)),
538 MachineOperand::PhysReg(PhysReg::Gp(22)),
539 ],
540 def: None,
541 },
542 bl("_callee"),
543 ldp_post(),
544 ret(),
545 ]]);
546 tail_call_opt(&mut mf);
547 assert!(
548 mf.blocks[0].insts.iter().any(|i| i.opcode == ArmOpcode::Bl),
549 "tail-call optimization must not erase the call when an arg reloads a spilled frame pointer"
550 );
551 assert!(
552 mf.blocks[0]
553 .insts
554 .iter()
555 .any(|i| i.opcode == ArmOpcode::Ret),
556 "tail-call optimization must leave the normal return path intact"
557 );
558 }
559
560 #[test]
561 fn no_tco_when_arg_register_is_reloaded_from_frame_in_tail_block() {
562 let mut mf = build_mf(vec![
563 vec![
564 MachineInst {
565 opcode: ArmOpcode::SubImm,
566 operands: vec![
567 MachineOperand::PhysReg(PhysReg::Gp(10)),
568 MachineOperand::PhysReg(PhysReg::FP),
569 MachineOperand::Imm(104),
570 ],
571 def: None,
572 },
573 MachineInst {
574 opcode: ArmOpcode::StrImm,
575 operands: vec![
576 MachineOperand::PhysReg(PhysReg::Gp(10)),
577 MachineOperand::PhysReg(PhysReg::FP),
578 MachineOperand::Imm(-1936),
579 ],
580 def: None,
581 },
582 MachineInst {
583 opcode: ArmOpcode::B,
584 operands: vec![MachineOperand::BlockRef(MBlockId(1))],
585 def: None,
586 },
587 ],
588 vec![
589 MachineInst {
590 opcode: ArmOpcode::LdrImm,
591 operands: vec![
592 MachineOperand::PhysReg(PhysReg::Gp(1)),
593 MachineOperand::PhysReg(PhysReg::FP),
594 MachineOperand::Imm(-1936),
595 ],
596 def: None,
597 },
598 bl("_callee"),
599 ldp_post(),
600 ret(),
601 ],
602 ]);
603 tail_call_opt(&mut mf);
604 assert!(
605 mf.blocks[1].insts.iter().any(|i| i.opcode == ArmOpcode::Bl),
606 "tail-call optimization must not fire when x1 is reloaded from our frame in the tail block"
607 );
608 assert!(
609 mf.blocks[1].insts.iter().any(|i| i.opcode == ArmOpcode::Ret),
610 "tail-call optimization must preserve the return when the arg comes from a frame reload"
611 );
612 }
613
614 #[test]
615 fn no_tco_when_temp_register_reloads_frame_slot_before_copying_to_arg() {
616 let mut mf = build_mf(vec![
617 vec![
618 MachineInst {
619 opcode: ArmOpcode::SubImm,
620 operands: vec![
621 MachineOperand::PhysReg(PhysReg::Gp(10)),
622 MachineOperand::PhysReg(PhysReg::FP),
623 MachineOperand::Imm(104),
624 ],
625 def: None,
626 },
627 MachineInst {
628 opcode: ArmOpcode::StrImm,
629 operands: vec![
630 MachineOperand::PhysReg(PhysReg::Gp(10)),
631 MachineOperand::PhysReg(PhysReg::FP),
632 MachineOperand::Imm(-1936),
633 ],
634 def: None,
635 },
636 MachineInst {
637 opcode: ArmOpcode::B,
638 operands: vec![MachineOperand::BlockRef(MBlockId(1))],
639 def: None,
640 },
641 ],
642 vec![
643 MachineInst {
644 opcode: ArmOpcode::LdrImm,
645 operands: vec![
646 MachineOperand::PhysReg(PhysReg::Gp(22)),
647 MachineOperand::PhysReg(PhysReg::FP),
648 MachineOperand::Imm(-1936),
649 ],
650 def: None,
651 },
652 MachineInst {
653 opcode: ArmOpcode::MovReg,
654 operands: vec![
655 MachineOperand::PhysReg(PhysReg::Gp(1)),
656 MachineOperand::PhysReg(PhysReg::Gp(22)),
657 ],
658 def: None,
659 },
660 bl("_callee"),
661 ldp_post(),
662 ret(),
663 ],
664 ]);
665 tail_call_opt(&mut mf);
666 assert!(
667 mf.blocks[1].insts.iter().any(|i| i.opcode == ArmOpcode::Bl),
668 "tail-call optimization must not fire when a temp reloads a frame slot before copying it to x1"
669 );
670 assert!(
671 mf.blocks[1].insts.iter().any(|i| i.opcode == ArmOpcode::Ret),
672 "tail-call optimization must preserve the return when a frame reload feeds x1 indirectly"
673 );
674 }
675 }
676