| 1 | //! ARM64 conditional-branch range relaxation. |
| 2 | //! |
| 3 | //! `B.cond` carries a 19-bit signed PC-relative immediate, so its |
| 4 | //! target must lie within ±1MB of the branch. Single-function bodies |
| 5 | //! that exceed that span (notably `stdlib_slaruv`, ~363KB of compiled |
| 6 | //! assembly with cross-body conditional jumps) trip the assembler's |
| 7 | //! "fixup value out of range" error. |
| 8 | //! |
| 9 | //! This pass runs after register allocation and tail-call optimization, |
| 10 | //! before assembly emission. It walks the function's blocks in linear |
| 11 | //! order, computes an approximate byte offset for every block label, |
| 12 | //! and for each `B.cond far_label` whose distance exceeds the |
| 13 | //! conditional-branch encoding window expands it to the trampoline |
| 14 | //! pattern: |
| 15 | //! |
| 16 | //! ```text |
| 17 | //! Before: After (when target is too far): |
| 18 | //! b.cond far_label b.{!cond} skip ; one branch over |
| 19 | //! b far_label ; ±128MB unconditional |
| 20 | //! skip: ; falls through |
| 21 | //! ``` |
| 22 | //! |
| 23 | //! Inserting an extra block + branch shifts subsequent block offsets, |
| 24 | //! which can push other previously-in-range branches out of range, |
| 25 | //! so we iterate the pass until no further insertions are needed |
| 26 | //! (capped at four iterations — non-convergence triggers a panic that |
| 27 | //! survives release builds and produces a clear ICE for diagnostics). |
| 28 | //! |
| 29 | //! ARM64 unconditional `B` and `BL` carry a 26-bit immediate (±128MB |
| 30 | //! range), so the trampoline's unconditional `B` itself never needs |
| 31 | //! relaxation — no realistic single Fortran source produces a |
| 32 | //! function body larger than 128MB. |
| 33 | |
| 34 | use super::emit::emit_inst_text; |
| 35 | use super::mir::{ |
| 36 | ArmCond, ArmOpcode, MBlockId, MachineBlock, MachineFunction, MachineInst, MachineOperand, |
| 37 | }; |
| 38 | |
/// Maximum signed offset (in bytes) reachable by a `B.cond` and by
/// `cbz/cbnz`. The 19-bit signed immediate is scaled by 4, giving
/// ±(2^20) bytes. Subtract a safety margin (64 bytes = 16 emitted
/// instructions) so we never sit right on the edge: a downstream
/// peephole insertion or fall-through fixup that nudges the offset
/// by a single instruction shouldn't push us over.
const COND_BRANCH_LIMIT: i64 = (1 << 20) - 64;

/// Maximum signed offset reachable by `tbz`/`tbnz`. The 14-bit
/// signed immediate is scaled by 4, giving ±(2^15) bytes — much
/// tighter than the cond-branch / cbz limit. Same safety margin.
const TBZ_BRANCH_LIMIT: i64 = (1 << 15) - 64;

/// Iteration cap for the relax loop in `relax_branches`. In practice
/// 1–2 passes suffice; if convergence genuinely doesn't happen,
/// something pathological is going on and we want a loud failure
/// rather than a silent infinite loop.
const MAX_ITERATIONS: u32 = 4;
| 56 | |
| 57 | /// Run branch relaxation on a machine function. Idempotent: when no |
| 58 | /// `B.cond` overflows, the function is returned unchanged. |
| 59 | pub fn relax_branches(mf: &mut MachineFunction) { |
| 60 | for _ in 0..MAX_ITERATIONS { |
| 61 | if !relax_once(mf) { |
| 62 | return; |
| 63 | } |
| 64 | } |
| 65 | // Convergence failure — extremely unlikely. Better to ICE here |
| 66 | // than emit an assembly file the assembler will reject. |
| 67 | panic!( |
| 68 | "branch relaxation did not converge for function {} after {} iterations", |
| 69 | mf.name, MAX_ITERATIONS |
| 70 | ); |
| 71 | } |
| 72 | |
| 73 | /// Single relaxation pass. Returns `true` when at least one branch |
| 74 | /// was expanded (caller should iterate); `false` when every `B.cond` |
| 75 | /// is in range. |
| 76 | fn relax_once(mf: &mut MachineFunction) -> bool { |
| 77 | // Compute byte offsets for every block label using the actual |
| 78 | // emit-time instruction count. Several MIR opcodes lower to |
| 79 | // multiple ARM64 instructions (AdrpLdr / AdrpAdd, prologue + |
| 80 | // stack-probe sequences, large-immediate movz/movk chains, etc.), |
| 81 | // so a flat 4-bytes-per-MachineInst estimate would systematically |
| 82 | // under-shoot the real offsets in functions like stdlib_slaruv |
| 83 | // and miss far branches that need relaxation. |
| 84 | let block_offsets = compute_block_offsets(mf); |
| 85 | |
| 86 | // Collect overflow sites before mutating anything. We also record |
| 87 | // the per-instruction prefix offset within each block so we can |
| 88 | // skip past wide-emit insts that precede the conditional branch. |
| 89 | let mut overflows: Vec<OverflowSite> = Vec::new(); |
| 90 | for block in &mf.blocks { |
| 91 | let block_offset = block_offsets[&block.id]; |
| 92 | let mut running = 0i64; |
| 93 | for (inst_idx, inst) in block.insts.iter().enumerate() { |
| 94 | if let Some((target, limit, kind)) = relaxable_cond_branch(inst) { |
| 95 | if let Some(&target_offset) = block_offsets.get(&target) { |
| 96 | let branch_offset = block_offset + running; |
| 97 | let delta = target_offset - branch_offset; |
| 98 | if delta.abs() > limit { |
| 99 | overflows.push(OverflowSite { |
| 100 | block_id: block.id, |
| 101 | inst_idx, |
| 102 | target, |
| 103 | kind, |
| 104 | }); |
| 105 | } |
| 106 | } |
| 107 | } |
| 108 | running += inst_emit_bytes(inst, mf) as i64; |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | if overflows.is_empty() { |
| 113 | return false; |
| 114 | } |
| 115 | |
| 116 | // Expand each overflow. Sort in reverse order of (block_id index, |
| 117 | // inst_idx) so earlier expansions don't invalidate later inst_idx |
| 118 | // values — block-vec insertions happen at the back of the affected |
| 119 | // block group. |
| 120 | overflows.sort_by(|a, b| { |
| 121 | let a_pos = block_position(mf, a.block_id); |
| 122 | let b_pos = block_position(mf, b.block_id); |
| 123 | b_pos.cmp(&a_pos).then(b.inst_idx.cmp(&a.inst_idx)) |
| 124 | }); |
| 125 | |
| 126 | for site in overflows { |
| 127 | expand_to_trampoline(mf, site); |
| 128 | } |
| 129 | |
| 130 | true |
| 131 | } |
| 132 | |
/// One out-of-range conditional branch discovered during a scan pass,
/// identified by its position in the block list plus everything needed
/// to rebuild the inverted form.
#[derive(Clone)]
struct OverflowSite {
    // Block containing the overflowing branch.
    block_id: MBlockId,
    // Index of the branch within that block's `insts`.
    inst_idx: usize,
    // The far-away block the branch wants to reach.
    target: MBlockId,
    // How to invert the branch for the trampoline's skip hop.
    kind: RelaxKind,
}
| 140 | |
/// Per-opcode description of the inverted branch used in the
/// trampoline expansion.
#[derive(Clone)]
enum RelaxKind {
    /// `b.cond far` → `b.{!cond} skip; b far`
    BCond { cond: ArmCond },
    /// `cbz/cbnz reg far` → `cbnz/cbz reg skip; b far`
    Cbz { invert_to: ArmOpcode, reg: MachineOperand },
    /// `tbz/tbnz reg, #bit far` → `tbnz/tbz reg, #bit skip; b far`
    Tbz {
        // Already-inverted opcode to emit for the skip hop.
        invert_to: ArmOpcode,
        // Register operand carried over unchanged.
        reg: MachineOperand,
        // Bit-number immediate carried over unchanged.
        bit: i64,
    },
}
| 154 | |
| 155 | /// Inspect a machine instruction; if it is a conditional branch that |
| 156 | /// might need relaxation, return the target, the per-opcode range |
| 157 | /// limit, and a kind descriptor capturing what the inverted form |
| 158 | /// looks like. Returns `None` for non-branch instructions or branches |
| 159 | /// whose targets are unrecoverable from operands. |
| 160 | fn relaxable_cond_branch(inst: &MachineInst) -> Option<(MBlockId, i64, RelaxKind)> { |
| 161 | match inst.opcode { |
| 162 | ArmOpcode::BCond => { |
| 163 | let target = bcond_target(inst)?; |
| 164 | let cond = bcond_cond(inst)?; |
| 165 | Some((target, COND_BRANCH_LIMIT, RelaxKind::BCond { cond })) |
| 166 | } |
| 167 | ArmOpcode::Cbz | ArmOpcode::Cbnz => { |
| 168 | // Operands: [reg, BlockRef] |
| 169 | let target = match inst.operands.get(1)? { |
| 170 | MachineOperand::BlockRef(id) => *id, |
| 171 | _ => return None, |
| 172 | }; |
| 173 | let reg = inst.operands.first()?.clone(); |
| 174 | let invert_to = match inst.opcode { |
| 175 | ArmOpcode::Cbz => ArmOpcode::Cbnz, |
| 176 | _ => ArmOpcode::Cbz, |
| 177 | }; |
| 178 | Some((target, COND_BRANCH_LIMIT, RelaxKind::Cbz { invert_to, reg })) |
| 179 | } |
| 180 | ArmOpcode::Tbz | ArmOpcode::Tbnz => { |
| 181 | // Operands: [reg, Imm(bit), BlockRef] |
| 182 | let bit = match inst.operands.get(1)? { |
| 183 | MachineOperand::Imm(v) => *v, |
| 184 | _ => return None, |
| 185 | }; |
| 186 | let target = match inst.operands.get(2)? { |
| 187 | MachineOperand::BlockRef(id) => *id, |
| 188 | _ => return None, |
| 189 | }; |
| 190 | let reg = inst.operands.first()?.clone(); |
| 191 | let invert_to = match inst.opcode { |
| 192 | ArmOpcode::Tbz => ArmOpcode::Tbnz, |
| 193 | _ => ArmOpcode::Tbz, |
| 194 | }; |
| 195 | Some(( |
| 196 | target, |
| 197 | TBZ_BRANCH_LIMIT, |
| 198 | RelaxKind::Tbz { |
| 199 | invert_to, |
| 200 | reg, |
| 201 | bit, |
| 202 | }, |
| 203 | )) |
| 204 | } |
| 205 | _ => None, |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | /// Compute byte offsets for every block label, summing the actual |
| 210 | /// emit-time instruction byte count for every MachineInst in linear |
| 211 | /// order. Each emitted ARM64 instruction is 4 bytes; we count the |
| 212 | /// `\n` separators in the emitted text to figure out how many real |
| 213 | /// instructions an opcode produces (most are 1, but pseudo-ops and |
| 214 | /// large-immediate forms emit 2+). |
| 215 | fn compute_block_offsets(mf: &MachineFunction) -> std::collections::HashMap<MBlockId, i64> { |
| 216 | let mut offsets = std::collections::HashMap::with_capacity(mf.blocks.len()); |
| 217 | let mut running: i64 = 0; |
| 218 | for block in &mf.blocks { |
| 219 | offsets.insert(block.id, running); |
| 220 | for inst in &block.insts { |
| 221 | running += inst_emit_bytes(inst, mf) as i64; |
| 222 | } |
| 223 | } |
| 224 | offsets |
| 225 | } |
| 226 | |
| 227 | /// Number of bytes a single MachineInst emits at assembly time. |
| 228 | /// Counted from the rendered text — `emit_inst_text` already produces |
| 229 | /// exactly the lines `emit_function` would, so newline-count + 1 |
| 230 | /// matches the real instruction count without re-deriving each |
| 231 | /// opcode's expansion rules here. |
| 232 | fn inst_emit_bytes(inst: &MachineInst, mf: &MachineFunction) -> u32 { |
| 233 | let text = emit_inst_text(inst, mf); |
| 234 | let lines = text.matches('\n').count() as u32 + 1; |
| 235 | 4 * lines |
| 236 | } |
| 237 | |
| 238 | fn block_position(mf: &MachineFunction, id: MBlockId) -> usize { |
| 239 | mf.blocks |
| 240 | .iter() |
| 241 | .position(|b| b.id == id) |
| 242 | .expect("block id present in block list") |
| 243 | } |
| 244 | |
| 245 | fn bcond_target(inst: &MachineInst) -> Option<MBlockId> { |
| 246 | inst.operands.iter().find_map(|op| match op { |
| 247 | MachineOperand::BlockRef(id) => Some(*id), |
| 248 | _ => None, |
| 249 | }) |
| 250 | } |
| 251 | |
| 252 | fn bcond_cond(inst: &MachineInst) -> Option<ArmCond> { |
| 253 | inst.operands.iter().find_map(|op| match op { |
| 254 | MachineOperand::Cond(c) => Some(*c), |
| 255 | _ => None, |
| 256 | }) |
| 257 | } |
| 258 | |
| 259 | /// Replace an out-of-range conditional branch at `(block_id, inst_idx)` |
| 260 | /// with the trampoline pattern. Allocates a fresh skip block and |
| 261 | /// inserts it right after the original block in `mf.blocks`. The |
| 262 | /// inverted-condition branch jumps over the unconditional `b`, which |
| 263 | /// has the full ±128MB reach of the unconditional encoding — so the |
| 264 | /// trampoline itself never needs further relaxation regardless of |
| 265 | /// which conditional opcode we started from. |
| 266 | fn expand_to_trampoline(mf: &mut MachineFunction, site: OverflowSite) { |
| 267 | let OverflowSite { |
| 268 | block_id, |
| 269 | inst_idx, |
| 270 | target: far_target, |
| 271 | kind, |
| 272 | } = site; |
| 273 | |
| 274 | // Allocate a fresh skip-block id without disturbing any other |
| 275 | // bookkeeping. We can't call `mf.new_block` directly because it |
| 276 | // pushes onto the back of the vec; we want the skip block |
| 277 | // physically adjacent to the original block (so its label |
| 278 | // fall-through reaches the original successor). |
| 279 | let skip_id = MBlockId(mf.next_block_id()); |
| 280 | let skip_label = format!("{}_relax{}", mf.block(block_id).label, skip_id.0); |
| 281 | |
| 282 | let inverted = match kind { |
| 283 | RelaxKind::BCond { cond } => MachineInst { |
| 284 | opcode: ArmOpcode::BCond, |
| 285 | operands: vec![ |
| 286 | MachineOperand::Cond(cond.inverse()), |
| 287 | MachineOperand::BlockRef(skip_id), |
| 288 | ], |
| 289 | def: None, |
| 290 | }, |
| 291 | RelaxKind::Cbz { invert_to, reg } => MachineInst { |
| 292 | opcode: invert_to, |
| 293 | operands: vec![reg, MachineOperand::BlockRef(skip_id)], |
| 294 | def: None, |
| 295 | }, |
| 296 | RelaxKind::Tbz { |
| 297 | invert_to, |
| 298 | reg, |
| 299 | bit, |
| 300 | } => MachineInst { |
| 301 | opcode: invert_to, |
| 302 | operands: vec![ |
| 303 | reg, |
| 304 | MachineOperand::Imm(bit), |
| 305 | MachineOperand::BlockRef(skip_id), |
| 306 | ], |
| 307 | def: None, |
| 308 | }, |
| 309 | }; |
| 310 | let unconditional_b = MachineInst { |
| 311 | opcode: ArmOpcode::B, |
| 312 | operands: vec![MachineOperand::BlockRef(far_target)], |
| 313 | def: None, |
| 314 | }; |
| 315 | |
| 316 | // Splice them in, preserving any instructions that followed the |
| 317 | // original branch. Those trailing instructions (uncommon) move |
| 318 | // into the skip block to keep program order. |
| 319 | let block_pos = block_position(mf, block_id); |
| 320 | let trailing = { |
| 321 | let block = &mut mf.blocks[block_pos]; |
| 322 | let trailing: Vec<MachineInst> = block.insts.drain(inst_idx + 1..).collect(); |
| 323 | block.insts[inst_idx] = inverted; |
| 324 | block.insts.push(unconditional_b); |
| 325 | trailing |
| 326 | }; |
| 327 | |
| 328 | let mut skip_block = MachineBlock::new(skip_id, skip_label); |
| 329 | skip_block.insts = trailing; |
| 330 | mf.blocks.insert(block_pos + 1, skip_block); |
| 331 | } |
| 332 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time confirmation that every ArmCond's inverse is
    /// involutive. Easy to typo otherwise.
    #[test]
    fn arm_cond_inverse_is_involutive() {
        for c in [
            ArmCond::Eq,
            ArmCond::Ne,
            ArmCond::Hs,
            ArmCond::Lo,
            ArmCond::Mi,
            ArmCond::Pl,
            ArmCond::Hi,
            ArmCond::Ls,
            ArmCond::Ge,
            ArmCond::Lt,
            ArmCond::Gt,
            ArmCond::Le,
        ] {
            assert_eq!(c.inverse().inverse(), c, "{:?}", c);
        }
    }

    /// In-range branches stay as a single instruction.
    #[test]
    fn in_range_bcond_unchanged() {
        let mut mf = MachineFunction::new("test".into());
        let target = mf.new_block("after");
        let entry_id = mf.blocks[0].id;
        mf.blocks[0].insts.push(MachineInst {
            opcode: ArmOpcode::BCond,
            operands: vec![
                MachineOperand::Cond(ArmCond::Ne),
                MachineOperand::BlockRef(target),
            ],
            def: None,
        });
        // Silence the unused-variable warning; entry_id exists only
        // to document which block received the branch.
        let _ = entry_id;

        let block_count_before = mf.blocks.len();
        relax_branches(&mut mf);
        assert_eq!(mf.blocks.len(), block_count_before);
        assert_eq!(mf.blocks[0].insts.len(), 1);
        assert_eq!(mf.blocks[0].insts[0].opcode, ArmOpcode::BCond);
    }

    /// Out-of-range branch expands to a trampoline. We force
    /// overflow by stuffing one block with enough nops to push the
    /// target's offset past ±1MB.
    #[test]
    fn out_of_range_bcond_expands() {
        let mut mf = MachineFunction::new("test".into());
        let target = mf.new_block("after_padding");
        let padding = mf.new_block("padding");

        // Entry: BCond Ne -> target.
        mf.blocks[0].insts.push(MachineInst {
            opcode: ArmOpcode::BCond,
            operands: vec![
                MachineOperand::Cond(ArmCond::Ne),
                MachineOperand::BlockRef(target),
            ],
            def: None,
        });
        // Branch into padding so the entry block has at least one
        // successor, then padding stuffs enough Nops to push the
        // `after_padding` label past ±1MB.
        // (+16 nops of slack to clear the limit's safety margin.)
        let pad_inst_count = (COND_BRANCH_LIMIT as usize) / 4 + 16;
        let padding_pos = mf.blocks.iter().position(|b| b.id == padding).unwrap();
        mf.blocks[padding_pos].insts = (0..pad_inst_count)
            .map(|_| MachineInst {
                opcode: ArmOpcode::Nop,
                operands: vec![],
                def: None,
            })
            .collect();
        // Layout order: entry → padding → target. The padding block
        // pushes target's offset out of range from the entry's BCond.
        mf.blocks.swap(1, 2); // ensure entry, padding, target order

        let blocks_before = mf.blocks.len();
        relax_branches(&mut mf);
        // One new skip block was inserted right after entry.
        assert_eq!(mf.blocks.len(), blocks_before + 1);
        // Entry now ends with: BCond(inverted) -> skip; B target.
        let entry = &mf.blocks[0];
        assert_eq!(entry.insts.len(), 2);
        assert_eq!(entry.insts[0].opcode, ArmOpcode::BCond);
        assert!(matches!(
            entry.insts[0].operands[0],
            MachineOperand::Cond(ArmCond::Eq) // inverse of Ne
        ));
        assert_eq!(entry.insts[1].opcode, ArmOpcode::B);
        assert!(matches!(
            entry.insts[1].operands[0],
            MachineOperand::BlockRef(t) if t == target
        ));
    }

    /// In-range cbz stays as a single instruction.
    #[test]
    fn in_range_cbz_unchanged() {
        let mut mf = MachineFunction::new("test".into());
        let target = mf.new_block("after");
        let test_reg = mf.new_vreg(crate::codegen::mir::RegClass::Gp32);
        mf.blocks[0].insts.push(MachineInst {
            opcode: ArmOpcode::Cbz,
            operands: vec![
                MachineOperand::VReg(test_reg),
                MachineOperand::BlockRef(target),
            ],
            def: None,
        });

        let block_count_before = mf.blocks.len();
        relax_branches(&mut mf);
        assert_eq!(mf.blocks.len(), block_count_before);
        assert_eq!(mf.blocks[0].insts.len(), 1);
        assert_eq!(mf.blocks[0].insts[0].opcode, ArmOpcode::Cbz);
    }

    /// Out-of-range cbz expands to: cbnz reg, skip; b far. Padding
    /// pushes the target past ±1MB so the original cbz can no longer
    /// reach it directly. The inverted opcode (Cbnz) jumps over the
    /// unconditional `b`, which has the full ±128MB reach and never
    /// itself needs further relaxation.
    #[test]
    fn out_of_range_cbz_expands_to_inverted_skip() {
        let mut mf = MachineFunction::new("test".into());
        let target = mf.new_block("after_padding");
        let padding = mf.new_block("padding");
        let test_reg = mf.new_vreg(crate::codegen::mir::RegClass::Gp32);

        mf.blocks[0].insts.push(MachineInst {
            opcode: ArmOpcode::Cbz,
            operands: vec![
                MachineOperand::VReg(test_reg),
                MachineOperand::BlockRef(target),
            ],
            def: None,
        });
        // cbz shares the ±1MB window with b.cond, so the same nop
        // count overflows it.
        let pad_inst_count = (COND_BRANCH_LIMIT as usize) / 4 + 16;
        let padding_pos = mf.blocks.iter().position(|b| b.id == padding).unwrap();
        mf.blocks[padding_pos].insts = (0..pad_inst_count)
            .map(|_| MachineInst {
                opcode: ArmOpcode::Nop,
                operands: vec![],
                def: None,
            })
            .collect();
        mf.blocks.swap(1, 2);

        let blocks_before = mf.blocks.len();
        relax_branches(&mut mf);
        assert_eq!(mf.blocks.len(), blocks_before + 1);
        let entry = &mf.blocks[0];
        assert_eq!(entry.insts.len(), 2);
        assert_eq!(entry.insts[0].opcode, ArmOpcode::Cbnz, "cbz inverts to cbnz");
        assert_eq!(entry.insts[1].opcode, ArmOpcode::B);
        assert!(matches!(
            entry.insts[1].operands[0],
            MachineOperand::BlockRef(t) if t == target
        ));
    }

    /// Out-of-range cbnz inverts to cbz on the skip path.
    #[test]
    fn out_of_range_cbnz_expands_to_cbz_skip() {
        let mut mf = MachineFunction::new("test".into());
        let target = mf.new_block("after_padding");
        let padding = mf.new_block("padding");
        let test_reg = mf.new_vreg(crate::codegen::mir::RegClass::Gp64);

        mf.blocks[0].insts.push(MachineInst {
            opcode: ArmOpcode::Cbnz,
            operands: vec![
                MachineOperand::VReg(test_reg),
                MachineOperand::BlockRef(target),
            ],
            def: None,
        });
        let pad_inst_count = (COND_BRANCH_LIMIT as usize) / 4 + 16;
        let padding_pos = mf.blocks.iter().position(|b| b.id == padding).unwrap();
        mf.blocks[padding_pos].insts = (0..pad_inst_count)
            .map(|_| MachineInst {
                opcode: ArmOpcode::Nop,
                operands: vec![],
                def: None,
            })
            .collect();
        mf.blocks.swap(1, 2);

        relax_branches(&mut mf);
        let entry = &mf.blocks[0];
        assert_eq!(entry.insts[0].opcode, ArmOpcode::Cbz, "cbnz inverts to cbz");
    }

    /// In-range tbz stays as a single instruction.
    #[test]
    fn in_range_tbz_unchanged() {
        let mut mf = MachineFunction::new("test".into());
        let target = mf.new_block("after");
        let test_reg = mf.new_vreg(crate::codegen::mir::RegClass::Gp64);
        mf.blocks[0].insts.push(MachineInst {
            opcode: ArmOpcode::Tbz,
            operands: vec![
                MachineOperand::VReg(test_reg),
                MachineOperand::Imm(5),
                MachineOperand::BlockRef(target),
            ],
            def: None,
        });

        let block_count_before = mf.blocks.len();
        relax_branches(&mut mf);
        assert_eq!(mf.blocks.len(), block_count_before);
        assert_eq!(mf.blocks[0].insts.len(), 1);
        assert_eq!(mf.blocks[0].insts[0].opcode, ArmOpcode::Tbz);
    }

    /// Out-of-range tbz uses the tighter ±32KB limit and expands. Tbz
    /// is the most range-restricted of the four conditional branches
    /// we emit, so anything past 32K-ish bytes from the branch must
    /// trip relaxation.
    #[test]
    fn out_of_range_tbz_expands_to_tbnz_skip() {
        let mut mf = MachineFunction::new("test".into());
        let target = mf.new_block("after_padding");
        let padding = mf.new_block("padding");
        let test_reg = mf.new_vreg(crate::codegen::mir::RegClass::Gp64);

        mf.blocks[0].insts.push(MachineInst {
            opcode: ArmOpcode::Tbz,
            operands: vec![
                MachineOperand::VReg(test_reg),
                MachineOperand::Imm(7),
                MachineOperand::BlockRef(target),
            ],
            def: None,
        });
        // Tbz's ±32KB range is much tighter than the ±1MB cond-branch
        // range — only ~8K nops are needed to overflow.
        let pad_inst_count = (TBZ_BRANCH_LIMIT as usize) / 4 + 16;
        let padding_pos = mf.blocks.iter().position(|b| b.id == padding).unwrap();
        mf.blocks[padding_pos].insts = (0..pad_inst_count)
            .map(|_| MachineInst {
                opcode: ArmOpcode::Nop,
                operands: vec![],
                def: None,
            })
            .collect();
        mf.blocks.swap(1, 2);

        let blocks_before = mf.blocks.len();
        relax_branches(&mut mf);
        assert_eq!(mf.blocks.len(), blocks_before + 1);
        let entry = &mf.blocks[0];
        assert_eq!(entry.insts.len(), 2);
        assert_eq!(entry.insts[0].opcode, ArmOpcode::Tbnz, "tbz inverts to tbnz");
        // Bit operand survives the rewrite.
        assert!(matches!(entry.insts[0].operands[1], MachineOperand::Imm(7)));
        assert_eq!(entry.insts[1].opcode, ArmOpcode::B);
    }
}
| 600 |