| 1 | //! Mem2Reg — promote scalar stack slots into pure SSA values. |
| 2 | //! |
| 3 | //! Walks every function looking for `Alloca` slots whose address |
| 4 | //! never escapes (the only uses are `Load(alloca)` and |
| 5 | //! `Store(_, alloca)`), then rewrites the function so that each |
| 6 | //! such slot becomes a flow of SSA values: every `Load` is replaced |
| 7 | //! with a direct reference to the "current" value of the slot, every |
| 8 | //! `Store` updates that current value, and at control-flow merges |
| 9 | //! we insert **block parameters** (our IR's phi-node equivalent) |
| 10 | //! plus matching branch args in every predecessor. |
| 11 | //! |
| 12 | //! This is the classical Cytron-Ferrante-Rosen-Wegman-Zadeck |
| 13 | //! algorithm adapted for the block-parameter IR: |
| 14 | //! |
| 15 | //! 1. **Find promotable allocas**. An alloca is promotable iff |
| 16 | //! every use of its result ValueId is either a `Load(alloca)` |
| 17 | //! or a `Store(v, alloca)`. Any other use (as a GEP base, a |
| 18 | //! Call arg, an `ExtractField` aggregate, etc.) means the |
| 19 | //! address could escape and we can't safely promote. |
| 20 | //! |
| 21 | //! 2. **Compute phi placements** via iterated dominance frontiers |
| 22 | //! over the set of blocks that store to each promotable alloca. |
| 23 | //! For each such frontier block we reserve a fresh `BlockParam` |
| 24 | //! that will carry the alloca's value at that join point. |
| 25 | //! |
| 26 | //! 3. **Insert block params** with `Undef` initial values. Record |
| 27 | //! the `(alloca → block → new_param_id)` mapping so we can |
| 28 | //! append matching branch args during renaming. |
| 29 | //! |
| 30 | //! 4. **Renaming walk**. Do a DFS of the dominator tree maintaining |
| 31 | //! a per-alloca stack of "current value". Entering a block |
| 32 | //! pushes any new params belonging to that block; every `Store` |
| 33 | //! pushes its value; every `Load` is replaced by a reference |
| 34 | //! to the stack-top and its defining instruction is marked |
| 35 | //! dead. Leaving a block pops everything we pushed. At each |
| 36 | //! terminator, append the current value of each phi alloca to |
| 37 | //! the corresponding successor's branch arg list. |
| 38 | //! |
| 39 | //! 5. **Delete** the now-dead `Alloca`, `Load`, and `Store` |
| 40 | //! instructions. DCE would clean them up eventually but doing |
| 41 | //! it here makes the IR tidy immediately and gives downstream |
| 42 | //! passes (LICM in particular) a cleaner view. |
| 43 | //! |
| 44 | //! ## Why this matters |
| 45 | //! |
| 46 | //! Before mem2reg, every Fortran local lives in an alloca slot and |
| 47 | //! is accessed via `Load`/`Store`. LICM conservatively refuses to |
| 48 | //! hoist `Load` (no alias analysis), so loop invariants never move. |
| 49 | //! After mem2reg, the same locals become pure SSA values: invariant |
| 50 | //! computations are visible to LICM, CSE dedupes across former |
| 51 | //! store/load pairs, and const_fold propagates constants through |
| 52 | //! local variables at compile time. Most of the optimizer's hoped-for |
| 53 | //! wins are unlocked by this pass. |
| 54 | //! |
| 55 | //! ## Scope |
| 56 | //! |
| 57 | //! This pass handles **scalar allocas only**. Aggregate allocas |
| 58 | //! (fixed-size arrays, structs) require SROA — scalar replacement |
| 59 | //! of aggregates — which decomposes them into individual scalars |
| 60 | //! first. SROA is a separate pass that will land alongside the rest |
| 61 | //! of Sprint 29's memory optimizations. |
| 62 | |
| 63 | use super::pass::Pass; |
| 64 | use super::util::{ |
| 65 | compute_dominance_frontiers, compute_immediate_dominators, dominator_tree_children, |
| 66 | prune_unreachable, substitute_uses, |
| 67 | }; |
| 68 | use crate::ir::inst::*; |
| 69 | use crate::ir::types::IrType; |
| 70 | use std::collections::{HashMap, HashSet, VecDeque}; |
| 71 | |
/// The mem2reg scalar promotion pass. Stateless — all per-function
/// bookkeeping lives inside `promote_function`'s locals, so a single
/// instance can be reused across modules.
pub struct Mem2Reg;
| 74 | |
| 75 | impl Pass for Mem2Reg { |
| 76 | fn name(&self) -> &'static str { |
| 77 | "mem2reg" |
| 78 | } |
| 79 | |
| 80 | fn run(&self, module: &mut Module) -> bool { |
| 81 | let mut changed = false; |
| 82 | for func in &mut module.functions { |
| 83 | if promote_function(func) { |
| 84 | changed = true; |
| 85 | } |
| 86 | } |
| 87 | changed |
| 88 | } |
| 89 | } |
| 90 | |
/// An alloca eligible for promotion. `alloca_id` is the `ValueId`
/// produced by the original `Alloca` instruction; `pointee_ty` is
/// the type of the slot's contents.
struct Promotable {
    // ValueId of the `Alloca` instruction itself — i.e. the slot's
    // address. Every promotable use is `Load(alloca_id)` or
    // `Store(_, alloca_id)`.
    alloca_id: ValueId,
    // Type stored in the slot. Becomes the type of the entry-block
    // `Undef` sentinel and of every phi `BlockParam` created for
    // this slot during promotion.
    pointee_ty: IrType,
}
| 98 | |
/// Promote every eligible scalar alloca in `func` into pure SSA
/// values. Returns `true` if the function changed — either because
/// unreachable blocks were pruned (phase 0) or because at least one
/// alloca was promoted (phases 1–5).
fn promote_function(func: &mut Function) -> bool {
    // ---- Phase 0: drop unreachable blocks ------------------------
    //
    // Audit M1: every later phase walks `func.blocks` and assumes
    // that any block it visits is reachable from entry. Without
    // this prune:
    //
    // * Phase 2 (`store_blocks`) collects stores from unreachable
    //   blocks, which causes the iterated-DF closure to insert
    //   phi params even though no rename walk will visit those
    //   blocks (the rename walk uses the dom-tree, which itself
    //   excludes unreachable nodes).
    //
    // * Phase 4 (rename walk) never visits unreachable blocks, so
    //   their `Load`/`Store` instructions never make it into
    //   `dead_loads` / `dead_stores`. Phase 5 then deletes the
    //   underlying alloca but leaves the dangling load/store
    //   referencing it — invalid IR that the verifier rejects.
    //
    // Pruning first makes "every block we look at is reachable" a
    // structural invariant of the rest of the pass.
    let pruned = prune_unreachable(func);

    // ---- Phase 1: find promotable allocas -------------------------
    let promotable = find_promotable_allocas(func);
    if promotable.is_empty() {
        // Nothing to promote, but pruning alone may have changed
        // the function — report that honestly.
        return pruned;
    }

    // Map from alloca ValueId → index into `promotable`. We use the
    // index as a compact key throughout the rest of the pass.
    let alloca_index: HashMap<ValueId, usize> = promotable
        .iter()
        .enumerate()
        .map(|(i, p)| (p.alloca_id, i))
        .collect();

    // ---- Phase 2: compute phi insertion blocks --------------------
    // For each alloca, collect the set of blocks that store to it.
    let mut store_blocks: Vec<HashSet<BlockId>> = vec![HashSet::new(); promotable.len()];
    for block in &func.blocks {
        for inst in &block.insts {
            if let InstKind::Store(_, addr) = &inst.kind {
                if let Some(&idx) = alloca_index.get(addr) {
                    store_blocks[idx].insert(block.id);
                }
            }
        }
    }

    // Iterated dominance frontier: for each alloca, closure of DF
    // over its store-block set.
    let df = compute_dominance_frontiers(func);
    let mut phi_blocks: Vec<HashSet<BlockId>> = vec![HashSet::new(); promotable.len()];
    for (idx, stores) in store_blocks.iter().enumerate() {
        let mut worklist: VecDeque<BlockId> = stores.iter().copied().collect();
        let mut in_phi: HashSet<BlockId> = HashSet::new();
        while let Some(b) = worklist.pop_front() {
            if let Some(frontier) = df.get(&b) {
                for &y in frontier {
                    if in_phi.insert(y) {
                        // Newly added to phi set. Re-inserting the
                        // frontier node into the worklist handles
                        // the iterated DF closure — if y itself is
                        // a store-block-in-effect now, its own
                        // frontier contributes too.
                        worklist.push_back(y);
                    }
                }
            }
        }
        phi_blocks[idx] = in_phi;
    }

    // ---- Phase 3: insert block params + Undef sentinels ------------
    //
    // For each alloca, emit ONE `Undef` instruction at the top of
    // the entry block. This is the initial "current value" of the
    // alloca — any `Load` that executes before the first `Store`
    // reads undef, which is semantically correct (Fortran doesn't
    // define the value of an uninitialized local).
    //
    // For each (alloca, phi-block) pair, emit a new `BlockParam`.
    // The alloca's "current value" on entry to that block is the
    // new param. During the rename walk we'll append matching
    // branch args from predecessors.

    // (alloca_idx, block_id) → new param ValueId.
    let mut phi_params: HashMap<(usize, BlockId), ValueId> = HashMap::new();

    // Per alloca: its initial Undef ValueId at function entry.
    let mut undef_values: Vec<ValueId> = Vec::with_capacity(promotable.len());

    // Grab a dummy span we can reuse for inserted insts.
    let span = func
        .block(func.entry)
        .insts
        .first()
        .map(|i| i.span)
        .or_else(|| {
            func.block(func.entry).terminator.as_ref().map(|_t| {
                // If the block is terminator-only, fall back to a
                // zero span — good enough for synthesized insts.
                crate::lexer::Span {
                    start: crate::lexer::Position { line: 0, col: 0 },
                    end: crate::lexer::Position { line: 0, col: 0 },
                    file_id: 0,
                }
            })
        })
        .unwrap_or(crate::lexer::Span {
            start: crate::lexer::Position { line: 0, col: 0 },
            end: crate::lexer::Position { line: 0, col: 0 },
            file_id: 0,
        });

    // Insert each Undef sentinel at the very front of the entry
    // block so it dominates every use. Audit Med-3: the previous
    // version computed `pos = undef_values.len() - 1` which only
    // worked because nothing else lived at the front of entry —
    // a future pass adding prologue insts would break it. The
    // unconditional `insert(0, ...)` is robust regardless of what
    // sits below it. We insert in REVERSE order so promotable[0]
    // ends up at insts[0] after all inserts complete.
    let entry = func.entry;
    let mut new_undefs: Vec<(ValueId, IrType)> = Vec::with_capacity(promotable.len());
    for p in &promotable {
        let id = func.next_value_id();
        undef_values.push(id);
        new_undefs.push((id, p.pointee_ty.clone()));
    }
    for (id, ty) in new_undefs.iter().rev() {
        func.block_mut(entry).insts.insert(
            0,
            Inst {
                id: *id,
                kind: InstKind::Undef(ty.clone()),
                ty: ty.clone(),
                span,
            },
        );
    }

    // Now insert block params. Order within a block matters: we
    // append in `promotable.len()` order so the rename walk can
    // correspond each new param to its alloca index.
    //
    // `block_phi_order[block]` is the list of alloca indices that
    // have a phi param at `block`, in the order the params were
    // appended. This also defines the order in which we'll append
    // branch args at predecessors.
    let mut block_phi_order: HashMap<BlockId, Vec<usize>> = HashMap::new();

    // NOTE(review): nothing here checks whether a phi block is the
    // target of a `Switch` edge. `append_branch_args_for` silently
    // skips `Switch` terminators, so a param inserted on a
    // switch-target join would never receive a matching arg —
    // presumably the IR convention (verifier: switch targets carry
    // no block params) guarantees such joins can't occur, but that
    // should be confirmed against the verifier's CFG rules.
    for idx in 0..promotable.len() {
        // Process phi blocks in deterministic order (by BlockId.0).
        let mut blocks: Vec<BlockId> = phi_blocks[idx].iter().copied().collect();
        blocks.sort_by_key(|b| b.0);
        for bid in blocks {
            if bid == func.entry {
                // CFRWZ never places entry in any block's dominance
                // frontier — entry has no predecessors, so it can't
                // be a join point. If we ever see entry in
                // `phi_blocks` it means the DF computation is buggy.
                // Skip in release for safety; assert loudly in debug
                // so the bug surfaces during development. The
                // initial Undef value already handles the
                // "before-any-store" case at entry.
                debug_assert!(
                    false,
                    "mem2reg: entry block appeared in phi placement set for alloca {} \
                     — DF should never include entry (entry has no preds)",
                    idx,
                );
                continue;
            }
            let pid = func.next_value_id();
            func.block_mut(bid).params.push(BlockParam {
                id: pid,
                ty: promotable[idx].pointee_ty.clone(),
            });
            phi_params.insert((idx, bid), pid);
            block_phi_order.entry(bid).or_default().push(idx);
        }
    }

    // ---- Phase 4: renaming walk (DFS over dominator tree) ---------
    //
    // Per-alloca current-value stack. The stack top is the SSA value
    // that an instruction executing at the current point of the walk
    // should see when it loads from this alloca.
    //
    // We also accumulate `load_renames`: (old load ValueId → new
    // SSA ValueId) which we apply to the whole function via a
    // single `substitute_uses_batch` call at the end. The loads and
    // stores themselves are marked for deletion.
    //
    // Visiting a block in the dom tree:
    //   1. For each new phi param at this block: push its ValueId.
    //   2. Walk instructions: handle Store/Load for promotable
    //      allocas. Track how many times we pushed onto each stack
    //      so we can pop on the way out.
    //   3. For each successor: append the current stack top of each
    //      phi-alloca at that successor to the terminator's matching
    //      arg list.
    //   4. Recurse into each dominator-tree child.
    //   5. Pop everything we pushed in this block.

    let idoms = compute_immediate_dominators(func);
    let children = dominator_tree_children(&idoms);

    // Current value stack per alloca. Initialized with the entry
    // Undef for each alloca so that any Load before any Store on
    // this dom-tree path sees undef.
    let mut stacks: Vec<Vec<ValueId>> = undef_values.iter().map(|&u| vec![u]).collect();

    // (old_load_id, new_value_id) rewrites to apply at the end.
    let mut load_renames: HashMap<ValueId, ValueId> = HashMap::new();
    // Instructions (block_id, inst index at the time of marking)
    // that should be deleted. We collect ValueIds and do a pass at
    // the end instead of tracking indices, which are invalidated
    // by other in-place mutations during the walk.
    let mut dead_loads: HashSet<ValueId> = HashSet::new();
    let mut dead_stores: HashSet<ValueId> = HashSet::new();

    // Recursive helper that processes one dominator-tree node. It
    // records its own pushes per alloca and pops them before
    // returning, so the caller observes the stacks unchanged.
    // (A nested `fn` rather than a closure: closures can't recurse.)
    #[allow(clippy::too_many_arguments)]
    fn rename_block(
        func: &mut Function,
        block_id: BlockId,
        promotable: &[Promotable],
        alloca_index: &HashMap<ValueId, usize>,
        phi_params: &HashMap<(usize, BlockId), ValueId>,
        block_phi_order: &HashMap<BlockId, Vec<usize>>,
        children: &HashMap<BlockId, Vec<BlockId>>,
        stacks: &mut [Vec<ValueId>],
        load_renames: &mut HashMap<ValueId, ValueId>,
        dead_loads: &mut HashSet<ValueId>,
        dead_stores: &mut HashSet<ValueId>,
    ) {
        // 1. Push phi params into their alloca stacks.
        let mut pushes_per_alloca: Vec<usize> = vec![0; promotable.len()];
        if let Some(order) = block_phi_order.get(&block_id) {
            for &idx in order {
                if let Some(&pid) = phi_params.get(&(idx, block_id)) {
                    stacks[idx].push(pid);
                    pushes_per_alloca[idx] += 1;
                }
            }
        }

        // 2. Walk instructions. We do this as an index loop because
        //    we may need to rewrite the current inst in place (for
        //    Load → delete, Store → delete).
        let block_len = func.block(block_id).insts.len();
        for i in 0..block_len {
            let inst = &func.block(block_id).insts[i];
            let inst_id = inst.id;
            match &inst.kind {
                InstKind::Load(addr) => {
                    let addr = *addr;
                    if let Some(&idx) = alloca_index.get(&addr) {
                        // This is a load from a promotable alloca.
                        // Rewrite uses of `inst_id` to the current
                        // stack top.
                        let cur = resolve_promoted_value(
                            *stacks[idx].last().expect("mem2reg: stack empty at load"),
                            load_renames,
                        );
                        load_renames.insert(inst_id, cur);
                        dead_loads.insert(inst_id);
                    }
                }
                InstKind::Store(val, addr) => {
                    // Resolve first: `val` may itself be a load we
                    // already scheduled for renaming on this path.
                    let val = resolve_promoted_value(*val, load_renames);
                    let addr = *addr;
                    if let Some(&idx) = alloca_index.get(&addr) {
                        stacks[idx].push(val);
                        pushes_per_alloca[idx] += 1;
                        dead_stores.insert(inst_id);
                    }
                }
                _ => {}
            }
        }

        // 3. Append branch args for each successor's phi params.
        //
        // We need to know the successors and mutate their terminator
        // arg lists. `func.block_mut` gives us a single-borrow view,
        // so we collect successor IDs first.
        //
        // Audit M2: dedupe successors before iterating. A
        // `CondBranch { true: B, false: B, .. }` (or a `Switch`
        // whose default and a case both branch to the same block)
        // would otherwise visit `B` twice. Each visit appends a
        // copy of the new args to BOTH the true_args and
        // false_args slots inside `append_branch_args_for`,
        // doubling the arg lists. The verifier then rejects the
        // resulting function for branch-arg / param count mismatch.
        let successors: Vec<BlockId> = {
            let block = func.block(block_id);
            let raw: Vec<BlockId> = match &block.terminator {
                Some(Terminator::Return(_)) | Some(Terminator::Unreachable) | None => vec![],
                Some(Terminator::Branch(d, _)) => vec![*d],
                Some(Terminator::CondBranch {
                    true_dest,
                    false_dest,
                    ..
                }) => {
                    vec![*true_dest, *false_dest]
                }
                Some(Terminator::Switch { cases, default, .. }) => {
                    let mut v: Vec<BlockId> = cases.iter().map(|(_, b)| *b).collect();
                    v.push(*default);
                    v
                }
            };
            // `HashSet::insert` returns false on duplicates, so this
            // filter keeps the first occurrence of each successor.
            let mut seen: HashSet<BlockId> = HashSet::new();
            raw.into_iter().filter(|b| seen.insert(*b)).collect()
        };
        for succ in successors {
            if let Some(order) = block_phi_order.get(&succ) {
                // For each alloca with a phi at succ, append the
                // current stack top as a branch arg. The order
                // matches the order the params were pushed onto
                // `succ.params` during phase 3.
                let new_args: Vec<ValueId> = order
                    .iter()
                    .map(|&idx| {
                        resolve_promoted_value(
                            *stacks[idx].last().expect("mem2reg: stack empty at branch"),
                            load_renames,
                        )
                    })
                    .collect();
                // Locate the slots in the terminator's arg list.
                let block_mut = func.block_mut(block_id);
                if let Some(term) = &mut block_mut.terminator {
                    append_branch_args_for(term, succ, &new_args);
                }
            }
        }

        // 4. Recurse into children.
        let kids: Vec<BlockId> = children.get(&block_id).cloned().unwrap_or_default();
        for kid in kids {
            rename_block(
                func,
                kid,
                promotable,
                alloca_index,
                phi_params,
                block_phi_order,
                children,
                stacks,
                load_renames,
                dead_loads,
                dead_stores,
            );
        }

        // 5. Pop everything we pushed in this block.
        for (idx, count) in pushes_per_alloca.iter().enumerate() {
            for _ in 0..*count {
                stacks[idx].pop();
            }
        }
    }

    rename_block(
        func,
        func.entry,
        &promotable,
        &alloca_index,
        &phi_params,
        &block_phi_order,
        &children,
        &mut stacks,
        &mut load_renames,
        &mut dead_loads,
        &mut dead_stores,
    );

    // ---- Phase 5: apply load renames and delete dead insts --------
    // Rename values were fully resolved through `load_renames` at
    // insertion time, so map iteration order doesn't matter here.
    for (old, new) in &load_renames {
        substitute_uses(func, *old, *new);
    }

    // Drop dead loads, stores, and the allocas themselves.
    let alloca_ids: HashSet<ValueId> = promotable.iter().map(|p| p.alloca_id).collect();
    for block in &mut func.blocks {
        block.insts.retain(|inst| {
            if dead_loads.contains(&inst.id) {
                return false;
            }
            if dead_stores.contains(&inst.id) {
                return false;
            }
            if alloca_ids.contains(&inst.id) {
                return false;
            }
            true
        });
    }

    true
}
| 507 | |
| 508 | fn resolve_promoted_value(mut value: ValueId, load_renames: &HashMap<ValueId, ValueId>) -> ValueId { |
| 509 | let mut seen = HashSet::new(); |
| 510 | while let Some(&next) = load_renames.get(&value) { |
| 511 | if !seen.insert(value) || next == value { |
| 512 | break; |
| 513 | } |
| 514 | value = next; |
| 515 | } |
| 516 | value |
| 517 | } |
| 518 | |
| 519 | /// Find alloca instructions whose only uses are `Load(alloca)` and |
| 520 | /// `Store(v, alloca)` where the alloca is in the address position. |
| 521 | /// |
| 522 | /// Any other use — GEP base, Call argument, ExtractField aggregate, |
| 523 | /// store's value slot, return value, branch arg, etc. — means the |
| 524 | /// address could be observed and we can't safely promote the slot. |
| 525 | fn find_promotable_allocas(func: &Function) -> Vec<Promotable> { |
| 526 | // First pass: collect all alloca insts and their pointee type. |
| 527 | let mut candidates: HashMap<ValueId, IrType> = HashMap::new(); |
| 528 | for block in &func.blocks { |
| 529 | for inst in &block.insts { |
| 530 | if let InstKind::Alloca(inner) = &inst.kind { |
| 531 | candidates.insert(inst.id, inner.clone()); |
| 532 | } |
| 533 | } |
| 534 | } |
| 535 | if candidates.is_empty() { |
| 536 | return Vec::new(); |
| 537 | } |
| 538 | |
| 539 | // Second pass: walk every use and disqualify any alloca whose |
| 540 | // ValueId appears in a non-load/non-store-addr position. |
| 541 | for block in &func.blocks { |
| 542 | for inst in &block.insts { |
| 543 | match &inst.kind { |
| 544 | InstKind::Alloca(_) => {} |
| 545 | InstKind::Load(addr) => { |
| 546 | // addr is fine — loading from an alloca is a |
| 547 | // promotable use. Nothing to do. |
| 548 | let _ = addr; |
| 549 | } |
| 550 | InstKind::Store(val, addr) => { |
| 551 | // `val` is an ordinary use — if `val` is an |
| 552 | // alloca, that means we're storing the slot's |
| 553 | // address somewhere. That's an escape. |
| 554 | if candidates.contains_key(val) { |
| 555 | candidates.remove(val); |
| 556 | } |
| 557 | // `addr` being an alloca is the promotable |
| 558 | // case. Nothing to do. |
| 559 | let _ = addr; |
| 560 | } |
| 561 | _ => { |
| 562 | // Any other instruction: every operand that is |
| 563 | // an alloca disqualifies that alloca. |
| 564 | for op in crate::ir::walk::inst_uses(&inst.kind) { |
| 565 | if candidates.contains_key(&op) { |
| 566 | candidates.remove(&op); |
| 567 | } |
| 568 | } |
| 569 | } |
| 570 | } |
| 571 | } |
| 572 | // Terminators can carry ValueIds too (Return value, branch |
| 573 | // args, cond). Any of those being an alloca means the |
| 574 | // slot's address flows somewhere observable. |
| 575 | if let Some(term) = &block.terminator { |
| 576 | for v in crate::ir::walk::terminator_uses(term) { |
| 577 | if candidates.contains_key(&v) { |
| 578 | candidates.remove(&v); |
| 579 | } |
| 580 | } |
| 581 | } |
| 582 | } |
| 583 | |
| 584 | // Build the `Promotable` list in a deterministic order — by |
| 585 | // the order the allocas appear in the function. This matters |
| 586 | // for test repeatability and for the "undef values land at |
| 587 | // entry.insts[0..promotable.len()]" layout invariant used |
| 588 | // during phase 3. |
| 589 | let mut out = Vec::new(); |
| 590 | for block in &func.blocks { |
| 591 | for inst in &block.insts { |
| 592 | if let InstKind::Alloca(_) = &inst.kind { |
| 593 | if let Some(ty) = candidates.remove(&inst.id) { |
| 594 | out.push(Promotable { |
| 595 | alloca_id: inst.id, |
| 596 | pointee_ty: ty, |
| 597 | }); |
| 598 | } |
| 599 | } |
| 600 | } |
| 601 | } |
| 602 | out |
| 603 | } |
| 604 | |
| 605 | /// Append `new_args` to the argument slot(s) in `term` that branch |
| 606 | /// to `target`. |
| 607 | /// |
| 608 | /// A `CondBranch` whose `true_dest == false_dest == target` has TWO |
| 609 | /// arg slots that branch to `target` (one per arm), and both arms |
| 610 | /// must receive the same append: the new value of the alloca at the |
| 611 | /// source block doesn't depend on which arm is taken at runtime, so |
| 612 | /// both arg lists end up identical for the new param. The caller |
| 613 | /// must ensure this function is invoked **once per unique successor** |
| 614 | /// (not once per edge), otherwise the arg lists double up. The |
| 615 | /// rename walk in `rename_block` enforces this via successor dedup. |
| 616 | fn append_branch_args_for(term: &mut Terminator, target: BlockId, new_args: &[ValueId]) { |
| 617 | match term { |
| 618 | Terminator::Branch(d, args) if *d == target => { |
| 619 | args.extend_from_slice(new_args); |
| 620 | } |
| 621 | Terminator::CondBranch { |
| 622 | true_dest, |
| 623 | true_args, |
| 624 | false_dest, |
| 625 | false_args, |
| 626 | .. |
| 627 | } => { |
| 628 | if *true_dest == target { |
| 629 | true_args.extend_from_slice(new_args); |
| 630 | } |
| 631 | if *false_dest == target { |
| 632 | false_args.extend_from_slice(new_args); |
| 633 | } |
| 634 | } |
| 635 | // Switch targets cannot have block params per our IR |
| 636 | // convention (the verifier enforces this). Nothing to do. |
| 637 | _ => {} |
| 638 | } |
| 639 | } |
| 640 | |
| 641 | #[cfg(test)] |
| 642 | mod tests { |
| 643 | use super::*; |
| 644 | use crate::ir::types::IntWidth; |
| 645 | use crate::ir::verify::verify_module; |
| 646 | use crate::lexer::{Position, Span}; |
| 647 | |
| 648 | fn dummy_span() -> Span { |
| 649 | let p = Position { line: 1, col: 1 }; |
| 650 | Span { |
| 651 | start: p, |
| 652 | end: p, |
| 653 | file_id: 0, |
| 654 | } |
| 655 | } |
| 656 | |
| 657 | fn push_inst(f: &mut Function, block: BlockId, kind: InstKind, ty: IrType) -> ValueId { |
| 658 | let id = f.next_value_id(); |
| 659 | f.block_mut(block).insts.push(Inst { |
| 660 | id, |
| 661 | kind, |
| 662 | ty, |
| 663 | span: dummy_span(), |
| 664 | }); |
| 665 | id |
| 666 | } |
| 667 | |
| 668 | // ============================================================= |
| 669 | // Straight-line promotion: single store + single load should |
| 670 | // dissolve into a direct value flow. |
| 671 | // ============================================================= |
| 672 | #[test] |
| 673 | fn straight_line_single_store_load() { |
| 674 | let mut m = Module::new("t".into()); |
| 675 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 676 | let entry = f.entry; |
| 677 | |
| 678 | let slot = push_inst( |
| 679 | &mut f, |
| 680 | entry, |
| 681 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 682 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 683 | ); |
| 684 | let c7 = push_inst( |
| 685 | &mut f, |
| 686 | entry, |
| 687 | InstKind::ConstInt(7, IntWidth::I32), |
| 688 | IrType::Int(IntWidth::I32), |
| 689 | ); |
| 690 | push_inst(&mut f, entry, InstKind::Store(c7, slot), IrType::Void); |
| 691 | let loaded = push_inst( |
| 692 | &mut f, |
| 693 | entry, |
| 694 | InstKind::Load(slot), |
| 695 | IrType::Int(IntWidth::I32), |
| 696 | ); |
| 697 | f.block_mut(entry).terminator = Some(Terminator::Return(Some(loaded))); |
| 698 | m.add_function(f); |
| 699 | |
| 700 | assert!(Mem2Reg.run(&mut m)); |
| 701 | let errs = verify_module(&m); |
| 702 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 703 | |
| 704 | // After: alloca/store/load all gone. The return should |
| 705 | // reference c7 directly (via substitution). |
| 706 | let block = &m.functions[0].blocks[0]; |
| 707 | assert!( |
| 708 | !block |
| 709 | .insts |
| 710 | .iter() |
| 711 | .any(|i| matches!(i.kind, InstKind::Alloca(_))), |
| 712 | "alloca should be gone" |
| 713 | ); |
| 714 | assert!( |
| 715 | !block |
| 716 | .insts |
| 717 | .iter() |
| 718 | .any(|i| matches!(i.kind, InstKind::Load(_))), |
| 719 | "load should be gone" |
| 720 | ); |
| 721 | assert!( |
| 722 | !block |
| 723 | .insts |
| 724 | .iter() |
| 725 | .any(|i| matches!(i.kind, InstKind::Store(..))), |
| 726 | "store should be gone" |
| 727 | ); |
| 728 | match block.terminator.as_ref().unwrap() { |
| 729 | Terminator::Return(Some(v)) => { |
| 730 | assert_eq!(*v, c7, "return should reference the stored const directly") |
| 731 | } |
| 732 | _ => panic!(), |
| 733 | } |
| 734 | } |
| 735 | |
| 736 | // ============================================================= |
| 737 | // Diamond merge: if cond { x = 1; } else { x = 2; } load x. |
| 738 | // The merge block should grow a block param for x. |
| 739 | // ============================================================= |
| 740 | #[test] |
| 741 | fn diamond_merge_inserts_block_param() { |
| 742 | let mut m = Module::new("t".into()); |
| 743 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 744 | let entry = f.entry; |
| 745 | |
| 746 | let slot = push_inst( |
| 747 | &mut f, |
| 748 | entry, |
| 749 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 750 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 751 | ); |
| 752 | let cond = push_inst(&mut f, entry, InstKind::ConstBool(true), IrType::Bool); |
| 753 | let then_b = f.create_block("then"); |
| 754 | let else_b = f.create_block("else"); |
| 755 | let merge = f.create_block("merge"); |
| 756 | f.block_mut(entry).terminator = Some(Terminator::CondBranch { |
| 757 | cond, |
| 758 | true_dest: then_b, |
| 759 | true_args: vec![], |
| 760 | false_dest: else_b, |
| 761 | false_args: vec![], |
| 762 | }); |
| 763 | |
| 764 | let c1 = push_inst( |
| 765 | &mut f, |
| 766 | then_b, |
| 767 | InstKind::ConstInt(1, IntWidth::I32), |
| 768 | IrType::Int(IntWidth::I32), |
| 769 | ); |
| 770 | push_inst(&mut f, then_b, InstKind::Store(c1, slot), IrType::Void); |
| 771 | f.block_mut(then_b).terminator = Some(Terminator::Branch(merge, vec![])); |
| 772 | |
| 773 | let c2 = push_inst( |
| 774 | &mut f, |
| 775 | else_b, |
| 776 | InstKind::ConstInt(2, IntWidth::I32), |
| 777 | IrType::Int(IntWidth::I32), |
| 778 | ); |
| 779 | push_inst(&mut f, else_b, InstKind::Store(c2, slot), IrType::Void); |
| 780 | f.block_mut(else_b).terminator = Some(Terminator::Branch(merge, vec![])); |
| 781 | |
| 782 | let loaded = push_inst( |
| 783 | &mut f, |
| 784 | merge, |
| 785 | InstKind::Load(slot), |
| 786 | IrType::Int(IntWidth::I32), |
| 787 | ); |
| 788 | f.block_mut(merge).terminator = Some(Terminator::Return(Some(loaded))); |
| 789 | |
| 790 | m.add_function(f); |
| 791 | |
| 792 | assert!(Mem2Reg.run(&mut m)); |
| 793 | let errs = verify_module(&m); |
| 794 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 795 | |
| 796 | // The merge block should now have exactly one block param |
| 797 | // (for the promoted alloca), and the return should reference |
| 798 | // that param. |
| 799 | let f = &m.functions[0]; |
| 800 | let merge_block = f.block(merge); |
| 801 | assert_eq!( |
| 802 | merge_block.params.len(), |
| 803 | 1, |
| 804 | "merge should have 1 block param" |
| 805 | ); |
| 806 | let param_id = merge_block.params[0].id; |
| 807 | match merge_block.terminator.as_ref().unwrap() { |
| 808 | Terminator::Return(Some(v)) => assert_eq!( |
| 809 | *v, param_id, |
| 810 | "return should reference the merge block param" |
| 811 | ), |
| 812 | _ => panic!(), |
| 813 | } |
| 814 | |
| 815 | // Each predecessor's branch to merge must now carry one arg |
| 816 | // equal to the value it would have stored. |
| 817 | let then_term = f.block(then_b).terminator.as_ref().unwrap(); |
| 818 | match then_term { |
| 819 | Terminator::Branch(_, args) => { |
| 820 | assert_eq!(args.len(), 1); |
| 821 | assert_eq!(args[0], c1); |
| 822 | } |
| 823 | _ => panic!(), |
| 824 | } |
| 825 | let else_term = f.block(else_b).terminator.as_ref().unwrap(); |
| 826 | match else_term { |
| 827 | Terminator::Branch(_, args) => { |
| 828 | assert_eq!(args.len(), 1); |
| 829 | assert_eq!(args[0], c2); |
| 830 | } |
| 831 | _ => panic!(), |
| 832 | } |
| 833 | } |
| 834 | |
| 835 | // ============================================================= |
| 836 | // Non-promotable: the alloca's address escapes via a Call. |
| 837 | // mem2reg must leave it alone. |
| 838 | // ============================================================= |
| 839 | #[test] |
| 840 | fn escaping_alloca_not_promoted() { |
| 841 | let mut m = Module::new("t".into()); |
| 842 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 843 | let entry = f.entry; |
| 844 | let slot = push_inst( |
| 845 | &mut f, |
| 846 | entry, |
| 847 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 848 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 849 | ); |
| 850 | // Call that takes the slot's address — escape! |
| 851 | push_inst( |
| 852 | &mut f, |
| 853 | entry, |
| 854 | InstKind::Call(FuncRef::External("takes_ptr".into()), vec![slot]), |
| 855 | IrType::Void, |
| 856 | ); |
| 857 | f.block_mut(entry).terminator = Some(Terminator::Return(None)); |
| 858 | m.add_function(f); |
| 859 | |
| 860 | assert!( |
| 861 | !Mem2Reg.run(&mut m), |
| 862 | "escaping alloca should not be promoted" |
| 863 | ); |
| 864 | // The alloca is still there. |
| 865 | assert!(m.functions[0].blocks[0] |
| 866 | .insts |
| 867 | .iter() |
| 868 | .any(|i| matches!(i.kind, InstKind::Alloca(_)))); |
| 869 | } |
| 870 | |
| 871 | // ============================================================= |
| 872 | // Mix of promotable + non-promotable: only the good one goes. |
| 873 | // ============================================================= |
| 874 | #[test] |
| 875 | fn mix_promotable_and_escaping() { |
| 876 | let mut m = Module::new("t".into()); |
| 877 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 878 | let entry = f.entry; |
| 879 | // Promotable: only used by store + load. |
| 880 | let good = push_inst( |
| 881 | &mut f, |
| 882 | entry, |
| 883 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 884 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 885 | ); |
| 886 | // Non-promotable: escapes via call. |
| 887 | let bad = push_inst( |
| 888 | &mut f, |
| 889 | entry, |
| 890 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 891 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 892 | ); |
| 893 | let c42 = push_inst( |
| 894 | &mut f, |
| 895 | entry, |
| 896 | InstKind::ConstInt(42, IntWidth::I32), |
| 897 | IrType::Int(IntWidth::I32), |
| 898 | ); |
| 899 | push_inst(&mut f, entry, InstKind::Store(c42, good), IrType::Void); |
| 900 | push_inst( |
| 901 | &mut f, |
| 902 | entry, |
| 903 | InstKind::Call(FuncRef::External("takes_ptr".into()), vec![bad]), |
| 904 | IrType::Void, |
| 905 | ); |
| 906 | let loaded = push_inst( |
| 907 | &mut f, |
| 908 | entry, |
| 909 | InstKind::Load(good), |
| 910 | IrType::Int(IntWidth::I32), |
| 911 | ); |
| 912 | f.block_mut(entry).terminator = Some(Terminator::Return(Some(loaded))); |
| 913 | m.add_function(f); |
| 914 | |
| 915 | assert!(Mem2Reg.run(&mut m)); |
| 916 | let errs = verify_module(&m); |
| 917 | assert!(errs.is_empty(), "IR invalid: {:?}", errs); |
| 918 | |
| 919 | let block = &m.functions[0].blocks[0]; |
| 920 | // `good` is gone; `bad` remains. |
| 921 | let alloca_count = block |
| 922 | .insts |
| 923 | .iter() |
| 924 | .filter(|i| matches!(i.kind, InstKind::Alloca(_))) |
| 925 | .count(); |
| 926 | assert_eq!(alloca_count, 1, "bad alloca should survive"); |
| 927 | // Return should reference c42 directly. |
| 928 | match block.terminator.as_ref().unwrap() { |
| 929 | Terminator::Return(Some(v)) => assert_eq!(*v, c42), |
| 930 | _ => panic!(), |
| 931 | } |
| 932 | } |
| 933 | |
| 934 | // ============================================================= |
| 935 | // Loop: counter alloca `i` initialized to 0, incremented each |
| 936 | // iteration. Mem2reg should promote `i` and insert a block param |
| 937 | // on the loop header. |
| 938 | // ============================================================= |
| 939 | #[test] |
| 940 | fn loop_counter_promoted_with_header_param() { |
| 941 | let mut m = Module::new("t".into()); |
| 942 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 943 | let entry = f.entry; |
| 944 | let slot = push_inst( |
| 945 | &mut f, |
| 946 | entry, |
| 947 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 948 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 949 | ); |
| 950 | let c0 = push_inst( |
| 951 | &mut f, |
| 952 | entry, |
| 953 | InstKind::ConstInt(0, IntWidth::I32), |
| 954 | IrType::Int(IntWidth::I32), |
| 955 | ); |
| 956 | push_inst(&mut f, entry, InstKind::Store(c0, slot), IrType::Void); |
| 957 | |
| 958 | let header = f.create_block("header"); |
| 959 | let body = f.create_block("body"); |
| 960 | let exit = f.create_block("exit"); |
| 961 | f.block_mut(entry).terminator = Some(Terminator::Branch(header, vec![])); |
| 962 | |
| 963 | // header: load i, cmp i < 10, cond br body/exit |
| 964 | let cur = push_inst( |
| 965 | &mut f, |
| 966 | header, |
| 967 | InstKind::Load(slot), |
| 968 | IrType::Int(IntWidth::I32), |
| 969 | ); |
| 970 | let c10 = push_inst( |
| 971 | &mut f, |
| 972 | header, |
| 973 | InstKind::ConstInt(10, IntWidth::I32), |
| 974 | IrType::Int(IntWidth::I32), |
| 975 | ); |
| 976 | let cmp = push_inst( |
| 977 | &mut f, |
| 978 | header, |
| 979 | InstKind::ICmp(CmpOp::Lt, cur, c10), |
| 980 | IrType::Bool, |
| 981 | ); |
| 982 | f.block_mut(header).terminator = Some(Terminator::CondBranch { |
| 983 | cond: cmp, |
| 984 | true_dest: body, |
| 985 | true_args: vec![], |
| 986 | false_dest: exit, |
| 987 | false_args: vec![], |
| 988 | }); |
| 989 | |
| 990 | // body: i = i + 1; br header |
| 991 | let cur2 = push_inst( |
| 992 | &mut f, |
| 993 | body, |
| 994 | InstKind::Load(slot), |
| 995 | IrType::Int(IntWidth::I32), |
| 996 | ); |
| 997 | let c1 = push_inst( |
| 998 | &mut f, |
| 999 | body, |
| 1000 | InstKind::ConstInt(1, IntWidth::I32), |
| 1001 | IrType::Int(IntWidth::I32), |
| 1002 | ); |
| 1003 | let next = push_inst( |
| 1004 | &mut f, |
| 1005 | body, |
| 1006 | InstKind::IAdd(cur2, c1), |
| 1007 | IrType::Int(IntWidth::I32), |
| 1008 | ); |
| 1009 | push_inst(&mut f, body, InstKind::Store(next, slot), IrType::Void); |
| 1010 | f.block_mut(body).terminator = Some(Terminator::Branch(header, vec![])); |
| 1011 | |
| 1012 | f.block_mut(exit).terminator = Some(Terminator::Return(None)); |
| 1013 | m.add_function(f); |
| 1014 | |
| 1015 | assert!(Mem2Reg.run(&mut m)); |
| 1016 | let errs = verify_module(&m); |
| 1017 | assert!(errs.is_empty(), "IR invalid: {:?}", errs); |
| 1018 | |
| 1019 | let f = &m.functions[0]; |
| 1020 | let header_block = f.block(header); |
| 1021 | // Header should have exactly one block param (the promoted counter). |
| 1022 | assert_eq!( |
| 1023 | header_block.params.len(), |
| 1024 | 1, |
| 1025 | "header should have 1 block param for the promoted counter" |
| 1026 | ); |
| 1027 | // No loads or stores anywhere. |
| 1028 | for b in &f.blocks { |
| 1029 | for i in &b.insts { |
| 1030 | assert!( |
| 1031 | !matches!(i.kind, InstKind::Load(_)), |
| 1032 | "no loads should survive mem2reg" |
| 1033 | ); |
| 1034 | assert!( |
| 1035 | !matches!(i.kind, InstKind::Store(..)), |
| 1036 | "no stores should survive mem2reg" |
| 1037 | ); |
| 1038 | assert!( |
| 1039 | !matches!(i.kind, InstKind::Alloca(_)), |
| 1040 | "no allocas should survive mem2reg" |
| 1041 | ); |
| 1042 | } |
| 1043 | } |
| 1044 | } |
| 1045 | |
| 1046 | // ============================================================= |
| 1047 | // Load before store: reads undef. Verify the rename walks |
| 1048 | // correctly and the IR stays valid (the undef sentinel takes |
| 1049 | // the load's place). |
| 1050 | // ============================================================= |
| 1051 | #[test] |
| 1052 | fn load_before_any_store_reads_undef() { |
| 1053 | let mut m = Module::new("t".into()); |
| 1054 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 1055 | let entry = f.entry; |
| 1056 | let slot = push_inst( |
| 1057 | &mut f, |
| 1058 | entry, |
| 1059 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1060 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1061 | ); |
| 1062 | let loaded = push_inst( |
| 1063 | &mut f, |
| 1064 | entry, |
| 1065 | InstKind::Load(slot), |
| 1066 | IrType::Int(IntWidth::I32), |
| 1067 | ); |
| 1068 | f.block_mut(entry).terminator = Some(Terminator::Return(Some(loaded))); |
| 1069 | m.add_function(f); |
| 1070 | |
| 1071 | assert!(Mem2Reg.run(&mut m)); |
| 1072 | let errs = verify_module(&m); |
| 1073 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 1074 | |
| 1075 | // Return should reference the synthetic Undef. |
| 1076 | let f = &m.functions[0]; |
| 1077 | let undef_id = f.blocks[0] |
| 1078 | .insts |
| 1079 | .iter() |
| 1080 | .find(|i| matches!(i.kind, InstKind::Undef(_))) |
| 1081 | .map(|i| i.id) |
| 1082 | .expect("no Undef inserted"); |
| 1083 | match f.blocks[0].terminator.as_ref().unwrap() { |
| 1084 | Terminator::Return(Some(v)) => assert_eq!(*v, undef_id), |
| 1085 | _ => panic!(), |
| 1086 | } |
| 1087 | } |
| 1088 | |
| 1089 | // ============================================================= |
| 1090 | // Audit M1 — unreachable block storing to a promoted alloca |
| 1091 | // must not corrupt the IR. mem2reg's Phase 0 pre-prune should |
| 1092 | // drop the unreachable block before it influences the |
| 1093 | // dominance frontier or leaves dangling load/store insts. |
| 1094 | // ============================================================= |
| 1095 | #[test] |
| 1096 | fn unreachable_block_with_store_does_not_corrupt_ir() { |
| 1097 | let mut m = Module::new("t".into()); |
| 1098 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 1099 | let entry = f.entry; |
| 1100 | |
| 1101 | let slot = push_inst( |
| 1102 | &mut f, |
| 1103 | entry, |
| 1104 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1105 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1106 | ); |
| 1107 | let c1 = push_inst( |
| 1108 | &mut f, |
| 1109 | entry, |
| 1110 | InstKind::ConstInt(1, IntWidth::I32), |
| 1111 | IrType::Int(IntWidth::I32), |
| 1112 | ); |
| 1113 | push_inst(&mut f, entry, InstKind::Store(c1, slot), IrType::Void); |
| 1114 | let loaded = push_inst( |
| 1115 | &mut f, |
| 1116 | entry, |
| 1117 | InstKind::Load(slot), |
| 1118 | IrType::Int(IntWidth::I32), |
| 1119 | ); |
| 1120 | f.block_mut(entry).terminator = Some(Terminator::Return(Some(loaded))); |
| 1121 | |
| 1122 | // Build an UNREACHABLE block that also stores to `slot`. |
| 1123 | // Nothing branches to it from entry, so prune_unreachable |
| 1124 | // should remove it before the rename walk. |
| 1125 | let dead = f.create_block("dead"); |
| 1126 | let c99 = push_inst( |
| 1127 | &mut f, |
| 1128 | dead, |
| 1129 | InstKind::ConstInt(99, IntWidth::I32), |
| 1130 | IrType::Int(IntWidth::I32), |
| 1131 | ); |
| 1132 | push_inst(&mut f, dead, InstKind::Store(c99, slot), IrType::Void); |
| 1133 | f.block_mut(dead).terminator = Some(Terminator::Return(None)); |
| 1134 | |
| 1135 | m.add_function(f); |
| 1136 | |
| 1137 | assert!(Mem2Reg.run(&mut m)); |
| 1138 | let errs = verify_module(&m); |
| 1139 | assert!( |
| 1140 | errs.is_empty(), |
| 1141 | "post-mem2reg IR invalid (unreachable store regression): {:?}", |
| 1142 | errs |
| 1143 | ); |
| 1144 | |
| 1145 | let f = &m.functions[0]; |
| 1146 | // The dead block must be gone (pruned by Phase 0). |
| 1147 | assert!( |
| 1148 | !f.blocks.iter().any(|b| b.id == dead), |
| 1149 | "unreachable block should be pruned by mem2reg Phase 0" |
| 1150 | ); |
| 1151 | // The promoted alloca and its load/store should be gone. |
| 1152 | let entry_block = f.block(f.entry); |
| 1153 | assert!(!entry_block |
| 1154 | .insts |
| 1155 | .iter() |
| 1156 | .any(|i| matches!(i.kind, InstKind::Alloca(_)))); |
| 1157 | assert!(!entry_block |
| 1158 | .insts |
| 1159 | .iter() |
| 1160 | .any(|i| matches!(i.kind, InstKind::Load(_)))); |
| 1161 | assert!(!entry_block |
| 1162 | .insts |
| 1163 | .iter() |
| 1164 | .any(|i| matches!(i.kind, InstKind::Store(..)))); |
| 1165 | // Return must reference c1 (the live store value). |
| 1166 | match entry_block.terminator.as_ref().unwrap() { |
| 1167 | Terminator::Return(Some(v)) => assert_eq!( |
| 1168 | *v, c1, |
| 1169 | "return should reach c1 directly, not the dead block's c99" |
| 1170 | ), |
| 1171 | _ => panic!(), |
| 1172 | } |
| 1173 | } |
| 1174 | |
| 1175 | // ============================================================= |
| 1176 | // Audit M2 — a CondBranch with the same target on both arms |
| 1177 | // must not double-append branch args during the rename walk. |
| 1178 | // ============================================================= |
| 1179 | #[test] |
| 1180 | fn same_target_cond_branch_does_not_double_append() { |
| 1181 | let mut m = Module::new("t".into()); |
| 1182 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 1183 | let entry = f.entry; |
| 1184 | |
| 1185 | let slot = push_inst( |
| 1186 | &mut f, |
| 1187 | entry, |
| 1188 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1189 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1190 | ); |
| 1191 | let c1 = push_inst( |
| 1192 | &mut f, |
| 1193 | entry, |
| 1194 | InstKind::ConstInt(1, IntWidth::I32), |
| 1195 | IrType::Int(IntWidth::I32), |
| 1196 | ); |
| 1197 | push_inst(&mut f, entry, InstKind::Store(c1, slot), IrType::Void); |
| 1198 | |
| 1199 | let then_b = f.create_block("then"); |
| 1200 | let merge = f.create_block("merge"); |
| 1201 | |
| 1202 | // Conditional branch where both arms target `then_b`. The |
| 1203 | // pre-fix mem2reg would visit `then_b` twice via the |
| 1204 | // successor list and double-append the new branch args. |
| 1205 | let cond = push_inst(&mut f, entry, InstKind::ConstBool(true), IrType::Bool); |
| 1206 | f.block_mut(entry).terminator = Some(Terminator::CondBranch { |
| 1207 | cond, |
| 1208 | true_dest: then_b, |
| 1209 | true_args: vec![], |
| 1210 | false_dest: then_b, |
| 1211 | false_args: vec![], |
| 1212 | }); |
| 1213 | |
| 1214 | // `then_b` stores a different value, then branches to merge. |
| 1215 | let c2 = push_inst( |
| 1216 | &mut f, |
| 1217 | then_b, |
| 1218 | InstKind::ConstInt(2, IntWidth::I32), |
| 1219 | IrType::Int(IntWidth::I32), |
| 1220 | ); |
| 1221 | push_inst(&mut f, then_b, InstKind::Store(c2, slot), IrType::Void); |
| 1222 | f.block_mut(then_b).terminator = Some(Terminator::Branch(merge, vec![])); |
| 1223 | |
| 1224 | // `merge` loads from slot and returns it. |
| 1225 | let loaded = push_inst( |
| 1226 | &mut f, |
| 1227 | merge, |
| 1228 | InstKind::Load(slot), |
| 1229 | IrType::Int(IntWidth::I32), |
| 1230 | ); |
| 1231 | f.block_mut(merge).terminator = Some(Terminator::Return(Some(loaded))); |
| 1232 | |
| 1233 | m.add_function(f); |
| 1234 | |
| 1235 | assert!(Mem2Reg.run(&mut m)); |
| 1236 | let errs = verify_module(&m); |
| 1237 | assert!( |
| 1238 | errs.is_empty(), |
| 1239 | "post-mem2reg IR invalid (same-target CondBranch regression): {:?}", |
| 1240 | errs |
| 1241 | ); |
| 1242 | |
| 1243 | // Verify the entry's CondBranch has at most ONE arg per arm |
| 1244 | // (the new phi-arg added for the promoted slot). Pre-fix it |
| 1245 | // would have TWO per arm. |
| 1246 | let f = &m.functions[0]; |
| 1247 | let entry_block = f.block(f.entry); |
| 1248 | match entry_block.terminator.as_ref().unwrap() { |
| 1249 | Terminator::CondBranch { |
| 1250 | true_args, |
| 1251 | false_args, |
| 1252 | .. |
| 1253 | } => { |
| 1254 | assert!( |
| 1255 | true_args.len() <= 1, |
| 1256 | "true_args double-appended: {:?}", |
| 1257 | true_args |
| 1258 | ); |
| 1259 | assert!( |
| 1260 | false_args.len() <= 1, |
| 1261 | "false_args double-appended: {:?}", |
| 1262 | false_args |
| 1263 | ); |
| 1264 | } |
| 1265 | // mem2reg may have collapsed the cond_branch to a |
| 1266 | // direct branch if the cond was constant; that's |
| 1267 | // fine — verifier-clean is what we care about. |
| 1268 | _ => {} |
| 1269 | } |
| 1270 | } |
| 1271 | |
| 1272 | // ============================================================= |
| 1273 | // Audit D2 — `compute_dominance_frontiers` must not populate |
| 1274 | // entries for unreachable predecessors. mem2reg shouldn't see |
| 1275 | // phantom DF entries from dead code. |
| 1276 | // ============================================================= |
| 1277 | #[test] |
| 1278 | fn dominance_frontier_excludes_unreachable_preds() { |
| 1279 | use crate::ir::walk::compute_dominance_frontiers; |
| 1280 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 1281 | let merge = f.create_block("merge"); |
| 1282 | let dead = f.create_block("dead"); |
| 1283 | |
| 1284 | // entry → merge (reachable) |
| 1285 | // dead → merge (unreachable, but emits an edge into merge) |
| 1286 | let entry = f.entry; |
| 1287 | f.block_mut(entry).terminator = Some(Terminator::Branch(merge, vec![])); |
| 1288 | f.block_mut(dead).terminator = Some(Terminator::Branch(merge, vec![])); |
| 1289 | f.block_mut(merge).terminator = Some(Terminator::Return(None)); |
| 1290 | |
| 1291 | let df = compute_dominance_frontiers(&f); |
| 1292 | |
| 1293 | // The dead block is unreachable; it should not appear in |
| 1294 | // the DF map at all. |
| 1295 | assert!( |
| 1296 | !df.contains_key(&dead), |
| 1297 | "DF map should not contain unreachable block, got {:?}", |
| 1298 | df |
| 1299 | ); |
| 1300 | // The merge block has only ONE reachable predecessor |
| 1301 | // (entry), so it isn't a true join point and merge ∉ any |
| 1302 | // DF set. |
| 1303 | for (b, frontier) in &df { |
| 1304 | assert!( |
| 1305 | !frontier.contains(&merge), |
| 1306 | "merge should not be in DF[{:?}]: only one reachable pred", |
| 1307 | b |
| 1308 | ); |
| 1309 | } |
| 1310 | } |
| 1311 | |
| 1312 | // ============================================================= |
| 1313 | // Coverage: only-loaded alloca. An alloca that has loads but |
| 1314 | // no stores anywhere should still be promotable — every load |
| 1315 | // becomes Undef. Catches accidental "must have at least one |
| 1316 | // store" assumptions in the algorithm. |
| 1317 | // ============================================================= |
| 1318 | #[test] |
| 1319 | fn only_loaded_alloca_promotes_to_undef() { |
| 1320 | let mut m = Module::new("t".into()); |
| 1321 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 1322 | let entry = f.entry; |
| 1323 | let slot = push_inst( |
| 1324 | &mut f, |
| 1325 | entry, |
| 1326 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1327 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1328 | ); |
| 1329 | // Two loads with no intervening store. |
| 1330 | let l1 = push_inst( |
| 1331 | &mut f, |
| 1332 | entry, |
| 1333 | InstKind::Load(slot), |
| 1334 | IrType::Int(IntWidth::I32), |
| 1335 | ); |
| 1336 | let _ = push_inst( |
| 1337 | &mut f, |
| 1338 | entry, |
| 1339 | InstKind::Load(slot), |
| 1340 | IrType::Int(IntWidth::I32), |
| 1341 | ); |
| 1342 | f.block_mut(entry).terminator = Some(Terminator::Return(Some(l1))); |
| 1343 | m.add_function(f); |
| 1344 | |
| 1345 | assert!( |
| 1346 | Mem2Reg.run(&mut m), |
| 1347 | "no-store alloca should still be promotable" |
| 1348 | ); |
| 1349 | let errs = verify_module(&m); |
| 1350 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 1351 | |
| 1352 | let block = &m.functions[0].blocks[0]; |
| 1353 | assert!( |
| 1354 | !block |
| 1355 | .insts |
| 1356 | .iter() |
| 1357 | .any(|i| matches!(i.kind, InstKind::Alloca(_))), |
| 1358 | "alloca should be gone" |
| 1359 | ); |
| 1360 | assert!( |
| 1361 | !block |
| 1362 | .insts |
| 1363 | .iter() |
| 1364 | .any(|i| matches!(i.kind, InstKind::Load(_))), |
| 1365 | "loads should be gone" |
| 1366 | ); |
| 1367 | // Both loads should be replaced by an Undef sentinel. |
| 1368 | assert!( |
| 1369 | block |
| 1370 | .insts |
| 1371 | .iter() |
| 1372 | .any(|i| matches!(i.kind, InstKind::Undef(_))), |
| 1373 | "Undef sentinel should be inserted" |
| 1374 | ); |
| 1375 | } |
| 1376 | |
| 1377 | // ============================================================= |
| 1378 | // Coverage: address-escape via store of the alloca pointer |
| 1379 | // into another memory location. The alloca whose address is |
| 1380 | // stored must be considered escaped (the writer could keep the |
| 1381 | // pointer for later) and must NOT be promoted. |
| 1382 | // ============================================================= |
| 1383 | #[test] |
| 1384 | fn address_escape_via_store_blocks_promotion() { |
| 1385 | let mut m = Module::new("t".into()); |
| 1386 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 1387 | let entry = f.entry; |
| 1388 | |
| 1389 | // bag: a Ptr<Ptr<i32>> slot that we'll write the escape into. |
| 1390 | let bag = push_inst( |
| 1391 | &mut f, |
| 1392 | entry, |
| 1393 | InstKind::Alloca(IrType::Ptr(Box::new(IrType::Int(IntWidth::I32)))), |
| 1394 | IrType::Ptr(Box::new(IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))))), |
| 1395 | ); |
| 1396 | // escapee: the alloca whose address we leak. |
| 1397 | let escapee = push_inst( |
| 1398 | &mut f, |
| 1399 | entry, |
| 1400 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1401 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1402 | ); |
| 1403 | // Store the escapee POINTER into bag — escapee escapes. |
| 1404 | push_inst(&mut f, entry, InstKind::Store(escapee, bag), IrType::Void); |
| 1405 | f.block_mut(entry).terminator = Some(Terminator::Return(None)); |
| 1406 | m.add_function(f); |
| 1407 | |
| 1408 | // Either the pass returns false (nothing promoted) or it |
| 1409 | // promotes only `bag` and leaves `escapee` standing. What |
| 1410 | // it MUST NOT do is promote `escapee` away. |
| 1411 | let _ = Mem2Reg.run(&mut m); |
| 1412 | let errs = verify_module(&m); |
| 1413 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 1414 | |
| 1415 | let block = &m.functions[0].blocks[0]; |
| 1416 | let surviving_allocas = block |
| 1417 | .insts |
| 1418 | .iter() |
| 1419 | .filter(|i| matches!(i.kind, InstKind::Alloca(_))) |
| 1420 | .count(); |
| 1421 | assert!( |
| 1422 | surviving_allocas >= 1, |
| 1423 | "the address-escaped alloca must survive promotion, got {} allocas", |
| 1424 | surviving_allocas, |
| 1425 | ); |
| 1426 | } |
| 1427 | |
| 1428 | // ============================================================= |
| 1429 | // Coverage: idempotency. Running mem2reg twice on the same IR |
| 1430 | // must be a no-op the second time around. |
| 1431 | // ============================================================= |
| 1432 | #[test] |
| 1433 | fn second_mem2reg_run_is_a_noop() { |
| 1434 | let mut m = Module::new("t".into()); |
| 1435 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 1436 | let entry = f.entry; |
| 1437 | |
| 1438 | let slot = push_inst( |
| 1439 | &mut f, |
| 1440 | entry, |
| 1441 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1442 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1443 | ); |
| 1444 | let cond = push_inst(&mut f, entry, InstKind::ConstBool(true), IrType::Bool); |
| 1445 | let then_b = f.create_block("then"); |
| 1446 | let else_b = f.create_block("else"); |
| 1447 | let merge = f.create_block("merge"); |
| 1448 | f.block_mut(entry).terminator = Some(Terminator::CondBranch { |
| 1449 | cond, |
| 1450 | true_dest: then_b, |
| 1451 | true_args: vec![], |
| 1452 | false_dest: else_b, |
| 1453 | false_args: vec![], |
| 1454 | }); |
| 1455 | |
| 1456 | let c1 = push_inst( |
| 1457 | &mut f, |
| 1458 | then_b, |
| 1459 | InstKind::ConstInt(1, IntWidth::I32), |
| 1460 | IrType::Int(IntWidth::I32), |
| 1461 | ); |
| 1462 | push_inst(&mut f, then_b, InstKind::Store(c1, slot), IrType::Void); |
| 1463 | f.block_mut(then_b).terminator = Some(Terminator::Branch(merge, vec![])); |
| 1464 | |
| 1465 | let c2 = push_inst( |
| 1466 | &mut f, |
| 1467 | else_b, |
| 1468 | InstKind::ConstInt(2, IntWidth::I32), |
| 1469 | IrType::Int(IntWidth::I32), |
| 1470 | ); |
| 1471 | push_inst(&mut f, else_b, InstKind::Store(c2, slot), IrType::Void); |
| 1472 | f.block_mut(else_b).terminator = Some(Terminator::Branch(merge, vec![])); |
| 1473 | |
| 1474 | let loaded = push_inst( |
| 1475 | &mut f, |
| 1476 | merge, |
| 1477 | InstKind::Load(slot), |
| 1478 | IrType::Int(IntWidth::I32), |
| 1479 | ); |
| 1480 | f.block_mut(merge).terminator = Some(Terminator::Return(Some(loaded))); |
| 1481 | m.add_function(f); |
| 1482 | |
| 1483 | assert!(Mem2Reg.run(&mut m), "first run should promote"); |
| 1484 | // Second run must be a no-op: nothing left to promote. |
| 1485 | assert!( |
| 1486 | !Mem2Reg.run(&mut m), |
| 1487 | "second run on already-promoted IR should be a no-op" |
| 1488 | ); |
| 1489 | let errs = verify_module(&m); |
| 1490 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 1491 | } |
| 1492 | |
| 1493 | // ============================================================= |
| 1494 | // Coverage: two promotable allocas joining at the same merge. |
| 1495 | // After promotion the merge block should have TWO block params, |
| 1496 | // and each predecessor should pass two args in the right order. |
| 1497 | // ============================================================= |
| 1498 | #[test] |
| 1499 | fn two_allocas_same_merge_inserts_two_params() { |
| 1500 | let mut m = Module::new("t".into()); |
| 1501 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 1502 | let entry = f.entry; |
| 1503 | |
| 1504 | let slot_a = push_inst( |
| 1505 | &mut f, |
| 1506 | entry, |
| 1507 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1508 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1509 | ); |
| 1510 | let slot_b = push_inst( |
| 1511 | &mut f, |
| 1512 | entry, |
| 1513 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1514 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1515 | ); |
| 1516 | let cond = push_inst(&mut f, entry, InstKind::ConstBool(true), IrType::Bool); |
| 1517 | let then_b = f.create_block("then"); |
| 1518 | let else_b = f.create_block("else"); |
| 1519 | let merge = f.create_block("merge"); |
| 1520 | f.block_mut(entry).terminator = Some(Terminator::CondBranch { |
| 1521 | cond, |
| 1522 | true_dest: then_b, |
| 1523 | true_args: vec![], |
| 1524 | false_dest: else_b, |
| 1525 | false_args: vec![], |
| 1526 | }); |
| 1527 | |
| 1528 | // then: a=1, b=10 |
| 1529 | let c1 = push_inst( |
| 1530 | &mut f, |
| 1531 | then_b, |
| 1532 | InstKind::ConstInt(1, IntWidth::I32), |
| 1533 | IrType::Int(IntWidth::I32), |
| 1534 | ); |
| 1535 | let c10 = push_inst( |
| 1536 | &mut f, |
| 1537 | then_b, |
| 1538 | InstKind::ConstInt(10, IntWidth::I32), |
| 1539 | IrType::Int(IntWidth::I32), |
| 1540 | ); |
| 1541 | push_inst(&mut f, then_b, InstKind::Store(c1, slot_a), IrType::Void); |
| 1542 | push_inst(&mut f, then_b, InstKind::Store(c10, slot_b), IrType::Void); |
| 1543 | f.block_mut(then_b).terminator = Some(Terminator::Branch(merge, vec![])); |
| 1544 | |
| 1545 | // else: a=2, b=20 |
| 1546 | let c2 = push_inst( |
| 1547 | &mut f, |
| 1548 | else_b, |
| 1549 | InstKind::ConstInt(2, IntWidth::I32), |
| 1550 | IrType::Int(IntWidth::I32), |
| 1551 | ); |
| 1552 | let c20 = push_inst( |
| 1553 | &mut f, |
| 1554 | else_b, |
| 1555 | InstKind::ConstInt(20, IntWidth::I32), |
| 1556 | IrType::Int(IntWidth::I32), |
| 1557 | ); |
| 1558 | push_inst(&mut f, else_b, InstKind::Store(c2, slot_a), IrType::Void); |
| 1559 | push_inst(&mut f, else_b, InstKind::Store(c20, slot_b), IrType::Void); |
| 1560 | f.block_mut(else_b).terminator = Some(Terminator::Branch(merge, vec![])); |
| 1561 | |
| 1562 | // merge: result = a + b |
| 1563 | let la = push_inst( |
| 1564 | &mut f, |
| 1565 | merge, |
| 1566 | InstKind::Load(slot_a), |
| 1567 | IrType::Int(IntWidth::I32), |
| 1568 | ); |
| 1569 | let lb = push_inst( |
| 1570 | &mut f, |
| 1571 | merge, |
| 1572 | InstKind::Load(slot_b), |
| 1573 | IrType::Int(IntWidth::I32), |
| 1574 | ); |
| 1575 | let sum = push_inst( |
| 1576 | &mut f, |
| 1577 | merge, |
| 1578 | InstKind::IAdd(la, lb), |
| 1579 | IrType::Int(IntWidth::I32), |
| 1580 | ); |
| 1581 | f.block_mut(merge).terminator = Some(Terminator::Return(Some(sum))); |
| 1582 | m.add_function(f); |
| 1583 | |
| 1584 | assert!(Mem2Reg.run(&mut m)); |
| 1585 | let errs = verify_module(&m); |
| 1586 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 1587 | |
| 1588 | let f = &m.functions[0]; |
| 1589 | let merge_block = f.block(merge); |
| 1590 | assert_eq!( |
| 1591 | merge_block.params.len(), |
| 1592 | 2, |
| 1593 | "merge should have 2 block params, one per promoted alloca" |
| 1594 | ); |
| 1595 | // Each predecessor must now carry 2 branch args. |
| 1596 | for pred in [then_b, else_b] { |
| 1597 | let term = f.block(pred).terminator.as_ref().unwrap(); |
| 1598 | match term { |
| 1599 | Terminator::Branch(_, args) => assert_eq!( |
| 1600 | args.len(), |
| 1601 | 2, |
| 1602 | "predecessor {:?} should pass 2 args to merge", |
| 1603 | pred |
| 1604 | ), |
| 1605 | _ => panic!("predecessor terminator should be Branch"), |
| 1606 | } |
| 1607 | } |
| 1608 | } |
| 1609 | |
| 1610 | // ============================================================= |
| 1611 | // Regression: storing a freshly-loaded promoted value into a |
| 1612 | // second promotable slot inside a branch must not leave the |
| 1613 | // second slot's later loads pointing at the deleted load ID. |
| 1614 | // Audit 29.8 surfaced this from a SAVE-backed branchy scalar |
| 1615 | // shape lowered from real Fortran. |
| 1616 | // ============================================================= |
| 1617 | #[test] |
| 1618 | fn branchy_store_of_promoted_load_keeps_ir_valid() { |
| 1619 | let mut m = Module::new("t".into()); |
| 1620 | let mut f = Function::new( |
| 1621 | "f".into(), |
| 1622 | vec![Param { |
| 1623 | name: "flag".into(), |
| 1624 | ty: IrType::Int(IntWidth::I32), |
| 1625 | id: ValueId(0), |
| 1626 | fortran_noalias: false, |
| 1627 | }], |
| 1628 | IrType::Int(IntWidth::I32), |
| 1629 | ); |
| 1630 | let entry = f.entry; |
| 1631 | |
| 1632 | let result_slot = push_inst( |
| 1633 | &mut f, |
| 1634 | entry, |
| 1635 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1636 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1637 | ); |
| 1638 | let save_slot = push_inst( |
| 1639 | &mut f, |
| 1640 | entry, |
| 1641 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1642 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1643 | ); |
| 1644 | let tmp_slot = push_inst( |
| 1645 | &mut f, |
| 1646 | entry, |
| 1647 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1648 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1649 | ); |
| 1650 | let c7 = push_inst( |
| 1651 | &mut f, |
| 1652 | entry, |
| 1653 | InstKind::ConstInt(7, IntWidth::I32), |
| 1654 | IrType::Int(IntWidth::I32), |
| 1655 | ); |
| 1656 | push_inst(&mut f, entry, InstKind::Store(c7, save_slot), IrType::Void); |
| 1657 | let zero = push_inst( |
| 1658 | &mut f, |
| 1659 | entry, |
| 1660 | InstKind::ConstInt(0, IntWidth::I32), |
| 1661 | IrType::Int(IntWidth::I32), |
| 1662 | ); |
| 1663 | let cond = push_inst( |
| 1664 | &mut f, |
| 1665 | entry, |
| 1666 | InstKind::ICmp(CmpOp::Gt, ValueId(0), zero), |
| 1667 | IrType::Bool, |
| 1668 | ); |
| 1669 | let then_b = f.create_block("then"); |
| 1670 | let else_b = f.create_block("else"); |
| 1671 | let merge = f.create_block("merge"); |
| 1672 | f.block_mut(entry).terminator = Some(Terminator::CondBranch { |
| 1673 | cond, |
| 1674 | true_dest: then_b, |
| 1675 | true_args: vec![], |
| 1676 | false_dest: else_b, |
| 1677 | false_args: vec![], |
| 1678 | }); |
| 1679 | |
| 1680 | let then_load = push_inst( |
| 1681 | &mut f, |
| 1682 | then_b, |
| 1683 | InstKind::Load(save_slot), |
| 1684 | IrType::Int(IntWidth::I32), |
| 1685 | ); |
| 1686 | push_inst( |
| 1687 | &mut f, |
| 1688 | then_b, |
| 1689 | InstKind::Store(then_load, tmp_slot), |
| 1690 | IrType::Void, |
| 1691 | ); |
| 1692 | let then_tmp = push_inst( |
| 1693 | &mut f, |
| 1694 | then_b, |
| 1695 | InstKind::Load(tmp_slot), |
| 1696 | IrType::Int(IntWidth::I32), |
| 1697 | ); |
| 1698 | push_inst( |
| 1699 | &mut f, |
| 1700 | then_b, |
| 1701 | InstKind::Store(then_tmp, result_slot), |
| 1702 | IrType::Void, |
| 1703 | ); |
| 1704 | f.block_mut(then_b).terminator = Some(Terminator::Branch(merge, vec![])); |
| 1705 | |
| 1706 | let else_load = push_inst( |
| 1707 | &mut f, |
| 1708 | else_b, |
| 1709 | InstKind::Load(save_slot), |
| 1710 | IrType::Int(IntWidth::I32), |
| 1711 | ); |
| 1712 | push_inst( |
| 1713 | &mut f, |
| 1714 | else_b, |
| 1715 | InstKind::Store(else_load, tmp_slot), |
| 1716 | IrType::Void, |
| 1717 | ); |
| 1718 | let else_tmp = push_inst( |
| 1719 | &mut f, |
| 1720 | else_b, |
| 1721 | InstKind::Load(tmp_slot), |
| 1722 | IrType::Int(IntWidth::I32), |
| 1723 | ); |
| 1724 | let one = push_inst( |
| 1725 | &mut f, |
| 1726 | else_b, |
| 1727 | InstKind::ConstInt(1, IntWidth::I32), |
| 1728 | IrType::Int(IntWidth::I32), |
| 1729 | ); |
| 1730 | let bumped = push_inst( |
| 1731 | &mut f, |
| 1732 | else_b, |
| 1733 | InstKind::IAdd(else_tmp, one), |
| 1734 | IrType::Int(IntWidth::I32), |
| 1735 | ); |
| 1736 | push_inst( |
| 1737 | &mut f, |
| 1738 | else_b, |
| 1739 | InstKind::Store(bumped, result_slot), |
| 1740 | IrType::Void, |
| 1741 | ); |
| 1742 | f.block_mut(else_b).terminator = Some(Terminator::Branch(merge, vec![])); |
| 1743 | |
| 1744 | let merged = push_inst( |
| 1745 | &mut f, |
| 1746 | merge, |
| 1747 | InstKind::Load(result_slot), |
| 1748 | IrType::Int(IntWidth::I32), |
| 1749 | ); |
| 1750 | f.block_mut(merge).terminator = Some(Terminator::Return(Some(merged))); |
| 1751 | m.add_function(f); |
| 1752 | |
| 1753 | assert!(Mem2Reg.run(&mut m), "branchy chain should be promotable"); |
| 1754 | let errs = verify_module(&m); |
| 1755 | assert!( |
| 1756 | errs.is_empty(), |
| 1757 | "post-mem2reg IR invalid for branchy load/store chain: {:?}", |
| 1758 | errs |
| 1759 | ); |
| 1760 | |
| 1761 | let f = &m.functions[0]; |
| 1762 | for block in &f.blocks { |
| 1763 | for inst in &block.insts { |
| 1764 | assert!( |
| 1765 | !matches!( |
| 1766 | inst.kind, |
| 1767 | InstKind::Alloca(_) | InstKind::Load(_) | InstKind::Store(_, _) |
| 1768 | ), |
| 1769 | "promoted branchy scalar should leave no memory traffic, found {:?}", |
| 1770 | inst.kind |
| 1771 | ); |
| 1772 | } |
| 1773 | } |
| 1774 | } |
| 1775 | |
| 1776 | // ============================================================= |
| 1777 | // Coverage: multi-store-per-iteration. A loop body that stores |
| 1778 | // to the same alloca twice in one iteration must end up with |
| 1779 | // the LAST store flowing back to the header. |
| 1780 | // ============================================================= |
| 1781 | #[test] |
| 1782 | fn multi_store_per_iteration_uses_last_store() { |
| 1783 | let mut m = Module::new("t".into()); |
| 1784 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 1785 | let entry = f.entry; |
| 1786 | |
| 1787 | let slot = push_inst( |
| 1788 | &mut f, |
| 1789 | entry, |
| 1790 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1791 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1792 | ); |
| 1793 | let c0 = push_inst( |
| 1794 | &mut f, |
| 1795 | entry, |
| 1796 | InstKind::ConstInt(0, IntWidth::I32), |
| 1797 | IrType::Int(IntWidth::I32), |
| 1798 | ); |
| 1799 | push_inst(&mut f, entry, InstKind::Store(c0, slot), IrType::Void); |
| 1800 | |
| 1801 | let header = f.create_block("header"); |
| 1802 | let body = f.create_block("body"); |
| 1803 | let exit = f.create_block("exit"); |
| 1804 | f.block_mut(entry).terminator = Some(Terminator::Branch(header, vec![])); |
| 1805 | |
| 1806 | // header: i = load slot; cmp i < 5 |
| 1807 | let cur = push_inst( |
| 1808 | &mut f, |
| 1809 | header, |
| 1810 | InstKind::Load(slot), |
| 1811 | IrType::Int(IntWidth::I32), |
| 1812 | ); |
| 1813 | let c5 = push_inst( |
| 1814 | &mut f, |
| 1815 | header, |
| 1816 | InstKind::ConstInt(5, IntWidth::I32), |
| 1817 | IrType::Int(IntWidth::I32), |
| 1818 | ); |
| 1819 | let cmp = push_inst( |
| 1820 | &mut f, |
| 1821 | header, |
| 1822 | InstKind::ICmp(CmpOp::Lt, cur, c5), |
| 1823 | IrType::Bool, |
| 1824 | ); |
| 1825 | f.block_mut(header).terminator = Some(Terminator::CondBranch { |
| 1826 | cond: cmp, |
| 1827 | true_dest: body, |
| 1828 | true_args: vec![], |
| 1829 | false_dest: exit, |
| 1830 | false_args: vec![], |
| 1831 | }); |
| 1832 | |
| 1833 | // body: store cur+1 to slot, then store (cur+1)+10 to slot. |
| 1834 | // The SECOND store is the one that should flow to header. |
| 1835 | let c1 = push_inst( |
| 1836 | &mut f, |
| 1837 | body, |
| 1838 | InstKind::ConstInt(1, IntWidth::I32), |
| 1839 | IrType::Int(IntWidth::I32), |
| 1840 | ); |
| 1841 | let cur2 = push_inst( |
| 1842 | &mut f, |
| 1843 | body, |
| 1844 | InstKind::Load(slot), |
| 1845 | IrType::Int(IntWidth::I32), |
| 1846 | ); |
| 1847 | let plus1 = push_inst( |
| 1848 | &mut f, |
| 1849 | body, |
| 1850 | InstKind::IAdd(cur2, c1), |
| 1851 | IrType::Int(IntWidth::I32), |
| 1852 | ); |
| 1853 | push_inst(&mut f, body, InstKind::Store(plus1, slot), IrType::Void); |
| 1854 | let c10 = push_inst( |
| 1855 | &mut f, |
| 1856 | body, |
| 1857 | InstKind::ConstInt(10, IntWidth::I32), |
| 1858 | IrType::Int(IntWidth::I32), |
| 1859 | ); |
| 1860 | let plus10 = push_inst( |
| 1861 | &mut f, |
| 1862 | body, |
| 1863 | InstKind::IAdd(plus1, c10), |
| 1864 | IrType::Int(IntWidth::I32), |
| 1865 | ); |
| 1866 | push_inst(&mut f, body, InstKind::Store(plus10, slot), IrType::Void); |
| 1867 | f.block_mut(body).terminator = Some(Terminator::Branch(header, vec![])); |
| 1868 | |
| 1869 | f.block_mut(exit).terminator = Some(Terminator::Return(None)); |
| 1870 | m.add_function(f); |
| 1871 | |
| 1872 | assert!(Mem2Reg.run(&mut m)); |
| 1873 | let errs = verify_module(&m); |
| 1874 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 1875 | |
| 1876 | let f = &m.functions[0]; |
| 1877 | // No loads/stores/allocas anywhere. |
| 1878 | for b in &f.blocks { |
| 1879 | for i in &b.insts { |
| 1880 | assert!( |
| 1881 | !matches!( |
| 1882 | i.kind, |
| 1883 | InstKind::Load(_) | InstKind::Store(..) | InstKind::Alloca(_) |
| 1884 | ), |
| 1885 | "should be promoted away: {:?}", |
| 1886 | i.kind |
| 1887 | ); |
| 1888 | } |
| 1889 | } |
| 1890 | // body's branch back to header should pass `plus10` (the |
| 1891 | // last-store value), not `plus1`. |
| 1892 | let body_term = f.block(body).terminator.as_ref().unwrap(); |
| 1893 | match body_term { |
| 1894 | Terminator::Branch(_, args) => { |
| 1895 | assert_eq!(args.len(), 1, "body should pass 1 arg to header"); |
| 1896 | assert_eq!( |
| 1897 | args[0], plus10, |
| 1898 | "header arg should be the LAST store value, not the first" |
| 1899 | ); |
| 1900 | } |
| 1901 | _ => panic!("body terminator should be Branch"), |
| 1902 | } |
| 1903 | } |
| 1904 | |
| 1905 | // ============================================================= |
| 1906 | // Coverage: multi-latch loop. Two latch blocks both branch back |
| 1907 | // to the same header. Each latch must contribute its own arg |
| 1908 | // to the header's promoted block param. |
| 1909 | // ============================================================= |
| 1910 | #[test] |
| 1911 | fn multi_latch_loop_each_latch_contributes_arg() { |
| 1912 | let mut m = Module::new("t".into()); |
| 1913 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 1914 | let entry = f.entry; |
| 1915 | |
| 1916 | let slot = push_inst( |
| 1917 | &mut f, |
| 1918 | entry, |
| 1919 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 1920 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 1921 | ); |
| 1922 | let c0 = push_inst( |
| 1923 | &mut f, |
| 1924 | entry, |
| 1925 | InstKind::ConstInt(0, IntWidth::I32), |
| 1926 | IrType::Int(IntWidth::I32), |
| 1927 | ); |
| 1928 | push_inst(&mut f, entry, InstKind::Store(c0, slot), IrType::Void); |
| 1929 | |
| 1930 | let header = f.create_block("header"); |
| 1931 | let body = f.create_block("body"); |
| 1932 | let latch_a = f.create_block("latch_a"); |
| 1933 | let latch_b = f.create_block("latch_b"); |
| 1934 | let exit = f.create_block("exit"); |
| 1935 | f.block_mut(entry).terminator = Some(Terminator::Branch(header, vec![])); |
| 1936 | |
| 1937 | // header: i = load; cmp i < 100; cond br body / exit |
| 1938 | let cur = push_inst( |
| 1939 | &mut f, |
| 1940 | header, |
| 1941 | InstKind::Load(slot), |
| 1942 | IrType::Int(IntWidth::I32), |
| 1943 | ); |
| 1944 | let c100 = push_inst( |
| 1945 | &mut f, |
| 1946 | header, |
| 1947 | InstKind::ConstInt(100, IntWidth::I32), |
| 1948 | IrType::Int(IntWidth::I32), |
| 1949 | ); |
| 1950 | let cmp_top = push_inst( |
| 1951 | &mut f, |
| 1952 | header, |
| 1953 | InstKind::ICmp(CmpOp::Lt, cur, c100), |
| 1954 | IrType::Bool, |
| 1955 | ); |
| 1956 | f.block_mut(header).terminator = Some(Terminator::CondBranch { |
| 1957 | cond: cmp_top, |
| 1958 | true_dest: body, |
| 1959 | true_args: vec![], |
| 1960 | false_dest: exit, |
| 1961 | false_args: vec![], |
| 1962 | }); |
| 1963 | |
| 1964 | // body: branch to latch_a or latch_b based on cur. |
| 1965 | let c50 = push_inst( |
| 1966 | &mut f, |
| 1967 | body, |
| 1968 | InstKind::ConstInt(50, IntWidth::I32), |
| 1969 | IrType::Int(IntWidth::I32), |
| 1970 | ); |
| 1971 | let cmp_mid = push_inst( |
| 1972 | &mut f, |
| 1973 | body, |
| 1974 | InstKind::ICmp(CmpOp::Lt, cur, c50), |
| 1975 | IrType::Bool, |
| 1976 | ); |
| 1977 | f.block_mut(body).terminator = Some(Terminator::CondBranch { |
| 1978 | cond: cmp_mid, |
| 1979 | true_dest: latch_a, |
| 1980 | true_args: vec![], |
| 1981 | false_dest: latch_b, |
| 1982 | false_args: vec![], |
| 1983 | }); |
| 1984 | |
| 1985 | // latch_a: store cur+1; jump header |
| 1986 | let c1a = push_inst( |
| 1987 | &mut f, |
| 1988 | latch_a, |
| 1989 | InstKind::ConstInt(1, IntWidth::I32), |
| 1990 | IrType::Int(IntWidth::I32), |
| 1991 | ); |
| 1992 | let curla = push_inst( |
| 1993 | &mut f, |
| 1994 | latch_a, |
| 1995 | InstKind::Load(slot), |
| 1996 | IrType::Int(IntWidth::I32), |
| 1997 | ); |
| 1998 | let nexta = push_inst( |
| 1999 | &mut f, |
| 2000 | latch_a, |
| 2001 | InstKind::IAdd(curla, c1a), |
| 2002 | IrType::Int(IntWidth::I32), |
| 2003 | ); |
| 2004 | push_inst(&mut f, latch_a, InstKind::Store(nexta, slot), IrType::Void); |
| 2005 | f.block_mut(latch_a).terminator = Some(Terminator::Branch(header, vec![])); |
| 2006 | |
| 2007 | // latch_b: store cur+2; jump header |
| 2008 | let c2b = push_inst( |
| 2009 | &mut f, |
| 2010 | latch_b, |
| 2011 | InstKind::ConstInt(2, IntWidth::I32), |
| 2012 | IrType::Int(IntWidth::I32), |
| 2013 | ); |
| 2014 | let curlb = push_inst( |
| 2015 | &mut f, |
| 2016 | latch_b, |
| 2017 | InstKind::Load(slot), |
| 2018 | IrType::Int(IntWidth::I32), |
| 2019 | ); |
| 2020 | let nextb = push_inst( |
| 2021 | &mut f, |
| 2022 | latch_b, |
| 2023 | InstKind::IAdd(curlb, c2b), |
| 2024 | IrType::Int(IntWidth::I32), |
| 2025 | ); |
| 2026 | push_inst(&mut f, latch_b, InstKind::Store(nextb, slot), IrType::Void); |
| 2027 | f.block_mut(latch_b).terminator = Some(Terminator::Branch(header, vec![])); |
| 2028 | |
| 2029 | f.block_mut(exit).terminator = Some(Terminator::Return(None)); |
| 2030 | m.add_function(f); |
| 2031 | |
| 2032 | assert!(Mem2Reg.run(&mut m)); |
| 2033 | let errs = verify_module(&m); |
| 2034 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 2035 | |
| 2036 | let f = &m.functions[0]; |
| 2037 | let header_block = f.block(header); |
| 2038 | assert_eq!( |
| 2039 | header_block.params.len(), |
| 2040 | 1, |
| 2041 | "header should have 1 block param for the counter" |
| 2042 | ); |
| 2043 | |
| 2044 | // Each latch's branch to header should pass exactly one arg |
| 2045 | // (its computed `next` value). |
| 2046 | let la_term = f.block(latch_a).terminator.as_ref().unwrap(); |
| 2047 | match la_term { |
| 2048 | Terminator::Branch(_, args) => { |
| 2049 | assert_eq!(args.len(), 1, "latch_a passes 1 arg to header"); |
| 2050 | assert_eq!(args[0], nexta); |
| 2051 | } |
| 2052 | _ => panic!("latch_a should branch to header"), |
| 2053 | } |
| 2054 | let lb_term = f.block(latch_b).terminator.as_ref().unwrap(); |
| 2055 | match lb_term { |
| 2056 | Terminator::Branch(_, args) => { |
| 2057 | assert_eq!(args.len(), 1, "latch_b passes 1 arg to header"); |
| 2058 | assert_eq!(args[0], nextb); |
| 2059 | } |
| 2060 | _ => panic!("latch_b should branch to header"), |
| 2061 | } |
| 2062 | } |
| 2063 | |
| 2064 | // ============================================================= |
| 2065 | // Coverage: nested loops. An inner loop nested inside an outer |
| 2066 | // loop, both promoting their own counter. Both headers should |
| 2067 | // get a block param. |
| 2068 | // ============================================================= |
| 2069 | #[test] |
| 2070 | fn nested_loops_promote_both_counters() { |
| 2071 | let mut m = Module::new("t".into()); |
| 2072 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 2073 | let entry = f.entry; |
| 2074 | |
| 2075 | let outer_slot = push_inst( |
| 2076 | &mut f, |
| 2077 | entry, |
| 2078 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 2079 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 2080 | ); |
| 2081 | let inner_slot = push_inst( |
| 2082 | &mut f, |
| 2083 | entry, |
| 2084 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 2085 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 2086 | ); |
| 2087 | let c0 = push_inst( |
| 2088 | &mut f, |
| 2089 | entry, |
| 2090 | InstKind::ConstInt(0, IntWidth::I32), |
| 2091 | IrType::Int(IntWidth::I32), |
| 2092 | ); |
| 2093 | push_inst(&mut f, entry, InstKind::Store(c0, outer_slot), IrType::Void); |
| 2094 | |
| 2095 | let outer_header = f.create_block("outer_header"); |
| 2096 | let inner_init = f.create_block("inner_init"); |
| 2097 | let inner_header = f.create_block("inner_header"); |
| 2098 | let inner_body = f.create_block("inner_body"); |
| 2099 | let outer_latch = f.create_block("outer_latch"); |
| 2100 | let exit = f.create_block("exit"); |
| 2101 | |
| 2102 | f.block_mut(entry).terminator = Some(Terminator::Branch(outer_header, vec![])); |
| 2103 | |
| 2104 | // outer_header: i = load outer; cmp i<3; br inner_init / exit |
| 2105 | let i = push_inst( |
| 2106 | &mut f, |
| 2107 | outer_header, |
| 2108 | InstKind::Load(outer_slot), |
| 2109 | IrType::Int(IntWidth::I32), |
| 2110 | ); |
| 2111 | let c3 = push_inst( |
| 2112 | &mut f, |
| 2113 | outer_header, |
| 2114 | InstKind::ConstInt(3, IntWidth::I32), |
| 2115 | IrType::Int(IntWidth::I32), |
| 2116 | ); |
| 2117 | let cmpo = push_inst( |
| 2118 | &mut f, |
| 2119 | outer_header, |
| 2120 | InstKind::ICmp(CmpOp::Lt, i, c3), |
| 2121 | IrType::Bool, |
| 2122 | ); |
| 2123 | f.block_mut(outer_header).terminator = Some(Terminator::CondBranch { |
| 2124 | cond: cmpo, |
| 2125 | true_dest: inner_init, |
| 2126 | true_args: vec![], |
| 2127 | false_dest: exit, |
| 2128 | false_args: vec![], |
| 2129 | }); |
| 2130 | |
| 2131 | // inner_init: store 0 to inner; br inner_header |
| 2132 | let c0i = push_inst( |
| 2133 | &mut f, |
| 2134 | inner_init, |
| 2135 | InstKind::ConstInt(0, IntWidth::I32), |
| 2136 | IrType::Int(IntWidth::I32), |
| 2137 | ); |
| 2138 | push_inst( |
| 2139 | &mut f, |
| 2140 | inner_init, |
| 2141 | InstKind::Store(c0i, inner_slot), |
| 2142 | IrType::Void, |
| 2143 | ); |
| 2144 | f.block_mut(inner_init).terminator = Some(Terminator::Branch(inner_header, vec![])); |
| 2145 | |
| 2146 | // inner_header: j = load inner; cmp j<5; br inner_body / outer_latch |
| 2147 | let j = push_inst( |
| 2148 | &mut f, |
| 2149 | inner_header, |
| 2150 | InstKind::Load(inner_slot), |
| 2151 | IrType::Int(IntWidth::I32), |
| 2152 | ); |
| 2153 | let c5 = push_inst( |
| 2154 | &mut f, |
| 2155 | inner_header, |
| 2156 | InstKind::ConstInt(5, IntWidth::I32), |
| 2157 | IrType::Int(IntWidth::I32), |
| 2158 | ); |
| 2159 | let cmpi = push_inst( |
| 2160 | &mut f, |
| 2161 | inner_header, |
| 2162 | InstKind::ICmp(CmpOp::Lt, j, c5), |
| 2163 | IrType::Bool, |
| 2164 | ); |
| 2165 | f.block_mut(inner_header).terminator = Some(Terminator::CondBranch { |
| 2166 | cond: cmpi, |
| 2167 | true_dest: inner_body, |
| 2168 | true_args: vec![], |
| 2169 | false_dest: outer_latch, |
| 2170 | false_args: vec![], |
| 2171 | }); |
| 2172 | |
| 2173 | // inner_body: j = j + 1; store inner; br inner_header |
| 2174 | let c1i = push_inst( |
| 2175 | &mut f, |
| 2176 | inner_body, |
| 2177 | InstKind::ConstInt(1, IntWidth::I32), |
| 2178 | IrType::Int(IntWidth::I32), |
| 2179 | ); |
| 2180 | let jcur = push_inst( |
| 2181 | &mut f, |
| 2182 | inner_body, |
| 2183 | InstKind::Load(inner_slot), |
| 2184 | IrType::Int(IntWidth::I32), |
| 2185 | ); |
| 2186 | let jnext = push_inst( |
| 2187 | &mut f, |
| 2188 | inner_body, |
| 2189 | InstKind::IAdd(jcur, c1i), |
| 2190 | IrType::Int(IntWidth::I32), |
| 2191 | ); |
| 2192 | push_inst( |
| 2193 | &mut f, |
| 2194 | inner_body, |
| 2195 | InstKind::Store(jnext, inner_slot), |
| 2196 | IrType::Void, |
| 2197 | ); |
| 2198 | f.block_mut(inner_body).terminator = Some(Terminator::Branch(inner_header, vec![])); |
| 2199 | |
| 2200 | // outer_latch: i = i + 1; store outer; br outer_header |
| 2201 | let c1o = push_inst( |
| 2202 | &mut f, |
| 2203 | outer_latch, |
| 2204 | InstKind::ConstInt(1, IntWidth::I32), |
| 2205 | IrType::Int(IntWidth::I32), |
| 2206 | ); |
| 2207 | let icur = push_inst( |
| 2208 | &mut f, |
| 2209 | outer_latch, |
| 2210 | InstKind::Load(outer_slot), |
| 2211 | IrType::Int(IntWidth::I32), |
| 2212 | ); |
| 2213 | let inext = push_inst( |
| 2214 | &mut f, |
| 2215 | outer_latch, |
| 2216 | InstKind::IAdd(icur, c1o), |
| 2217 | IrType::Int(IntWidth::I32), |
| 2218 | ); |
| 2219 | push_inst( |
| 2220 | &mut f, |
| 2221 | outer_latch, |
| 2222 | InstKind::Store(inext, outer_slot), |
| 2223 | IrType::Void, |
| 2224 | ); |
| 2225 | f.block_mut(outer_latch).terminator = Some(Terminator::Branch(outer_header, vec![])); |
| 2226 | |
| 2227 | f.block_mut(exit).terminator = Some(Terminator::Return(None)); |
| 2228 | m.add_function(f); |
| 2229 | |
| 2230 | assert!(Mem2Reg.run(&mut m)); |
| 2231 | let errs = verify_module(&m); |
| 2232 | assert!(errs.is_empty(), "post-mem2reg IR invalid: {:?}", errs); |
| 2233 | |
| 2234 | let f = &m.functions[0]; |
| 2235 | // Audit Min-2: pin the exact param counts. Hand-traced |
| 2236 | // CFRWZ for this CFG produces: |
| 2237 | // * outer_slot stores at {entry, outer_latch}, with |
| 2238 | // DF(outer_latch) = {outer_header}, so the IDF is |
| 2239 | // {outer_header}. → outer_header gets 1 param. |
| 2240 | // * inner_slot stores at {inner_init, inner_body}, with |
| 2241 | // DF(inner_init) = DF(inner_body) = {outer_header, |
| 2242 | // inner_header} after IDF closure (the back-edge from |
| 2243 | // outer_latch and the back-edge from inner_body both |
| 2244 | // promote outer_header / inner_header into the |
| 2245 | // frontier). → outer_header AND inner_header each |
| 2246 | // get 1 param for inner_slot. |
| 2247 | // Totals: outer_header has 2, inner_header has 1. |
| 2248 | assert_eq!(f.block(outer_header).params.len(), 2, |
| 2249 | "outer_header should have exactly 2 params (outer + inner counter via back-edge IDF), got {}", |
| 2250 | f.block(outer_header).params.len()); |
| 2251 | assert_eq!( |
| 2252 | f.block(inner_header).params.len(), |
| 2253 | 1, |
| 2254 | "inner_header should have exactly 1 param for inner counter, got {}", |
| 2255 | f.block(inner_header).params.len() |
| 2256 | ); |
| 2257 | // No loads/stores/allocas anywhere — both slots fully promoted. |
| 2258 | for b in &f.blocks { |
| 2259 | for i in &b.insts { |
| 2260 | assert!( |
| 2261 | !matches!( |
| 2262 | i.kind, |
| 2263 | InstKind::Load(_) | InstKind::Store(..) | InstKind::Alloca(_) |
| 2264 | ), |
| 2265 | "{:?}: kind {:?} should be promoted away", |
| 2266 | b.id, |
| 2267 | i.kind |
| 2268 | ); |
| 2269 | } |
| 2270 | } |
| 2271 | } |
| 2272 | } |
| 2273 |