| 1 | //! Dead-code elimination pass. |
| 2 | //! |
| 3 | //! Removes instructions whose result has no observable effect: |
| 4 | //! |
| 5 | //! * The result `ValueId` is unused by any other instruction or |
| 6 | //! terminator in the function. |
| 7 | //! * The instruction has no side effects of its own (a `Store`, |
| 8 | //! `Call`, or `RuntimeCall` is always live, even if its return is |
| 9 | //! unused, because it can write memory or perform I/O). |
| 10 | //! |
| 11 | //! The pass uses mark-and-sweep: |
| 12 | //! |
| 13 | //! 1. Walk every block, every instruction, every terminator and mark |
| 14 | //! each `ValueId` they consume as "live". |
| 15 | //! 2. Walk every instruction in every block; instructions whose result |
| 16 | //! is unused **and** which are pure are dropped. |
| 17 | //! |
| 18 | //! Constant-folding plus constant-propagation can leave behind a lot |
| 19 | //! of pure dead computation (e.g., the `Const*` values that defined a |
| 20 | //! folded conditional). DCE cleans those up. We also remove blocks |
| 21 | //! that became unreachable for any reason (defensive — `ConstProp` |
| 22 | //! already prunes after itself, but other future passes may not). |
| 23 | //! |
| 24 | //! ### Side-effect classification |
| 25 | //! |
| 26 | //! In Fortran, the following are observable: |
| 27 | //! * `Store` — writes user memory |
| 28 | //! * `Call` / `RuntimeCall` — print, allocate, free, etc. |
| 29 | //! * `Return` / branches / `Unreachable` — terminators are *always* |
| 30 | //! live; we never remove them. |
| 31 | //! |
| 32 | //! `Load` is conservatively pure here. That is technically incorrect |
| 33 | //! when the same address is later stored to (the load could be |
| 34 | //! observing volatile memory), but for SSA-form Fortran loads of |
| 35 | //! locals it's a safe approximation. A future alias-analysis pass can |
| 36 | //! refine this. |
| 37 | |
| 38 | use super::pass::Pass; |
| 39 | use super::util::{inst_uses, prune_unreachable, terminator_uses}; |
| 40 | use crate::ir::inst::*; |
| 41 | use std::collections::{HashMap, HashSet}; |
| 42 | |
| 43 | /// True if the instruction has any side effect that prevents removal, |
| 44 | /// regardless of whether its result is used. |
| 45 | fn has_side_effect(kind: &InstKind, pure_internal_calls: &[bool]) -> bool { |
| 46 | matches!( |
| 47 | kind, |
| 48 | InstKind::Store(..) |
| 49 | // VStore writes 128 bits to memory just like Store; it |
| 50 | // must not be DCE'd. NeonVectorize emits VStore as the |
| 51 | // sink of every vectorized loop body — without this, |
| 52 | // DCE strips the whole body once it's been vectorized |
| 53 | // (the VAdd/VLoad chain has no other uses) and the |
| 54 | // function silently becomes a no-op. |
| 55 | | InstKind::VStore(..) |
| 56 | | InstKind::RuntimeCall(..) |
| 57 | // Alloca produces a stack slot whose identity matters even |
| 58 | // if no one reads from it (the address may escape via a |
| 59 | // future store/call). Treat as side-effecting for safety. |
| 60 | | InstKind::Alloca(..) |
| 61 | ) || match kind { |
| 62 | InstKind::Call(FuncRef::Internal(idx), _) => !pure_internal_calls |
| 63 | .get(*idx as usize) |
| 64 | .copied() |
| 65 | .unwrap_or(false), |
| 66 | InstKind::Call(..) => true, |
| 67 | _ => false, |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | /// Collect every `ValueId` referenced as an operand of any instruction |
| 72 | /// or terminator in the function. The result is the "live use" set. |
| 73 | fn collect_live_uses(func: &Function) -> HashSet<ValueId> { |
| 74 | let mut live = HashSet::new(); |
| 75 | for block in &func.blocks { |
| 76 | for inst in &block.insts { |
| 77 | for v in inst_uses(&inst.kind) { |
| 78 | live.insert(v); |
| 79 | } |
| 80 | } |
| 81 | if let Some(term) = &block.terminator { |
| 82 | for v in terminator_uses(term) { |
| 83 | live.insert(v); |
| 84 | } |
| 85 | } |
| 86 | } |
| 87 | live |
| 88 | } |
| 89 | |
| 90 | /// Run DCE over a single function. Returns true if anything changed. |
| 91 | /// |
| 92 | /// Two interleaved fixpoints: |
| 93 | /// |
| 94 | /// 1. **Inner**: instruction-level mark-and-sweep. We rebuild the |
| 95 | /// live set every round and drop any pure instruction whose |
| 96 | /// result has no uses. Removing one instruction can make another |
| 97 | /// dead (its operands lose a use), so we iterate. |
| 98 | /// |
| 99 | /// 2. **Outer**: block-parameter cleanup. After the instructions |
| 100 | /// settle, we look for block parameters whose `ValueId` no |
| 101 | /// longer appears in the live-use set, drop them, and rewrite |
| 102 | /// every predecessor's branch arg list to remove the |
| 103 | /// corresponding slot. Removing an arg can free its defining |
| 104 | /// instruction, so we re-run the inner loop afterwards. Audit |
| 105 | /// finding Med-5 / C-1. |
| 106 | fn dce_function(func: &mut Function, pure_internal_calls: &[bool]) -> bool { |
| 107 | let mut any_change = false; |
| 108 | let mut outer_changed = true; |
| 109 | while outer_changed { |
| 110 | outer_changed = false; |
| 111 | |
| 112 | // Inner: instruction-level mark-and-sweep. |
| 113 | let mut changed = true; |
| 114 | while changed { |
| 115 | changed = false; |
| 116 | let live = collect_live_uses(func); |
| 117 | for block in &mut func.blocks { |
| 118 | let before = block.insts.len(); |
| 119 | block.insts.retain(|inst| { |
| 120 | if has_side_effect(&inst.kind, pure_internal_calls) { |
| 121 | return true; |
| 122 | } |
| 123 | if live.contains(&inst.id) { |
| 124 | return true; |
| 125 | } |
| 126 | false |
| 127 | }); |
| 128 | if block.insts.len() != before { |
| 129 | changed = true; |
| 130 | any_change = true; |
| 131 | } |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | // Outer: block-parameter cleanup. |
| 136 | if remove_dead_block_params(func) { |
| 137 | any_change = true; |
| 138 | outer_changed = true; |
| 139 | } |
| 140 | } |
| 141 | if prune_unreachable(func) { |
| 142 | any_change = true; |
| 143 | } |
| 144 | any_change |
| 145 | } |
| 146 | |
| 147 | /// Drop block parameters whose `ValueId` is unused, and rewrite every |
| 148 | /// predecessor's branch arg list to delete the matching slot. The |
| 149 | /// entry block is exempt (entry blocks aren't allowed to have |
| 150 | /// parameters anyway). |
| 151 | /// |
| 152 | /// Returns `true` if at least one parameter was removed. |
| 153 | /// |
| 154 | /// Audit B-5: the previous implementation walked `func.blocks` once |
| 155 | /// per `(target, idx)` pair via linear `find` — O(N²·K) for N blocks |
| 156 | /// and K dead params. The new version builds a `BlockId → vec_index` |
| 157 | /// map upfront so the per-pair work is O(N). |
| 158 | fn remove_dead_block_params(func: &mut Function) -> bool { |
| 159 | let live = collect_live_uses(func); |
| 160 | |
| 161 | let mut by_block: HashMap<BlockId, Vec<usize>> = HashMap::new(); |
| 162 | for block in func.blocks.iter() { |
| 163 | if block.id == func.entry { |
| 164 | continue; |
| 165 | } |
| 166 | for (idx, p) in block.params.iter().enumerate() { |
| 167 | if !live.contains(&p.id) { |
| 168 | by_block.entry(block.id).or_default().push(idx); |
| 169 | } |
| 170 | } |
| 171 | } |
| 172 | if by_block.is_empty() { |
| 173 | return false; |
| 174 | } |
| 175 | |
| 176 | // Map BlockId → vec index. LICM/DCE never structurally reorder |
| 177 | // `func.blocks`; only the inst vectors change. So this stays |
| 178 | // valid for the lifetime of this call. |
| 179 | let block_index: HashMap<BlockId, usize> = func |
| 180 | .blocks |
| 181 | .iter() |
| 182 | .enumerate() |
| 183 | .map(|(i, b)| (b.id, i)) |
| 184 | .collect(); |
| 185 | |
| 186 | for (target_block, mut idxs) in by_block { |
| 187 | idxs.sort_by(|a, b| b.cmp(a)); // descending — high indices first so removals don't shift later ones |
| 188 | let target_idx = block_index[&target_block]; |
| 189 | for idx in idxs { |
| 190 | // Remove the parameter at `idx` from `target_block`. |
| 191 | let blk = &mut func.blocks[target_idx]; |
| 192 | if idx < blk.params.len() { |
| 193 | blk.params.remove(idx); |
| 194 | } |
| 195 | // Drop the corresponding arg slot from every predecessor's |
| 196 | // terminator that branches into `target_block`. We still |
| 197 | // walk every block here because we don't keep an explicit |
| 198 | // pred map (build one if profiling shows this dominates). |
| 199 | for src in func.blocks.iter_mut() { |
| 200 | if let Some(term) = &mut src.terminator { |
| 201 | drop_branch_arg(term, target_block, idx); |
| 202 | } |
| 203 | } |
| 204 | } |
| 205 | } |
| 206 | true |
| 207 | } |
| 208 | |
| 209 | /// In-place: if `term` branches to `target`, remove the arg at `idx` |
| 210 | /// from the corresponding arg vector. |
| 211 | /// |
| 212 | /// Audit B-10: the previous version silently no-op'd when the index |
| 213 | /// was out of range. A mismatch between target params and predecessor |
| 214 | /// args is a real IR validity violation, so we now `debug_assert!` on |
| 215 | /// it instead. The release build still degrades gracefully (we don't |
| 216 | /// want to crash production compiles on a stale arg list — the |
| 217 | /// verifier will reject the resulting IR anyway), but a debug build |
| 218 | /// will surface the bug at the source. |
| 219 | fn drop_branch_arg(term: &mut Terminator, target: BlockId, idx: usize) { |
| 220 | match term { |
| 221 | Terminator::Branch(d, args) if *d == target => { |
| 222 | debug_assert!( |
| 223 | idx < args.len(), |
| 224 | "drop_branch_arg: branch arg list out of sync with target params" |
| 225 | ); |
| 226 | if idx < args.len() { |
| 227 | args.remove(idx); |
| 228 | } |
| 229 | } |
| 230 | Terminator::CondBranch { |
| 231 | true_dest, |
| 232 | true_args, |
| 233 | false_dest, |
| 234 | false_args, |
| 235 | .. |
| 236 | } => { |
| 237 | if *true_dest == target { |
| 238 | debug_assert!( |
| 239 | idx < true_args.len(), |
| 240 | "drop_branch_arg: cond_branch true_args out of sync" |
| 241 | ); |
| 242 | if idx < true_args.len() { |
| 243 | true_args.remove(idx); |
| 244 | } |
| 245 | } |
| 246 | if *false_dest == target { |
| 247 | debug_assert!( |
| 248 | idx < false_args.len(), |
| 249 | "drop_branch_arg: cond_branch false_args out of sync" |
| 250 | ); |
| 251 | if idx < false_args.len() { |
| 252 | false_args.remove(idx); |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | Terminator::Switch { cases, default, .. } => { |
| 257 | // Switch targets cannot have block params per the |
| 258 | // verifier (`check_branch_args` rejects them). Audit |
| 259 | // M-6: if we ever see a Switch that branches into a |
| 260 | // block we're stripping params from, that's a real IR |
| 261 | // validity bug. Assert instead of silently skipping. |
| 262 | let touches_target = cases.iter().any(|(_, b)| *b == target) || *default == target; |
| 263 | debug_assert!( |
| 264 | !touches_target, |
| 265 | "drop_branch_arg: Switch terminator branches to a block with params \ |
| 266 | — verifier should reject this construction" |
| 267 | ); |
| 268 | } |
| 269 | _ => {} |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | pub struct Dce; |
| 274 | |
| 275 | impl Pass for Dce { |
| 276 | fn name(&self) -> &'static str { |
| 277 | "dce" |
| 278 | } |
| 279 | fn run(&self, module: &mut Module) -> bool { |
| 280 | let pure_internal_calls: Vec<bool> = |
| 281 | module.functions.iter().map(|func| func.is_pure).collect(); |
| 282 | let mut changed = false; |
| 283 | for func in &mut module.functions { |
| 284 | if dce_function(func, &pure_internal_calls) { |
| 285 | changed = true; |
| 286 | } |
| 287 | } |
| 288 | changed |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | #[cfg(test)] |
| 293 | mod tests { |
| 294 | use super::*; |
| 295 | use crate::ir::types::{FloatWidth, IntWidth, IrType}; |
| 296 | use crate::lexer::{Position, Span}; |
| 297 | |
| 298 | fn dummy_span() -> Span { |
| 299 | let p = Position { line: 1, col: 1 }; |
| 300 | Span { |
| 301 | start: p, |
| 302 | end: p, |
| 303 | file_id: 0, |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | fn push(f: &mut Function, kind: InstKind, ty: IrType) -> ValueId { |
| 308 | let id = f.next_value_id(); |
| 309 | let entry = f.entry; |
| 310 | f.block_mut(entry).insts.push(Inst { |
| 311 | id, |
| 312 | kind, |
| 313 | ty, |
| 314 | span: dummy_span(), |
| 315 | }); |
| 316 | id |
| 317 | } |
| 318 | |
| 319 | #[test] |
| 320 | fn removes_unused_const() { |
| 321 | let mut m = Module::new("t".into()); |
| 322 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 323 | push( |
| 324 | &mut f, |
| 325 | InstKind::ConstInt(7, IntWidth::I32), |
| 326 | IrType::Int(IntWidth::I32), |
| 327 | ); |
| 328 | let entry = f.entry; |
| 329 | f.block_mut(entry).terminator = Some(Terminator::Return(None)); |
| 330 | m.add_function(f); |
| 331 | |
| 332 | assert!(Dce.run(&mut m)); |
| 333 | assert!(m.functions[0].blocks[0].insts.is_empty()); |
| 334 | } |
| 335 | |
| 336 | #[test] |
| 337 | fn keeps_const_used_by_return() { |
| 338 | let mut m = Module::new("t".into()); |
| 339 | let mut f = Function::new("f".into(), vec![], IrType::Int(IntWidth::I32)); |
| 340 | let v = push( |
| 341 | &mut f, |
| 342 | InstKind::ConstInt(7, IntWidth::I32), |
| 343 | IrType::Int(IntWidth::I32), |
| 344 | ); |
| 345 | let entry = f.entry; |
| 346 | f.block_mut(entry).terminator = Some(Terminator::Return(Some(v))); |
| 347 | m.add_function(f); |
| 348 | |
| 349 | assert!(!Dce.run(&mut m)); |
| 350 | assert_eq!(m.functions[0].blocks[0].insts.len(), 1); |
| 351 | } |
| 352 | |
| 353 | #[test] |
| 354 | fn keeps_store_even_if_address_unused() { |
| 355 | // Alloca + Store should both stay even though no one loads. |
| 356 | let mut m = Module::new("t".into()); |
| 357 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 358 | let addr = push( |
| 359 | &mut f, |
| 360 | InstKind::Alloca(IrType::Int(IntWidth::I32)), |
| 361 | IrType::Ptr(Box::new(IrType::Int(IntWidth::I32))), |
| 362 | ); |
| 363 | let v = push( |
| 364 | &mut f, |
| 365 | InstKind::ConstInt(1, IntWidth::I32), |
| 366 | IrType::Int(IntWidth::I32), |
| 367 | ); |
| 368 | push(&mut f, InstKind::Store(v, addr), IrType::Void); |
| 369 | let entry = f.entry; |
| 370 | f.block_mut(entry).terminator = Some(Terminator::Return(None)); |
| 371 | m.add_function(f); |
| 372 | |
| 373 | assert!(!Dce.run(&mut m)); |
| 374 | assert_eq!(m.functions[0].blocks[0].insts.len(), 3); |
| 375 | } |
| 376 | |
| 377 | #[test] |
| 378 | fn cascades_through_chain() { |
| 379 | // %1 = const 3 |
| 380 | // %2 = const 4 |
| 381 | // %3 = iadd %1, %2 ; unused |
| 382 | // %4 = fmul ... ; uses %3 transitively? No — distinct chain |
| 383 | // After DCE: only the terminator remains. |
| 384 | let mut m = Module::new("t".into()); |
| 385 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 386 | let a = push( |
| 387 | &mut f, |
| 388 | InstKind::ConstInt(3, IntWidth::I32), |
| 389 | IrType::Int(IntWidth::I32), |
| 390 | ); |
| 391 | let b = push( |
| 392 | &mut f, |
| 393 | InstKind::ConstInt(4, IntWidth::I32), |
| 394 | IrType::Int(IntWidth::I32), |
| 395 | ); |
| 396 | let c = push(&mut f, InstKind::IAdd(a, b), IrType::Int(IntWidth::I32)); |
| 397 | let _d = push(&mut f, InstKind::INeg(c), IrType::Int(IntWidth::I32)); |
| 398 | let entry = f.entry; |
| 399 | f.block_mut(entry).terminator = Some(Terminator::Return(None)); |
| 400 | m.add_function(f); |
| 401 | |
| 402 | assert!(Dce.run(&mut m)); |
| 403 | assert!(m.functions[0].blocks[0].insts.is_empty()); |
| 404 | } |
| 405 | |
| 406 | #[test] |
| 407 | fn keeps_call_even_if_result_unused() { |
| 408 | let mut m = Module::new("t".into()); |
| 409 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 410 | push( |
| 411 | &mut f, |
| 412 | InstKind::RuntimeCall(RuntimeFunc::PrintNewline, vec![]), |
| 413 | IrType::Void, |
| 414 | ); |
| 415 | let entry = f.entry; |
| 416 | f.block_mut(entry).terminator = Some(Terminator::Return(None)); |
| 417 | m.add_function(f); |
| 418 | |
| 419 | assert!(!Dce.run(&mut m)); |
| 420 | assert_eq!(m.functions[0].blocks[0].insts.len(), 1); |
| 421 | } |
| 422 | |
| 423 | #[test] |
| 424 | fn removes_unused_internal_pure_call() { |
| 425 | let mut m = Module::new("t".into()); |
| 426 | |
| 427 | let mut callee = Function::new("pure_fn".into(), vec![], IrType::Int(IntWidth::I32)); |
| 428 | callee.is_pure = true; |
| 429 | let c = push( |
| 430 | &mut callee, |
| 431 | InstKind::ConstInt(7, IntWidth::I32), |
| 432 | IrType::Int(IntWidth::I32), |
| 433 | ); |
| 434 | let callee_entry = callee.entry; |
| 435 | callee.block_mut(callee_entry).terminator = Some(Terminator::Return(Some(c))); |
| 436 | m.add_function(callee); |
| 437 | |
| 438 | let mut caller = Function::new("caller".into(), vec![], IrType::Void); |
| 439 | push( |
| 440 | &mut caller, |
| 441 | InstKind::Call(FuncRef::Internal(0), vec![]), |
| 442 | IrType::Int(IntWidth::I32), |
| 443 | ); |
| 444 | let caller_entry = caller.entry; |
| 445 | caller.block_mut(caller_entry).terminator = Some(Terminator::Return(None)); |
| 446 | m.add_function(caller); |
| 447 | |
| 448 | assert!(Dce.run(&mut m)); |
| 449 | assert!( |
| 450 | m.functions[1].blocks[0].insts.is_empty(), |
| 451 | "unused PURE call should be removed" |
| 452 | ); |
| 453 | } |
| 454 | |
| 455 | #[test] |
| 456 | fn float_chain_is_dead() { |
| 457 | let mut m = Module::new("t".into()); |
| 458 | let mut f = Function::new("f".into(), vec![], IrType::Void); |
| 459 | let a = push( |
| 460 | &mut f, |
| 461 | InstKind::ConstFloat(1.5, FloatWidth::F64), |
| 462 | IrType::Float(FloatWidth::F64), |
| 463 | ); |
| 464 | let b = push( |
| 465 | &mut f, |
| 466 | InstKind::ConstFloat(2.5, FloatWidth::F64), |
| 467 | IrType::Float(FloatWidth::F64), |
| 468 | ); |
| 469 | let _ = push(&mut f, InstKind::FAdd(a, b), IrType::Float(FloatWidth::F64)); |
| 470 | let entry = f.entry; |
| 471 | f.block_mut(entry).terminator = Some(Terminator::Return(None)); |
| 472 | m.add_function(f); |
| 473 | |
| 474 | assert!(Dce.run(&mut m)); |
| 475 | assert!(m.functions[0].blocks[0].insts.is_empty()); |
| 476 | } |
| 477 | } |
| 478 |