armfortas Public

Watch 0 Fork 0 Star 0

Rust · 16105 bytes Raw Blame History

  
        1
        //! Data dependence analysis for loop transformations.
      
        2
        //!
      
        3
        //! Determines whether two memory accesses inside a loop (or loop nest)
      
        4
        //! can touch the same location on different iterations. Uses the GCD
      
        5
        //! test on affine subscript expressions extracted from GEP indices.
      
        6
        //!
      
        7
        //! ## Fortran-specific simplifications
      
        8
        //!
      
        9
        //! - Distinct array base pointers → always independent (Fortran standard
      
        10
        //!   prohibits aliasing between distinct named arrays absent EQUIVALENCE).
      
        11
        //! - Column-major strides are compile-time constants for fixed-shape arrays.
      
        12
        //! - INTENT(IN) arguments cannot alias INTENT(OUT) arguments.
      
        13
        
        14
        use std::collections::HashSet;
      
        15
        use crate::ir::inst::*;
      
        16
        
        17
        // ---------------------------------------------------------------------------
      
        18
        // Data structures
      
        19
        // ---------------------------------------------------------------------------
      
        20
        
        21
        /// An affine expression: `constant + sum(coefficient * iv)`.
      
        22
        /// The `terms` vector maps induction variables to their coefficients.
      
        23
        #[derive(Debug, Clone)]
      
        24
        pub struct AffineExpr {
      
        25
            pub constant: i64,
      
        26
            pub terms: Vec<(i64, ValueId)>,  // (coefficient, iv)
      
        27
        }
      
        28
        
        29
        impl AffineExpr {
      
        30
            fn zero() -> Self { Self { constant: 0, terms: Vec::new() } }
      
        31
        
        32
            fn from_const(c: i64) -> Self { Self { constant: c, terms: Vec::new() } }
      
        33
        
        34
            fn from_iv(iv: ValueId) -> Self {
      
        35
                Self { constant: 0, terms: vec![(1, iv)] }
      
        36
            }
      
        37
        
        38
            fn add(&self, other: &Self) -> Self {
      
        39
                let mut result = self.clone();
      
        40
                result.constant += other.constant;
      
        41
                for &(coeff, iv) in &other.terms {
      
        42
                    if let Some(entry) = result.terms.iter_mut().find(|(_, v)| *v == iv) {
      
        43
                        entry.0 += coeff;
      
        44
                    } else {
      
        45
                        result.terms.push((coeff, iv));
      
        46
                    }
      
        47
                }
      
        48
                // Remove zero-coefficient terms.
      
        49
                result.terms.retain(|(c, _)| *c != 0);
      
        50
                result
      
        51
            }
      
        52
        
        53
            fn sub(&self, other: &Self) -> Self {
      
        54
                let negated = AffineExpr {
      
        55
                    constant: -other.constant,
      
        56
                    terms: other.terms.iter().map(|(c, v)| (-c, *v)).collect(),
      
        57
                };
      
        58
                self.add(&negated)
      
        59
            }
      
        60
        
        61
            fn scale(&self, factor: i64) -> Self {
      
        62
                AffineExpr {
      
        63
                    constant: self.constant * factor,
      
        64
                    terms: self.terms.iter().map(|(c, v)| (c * factor, *v)).collect(),
      
        65
                }
      
        66
            }
      
        67
        }
      
        68
        
        69
        /// A memory reference extracted from loop body.
      
        70
        #[derive(Debug, Clone)]
      
        71
        pub struct MemRef {
      
        72
            pub inst_id: ValueId,
      
        73
            pub base: ValueId,
      
        74
            pub subscript: AffineExpr,
      
        75
            pub is_write: bool,
      
        76
        }
      
        77
        
        78
        /// Result of dependence testing between two memory references.
      
        79
        #[derive(Debug, Clone)]
      
        80
        pub struct DepResult {
      
        81
            /// True if the references may access the same element on different iterations.
      
        82
            pub dependent: bool,
      
        83
        }
      
        84
        
        85
        // ---------------------------------------------------------------------------
      
        86
        // Affine expression extraction
      
        87
        // ---------------------------------------------------------------------------
      
        88
        
        89
        /// Extract an affine expression from a GEP index by walking backwards
      
        90
        /// through arithmetic instructions. `ivs` is the set of known induction
      
        91
        /// variables for the enclosing loop nest.
      
        92
        pub fn extract_affine(
      
        93
            func: &Function,
      
        94
            val: ValueId,
      
        95
            ivs: &HashSet<ValueId>,
      
        96
        ) -> Option<AffineExpr> {
      
        97
            // Is this an IV?
      
        98
            if ivs.contains(&val) {
      
        99
                return Some(AffineExpr::from_iv(val));
      
        100
            }
      
        101
        
        102
            // Find the instruction that defines this value.
      
        103
            let inst = find_inst(func, val)?;
      
        104
            match &inst.kind {
      
        105
                InstKind::ConstInt(c, _) => i64::try_from(*c).ok().map(AffineExpr::from_const),
      
        106
        
        107
                InstKind::IAdd(a, b) => {
      
        108
                    let ea = extract_affine(func, *a, ivs)?;
      
        109
                    let eb = extract_affine(func, *b, ivs)?;
      
        110
                    Some(ea.add(&eb))
      
        111
                }
      
        112
        
        113
                InstKind::ISub(a, b) => {
      
        114
                    let ea = extract_affine(func, *a, ivs)?;
      
        115
                    let eb = extract_affine(func, *b, ivs)?;
      
        116
                    Some(ea.sub(&eb))
      
        117
                }
      
        118
        
        119
                InstKind::IMul(a, b) => {
      
        120
                    // One operand must be a constant for the result to be affine.
      
        121
                    if let Some(ca) = resolve_const(func, *a) {
      
        122
                        let eb = extract_affine(func, *b, ivs)?;
      
        123
                        Some(eb.scale(ca))
      
        124
                    } else if let Some(cb) = resolve_const(func, *b) {
      
        125
                        let ea = extract_affine(func, *a, ivs)?;
      
        126
                        Some(ea.scale(cb))
      
        127
                    } else {
      
        128
                        None // non-affine (product of two non-constants)
      
        129
                    }
      
        130
                }
      
        131
        
        132
                InstKind::IntExtend(a, _, _) => extract_affine(func, *a, ivs),
      
        133
        
        134
                _ => {
      
        135
                    // Value defined outside the loop (e.g., function param, alloca).
      
        136
                    // If it's not an IV and not computable from IVs, treat as unknown.
      
        137
                    // Conservative: return None (non-affine).
      
        138
                    None
      
        139
                }
      
        140
            }
      
        141
        }
      
        142
        
        143
        fn resolve_const(func: &Function, vid: ValueId) -> Option<i64> {
      
        144
            let inst = find_inst(func, vid)?;
      
        145
            if let InstKind::ConstInt(c, _) = &inst.kind { i64::try_from(*c).ok() } else { None }
      
        146
        }
      
        147
        
        148
        fn find_inst(func: &Function, vid: ValueId) -> Option<&Inst> {
      
        149
            for block in &func.blocks {
      
        150
                for inst in &block.insts {
      
        151
                    if inst.id == vid { return Some(inst); }
      
        152
                }
      
        153
            }
      
        154
            None
      
        155
        }
      
        156
        
        157
        // ---------------------------------------------------------------------------
      
        158
        // Memory reference collection
      
        159
        // ---------------------------------------------------------------------------
      
        160
        
        161
        /// Collect all memory references (loads and stores through GEPs) in a
      
        162
        /// set of blocks.
      
        163
        pub fn collect_mem_refs(
      
        164
            func: &Function,
      
        165
            blocks: &HashSet<BlockId>,
      
        166
            ivs: &HashSet<ValueId>,
      
        167
        ) -> Vec<MemRef> {
      
        168
            let mut refs = Vec::new();
      
        169
            for &bid in blocks {
      
        170
                let block = func.block(bid);
      
        171
                for inst in &block.insts {
      
        172
                    match &inst.kind {
      
        173
                        InstKind::Store(_, ptr) => {
      
        174
                            if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, true, ivs) {
      
        175
                                refs.push(mr);
      
        176
                            }
      
        177
                        }
      
        178
                        InstKind::Load(ptr) => {
      
        179
                            if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, false, ivs) {
      
        180
                                refs.push(mr);
      
        181
                            }
      
        182
                        }
      
        183
                        _ => {}
      
        184
                    }
      
        185
                }
      
        186
            }
      
        187
            refs
      
        188
        }
      
        189
        
        190
        fn extract_mem_ref(
      
        191
            func: &Function,
      
        192
            inst_id: ValueId,
      
        193
            ptr: ValueId,
      
        194
            is_write: bool,
      
        195
            ivs: &HashSet<ValueId>,
      
        196
        ) -> Option<MemRef> {
      
        197
            // The pointer should be a GEP.
      
        198
            let gep_inst = find_inst(func, ptr)?;
      
        199
            let (base, indices) = match &gep_inst.kind {
      
        200
                InstKind::GetElementPtr(b, idxs) => (*b, idxs.clone()),
      
        201
                _ => return None,
      
        202
            };
      
        203
            // We work with the flat offset (single index for 1D GEP).
      
        204
            let idx = indices.first()?;
      
        205
            let subscript = extract_affine(func, *idx, ivs)?;
      
        206
            Some(MemRef { inst_id, base, subscript, is_write })
      
        207
        }
      
        208
        
        209
        // ---------------------------------------------------------------------------
      
        210
        // GCD test
      
        211
        // ---------------------------------------------------------------------------
      
        212
        
        213
        /// Test whether two memory references can access the same element on
      
        214
        /// different iterations.
      
        215
        ///
      
        216
        /// GCD test: given `f(I) = sum(a_k * i_k) + c1` and
      
        217
        /// `g(I) = sum(b_k * i_k) + c2`, a dependence is possible only if
      
        218
        /// `gcd(a_1, ..., b_1, ...) | (c2 - c1)`.
      
        219
        pub fn test_dependence(ref_a: &MemRef, ref_b: &MemRef) -> DepResult {
      
        220
            // Fortran no-alias: distinct array bases → independent.
      
        221
            if ref_a.base != ref_b.base {
      
        222
                return DepResult { dependent: false };
      
        223
            }
      
        224
        
        225
            // Compute the difference of the two affine expressions.
      
        226
            let diff = ref_b.subscript.sub(&ref_a.subscript);
      
        227
        
        228
            // If there are no IV terms, the accesses are at a fixed distance.
      
        229
            // If the constant is 0, they access the same element (same iteration = ok
      
        230
            // unless both are writes). If non-zero, they access different elements.
      
        231
            if diff.terms.is_empty() {
      
        232
                return DepResult { dependent: diff.constant == 0 };
      
        233
            }
      
        234
        
        235
            // GCD of all IV coefficients in the difference.
      
        236
            let g = diff.terms.iter()
      
        237
                .map(|(c, _)| c.unsigned_abs())
      
        238
                .fold(0u64, gcd);
      
        239
        
        240
            if g == 0 {
      
        241
                // All coefficients are zero — same as fixed-distance case.
      
        242
                return DepResult { dependent: diff.constant == 0 };
      
        243
            }
      
        244
        
        245
            // GCD test: if gcd does not divide the constant difference,
      
        246
            // the accesses NEVER touch the same element → independent.
      
        247
            let dependent = diff.constant.unsigned_abs().is_multiple_of(g);
      
        248
            DepResult { dependent }
      
        249
        }
      
        250
        
        251
        fn gcd(a: u64, b: u64) -> u64 {
      
        252
            if b == 0 { a } else { gcd(b, a % b) }
      
        253
        }
      
        254
        
        255
        // ---------------------------------------------------------------------------
      
        256
        // High-level queries for loop passes
      
        257
        // ---------------------------------------------------------------------------
      
        258
        
        259
        /// Check if two adjacent loop bodies can be legally fused.
      
        260
        ///
      
        261
        /// Fusion is safe when no cross-loop dependence would be reversed.
      
        262
        /// Same-iteration dependencies (subscript diff = 0) are ALWAYS
      
        263
        /// fusion-legal: the write in A's body precedes the read in B's
      
        264
        /// body within the same fused iteration. Only cross-iteration
      
        265
        /// backward dependencies (B writes, A reads at a later iteration)
      
        266
        /// would be reversed by fusion — and those can't exist between
      
        267
        /// adjacent loops that don't share a carried state.
      
        268
        /// Check if two adjacent loop bodies can be legally fused.
      
        269
        ///
      
        270
        /// `iv_a` and `iv_b` are the IVs of the two loops — they iterate
      
        271
        /// over the same range, so for dependence purposes they are the
      
        272
        /// same variable. We remap `iv_b → iv_a` in B's subscripts before
      
        273
        /// comparing.
      
        274
        pub fn fusion_legal(
      
        275
            func: &Function,
      
        276
            body_a: &HashSet<BlockId>,
      
        277
            body_b: &HashSet<BlockId>,
      
        278
            iv_a: ValueId,
      
        279
            iv_b: ValueId,
      
        280
        ) -> bool {
      
        281
            let mut ivs_a = HashSet::new();
      
        282
            ivs_a.insert(iv_a);
      
        283
            let mut ivs_b = HashSet::new();
      
        284
            ivs_b.insert(iv_b);
      
        285
        
        286
            let refs_a = collect_mem_refs(func, body_a, &ivs_a);
      
        287
            let refs_b_raw = collect_mem_refs(func, body_b, &ivs_b);
      
        288
        
        289
            // Remap iv_b → iv_a in B's subscripts so the comparison works.
      
        290
            let refs_b: Vec<MemRef> = refs_b_raw.into_iter().map(|mut r| {
      
        291
                for term in &mut r.subscript.terms {
      
        292
                    if term.1 == iv_b { term.1 = iv_a; }
      
        293
                }
      
        294
                r
      
        295
            }).collect();
      
        296
        
        297
            for ra in &refs_a {
      
        298
                for rb in &refs_b {
      
        299
                    if !ra.is_write && !rb.is_write { continue; }
      
        300
                    if ra.base != rb.base { continue; }
      
        301
        
        302
                    let diff = rb.subscript.sub(&ra.subscript);
      
        303
        
        304
                    // Same-iteration access (diff = 0) → fusion-legal.
      
        305
                    if diff.terms.is_empty() && diff.constant == 0 {
      
        306
                        continue;
      
        307
                    }
      
        308
        
        309
                    // Non-zero distance → conservative reject.
      
        310
                    return false;
      
        311
                }
      
        312
            }
      
        313
            true
      
        314
        }
      
        315
        
        316
        /// Check if interchanging (outer, inner) preserves correctness.
      
        317
        /// Conservative: any carried dependence within the inner body that
      
        318
        /// involves both IVs may change direction after interchange.
      
        319
        pub fn interchange_legal(
      
        320
            func: &Function,
      
        321
            inner_body: &HashSet<BlockId>,
      
        322
            outer_iv: ValueId,
      
        323
            inner_iv: ValueId,
      
        324
        ) -> bool {
      
        325
            let mut ivs = HashSet::new();
      
        326
            ivs.insert(outer_iv);
      
        327
            ivs.insert(inner_iv);
      
        328
        
        329
            let refs = collect_mem_refs(func, inner_body, &ivs);
      
        330
        
        331
            // For each pair of refs where at least one is a write:
      
        332
            for i in 0..refs.len() {
      
        333
                for j in (i+1)..refs.len() {
      
        334
                    if !refs[i].is_write && !refs[j].is_write { continue; }
      
        335
                    if refs[i].base != refs[j].base { continue; }
      
        336
        
        337
                    // Both refs share a base and at least one is a write.
      
        338
                    // Check if the subscript difference has non-zero
      
        339
                    // coefficients for BOTH IVs — if so, interchange might
      
        340
                    // reverse the dependence direction.
      
        341
                    let diff = refs[j].subscript.sub(&refs[i].subscript);
      
        342
                    let has_outer = diff.terms.iter().any(|(c, v)| *v == outer_iv && *c != 0);
      
        343
                    let has_inner = diff.terms.iter().any(|(c, v)| *v == inner_iv && *c != 0);
      
        344
                    if has_outer && has_inner {
      
        345
                        // Dependence involves both IVs — interchange could reverse
      
        346
                        // the direction. Conservative: reject.
      
        347
                        return false;
      
        348
                    }
      
        349
                }
      
        350
            }
      
        351
            true
      
        352
        }
      
        353
        
        354
        // ---------------------------------------------------------------------------
      
        355
        // Tests
      
        356
        // ---------------------------------------------------------------------------
      
        357
        
        358
        #[cfg(test)]
      
        359
        mod tests {
      
        360
            use super::*;
      
        361
        
        362
            #[test]
      
        363
            fn gcd_basic() {
      
        364
                assert_eq!(gcd(12, 8), 4);
      
        365
                assert_eq!(gcd(7, 3), 1);
      
        366
                assert_eq!(gcd(0, 5), 5);
      
        367
                assert_eq!(gcd(10, 0), 10);
      
        368
            }
      
        369
        
        370
            #[test]
      
        371
            fn affine_add_sub() {
      
        372
                let iv = ValueId(100);
      
        373
                let a = AffineExpr { constant: 3, terms: vec![(2, iv)] };
      
        374
                let b = AffineExpr { constant: 1, terms: vec![(1, iv)] };
      
        375
                let sum = a.add(&b);
      
        376
                assert_eq!(sum.constant, 4);
      
        377
                assert_eq!(sum.terms.len(), 1);
      
        378
                assert_eq!(sum.terms[0], (3, iv));
      
        379
        
        380
                let diff = a.sub(&b);
      
        381
                assert_eq!(diff.constant, 2);
      
        382
                assert_eq!(diff.terms.len(), 1);
      
        383
                assert_eq!(diff.terms[0], (1, iv));
      
        384
            }
      
        385
        
        386
            #[test]
      
        387
            fn affine_scale() {
      
        388
                let iv = ValueId(100);
      
        389
                let a = AffineExpr { constant: 2, terms: vec![(3, iv)] };
      
        390
                let scaled = a.scale(4);
      
        391
                assert_eq!(scaled.constant, 8);
      
        392
                assert_eq!(scaled.terms[0], (12, iv));
      
        393
            }
      
        394
        
        395
            #[test]
      
        396
            fn gcd_test_independent() {
      
        397
                // a(2i+1) vs a(2i+2): diff = 2i+2 - (2i+1) = 1 (constant).
      
        398
                // No IV terms in diff → dependent only if constant is 0.
      
        399
                // constant = 1 ≠ 0 → independent.
      
        400
                let iv = ValueId(100);
      
        401
                let ref_a = MemRef {
      
        402
                    inst_id: ValueId(0), base: ValueId(50),
      
        403
                    subscript: AffineExpr { constant: 1, terms: vec![(2, iv)] },
      
        404
                    is_write: true,
      
        405
                };
      
        406
                let ref_b = MemRef {
      
        407
                    inst_id: ValueId(1), base: ValueId(50),
      
        408
                    subscript: AffineExpr { constant: 2, terms: vec![(2, iv)] },
      
        409
                    is_write: false,
      
        410
                };
      
        411
                let dep = test_dependence(&ref_a, &ref_b);
      
        412
                assert!(!dep.dependent, "a(2i+1) and a(2i+2) should be independent");
      
        413
            }
      
        414
        
        415
            #[test]
      
        416
            fn gcd_test_dependent() {
      
        417
                // a(i) vs a(i): same subscript → always dependent.
      
        418
                let iv = ValueId(100);
      
        419
                let ref_a = MemRef {
      
        420
                    inst_id: ValueId(0), base: ValueId(50),
      
        421
                    subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
      
        422
                    is_write: true,
      
        423
                };
      
        424
                let ref_b = MemRef {
      
        425
                    inst_id: ValueId(1), base: ValueId(50),
      
        426
                    subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
      
        427
                    is_write: false,
      
        428
                };
      
        429
                let dep = test_dependence(&ref_a, &ref_b);
      
        430
                assert!(dep.dependent, "a(i) and a(i) should be dependent");
      
        431
            }
      
        432
        
        433
            #[test]
      
        434
            fn gcd_test_different_stride() {
      
        435
                // a(3i) vs a(3i+1): diff has constant 1, gcd of coefficients = 0
      
        436
                // (they cancel: 3-3=0). So diff = constant 1, no terms → independent.
      
        437
                let iv = ValueId(100);
      
        438
                let ref_a = MemRef {
      
        439
                    inst_id: ValueId(0), base: ValueId(50),
      
        440
                    subscript: AffineExpr { constant: 0, terms: vec![(3, iv)] },
      
        441
                    is_write: true,
      
        442
                };
      
        443
                let ref_b = MemRef {
      
        444
                    inst_id: ValueId(1), base: ValueId(50),
      
        445
                    subscript: AffineExpr { constant: 1, terms: vec![(3, iv)] },
      
        446
                    is_write: false,
      
        447
                };
      
        448
                let dep = test_dependence(&ref_a, &ref_b);
      
        449
                assert!(!dep.dependent, "a(3i) and a(3i+1) should be independent by GCD test");
      
        450
            }
      
        451
        
        452
            #[test]
      
        453
            fn distinct_bases_independent() {
      
        454
                let iv = ValueId(100);
      
        455
                let ref_a = MemRef {
      
        456
                    inst_id: ValueId(0), base: ValueId(50),
      
        457
                    subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
      
        458
                    is_write: true,
      
        459
                };
      
        460
                let ref_b = MemRef {
      
        461
                    inst_id: ValueId(1), base: ValueId(60), // different base
      
        462
                    subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
      
        463
                    is_write: false,
      
        464
                };
      
        465
                let dep = test_dependence(&ref_a, &ref_b);
      
        466
                assert!(!dep.dependent, "distinct bases should be independent (Fortran no-alias)");
      
        467
            }
      
        468
        }
      
        469

1	//! Data dependence analysis for loop transformations.
2	//!
3	//! Determines whether two memory accesses inside a loop (or loop nest)
4	//! can touch the same location on different iterations. Uses the GCD
5	//! test on affine subscript expressions extracted from GEP indices.
6	//!
7	//! ## Fortran-specific simplifications
8	//!
9	//! - Distinct array base pointers → always independent (Fortran standard
10	//! prohibits aliasing between distinct named arrays absent EQUIVALENCE).
11	//! - Column-major strides are compile-time constants for fixed-shape arrays.
12	//! - INTENT(IN) arguments cannot alias INTENT(OUT) arguments.
13
14	use std::collections::HashSet;
15	use crate::ir::inst::*;
16
17	// ---------------------------------------------------------------------------
18	// Data structures
19	// ---------------------------------------------------------------------------
20
21	/// An affine expression: `constant + sum(coefficient * iv)`.
22	/// The `terms` vector maps induction variables to their coefficients.
23	#[derive(Debug, Clone)]
24	pub struct AffineExpr {
25	pub constant: i64,
26	pub terms: Vec<(i64, ValueId)>, // (coefficient, iv)
27	}
28
29	impl AffineExpr {
30	fn zero() -> Self { Self { constant: 0, terms: Vec::new() } }
31
32	fn from_const(c: i64) -> Self { Self { constant: c, terms: Vec::new() } }
33
34	fn from_iv(iv: ValueId) -> Self {
35	Self { constant: 0, terms: vec![(1, iv)] }
36	}
37
38	fn add(&self, other: &Self) -> Self {
39	let mut result = self.clone();
40	result.constant += other.constant;
41	for &(coeff, iv) in &other.terms {
42	if let Some(entry) = result.terms.iter_mut().find(\|(_, v)\| *v == iv) {
43	entry.0 += coeff;
44	} else {
45	result.terms.push((coeff, iv));
46	}
47	}
48	// Remove zero-coefficient terms.
49	result.terms.retain(\|(c, _)\| *c != 0);
50	result
51	}
52
53	fn sub(&self, other: &Self) -> Self {
54	let negated = AffineExpr {
55	constant: -other.constant,
56	terms: other.terms.iter().map(\|(c, v)\| (-c, *v)).collect(),
57	};
58	self.add(&negated)
59	}
60
61	fn scale(&self, factor: i64) -> Self {
62	AffineExpr {
63	constant: self.constant * factor,
64	terms: self.terms.iter().map(\|(c, v)\| (c * factor, *v)).collect(),
65	}
66	}
67	}
68
69	/// A memory reference extracted from loop body.
70	#[derive(Debug, Clone)]
71	pub struct MemRef {
72	pub inst_id: ValueId,
73	pub base: ValueId,
74	pub subscript: AffineExpr,
75	pub is_write: bool,
76	}
77
78	/// Result of dependence testing between two memory references.
79	#[derive(Debug, Clone)]
80	pub struct DepResult {
81	/// True if the references may access the same element on different iterations.
82	pub dependent: bool,
83	}
84
85	// ---------------------------------------------------------------------------
86	// Affine expression extraction
87	// ---------------------------------------------------------------------------
88
89	/// Extract an affine expression from a GEP index by walking backwards
90	/// through arithmetic instructions. `ivs` is the set of known induction
91	/// variables for the enclosing loop nest.
92	pub fn extract_affine(
93	func: &Function,
94	val: ValueId,
95	ivs: &HashSet<ValueId>,
96	) -> Option<AffineExpr> {
97	// Is this an IV?
98	if ivs.contains(&val) {
99	return Some(AffineExpr::from_iv(val));
100	}
101
102	// Find the instruction that defines this value.
103	let inst = find_inst(func, val)?;
104	match &inst.kind {
105	InstKind::ConstInt(c, _) => i64::try_from(*c).ok().map(AffineExpr::from_const),
106
107	InstKind::IAdd(a, b) => {
108	let ea = extract_affine(func, *a, ivs)?;
109	let eb = extract_affine(func, *b, ivs)?;
110	Some(ea.add(&eb))
111	}
112
113	InstKind::ISub(a, b) => {
114	let ea = extract_affine(func, *a, ivs)?;
115	let eb = extract_affine(func, *b, ivs)?;
116	Some(ea.sub(&eb))
117	}
118
119	InstKind::IMul(a, b) => {
120	// One operand must be a constant for the result to be affine.
121	if let Some(ca) = resolve_const(func, *a) {
122	let eb = extract_affine(func, *b, ivs)?;
123	Some(eb.scale(ca))
124	} else if let Some(cb) = resolve_const(func, *b) {
125	let ea = extract_affine(func, *a, ivs)?;
126	Some(ea.scale(cb))
127	} else {
128	None // non-affine (product of two non-constants)
129	}
130	}
131
132	InstKind::IntExtend(a, _, _) => extract_affine(func, *a, ivs),
133
134	_ => {
135	// Value defined outside the loop (e.g., function param, alloca).
136	// If it's not an IV and not computable from IVs, treat as unknown.
137	// Conservative: return None (non-affine).
138	None
139	}
140	}
141	}
142
143	fn resolve_const(func: &Function, vid: ValueId) -> Option<i64> {
144	let inst = find_inst(func, vid)?;
145	if let InstKind::ConstInt(c, _) = &inst.kind { i64::try_from(*c).ok() } else { None }
146	}
147
148	fn find_inst(func: &Function, vid: ValueId) -> Option<&Inst> {
149	for block in &func.blocks {
150	for inst in &block.insts {
151	if inst.id == vid { return Some(inst); }
152	}
153	}
154	None
155	}
156
157	// ---------------------------------------------------------------------------
158	// Memory reference collection
159	// ---------------------------------------------------------------------------
160
161	/// Collect all memory references (loads and stores through GEPs) in a
162	/// set of blocks.
163	pub fn collect_mem_refs(
164	func: &Function,
165	blocks: &HashSet<BlockId>,
166	ivs: &HashSet<ValueId>,
167	) -> Vec<MemRef> {
168	let mut refs = Vec::new();
169	for &bid in blocks {
170	let block = func.block(bid);
171	for inst in &block.insts {
172	match &inst.kind {
173	InstKind::Store(_, ptr) => {
174	if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, true, ivs) {
175	refs.push(mr);
176	}
177	}
178	InstKind::Load(ptr) => {
179	if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, false, ivs) {
180	refs.push(mr);
181	}
182	}
183	_ => {}
184	}
185	}
186	}
187	refs
188	}
189
190	fn extract_mem_ref(
191	func: &Function,
192	inst_id: ValueId,
193	ptr: ValueId,
194	is_write: bool,
195	ivs: &HashSet<ValueId>,
196	) -> Option<MemRef> {
197	// The pointer should be a GEP.
198	let gep_inst = find_inst(func, ptr)?;
199	let (base, indices) = match &gep_inst.kind {
200	InstKind::GetElementPtr(b, idxs) => (*b, idxs.clone()),
201	_ => return None,
202	};
203	// We work with the flat offset (single index for 1D GEP).
204	let idx = indices.first()?;
205	let subscript = extract_affine(func, *idx, ivs)?;
206	Some(MemRef { inst_id, base, subscript, is_write })
207	}
208
209	// ---------------------------------------------------------------------------
210	// GCD test
211	// ---------------------------------------------------------------------------
212
213	/// Test whether two memory references can access the same element on
214	/// different iterations.
215	///
216	/// GCD test: given `f(I) = sum(a_k * i_k) + c1` and
217	/// `g(I) = sum(b_k * i_k) + c2`, a dependence is possible only if
218	/// `gcd(a_1, ..., b_1, ...) \| (c2 - c1)`.
219	pub fn test_dependence(ref_a: &MemRef, ref_b: &MemRef) -> DepResult {
220	// Fortran no-alias: distinct array bases → independent.
221	if ref_a.base != ref_b.base {
222	return DepResult { dependent: false };
223	}
224
225	// Compute the difference of the two affine expressions.
226	let diff = ref_b.subscript.sub(&ref_a.subscript);
227
228	// If there are no IV terms, the accesses are at a fixed distance.
229	// If the constant is 0, they access the same element (same iteration = ok
230	// unless both are writes). If non-zero, they access different elements.
231	if diff.terms.is_empty() {
232	return DepResult { dependent: diff.constant == 0 };
233	}
234
235	// GCD of all IV coefficients in the difference.
236	let g = diff.terms.iter()
237	.map(\|(c, _)\| c.unsigned_abs())
238	.fold(0u64, gcd);
239
240	if g == 0 {
241	// All coefficients are zero — same as fixed-distance case.
242	return DepResult { dependent: diff.constant == 0 };
243	}
244
245	// GCD test: if gcd does not divide the constant difference,
246	// the accesses NEVER touch the same element → independent.
247	let dependent = diff.constant.unsigned_abs().is_multiple_of(g);
248	DepResult { dependent }
249	}
250
251	fn gcd(a: u64, b: u64) -> u64 {
252	if b == 0 { a } else { gcd(b, a % b) }
253	}
254
255	// ---------------------------------------------------------------------------
256	// High-level queries for loop passes
257	// ---------------------------------------------------------------------------
258
259	/// Check if two adjacent loop bodies can be legally fused.
260	///
261	/// Fusion is safe when no cross-loop dependence would be reversed.
262	/// Same-iteration dependencies (subscript diff = 0) are ALWAYS
263	/// fusion-legal: the write in A's body precedes the read in B's
264	/// body within the same fused iteration. Only cross-iteration
265	/// backward dependencies (B writes, A reads at a later iteration)
266	/// would be reversed by fusion — and those can't exist between
267	/// adjacent loops that don't share a carried state.
268	/// Check if two adjacent loop bodies can be legally fused.
269	///
270	/// `iv_a` and `iv_b` are the IVs of the two loops — they iterate
271	/// over the same range, so for dependence purposes they are the
272	/// same variable. We remap `iv_b → iv_a` in B's subscripts before
273	/// comparing.
274	pub fn fusion_legal(
275	func: &Function,
276	body_a: &HashSet<BlockId>,
277	body_b: &HashSet<BlockId>,
278	iv_a: ValueId,
279	iv_b: ValueId,
280	) -> bool {
281	let mut ivs_a = HashSet::new();
282	ivs_a.insert(iv_a);
283	let mut ivs_b = HashSet::new();
284	ivs_b.insert(iv_b);
285
286	let refs_a = collect_mem_refs(func, body_a, &ivs_a);
287	let refs_b_raw = collect_mem_refs(func, body_b, &ivs_b);
288
289	// Remap iv_b → iv_a in B's subscripts so the comparison works.
290	let refs_b: Vec<MemRef> = refs_b_raw.into_iter().map(\|mut r\| {
291	for term in &mut r.subscript.terms {
292	if term.1 == iv_b { term.1 = iv_a; }
293	}
294	r
295	}).collect();
296
297	for ra in &refs_a {
298	for rb in &refs_b {
299	if !ra.is_write && !rb.is_write { continue; }
300	if ra.base != rb.base { continue; }
301
302	let diff = rb.subscript.sub(&ra.subscript);
303
304	// Same-iteration access (diff = 0) → fusion-legal.
305	if diff.terms.is_empty() && diff.constant == 0 {
306	continue;
307	}
308
309	// Non-zero distance → conservative reject.
310	return false;
311	}
312	}
313	true
314	}
315
316	/// Check if interchanging (outer, inner) preserves correctness.
317	/// Conservative: any carried dependence within the inner body that
318	/// involves both IVs may change direction after interchange.
319	pub fn interchange_legal(
320	func: &Function,
321	inner_body: &HashSet<BlockId>,
322	outer_iv: ValueId,
323	inner_iv: ValueId,
324	) -> bool {
325	let mut ivs = HashSet::new();
326	ivs.insert(outer_iv);
327	ivs.insert(inner_iv);
328
329	let refs = collect_mem_refs(func, inner_body, &ivs);
330
331	// For each pair of refs where at least one is a write:
332	for i in 0..refs.len() {
333	for j in (i+1)..refs.len() {
334	if !refs[i].is_write && !refs[j].is_write { continue; }
335	if refs[i].base != refs[j].base { continue; }
336
337	// Both refs share a base and at least one is a write.
338	// Check if the subscript difference has non-zero
339	// coefficients for BOTH IVs — if so, interchange might
340	// reverse the dependence direction.
341	let diff = refs[j].subscript.sub(&refs[i].subscript);
342	let has_outer = diff.terms.iter().any(\|(c, v)\| v == outer_iv && c != 0);
343	let has_inner = diff.terms.iter().any(\|(c, v)\| v == inner_iv && c != 0);
344	if has_outer && has_inner {
345	// Dependence involves both IVs — interchange could reverse
346	// the direction. Conservative: reject.
347	return false;
348	}
349	}
350	}
351	true
352	}
353
354	// ---------------------------------------------------------------------------
355	// Tests
356	// ---------------------------------------------------------------------------
357
358	#[cfg(test)]
359	mod tests {
360	use super::*;
361
362	#[test]
363	fn gcd_basic() {
364	assert_eq!(gcd(12, 8), 4);
365	assert_eq!(gcd(7, 3), 1);
366	assert_eq!(gcd(0, 5), 5);
367	assert_eq!(gcd(10, 0), 10);
368	}
369
370	#[test]
371	fn affine_add_sub() {
372	let iv = ValueId(100);
373	let a = AffineExpr { constant: 3, terms: vec![(2, iv)] };
374	let b = AffineExpr { constant: 1, terms: vec![(1, iv)] };
375	let sum = a.add(&b);
376	assert_eq!(sum.constant, 4);
377	assert_eq!(sum.terms.len(), 1);
378	assert_eq!(sum.terms[0], (3, iv));
379
380	let diff = a.sub(&b);
381	assert_eq!(diff.constant, 2);
382	assert_eq!(diff.terms.len(), 1);
383	assert_eq!(diff.terms[0], (1, iv));
384	}
385
386	#[test]
387	fn affine_scale() {
388	let iv = ValueId(100);
389	let a = AffineExpr { constant: 2, terms: vec![(3, iv)] };
390	let scaled = a.scale(4);
391	assert_eq!(scaled.constant, 8);
392	assert_eq!(scaled.terms[0], (12, iv));
393	}
394
395	#[test]
396	fn gcd_test_independent() {
397	// a(2i+1) vs a(2i+2): diff = 2i+2 - (2i+1) = 1 (constant).
398	// No IV terms in diff → dependent only if constant is 0.
399	// constant = 1 ≠ 0 → independent.
400	let iv = ValueId(100);
401	let ref_a = MemRef {
402	inst_id: ValueId(0), base: ValueId(50),
403	subscript: AffineExpr { constant: 1, terms: vec![(2, iv)] },
404	is_write: true,
405	};
406	let ref_b = MemRef {
407	inst_id: ValueId(1), base: ValueId(50),
408	subscript: AffineExpr { constant: 2, terms: vec![(2, iv)] },
409	is_write: false,
410	};
411	let dep = test_dependence(&ref_a, &ref_b);
412	assert!(!dep.dependent, "a(2i+1) and a(2i+2) should be independent");
413	}
414
415	#[test]
416	fn gcd_test_dependent() {
417	// a(i) vs a(i): same subscript → always dependent.
418	let iv = ValueId(100);
419	let ref_a = MemRef {
420	inst_id: ValueId(0), base: ValueId(50),
421	subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
422	is_write: true,
423	};
424	let ref_b = MemRef {
425	inst_id: ValueId(1), base: ValueId(50),
426	subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
427	is_write: false,
428	};
429	let dep = test_dependence(&ref_a, &ref_b);
430	assert!(dep.dependent, "a(i) and a(i) should be dependent");
431	}
432
433	#[test]
434	fn gcd_test_different_stride() {
435	// a(3i) vs a(3i+1): diff has constant 1, gcd of coefficients = 0
436	// (they cancel: 3-3=0). So diff = constant 1, no terms → independent.
437	let iv = ValueId(100);
438	let ref_a = MemRef {
439	inst_id: ValueId(0), base: ValueId(50),
440	subscript: AffineExpr { constant: 0, terms: vec![(3, iv)] },
441	is_write: true,
442	};
443	let ref_b = MemRef {
444	inst_id: ValueId(1), base: ValueId(50),
445	subscript: AffineExpr { constant: 1, terms: vec![(3, iv)] },
446	is_write: false,
447	};
448	let dep = test_dependence(&ref_a, &ref_b);
449	assert!(!dep.dependent, "a(3i) and a(3i+1) should be independent by GCD test");
450	}
451
452	#[test]
453	fn distinct_bases_independent() {
454	let iv = ValueId(100);
455	let ref_a = MemRef {
456	inst_id: ValueId(0), base: ValueId(50),
457	subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
458	is_write: true,
459	};
460	let ref_b = MemRef {
461	inst_id: ValueId(1), base: ValueId(60), // different base
462	subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
463	is_write: false,
464	};
465	let dep = test_dependence(&ref_a, &ref_b);
466	assert!(!dep.dependent, "distinct bases should be independent (Fortran no-alias)");
467	}
468	}
469