armfortas Public

Watch 0 Fork 0 Star 0

Rust · 17205 bytes Raw Blame History

  
        1
        //! Data dependence analysis for loop transformations.
      
        2
        //!
      
        3
        //! Determines whether two memory accesses inside a loop (or loop nest)
      
        4
        //! can touch the same location on different iterations. Uses the GCD
      
        5
        //! test on affine subscript expressions extracted from GEP indices.
      
        6
        //!
      
        7
        //! ## Fortran-specific simplifications
      
        8
        //!
      
        9
        //! - Distinct array base pointers → always independent (Fortran standard
      
        10
        //!   prohibits aliasing between distinct named arrays absent EQUIVALENCE).
      
        11
        //! - Column-major strides are compile-time constants for fixed-shape arrays.
      
        12
        //! - INTENT(IN) arguments cannot alias INTENT(OUT) arguments.
      
        13
        
        14
        use crate::ir::inst::*;
      
        15
        use std::collections::HashSet;
      
        16
        
        17
        // ---------------------------------------------------------------------------
      
        18
        // Data structures
      
        19
        // ---------------------------------------------------------------------------
      
        20
        
        21
        /// An affine expression: `constant + sum(coefficient * iv)`.
      
        22
        /// The `terms` vector maps induction variables to their coefficients.
      
        23
        #[derive(Debug, Clone)]
      
        24
        pub struct AffineExpr {
      
        25
            pub constant: i64,
      
        26
            pub terms: Vec<(i64, ValueId)>, // (coefficient, iv)
      
        27
        }
      
        28
        
        29
        impl AffineExpr {
      
        30
            fn zero() -> Self {
      
        31
                Self {
      
        32
                    constant: 0,
      
        33
                    terms: Vec::new(),
      
        34
                }
      
        35
            }
      
        36
        
        37
            fn from_const(c: i64) -> Self {
      
        38
                Self {
      
        39
                    constant: c,
      
        40
                    terms: Vec::new(),
      
        41
                }
      
        42
            }
      
        43
        
        44
            fn from_iv(iv: ValueId) -> Self {
      
        45
                Self {
      
        46
                    constant: 0,
      
        47
                    terms: vec![(1, iv)],
      
        48
                }
      
        49
            }
      
        50
        
        51
            fn add(&self, other: &Self) -> Self {
      
        52
                let mut result = self.clone();
      
        53
                result.constant += other.constant;
      
        54
                for &(coeff, iv) in &other.terms {
      
        55
                    if let Some(entry) = result.terms.iter_mut().find(|(_, v)| *v == iv) {
      
        56
                        entry.0 += coeff;
      
        57
                    } else {
      
        58
                        result.terms.push((coeff, iv));
      
        59
                    }
      
        60
                }
      
        61
                // Remove zero-coefficient terms.
      
        62
                result.terms.retain(|(c, _)| *c != 0);
      
        63
                result
      
        64
            }
      
        65
        
        66
            fn sub(&self, other: &Self) -> Self {
      
        67
                let negated = AffineExpr {
      
        68
                    constant: -other.constant,
      
        69
                    terms: other.terms.iter().map(|(c, v)| (-c, *v)).collect(),
      
        70
                };
      
        71
                self.add(&negated)
      
        72
            }
      
        73
        
        74
            fn scale(&self, factor: i64) -> Self {
      
        75
                AffineExpr {
      
        76
                    constant: self.constant * factor,
      
        77
                    terms: self.terms.iter().map(|(c, v)| (c * factor, *v)).collect(),
      
        78
                }
      
        79
            }
      
        80
        }
      
        81
        
        82
        /// A memory reference extracted from loop body.
      
        83
        #[derive(Debug, Clone)]
      
        84
        pub struct MemRef {
      
        85
            pub inst_id: ValueId,
      
        86
            pub base: ValueId,
      
        87
            pub subscript: AffineExpr,
      
        88
            pub is_write: bool,
      
        89
        }
      
        90
        
        91
        /// Result of dependence testing between two memory references.
      
        92
        #[derive(Debug, Clone)]
      
        93
        pub struct DepResult {
      
        94
            /// True if the references may access the same element on different iterations.
      
        95
            pub dependent: bool,
      
        96
        }
      
        97
        
        98
        // ---------------------------------------------------------------------------
      
        99
        // Affine expression extraction
      
        100
        // ---------------------------------------------------------------------------
      
        101
        
        102
        /// Extract an affine expression from a GEP index by walking backwards
      
        103
        /// through arithmetic instructions. `ivs` is the set of known induction
      
        104
        /// variables for the enclosing loop nest.
      
        105
        pub fn extract_affine(func: &Function, val: ValueId, ivs: &HashSet<ValueId>) -> Option<AffineExpr> {
      
        106
            // Is this an IV?
      
        107
            if ivs.contains(&val) {
      
        108
                return Some(AffineExpr::from_iv(val));
      
        109
            }
      
        110
        
        111
            // Find the instruction that defines this value.
      
        112
            let inst = find_inst(func, val)?;
      
        113
            match &inst.kind {
      
        114
                InstKind::ConstInt(c, _) => i64::try_from(*c).ok().map(AffineExpr::from_const),
      
        115
        
        116
                InstKind::IAdd(a, b) => {
      
        117
                    let ea = extract_affine(func, *a, ivs)?;
      
        118
                    let eb = extract_affine(func, *b, ivs)?;
      
        119
                    Some(ea.add(&eb))
      
        120
                }
      
        121
        
        122
                InstKind::ISub(a, b) => {
      
        123
                    let ea = extract_affine(func, *a, ivs)?;
      
        124
                    let eb = extract_affine(func, *b, ivs)?;
      
        125
                    Some(ea.sub(&eb))
      
        126
                }
      
        127
        
        128
                InstKind::IMul(a, b) => {
      
        129
                    // One operand must be a constant for the result to be affine.
      
        130
                    if let Some(ca) = resolve_const(func, *a) {
      
        131
                        let eb = extract_affine(func, *b, ivs)?;
      
        132
                        Some(eb.scale(ca))
      
        133
                    } else if let Some(cb) = resolve_const(func, *b) {
      
        134
                        let ea = extract_affine(func, *a, ivs)?;
      
        135
                        Some(ea.scale(cb))
      
        136
                    } else {
      
        137
                        None // non-affine (product of two non-constants)
      
        138
                    }
      
        139
                }
      
        140
        
        141
                InstKind::IntExtend(a, _, _) => extract_affine(func, *a, ivs),
      
        142
        
        143
                _ => {
      
        144
                    // Value defined outside the loop (e.g., function param, alloca).
      
        145
                    // If it's not an IV and not computable from IVs, treat as unknown.
      
        146
                    // Conservative: return None (non-affine).
      
        147
                    None
      
        148
                }
      
        149
            }
      
        150
        }
      
        151
        
        152
        fn resolve_const(func: &Function, vid: ValueId) -> Option<i64> {
      
        153
            let inst = find_inst(func, vid)?;
      
        154
            if let InstKind::ConstInt(c, _) = &inst.kind {
      
        155
                i64::try_from(*c).ok()
      
        156
            } else {
      
        157
                None
      
        158
            }
      
        159
        }
      
        160
        
        161
        fn find_inst(func: &Function, vid: ValueId) -> Option<&Inst> {
      
        162
            for block in &func.blocks {
      
        163
                for inst in &block.insts {
      
        164
                    if inst.id == vid {
      
        165
                        return Some(inst);
      
        166
                    }
      
        167
                }
      
        168
            }
      
        169
            None
      
        170
        }
      
        171
        
        172
        // ---------------------------------------------------------------------------
      
        173
        // Memory reference collection
      
        174
        // ---------------------------------------------------------------------------
      
        175
        
        176
        /// Collect all memory references (loads and stores through GEPs) in a
      
        177
        /// set of blocks.
      
        178
        pub fn collect_mem_refs(
      
        179
            func: &Function,
      
        180
            blocks: &HashSet<BlockId>,
      
        181
            ivs: &HashSet<ValueId>,
      
        182
        ) -> Vec<MemRef> {
      
        183
            let mut refs = Vec::new();
      
        184
            for &bid in blocks {
      
        185
                let block = func.block(bid);
      
        186
                for inst in &block.insts {
      
        187
                    match &inst.kind {
      
        188
                        InstKind::Store(_, ptr) => {
      
        189
                            if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, true, ivs) {
      
        190
                                refs.push(mr);
      
        191
                            }
      
        192
                        }
      
        193
                        InstKind::Load(ptr) => {
      
        194
                            if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, false, ivs) {
      
        195
                                refs.push(mr);
      
        196
                            }
      
        197
                        }
      
        198
                        _ => {}
      
        199
                    }
      
        200
                }
      
        201
            }
      
        202
            refs
      
        203
        }
      
        204
        
        205
        fn extract_mem_ref(
      
        206
            func: &Function,
      
        207
            inst_id: ValueId,
      
        208
            ptr: ValueId,
      
        209
            is_write: bool,
      
        210
            ivs: &HashSet<ValueId>,
      
        211
        ) -> Option<MemRef> {
      
        212
            // The pointer should be a GEP.
      
        213
            let gep_inst = find_inst(func, ptr)?;
      
        214
            let (base, indices) = match &gep_inst.kind {
      
        215
                InstKind::GetElementPtr(b, idxs) => (*b, idxs.clone()),
      
        216
                _ => return None,
      
        217
            };
      
        218
            // We work with the flat offset (single index for 1D GEP).
      
        219
            let idx = indices.first()?;
      
        220
            let subscript = extract_affine(func, *idx, ivs)?;
      
        221
            Some(MemRef {
      
        222
                inst_id,
      
        223
                base,
      
        224
                subscript,
      
        225
                is_write,
      
        226
            })
      
        227
        }
      
        228
        
        229
        // ---------------------------------------------------------------------------
      
        230
        // GCD test
      
        231
        // ---------------------------------------------------------------------------
      
        232
        
        233
        /// Test whether two memory references can access the same element on
      
        234
        /// different iterations.
      
        235
        ///
      
        236
        /// GCD test: given `f(I) = sum(a_k * i_k) + c1` and
      
        237
        /// `g(I) = sum(b_k * i_k) + c2`, a dependence is possible only if
      
        238
        /// `gcd(a_1, ..., b_1, ...) | (c2 - c1)`.
      
        239
        pub fn test_dependence(ref_a: &MemRef, ref_b: &MemRef) -> DepResult {
      
        240
            // Fortran no-alias: distinct array bases → independent.
      
        241
            if ref_a.base != ref_b.base {
      
        242
                return DepResult { dependent: false };
      
        243
            }
      
        244
        
        245
            // Compute the difference of the two affine expressions.
      
        246
            let diff = ref_b.subscript.sub(&ref_a.subscript);
      
        247
        
        248
            // If there are no IV terms, the accesses are at a fixed distance.
      
        249
            // If the constant is 0, they access the same element (same iteration = ok
      
        250
            // unless both are writes). If non-zero, they access different elements.
      
        251
            if diff.terms.is_empty() {
      
        252
                return DepResult {
      
        253
                    dependent: diff.constant == 0,
      
        254
                };
      
        255
            }
      
        256
        
        257
            // GCD of all IV coefficients in the difference.
      
        258
            let g = diff
      
        259
                .terms
      
        260
                .iter()
      
        261
                .map(|(c, _)| c.unsigned_abs())
      
        262
                .fold(0u64, gcd);
      
        263
        
        264
            if g == 0 {
      
        265
                // All coefficients are zero — same as fixed-distance case.
      
        266
                return DepResult {
      
        267
                    dependent: diff.constant == 0,
      
        268
                };
      
        269
            }
      
        270
        
        271
            // GCD test: if gcd does not divide the constant difference,
      
        272
            // the accesses NEVER touch the same element → independent.
      
        273
            let dependent = diff.constant.unsigned_abs().is_multiple_of(g);
      
        274
            DepResult { dependent }
      
        275
        }
      
        276
        
        277
        fn gcd(a: u64, b: u64) -> u64 {
      
        278
            if b == 0 {
      
        279
                a
      
        280
            } else {
      
        281
                gcd(b, a % b)
      
        282
            }
      
        283
        }
      
        284
        
        285
        // ---------------------------------------------------------------------------
      
        286
        // High-level queries for loop passes
      
        287
        // ---------------------------------------------------------------------------
      
        288
        
        289
        /// Check if two adjacent loop bodies can be legally fused.
      
        290
        ///
      
        291
        /// Fusion is safe when no cross-loop dependence would be reversed.
      
        292
        /// Same-iteration dependencies (subscript diff = 0) are ALWAYS
      
        293
        /// fusion-legal: the write in A's body precedes the read in B's
      
        294
        /// body within the same fused iteration. Only cross-iteration
      
        295
        /// backward dependencies (B writes, A reads at a later iteration)
      
        296
        /// would be reversed by fusion — and those can't exist between
      
        297
        /// adjacent loops that don't share a carried state.
      
        298
        /// Check if two adjacent loop bodies can be legally fused.
      
        299
        ///
      
        300
        /// `iv_a` and `iv_b` are the IVs of the two loops — they iterate
      
        301
        /// over the same range, so for dependence purposes they are the
      
        302
        /// same variable. We remap `iv_b → iv_a` in B's subscripts before
      
        303
        /// comparing.
      
        304
        pub fn fusion_legal(
      
        305
            func: &Function,
      
        306
            body_a: &HashSet<BlockId>,
      
        307
            body_b: &HashSet<BlockId>,
      
        308
            iv_a: ValueId,
      
        309
            iv_b: ValueId,
      
        310
        ) -> bool {
      
        311
            let mut ivs_a = HashSet::new();
      
        312
            ivs_a.insert(iv_a);
      
        313
            let mut ivs_b = HashSet::new();
      
        314
            ivs_b.insert(iv_b);
      
        315
        
        316
            let refs_a = collect_mem_refs(func, body_a, &ivs_a);
      
        317
            let refs_b_raw = collect_mem_refs(func, body_b, &ivs_b);
      
        318
        
        319
            // Remap iv_b → iv_a in B's subscripts so the comparison works.
      
        320
            let refs_b: Vec<MemRef> = refs_b_raw
      
        321
                .into_iter()
      
        322
                .map(|mut r| {
      
        323
                    for term in &mut r.subscript.terms {
      
        324
                        if term.1 == iv_b {
      
        325
                            term.1 = iv_a;
      
        326
                        }
      
        327
                    }
      
        328
                    r
      
        329
                })
      
        330
                .collect();
      
        331
        
        332
            for ra in &refs_a {
      
        333
                for rb in &refs_b {
      
        334
                    if !ra.is_write && !rb.is_write {
      
        335
                        continue;
      
        336
                    }
      
        337
                    if ra.base != rb.base {
      
        338
                        continue;
      
        339
                    }
      
        340
        
        341
                    let diff = rb.subscript.sub(&ra.subscript);
      
        342
        
        343
                    // Same-iteration access (diff = 0) → fusion-legal.
      
        344
                    if diff.terms.is_empty() && diff.constant == 0 {
      
        345
                        continue;
      
        346
                    }
      
        347
        
        348
                    // Non-zero distance → conservative reject.
      
        349
                    return false;
      
        350
                }
      
        351
            }
      
        352
            true
      
        353
        }
      
        354
        
        355
        /// Check if interchanging (outer, inner) preserves correctness.
      
        356
        /// Conservative: any carried dependence within the inner body that
      
        357
        /// involves both IVs may change direction after interchange.
      
        358
        pub fn interchange_legal(
      
        359
            func: &Function,
      
        360
            inner_body: &HashSet<BlockId>,
      
        361
            outer_iv: ValueId,
      
        362
            inner_iv: ValueId,
      
        363
        ) -> bool {
      
        364
            let mut ivs = HashSet::new();
      
        365
            ivs.insert(outer_iv);
      
        366
            ivs.insert(inner_iv);
      
        367
        
        368
            let refs = collect_mem_refs(func, inner_body, &ivs);
      
        369
        
        370
            // For each pair of refs where at least one is a write:
      
        371
            for i in 0..refs.len() {
      
        372
                for j in (i + 1)..refs.len() {
      
        373
                    if !refs[i].is_write && !refs[j].is_write {
      
        374
                        continue;
      
        375
                    }
      
        376
                    if refs[i].base != refs[j].base {
      
        377
                        continue;
      
        378
                    }
      
        379
        
        380
                    // Both refs share a base and at least one is a write.
      
        381
                    // Check if the subscript difference has non-zero
      
        382
                    // coefficients for BOTH IVs — if so, interchange might
      
        383
                    // reverse the dependence direction.
      
        384
                    let diff = refs[j].subscript.sub(&refs[i].subscript);
      
        385
                    let has_outer = diff.terms.iter().any(|(c, v)| *v == outer_iv && *c != 0);
      
        386
                    let has_inner = diff.terms.iter().any(|(c, v)| *v == inner_iv && *c != 0);
      
        387
                    if has_outer && has_inner {
      
        388
                        // Dependence involves both IVs — interchange could reverse
      
        389
                        // the direction. Conservative: reject.
      
        390
                        return false;
      
        391
                    }
      
        392
                }
      
        393
            }
      
        394
            true
      
        395
        }
      
        396
        
        397
        // ---------------------------------------------------------------------------
      
        398
        // Tests
      
        399
        // ---------------------------------------------------------------------------
      
        400
        
        401
        #[cfg(test)]
      
        402
        mod tests {
      
        403
            use super::*;
      
        404
        
        405
            #[test]
      
        406
            fn gcd_basic() {
      
        407
                assert_eq!(gcd(12, 8), 4);
      
        408
                assert_eq!(gcd(7, 3), 1);
      
        409
                assert_eq!(gcd(0, 5), 5);
      
        410
                assert_eq!(gcd(10, 0), 10);
      
        411
            }
      
        412
        
        413
            #[test]
      
        414
            fn affine_add_sub() {
      
        415
                let iv = ValueId(100);
      
        416
                let a = AffineExpr {
      
        417
                    constant: 3,
      
        418
                    terms: vec![(2, iv)],
      
        419
                };
      
        420
                let b = AffineExpr {
      
        421
                    constant: 1,
      
        422
                    terms: vec![(1, iv)],
      
        423
                };
      
        424
                let sum = a.add(&b);
      
        425
                assert_eq!(sum.constant, 4);
      
        426
                assert_eq!(sum.terms.len(), 1);
      
        427
                assert_eq!(sum.terms[0], (3, iv));
      
        428
        
        429
                let diff = a.sub(&b);
      
        430
                assert_eq!(diff.constant, 2);
      
        431
                assert_eq!(diff.terms.len(), 1);
      
        432
                assert_eq!(diff.terms[0], (1, iv));
      
        433
            }
      
        434
        
        435
            #[test]
      
        436
            fn affine_scale() {
      
        437
                let iv = ValueId(100);
      
        438
                let a = AffineExpr {
      
        439
                    constant: 2,
      
        440
                    terms: vec![(3, iv)],
      
        441
                };
      
        442
                let scaled = a.scale(4);
      
        443
                assert_eq!(scaled.constant, 8);
      
        444
                assert_eq!(scaled.terms[0], (12, iv));
      
        445
            }
      
        446
        
        447
            #[test]
      
        448
            fn gcd_test_independent() {
      
        449
                // a(2i+1) vs a(2i+2): diff = 2i+2 - (2i+1) = 1 (constant).
      
        450
                // No IV terms in diff → dependent only if constant is 0.
      
        451
                // constant = 1 ≠ 0 → independent.
      
        452
                let iv = ValueId(100);
      
        453
                let ref_a = MemRef {
      
        454
                    inst_id: ValueId(0),
      
        455
                    base: ValueId(50),
      
        456
                    subscript: AffineExpr {
      
        457
                        constant: 1,
      
        458
                        terms: vec![(2, iv)],
      
        459
                    },
      
        460
                    is_write: true,
      
        461
                };
      
        462
                let ref_b = MemRef {
      
        463
                    inst_id: ValueId(1),
      
        464
                    base: ValueId(50),
      
        465
                    subscript: AffineExpr {
      
        466
                        constant: 2,
      
        467
                        terms: vec![(2, iv)],
      
        468
                    },
      
        469
                    is_write: false,
      
        470
                };
      
        471
                let dep = test_dependence(&ref_a, &ref_b);
      
        472
                assert!(!dep.dependent, "a(2i+1) and a(2i+2) should be independent");
      
        473
            }
      
        474
        
        475
            #[test]
      
        476
            fn gcd_test_dependent() {
      
        477
                // a(i) vs a(i): same subscript → always dependent.
      
        478
                let iv = ValueId(100);
      
        479
                let ref_a = MemRef {
      
        480
                    inst_id: ValueId(0),
      
        481
                    base: ValueId(50),
      
        482
                    subscript: AffineExpr {
      
        483
                        constant: 0,
      
        484
                        terms: vec![(1, iv)],
      
        485
                    },
      
        486
                    is_write: true,
      
        487
                };
      
        488
                let ref_b = MemRef {
      
        489
                    inst_id: ValueId(1),
      
        490
                    base: ValueId(50),
      
        491
                    subscript: AffineExpr {
      
        492
                        constant: 0,
      
        493
                        terms: vec![(1, iv)],
      
        494
                    },
      
        495
                    is_write: false,
      
        496
                };
      
        497
                let dep = test_dependence(&ref_a, &ref_b);
      
        498
                assert!(dep.dependent, "a(i) and a(i) should be dependent");
      
        499
            }
      
        500
        
        501
            #[test]
      
        502
            fn gcd_test_different_stride() {
      
        503
                // a(3i) vs a(3i+1): diff has constant 1, gcd of coefficients = 0
      
        504
                // (they cancel: 3-3=0). So diff = constant 1, no terms → independent.
      
        505
                let iv = ValueId(100);
      
        506
                let ref_a = MemRef {
      
        507
                    inst_id: ValueId(0),
      
        508
                    base: ValueId(50),
      
        509
                    subscript: AffineExpr {
      
        510
                        constant: 0,
      
        511
                        terms: vec![(3, iv)],
      
        512
                    },
      
        513
                    is_write: true,
      
        514
                };
      
        515
                let ref_b = MemRef {
      
        516
                    inst_id: ValueId(1),
      
        517
                    base: ValueId(50),
      
        518
                    subscript: AffineExpr {
      
        519
                        constant: 1,
      
        520
                        terms: vec![(3, iv)],
      
        521
                    },
      
        522
                    is_write: false,
      
        523
                };
      
        524
                let dep = test_dependence(&ref_a, &ref_b);
      
        525
                assert!(
      
        526
                    !dep.dependent,
      
        527
                    "a(3i) and a(3i+1) should be independent by GCD test"
      
        528
                );
      
        529
            }
      
        530
        
        531
            #[test]
      
        532
            fn distinct_bases_independent() {
      
        533
                let iv = ValueId(100);
      
        534
                let ref_a = MemRef {
      
        535
                    inst_id: ValueId(0),
      
        536
                    base: ValueId(50),
      
        537
                    subscript: AffineExpr {
      
        538
                        constant: 0,
      
        539
                        terms: vec![(1, iv)],
      
        540
                    },
      
        541
                    is_write: true,
      
        542
                };
      
        543
                let ref_b = MemRef {
      
        544
                    inst_id: ValueId(1),
      
        545
                    base: ValueId(60), // different base
      
        546
                    subscript: AffineExpr {
      
        547
                        constant: 0,
      
        548
                        terms: vec![(1, iv)],
      
        549
                    },
      
        550
                    is_write: false,
      
        551
                };
      
        552
                let dep = test_dependence(&ref_a, &ref_b);
      
        553
                assert!(
      
        554
                    !dep.dependent,
      
        555
                    "distinct bases should be independent (Fortran no-alias)"
      
        556
                );
      
        557
            }
      
        558
        }
      
        559

1	//! Data dependence analysis for loop transformations.
2	//!
3	//! Determines whether two memory accesses inside a loop (or loop nest)
4	//! can touch the same location on different iterations. Uses the GCD
5	//! test on affine subscript expressions extracted from GEP indices.
6	//!
7	//! ## Fortran-specific simplifications
8	//!
9	//! - Distinct array base pointers → always independent (Fortran standard
10	//! prohibits aliasing between distinct named arrays absent EQUIVALENCE).
11	//! - Column-major strides are compile-time constants for fixed-shape arrays.
12	//! - INTENT(IN) arguments cannot alias INTENT(OUT) arguments.
13
14	use crate::ir::inst::*;
15	use std::collections::HashSet;
16
17	// ---------------------------------------------------------------------------
18	// Data structures
19	// ---------------------------------------------------------------------------
20
21	/// An affine expression: `constant + sum(coefficient * iv)`.
22	/// The `terms` vector maps induction variables to their coefficients.
23	#[derive(Debug, Clone)]
24	pub struct AffineExpr {
25	pub constant: i64,
26	pub terms: Vec<(i64, ValueId)>, // (coefficient, iv)
27	}
28
29	impl AffineExpr {
30	fn zero() -> Self {
31	Self {
32	constant: 0,
33	terms: Vec::new(),
34	}
35	}
36
37	fn from_const(c: i64) -> Self {
38	Self {
39	constant: c,
40	terms: Vec::new(),
41	}
42	}
43
44	fn from_iv(iv: ValueId) -> Self {
45	Self {
46	constant: 0,
47	terms: vec![(1, iv)],
48	}
49	}
50
51	fn add(&self, other: &Self) -> Self {
52	let mut result = self.clone();
53	result.constant += other.constant;
54	for &(coeff, iv) in &other.terms {
55	if let Some(entry) = result.terms.iter_mut().find(\|(_, v)\| *v == iv) {
56	entry.0 += coeff;
57	} else {
58	result.terms.push((coeff, iv));
59	}
60	}
61	// Remove zero-coefficient terms.
62	result.terms.retain(\|(c, _)\| *c != 0);
63	result
64	}
65
66	fn sub(&self, other: &Self) -> Self {
67	let negated = AffineExpr {
68	constant: -other.constant,
69	terms: other.terms.iter().map(\|(c, v)\| (-c, *v)).collect(),
70	};
71	self.add(&negated)
72	}
73
74	fn scale(&self, factor: i64) -> Self {
75	AffineExpr {
76	constant: self.constant * factor,
77	terms: self.terms.iter().map(\|(c, v)\| (c * factor, *v)).collect(),
78	}
79	}
80	}
81
82	/// A memory reference extracted from loop body.
83	#[derive(Debug, Clone)]
84	pub struct MemRef {
85	pub inst_id: ValueId,
86	pub base: ValueId,
87	pub subscript: AffineExpr,
88	pub is_write: bool,
89	}
90
91	/// Result of dependence testing between two memory references.
92	#[derive(Debug, Clone)]
93	pub struct DepResult {
94	/// True if the references may access the same element on different iterations.
95	pub dependent: bool,
96	}
97
98	// ---------------------------------------------------------------------------
99	// Affine expression extraction
100	// ---------------------------------------------------------------------------
101
102	/// Extract an affine expression from a GEP index by walking backwards
103	/// through arithmetic instructions. `ivs` is the set of known induction
104	/// variables for the enclosing loop nest.
105	pub fn extract_affine(func: &Function, val: ValueId, ivs: &HashSet<ValueId>) -> Option<AffineExpr> {
106	// Is this an IV?
107	if ivs.contains(&val) {
108	return Some(AffineExpr::from_iv(val));
109	}
110
111	// Find the instruction that defines this value.
112	let inst = find_inst(func, val)?;
113	match &inst.kind {
114	InstKind::ConstInt(c, _) => i64::try_from(*c).ok().map(AffineExpr::from_const),
115
116	InstKind::IAdd(a, b) => {
117	let ea = extract_affine(func, *a, ivs)?;
118	let eb = extract_affine(func, *b, ivs)?;
119	Some(ea.add(&eb))
120	}
121
122	InstKind::ISub(a, b) => {
123	let ea = extract_affine(func, *a, ivs)?;
124	let eb = extract_affine(func, *b, ivs)?;
125	Some(ea.sub(&eb))
126	}
127
128	InstKind::IMul(a, b) => {
129	// One operand must be a constant for the result to be affine.
130	if let Some(ca) = resolve_const(func, *a) {
131	let eb = extract_affine(func, *b, ivs)?;
132	Some(eb.scale(ca))
133	} else if let Some(cb) = resolve_const(func, *b) {
134	let ea = extract_affine(func, *a, ivs)?;
135	Some(ea.scale(cb))
136	} else {
137	None // non-affine (product of two non-constants)
138	}
139	}
140
141	InstKind::IntExtend(a, _, _) => extract_affine(func, *a, ivs),
142
143	_ => {
144	// Value defined outside the loop (e.g., function param, alloca).
145	// If it's not an IV and not computable from IVs, treat as unknown.
146	// Conservative: return None (non-affine).
147	None
148	}
149	}
150	}
151
152	fn resolve_const(func: &Function, vid: ValueId) -> Option<i64> {
153	let inst = find_inst(func, vid)?;
154	if let InstKind::ConstInt(c, _) = &inst.kind {
155	i64::try_from(*c).ok()
156	} else {
157	None
158	}
159	}
160
161	fn find_inst(func: &Function, vid: ValueId) -> Option<&Inst> {
162	for block in &func.blocks {
163	for inst in &block.insts {
164	if inst.id == vid {
165	return Some(inst);
166	}
167	}
168	}
169	None
170	}
171
172	// ---------------------------------------------------------------------------
173	// Memory reference collection
174	// ---------------------------------------------------------------------------
175
176	/// Collect all memory references (loads and stores through GEPs) in a
177	/// set of blocks.
178	pub fn collect_mem_refs(
179	func: &Function,
180	blocks: &HashSet<BlockId>,
181	ivs: &HashSet<ValueId>,
182	) -> Vec<MemRef> {
183	let mut refs = Vec::new();
184	for &bid in blocks {
185	let block = func.block(bid);
186	for inst in &block.insts {
187	match &inst.kind {
188	InstKind::Store(_, ptr) => {
189	if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, true, ivs) {
190	refs.push(mr);
191	}
192	}
193	InstKind::Load(ptr) => {
194	if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, false, ivs) {
195	refs.push(mr);
196	}
197	}
198	_ => {}
199	}
200	}
201	}
202	refs
203	}
204
205	fn extract_mem_ref(
206	func: &Function,
207	inst_id: ValueId,
208	ptr: ValueId,
209	is_write: bool,
210	ivs: &HashSet<ValueId>,
211	) -> Option<MemRef> {
212	// The pointer should be a GEP.
213	let gep_inst = find_inst(func, ptr)?;
214	let (base, indices) = match &gep_inst.kind {
215	InstKind::GetElementPtr(b, idxs) => (*b, idxs.clone()),
216	_ => return None,
217	};
218	// We work with the flat offset (single index for 1D GEP).
219	let idx = indices.first()?;
220	let subscript = extract_affine(func, *idx, ivs)?;
221	Some(MemRef {
222	inst_id,
223	base,
224	subscript,
225	is_write,
226	})
227	}
228
229	// ---------------------------------------------------------------------------
230	// GCD test
231	// ---------------------------------------------------------------------------
232
233	/// Test whether two memory references can access the same element on
234	/// different iterations.
235	///
236	/// GCD test: given `f(I) = sum(a_k * i_k) + c1` and
237	/// `g(I) = sum(b_k * i_k) + c2`, a dependence is possible only if
238	/// `gcd(a_1, ..., b_1, ...) \| (c2 - c1)`.
239	pub fn test_dependence(ref_a: &MemRef, ref_b: &MemRef) -> DepResult {
240	// Fortran no-alias: distinct array bases → independent.
241	if ref_a.base != ref_b.base {
242	return DepResult { dependent: false };
243	}
244
245	// Compute the difference of the two affine expressions.
246	let diff = ref_b.subscript.sub(&ref_a.subscript);
247
248	// If there are no IV terms, the accesses are at a fixed distance.
249	// If the constant is 0, they access the same element (same iteration = ok
250	// unless both are writes). If non-zero, they access different elements.
251	if diff.terms.is_empty() {
252	return DepResult {
253	dependent: diff.constant == 0,
254	};
255	}
256
257	// GCD of all IV coefficients in the difference.
258	let g = diff
259	.terms
260	.iter()
261	.map(\|(c, _)\| c.unsigned_abs())
262	.fold(0u64, gcd);
263
264	if g == 0 {
265	// All coefficients are zero — same as fixed-distance case.
266	return DepResult {
267	dependent: diff.constant == 0,
268	};
269	}
270
271	// GCD test: if gcd does not divide the constant difference,
272	// the accesses NEVER touch the same element → independent.
273	let dependent = diff.constant.unsigned_abs().is_multiple_of(g);
274	DepResult { dependent }
275	}
276
277	fn gcd(a: u64, b: u64) -> u64 {
278	if b == 0 {
279	a
280	} else {
281	gcd(b, a % b)
282	}
283	}
284
285	// ---------------------------------------------------------------------------
286	// High-level queries for loop passes
287	// ---------------------------------------------------------------------------
288
289	/// Check if two adjacent loop bodies can be legally fused.
290	///
291	/// Fusion is safe when no cross-loop dependence would be reversed.
292	/// Same-iteration dependencies (subscript diff = 0) are ALWAYS
293	/// fusion-legal: the write in A's body precedes the read in B's
294	/// body within the same fused iteration. Only cross-iteration
295	/// backward dependencies (B writes, A reads at a later iteration)
296	/// would be reversed by fusion — and those can't exist between
297	/// adjacent loops that don't share a carried state.
298	/// Check if two adjacent loop bodies can be legally fused.
299	///
300	/// `iv_a` and `iv_b` are the IVs of the two loops — they iterate
301	/// over the same range, so for dependence purposes they are the
302	/// same variable. We remap `iv_b → iv_a` in B's subscripts before
303	/// comparing.
304	pub fn fusion_legal(
305	func: &Function,
306	body_a: &HashSet<BlockId>,
307	body_b: &HashSet<BlockId>,
308	iv_a: ValueId,
309	iv_b: ValueId,
310	) -> bool {
311	let mut ivs_a = HashSet::new();
312	ivs_a.insert(iv_a);
313	let mut ivs_b = HashSet::new();
314	ivs_b.insert(iv_b);
315
316	let refs_a = collect_mem_refs(func, body_a, &ivs_a);
317	let refs_b_raw = collect_mem_refs(func, body_b, &ivs_b);
318
319	// Remap iv_b → iv_a in B's subscripts so the comparison works.
320	let refs_b: Vec<MemRef> = refs_b_raw
321	.into_iter()
322	.map(\|mut r\| {
323	for term in &mut r.subscript.terms {
324	if term.1 == iv_b {
325	term.1 = iv_a;
326	}
327	}
328	r
329	})
330	.collect();
331
332	for ra in &refs_a {
333	for rb in &refs_b {
334	if !ra.is_write && !rb.is_write {
335	continue;
336	}
337	if ra.base != rb.base {
338	continue;
339	}
340
341	let diff = rb.subscript.sub(&ra.subscript);
342
343	// Same-iteration access (diff = 0) → fusion-legal.
344	if diff.terms.is_empty() && diff.constant == 0 {
345	continue;
346	}
347
348	// Non-zero distance → conservative reject.
349	return false;
350	}
351	}
352	true
353	}
354
355	/// Check if interchanging (outer, inner) preserves correctness.
356	/// Conservative: any carried dependence within the inner body that
357	/// involves both IVs may change direction after interchange.
358	pub fn interchange_legal(
359	func: &Function,
360	inner_body: &HashSet<BlockId>,
361	outer_iv: ValueId,
362	inner_iv: ValueId,
363	) -> bool {
364	let mut ivs = HashSet::new();
365	ivs.insert(outer_iv);
366	ivs.insert(inner_iv);
367
368	let refs = collect_mem_refs(func, inner_body, &ivs);
369
370	// For each pair of refs where at least one is a write:
371	for i in 0..refs.len() {
372	for j in (i + 1)..refs.len() {
373	if !refs[i].is_write && !refs[j].is_write {
374	continue;
375	}
376	if refs[i].base != refs[j].base {
377	continue;
378	}
379
380	// Both refs share a base and at least one is a write.
381	// Check if the subscript difference has non-zero
382	// coefficients for BOTH IVs — if so, interchange might
383	// reverse the dependence direction.
384	let diff = refs[j].subscript.sub(&refs[i].subscript);
385	let has_outer = diff.terms.iter().any(\|(c, v)\| v == outer_iv && c != 0);
386	let has_inner = diff.terms.iter().any(\|(c, v)\| v == inner_iv && c != 0);
387	if has_outer && has_inner {
388	// Dependence involves both IVs — interchange could reverse
389	// the direction. Conservative: reject.
390	return false;
391	}
392	}
393	}
394	true
395	}
396
397	// ---------------------------------------------------------------------------
398	// Tests
399	// ---------------------------------------------------------------------------
400
401	#[cfg(test)]
402	mod tests {
403	use super::*;
404
405	#[test]
406	fn gcd_basic() {
407	assert_eq!(gcd(12, 8), 4);
408	assert_eq!(gcd(7, 3), 1);
409	assert_eq!(gcd(0, 5), 5);
410	assert_eq!(gcd(10, 0), 10);
411	}
412
413	#[test]
414	fn affine_add_sub() {
415	let iv = ValueId(100);
416	let a = AffineExpr {
417	constant: 3,
418	terms: vec![(2, iv)],
419	};
420	let b = AffineExpr {
421	constant: 1,
422	terms: vec![(1, iv)],
423	};
424	let sum = a.add(&b);
425	assert_eq!(sum.constant, 4);
426	assert_eq!(sum.terms.len(), 1);
427	assert_eq!(sum.terms[0], (3, iv));
428
429	let diff = a.sub(&b);
430	assert_eq!(diff.constant, 2);
431	assert_eq!(diff.terms.len(), 1);
432	assert_eq!(diff.terms[0], (1, iv));
433	}
434
435	#[test]
436	fn affine_scale() {
437	let iv = ValueId(100);
438	let a = AffineExpr {
439	constant: 2,
440	terms: vec![(3, iv)],
441	};
442	let scaled = a.scale(4);
443	assert_eq!(scaled.constant, 8);
444	assert_eq!(scaled.terms[0], (12, iv));
445	}
446
447	#[test]
448	fn gcd_test_independent() {
449	// a(2i+1) vs a(2i+2): diff = 2i+2 - (2i+1) = 1 (constant).
450	// No IV terms in diff → dependent only if constant is 0.
451	// constant = 1 ≠ 0 → independent.
452	let iv = ValueId(100);
453	let ref_a = MemRef {
454	inst_id: ValueId(0),
455	base: ValueId(50),
456	subscript: AffineExpr {
457	constant: 1,
458	terms: vec![(2, iv)],
459	},
460	is_write: true,
461	};
462	let ref_b = MemRef {
463	inst_id: ValueId(1),
464	base: ValueId(50),
465	subscript: AffineExpr {
466	constant: 2,
467	terms: vec![(2, iv)],
468	},
469	is_write: false,
470	};
471	let dep = test_dependence(&ref_a, &ref_b);
472	assert!(!dep.dependent, "a(2i+1) and a(2i+2) should be independent");
473	}
474
475	#[test]
476	fn gcd_test_dependent() {
477	// a(i) vs a(i): same subscript → always dependent.
478	let iv = ValueId(100);
479	let ref_a = MemRef {
480	inst_id: ValueId(0),
481	base: ValueId(50),
482	subscript: AffineExpr {
483	constant: 0,
484	terms: vec![(1, iv)],
485	},
486	is_write: true,
487	};
488	let ref_b = MemRef {
489	inst_id: ValueId(1),
490	base: ValueId(50),
491	subscript: AffineExpr {
492	constant: 0,
493	terms: vec![(1, iv)],
494	},
495	is_write: false,
496	};
497	let dep = test_dependence(&ref_a, &ref_b);
498	assert!(dep.dependent, "a(i) and a(i) should be dependent");
499	}
500
501	#[test]
502	fn gcd_test_different_stride() {
503	// a(3i) vs a(3i+1): diff has constant 1, gcd of coefficients = 0
504	// (they cancel: 3-3=0). So diff = constant 1, no terms → independent.
505	let iv = ValueId(100);
506	let ref_a = MemRef {
507	inst_id: ValueId(0),
508	base: ValueId(50),
509	subscript: AffineExpr {
510	constant: 0,
511	terms: vec![(3, iv)],
512	},
513	is_write: true,
514	};
515	let ref_b = MemRef {
516	inst_id: ValueId(1),
517	base: ValueId(50),
518	subscript: AffineExpr {
519	constant: 1,
520	terms: vec![(3, iv)],
521	},
522	is_write: false,
523	};
524	let dep = test_dependence(&ref_a, &ref_b);
525	assert!(
526	!dep.dependent,
527	"a(3i) and a(3i+1) should be independent by GCD test"
528	);
529	}
530
531	#[test]
532	fn distinct_bases_independent() {
533	let iv = ValueId(100);
534	let ref_a = MemRef {
535	inst_id: ValueId(0),
536	base: ValueId(50),
537	subscript: AffineExpr {
538	constant: 0,
539	terms: vec![(1, iv)],
540	},
541	is_write: true,
542	};
543	let ref_b = MemRef {
544	inst_id: ValueId(1),
545	base: ValueId(60), // different base
546	subscript: AffineExpr {
547	constant: 0,
548	terms: vec![(1, iv)],
549	},
550	is_write: false,
551	};
552	let dep = test_dependence(&ref_a, &ref_b);
553	assert!(
554	!dep.dependent,
555	"distinct bases should be independent (Fortran no-alias)"
556	);
557	}
558	}
559