Rust · 16105 bytes Raw Blame History
1 //! Data dependence analysis for loop transformations.
2 //!
3 //! Determines whether two memory accesses inside a loop (or loop nest)
4 //! can touch the same location on different iterations. Uses the GCD
5 //! test on affine subscript expressions extracted from GEP indices.
6 //!
7 //! ## Fortran-specific simplifications
8 //!
9 //! - Distinct array base pointers → always independent (Fortran standard
10 //! prohibits aliasing between distinct named arrays absent EQUIVALENCE).
11 //! - Column-major strides are compile-time constants for fixed-shape arrays.
12 //! - INTENT(IN) arguments cannot alias INTENT(OUT) arguments.
13
14 use std::collections::HashSet;
15 use crate::ir::inst::*;
16
17 // ---------------------------------------------------------------------------
18 // Data structures
19 // ---------------------------------------------------------------------------
20
21 /// An affine expression: `constant + sum(coefficient * iv)`.
22 /// The `terms` vector maps induction variables to their coefficients.
23 #[derive(Debug, Clone)]
24 pub struct AffineExpr {
25 pub constant: i64,
26 pub terms: Vec<(i64, ValueId)>, // (coefficient, iv)
27 }
28
29 impl AffineExpr {
30 fn zero() -> Self { Self { constant: 0, terms: Vec::new() } }
31
32 fn from_const(c: i64) -> Self { Self { constant: c, terms: Vec::new() } }
33
34 fn from_iv(iv: ValueId) -> Self {
35 Self { constant: 0, terms: vec![(1, iv)] }
36 }
37
38 fn add(&self, other: &Self) -> Self {
39 let mut result = self.clone();
40 result.constant += other.constant;
41 for &(coeff, iv) in &other.terms {
42 if let Some(entry) = result.terms.iter_mut().find(|(_, v)| *v == iv) {
43 entry.0 += coeff;
44 } else {
45 result.terms.push((coeff, iv));
46 }
47 }
48 // Remove zero-coefficient terms.
49 result.terms.retain(|(c, _)| *c != 0);
50 result
51 }
52
53 fn sub(&self, other: &Self) -> Self {
54 let negated = AffineExpr {
55 constant: -other.constant,
56 terms: other.terms.iter().map(|(c, v)| (-c, *v)).collect(),
57 };
58 self.add(&negated)
59 }
60
61 fn scale(&self, factor: i64) -> Self {
62 AffineExpr {
63 constant: self.constant * factor,
64 terms: self.terms.iter().map(|(c, v)| (c * factor, *v)).collect(),
65 }
66 }
67 }
68
69 /// A memory reference extracted from loop body.
70 #[derive(Debug, Clone)]
71 pub struct MemRef {
72 pub inst_id: ValueId,
73 pub base: ValueId,
74 pub subscript: AffineExpr,
75 pub is_write: bool,
76 }
77
78 /// Result of dependence testing between two memory references.
79 #[derive(Debug, Clone)]
80 pub struct DepResult {
81 /// True if the references may access the same element on different iterations.
82 pub dependent: bool,
83 }
84
85 // ---------------------------------------------------------------------------
86 // Affine expression extraction
87 // ---------------------------------------------------------------------------
88
89 /// Extract an affine expression from a GEP index by walking backwards
90 /// through arithmetic instructions. `ivs` is the set of known induction
91 /// variables for the enclosing loop nest.
92 pub fn extract_affine(
93 func: &Function,
94 val: ValueId,
95 ivs: &HashSet<ValueId>,
96 ) -> Option<AffineExpr> {
97 // Is this an IV?
98 if ivs.contains(&val) {
99 return Some(AffineExpr::from_iv(val));
100 }
101
102 // Find the instruction that defines this value.
103 let inst = find_inst(func, val)?;
104 match &inst.kind {
105 InstKind::ConstInt(c, _) => i64::try_from(*c).ok().map(AffineExpr::from_const),
106
107 InstKind::IAdd(a, b) => {
108 let ea = extract_affine(func, *a, ivs)?;
109 let eb = extract_affine(func, *b, ivs)?;
110 Some(ea.add(&eb))
111 }
112
113 InstKind::ISub(a, b) => {
114 let ea = extract_affine(func, *a, ivs)?;
115 let eb = extract_affine(func, *b, ivs)?;
116 Some(ea.sub(&eb))
117 }
118
119 InstKind::IMul(a, b) => {
120 // One operand must be a constant for the result to be affine.
121 if let Some(ca) = resolve_const(func, *a) {
122 let eb = extract_affine(func, *b, ivs)?;
123 Some(eb.scale(ca))
124 } else if let Some(cb) = resolve_const(func, *b) {
125 let ea = extract_affine(func, *a, ivs)?;
126 Some(ea.scale(cb))
127 } else {
128 None // non-affine (product of two non-constants)
129 }
130 }
131
132 InstKind::IntExtend(a, _, _) => extract_affine(func, *a, ivs),
133
134 _ => {
135 // Value defined outside the loop (e.g., function param, alloca).
136 // If it's not an IV and not computable from IVs, treat as unknown.
137 // Conservative: return None (non-affine).
138 None
139 }
140 }
141 }
142
143 fn resolve_const(func: &Function, vid: ValueId) -> Option<i64> {
144 let inst = find_inst(func, vid)?;
145 if let InstKind::ConstInt(c, _) = &inst.kind { i64::try_from(*c).ok() } else { None }
146 }
147
148 fn find_inst(func: &Function, vid: ValueId) -> Option<&Inst> {
149 for block in &func.blocks {
150 for inst in &block.insts {
151 if inst.id == vid { return Some(inst); }
152 }
153 }
154 None
155 }
156
157 // ---------------------------------------------------------------------------
158 // Memory reference collection
159 // ---------------------------------------------------------------------------
160
161 /// Collect all memory references (loads and stores through GEPs) in a
162 /// set of blocks.
163 pub fn collect_mem_refs(
164 func: &Function,
165 blocks: &HashSet<BlockId>,
166 ivs: &HashSet<ValueId>,
167 ) -> Vec<MemRef> {
168 let mut refs = Vec::new();
169 for &bid in blocks {
170 let block = func.block(bid);
171 for inst in &block.insts {
172 match &inst.kind {
173 InstKind::Store(_, ptr) => {
174 if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, true, ivs) {
175 refs.push(mr);
176 }
177 }
178 InstKind::Load(ptr) => {
179 if let Some(mr) = extract_mem_ref(func, inst.id, *ptr, false, ivs) {
180 refs.push(mr);
181 }
182 }
183 _ => {}
184 }
185 }
186 }
187 refs
188 }
189
190 fn extract_mem_ref(
191 func: &Function,
192 inst_id: ValueId,
193 ptr: ValueId,
194 is_write: bool,
195 ivs: &HashSet<ValueId>,
196 ) -> Option<MemRef> {
197 // The pointer should be a GEP.
198 let gep_inst = find_inst(func, ptr)?;
199 let (base, indices) = match &gep_inst.kind {
200 InstKind::GetElementPtr(b, idxs) => (*b, idxs.clone()),
201 _ => return None,
202 };
203 // We work with the flat offset (single index for 1D GEP).
204 let idx = indices.first()?;
205 let subscript = extract_affine(func, *idx, ivs)?;
206 Some(MemRef { inst_id, base, subscript, is_write })
207 }
208
209 // ---------------------------------------------------------------------------
210 // GCD test
211 // ---------------------------------------------------------------------------
212
213 /// Test whether two memory references can access the same element on
214 /// different iterations.
215 ///
216 /// GCD test: given `f(I) = sum(a_k * i_k) + c1` and
217 /// `g(I) = sum(b_k * i_k) + c2`, a dependence is possible only if
218 /// `gcd(a_1, ..., b_1, ...) | (c2 - c1)`.
219 pub fn test_dependence(ref_a: &MemRef, ref_b: &MemRef) -> DepResult {
220 // Fortran no-alias: distinct array bases → independent.
221 if ref_a.base != ref_b.base {
222 return DepResult { dependent: false };
223 }
224
225 // Compute the difference of the two affine expressions.
226 let diff = ref_b.subscript.sub(&ref_a.subscript);
227
228 // If there are no IV terms, the accesses are at a fixed distance.
229 // If the constant is 0, they access the same element (same iteration = ok
230 // unless both are writes). If non-zero, they access different elements.
231 if diff.terms.is_empty() {
232 return DepResult { dependent: diff.constant == 0 };
233 }
234
235 // GCD of all IV coefficients in the difference.
236 let g = diff.terms.iter()
237 .map(|(c, _)| c.unsigned_abs())
238 .fold(0u64, gcd);
239
240 if g == 0 {
241 // All coefficients are zero — same as fixed-distance case.
242 return DepResult { dependent: diff.constant == 0 };
243 }
244
245 // GCD test: if gcd does not divide the constant difference,
246 // the accesses NEVER touch the same element → independent.
247 let dependent = diff.constant.unsigned_abs().is_multiple_of(g);
248 DepResult { dependent }
249 }
250
251 fn gcd(a: u64, b: u64) -> u64 {
252 if b == 0 { a } else { gcd(b, a % b) }
253 }
254
255 // ---------------------------------------------------------------------------
256 // High-level queries for loop passes
257 // ---------------------------------------------------------------------------
258
259 /// Check if two adjacent loop bodies can be legally fused.
260 ///
261 /// Fusion is safe when no cross-loop dependence would be reversed.
262 /// Same-iteration dependencies (subscript diff = 0) are ALWAYS
263 /// fusion-legal: the write in A's body precedes the read in B's
264 /// body within the same fused iteration. Only cross-iteration
265 /// backward dependencies (B writes, A reads at a later iteration)
266 /// would be reversed by fusion — and those can't exist between
267 /// adjacent loops that don't share a carried state.
268 /// Check if two adjacent loop bodies can be legally fused.
269 ///
270 /// `iv_a` and `iv_b` are the IVs of the two loops — they iterate
271 /// over the same range, so for dependence purposes they are the
272 /// same variable. We remap `iv_b → iv_a` in B's subscripts before
273 /// comparing.
274 pub fn fusion_legal(
275 func: &Function,
276 body_a: &HashSet<BlockId>,
277 body_b: &HashSet<BlockId>,
278 iv_a: ValueId,
279 iv_b: ValueId,
280 ) -> bool {
281 let mut ivs_a = HashSet::new();
282 ivs_a.insert(iv_a);
283 let mut ivs_b = HashSet::new();
284 ivs_b.insert(iv_b);
285
286 let refs_a = collect_mem_refs(func, body_a, &ivs_a);
287 let refs_b_raw = collect_mem_refs(func, body_b, &ivs_b);
288
289 // Remap iv_b → iv_a in B's subscripts so the comparison works.
290 let refs_b: Vec<MemRef> = refs_b_raw.into_iter().map(|mut r| {
291 for term in &mut r.subscript.terms {
292 if term.1 == iv_b { term.1 = iv_a; }
293 }
294 r
295 }).collect();
296
297 for ra in &refs_a {
298 for rb in &refs_b {
299 if !ra.is_write && !rb.is_write { continue; }
300 if ra.base != rb.base { continue; }
301
302 let diff = rb.subscript.sub(&ra.subscript);
303
304 // Same-iteration access (diff = 0) → fusion-legal.
305 if diff.terms.is_empty() && diff.constant == 0 {
306 continue;
307 }
308
309 // Non-zero distance → conservative reject.
310 return false;
311 }
312 }
313 true
314 }
315
316 /// Check if interchanging (outer, inner) preserves correctness.
317 /// Conservative: any carried dependence within the inner body that
318 /// involves both IVs may change direction after interchange.
319 pub fn interchange_legal(
320 func: &Function,
321 inner_body: &HashSet<BlockId>,
322 outer_iv: ValueId,
323 inner_iv: ValueId,
324 ) -> bool {
325 let mut ivs = HashSet::new();
326 ivs.insert(outer_iv);
327 ivs.insert(inner_iv);
328
329 let refs = collect_mem_refs(func, inner_body, &ivs);
330
331 // For each pair of refs where at least one is a write:
332 for i in 0..refs.len() {
333 for j in (i+1)..refs.len() {
334 if !refs[i].is_write && !refs[j].is_write { continue; }
335 if refs[i].base != refs[j].base { continue; }
336
337 // Both refs share a base and at least one is a write.
338 // Check if the subscript difference has non-zero
339 // coefficients for BOTH IVs — if so, interchange might
340 // reverse the dependence direction.
341 let diff = refs[j].subscript.sub(&refs[i].subscript);
342 let has_outer = diff.terms.iter().any(|(c, v)| *v == outer_iv && *c != 0);
343 let has_inner = diff.terms.iter().any(|(c, v)| *v == inner_iv && *c != 0);
344 if has_outer && has_inner {
345 // Dependence involves both IVs — interchange could reverse
346 // the direction. Conservative: reject.
347 return false;
348 }
349 }
350 }
351 true
352 }
353
354 // ---------------------------------------------------------------------------
355 // Tests
356 // ---------------------------------------------------------------------------
357
358 #[cfg(test)]
359 mod tests {
360 use super::*;
361
362 #[test]
363 fn gcd_basic() {
364 assert_eq!(gcd(12, 8), 4);
365 assert_eq!(gcd(7, 3), 1);
366 assert_eq!(gcd(0, 5), 5);
367 assert_eq!(gcd(10, 0), 10);
368 }
369
370 #[test]
371 fn affine_add_sub() {
372 let iv = ValueId(100);
373 let a = AffineExpr { constant: 3, terms: vec![(2, iv)] };
374 let b = AffineExpr { constant: 1, terms: vec![(1, iv)] };
375 let sum = a.add(&b);
376 assert_eq!(sum.constant, 4);
377 assert_eq!(sum.terms.len(), 1);
378 assert_eq!(sum.terms[0], (3, iv));
379
380 let diff = a.sub(&b);
381 assert_eq!(diff.constant, 2);
382 assert_eq!(diff.terms.len(), 1);
383 assert_eq!(diff.terms[0], (1, iv));
384 }
385
386 #[test]
387 fn affine_scale() {
388 let iv = ValueId(100);
389 let a = AffineExpr { constant: 2, terms: vec![(3, iv)] };
390 let scaled = a.scale(4);
391 assert_eq!(scaled.constant, 8);
392 assert_eq!(scaled.terms[0], (12, iv));
393 }
394
395 #[test]
396 fn gcd_test_independent() {
397 // a(2i+1) vs a(2i+2): diff = 2i+2 - (2i+1) = 1 (constant).
398 // No IV terms in diff → dependent only if constant is 0.
399 // constant = 1 ≠ 0 → independent.
400 let iv = ValueId(100);
401 let ref_a = MemRef {
402 inst_id: ValueId(0), base: ValueId(50),
403 subscript: AffineExpr { constant: 1, terms: vec![(2, iv)] },
404 is_write: true,
405 };
406 let ref_b = MemRef {
407 inst_id: ValueId(1), base: ValueId(50),
408 subscript: AffineExpr { constant: 2, terms: vec![(2, iv)] },
409 is_write: false,
410 };
411 let dep = test_dependence(&ref_a, &ref_b);
412 assert!(!dep.dependent, "a(2i+1) and a(2i+2) should be independent");
413 }
414
415 #[test]
416 fn gcd_test_dependent() {
417 // a(i) vs a(i): same subscript → always dependent.
418 let iv = ValueId(100);
419 let ref_a = MemRef {
420 inst_id: ValueId(0), base: ValueId(50),
421 subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
422 is_write: true,
423 };
424 let ref_b = MemRef {
425 inst_id: ValueId(1), base: ValueId(50),
426 subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
427 is_write: false,
428 };
429 let dep = test_dependence(&ref_a, &ref_b);
430 assert!(dep.dependent, "a(i) and a(i) should be dependent");
431 }
432
433 #[test]
434 fn gcd_test_different_stride() {
435 // a(3i) vs a(3i+1): diff has constant 1, gcd of coefficients = 0
436 // (they cancel: 3-3=0). So diff = constant 1, no terms → independent.
437 let iv = ValueId(100);
438 let ref_a = MemRef {
439 inst_id: ValueId(0), base: ValueId(50),
440 subscript: AffineExpr { constant: 0, terms: vec![(3, iv)] },
441 is_write: true,
442 };
443 let ref_b = MemRef {
444 inst_id: ValueId(1), base: ValueId(50),
445 subscript: AffineExpr { constant: 1, terms: vec![(3, iv)] },
446 is_write: false,
447 };
448 let dep = test_dependence(&ref_a, &ref_b);
449 assert!(!dep.dependent, "a(3i) and a(3i+1) should be independent by GCD test");
450 }
451
452 #[test]
453 fn distinct_bases_independent() {
454 let iv = ValueId(100);
455 let ref_a = MemRef {
456 inst_id: ValueId(0), base: ValueId(50),
457 subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
458 is_write: true,
459 };
460 let ref_b = MemRef {
461 inst_id: ValueId(1), base: ValueId(60), // different base
462 subscript: AffineExpr { constant: 0, terms: vec![(1, iv)] },
463 is_write: false,
464 };
465 let dep = test_dependence(&ref_a, &ref_b);
466 assert!(!dep.dependent, "distinct bases should be independent (Fortran no-alias)");
467 }
468 }
469