Rust · 4254 bytes Raw Blame History
1 use std::collections::BTreeSet;
2 use std::path::PathBuf;
3
4 use armfortas::driver::OptLevel;
5 use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
/// Resolves `name` relative to the `test_programs` directory.
///
/// # Panics
///
/// Panics with a "missing test fixture" message when the resulting path does
/// not exist, so a misspelled fixture name fails loudly before any capture runs.
fn fixture(name: &str) -> PathBuf {
    let mut location = PathBuf::from("test_programs");
    location.push(name);
    if !location.exists() {
        panic!("missing test fixture {}", location.display());
    }
    location
}
12
13 fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14 let result = capture_from_path(&request).expect("capture should succeed");
15 match result.get(stage) {
16 Some(CapturedStage::Text(text)) => text.clone(),
17 Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18 None => panic!("missing requested stage {}", stage.as_str()),
19 }
20 }
21
/// Returns the part of `ir` that starts at the `func @<name>(` header and runs
/// up to (but not including) the newline that precedes the next `func @`
/// header, or to the end of `ir` when no later header is found.
///
/// # Panics
///
/// Panics, echoing the whole IR, when no `func @<name>(` header is present.
fn function_slice<'a>(ir: &'a str, name: &str) -> &'a str {
    let header = format!("func @{}(", name);
    let begin = match ir.find(&header) {
        Some(offset) => offset,
        None => panic!("missing function {} in IR:\n{}", name, ir),
    };

    // The tail starts on the header itself, which has no leading newline here,
    // so the search below can only hit a *later* function header.
    let tail = &ir[begin..];
    match tail.find("\n func @") {
        Some(stop) => &tail[..stop],
        None => tail,
    }
}
31
/// Compares the `apply` function of `realworld_three_point_apply.f90` in the
/// O0 `Ir` capture against the O2 `OptIr` capture: the `[i32 x 3]` alloca and
/// the `__afs_check_bounds` runtime calls must be present in the former and
/// absent from the latter.
#[test]
fn three_point_apply_scalarizes_coeffs_and_removes_safe_stencil_checks_at_o2() {
    const COEFFS_AGGREGATE: &str = "alloca [i32 x 3]";
    const BOUNDS_CHECK: &str = "rt_call @__afs_check_bounds";

    let source = fixture("realworld_three_point_apply.f90");

    // Both captures differ only in optimization level and requested stage.
    let capture = |opt_level: OptLevel, stage: Stage| -> String {
        capture_text(
            CaptureRequest {
                input: source.clone(),
                requested: BTreeSet::from([stage]),
                opt_level,
            },
            stage,
        )
    };

    let unoptimized = capture(OptLevel::O0, Stage::Ir);
    let optimized = capture(OptLevel::O2, Stage::OptIr);

    // Restrict every check to the body of `apply` so other functions in the
    // module cannot influence the result.
    let raw_apply = function_slice(&unoptimized, "apply");
    let opt_apply = function_slice(&optimized, "apply");

    // Preconditions: the unoptimized IR must exhibit what O2 is expected to remove.
    assert!(
        raw_apply.contains(COEFFS_AGGREGATE),
        "raw IR should still materialize coeffs(3) as an aggregate before SROA:\n{}",
        raw_apply
    );
    assert!(
        raw_apply.contains(BOUNDS_CHECK),
        "raw IR should still contain stencil bounds checks before BCE:\n{}",
        raw_apply
    );

    // Postconditions: neither pattern may survive in the optimized IR.
    assert!(
        !opt_apply.contains(COEFFS_AGGREGATE),
        "O2 optimized IR should scalarize/remove coeffs(3):\n{}",
        opt_apply
    );
    assert!(
        !opt_apply.contains(BOUNDS_CHECK),
        "O2 optimized IR should eliminate safe stencil bounds checks:\n{}",
        opt_apply
    );
}
77
/// Checks that `realworld_sasum_cleanup.f90` contains `__afs_check_bounds`
/// runtime calls in its O0 `Ir` capture and none anywhere in its O2 `OptIr`
/// capture. The whole captured text is searched, not a single function.
#[test]
fn sasum_cleanup_eliminates_chunked_loop_bounds_checks_at_o2() {
    const BOUNDS_CHECK: &str = "rt_call @__afs_check_bounds";

    let source = fixture("realworld_sasum_cleanup.f90");

    // Both captures differ only in optimization level and requested stage.
    let capture = |opt_level: OptLevel, stage: Stage| -> String {
        capture_text(
            CaptureRequest {
                input: source.clone(),
                requested: BTreeSet::from([stage]),
                opt_level,
            },
            stage,
        )
    };

    let raw_ir = capture(OptLevel::O0, Stage::Ir);
    let opt_ir = capture(OptLevel::O2, Stage::OptIr);

    // Precondition first: without checks in the raw IR the second assertion
    // would pass vacuously.
    assert!(
        raw_ir.contains(BOUNDS_CHECK),
        "raw IR should contain chunked-loop bounds checks before BCE:\n{}",
        raw_ir
    );
    assert!(
        !opt_ir.contains(BOUNDS_CHECK),
        "O2 optimized IR should eliminate safe chunked-loop bounds checks:\n{}",
        opt_ir
    );
}
110
/// Captures the O2 `Obj` stage twice for each listed kernel and requires the
/// two captured texts to be equal.
#[test]
fn realworld_29_8_kernels_have_deterministic_o2_objects() {
    let kernels = [
        "realworld_sasum_cleanup.f90",
        "realworld_three_point_apply.f90",
    ];

    for name in kernels {
        let source = fixture(name);

        // Each call issues a fresh, identical capture request for this kernel.
        let object_snapshot = || -> String {
            capture_text(
                CaptureRequest {
                    input: source.clone(),
                    requested: BTreeSet::from([Stage::Obj]),
                    opt_level: OptLevel::O2,
                },
                Stage::Obj,
            )
        };

        let first = object_snapshot();
        let second = object_snapshot();

        assert_eq!(
            first, second,
            "real-world 29.8 audit kernel should have deterministic O2 object snapshot for {}",
            name
        );
    }
}
142