Rust · 8847 bytes Raw Blame History
1 use std::collections::BTreeSet;
2 use std::path::PathBuf;
3
4 use armfortas::driver::OptLevel;
5 use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
/// Builds the path of a test program named `name` under the relative
/// `test_programs` directory.
///
/// # Panics
///
/// Panics with the offending path when nothing exists at that location, so a
/// missing fixture is reported by name.
fn fixture(name: &str) -> PathBuf {
    let mut candidate = PathBuf::from("test_programs");
    candidate.push(name);
    if !candidate.exists() {
        panic!("missing test fixture {}", candidate.display());
    }
    candidate
}
12
13 fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14 let result = capture_from_path(&request).expect("capture should succeed");
15 match result.get(stage) {
16 Some(CapturedStage::Text(text)) => text.clone(),
17 Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18 None => panic!("missing requested stage {}", stage.as_str()),
19 }
20 }
21
/// Returns the slice of `ir` that holds the function whose header begins with
/// ` func @<name>`, from the header up to and including its closing brace line.
///
/// The lookup is a plain substring search, so `name` is matched as a textual
/// prefix: if another function whose name merely starts with `name` appears
/// earlier in `ir`, that function's section is returned.
///
/// # Panics
///
/// Panics when no matching header exists, or when no `"\n }\n"` terminator
/// follows the header.
fn function_section<'a>(ir: &'a str, name: &str) -> &'a str {
    let needle = format!(" func @{}", name);

    let tail = match ir.find(&needle) {
        Some(offset) => &ir[offset..],
        None => panic!("missing function section for {}", name),
    };

    match tail.find("\n }\n") {
        // Keep the closing brace but drop the newline that follows it.
        Some(close) => &tail[..close + "\n }".len()],
        None => panic!("unterminated function section for {}", name),
    }
}
33
/// Collects every function section in `ir`, in order of appearance.
///
/// Each section starts at an occurrence of ` func @` and runs up to and
/// including the closing brace line that precedes the next `"\n }\n"`
/// terminator.
///
/// # Panics
///
/// Panics, printing the remaining text, when a header has no terminator after
/// it.
fn function_sections(ir: &str) -> Vec<&str> {
    let mut sections = Vec::new();
    for (offset, _) in ir.match_indices(" func @") {
        let tail = &ir[offset..];
        match tail.find("\n }\n") {
            // Keep the closing brace but drop the newline that follows it.
            Some(close) => sections.push(&tail[..close + "\n }".len()]),
            None => panic!("unterminated function section in:\n{}", tail),
        }
    }
    sections
}
45
/// Extracts the function name from the first line of `func_section`.
///
/// The first line is trimmed, must start with `func @`, and the name is the
/// text after that prefix up to the first space or `(` (or the end of the line
/// when neither occurs).
///
/// # Panics
///
/// Panics when `func_section` has no lines or the trimmed first line does not
/// start with `func @`.
fn function_name<'a>(func_section: &'a str) -> &'a str {
    let first_line = func_section.lines().next().expect("function header");
    let after_sigil = first_line
        .trim()
        .strip_prefix("func @")
        .expect("function header prefix");

    // The first piece of the split is everything before the first delimiter,
    // or the whole remainder when no delimiter is present.
    after_sigil
        .split(|c: char| matches!(c, ' ' | '('))
        .next()
        .unwrap_or(after_sigil)
}
56
/// Counts the parameters listed in the header (first line) of `func_section`.
///
/// The parameter list is the text between the first `(` and the following
/// `) ->`. A blank list counts as zero; otherwise parameters are counted by
/// splitting on `", "`.
///
/// # Panics
///
/// Panics when `func_section` has no lines, or when the header lacks either
/// the opening `(` or the `) ->` that closes the parameter list.
fn param_count(func_section: &str) -> usize {
    let first_line = func_section.lines().next().expect("function header");

    let (_, after_open) = first_line
        .split_once('(')
        .expect("function header params");
    let (raw_params, _) = after_open
        .split_once(") ->")
        .expect("function header params");

    match raw_params.trim() {
        "" => 0,
        listed => listed.split(", ").count(),
    }
}
70
/// Captures the `O0` IR for `realworld_elemental_stage.f90` and checks that it
/// contains exactly two function sections, a `doconc_check_` label, a call to
/// the second function, and a call to `afs_array_add_i32`.
#[test]
fn o0_realworld_elemental_stage_proves_elemental_and_concurrent_lowering() {
    let request = CaptureRequest {
        input: fixture("realworld_elemental_stage.f90"),
        requested: BTreeSet::from([Stage::Ir]),
        opt_level: OptLevel::O0,
    };
    let ir_text = capture_text(request, Stage::Ir);

    let sections = function_sections(&ir_text);
    assert_eq!(
        sections.len(),
        2,
        "raw IR should include the program body plus one scalar ELEMENTAL helper:\n{}",
        ir_text
    );

    // The second section is treated as the scalar ELEMENTAL body; its name is
    // what the per-element call is expected to target.
    let helper_name = function_name(sections[1]);

    assert!(
        ir_text.contains("doconc_check_"),
        "whole-array ELEMENTAL lowering should still synthesize a DO CONCURRENT loop:\n{}",
        ir_text
    );

    let per_element_call = format!("call @{}(", helper_name);
    assert!(
        ir_text.contains(&per_element_call),
        "raw IR should still call the scalar ELEMENTAL body per element:\n{}",
        ir_text
    );

    assert!(
        ir_text.contains("call @afs_array_add_i32("),
        "the clean DO CONCURRENT combine should redirect through the bulk runtime kernel:\n{}",
        ir_text
    );
}
108
/// Compares the `O0` IR and the `O2` optimized IR of `realworld_ipo_chain.f90`.
///
/// The test checks that:
/// - the raw IR has five function sections, with the fourth taking one
///   parameter (the wrapper) and the fifth taking three (the mix helper);
/// - if the optimized IR still contains a function header starting with the
///   mix helper's name, that function takes two parameters;
/// - the optimized IR contains no function header starting with the wrapper's
///   name;
/// - two separate `O2` object captures compare equal.
#[test]
fn o2_realworld_ipo_chain_trims_dead_arg_and_removes_trivial_wrapper() {
    let source = fixture("realworld_ipo_chain.f90");

    // Baseline: the helper chain before any optimization.
    let raw_ir = capture_text(
        CaptureRequest {
            input: source.clone(),
            requested: BTreeSet::from([Stage::Ir]),
            opt_level: OptLevel::O0,
        },
        Stage::Ir,
    );
    // This request also asks for the object stage, but only the optimized IR
    // text is read back from it.
    let opt_ir = capture_text(
        CaptureRequest {
            input: source.clone(),
            requested: BTreeSet::from([Stage::OptIr, Stage::Obj]),
            opt_level: OptLevel::O2,
        },
        Stage::OptIr,
    );
    // Two independent object captures with identical settings, compared at the
    // end of the test.
    let obj_a = capture_text(
        CaptureRequest {
            input: source.clone(),
            requested: BTreeSet::from([Stage::Obj]),
            opt_level: OptLevel::O2,
        },
        Stage::Obj,
    );
    let obj_b = capture_text(
        CaptureRequest {
            input: source,
            requested: BTreeSet::from([Stage::Obj]),
            opt_level: OptLevel::O2,
        },
        Stage::Obj,
    );

    let raw_sections = function_sections(&raw_ir);
    assert_eq!(
        raw_sections.len(),
        5,
        "raw IR should still include accumulate, emit_value, passthrough, and mix_step helpers:\n{}",
        raw_ir
    );
    // The helpers are picked by position in the raw IR: index 3 is treated as
    // the trivial wrapper and index 4 as the mix helper.
    let raw_wrapper = raw_sections[3];
    let raw_wrapper_name = function_name(raw_wrapper);
    let raw_mix = raw_sections[4];
    let raw_mix_name = function_name(raw_mix);
    assert_eq!(
        param_count(raw_mix),
        3,
        "raw helper should keep the live arg, constant arg, and dead arg before IPO:\n{}",
        raw_mix
    );
    // `assert_eq!` (rather than `assert!(.. == 1)`) so a failure reports the
    // actual parameter count alongside the IR dump.
    assert_eq!(
        param_count(raw_wrapper),
        1,
        "raw IR should still materialize the trivial wrapper helper:\n{}",
        raw_ir
    );

    // The arity check only runs when the optimized IR still has a function
    // header starting with the helper's name; otherwise it is skipped.
    if opt_ir.contains(&format!("func @{}", raw_mix_name)) {
        let opt_mix = function_section(&opt_ir, raw_mix_name);
        assert_eq!(
            param_count(opt_mix),
            2,
            "optimized helper should at least trim the dead dummy from the real-world helper chain:\n{}",
            opt_mix
        );
    }
    assert!(
        !opt_ir.contains(&format!("func @{}", raw_wrapper_name)),
        "optimized IR should remove the trivial wrapper helper:\n{}",
        opt_ir
    );
    assert_eq!(
        obj_a, obj_b,
        "IPO-audited O2 object snapshot should stay deterministic"
    );
}
188
/// Checks that the `O0` IR of `realworld_doconc_square.f90` contains the
/// `doconc_check_`, `doconc_body_`, and `doconc_incr_` labels, and that the
/// `O2` optimized IR contains neither `doconc_check_` nor `doconc_body_`.
#[test]
fn o2_unrolls_realworld_small_do_concurrent_kernel() {
    let program = fixture("realworld_doconc_square.f90");

    let unoptimized = capture_text(
        CaptureRequest {
            input: program.clone(),
            requested: BTreeSet::from([Stage::Ir]),
            opt_level: OptLevel::O0,
        },
        Stage::Ir,
    );
    let optimized = capture_text(
        CaptureRequest {
            input: program,
            requested: BTreeSet::from([Stage::OptIr]),
            opt_level: OptLevel::O2,
        },
        Stage::OptIr,
    );

    // Every loop label must be present before optimization.
    let keeps_loop_identity = ["doconc_check_", "doconc_body_", "doconc_incr_"]
        .iter()
        .all(|label| unoptimized.contains(*label));
    assert!(
        keeps_loop_identity,
        "raw IR should preserve the real-world DO CONCURRENT loop identity:\n{}",
        unoptimized
    );

    // Neither the check nor the body label may remain after optimization.
    let loop_shape_survives = ["doconc_check_", "doconc_body_"]
        .iter()
        .any(|label| optimized.contains(*label));
    assert!(
        !loop_shape_survives,
        "O2 should exploit the small real-world DO CONCURRENT loop enough to erase the loop shape:\n{}",
        optimized
    );
}
223
/// Compares `O2` and `O3` output for `realworld_vector_stage.f90`.
///
/// The test checks that:
/// - the `O2` optimized IR has at least two `do_check_` occurrences and no
///   call to `afs_array_add_i32`;
/// - the `O3` optimized IR calls `afs_array_add_i32` and has fewer `do_check_`
///   occurrences than the `O2` IR;
/// - the `O3` assembly mentions `_afs_array_add_i32`;
/// - two separate `O3` object captures compare equal.
#[test]
fn o3_vectorizes_realworld_explicit_do_stage() {
    let source = fixture("realworld_vector_stage.f90");

    // Every capture below targets the same fixture; only the requested stages
    // and the optimization level vary.
    let request = |stages: BTreeSet<Stage>, opt_level: OptLevel| CaptureRequest {
        input: source.clone(),
        requested: stages,
        opt_level,
    };

    let o2_ir = capture_text(
        request(BTreeSet::from([Stage::OptIr]), OptLevel::O2),
        Stage::OptIr,
    );
    let o3_ir = capture_text(
        request(
            BTreeSet::from([Stage::OptIr, Stage::Asm, Stage::Obj]),
            OptLevel::O3,
        ),
        Stage::OptIr,
    );
    let o3_asm = capture_text(
        request(BTreeSet::from([Stage::Asm]), OptLevel::O3),
        Stage::Asm,
    );
    let first_obj = capture_text(
        request(BTreeSet::from([Stage::Obj]), OptLevel::O3),
        Stage::Obj,
    );
    let second_obj = capture_text(
        request(BTreeSet::from([Stage::Obj]), OptLevel::O3),
        Stage::Obj,
    );

    let o2_loop_checks = o2_ir.matches("do_check_").count();
    let o3_loop_checks = o3_ir.matches("do_check_").count();

    let o2_uses_bulk_add = o2_ir.contains("call @afs_array_add_i32(");
    assert!(
        o2_loop_checks >= 2 && !o2_uses_bulk_add,
        "O2 should still keep the explicit scalar loop for this real-world stage:\n{}",
        o2_ir
    );

    let o3_uses_bulk_add = o3_ir.contains("call @afs_array_add_i32(");
    assert!(
        o3_uses_bulk_add && o3_loop_checks < o2_loop_checks,
        "O3 should redirect the real-world explicit DO loop to the bulk add kernel:\n{}",
        o3_ir
    );

    assert!(
        o3_asm.contains("_afs_array_add_i32"),
        "vectorized O3 assembly should reference the bulk add kernel:\n{}",
        o3_asm
    );
    assert_eq!(
        first_obj, second_obj,
        "vectorized O3 object snapshot should stay deterministic"
    );
}
290