Rust · 5151 bytes Raw Blame History
1 use std::collections::BTreeSet;
2 use std::path::PathBuf;
3
4 use armfortas::driver::OptLevel;
5 use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
/// Builds the path of the test program `name` under the `test_programs`
/// directory (resolved relative to the current working directory).
///
/// # Panics
///
/// Panics with a "missing test fixture" message when the resulting path does
/// not exist.
fn fixture(name: &str) -> PathBuf {
    let mut location = PathBuf::from("test_programs");
    location.push(name);
    assert!(
        location.exists(),
        "missing test fixture {}",
        location.display()
    );
    location
}
12
13 fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14 let result = capture_from_path(&request).expect("capture should succeed");
15 match result.get(stage) {
16 Some(CapturedStage::Text(text)) => text.clone(),
17 Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18 None => panic!("missing requested stage {}", stage.as_str()),
19 }
20 }
21
/// Returns the portion of `ir` belonging to the function called `name`.
///
/// The slice starts at the first occurrence of `func @<name>(` and stops just
/// before the next `"\n func @"` sequence, or at the end of `ir` when no
/// further function header follows.
///
/// # Panics
///
/// Panics (dumping the whole IR) when no header for `name` is found.
fn function_slice<'a>(ir: &'a str, name: &str) -> &'a str {
    let header = format!("func @{}(", name);

    let begin = match ir.find(header.as_str()) {
        Some(offset) => offset,
        None => panic!("missing function {} in IR:\n{}", name, ir),
    };

    let tail = &ir[begin..];
    match tail.find("\n func @") {
        // Another function header follows: cut right before its newline.
        Some(stop) => &tail[..stop],
        // Last function in the text: keep everything that remains.
        None => tail,
    }
}
31
/// Splits `ir` into one slice per `" func @"` occurrence.
///
/// Each slice begins at the matched `" func @"` (leading space included) and
/// runs up to, but not including, the next `"\n func @"` sequence, or to the
/// end of `ir` if there is none. Returns an empty vector when `ir` contains no
/// match.
fn function_sections(ir: &str) -> Vec<&str> {
    let mut sections = Vec::new();

    for (offset, _) in ir.match_indices(" func @") {
        let tail = &ir[offset..];
        let section = match tail.find("\n func @") {
            Some(stop) => &tail[..stop],
            None => tail,
        };
        sections.push(section);
    }

    sections
}
41
/// Extracts the function name from the first line of `func_section`.
///
/// The first line is trimmed, must start with `func @`, and the name is the
/// text that follows up to the first space or `(` (or the rest of the line if
/// neither occurs).
///
/// # Panics
///
/// Panics when `func_section` has no lines or when its trimmed first line
/// does not start with `func @`.
fn function_name<'a>(func_section: &'a str) -> &'a str {
    let mut lines = func_section.lines();
    let header = lines.next().expect("function header").trim();
    let after_sigil = header
        .strip_prefix("func @")
        .expect("function header prefix");

    // The name ends at whichever comes first: a space or the opening paren.
    let is_terminator = |c: char| matches!(c, ' ' | '(');
    match after_sigil.find(is_terminator) {
        Some(stop) => &after_sigil[..stop],
        None => after_sigil,
    }
}
52
/// Compares the O0 `Ir` capture with the O2 `OptIr` capture of
/// `realworld_three_point_apply.f90`.
///
/// The second function found in the raw IR is treated as the contained apply
/// helper. Its raw form must contain both the three-element `i32` alloca and
/// a bounds-check runtime call; its O2 form must contain neither.
#[test]
fn three_point_apply_scalarizes_coeffs_and_removes_safe_stencil_checks_at_o2() {
    // Textual markers searched for in the helper's IR.
    const COEFFS_ALLOCA: &str = "alloca [i32 x 3]";
    const BOUNDS_CHECK: &str = "rt_call @__afs_check_bounds";

    let source = fixture("realworld_three_point_apply.f90");

    let unoptimized_request = CaptureRequest {
        input: source.clone(),
        requested: BTreeSet::from([Stage::Ir]),
        opt_level: OptLevel::O0,
    };
    let raw_ir = capture_text(unoptimized_request, Stage::Ir);

    let optimized_request = CaptureRequest {
        input: source,
        requested: BTreeSet::from([Stage::OptIr]),
        opt_level: OptLevel::O2,
    };
    let opt_ir = capture_text(optimized_request, Stage::OptIr);

    let raw_functions = function_sections(&raw_ir);
    assert_eq!(
        raw_functions.len(),
        2,
        "raw IR should include the program body plus one contained apply helper:\n{}",
        raw_ir
    );

    // The helper's name is read from the raw IR and then used to locate the
    // same function in both captures.
    let helper = function_name(raw_functions[1]);
    let helper_raw = function_slice(&raw_ir, helper);
    let helper_opt = function_slice(&opt_ir, helper);

    assert!(
        helper_raw.contains(COEFFS_ALLOCA),
        "raw IR should still materialize coeffs(3) as an aggregate before SROA:\n{}",
        helper_raw
    );
    assert!(
        helper_raw.contains(BOUNDS_CHECK),
        "raw IR should still contain stencil bounds checks before BCE:\n{}",
        helper_raw
    );
    assert!(
        !helper_opt.contains(COEFFS_ALLOCA),
        "O2 optimized IR should scalarize/remove coeffs(3):\n{}",
        helper_opt
    );
    assert!(
        !helper_opt.contains(BOUNDS_CHECK),
        "O2 optimized IR should eliminate safe stencil bounds checks:\n{}",
        helper_opt
    );
}
106
/// Compares the O0 `Ir` capture with the O2 `OptIr` capture of
/// `realworld_sasum_cleanup.f90`.
///
/// The raw IR must contain a bounds-check runtime call somewhere in its text;
/// the O2 IR must contain none.
#[test]
fn sasum_cleanup_eliminates_chunked_loop_bounds_checks_at_o2() {
    // Textual marker of a bounds-check runtime call in the IR dump.
    const BOUNDS_CHECK: &str = "rt_call @__afs_check_bounds";

    let source = fixture("realworld_sasum_cleanup.f90");

    let unoptimized_request = CaptureRequest {
        input: source.clone(),
        requested: BTreeSet::from([Stage::Ir]),
        opt_level: OptLevel::O0,
    };
    let raw_ir = capture_text(unoptimized_request, Stage::Ir);

    let optimized_request = CaptureRequest {
        input: source,
        requested: BTreeSet::from([Stage::OptIr]),
        opt_level: OptLevel::O2,
    };
    let opt_ir = capture_text(optimized_request, Stage::OptIr);

    assert!(
        raw_ir.contains(BOUNDS_CHECK),
        "raw IR should contain chunked-loop bounds checks before BCE:\n{}",
        raw_ir
    );
    assert!(
        !opt_ir.contains(BOUNDS_CHECK),
        "O2 optimized IR should eliminate safe chunked-loop bounds checks:\n{}",
        opt_ir
    );
}
139
/// Captures the `Obj` stage at O2 twice for each listed kernel fixture and
/// requires the two captured texts to be identical.
#[test]
fn realworld_29_8_kernels_have_deterministic_o2_objects() {
    // One O2 object capture for the given input; called twice per kernel.
    let object_snapshot = |input: PathBuf| {
        capture_text(
            CaptureRequest {
                input,
                requested: BTreeSet::from([Stage::Obj]),
                opt_level: OptLevel::O2,
            },
            Stage::Obj,
        )
    };

    let kernels = [
        "realworld_sasum_cleanup.f90",
        "realworld_three_point_apply.f90",
    ];

    for name in kernels {
        let source = fixture(name);
        let first = object_snapshot(source.clone());
        let second = object_snapshot(source);

        assert_eq!(
            first, second,
            "real-world 29.8 audit kernel should have deterministic O2 object snapshot for {}",
            name
        );
    }
}
171