Rust · 16070 bytes Raw Blame History
1 //! Optimization-level → pass pipeline mapping.
2 //!
3 //! `OptLevel` is what the driver hands us; `build_pipeline` returns a
4 //! configured `PassManager`. Adding a new pass to a level is a one-line
5 //! change here, which keeps the dispatch logic in one place.
6
7 use super::bce::Bce;
8 use super::call_resolve::CallResolve;
9 use super::const_arg::ConstArgSpecialize;
10 use super::const_fold::ConstFold;
11 use super::const_prop::ConstProp;
12 use super::cse::LocalCse;
13 use super::dce::Dce;
14 use super::dead_arg::DeadArgElim;
15 use super::dead_func::DeadFuncElim;
16 use super::dse::Dse;
17 use super::fast_math::FastMathReassoc;
18 use super::fission::LoopFission;
19 use super::fusion::LoopFusion;
20 use super::global_lsf::GlobalLsf;
21 use super::gvn::Gvn;
22 use super::inline::Inline;
23 use super::interchange::LoopInterchange;
24 use super::licm::Licm;
25 use super::lsf::LocalLsf;
26 use super::mem2reg::Mem2Reg;
27 use super::pass::PassManager;
28 use super::peel::LoopPeel;
29 use super::preheader::PreheaderInsert;
30 use super::return_prop::ReturnPropagate;
31 use super::simplify_cfg::SimplifyCfg;
32 use super::sroa::Sroa;
33 use super::strength_reduce::StrengthReduce;
34 use super::unroll::LoopUnroll;
35 use super::unswitch::LoopUnswitch;
36 use super::vectorize::Vectorize;
37
/// Compiler optimization levels.
///
/// Mirrors `gfortran` / `clang` semantics so users have no surprises. The
/// per-variant summaries describe the passes [`build_pipeline`] registers
/// for each level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptLevel {
    /// `-O0` — no optimization. Default during development.
    O0,
    /// `-O1` — cheap cleanup: call resolution, mem2reg, constant
    /// folding/propagation, inlining (O1 configuration), dead
    /// argument/function elimination, CFG simplification, local CSE, DCE.
    O1,
    /// `-O2` — `-O1` plus SROA, bounds-check elimination, strength
    /// reduction, LICM, dead store elimination, GVN, and the loop
    /// transforms (peel, unswitch, interchange, fission, fusion, unroll).
    O2,
    /// `-O3` — `-O2` plus NEON vectorization, with inlining configured
    /// for `-O3`.
    O3,
    /// `-Os` — like `-O2` but prefer code size: no loop fission, fusion
    /// or unrolling, and inlining configured for `-Os`.
    Os,
    /// `-Ofast` — `-O3` plus value-changing fast-math reassociation.
    Ofast,
}

impl OptLevel {
    /// Parse the textual flag.
    ///
    /// Accepted spellings per level are the bare name (`O0`, `O1`, ...,
    /// `Ofast`), the dashed name exactly as [`OptLevel::flag_name`] prints
    /// it (`-O0`, ..., `-Ofast`), and the short suffix (`0`, `1`, `2`, `3`,
    /// `s`, `fast`). Matching is case-sensitive; anything else yields
    /// `None`.
    pub fn parse_flag(s: &str) -> Option<Self> {
        match s {
            "O0" | "-O0" | "0" => Some(Self::O0),
            "O1" | "-O1" | "1" => Some(Self::O1),
            "O2" | "-O2" | "2" => Some(Self::O2),
            "O3" | "-O3" | "3" => Some(Self::O3),
            "Os" | "-Os" | "s" => Some(Self::Os),
            "Ofast" | "-Ofast" | "fast" => Some(Self::Ofast),
            _ => None,
        }
    }

    /// The command-line spelling of this level, including the leading dash
    /// (`-O0`, ..., `-Ofast`). `parse_flag` accepts every string returned
    /// here, so the two round-trip.
    pub fn flag_name(self) -> &'static str {
        match self {
            Self::O0 => "-O0",
            Self::O1 => "-O1",
            Self::O2 => "-O2",
            Self::O3 => "-O3",
            Self::Os => "-Os",
            Self::Ofast => "-Ofast",
        }
    }

    /// Does this level enable inlining?
    ///
    /// True for every level except `-O0`, which matches the levels at
    /// which [`build_pipeline`] registers an `Inline` pass.
    ///
    /// Audit Min-6: within this module the predicate is exercised only by
    /// the unit tests; the pipeline builder registers `Inline` without
    /// consulting it, so the two have to be kept in sync by hand.
    pub fn inlining(self) -> bool {
        // Exhaustive on purpose: a new variant must make an explicit choice.
        match self {
            Self::O0 => false,
            Self::O1 | Self::O2 | Self::O3 | Self::Os | Self::Ofast => true,
        }
    }

    /// Does this level enable loop vectorization (NEON)?
    ///
    /// True for `-O3` and `-Ofast`, the levels whose pipelines contain the
    /// `Vectorize` pass.
    pub fn vectorize(self) -> bool {
        matches!(self, Self::O3 | Self::Ofast)
    }

    /// Does this level allow value-changing fast-math reassociation
    /// (`-Ofast`-only — relaxes IEEE 754 strictness for FAdd/FMul
    /// reordering, signed-zero collapse, etc.)?
    pub fn fast_math(self) -> bool {
        matches!(self, Self::Ofast)
    }
}
111
112 /// Build the pass pipeline for a given optimization level.
113 ///
114 /// Adding a new optimization pass is a single push here. Keeping this
115 /// in one function makes it trivial to audit which passes run at which
116 /// level.
117 pub fn build_pipeline(level: OptLevel) -> PassManager {
118 let mut pm = PassManager::new();
119 match level {
120 OptLevel::O0 => {
121 // Nothing — preserve unoptimized IR exactly as it was lowered.
122 }
123 OptLevel::O1 => {
124 // Cheap, always-correct cleanup.
125 //
126 // Mem2reg runs FIRST so every downstream pass sees SSA
127 // values instead of alloca/load/store round-trips.
128 // Without it, const_fold can't propagate constants
129 // through local variables, CSE can't dedupe across
130 // store/load pairs, and LICM is effectively dormant
131 // (loads block every hoist attempt).
132 pm.add(Box::new(CallResolve));
133 pm.add(Box::new(Mem2Reg));
134 pm.add(Box::new(ConstFold));
135 pm.add(Box::new(Inline::for_level(OptLevel::O1)));
136 pm.add(Box::new(ConstArgSpecialize));
137 pm.add(Box::new(DeadArgElim));
138 pm.add(Box::new(ReturnPropagate));
139 pm.add(Box::new(SimplifyCfg));
140 pm.add(Box::new(DeadFuncElim));
141 pm.add(Box::new(LocalLsf));
142 pm.add(Box::new(LocalCse));
143 pm.add(Box::new(ConstProp));
144 pm.add(Box::new(Dce));
145 }
146 OptLevel::O2 => {
147 // O1 plus LICM, strength reduction, DSE, LSF, loop transforms.
148 pm.add(Box::new(CallResolve));
149 pm.add(Box::new(Mem2Reg));
150 pm.add(Box::new(ConstFold));
151 pm.add(Box::new(Sroa)); // after SSA + const fold (GCC pattern)
152 pm.add(Box::new(Mem2Reg)); // re-promote SROA-created scalar allocas
153 pm.add(Box::new(Inline::for_level(OptLevel::O2)));
154 pm.add(Box::new(ConstArgSpecialize));
155 pm.add(Box::new(DeadArgElim));
156 pm.add(Box::new(ReturnPropagate));
157 pm.add(Box::new(SimplifyCfg));
158 pm.add(Box::new(DeadFuncElim));
159 pm.add(Box::new(Bce));
160 pm.add(Box::new(StrengthReduce));
161 pm.add(Box::new(LocalLsf));
162 pm.add(Box::new(GlobalLsf));
163 pm.add(Box::new(LocalCse));
164 pm.add(Box::new(PreheaderInsert));
165 pm.add(Box::new(LoopPeel));
166 pm.add(Box::new(LoopUnswitch));
167 pm.add(Box::new(Licm));
168 pm.add(Box::new(ConstProp));
169 pm.add(Box::new(Dse));
170 pm.add(Box::new(LoopInterchange));
171 pm.add(Box::new(LoopFission));
172 pm.add(Box::new(LoopFusion));
173 pm.add(Box::new(LoopUnroll));
174 pm.add(Box::new(Gvn)); // after loop passes to avoid SSA conflicts
175 pm.add(Box::new(Dce));
176 }
177 OptLevel::Os => {
178 // Like O2 but no loop unrolling (prefer code size).
179 pm.add(Box::new(CallResolve));
180 pm.add(Box::new(Mem2Reg));
181 pm.add(Box::new(ConstFold));
182 pm.add(Box::new(Sroa));
183 pm.add(Box::new(Mem2Reg));
184 pm.add(Box::new(Inline::for_level(OptLevel::Os)));
185 pm.add(Box::new(ConstArgSpecialize));
186 pm.add(Box::new(DeadArgElim));
187 pm.add(Box::new(ReturnPropagate));
188 pm.add(Box::new(SimplifyCfg));
189 pm.add(Box::new(DeadFuncElim));
190 pm.add(Box::new(Bce));
191 pm.add(Box::new(StrengthReduce));
192 pm.add(Box::new(LocalLsf));
193 pm.add(Box::new(GlobalLsf));
194 pm.add(Box::new(LocalCse));
195 pm.add(Box::new(PreheaderInsert));
196 pm.add(Box::new(LoopPeel));
197 pm.add(Box::new(LoopUnswitch));
198 pm.add(Box::new(Licm));
199 pm.add(Box::new(ConstProp));
200 pm.add(Box::new(Dse));
201 pm.add(Box::new(LoopInterchange));
202 pm.add(Box::new(Gvn));
203 pm.add(Box::new(Dce));
204 }
205 OptLevel::O3 => {
206 // O2 passes + loop unrolling + interchange.
207 pm.add(Box::new(CallResolve));
208 pm.add(Box::new(Mem2Reg));
209 pm.add(Box::new(ConstFold));
210 pm.add(Box::new(Sroa));
211 pm.add(Box::new(Mem2Reg));
212 pm.add(Box::new(Inline::for_level(OptLevel::O3)));
213 pm.add(Box::new(ConstArgSpecialize));
214 pm.add(Box::new(DeadArgElim));
215 pm.add(Box::new(ReturnPropagate));
216 pm.add(Box::new(SimplifyCfg));
217 pm.add(Box::new(DeadFuncElim));
218 pm.add(Box::new(Bce));
219 pm.add(Box::new(StrengthReduce));
220 pm.add(Box::new(LocalLsf));
221 pm.add(Box::new(GlobalLsf));
222 pm.add(Box::new(LocalCse));
223 pm.add(Box::new(PreheaderInsert));
224 pm.add(Box::new(LoopPeel));
225 pm.add(Box::new(LoopUnswitch));
226 pm.add(Box::new(Licm));
227 pm.add(Box::new(ConstProp));
228 pm.add(Box::new(Dse));
229 pm.add(Box::new(LoopInterchange));
230 pm.add(Box::new(LoopFission));
231 pm.add(Box::new(LoopFusion));
232 pm.add(Box::new(Vectorize));
233 pm.add(Box::new(LoopUnroll));
234 pm.add(Box::new(Gvn)); // keep O3/Ofast aligned with O2/Os value numbering
235 pm.add(Box::new(Dce));
236 }
237 OptLevel::Ofast => {
238 // O3 plus Ofast-only fast-math reassociation.
239 pm.add(Box::new(CallResolve));
240 pm.add(Box::new(Mem2Reg));
241 pm.add(Box::new(ConstFold));
242 pm.add(Box::new(Sroa));
243 pm.add(Box::new(Mem2Reg));
244 pm.add(Box::new(Inline::for_level(OptLevel::O3)));
245 pm.add(Box::new(ConstArgSpecialize));
246 pm.add(Box::new(DeadArgElim));
247 pm.add(Box::new(ReturnPropagate));
248 pm.add(Box::new(SimplifyCfg));
249 pm.add(Box::new(DeadFuncElim));
250 pm.add(Box::new(Bce));
251 pm.add(Box::new(StrengthReduce));
252 pm.add(Box::new(LocalLsf));
253 pm.add(Box::new(GlobalLsf));
254 pm.add(Box::new(LocalCse));
255 pm.add(Box::new(PreheaderInsert));
256 pm.add(Box::new(LoopPeel));
257 pm.add(Box::new(LoopUnswitch));
258 pm.add(Box::new(Licm));
259 pm.add(Box::new(ConstProp));
260 pm.add(Box::new(Dse));
261 pm.add(Box::new(LoopInterchange));
262 pm.add(Box::new(LoopFission));
263 pm.add(Box::new(LoopFusion));
264 pm.add(Box::new(Vectorize));
265 pm.add(Box::new(LoopUnroll));
266 pm.add(Box::new(FastMathReassoc));
267 pm.add(Box::new(Gvn));
268 pm.add(Box::new(Dce));
269 }
270 }
271 pm
272 }
273
274 /// Build the restricted optimization pipeline for modules that still contain
275 /// non-global `i128` values.
276 ///
277 /// This deliberately widens `i128` support one optimization lane at a time.
278 /// Now that the backend can carry stack-backed `i128` values through block
279 /// params and mem2reg-style joins, the widened `i128` lane can use the full
280 /// ordinary O1/O2/O3/Os/Ofast pipelines. Higher levels remain gated until their
281 /// pass shapes are proven end to end.
282 pub fn build_i128_pipeline(level: OptLevel) -> Option<PassManager> {
283 match level {
284 OptLevel::O1 => Some(build_pipeline(OptLevel::O1)),
285 OptLevel::O2 => Some(build_pipeline(OptLevel::O2)),
286 OptLevel::O3 => Some(build_pipeline(OptLevel::O3)),
287 OptLevel::Os => Some(build_pipeline(OptLevel::Os)),
288 OptLevel::Ofast => Some(build_pipeline(OptLevel::Ofast)),
289 _ => None,
290 }
291 }
292
#[cfg(test)]
mod tests {
    use super::*;

    /// Every level above `O1`, in the order the assertions walk them.
    const ABOVE_O1: [OptLevel; 4] = [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast];

    /// Known spellings map to their level; an unknown one maps to `None`.
    #[test]
    fn parse_flags() {
        let cases = [
            ("O0", Some(OptLevel::O0)),
            ("Os", Some(OptLevel::Os)),
            ("O3", Some(OptLevel::O3)),
            ("Ofast", Some(OptLevel::Ofast)),
            ("O9", None),
        ];
        for (text, expected) in cases {
            assert_eq!(OptLevel::parse_flag(text), expected);
        }
    }

    /// Spot-check each predicate on one level where it holds and one where
    /// it does not.
    #[test]
    fn level_predicates() {
        // inlining
        assert!(OptLevel::O2.inlining());
        assert!(!OptLevel::O0.inlining());
        // vectorize
        assert!(OptLevel::O3.vectorize());
        assert!(!OptLevel::O2.vectorize());
        // fast_math
        assert!(OptLevel::Ofast.fast_math());
        assert!(!OptLevel::O3.fast_math());
    }

    /// O0 has no passes; every other level has at least one.
    #[test]
    fn pipelines_build() {
        assert!(build_pipeline(OptLevel::O0).is_empty());

        let optimizing = [
            OptLevel::O1,
            OptLevel::O2,
            OptLevel::O3,
            OptLevel::Os,
            OptLevel::Ofast,
        ];
        for lvl in optimizing {
            assert!(
                !build_pipeline(lvl).is_empty(),
                "pipeline {:?} should have at least one pass",
                lvl
            );
        }
    }

    /// GVN must be registered at every level above O1.
    #[test]
    fn higher_optimization_levels_keep_gvn_enabled() {
        for lvl in ABOVE_O1 {
            let names = build_pipeline(lvl).pass_names();
            assert!(
                names.contains(&"gvn"),
                "pipeline {:?} should include gvn, got {:?}",
                lvl,
                names
            );
        }
    }

    /// Fast-math reassociation is present at Ofast and absent at O3.
    #[test]
    fn ofast_enables_fast_math_reassoc_but_o3_does_not() {
        let ofast = build_pipeline(OptLevel::Ofast).pass_names();
        let o3 = build_pipeline(OptLevel::O3).pass_names();

        assert!(
            !o3.contains(&"fast-math-reassoc"),
            "O3 should stay strict, got {:?}",
            o3
        );
        assert!(
            ofast.contains(&"fast-math-reassoc"),
            "Ofast should include fast-math reassociation, got {:?}",
            ofast
        );
    }

    /// The vectorizer is registered at O3 and Ofast but not at O2.
    #[test]
    fn vectorize_is_enabled_only_at_o3_and_above() {
        let names_for = |lvl| build_pipeline(lvl).pass_names();

        let o2 = names_for(OptLevel::O2);
        assert!(
            !o2.contains(&"vectorize"),
            "O2 should not include vectorize, got {:?}",
            o2
        );

        let o3 = names_for(OptLevel::O3);
        assert!(
            o3.contains(&"vectorize"),
            "O3 should include vectorize, got {:?}",
            o3
        );

        let ofast = names_for(OptLevel::Ofast);
        assert!(
            ofast.contains(&"vectorize"),
            "Ofast should include vectorize, got {:?}",
            ofast
        );
    }

    /// The i128 lane exists for every optimizing level and not for O0.
    #[test]
    fn i128_pipeline_is_available_through_ofast() {
        assert!(
            build_i128_pipeline(OptLevel::O1).is_some(),
            "O1 should have the widened i128-safe pipeline"
        );
        assert!(
            build_i128_pipeline(OptLevel::O2).is_some(),
            "O2 should be available once the widened i128 lane is proven"
        );

        for lvl in [OptLevel::O3, OptLevel::Os, OptLevel::Ofast] {
            assert!(
                build_i128_pipeline(lvl).is_some(),
                "{:?} should be available once the widened i128 lane is proven",
                lvl
            );
        }

        let lvl = OptLevel::O0;
        assert!(
            build_i128_pipeline(lvl).is_none(),
            "{:?} should not yet have widened i128 optimization support",
            lvl
        );
    }

    /// At O1 the i128 lane registers exactly the ordinary pass list.
    #[test]
    fn i128_pipeline_matches_full_o1() {
        let full = build_pipeline(OptLevel::O1).pass_names();
        let wide = build_i128_pipeline(OptLevel::O1)
            .expect("O1 should expose the widened i128 pipeline")
            .pass_names();
        assert_eq!(
            wide, full,
            "the widened i128 O1 lane should stay aligned with the ordinary O1 pipeline"
        );
    }

    /// Above O1 the i128 lane also registers exactly the ordinary pass list.
    #[test]
    fn i128_pipeline_matches_full_higher_levels() {
        for lvl in ABOVE_O1 {
            let full = build_pipeline(lvl).pass_names();
            let wide = build_i128_pipeline(lvl)
                .expect("level should expose the widened i128 pipeline")
                .pass_names();
            assert_eq!(
                wide, full,
                "the widened i128 lane should stay aligned with the ordinary {:?} pipeline",
                lvl
            );
        }
    }
}
442