Rust · 16657 bytes Raw Blame History
1 //! Optimization-level → pass pipeline mapping.
2 //!
3 //! `OptLevel` is what the driver hands us; `build_pipeline` returns a
4 //! configured `PassManager`. Adding a new pass to a level is a one-line
5 //! change here, which keeps the dispatch logic in one place.
6
7 use super::bce::Bce;
8 use super::call_resolve::CallResolve;
9 use super::const_arg::ConstArgSpecialize;
10 use super::const_fold::ConstFold;
11 use super::const_prop::ConstProp;
12 use super::cse::LocalCse;
13 use super::dce::Dce;
14 use super::dead_arg::DeadArgElim;
15 use super::dead_func::DeadFuncElim;
16 use super::dse::Dse;
17 use super::fast_math::FastMathReassoc;
18 use super::fission::LoopFission;
19 use super::fusion::LoopFusion;
20 use super::global_lsf::GlobalLsf;
21 use super::gvn::Gvn;
22 use super::inline::Inline;
23 use super::interchange::LoopInterchange;
24 use super::jump_thread::JumpThread;
25 use super::licm::Licm;
26 use super::lsf::LocalLsf;
27 use super::mem2reg::Mem2Reg;
28 use super::neon_vectorize::NeonVectorize;
29 use super::pass::PassManager;
30 use super::peel::LoopPeel;
31 use super::preheader::PreheaderInsert;
32 use super::return_prop::ReturnPropagate;
33 use super::sccp::Sccp_;
34 use super::simplify_cfg::SimplifyCfg;
35 use super::sroa::Sroa;
36 use super::strength_reduce::StrengthReduce;
37 use super::unroll::LoopUnroll;
38 use super::unswitch::LoopUnswitch;
39 use super::vectorize::Vectorize;
40
/// Compiler optimization levels.
///
/// Flag names mirror `gfortran` / `clang` so users have no surprises. The
/// per-variant summaries describe the intended scope of each level; the
/// authoritative list of passes that actually run is the one registered by
/// `build_pipeline`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptLevel {
    /// `-O0` — no optimization. Default during development.
    O0,
    /// `-O1` — cheap cleanup: constant folding, DCE, basic CSE, copy
    /// propagation.
    O1,
    /// `-O2` — `-O1` plus LICM, small inlining, strength reduction,
    /// bounds-check elimination, GVN, SROA, dead store elimination and
    /// loop transforms.
    O2,
    /// `-O3` — `-O2` plus aggressive inlining and vectorization.
    O3,
    /// `-Os` — like `-O2` but prefer code size (no unrolling, less inlining).
    Os,
    /// `-Ofast` — `-O3` plus fast-math (value-changing reassociation).
    Ofast,
}

impl OptLevel {
    /// Parse the textual flag (`O0`, `O1`, ..., `Ofast`).
    ///
    /// Three spellings are accepted for every level:
    ///
    /// * the level name with its `O` (`"O2"`, `"Ofast"`),
    /// * the bare suffix (`"2"`, `"fast"`),
    /// * the dashed form returned by [`OptLevel::flag_name`] (`"-O2"`).
    ///
    /// Anything else yields `None`.
    pub fn parse_flag(s: &str) -> Option<Self> {
        // Drop a single leading dash only when it introduces an `O…` flag.
        // This lets `flag_name()` output round-trip while strings such as
        // "-2" or "--O2" are still rejected.
        let name = match s.strip_prefix('-') {
            Some(rest) if rest.starts_with('O') => rest,
            _ => s,
        };
        match name {
            "O0" | "0" => Some(Self::O0),
            "O1" | "1" => Some(Self::O1),
            "O2" | "2" => Some(Self::O2),
            "O3" | "3" => Some(Self::O3),
            "Os" | "s" => Some(Self::Os),
            "Ofast" | "fast" => Some(Self::Ofast),
            _ => None,
        }
    }

    /// Canonical command-line spelling of this level, leading dash included.
    pub fn flag_name(self) -> &'static str {
        match self {
            Self::O0 => "-O0",
            Self::O1 => "-O1",
            Self::O2 => "-O2",
            Self::O3 => "-O3",
            Self::Os => "-Os",
            Self::Ofast => "-Ofast",
        }
    }

    /// Does this level enable inlining?
    ///
    /// True for every level except `-O0`, which matches the levels at which
    /// `build_pipeline` registers the `Inline` pass.
    ///
    /// Audit Min-6: `build_pipeline` hard-codes its pass lists and does not
    /// call this predicate (nor the two below); within this file they are
    /// exercised only by the unit tests.
    pub fn inlining(self) -> bool {
        matches!(
            self,
            Self::O1 | Self::O2 | Self::O3 | Self::Os | Self::Ofast
        )
    }

    /// Does this level enable loop vectorization (NEON)?
    ///
    /// True for `-O3` and `-Ofast` only.
    pub fn vectorize(self) -> bool {
        matches!(self, Self::O3 | Self::Ofast)
    }

    /// Does this level allow value-changing fast-math reassociation
    /// (`-Ofast`-only — relaxes IEEE 754 strictness for FAdd/FMul
    /// reordering, signed-zero collapse, etc.)?
    pub fn fast_math(self) -> bool {
        matches!(self, Self::Ofast)
    }
}
114
115 /// Build the pass pipeline for a given optimization level.
116 ///
117 /// Adding a new optimization pass is a single push here. Keeping this
118 /// in one function makes it trivial to audit which passes run at which
119 /// level.
120 pub fn build_pipeline(level: OptLevel) -> PassManager {
121 let mut pm = PassManager::new();
122 match level {
123 OptLevel::O0 => {
124 // Nothing — preserve unoptimized IR exactly as it was lowered.
125 }
126 OptLevel::O1 => {
127 // Cheap, always-correct cleanup.
128 //
129 // Mem2reg runs FIRST so every downstream pass sees SSA
130 // values instead of alloca/load/store round-trips.
131 // Without it, const_fold can't propagate constants
132 // through local variables, CSE can't dedupe across
133 // store/load pairs, and LICM is effectively dormant
134 // (loads block every hoist attempt).
135 pm.add(Box::new(CallResolve));
136 pm.add(Box::new(Mem2Reg));
137 pm.add(Box::new(ConstFold));
138 pm.add(Box::new(Inline::for_level(OptLevel::O1)));
139 pm.add(Box::new(ConstArgSpecialize));
140 pm.add(Box::new(DeadArgElim));
141 pm.add(Box::new(ReturnPropagate));
142 pm.add(Box::new(SimplifyCfg));
143 pm.add(Box::new(DeadFuncElim));
144 pm.add(Box::new(LocalLsf));
145 pm.add(Box::new(LocalCse));
146 pm.add(Box::new(Sccp_));
147 pm.add(Box::new(JumpThread));
148 pm.add(Box::new(ConstProp));
149 pm.add(Box::new(Dce));
150 }
151 OptLevel::O2 => {
152 // O1 plus LICM, strength reduction, DSE, LSF, loop transforms.
153 pm.add(Box::new(CallResolve));
154 pm.add(Box::new(Mem2Reg));
155 pm.add(Box::new(ConstFold));
156 pm.add(Box::new(Sroa)); // after SSA + const fold (GCC pattern)
157 pm.add(Box::new(Mem2Reg)); // re-promote SROA-created scalar allocas
158 pm.add(Box::new(Inline::for_level(OptLevel::O2)));
159 pm.add(Box::new(ConstArgSpecialize));
160 pm.add(Box::new(DeadArgElim));
161 pm.add(Box::new(ReturnPropagate));
162 pm.add(Box::new(SimplifyCfg));
163 pm.add(Box::new(DeadFuncElim));
164 pm.add(Box::new(Bce));
165 pm.add(Box::new(StrengthReduce));
166 pm.add(Box::new(LocalLsf));
167 pm.add(Box::new(GlobalLsf));
168 pm.add(Box::new(LocalCse));
169 pm.add(Box::new(PreheaderInsert));
170 pm.add(Box::new(LoopPeel));
171 pm.add(Box::new(LoopUnswitch));
172 pm.add(Box::new(Licm));
173 pm.add(Box::new(Sccp_));
174 pm.add(Box::new(JumpThread));
175 pm.add(Box::new(ConstProp));
176 pm.add(Box::new(Dse));
177 pm.add(Box::new(LoopInterchange));
178 pm.add(Box::new(LoopFission));
179 pm.add(Box::new(LoopFusion));
180 pm.add(Box::new(LoopUnroll));
181 pm.add(Box::new(Gvn)); // after loop passes to avoid SSA conflicts
182 pm.add(Box::new(Dce));
183 }
184 OptLevel::Os => {
185 // Like O2 but no loop unrolling (prefer code size).
186 pm.add(Box::new(CallResolve));
187 pm.add(Box::new(Mem2Reg));
188 pm.add(Box::new(ConstFold));
189 pm.add(Box::new(Sroa));
190 pm.add(Box::new(Mem2Reg));
191 pm.add(Box::new(Inline::for_level(OptLevel::Os)));
192 pm.add(Box::new(ConstArgSpecialize));
193 pm.add(Box::new(DeadArgElim));
194 pm.add(Box::new(ReturnPropagate));
195 pm.add(Box::new(SimplifyCfg));
196 pm.add(Box::new(DeadFuncElim));
197 pm.add(Box::new(Bce));
198 pm.add(Box::new(StrengthReduce));
199 pm.add(Box::new(LocalLsf));
200 pm.add(Box::new(GlobalLsf));
201 pm.add(Box::new(LocalCse));
202 pm.add(Box::new(PreheaderInsert));
203 pm.add(Box::new(LoopPeel));
204 pm.add(Box::new(LoopUnswitch));
205 pm.add(Box::new(Licm));
206 pm.add(Box::new(Sccp_));
207 pm.add(Box::new(JumpThread));
208 pm.add(Box::new(ConstProp));
209 pm.add(Box::new(Dse));
210 pm.add(Box::new(LoopInterchange));
211 pm.add(Box::new(Gvn));
212 pm.add(Box::new(Dce));
213 }
214 OptLevel::O3 => {
215 // O2 passes + loop unrolling + interchange.
216 pm.add(Box::new(CallResolve));
217 pm.add(Box::new(Mem2Reg));
218 pm.add(Box::new(ConstFold));
219 pm.add(Box::new(Sroa));
220 pm.add(Box::new(Mem2Reg));
221 pm.add(Box::new(Inline::for_level(OptLevel::O3)));
222 pm.add(Box::new(ConstArgSpecialize));
223 pm.add(Box::new(DeadArgElim));
224 pm.add(Box::new(ReturnPropagate));
225 pm.add(Box::new(SimplifyCfg));
226 pm.add(Box::new(DeadFuncElim));
227 pm.add(Box::new(Bce));
228 pm.add(Box::new(StrengthReduce));
229 pm.add(Box::new(LocalLsf));
230 pm.add(Box::new(GlobalLsf));
231 pm.add(Box::new(LocalCse));
232 pm.add(Box::new(PreheaderInsert));
233 pm.add(Box::new(LoopPeel));
234 pm.add(Box::new(LoopUnswitch));
235 pm.add(Box::new(Licm));
236 pm.add(Box::new(Sccp_));
237 pm.add(Box::new(JumpThread));
238 pm.add(Box::new(ConstProp));
239 pm.add(Box::new(Dse));
240 pm.add(Box::new(LoopInterchange));
241 pm.add(Box::new(LoopFission));
242 pm.add(Box::new(LoopFusion));
243 pm.add(Box::new(NeonVectorize));
244 pm.add(Box::new(Vectorize));
245 pm.add(Box::new(LoopUnroll));
246 pm.add(Box::new(Gvn)); // keep O3/Ofast aligned with O2/Os value numbering
247 pm.add(Box::new(Dce));
248 }
249 OptLevel::Ofast => {
250 // O3 plus Ofast-only fast-math reassociation.
251 pm.add(Box::new(CallResolve));
252 pm.add(Box::new(Mem2Reg));
253 pm.add(Box::new(ConstFold));
254 pm.add(Box::new(Sroa));
255 pm.add(Box::new(Mem2Reg));
256 pm.add(Box::new(Inline::for_level(OptLevel::O3)));
257 pm.add(Box::new(ConstArgSpecialize));
258 pm.add(Box::new(DeadArgElim));
259 pm.add(Box::new(ReturnPropagate));
260 pm.add(Box::new(SimplifyCfg));
261 pm.add(Box::new(DeadFuncElim));
262 pm.add(Box::new(Bce));
263 pm.add(Box::new(StrengthReduce));
264 pm.add(Box::new(LocalLsf));
265 pm.add(Box::new(GlobalLsf));
266 pm.add(Box::new(LocalCse));
267 pm.add(Box::new(PreheaderInsert));
268 pm.add(Box::new(LoopPeel));
269 pm.add(Box::new(LoopUnswitch));
270 pm.add(Box::new(Licm));
271 pm.add(Box::new(Sccp_));
272 pm.add(Box::new(JumpThread));
273 pm.add(Box::new(ConstProp));
274 pm.add(Box::new(Dse));
275 pm.add(Box::new(LoopInterchange));
276 pm.add(Box::new(LoopFission));
277 pm.add(Box::new(LoopFusion));
278 pm.add(Box::new(NeonVectorize));
279 pm.add(Box::new(Vectorize));
280 pm.add(Box::new(LoopUnroll));
281 pm.add(Box::new(FastMathReassoc));
282 pm.add(Box::new(Gvn));
283 pm.add(Box::new(Dce));
284 }
285 }
286 pm
287 }
288
289 /// Build the restricted optimization pipeline for modules that still contain
290 /// non-global `i128` values.
291 ///
292 /// This deliberately widens `i128` support one optimization lane at a time.
293 /// Now that the backend can carry stack-backed `i128` values through block
294 /// params and mem2reg-style joins, the widened `i128` lane can use the full
295 /// ordinary O1/O2/O3/Os/Ofast pipelines. Higher levels remain gated until their
296 /// pass shapes are proven end to end.
297 pub fn build_i128_pipeline(level: OptLevel) -> Option<PassManager> {
298 match level {
299 OptLevel::O1 => Some(build_pipeline(OptLevel::O1)),
300 OptLevel::O2 => Some(build_pipeline(OptLevel::O2)),
301 OptLevel::O3 => Some(build_pipeline(OptLevel::O3)),
302 OptLevel::Os => Some(build_pipeline(OptLevel::Os)),
303 OptLevel::Ofast => Some(build_pipeline(OptLevel::Ofast)),
304 _ => None,
305 }
306 }
307
#[cfg(test)]
mod tests {
    use super::*;

    /// Flag strings map to the expected level; unknown strings are rejected.
    #[test]
    fn parse_flags() {
        let cases = [
            ("O0", Some(OptLevel::O0)),
            ("Os", Some(OptLevel::Os)),
            ("O3", Some(OptLevel::O3)),
            ("Ofast", Some(OptLevel::Ofast)),
            ("O9", None),
        ];
        for (text, expected) in cases {
            assert_eq!(OptLevel::parse_flag(text), expected);
        }
    }

    /// Spot-check each predicate on one enabled and one disabled level.
    #[test]
    fn level_predicates() {
        // inlining
        assert!(!OptLevel::O0.inlining());
        assert!(OptLevel::O2.inlining());
        // vectorize
        assert!(OptLevel::O3.vectorize());
        assert!(!OptLevel::O2.vectorize());
        // fast_math
        assert!(OptLevel::Ofast.fast_math());
        assert!(!OptLevel::O3.fast_math());
    }

    /// O0 has no passes; every other level has at least one.
    #[test]
    fn pipelines_build() {
        assert!(build_pipeline(OptLevel::O0).is_empty());

        let optimizing = [
            OptLevel::O1,
            OptLevel::O2,
            OptLevel::O3,
            OptLevel::Os,
            OptLevel::Ofast,
        ];
        for lvl in optimizing {
            assert!(
                !build_pipeline(lvl).is_empty(),
                "pipeline {:?} should have at least one pass",
                lvl
            );
        }
    }

    /// Every level above O1 must register the `gvn` pass.
    #[test]
    fn higher_optimization_levels_keep_gvn_enabled() {
        let above_o1 = [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast];
        for lvl in above_o1 {
            let names = build_pipeline(lvl).pass_names();
            assert!(
                names.contains(&"gvn"),
                "pipeline {:?} should include gvn, got {:?}",
                lvl,
                names
            );
        }
    }

    /// Fast-math reassociation is present at Ofast and absent at O3.
    #[test]
    fn ofast_enables_fast_math_reassoc_but_o3_does_not() {
        let strict = build_pipeline(OptLevel::O3).pass_names();
        assert!(
            !strict.contains(&"fast-math-reassoc"),
            "O3 should stay strict, got {:?}",
            strict
        );

        let relaxed = build_pipeline(OptLevel::Ofast).pass_names();
        assert!(
            relaxed.contains(&"fast-math-reassoc"),
            "Ofast should include fast-math reassociation, got {:?}",
            relaxed
        );
    }

    /// The `vectorize` pass is absent at O2 and present at O3 and Ofast.
    #[test]
    fn vectorize_is_enabled_only_at_o3_and_above() {
        let without = build_pipeline(OptLevel::O2).pass_names();
        assert!(
            !without.contains(&"vectorize"),
            "O2 should not include vectorize, got {:?}",
            without
        );

        let at_o3 = build_pipeline(OptLevel::O3).pass_names();
        assert!(
            at_o3.contains(&"vectorize"),
            "O3 should include vectorize, got {:?}",
            at_o3
        );

        let at_ofast = build_pipeline(OptLevel::Ofast).pass_names();
        assert!(
            at_ofast.contains(&"vectorize"),
            "Ofast should include vectorize, got {:?}",
            at_ofast
        );
    }

    /// The i128 lane exists for every optimizing level and not for O0.
    #[test]
    fn i128_pipeline_is_available_through_ofast() {
        let o1_lane = build_i128_pipeline(OptLevel::O1);
        assert!(
            o1_lane.is_some(),
            "O1 should have the widened i128-safe pipeline"
        );

        let o2_lane = build_i128_pipeline(OptLevel::O2);
        assert!(
            o2_lane.is_some(),
            "O2 should be available once the widened i128 lane is proven"
        );

        for lvl in [OptLevel::O3, OptLevel::Os, OptLevel::Ofast] {
            let lane = build_i128_pipeline(lvl);
            assert!(
                lane.is_some(),
                "{:?} should be available once the widened i128 lane is proven",
                lvl
            );
        }

        let unoptimized = OptLevel::O0;
        assert!(
            build_i128_pipeline(unoptimized).is_none(),
            "{:?} should not yet have widened i128 optimization support",
            unoptimized
        );
    }

    /// At O1 the i128 lane registers the same passes as the ordinary pipeline.
    #[test]
    fn i128_pipeline_matches_full_o1() {
        let lane = build_i128_pipeline(OptLevel::O1)
            .expect("O1 should expose the widened i128 pipeline");
        let wide = lane.pass_names();
        let full = build_pipeline(OptLevel::O1).pass_names();
        assert_eq!(
            wide, full,
            "the widened i128 O1 lane should stay aligned with the ordinary O1 pipeline"
        );
    }

    /// Above O1 the i128 lane also mirrors the ordinary pipeline exactly.
    #[test]
    fn i128_pipeline_matches_full_higher_levels() {
        let above_o1 = [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast];
        for lvl in above_o1 {
            let lane = build_i128_pipeline(lvl)
                .expect("level should expose the widened i128 pipeline");
            let wide = lane.pass_names();
            let full = build_pipeline(lvl).pass_names();
            assert_eq!(
                wide, full,
                "the widened i128 lane should stay aligned with the ordinary {:?} pipeline",
                lvl
            );
        }
    }
}
457