armfortas Public

Watch 0 Fork 0 Star 0

Rust · 16070 bytes Raw Blame History

  
        1
        //! Optimization-level → pass pipeline mapping.
      
        2
        //!
      
        3
        //! `OptLevel` is what the driver hands us; `build_pipeline` returns a
      
        4
        //! configured `PassManager`. Adding a new pass to a level is a one-line
      
        5
        //! change here, which keeps the dispatch logic in one place.
      
        6
        
        7
        use super::bce::Bce;
      
        8
        use super::call_resolve::CallResolve;
      
        9
        use super::const_arg::ConstArgSpecialize;
      
        10
        use super::const_fold::ConstFold;
      
        11
        use super::const_prop::ConstProp;
      
        12
        use super::cse::LocalCse;
      
        13
        use super::dce::Dce;
      
        14
        use super::dead_arg::DeadArgElim;
      
        15
        use super::dead_func::DeadFuncElim;
      
        16
        use super::dse::Dse;
      
        17
        use super::fast_math::FastMathReassoc;
      
        18
        use super::fission::LoopFission;
      
        19
        use super::fusion::LoopFusion;
      
        20
        use super::global_lsf::GlobalLsf;
      
        21
        use super::gvn::Gvn;
      
        22
        use super::inline::Inline;
      
        23
        use super::interchange::LoopInterchange;
      
        24
        use super::licm::Licm;
      
        25
        use super::lsf::LocalLsf;
      
        26
        use super::mem2reg::Mem2Reg;
      
        27
        use super::pass::PassManager;
      
        28
        use super::peel::LoopPeel;
      
        29
        use super::preheader::PreheaderInsert;
      
        30
        use super::return_prop::ReturnPropagate;
      
        31
        use super::simplify_cfg::SimplifyCfg;
      
        32
        use super::sroa::Sroa;
      
        33
        use super::strength_reduce::StrengthReduce;
      
        34
        use super::unroll::LoopUnroll;
      
        35
        use super::unswitch::LoopUnswitch;
      
        36
        use super::vectorize::Vectorize;
      
        37
        
        38
        /// Compiler optimization levels.
        ///
        /// Mirrors `gfortran` / `clang` flag spellings so users have no surprises.
        /// The pass lists below summarize what `build_pipeline` registers for each
        /// level; that function is the authoritative source.
        #[derive(Debug, Clone, Copy, PartialEq, Eq)]
        pub enum OptLevel {
            /// `-O0` — no optimization; no passes are registered. Default during
            /// development.
            O0,
            /// `-O1` — cheap cleanup: call resolution, mem2reg, constant folding
            /// and propagation, inlining, dead argument/function elimination,
            /// CFG simplification, local CSE, DCE.
            O1,
            /// `-O2` — `-O1` plus SROA, bounds-check elimination, strength
            /// reduction, LICM, dead store elimination, GVN, and loop transforms
            /// (peeling, unswitching, interchange, fission, fusion, unrolling).
            O2,
            /// `-O3` — `-O2` plus NEON loop vectorization and the `-O3` inlining
            /// configuration.
            O3,
            /// `-Os` — like `-O2` but prefer code size: no loop fission, fusion,
            /// or unrolling, and the `-Os` inlining configuration.
            Os,
            /// `-Ofast` — `-O3` plus value-changing fast-math reassociation
            /// (relaxes IEEE 754 strictness).
            Ofast,
        }
      
        60
        
        61
        impl OptLevel {
      
        62
            /// Parse the textual flag (`O0`, `O1`, ..., `Ofast`).
      
        63
            pub fn parse_flag(s: &str) -> Option<Self> {
      
        64
                match s {
      
        65
                    "O0" | "0" => Some(Self::O0),
      
        66
                    "O1" | "1" => Some(Self::O1),
      
        67
                    "O2" | "2" => Some(Self::O2),
      
        68
                    "O3" | "3" => Some(Self::O3),
      
        69
                    "Os" | "s" => Some(Self::Os),
      
        70
                    "Ofast" | "fast" => Some(Self::Ofast),
      
        71
                    _ => None,
      
        72
                }
      
        73
            }
      
        74
        
        75
            pub fn flag_name(self) -> &'static str {
      
        76
                match self {
      
        77
                    Self::O0 => "-O0",
      
        78
                    Self::O1 => "-O1",
      
        79
                    Self::O2 => "-O2",
      
        80
                    Self::O3 => "-O3",
      
        81
                    Self::Os => "-Os",
      
        82
                    Self::Ofast => "-Ofast",
      
        83
                }
      
        84
            }
      
        85
        
        86
            /// Does this level enable inlining?
      
        87
            ///
      
        88
            /// Audit Min-6: this predicate is currently consulted only by the
      
        89
            /// pipeline test harness. Once `Inline` lands as a pass, the
      
        90
            /// builder below will gate registration on this. Same for the
      
        91
            /// other two predicates.
      
        92
            pub fn inlining(self) -> bool {
      
        93
                matches!(
      
        94
                    self,
      
        95
                    Self::O1 | Self::O2 | Self::O3 | Self::Os | Self::Ofast
      
        96
                )
      
        97
            }
      
        98
        
        99
            /// Does this level enable loop vectorization (NEON)?
      
        100
            pub fn vectorize(self) -> bool {
      
        101
                matches!(self, Self::O3 | Self::Ofast)
      
        102
            }
      
        103
        
        104
            /// Does this level allow value-changing fast-math reassociation
      
        105
            /// (`-Ofast`-only — relaxes IEEE 754 strictness for FAdd/FMul
      
        106
            /// reordering, signed-zero collapse, etc.)?
      
        107
            pub fn fast_math(self) -> bool {
      
        108
                matches!(self, Self::Ofast)
      
        109
            }
      
        110
        }
      
        111
        
        112
        /// Build the pass pipeline for a given optimization level.
      
        113
        ///
      
        114
        /// Adding a new optimization pass is a single push here. Keeping this
      
        115
        /// in one function makes it trivial to audit which passes run at which
      
        116
        /// level.
      
        117
        pub fn build_pipeline(level: OptLevel) -> PassManager {
      
        118
            let mut pm = PassManager::new();
      
        119
            match level {
      
        120
                OptLevel::O0 => {
      
        121
                    // Nothing — preserve unoptimized IR exactly as it was lowered.
      
        122
                }
      
        123
                OptLevel::O1 => {
      
        124
                    // Cheap, always-correct cleanup.
      
        125
                    //
      
        126
                    // Mem2reg runs FIRST so every downstream pass sees SSA
      
        127
                    // values instead of alloca/load/store round-trips.
      
        128
                    // Without it, const_fold can't propagate constants
      
        129
                    // through local variables, CSE can't dedupe across
      
        130
                    // store/load pairs, and LICM is effectively dormant
      
        131
                    // (loads block every hoist attempt).
      
        132
                    pm.add(Box::new(CallResolve));
      
        133
                    pm.add(Box::new(Mem2Reg));
      
        134
                    pm.add(Box::new(ConstFold));
      
        135
                    pm.add(Box::new(Inline::for_level(OptLevel::O1)));
      
        136
                    pm.add(Box::new(ConstArgSpecialize));
      
        137
                    pm.add(Box::new(DeadArgElim));
      
        138
                    pm.add(Box::new(ReturnPropagate));
      
        139
                    pm.add(Box::new(SimplifyCfg));
      
        140
                    pm.add(Box::new(DeadFuncElim));
      
        141
                    pm.add(Box::new(LocalLsf));
      
        142
                    pm.add(Box::new(LocalCse));
      
        143
                    pm.add(Box::new(ConstProp));
      
        144
                    pm.add(Box::new(Dce));
      
        145
                }
      
        146
                OptLevel::O2 => {
      
        147
                    // O1 plus LICM, strength reduction, DSE, LSF, loop transforms.
      
        148
                    pm.add(Box::new(CallResolve));
      
        149
                    pm.add(Box::new(Mem2Reg));
      
        150
                    pm.add(Box::new(ConstFold));
      
        151
                    pm.add(Box::new(Sroa)); // after SSA + const fold (GCC pattern)
      
        152
                    pm.add(Box::new(Mem2Reg)); // re-promote SROA-created scalar allocas
      
        153
                    pm.add(Box::new(Inline::for_level(OptLevel::O2)));
      
        154
                    pm.add(Box::new(ConstArgSpecialize));
      
        155
                    pm.add(Box::new(DeadArgElim));
      
        156
                    pm.add(Box::new(ReturnPropagate));
      
        157
                    pm.add(Box::new(SimplifyCfg));
      
        158
                    pm.add(Box::new(DeadFuncElim));
      
        159
                    pm.add(Box::new(Bce));
      
        160
                    pm.add(Box::new(StrengthReduce));
      
        161
                    pm.add(Box::new(LocalLsf));
      
        162
                    pm.add(Box::new(GlobalLsf));
      
        163
                    pm.add(Box::new(LocalCse));
      
        164
                    pm.add(Box::new(PreheaderInsert));
      
        165
                    pm.add(Box::new(LoopPeel));
      
        166
                    pm.add(Box::new(LoopUnswitch));
      
        167
                    pm.add(Box::new(Licm));
      
        168
                    pm.add(Box::new(ConstProp));
      
        169
                    pm.add(Box::new(Dse));
      
        170
                    pm.add(Box::new(LoopInterchange));
      
        171
                    pm.add(Box::new(LoopFission));
      
        172
                    pm.add(Box::new(LoopFusion));
      
        173
                    pm.add(Box::new(LoopUnroll));
      
        174
                    pm.add(Box::new(Gvn)); // after loop passes to avoid SSA conflicts
      
        175
                    pm.add(Box::new(Dce));
      
        176
                }
      
        177
                OptLevel::Os => {
      
        178
                    // Like O2 but no loop unrolling (prefer code size).
      
        179
                    pm.add(Box::new(CallResolve));
      
        180
                    pm.add(Box::new(Mem2Reg));
      
        181
                    pm.add(Box::new(ConstFold));
      
        182
                    pm.add(Box::new(Sroa));
      
        183
                    pm.add(Box::new(Mem2Reg));
      
        184
                    pm.add(Box::new(Inline::for_level(OptLevel::Os)));
      
        185
                    pm.add(Box::new(ConstArgSpecialize));
      
        186
                    pm.add(Box::new(DeadArgElim));
      
        187
                    pm.add(Box::new(ReturnPropagate));
      
        188
                    pm.add(Box::new(SimplifyCfg));
      
        189
                    pm.add(Box::new(DeadFuncElim));
      
        190
                    pm.add(Box::new(Bce));
      
        191
                    pm.add(Box::new(StrengthReduce));
      
        192
                    pm.add(Box::new(LocalLsf));
      
        193
                    pm.add(Box::new(GlobalLsf));
      
        194
                    pm.add(Box::new(LocalCse));
      
        195
                    pm.add(Box::new(PreheaderInsert));
      
        196
                    pm.add(Box::new(LoopPeel));
      
        197
                    pm.add(Box::new(LoopUnswitch));
      
        198
                    pm.add(Box::new(Licm));
      
        199
                    pm.add(Box::new(ConstProp));
      
        200
                    pm.add(Box::new(Dse));
      
        201
                    pm.add(Box::new(LoopInterchange));
      
        202
                    pm.add(Box::new(Gvn));
      
        203
                    pm.add(Box::new(Dce));
      
        204
                }
      
        205
                OptLevel::O3 => {
      
        206
                    // O2 passes + loop unrolling + interchange.
      
        207
                    pm.add(Box::new(CallResolve));
      
        208
                    pm.add(Box::new(Mem2Reg));
      
        209
                    pm.add(Box::new(ConstFold));
      
        210
                    pm.add(Box::new(Sroa));
      
        211
                    pm.add(Box::new(Mem2Reg));
      
        212
                    pm.add(Box::new(Inline::for_level(OptLevel::O3)));
      
        213
                    pm.add(Box::new(ConstArgSpecialize));
      
        214
                    pm.add(Box::new(DeadArgElim));
      
        215
                    pm.add(Box::new(ReturnPropagate));
      
        216
                    pm.add(Box::new(SimplifyCfg));
      
        217
                    pm.add(Box::new(DeadFuncElim));
      
        218
                    pm.add(Box::new(Bce));
      
        219
                    pm.add(Box::new(StrengthReduce));
      
        220
                    pm.add(Box::new(LocalLsf));
      
        221
                    pm.add(Box::new(GlobalLsf));
      
        222
                    pm.add(Box::new(LocalCse));
      
        223
                    pm.add(Box::new(PreheaderInsert));
      
        224
                    pm.add(Box::new(LoopPeel));
      
        225
                    pm.add(Box::new(LoopUnswitch));
      
        226
                    pm.add(Box::new(Licm));
      
        227
                    pm.add(Box::new(ConstProp));
      
        228
                    pm.add(Box::new(Dse));
      
        229
                    pm.add(Box::new(LoopInterchange));
      
        230
                    pm.add(Box::new(LoopFission));
      
        231
                    pm.add(Box::new(LoopFusion));
      
        232
                    pm.add(Box::new(Vectorize));
      
        233
                    pm.add(Box::new(LoopUnroll));
      
        234
                    pm.add(Box::new(Gvn)); // keep O3/Ofast aligned with O2/Os value numbering
      
        235
                    pm.add(Box::new(Dce));
      
        236
                }
      
        237
                OptLevel::Ofast => {
      
        238
                    // O3 plus Ofast-only fast-math reassociation.
      
        239
                    pm.add(Box::new(CallResolve));
      
        240
                    pm.add(Box::new(Mem2Reg));
      
        241
                    pm.add(Box::new(ConstFold));
      
        242
                    pm.add(Box::new(Sroa));
      
        243
                    pm.add(Box::new(Mem2Reg));
      
        244
                    pm.add(Box::new(Inline::for_level(OptLevel::O3)));
      
        245
                    pm.add(Box::new(ConstArgSpecialize));
      
        246
                    pm.add(Box::new(DeadArgElim));
      
        247
                    pm.add(Box::new(ReturnPropagate));
      
        248
                    pm.add(Box::new(SimplifyCfg));
      
        249
                    pm.add(Box::new(DeadFuncElim));
      
        250
                    pm.add(Box::new(Bce));
      
        251
                    pm.add(Box::new(StrengthReduce));
      
        252
                    pm.add(Box::new(LocalLsf));
      
        253
                    pm.add(Box::new(GlobalLsf));
      
        254
                    pm.add(Box::new(LocalCse));
      
        255
                    pm.add(Box::new(PreheaderInsert));
      
        256
                    pm.add(Box::new(LoopPeel));
      
        257
                    pm.add(Box::new(LoopUnswitch));
      
        258
                    pm.add(Box::new(Licm));
      
        259
                    pm.add(Box::new(ConstProp));
      
        260
                    pm.add(Box::new(Dse));
      
        261
                    pm.add(Box::new(LoopInterchange));
      
        262
                    pm.add(Box::new(LoopFission));
      
        263
                    pm.add(Box::new(LoopFusion));
      
        264
                    pm.add(Box::new(Vectorize));
      
        265
                    pm.add(Box::new(LoopUnroll));
      
        266
                    pm.add(Box::new(FastMathReassoc));
      
        267
                    pm.add(Box::new(Gvn));
      
        268
                    pm.add(Box::new(Dce));
      
        269
                }
      
        270
            }
      
        271
            pm
      
        272
        }
      
        273
        
        274
        /// Build the restricted optimization pipeline for modules that still contain
      
        275
        /// non-global `i128` values.
      
        276
        ///
      
        277
        /// This deliberately widens `i128` support one optimization lane at a time.
      
        278
        /// Now that the backend can carry stack-backed `i128` values through block
      
        279
        /// params and mem2reg-style joins, the widened `i128` lane can use the full
      
        280
        /// ordinary O1/O2/O3/Os/Ofast pipelines. Higher levels remain gated until their
      
        281
        /// pass shapes are proven end to end.
      
        282
        pub fn build_i128_pipeline(level: OptLevel) -> Option<PassManager> {
      
        283
            match level {
      
        284
                OptLevel::O1 => Some(build_pipeline(OptLevel::O1)),
      
        285
                OptLevel::O2 => Some(build_pipeline(OptLevel::O2)),
      
        286
                OptLevel::O3 => Some(build_pipeline(OptLevel::O3)),
      
        287
                OptLevel::Os => Some(build_pipeline(OptLevel::Os)),
      
        288
                OptLevel::Ofast => Some(build_pipeline(OptLevel::Ofast)),
      
        289
                _ => None,
      
        290
            }
      
        291
        }
      
        292
        
        293
        #[cfg(test)]
        mod tests {
            use super::*;

            /// Every level that is expected to run at least one pass.
            const OPTIMIZING_LEVELS: [OptLevel; 5] = [
                OptLevel::O1,
                OptLevel::O2,
                OptLevel::O3,
                OptLevel::Os,
                OptLevel::Ofast,
            ];

            #[test]
            fn parse_flags() {
                assert_eq!(OptLevel::parse_flag("O0"), Some(OptLevel::O0));
                assert_eq!(OptLevel::parse_flag("Os"), Some(OptLevel::Os));
                assert_eq!(OptLevel::parse_flag("O3"), Some(OptLevel::O3));
                assert_eq!(OptLevel::parse_flag("Ofast"), Some(OptLevel::Ofast));
                assert_eq!(OptLevel::parse_flag("O9"), None);

                // Short spellings without the leading `O`.
                assert_eq!(OptLevel::parse_flag("1"), Some(OptLevel::O1));
                assert_eq!(OptLevel::parse_flag("2"), Some(OptLevel::O2));
                assert_eq!(OptLevel::parse_flag("s"), Some(OptLevel::Os));
                assert_eq!(OptLevel::parse_flag("fast"), Some(OptLevel::Ofast));
            }

            #[test]
            fn flag_name_round_trips_through_parse_flag() {
                // `flag_name` includes the dash; `parse_flag` expects it stripped.
                for lvl in [
                    OptLevel::O0,
                    OptLevel::O1,
                    OptLevel::O2,
                    OptLevel::O3,
                    OptLevel::Os,
                    OptLevel::Ofast,
                ] {
                    let bare = lvl.flag_name().trim_start_matches('-');
                    assert_eq!(
                        OptLevel::parse_flag(bare),
                        Some(lvl),
                        "{:?} should round-trip via {:?}",
                        lvl,
                        bare
                    );
                }
            }

            #[test]
            fn level_predicates() {
                assert!(!OptLevel::O0.inlining());
                assert!(OptLevel::O2.inlining());
                assert!(OptLevel::O3.vectorize());
                assert!(!OptLevel::O2.vectorize());
                assert!(OptLevel::Ofast.fast_math());
                assert!(!OptLevel::O3.fast_math());
            }

            #[test]
            fn pipelines_build() {
                // O0 has no passes; every other level has at least one.
                assert!(build_pipeline(OptLevel::O0).is_empty());
                for lvl in OPTIMIZING_LEVELS {
                    let pm = build_pipeline(lvl);
                    assert!(
                        !pm.is_empty(),
                        "pipeline {:?} should have at least one pass",
                        lvl
                    );
                }
            }

            #[test]
            fn higher_optimization_levels_keep_gvn_enabled() {
                for lvl in [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast] {
                    let pm = build_pipeline(lvl);
                    let names = pm.pass_names();
                    assert!(
                        names.contains(&"gvn"),
                        "pipeline {:?} should include gvn, got {:?}",
                        lvl,
                        names
                    );
                }
            }

            #[test]
            fn ofast_enables_fast_math_reassoc_but_o3_does_not() {
                let o3 = build_pipeline(OptLevel::O3).pass_names();
                let ofast = build_pipeline(OptLevel::Ofast).pass_names();
                assert!(
                    !o3.contains(&"fast-math-reassoc"),
                    "O3 should stay strict, got {:?}",
                    o3
                );
                assert!(
                    ofast.contains(&"fast-math-reassoc"),
                    "Ofast should include fast-math reassociation, got {:?}",
                    ofast
                );
            }

            #[test]
            fn vectorize_is_enabled_only_at_o3_and_above() {
                let o2 = build_pipeline(OptLevel::O2).pass_names();
                let o3 = build_pipeline(OptLevel::O3).pass_names();
                let ofast = build_pipeline(OptLevel::Ofast).pass_names();

                assert!(
                    !o2.contains(&"vectorize"),
                    "O2 should not include vectorize, got {:?}",
                    o2
                );
                assert!(
                    o3.contains(&"vectorize"),
                    "O3 should include vectorize, got {:?}",
                    o3
                );
                assert!(
                    ofast.contains(&"vectorize"),
                    "Ofast should include vectorize, got {:?}",
                    ofast
                );
            }

            #[test]
            fn i128_pipeline_is_available_through_ofast() {
                assert!(
                    build_i128_pipeline(OptLevel::O1).is_some(),
                    "O1 should have the widened i128-safe pipeline"
                );
                for lvl in [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast] {
                    assert!(
                        build_i128_pipeline(lvl).is_some(),
                        "{:?} should be available once the widened i128 lane is proven",
                        lvl
                    );
                }
                assert!(
                    build_i128_pipeline(OptLevel::O0).is_none(),
                    "{:?} should not yet have widened i128 optimization support",
                    OptLevel::O0
                );
            }

            #[test]
            fn i128_pipeline_matches_full_o1() {
                let wide = build_i128_pipeline(OptLevel::O1)
                    .expect("O1 should expose the widened i128 pipeline")
                    .pass_names();
                let full = build_pipeline(OptLevel::O1).pass_names();
                assert_eq!(
                    wide, full,
                    "the widened i128 O1 lane should stay aligned with the ordinary O1 pipeline"
                );
            }

            #[test]
            fn i128_pipeline_matches_full_higher_levels() {
                for lvl in [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast] {
                    let wide = build_i128_pipeline(lvl)
                        .expect("level should expose the widened i128 pipeline")
                        .pass_names();
                    let full = build_pipeline(lvl).pass_names();
                    assert_eq!(
                        wide, full,
                        "the widened i128 lane should stay aligned with the ordinary {:?} pipeline",
                        lvl
                    );
                }
            }
        }
      
        442

1	//! Optimization-level → pass pipeline mapping.
2	//!
3	//! `OptLevel` is what the driver hands us; `build_pipeline` returns a
4	//! configured `PassManager`. Adding a new pass to a level is a one-line
5	//! change here, which keeps the dispatch logic in one place.
6
7	use super::bce::Bce;
8	use super::call_resolve::CallResolve;
9	use super::const_arg::ConstArgSpecialize;
10	use super::const_fold::ConstFold;
11	use super::const_prop::ConstProp;
12	use super::cse::LocalCse;
13	use super::dce::Dce;
14	use super::dead_arg::DeadArgElim;
15	use super::dead_func::DeadFuncElim;
16	use super::dse::Dse;
17	use super::fast_math::FastMathReassoc;
18	use super::fission::LoopFission;
19	use super::fusion::LoopFusion;
20	use super::global_lsf::GlobalLsf;
21	use super::gvn::Gvn;
22	use super::inline::Inline;
23	use super::interchange::LoopInterchange;
24	use super::licm::Licm;
25	use super::lsf::LocalLsf;
26	use super::mem2reg::Mem2Reg;
27	use super::pass::PassManager;
28	use super::peel::LoopPeel;
29	use super::preheader::PreheaderInsert;
30	use super::return_prop::ReturnPropagate;
31	use super::simplify_cfg::SimplifyCfg;
32	use super::sroa::Sroa;
33	use super::strength_reduce::StrengthReduce;
34	use super::unroll::LoopUnroll;
35	use super::unswitch::LoopUnswitch;
36	use super::vectorize::Vectorize;
37
/// Compiler optimization levels.
///
/// Mirrors `gfortran` / `clang` flag spellings so users have no surprises.
/// The pass lists below summarize what `build_pipeline` registers for each
/// level; that function is the authoritative source.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptLevel {
    /// `-O0` — no optimization; no passes are registered. Default during
    /// development.
    O0,
    /// `-O1` — cheap cleanup: call resolution, mem2reg, constant folding
    /// and propagation, inlining, dead argument/function elimination,
    /// CFG simplification, local CSE, DCE.
    O1,
    /// `-O2` — `-O1` plus SROA, bounds-check elimination, strength
    /// reduction, LICM, dead store elimination, GVN, and loop transforms
    /// (peeling, unswitching, interchange, fission, fusion, unrolling).
    O2,
    /// `-O3` — `-O2` plus NEON loop vectorization and the `-O3` inlining
    /// configuration.
    O3,
    /// `-Os` — like `-O2` but prefer code size: no loop fission, fusion,
    /// or unrolling, and the `-Os` inlining configuration.
    Os,
    /// `-Ofast` — `-O3` plus value-changing fast-math reassociation
    /// (relaxes IEEE 754 strictness).
    Ofast,
}
60
61	impl OptLevel {
62	/// Parse the textual flag (`O0`, `O1`, ..., `Ofast`).
63	pub fn parse_flag(s: &str) -> Option<Self> {
64	match s {
65	"O0" \| "0" => Some(Self::O0),
66	"O1" \| "1" => Some(Self::O1),
67	"O2" \| "2" => Some(Self::O2),
68	"O3" \| "3" => Some(Self::O3),
69	"Os" \| "s" => Some(Self::Os),
70	"Ofast" \| "fast" => Some(Self::Ofast),
71	_ => None,
72	}
73	}
74
75	pub fn flag_name(self) -> &'static str {
76	match self {
77	Self::O0 => "-O0",
78	Self::O1 => "-O1",
79	Self::O2 => "-O2",
80	Self::O3 => "-O3",
81	Self::Os => "-Os",
82	Self::Ofast => "-Ofast",
83	}
84	}
85
86	/// Does this level enable inlining?
87	///
88	/// Audit Min-6: this predicate is currently consulted only by the
89	/// pipeline test harness. Once `Inline` lands as a pass, the
90	/// builder below will gate registration on this. Same for the
91	/// other two predicates.
92	pub fn inlining(self) -> bool {
93	matches!(
94	self,
95	Self::O1 \| Self::O2 \| Self::O3 \| Self::Os \| Self::Ofast
96	)
97	}
98
99	/// Does this level enable loop vectorization (NEON)?
100	pub fn vectorize(self) -> bool {
101	matches!(self, Self::O3 \| Self::Ofast)
102	}
103
104	/// Does this level allow value-changing fast-math reassociation
105	/// (`-Ofast`-only — relaxes IEEE 754 strictness for FAdd/FMul
106	/// reordering, signed-zero collapse, etc.)?
107	pub fn fast_math(self) -> bool {
108	matches!(self, Self::Ofast)
109	}
110	}
111
112	/// Build the pass pipeline for a given optimization level.
113	///
114	/// Adding a new optimization pass is a single push here. Keeping this
115	/// in one function makes it trivial to audit which passes run at which
116	/// level.
117	pub fn build_pipeline(level: OptLevel) -> PassManager {
118	let mut pm = PassManager::new();
119	match level {
120	OptLevel::O0 => {
121	// Nothing — preserve unoptimized IR exactly as it was lowered.
122	}
123	OptLevel::O1 => {
124	// Cheap, always-correct cleanup.
125	//
126	// Mem2reg runs FIRST so every downstream pass sees SSA
127	// values instead of alloca/load/store round-trips.
128	// Without it, const_fold can't propagate constants
129	// through local variables, CSE can't dedupe across
130	// store/load pairs, and LICM is effectively dormant
131	// (loads block every hoist attempt).
132	pm.add(Box::new(CallResolve));
133	pm.add(Box::new(Mem2Reg));
134	pm.add(Box::new(ConstFold));
135	pm.add(Box::new(Inline::for_level(OptLevel::O1)));
136	pm.add(Box::new(ConstArgSpecialize));
137	pm.add(Box::new(DeadArgElim));
138	pm.add(Box::new(ReturnPropagate));
139	pm.add(Box::new(SimplifyCfg));
140	pm.add(Box::new(DeadFuncElim));
141	pm.add(Box::new(LocalLsf));
142	pm.add(Box::new(LocalCse));
143	pm.add(Box::new(ConstProp));
144	pm.add(Box::new(Dce));
145	}
146	OptLevel::O2 => {
147	// O1 plus LICM, strength reduction, DSE, LSF, loop transforms.
148	pm.add(Box::new(CallResolve));
149	pm.add(Box::new(Mem2Reg));
150	pm.add(Box::new(ConstFold));
151	pm.add(Box::new(Sroa)); // after SSA + const fold (GCC pattern)
152	pm.add(Box::new(Mem2Reg)); // re-promote SROA-created scalar allocas
153	pm.add(Box::new(Inline::for_level(OptLevel::O2)));
154	pm.add(Box::new(ConstArgSpecialize));
155	pm.add(Box::new(DeadArgElim));
156	pm.add(Box::new(ReturnPropagate));
157	pm.add(Box::new(SimplifyCfg));
158	pm.add(Box::new(DeadFuncElim));
159	pm.add(Box::new(Bce));
160	pm.add(Box::new(StrengthReduce));
161	pm.add(Box::new(LocalLsf));
162	pm.add(Box::new(GlobalLsf));
163	pm.add(Box::new(LocalCse));
164	pm.add(Box::new(PreheaderInsert));
165	pm.add(Box::new(LoopPeel));
166	pm.add(Box::new(LoopUnswitch));
167	pm.add(Box::new(Licm));
168	pm.add(Box::new(ConstProp));
169	pm.add(Box::new(Dse));
170	pm.add(Box::new(LoopInterchange));
171	pm.add(Box::new(LoopFission));
172	pm.add(Box::new(LoopFusion));
173	pm.add(Box::new(LoopUnroll));
174	pm.add(Box::new(Gvn)); // after loop passes to avoid SSA conflicts
175	pm.add(Box::new(Dce));
176	}
177	OptLevel::Os => {
178	// Like O2 but no loop unrolling (prefer code size).
179	pm.add(Box::new(CallResolve));
180	pm.add(Box::new(Mem2Reg));
181	pm.add(Box::new(ConstFold));
182	pm.add(Box::new(Sroa));
183	pm.add(Box::new(Mem2Reg));
184	pm.add(Box::new(Inline::for_level(OptLevel::Os)));
185	pm.add(Box::new(ConstArgSpecialize));
186	pm.add(Box::new(DeadArgElim));
187	pm.add(Box::new(ReturnPropagate));
188	pm.add(Box::new(SimplifyCfg));
189	pm.add(Box::new(DeadFuncElim));
190	pm.add(Box::new(Bce));
191	pm.add(Box::new(StrengthReduce));
192	pm.add(Box::new(LocalLsf));
193	pm.add(Box::new(GlobalLsf));
194	pm.add(Box::new(LocalCse));
195	pm.add(Box::new(PreheaderInsert));
196	pm.add(Box::new(LoopPeel));
197	pm.add(Box::new(LoopUnswitch));
198	pm.add(Box::new(Licm));
199	pm.add(Box::new(ConstProp));
200	pm.add(Box::new(Dse));
201	pm.add(Box::new(LoopInterchange));
202	pm.add(Box::new(Gvn));
203	pm.add(Box::new(Dce));
204	}
205	OptLevel::O3 => {
206	// O2 passes + loop unrolling + interchange.
207	pm.add(Box::new(CallResolve));
208	pm.add(Box::new(Mem2Reg));
209	pm.add(Box::new(ConstFold));
210	pm.add(Box::new(Sroa));
211	pm.add(Box::new(Mem2Reg));
212	pm.add(Box::new(Inline::for_level(OptLevel::O3)));
213	pm.add(Box::new(ConstArgSpecialize));
214	pm.add(Box::new(DeadArgElim));
215	pm.add(Box::new(ReturnPropagate));
216	pm.add(Box::new(SimplifyCfg));
217	pm.add(Box::new(DeadFuncElim));
218	pm.add(Box::new(Bce));
219	pm.add(Box::new(StrengthReduce));
220	pm.add(Box::new(LocalLsf));
221	pm.add(Box::new(GlobalLsf));
222	pm.add(Box::new(LocalCse));
223	pm.add(Box::new(PreheaderInsert));
224	pm.add(Box::new(LoopPeel));
225	pm.add(Box::new(LoopUnswitch));
226	pm.add(Box::new(Licm));
227	pm.add(Box::new(ConstProp));
228	pm.add(Box::new(Dse));
229	pm.add(Box::new(LoopInterchange));
230	pm.add(Box::new(LoopFission));
231	pm.add(Box::new(LoopFusion));
232	pm.add(Box::new(Vectorize));
233	pm.add(Box::new(LoopUnroll));
234	pm.add(Box::new(Gvn)); // keep O3/Ofast aligned with O2/Os value numbering
235	pm.add(Box::new(Dce));
236	}
237	OptLevel::Ofast => {
238	// O3 plus Ofast-only fast-math reassociation.
239	pm.add(Box::new(CallResolve));
240	pm.add(Box::new(Mem2Reg));
241	pm.add(Box::new(ConstFold));
242	pm.add(Box::new(Sroa));
243	pm.add(Box::new(Mem2Reg));
244	pm.add(Box::new(Inline::for_level(OptLevel::O3)));
245	pm.add(Box::new(ConstArgSpecialize));
246	pm.add(Box::new(DeadArgElim));
247	pm.add(Box::new(ReturnPropagate));
248	pm.add(Box::new(SimplifyCfg));
249	pm.add(Box::new(DeadFuncElim));
250	pm.add(Box::new(Bce));
251	pm.add(Box::new(StrengthReduce));
252	pm.add(Box::new(LocalLsf));
253	pm.add(Box::new(GlobalLsf));
254	pm.add(Box::new(LocalCse));
255	pm.add(Box::new(PreheaderInsert));
256	pm.add(Box::new(LoopPeel));
257	pm.add(Box::new(LoopUnswitch));
258	pm.add(Box::new(Licm));
259	pm.add(Box::new(ConstProp));
260	pm.add(Box::new(Dse));
261	pm.add(Box::new(LoopInterchange));
262	pm.add(Box::new(LoopFission));
263	pm.add(Box::new(LoopFusion));
264	pm.add(Box::new(Vectorize));
265	pm.add(Box::new(LoopUnroll));
266	pm.add(Box::new(FastMathReassoc));
267	pm.add(Box::new(Gvn));
268	pm.add(Box::new(Dce));
269	}
270	}
271	pm
272	}
273
274	/// Build the restricted optimization pipeline for modules that still contain
275	/// non-global `i128` values.
276	///
277	/// This deliberately widens `i128` support one optimization lane at a time.
278	/// Now that the backend can carry stack-backed `i128` values through block
279	/// params and mem2reg-style joins, the widened `i128` lane can use the full
280	/// ordinary O1/O2/O3/Os/Ofast pipelines. Higher levels remain gated until their
281	/// pass shapes are proven end to end.
282	pub fn build_i128_pipeline(level: OptLevel) -> Option<PassManager> {
283	match level {
284	OptLevel::O1 => Some(build_pipeline(OptLevel::O1)),
285	OptLevel::O2 => Some(build_pipeline(OptLevel::O2)),
286	OptLevel::O3 => Some(build_pipeline(OptLevel::O3)),
287	OptLevel::Os => Some(build_pipeline(OptLevel::Os)),
288	OptLevel::Ofast => Some(build_pipeline(OptLevel::Ofast)),
289	_ => None,
290	}
291	}
292
#[cfg(test)]
mod tests {
    use super::*;

    /// `parse_flag` recognises the `O`-prefixed spellings and rejects an
    /// unknown level.
    #[test]
    fn parse_flags() {
        let cases = [
            ("O0", Some(OptLevel::O0)),
            ("Os", Some(OptLevel::Os)),
            ("O3", Some(OptLevel::O3)),
            ("Ofast", Some(OptLevel::Ofast)),
            ("O9", None),
        ];
        for (text, expected) in cases {
            assert_eq!(OptLevel::parse_flag(text), expected);
        }
    }

    /// Spot-check each level predicate on one enabling and one
    /// non-enabling level.
    #[test]
    fn level_predicates() {
        // inlining: everything but O0
        assert!(!OptLevel::O0.inlining());
        assert!(OptLevel::O2.inlining());

        // vectorize: O3 and up
        assert!(OptLevel::O3.vectorize());
        assert!(!OptLevel::O2.vectorize());

        // fast_math: Ofast only
        assert!(OptLevel::Ofast.fast_math());
        assert!(!OptLevel::O3.fast_math());
    }

    /// O0 has no passes; every other level has at least one.
    #[test]
    fn pipelines_build() {
        assert!(build_pipeline(OptLevel::O0).is_empty());

        let optimizing = [
            OptLevel::O1,
            OptLevel::O2,
            OptLevel::O3,
            OptLevel::Os,
            OptLevel::Ofast,
        ];
        for lvl in optimizing {
            assert!(
                !build_pipeline(lvl).is_empty(),
                "pipeline {:?} should have at least one pass",
                lvl
            );
        }
    }

    /// GVN must be present at every level above O1.
    #[test]
    fn higher_optimization_levels_keep_gvn_enabled() {
        let above_o1 = [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast];
        for lvl in above_o1 {
            let names = build_pipeline(lvl).pass_names();
            assert!(
                names.contains(&"gvn"),
                "pipeline {:?} should include gvn, got {:?}",
                lvl,
                names
            );
        }
    }

    /// Fast-math reassociation is the one pass that separates Ofast from O3.
    #[test]
    fn ofast_enables_fast_math_reassoc_but_o3_does_not() {
        let strict = build_pipeline(OptLevel::O3).pass_names();
        assert!(
            !strict.contains(&"fast-math-reassoc"),
            "O3 should stay strict, got {:?}",
            strict
        );

        let relaxed = build_pipeline(OptLevel::Ofast).pass_names();
        assert!(
            relaxed.contains(&"fast-math-reassoc"),
            "Ofast should include fast-math reassociation, got {:?}",
            relaxed
        );
    }

    /// The vectorizer is absent at O2 and present at O3 and Ofast.
    #[test]
    fn vectorize_is_enabled_only_at_o3_and_above() {
        let without = build_pipeline(OptLevel::O2).pass_names();
        assert!(
            !without.contains(&"vectorize"),
            "O2 should not include vectorize, got {:?}",
            without
        );

        let at_o3 = build_pipeline(OptLevel::O3).pass_names();
        assert!(
            at_o3.contains(&"vectorize"),
            "O3 should include vectorize, got {:?}",
            at_o3
        );

        let at_ofast = build_pipeline(OptLevel::Ofast).pass_names();
        assert!(
            at_ofast.contains(&"vectorize"),
            "Ofast should include vectorize, got {:?}",
            at_ofast
        );
    }

    /// Every optimizing level exposes an i128 pipeline; O0 does not.
    #[test]
    fn i128_pipeline_is_available_through_ofast() {
        assert!(
            build_i128_pipeline(OptLevel::O1).is_some(),
            "O1 should have the widened i128-safe pipeline"
        );
        assert!(
            build_i128_pipeline(OptLevel::O2).is_some(),
            "O2 should be available once the widened i128 lane is proven"
        );

        let later_lanes = [OptLevel::O3, OptLevel::Os, OptLevel::Ofast];
        for lvl in later_lanes {
            assert!(
                build_i128_pipeline(lvl).is_some(),
                "{:?} should be available once the widened i128 lane is proven",
                lvl
            );
        }

        let lvl = OptLevel::O0;
        assert!(
            build_i128_pipeline(lvl).is_none(),
            "{:?} should not yet have widened i128 optimization support",
            lvl
        );
    }

    /// The i128 O1 lane runs the same passes, in the same order, as plain O1.
    #[test]
    fn i128_pipeline_matches_full_o1() {
        let full = build_pipeline(OptLevel::O1).pass_names();
        let wide = build_i128_pipeline(OptLevel::O1)
            .expect("O1 should expose the widened i128 pipeline")
            .pass_names();
        assert_eq!(
            wide, full,
            "the widened i128 O1 lane should stay aligned with the ordinary O1 pipeline"
        );
    }

    /// Same alignment check for every level above O1.
    #[test]
    fn i128_pipeline_matches_full_higher_levels() {
        let above_o1 = [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast];
        for lvl in above_o1 {
            let full = build_pipeline(lvl).pass_names();
            let wide = build_i128_pipeline(lvl)
                .expect("level should expose the widened i128 pipeline")
                .pass_names();
            assert_eq!(
                wide, full,
                "the widened i128 lane should stay aligned with the ordinary {:?} pipeline",
                lvl
            );
        }
    }
}
442