armfortas Public

Watch 0 Fork 0 Star 0

Rust · 16657 bytes Raw Blame History

  
        1
        //! Optimization-level → pass pipeline mapping.
      
        2
        //!
      
        3
        //! `OptLevel` is what the driver hands us; `build_pipeline` returns a
      
        4
        //! configured `PassManager`. Adding a new pass to a level is a one-line
      
        5
        //! change here, which keeps the dispatch logic in one place.
      
        6
        
        7
        use super::bce::Bce;
      
        8
        use super::call_resolve::CallResolve;
      
        9
        use super::const_arg::ConstArgSpecialize;
      
        10
        use super::const_fold::ConstFold;
      
        11
        use super::const_prop::ConstProp;
      
        12
        use super::cse::LocalCse;
      
        13
        use super::dce::Dce;
      
        14
        use super::dead_arg::DeadArgElim;
      
        15
        use super::dead_func::DeadFuncElim;
      
        16
        use super::dse::Dse;
      
        17
        use super::fast_math::FastMathReassoc;
      
        18
        use super::fission::LoopFission;
      
        19
        use super::fusion::LoopFusion;
      
        20
        use super::global_lsf::GlobalLsf;
      
        21
        use super::gvn::Gvn;
      
        22
        use super::inline::Inline;
      
        23
        use super::interchange::LoopInterchange;
      
        24
        use super::jump_thread::JumpThread;
      
        25
        use super::licm::Licm;
      
        26
        use super::lsf::LocalLsf;
      
        27
        use super::mem2reg::Mem2Reg;
      
        28
        use super::neon_vectorize::NeonVectorize;
      
        29
        use super::pass::PassManager;
      
        30
        use super::peel::LoopPeel;
      
        31
        use super::preheader::PreheaderInsert;
      
        32
        use super::return_prop::ReturnPropagate;
      
        33
        use super::sccp::Sccp_;
      
        34
        use super::simplify_cfg::SimplifyCfg;
      
        35
        use super::sroa::Sroa;
      
        36
        use super::strength_reduce::StrengthReduce;
      
        37
        use super::unroll::LoopUnroll;
      
        38
        use super::unswitch::LoopUnswitch;
      
        39
        use super::vectorize::Vectorize;
      
        40
        
        41
        /// Optimization level selected by the driver.
        ///
        /// Mirrors `gfortran` / `clang` semantics so users have no surprises.
        /// The per-variant notes summarize which pass groups `build_pipeline`
        /// registers; that function remains the authoritative, ordered list.
        #[derive(Debug, Clone, Copy, PartialEq, Eq)]
        pub enum OptLevel {
            /// `-O0` — no optimization; the pipeline is left empty. Default
            /// during development.
            O0,
            /// `-O1` — call resolution, mem2reg, constant folding and
            /// propagation, inlining, dead argument/function elimination,
            /// CFG simplification, local CSE, jump threading and DCE.
            O1,
            /// `-O2` — the `-O1` set plus SROA, bounds-check elimination,
            /// strength reduction, LICM, dead store elimination, GVN and the
            /// loop transforms (peel, unswitch, interchange, fission, fusion,
            /// unroll).
            O2,
            /// `-O3` — the `-O2` set plus the `NeonVectorize` and `Vectorize`
            /// passes, with inlining configured for `-O3`.
            O3,
            /// `-Os` — like `-O2` but prefers code size: loop fission, fusion
            /// and unrolling are not registered, and inlining is configured
            /// for `-Os`.
            Os,
            /// `-Ofast` — the `-O3` set plus fast-math reassociation, which may
            /// change floating-point results.
            Ofast,
        }
      
        63
        
        64
        impl OptLevel {
      
        65
            /// Parse the textual flag (`O0`, `O1`, ..., `Ofast`).
      
        66
            pub fn parse_flag(s: &str) -> Option<Self> {
      
        67
                match s {
      
        68
                    "O0" | "0" => Some(Self::O0),
      
        69
                    "O1" | "1" => Some(Self::O1),
      
        70
                    "O2" | "2" => Some(Self::O2),
      
        71
                    "O3" | "3" => Some(Self::O3),
      
        72
                    "Os" | "s" => Some(Self::Os),
      
        73
                    "Ofast" | "fast" => Some(Self::Ofast),
      
        74
                    _ => None,
      
        75
                }
      
        76
            }
      
        77
        
        78
            pub fn flag_name(self) -> &'static str {
      
        79
                match self {
      
        80
                    Self::O0 => "-O0",
      
        81
                    Self::O1 => "-O1",
      
        82
                    Self::O2 => "-O2",
      
        83
                    Self::O3 => "-O3",
      
        84
                    Self::Os => "-Os",
      
        85
                    Self::Ofast => "-Ofast",
      
        86
                }
      
        87
            }
      
        88
        
        89
            /// Does this level enable inlining?
      
        90
            ///
      
        91
            /// Audit Min-6: this predicate is currently consulted only by the
      
        92
            /// pipeline test harness. Once `Inline` lands as a pass, the
      
        93
            /// builder below will gate registration on this. Same for the
      
        94
            /// other two predicates.
      
        95
            pub fn inlining(self) -> bool {
      
        96
                matches!(
      
        97
                    self,
      
        98
                    Self::O1 | Self::O2 | Self::O3 | Self::Os | Self::Ofast
      
        99
                )
      
        100
            }
      
        101
        
        102
            /// Does this level enable loop vectorization (NEON)?
      
        103
            pub fn vectorize(self) -> bool {
      
        104
                matches!(self, Self::O3 | Self::Ofast)
      
        105
            }
      
        106
        
        107
            /// Does this level allow value-changing fast-math reassociation
      
        108
            /// (`-Ofast`-only — relaxes IEEE 754 strictness for FAdd/FMul
      
        109
            /// reordering, signed-zero collapse, etc.)?
      
        110
            pub fn fast_math(self) -> bool {
      
        111
                matches!(self, Self::Ofast)
      
        112
            }
      
        113
        }
      
        114
        
        115
        /// Build the pass pipeline for a given optimization level.
      
        116
        ///
      
        117
        /// Adding a new optimization pass is a single push here. Keeping this
      
        118
        /// in one function makes it trivial to audit which passes run at which
      
        119
        /// level.
      
        120
        pub fn build_pipeline(level: OptLevel) -> PassManager {
      
        121
            let mut pm = PassManager::new();
      
        122
            match level {
      
        123
                OptLevel::O0 => {
      
        124
                    // Nothing — preserve unoptimized IR exactly as it was lowered.
      
        125
                }
      
        126
                OptLevel::O1 => {
      
        127
                    // Cheap, always-correct cleanup.
      
        128
                    //
      
        129
                    // Mem2reg runs FIRST so every downstream pass sees SSA
      
        130
                    // values instead of alloca/load/store round-trips.
      
        131
                    // Without it, const_fold can't propagate constants
      
        132
                    // through local variables, CSE can't dedupe across
      
        133
                    // store/load pairs, and LICM is effectively dormant
      
        134
                    // (loads block every hoist attempt).
      
        135
                    pm.add(Box::new(CallResolve));
      
        136
                    pm.add(Box::new(Mem2Reg));
      
        137
                    pm.add(Box::new(ConstFold));
      
        138
                    pm.add(Box::new(Inline::for_level(OptLevel::O1)));
      
        139
                    pm.add(Box::new(ConstArgSpecialize));
      
        140
                    pm.add(Box::new(DeadArgElim));
      
        141
                    pm.add(Box::new(ReturnPropagate));
      
        142
                    pm.add(Box::new(SimplifyCfg));
      
        143
                    pm.add(Box::new(DeadFuncElim));
      
        144
                    pm.add(Box::new(LocalLsf));
      
        145
                    pm.add(Box::new(LocalCse));
      
        146
                    pm.add(Box::new(Sccp_));
      
        147
                    pm.add(Box::new(JumpThread));
      
        148
                    pm.add(Box::new(ConstProp));
      
        149
                    pm.add(Box::new(Dce));
      
        150
                }
      
        151
                OptLevel::O2 => {
      
        152
                    // O1 plus LICM, strength reduction, DSE, LSF, loop transforms.
      
        153
                    pm.add(Box::new(CallResolve));
      
        154
                    pm.add(Box::new(Mem2Reg));
      
        155
                    pm.add(Box::new(ConstFold));
      
        156
                    pm.add(Box::new(Sroa)); // after SSA + const fold (GCC pattern)
      
        157
                    pm.add(Box::new(Mem2Reg)); // re-promote SROA-created scalar allocas
      
        158
                    pm.add(Box::new(Inline::for_level(OptLevel::O2)));
      
        159
                    pm.add(Box::new(ConstArgSpecialize));
      
        160
                    pm.add(Box::new(DeadArgElim));
      
        161
                    pm.add(Box::new(ReturnPropagate));
      
        162
                    pm.add(Box::new(SimplifyCfg));
      
        163
                    pm.add(Box::new(DeadFuncElim));
      
        164
                    pm.add(Box::new(Bce));
      
        165
                    pm.add(Box::new(StrengthReduce));
      
        166
                    pm.add(Box::new(LocalLsf));
      
        167
                    pm.add(Box::new(GlobalLsf));
      
        168
                    pm.add(Box::new(LocalCse));
      
        169
                    pm.add(Box::new(PreheaderInsert));
      
        170
                    pm.add(Box::new(LoopPeel));
      
        171
                    pm.add(Box::new(LoopUnswitch));
      
        172
                    pm.add(Box::new(Licm));
      
        173
                    pm.add(Box::new(Sccp_));
      
        174
                    pm.add(Box::new(JumpThread));
      
        175
                    pm.add(Box::new(ConstProp));
      
        176
                    pm.add(Box::new(Dse));
      
        177
                    pm.add(Box::new(LoopInterchange));
      
        178
                    pm.add(Box::new(LoopFission));
      
        179
                    pm.add(Box::new(LoopFusion));
      
        180
                    pm.add(Box::new(LoopUnroll));
      
        181
                    pm.add(Box::new(Gvn)); // after loop passes to avoid SSA conflicts
      
        182
                    pm.add(Box::new(Dce));
      
        183
                }
      
        184
                OptLevel::Os => {
      
        185
                    // Like O2 but no loop unrolling (prefer code size).
      
        186
                    pm.add(Box::new(CallResolve));
      
        187
                    pm.add(Box::new(Mem2Reg));
      
        188
                    pm.add(Box::new(ConstFold));
      
        189
                    pm.add(Box::new(Sroa));
      
        190
                    pm.add(Box::new(Mem2Reg));
      
        191
                    pm.add(Box::new(Inline::for_level(OptLevel::Os)));
      
        192
                    pm.add(Box::new(ConstArgSpecialize));
      
        193
                    pm.add(Box::new(DeadArgElim));
      
        194
                    pm.add(Box::new(ReturnPropagate));
      
        195
                    pm.add(Box::new(SimplifyCfg));
      
        196
                    pm.add(Box::new(DeadFuncElim));
      
        197
                    pm.add(Box::new(Bce));
      
        198
                    pm.add(Box::new(StrengthReduce));
      
        199
                    pm.add(Box::new(LocalLsf));
      
        200
                    pm.add(Box::new(GlobalLsf));
      
        201
                    pm.add(Box::new(LocalCse));
      
        202
                    pm.add(Box::new(PreheaderInsert));
      
        203
                    pm.add(Box::new(LoopPeel));
      
        204
                    pm.add(Box::new(LoopUnswitch));
      
        205
                    pm.add(Box::new(Licm));
      
        206
                    pm.add(Box::new(Sccp_));
      
        207
                    pm.add(Box::new(JumpThread));
      
        208
                    pm.add(Box::new(ConstProp));
      
        209
                    pm.add(Box::new(Dse));
      
        210
                    pm.add(Box::new(LoopInterchange));
      
        211
                    pm.add(Box::new(Gvn));
      
        212
                    pm.add(Box::new(Dce));
      
        213
                }
      
        214
                OptLevel::O3 => {
      
        215
                    // O2 passes + loop unrolling + interchange.
      
        216
                    pm.add(Box::new(CallResolve));
      
        217
                    pm.add(Box::new(Mem2Reg));
      
        218
                    pm.add(Box::new(ConstFold));
      
        219
                    pm.add(Box::new(Sroa));
      
        220
                    pm.add(Box::new(Mem2Reg));
      
        221
                    pm.add(Box::new(Inline::for_level(OptLevel::O3)));
      
        222
                    pm.add(Box::new(ConstArgSpecialize));
      
        223
                    pm.add(Box::new(DeadArgElim));
      
        224
                    pm.add(Box::new(ReturnPropagate));
      
        225
                    pm.add(Box::new(SimplifyCfg));
      
        226
                    pm.add(Box::new(DeadFuncElim));
      
        227
                    pm.add(Box::new(Bce));
      
        228
                    pm.add(Box::new(StrengthReduce));
      
        229
                    pm.add(Box::new(LocalLsf));
      
        230
                    pm.add(Box::new(GlobalLsf));
      
        231
                    pm.add(Box::new(LocalCse));
      
        232
                    pm.add(Box::new(PreheaderInsert));
      
        233
                    pm.add(Box::new(LoopPeel));
      
        234
                    pm.add(Box::new(LoopUnswitch));
      
        235
                    pm.add(Box::new(Licm));
      
        236
                    pm.add(Box::new(Sccp_));
      
        237
                    pm.add(Box::new(JumpThread));
      
        238
                    pm.add(Box::new(ConstProp));
      
        239
                    pm.add(Box::new(Dse));
      
        240
                    pm.add(Box::new(LoopInterchange));
      
        241
                    pm.add(Box::new(LoopFission));
      
        242
                    pm.add(Box::new(LoopFusion));
      
        243
                    pm.add(Box::new(NeonVectorize));
      
        244
                    pm.add(Box::new(Vectorize));
      
        245
                    pm.add(Box::new(LoopUnroll));
      
        246
                    pm.add(Box::new(Gvn)); // keep O3/Ofast aligned with O2/Os value numbering
      
        247
                    pm.add(Box::new(Dce));
      
        248
                }
      
        249
                OptLevel::Ofast => {
      
        250
                    // O3 plus Ofast-only fast-math reassociation.
      
        251
                    pm.add(Box::new(CallResolve));
      
        252
                    pm.add(Box::new(Mem2Reg));
      
        253
                    pm.add(Box::new(ConstFold));
      
        254
                    pm.add(Box::new(Sroa));
      
        255
                    pm.add(Box::new(Mem2Reg));
      
        256
                    pm.add(Box::new(Inline::for_level(OptLevel::O3)));
      
        257
                    pm.add(Box::new(ConstArgSpecialize));
      
        258
                    pm.add(Box::new(DeadArgElim));
      
        259
                    pm.add(Box::new(ReturnPropagate));
      
        260
                    pm.add(Box::new(SimplifyCfg));
      
        261
                    pm.add(Box::new(DeadFuncElim));
      
        262
                    pm.add(Box::new(Bce));
      
        263
                    pm.add(Box::new(StrengthReduce));
      
        264
                    pm.add(Box::new(LocalLsf));
      
        265
                    pm.add(Box::new(GlobalLsf));
      
        266
                    pm.add(Box::new(LocalCse));
      
        267
                    pm.add(Box::new(PreheaderInsert));
      
        268
                    pm.add(Box::new(LoopPeel));
      
        269
                    pm.add(Box::new(LoopUnswitch));
      
        270
                    pm.add(Box::new(Licm));
      
        271
                    pm.add(Box::new(Sccp_));
      
        272
                    pm.add(Box::new(JumpThread));
      
        273
                    pm.add(Box::new(ConstProp));
      
        274
                    pm.add(Box::new(Dse));
      
        275
                    pm.add(Box::new(LoopInterchange));
      
        276
                    pm.add(Box::new(LoopFission));
      
        277
                    pm.add(Box::new(LoopFusion));
      
        278
                    pm.add(Box::new(NeonVectorize));
      
        279
                    pm.add(Box::new(Vectorize));
      
        280
                    pm.add(Box::new(LoopUnroll));
      
        281
                    pm.add(Box::new(FastMathReassoc));
      
        282
                    pm.add(Box::new(Gvn));
      
        283
                    pm.add(Box::new(Dce));
      
        284
                }
      
        285
            }
      
        286
            pm
      
        287
        }
      
        288
        
        289
        /// Build the restricted optimization pipeline for modules that still contain
      
        290
        /// non-global `i128` values.
      
        291
        ///
      
        292
        /// This deliberately widens `i128` support one optimization lane at a time.
      
        293
        /// Now that the backend can carry stack-backed `i128` values through block
      
        294
        /// params and mem2reg-style joins, the widened `i128` lane can use the full
      
        295
        /// ordinary O1/O2/O3/Os/Ofast pipelines. Higher levels remain gated until their
      
        296
        /// pass shapes are proven end to end.
      
        297
        pub fn build_i128_pipeline(level: OptLevel) -> Option<PassManager> {
      
        298
            match level {
      
        299
                OptLevel::O1 => Some(build_pipeline(OptLevel::O1)),
      
        300
                OptLevel::O2 => Some(build_pipeline(OptLevel::O2)),
      
        301
                OptLevel::O3 => Some(build_pipeline(OptLevel::O3)),
      
        302
                OptLevel::Os => Some(build_pipeline(OptLevel::Os)),
      
        303
                OptLevel::Ofast => Some(build_pipeline(OptLevel::Ofast)),
      
        304
                _ => None,
      
        305
            }
      
        306
        }
      
        307
        
        308
        #[cfg(test)]
        mod tests {
            use super::*;

            /// Every level expected to register at least one pass.
            const OPTIMIZING: [OptLevel; 5] = [
                OptLevel::O1,
                OptLevel::O2,
                OptLevel::O3,
                OptLevel::Os,
                OptLevel::Ofast,
            ];

            /// Every level above `-O1`.
            const ABOVE_O1: [OptLevel; 4] =
                [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast];

            /// Known flag spellings parse to their variant; unknown text is rejected.
            #[test]
            fn parse_flags() {
                let cases = [
                    ("O0", Some(OptLevel::O0)),
                    ("Os", Some(OptLevel::Os)),
                    ("O3", Some(OptLevel::O3)),
                    ("Ofast", Some(OptLevel::Ofast)),
                    ("O9", None),
                ];
                for (text, expected) in cases {
                    assert_eq!(OptLevel::parse_flag(text), expected);
                }
            }

            /// Spot-check each predicate on one enabling and one non-enabling level.
            #[test]
            fn level_predicates() {
                let (o0, o2, o3, ofast) =
                    (OptLevel::O0, OptLevel::O2, OptLevel::O3, OptLevel::Ofast);

                assert!(!o0.inlining());
                assert!(o2.inlining());

                assert!(o3.vectorize());
                assert!(!o2.vectorize());

                assert!(ofast.fast_math());
                assert!(!o3.fast_math());
            }

            /// O0 has no passes; every other level has at least one.
            #[test]
            fn pipelines_build() {
                assert!(build_pipeline(OptLevel::O0).is_empty());
                for lvl in OPTIMIZING {
                    let built = build_pipeline(lvl);
                    assert!(
                        !built.is_empty(),
                        "pipeline {:?} should have at least one pass",
                        lvl
                    );
                }
            }

            /// GVN must be registered at every level above O1.
            #[test]
            fn higher_optimization_levels_keep_gvn_enabled() {
                for lvl in ABOVE_O1 {
                    let names = build_pipeline(lvl).pass_names();
                    let has_gvn = names.contains(&"gvn");
                    assert!(
                        has_gvn,
                        "pipeline {:?} should include gvn, got {:?}",
                        lvl, names
                    );
                }
            }

            /// Fast-math reassociation is the one pass separating Ofast from O3.
            #[test]
            fn ofast_enables_fast_math_reassoc_but_o3_does_not() {
                let strict = build_pipeline(OptLevel::O3).pass_names();
                assert!(
                    !strict.contains(&"fast-math-reassoc"),
                    "O3 should stay strict, got {:?}",
                    strict
                );

                let relaxed = build_pipeline(OptLevel::Ofast).pass_names();
                assert!(
                    relaxed.contains(&"fast-math-reassoc"),
                    "Ofast should include fast-math reassociation, got {:?}",
                    relaxed
                );
            }

            /// The vectorizer is absent at O2 and present at O3 and Ofast.
            #[test]
            fn vectorize_is_enabled_only_at_o3_and_above() {
                let at_o2 = build_pipeline(OptLevel::O2).pass_names();
                assert!(
                    !at_o2.contains(&"vectorize"),
                    "O2 should not include vectorize, got {:?}",
                    at_o2
                );

                let at_o3 = build_pipeline(OptLevel::O3).pass_names();
                assert!(
                    at_o3.contains(&"vectorize"),
                    "O3 should include vectorize, got {:?}",
                    at_o3
                );

                let at_ofast = build_pipeline(OptLevel::Ofast).pass_names();
                assert!(
                    at_ofast.contains(&"vectorize"),
                    "Ofast should include vectorize, got {:?}",
                    at_ofast
                );
            }

            /// Every optimizing level exposes an i128 pipeline; O0 does not.
            #[test]
            fn i128_pipeline_is_available_through_ofast() {
                assert!(
                    build_i128_pipeline(OptLevel::O1).is_some(),
                    "O1 should have the widened i128-safe pipeline"
                );
                for lvl in ABOVE_O1 {
                    assert!(
                        build_i128_pipeline(lvl).is_some(),
                        "{:?} should be available once the widened i128 lane is proven",
                        lvl
                    );
                }
                assert!(
                    build_i128_pipeline(OptLevel::O0).is_none(),
                    "{:?} should not yet have widened i128 optimization support",
                    OptLevel::O0
                );
            }

            /// At O1 the i128 lane must list the same passes as the ordinary pipeline.
            #[test]
            fn i128_pipeline_matches_full_o1() {
                let full = build_pipeline(OptLevel::O1).pass_names();
                let wide = build_i128_pipeline(OptLevel::O1)
                    .expect("O1 should expose the widened i128 pipeline")
                    .pass_names();
                assert_eq!(
                    wide, full,
                    "the widened i128 O1 lane should stay aligned with the ordinary O1 pipeline"
                );
            }

            /// Above O1 the i128 lane must list the same passes as the ordinary pipeline.
            #[test]
            fn i128_pipeline_matches_full_higher_levels() {
                for lvl in ABOVE_O1 {
                    let full = build_pipeline(lvl).pass_names();
                    let wide = build_i128_pipeline(lvl)
                        .expect("level should expose the widened i128 pipeline")
                        .pass_names();
                    assert_eq!(
                        wide, full,
                        "the widened i128 lane should stay aligned with the ordinary {:?} pipeline",
                        lvl
                    );
                }
            }
        }
      
        457

1	//! Optimization-level → pass pipeline mapping.
2	//!
3	//! `OptLevel` is what the driver hands us; `build_pipeline` returns a
4	//! configured `PassManager`. Adding a new pass to a level is a one-line
5	//! change here, which keeps the dispatch logic in one place.
6
7	use super::bce::Bce;
8	use super::call_resolve::CallResolve;
9	use super::const_arg::ConstArgSpecialize;
10	use super::const_fold::ConstFold;
11	use super::const_prop::ConstProp;
12	use super::cse::LocalCse;
13	use super::dce::Dce;
14	use super::dead_arg::DeadArgElim;
15	use super::dead_func::DeadFuncElim;
16	use super::dse::Dse;
17	use super::fast_math::FastMathReassoc;
18	use super::fission::LoopFission;
19	use super::fusion::LoopFusion;
20	use super::global_lsf::GlobalLsf;
21	use super::gvn::Gvn;
22	use super::inline::Inline;
23	use super::interchange::LoopInterchange;
24	use super::jump_thread::JumpThread;
25	use super::licm::Licm;
26	use super::lsf::LocalLsf;
27	use super::mem2reg::Mem2Reg;
28	use super::neon_vectorize::NeonVectorize;
29	use super::pass::PassManager;
30	use super::peel::LoopPeel;
31	use super::preheader::PreheaderInsert;
32	use super::return_prop::ReturnPropagate;
33	use super::sccp::Sccp_;
34	use super::simplify_cfg::SimplifyCfg;
35	use super::sroa::Sroa;
36	use super::strength_reduce::StrengthReduce;
37	use super::unroll::LoopUnroll;
38	use super::unswitch::LoopUnswitch;
39	use super::vectorize::Vectorize;
40
/// Optimization level selected by the driver.
///
/// Mirrors `gfortran` / `clang` semantics so users have no surprises.
/// The per-variant notes summarize which pass groups `build_pipeline`
/// registers; that function remains the authoritative, ordered list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptLevel {
    /// `-O0` — no optimization; the pipeline is left empty. Default
    /// during development.
    O0,
    /// `-O1` — call resolution, mem2reg, constant folding and
    /// propagation, inlining, dead argument/function elimination,
    /// CFG simplification, local CSE, jump threading and DCE.
    O1,
    /// `-O2` — the `-O1` set plus SROA, bounds-check elimination,
    /// strength reduction, LICM, dead store elimination, GVN and the
    /// loop transforms (peel, unswitch, interchange, fission, fusion,
    /// unroll).
    O2,
    /// `-O3` — the `-O2` set plus the `NeonVectorize` and `Vectorize`
    /// passes, with inlining configured for `-O3`.
    O3,
    /// `-Os` — like `-O2` but prefers code size: loop fission, fusion
    /// and unrolling are not registered, and inlining is configured
    /// for `-Os`.
    Os,
    /// `-Ofast` — the `-O3` set plus fast-math reassociation, which may
    /// change floating-point results.
    Ofast,
}
63
64	impl OptLevel {
65	/// Parse the textual flag (`O0`, `O1`, ..., `Ofast`).
66	pub fn parse_flag(s: &str) -> Option<Self> {
67	match s {
68	"O0" \| "0" => Some(Self::O0),
69	"O1" \| "1" => Some(Self::O1),
70	"O2" \| "2" => Some(Self::O2),
71	"O3" \| "3" => Some(Self::O3),
72	"Os" \| "s" => Some(Self::Os),
73	"Ofast" \| "fast" => Some(Self::Ofast),
74	_ => None,
75	}
76	}
77
78	pub fn flag_name(self) -> &'static str {
79	match self {
80	Self::O0 => "-O0",
81	Self::O1 => "-O1",
82	Self::O2 => "-O2",
83	Self::O3 => "-O3",
84	Self::Os => "-Os",
85	Self::Ofast => "-Ofast",
86	}
87	}
88
89	/// Does this level enable inlining?
90	///
91	/// Audit Min-6: this predicate is currently consulted only by the
92	/// pipeline test harness. Once `Inline` lands as a pass, the
93	/// builder below will gate registration on this. Same for the
94	/// other two predicates.
95	pub fn inlining(self) -> bool {
96	matches!(
97	self,
98	Self::O1 \| Self::O2 \| Self::O3 \| Self::Os \| Self::Ofast
99	)
100	}
101
102	/// Does this level enable loop vectorization (NEON)?
103	pub fn vectorize(self) -> bool {
104	matches!(self, Self::O3 \| Self::Ofast)
105	}
106
107	/// Does this level allow value-changing fast-math reassociation
108	/// (`-Ofast`-only — relaxes IEEE 754 strictness for FAdd/FMul
109	/// reordering, signed-zero collapse, etc.)?
110	pub fn fast_math(self) -> bool {
111	matches!(self, Self::Ofast)
112	}
113	}
114
115	/// Build the pass pipeline for a given optimization level.
116	///
117	/// Adding a new optimization pass is a single push here. Keeping this
118	/// in one function makes it trivial to audit which passes run at which
119	/// level.
120	pub fn build_pipeline(level: OptLevel) -> PassManager {
121	let mut pm = PassManager::new();
122	match level {
123	OptLevel::O0 => {
124	// Nothing — preserve unoptimized IR exactly as it was lowered.
125	}
126	OptLevel::O1 => {
127	// Cheap, always-correct cleanup.
128	//
129	// Mem2reg runs FIRST so every downstream pass sees SSA
130	// values instead of alloca/load/store round-trips.
131	// Without it, const_fold can't propagate constants
132	// through local variables, CSE can't dedupe across
133	// store/load pairs, and LICM is effectively dormant
134	// (loads block every hoist attempt).
135	pm.add(Box::new(CallResolve));
136	pm.add(Box::new(Mem2Reg));
137	pm.add(Box::new(ConstFold));
138	pm.add(Box::new(Inline::for_level(OptLevel::O1)));
139	pm.add(Box::new(ConstArgSpecialize));
140	pm.add(Box::new(DeadArgElim));
141	pm.add(Box::new(ReturnPropagate));
142	pm.add(Box::new(SimplifyCfg));
143	pm.add(Box::new(DeadFuncElim));
144	pm.add(Box::new(LocalLsf));
145	pm.add(Box::new(LocalCse));
146	pm.add(Box::new(Sccp_));
147	pm.add(Box::new(JumpThread));
148	pm.add(Box::new(ConstProp));
149	pm.add(Box::new(Dce));
150	}
151	OptLevel::O2 => {
152	// O1 plus LICM, strength reduction, DSE, LSF, loop transforms.
153	pm.add(Box::new(CallResolve));
154	pm.add(Box::new(Mem2Reg));
155	pm.add(Box::new(ConstFold));
156	pm.add(Box::new(Sroa)); // after SSA + const fold (GCC pattern)
157	pm.add(Box::new(Mem2Reg)); // re-promote SROA-created scalar allocas
158	pm.add(Box::new(Inline::for_level(OptLevel::O2)));
159	pm.add(Box::new(ConstArgSpecialize));
160	pm.add(Box::new(DeadArgElim));
161	pm.add(Box::new(ReturnPropagate));
162	pm.add(Box::new(SimplifyCfg));
163	pm.add(Box::new(DeadFuncElim));
164	pm.add(Box::new(Bce));
165	pm.add(Box::new(StrengthReduce));
166	pm.add(Box::new(LocalLsf));
167	pm.add(Box::new(GlobalLsf));
168	pm.add(Box::new(LocalCse));
169	pm.add(Box::new(PreheaderInsert));
170	pm.add(Box::new(LoopPeel));
171	pm.add(Box::new(LoopUnswitch));
172	pm.add(Box::new(Licm));
173	pm.add(Box::new(Sccp_));
174	pm.add(Box::new(JumpThread));
175	pm.add(Box::new(ConstProp));
176	pm.add(Box::new(Dse));
177	pm.add(Box::new(LoopInterchange));
178	pm.add(Box::new(LoopFission));
179	pm.add(Box::new(LoopFusion));
180	pm.add(Box::new(LoopUnroll));
181	pm.add(Box::new(Gvn)); // after loop passes to avoid SSA conflicts
182	pm.add(Box::new(Dce));
183	}
184	OptLevel::Os => {
185	// Like O2 but no loop unrolling (prefer code size).
186	pm.add(Box::new(CallResolve));
187	pm.add(Box::new(Mem2Reg));
188	pm.add(Box::new(ConstFold));
189	pm.add(Box::new(Sroa));
190	pm.add(Box::new(Mem2Reg));
191	pm.add(Box::new(Inline::for_level(OptLevel::Os)));
192	pm.add(Box::new(ConstArgSpecialize));
193	pm.add(Box::new(DeadArgElim));
194	pm.add(Box::new(ReturnPropagate));
195	pm.add(Box::new(SimplifyCfg));
196	pm.add(Box::new(DeadFuncElim));
197	pm.add(Box::new(Bce));
198	pm.add(Box::new(StrengthReduce));
199	pm.add(Box::new(LocalLsf));
200	pm.add(Box::new(GlobalLsf));
201	pm.add(Box::new(LocalCse));
202	pm.add(Box::new(PreheaderInsert));
203	pm.add(Box::new(LoopPeel));
204	pm.add(Box::new(LoopUnswitch));
205	pm.add(Box::new(Licm));
206	pm.add(Box::new(Sccp_));
207	pm.add(Box::new(JumpThread));
208	pm.add(Box::new(ConstProp));
209	pm.add(Box::new(Dse));
210	pm.add(Box::new(LoopInterchange));
211	pm.add(Box::new(Gvn));
212	pm.add(Box::new(Dce));
213	}
214	OptLevel::O3 => {
215	// O2 passes + loop unrolling + interchange.
216	pm.add(Box::new(CallResolve));
217	pm.add(Box::new(Mem2Reg));
218	pm.add(Box::new(ConstFold));
219	pm.add(Box::new(Sroa));
220	pm.add(Box::new(Mem2Reg));
221	pm.add(Box::new(Inline::for_level(OptLevel::O3)));
222	pm.add(Box::new(ConstArgSpecialize));
223	pm.add(Box::new(DeadArgElim));
224	pm.add(Box::new(ReturnPropagate));
225	pm.add(Box::new(SimplifyCfg));
226	pm.add(Box::new(DeadFuncElim));
227	pm.add(Box::new(Bce));
228	pm.add(Box::new(StrengthReduce));
229	pm.add(Box::new(LocalLsf));
230	pm.add(Box::new(GlobalLsf));
231	pm.add(Box::new(LocalCse));
232	pm.add(Box::new(PreheaderInsert));
233	pm.add(Box::new(LoopPeel));
234	pm.add(Box::new(LoopUnswitch));
235	pm.add(Box::new(Licm));
236	pm.add(Box::new(Sccp_));
237	pm.add(Box::new(JumpThread));
238	pm.add(Box::new(ConstProp));
239	pm.add(Box::new(Dse));
240	pm.add(Box::new(LoopInterchange));
241	pm.add(Box::new(LoopFission));
242	pm.add(Box::new(LoopFusion));
243	pm.add(Box::new(NeonVectorize));
244	pm.add(Box::new(Vectorize));
245	pm.add(Box::new(LoopUnroll));
246	pm.add(Box::new(Gvn)); // keep O3/Ofast aligned with O2/Os value numbering
247	pm.add(Box::new(Dce));
248	}
249	OptLevel::Ofast => {
250	// O3 plus Ofast-only fast-math reassociation.
251	pm.add(Box::new(CallResolve));
252	pm.add(Box::new(Mem2Reg));
253	pm.add(Box::new(ConstFold));
254	pm.add(Box::new(Sroa));
255	pm.add(Box::new(Mem2Reg));
256	pm.add(Box::new(Inline::for_level(OptLevel::O3)));
257	pm.add(Box::new(ConstArgSpecialize));
258	pm.add(Box::new(DeadArgElim));
259	pm.add(Box::new(ReturnPropagate));
260	pm.add(Box::new(SimplifyCfg));
261	pm.add(Box::new(DeadFuncElim));
262	pm.add(Box::new(Bce));
263	pm.add(Box::new(StrengthReduce));
264	pm.add(Box::new(LocalLsf));
265	pm.add(Box::new(GlobalLsf));
266	pm.add(Box::new(LocalCse));
267	pm.add(Box::new(PreheaderInsert));
268	pm.add(Box::new(LoopPeel));
269	pm.add(Box::new(LoopUnswitch));
270	pm.add(Box::new(Licm));
271	pm.add(Box::new(Sccp_));
272	pm.add(Box::new(JumpThread));
273	pm.add(Box::new(ConstProp));
274	pm.add(Box::new(Dse));
275	pm.add(Box::new(LoopInterchange));
276	pm.add(Box::new(LoopFission));
277	pm.add(Box::new(LoopFusion));
278	pm.add(Box::new(NeonVectorize));
279	pm.add(Box::new(Vectorize));
280	pm.add(Box::new(LoopUnroll));
281	pm.add(Box::new(FastMathReassoc));
282	pm.add(Box::new(Gvn));
283	pm.add(Box::new(Dce));
284	}
285	}
286	pm
287	}
288
289	/// Build the restricted optimization pipeline for modules that still contain
290	/// non-global `i128` values.
291	///
292	/// This deliberately widens `i128` support one optimization lane at a time.
293	/// Now that the backend can carry stack-backed `i128` values through block
294	/// params and mem2reg-style joins, the widened `i128` lane can use the full
295	/// ordinary O1/O2/O3/Os/Ofast pipelines. Higher levels remain gated until their
296	/// pass shapes are proven end to end.
297	pub fn build_i128_pipeline(level: OptLevel) -> Option<PassManager> {
298	match level {
299	OptLevel::O1 => Some(build_pipeline(OptLevel::O1)),
300	OptLevel::O2 => Some(build_pipeline(OptLevel::O2)),
301	OptLevel::O3 => Some(build_pipeline(OptLevel::O3)),
302	OptLevel::Os => Some(build_pipeline(OptLevel::Os)),
303	OptLevel::Ofast => Some(build_pipeline(OptLevel::Ofast)),
304	_ => None,
305	}
306	}
307
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-check the flag parser on valid spellings and one invalid one.
    #[test]
    fn parse_flags() {
        let cases = [
            ("O0", Some(OptLevel::O0)),
            ("Os", Some(OptLevel::Os)),
            ("O3", Some(OptLevel::O3)),
            ("Ofast", Some(OptLevel::Ofast)),
            ("O9", None),
        ];
        for (text, expected) in cases {
            assert_eq!(OptLevel::parse_flag(text), expected);
        }
    }

    /// Each predicate is checked once on a level where it holds and once
    /// on a level where it does not.
    #[test]
    fn level_predicates() {
        // inlining
        assert!(!OptLevel::O0.inlining());
        assert!(OptLevel::O2.inlining());
        // vectorize
        assert!(OptLevel::O3.vectorize());
        assert!(!OptLevel::O2.vectorize());
        // fast_math
        assert!(OptLevel::Ofast.fast_math());
        assert!(!OptLevel::O3.fast_math());
    }

    /// O0 has no passes; every other level has at least one.
    #[test]
    fn pipelines_build() {
        assert!(build_pipeline(OptLevel::O0).is_empty());

        let optimizing = [
            OptLevel::O1,
            OptLevel::O2,
            OptLevel::O3,
            OptLevel::Os,
            OptLevel::Ofast,
        ];
        for lvl in optimizing {
            assert!(
                !build_pipeline(lvl).is_empty(),
                "pipeline {:?} should have at least one pass",
                lvl
            );
        }
    }

    /// GVN must be registered at every level from O2 upwards.
    #[test]
    fn higher_optimization_levels_keep_gvn_enabled() {
        for lvl in [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast] {
            let names = build_pipeline(lvl).pass_names();
            assert!(
                names.contains(&"gvn"),
                "pipeline {:?} should include gvn, got {:?}",
                lvl,
                names
            );
        }
    }

    /// Fast-math reassociation is the one pass that separates Ofast from O3.
    #[test]
    fn ofast_enables_fast_math_reassoc_but_o3_does_not() {
        let strict = build_pipeline(OptLevel::O3).pass_names();
        let relaxed = build_pipeline(OptLevel::Ofast).pass_names();
        assert!(
            !strict.contains(&"fast-math-reassoc"),
            "O3 should stay strict, got {:?}",
            strict
        );
        assert!(
            relaxed.contains(&"fast-math-reassoc"),
            "Ofast should include fast-math reassociation, got {:?}",
            relaxed
        );
    }

    /// The `vectorize` pass appears at O3 and Ofast but not at O2.
    #[test]
    fn vectorize_is_enabled_only_at_o3_and_above() {
        let names_at = |lvl: OptLevel| build_pipeline(lvl).pass_names();
        let at_o2 = names_at(OptLevel::O2);
        let at_o3 = names_at(OptLevel::O3);
        let at_ofast = names_at(OptLevel::Ofast);

        assert!(
            !at_o2.contains(&"vectorize"),
            "O2 should not include vectorize, got {:?}",
            at_o2
        );
        assert!(
            at_o3.contains(&"vectorize"),
            "O3 should include vectorize, got {:?}",
            at_o3
        );
        assert!(
            at_ofast.contains(&"vectorize"),
            "Ofast should include vectorize, got {:?}",
            at_ofast
        );
    }

    /// Every optimizing level exposes an i128 pipeline; O0 does not.
    #[test]
    fn i128_pipeline_is_available_through_ofast() {
        assert!(
            build_i128_pipeline(OptLevel::O1).is_some(),
            "O1 should have the widened i128-safe pipeline"
        );
        assert!(
            build_i128_pipeline(OptLevel::O2).is_some(),
            "O2 should be available once the widened i128 lane is proven"
        );
        for lvl in [OptLevel::O3, OptLevel::Os, OptLevel::Ofast] {
            assert!(
                build_i128_pipeline(lvl).is_some(),
                "{:?} should be available once the widened i128 lane is proven",
                lvl
            );
        }

        let unoptimized = OptLevel::O0;
        assert!(
            build_i128_pipeline(unoptimized).is_none(),
            "{:?} should not yet have widened i128 optimization support",
            unoptimized
        );
    }

    /// The i128 O1 lane registers exactly the ordinary O1 passes.
    #[test]
    fn i128_pipeline_matches_full_o1() {
        let widened = build_i128_pipeline(OptLevel::O1)
            .expect("O1 should expose the widened i128 pipeline");
        let ordinary = build_pipeline(OptLevel::O1);
        assert_eq!(
            widened.pass_names(),
            ordinary.pass_names(),
            "the widened i128 O1 lane should stay aligned with the ordinary O1 pipeline"
        );
    }

    /// The same alignment check for every level above O1.
    #[test]
    fn i128_pipeline_matches_full_higher_levels() {
        for lvl in [OptLevel::O2, OptLevel::O3, OptLevel::Os, OptLevel::Ofast] {
            let widened = build_i128_pipeline(lvl)
                .expect("level should expose the widened i128 pipeline");
            let ordinary = build_pipeline(lvl);
            assert_eq!(
                widened.pass_names(),
                ordinary.pass_names(),
                "the widened i128 lane should stay aligned with the ordinary {:?} pipeline",
                lvl
            );
        }
    }
}
457