Rust · 3655 bytes Raw Blame History
1 use std::collections::BTreeSet;
2 use std::path::PathBuf;
3
4 use armfortas::driver::OptLevel;
5 use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
/// Builds the path of the test program `name` inside the relative
/// `test_programs` directory.
///
/// # Panics
///
/// Panics if nothing exists at the resulting path, so a misspelled or missing
/// fixture is reported before any capture is attempted.
fn fixture(name: &str) -> PathBuf {
    let mut location = PathBuf::from("test_programs");
    location.push(name);
    if !location.exists() {
        panic!("missing test fixture {}", location.display());
    }
    location
}
12
13 fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14 let result = capture_from_path(&request).expect("capture should succeed");
15 match result.get(stage) {
16 Some(CapturedStage::Text(text)) => text.clone(),
17 Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18 None => panic!("missing requested stage {}", stage.as_str()),
19 }
20 }
21
22 fn capture_run_stdout(request: CaptureRequest) -> String {
23 let result = capture_from_path(&request).expect("capture should succeed");
24 match result.get(Stage::Run) {
25 Some(CapturedStage::Run(run)) => run.stdout.clone(),
26 _ => panic!("missing run stage"),
27 }
28 }
29
/// Checks the floating-point min/max reduction fixture at `O3` in three ways:
/// the optimized IR contains the expected vector reductions, the assembly
/// contains the expected reduce instructions, and the program's output has the
/// expected four lines.
#[test]
fn o3_vectorizes_fp_minmax_reductions() {
    let program = fixture("do_loop_vectorize_reduce_fp_minmax.f90");

    // Every capture below targets the same fixture at O3 and asks for exactly
    // one stage, so build the request in one place.
    let request_for = |stage: Stage| CaptureRequest {
        input: program.clone(),
        requested: BTreeSet::from([stage]),
        opt_level: OptLevel::O3,
    };

    // Optimized IR: per the fixture's intent, each min/max loop keeps one
    // vmax/vmin in its body and one vreduce_max/vreduce_min at its exit, so
    // the f32 and f64 loops together yield two of each reduce.
    let ir = capture_text(request_for(Stage::OptIr), Stage::OptIr);
    let max_reduces = ir.matches("vreduce_max").count();
    let min_reduces = ir.matches("vreduce_min").count();
    assert_eq!(
        max_reduces, 2,
        "expected two vreduce_max (f32 + f64):\n{}",
        ir
    );
    assert_eq!(
        min_reduces, 2,
        "expected two vreduce_min (f32 + f64):\n{}",
        ir
    );
    let has_f32_vector = ir.contains("<4 x f32>");
    let has_f64_vector = ir.contains("<2 x f64>");
    assert!(
        has_f32_vector && has_f64_vector,
        "expected both f32 and f64 vector accumulators in IR:\n{}",
        ir
    );

    // Assembly: the f32 reductions use the across-lane fmaxv/fminv forms.
    let asm = capture_text(request_for(Stage::Asm), Stage::Asm);
    let uses_fmaxv = asm.contains("fmaxv.4s");
    assert!(uses_fmaxv, "f32 max reduce should use fmaxv.4s:\n{}", asm);
    let uses_fminv = asm.contains("fminv.4s");
    assert!(uses_fminv, "f32 min reduce should use fminv.4s:\n{}", asm);
    // For f64 there is no fmaxv.2d in NEON; with only two lanes the pairwise
    // scalar form serves as the across-lane reduce.
    let uses_fmaxp = asm.contains("fmaxp.2d");
    assert!(uses_fmaxp, "f64 max reduce should use fmaxp.2d:\n{}", asm);
    let uses_fminp = asm.contains("fminp.2d");
    assert!(uses_fminp, "f64 min reduce should use fminp.2d:\n{}", asm);

    // Execution: ignore blank lines and surrounding whitespace, then expect
    // f32 max, f32 min, f64 max, f64 min in that order.
    let stdout = capture_run_stdout(request_for(Stage::Run));
    let lines: Vec<&str> = stdout
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect();
    assert_eq!(lines.len(), 4, "expected four output lines:\n{}", stdout);
    let (f32_max, f32_min, f64_max, f64_min) = (lines[0], lines[1], lines[2], lines[3]);
    assert!(
        f32_max.starts_with("3.2"),
        "f32 max should be 32.0, got {:?}",
        f32_max
    );
    assert!(
        f32_min.starts_with("1.0"),
        "f32 min should be 1.0, got {:?}",
        f32_min
    );
    assert!(
        f64_max.starts_with("3.2"),
        "f64 max should be 32.0, got {:?}",
        f64_max
    );
    assert!(
        f64_min.starts_with("1.0"),
        "f64 min should be 1.0, got {:?}",
        f64_min
    );
}
125