fortrangoingonforty/armfortas / c7b9a72

Browse files

Test FP min/max reductions emit fmaxv/fminv.4s + fmaxp/fminp.2d and produce 32.0 / 1.0

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
c7b9a72d6b6f28670314b6a47c0fc06394abc444
Parents
73aa96f
Tree
514e7e3

1 changed file

Status | File | + | -
A tests/vectorize_reduce_fp_minmax.rs 124 0
tests/vectorize_reduce_fp_minmax.rs (added)
@@ -0,0 +1,124 @@
1
+use std::collections::BTreeSet;
2
+use std::path::PathBuf;
3
+
4
+use armfortas::driver::OptLevel;
5
+use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
+
7
/// Builds the path `test_programs/<name>` and returns it.
///
/// # Panics
///
/// Panics with `missing test fixture <path>` when nothing exists at the
/// resulting path. The path is relative, so it is resolved against the
/// process's current working directory.
fn fixture(name: &str) -> PathBuf {
    let mut candidate = PathBuf::from("test_programs");
    candidate.push(name);
    if !candidate.exists() {
        panic!("missing test fixture {}", candidate.display());
    }
    candidate
}
12
+
13
+fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14
+    let result = capture_from_path(&request).expect("capture should succeed");
15
+    match result.get(stage) {
16
+        Some(CapturedStage::Text(text)) => text.clone(),
17
+        Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18
+        None => panic!("missing requested stage {}", stage.as_str()),
19
+    }
20
+}
21
+
22
+fn capture_run_stdout(request: CaptureRequest) -> String {
23
+    let result = capture_from_path(&request).expect("capture should succeed");
24
+    match result.get(Stage::Run) {
25
+        Some(CapturedStage::Run(run)) => run.stdout.clone(),
26
+        _ => panic!("missing run stage"),
27
+    }
28
+}
29
+
30
/// Compiles `do_loop_vectorize_reduce_fp_minmax.f90` at `-O3` and checks three
/// things in order: the optimized IR, the emitted assembly, and the stdout of
/// the resulting program.
#[test]
fn o3_vectorizes_fp_minmax_reductions() {
    let source = fixture("do_loop_vectorize_reduce_fp_minmax.f90");

    // Every capture below differs only in which stage it asks for.
    let request_for = |stage: Stage| CaptureRequest {
        input: source.clone(),
        requested: BTreeSet::from([stage]),
        opt_level: OptLevel::O3,
    };

    // --- Optimized IR -----------------------------------------------------
    let o3_ir = capture_text(request_for(Stage::OptIr), Stage::OptIr);
    // Each min/max loop is expected to leave one vmax/vmin in its body and
    // one vreduce_max/vreduce_min at its exit; only the exit reductions are
    // counted here.
    for reduce_op in ["vreduce_max", "vreduce_min"] {
        assert_eq!(
            o3_ir.matches(reduce_op).count(),
            2,
            "expected two {} (f32 + f64):\n{}",
            reduce_op,
            o3_ir
        );
    }
    let has_f32_vector = o3_ir.contains("<4 x f32>");
    let has_f64_vector = o3_ir.contains("<2 x f64>");
    assert!(
        has_f32_vector && has_f64_vector,
        "expected both f32 and f64 vector accumulators in IR:\n{}",
        o3_ir
    );

    // --- Assembly ---------------------------------------------------------
    let o3_asm = capture_text(request_for(Stage::Asm), Stage::Asm);
    // f64 rows: NEON has no fmaxv.2d; the pairwise scalar form serves as the
    // across-lane reduce when there are only two f64 lanes.
    let expected_mnemonics = [
        ("f32 max", "fmaxv.4s"),
        ("f32 min", "fminv.4s"),
        ("f64 max", "fmaxp.2d"),
        ("f64 min", "fminp.2d"),
    ];
    for (reduction, mnemonic) in expected_mnemonics {
        assert!(
            o3_asm.contains(mnemonic),
            "{} reduce should use {}:\n{}",
            reduction,
            mnemonic,
            o3_asm
        );
    }

    // --- Program output ---------------------------------------------------
    let stdout = capture_run_stdout(request_for(Stage::Run));
    let trimmed: Vec<&str> = stdout
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect();
    assert_eq!(trimmed.len(), 4, "expected four output lines:\n{}", stdout);

    // Only a leading prefix of each line is compared, so the exact numeric
    // formatting of the program's output is not pinned by this test.
    let expected_prefixes = [
        ("3.2", "f32 max should be 32.0"),
        ("1.0", "f32 min should be 1.0"),
        ("3.2", "f64 max should be 32.0"),
        ("1.0", "f64 min should be 1.0"),
    ];
    for (line, (prefix, claim)) in trimmed.iter().zip(expected_prefixes) {
        assert!(line.starts_with(prefix), "{}, got {:?}", claim, line);
    }
}