fortrangoingonforty/armfortas / d002340

Browse files

Test FP reductions emit faddp pairs and produce 528.0

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
d0023402284e210e38c86121a51dbb2c9ef8582e
Parents
c1483b4
Tree
2de2d1f

1 changed file

Status | File | + | -
A tests/vectorize_reduce_fp.rs 98 0
tests/vectorize_reduce_fp.rs (added)
@@ -0,0 +1,98 @@
1
+use std::collections::BTreeSet;
2
+use std::path::PathBuf;
3
+
4
+use armfortas::driver::OptLevel;
5
+use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
+
7
/// Resolves `name` inside the `test_programs` directory.
///
/// The returned path is relative (`test_programs/<name>`), so it is looked
/// up against the current working directory of the test process.
///
/// # Panics
///
/// Panics with a "missing test fixture" message when nothing exists at the
/// resolved path.
fn fixture(name: &str) -> PathBuf {
    let mut location = PathBuf::from("test_programs");
    location.push(name);
    assert!(
        location.exists(),
        "missing test fixture {}",
        location.display()
    );
    location
}
12
+
13
+fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14
+    let result = capture_from_path(&request).expect("capture should succeed");
15
+    match result.get(stage) {
16
+        Some(CapturedStage::Text(text)) => text.clone(),
17
+        Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18
+        None => panic!("missing requested stage {}", stage.as_str()),
19
+    }
20
+}
21
+
22
+fn capture_run_stdout(request: CaptureRequest) -> String {
23
+    let result = capture_from_path(&request).expect("capture should succeed");
24
+    match result.get(Stage::Run) {
25
+        Some(CapturedStage::Run(run)) => run.stdout.clone(),
26
+        _ => panic!("missing run stage"),
27
+    }
28
+}
29
+
30
/// Checks the `do_loop_vectorize_reduce_fp.f90` fixture at `OptLevel::O3` in
/// three steps:
///
/// 1. the optimized IR mentions both `<4 x f32>` and `<2 x f64>` and contains
///    exactly two occurrences of `vreduce_sum`;
/// 2. the assembly contains `faddp.4s`, `faddp.2s` and `faddp.2d`;
/// 3. running the program prints exactly two non-blank lines, each starting
///    with `5.28`.
#[test]
fn o3_vectorizes_fp_sum_reductions() {
    let source = fixture("do_loop_vectorize_reduce_fp.f90");

    // Every capture below is taken at O3 and requests exactly one stage.
    let request_for = |stage: Stage| CaptureRequest {
        input: source.clone(),
        requested: BTreeSet::from([stage]),
        opt_level: OptLevel::O3,
    };

    // --- Optimized IR -----------------------------------------------------
    let o3_ir = capture_text(request_for(Stage::OptIr), Stage::OptIr);
    let has_both_vector_types = ["<4 x f32>", "<2 x f64>"]
        .iter()
        .all(|&vector_ty| o3_ir.contains(vector_ty));
    assert!(
        has_both_vector_types,
        "expected both f32 and f64 vector accumulators in IR:\n{}",
        o3_ir
    );
    let reduce_count = o3_ir.matches("vreduce_sum").count();
    assert_eq!(
        reduce_count, 2,
        "expected two vreduce_sums (one per FP loop):\n{}",
        o3_ir
    );

    // --- Assembly ---------------------------------------------------------
    let o3_asm = capture_text(request_for(Stage::Asm), Stage::Asm);
    // The f32 reduction is expected as a faddp.4s followed by a faddp.2s;
    // the f64 reduction needs only a single faddp.2d.
    let f32_pair_present = o3_asm.contains("faddp.4s") && o3_asm.contains("faddp.2s");
    assert!(
        f32_pair_present,
        "f32 reduce should use the two-step faddp pair:\n{}",
        o3_asm
    );
    assert!(
        o3_asm.contains("faddp.2d"),
        "f64 reduce should use faddp.2d:\n{}",
        o3_asm
    );

    // --- Program output ---------------------------------------------------
    let stdout = capture_run_stdout(request_for(Stage::Run));
    let mut trimmed: Vec<&str> = Vec::new();
    for raw_line in stdout.lines() {
        let line = raw_line.trim();
        if !line.is_empty() {
            trimmed.push(line);
        }
    }
    assert_eq!(trimmed.len(), 2, "expected two output lines:\n{}", stdout);

    // Both sums are 528 (printed as 5.28E2, the f64 one with more digits),
    // so only the leading "5.28" is compared.
    let (s32_line, s64_line) = (trimmed[0], trimmed[1]);
    assert!(
        s32_line.starts_with("5.28"),
        "s32 should be 528, got {:?}",
        s32_line
    );
    assert!(
        s64_line.starts_with("5.28"),
        "s64 should be 528, got {:?}",
        s64_line
    );
}