fortrangoingonforty/armfortas / e7effcc

Browse files

Test element-wise FMA fuses fmul+fadd into vfma across three loops

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
e7effcc20eadb052b53fe22a0ca77d6178d713d1
Parents
5a0a1f6
Tree
a024a1b

1 changed file

Status File + -
A tests/vectorize_fma.rs 95 0
tests/vectorize_fma.rs added
@@ -0,0 +1,95 @@
1
+use std::collections::BTreeSet;
2
+use std::path::PathBuf;
3
+
4
+use armfortas::driver::OptLevel;
5
+use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
+
7
/// Builds the path of the fixture `name` under the `test_programs` directory.
///
/// # Panics
///
/// Panics with `missing test fixture <path>` when nothing exists at the
/// resulting path.
fn fixture(name: &str) -> PathBuf {
    let mut candidate = PathBuf::from("test_programs");
    candidate.push(name);
    // Fail right here with the offending path instead of letting a later
    // capture step trip over a missing input.
    if !candidate.exists() {
        panic!("missing test fixture {}", candidate.display());
    }
    candidate
}
12
+
13
+fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14
+    let result = capture_from_path(&request).expect("capture should succeed");
15
+    match result.get(stage) {
16
+        Some(CapturedStage::Text(text)) => text.clone(),
17
+        Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18
+        None => panic!("missing requested stage {}", stage.as_str()),
19
+    }
20
+}
21
+
22
+fn capture_run_stdout(request: CaptureRequest) -> String {
23
+    let result = capture_from_path(&request).expect("capture should succeed");
24
+    match result.get(Stage::Run) {
25
+        Some(CapturedStage::Run(run)) => run.stdout.clone(),
26
+        _ => panic!("missing run stage"),
27
+    }
28
+}
29
+
30
/// Captures `do_loop_vectorize_fma.f90` at `OptLevel::O3` twice: once for the
/// optimized IR, which must contain exactly three `vfma` occurrences, and once
/// for the run stage, whose stdout must consist of three non-empty lines
/// carrying the expected values.
#[test]
fn o3_vectorizes_elementwise_fma() {
    let source = fixture("do_loop_vectorize_fma.f90");

    // Both captures share the input and optimization level; only the
    // requested stage differs.
    let request_for = |stage: Stage| CaptureRequest {
        input: source.clone(),
        requested: BTreeSet::from([stage]),
        opt_level: OptLevel::O3,
    };

    let optimized_ir = capture_text(request_for(Stage::OptIr), Stage::OptIr);
    // The fixture has three FMA loops: 3-load f32, 3-load f64, and 1-load
    // plus 2 invariant scalar broadcasts (f32). Each one is expected to fuse
    // into a single vfma.
    let vfma_count = optimized_ir.matches("vfma").count();
    assert_eq!(vfma_count, 3, "expected three vfma:\n{}", optimized_ir);

    let stdout = capture_run_stdout(request_for(Stage::Run));
    let rows: Vec<&str> = stdout
        .lines()
        .map(str::trim)
        .filter(|row| !row.is_empty())
        .collect();
    assert_eq!(rows.len(), 3, "expected three output lines:\n{}", stdout);
    // The length assertion above makes these three indexes safe.
    let (f32_row, f64_row, broadcast_row) = (rows[0], rows[1], rows[2]);

    // f32 line: c(1)=3, c(16)=33, c(32)=65.
    assert!(
        f32_row.starts_with("3.0000000E0"),
        "f32 c(1) wrong: {:?}",
        f32_row
    );
    assert!(
        f32_row.contains("3.3000000E1"),
        "f32 c(16) wrong: {:?}",
        f32_row
    );
    assert!(
        f32_row.contains("6.5000000E1"),
        "f32 c(32) wrong: {:?}",
        f32_row
    );

    // f64 line: only c(1) and c(16) are checked here.
    assert!(
        f64_row.starts_with("3.000000000000000E0"),
        "f64 c(1) wrong: {:?}",
        f64_row
    );
    assert!(
        f64_row.contains("3.300000000000000E1"),
        "f64 c(16) wrong: {:?}",
        f64_row
    );

    // Broadcast FMA: e32(4) = 4*2.5+10 = 20.
    assert!(
        broadcast_row.starts_with("2.0000000E1"),
        "broadcast FMA wrong: {:?}",
        broadcast_row
    );
}