fortrangoingonforty/armfortas / c7e1b7c

Browse files

Test sum reduction emits vbroadcast/vadd/vreduce_sum/mov.16b/addv.4s and prints 528

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
c7e1b7c2d511b1f0c1ad3e17a3bc88d22f56018c
Parents
a57af5f
Tree
0ae99a9

1 changed file

StatusFile+-
A tests/vectorize_reduce_sum.rs 92 0
tests/vectorize_reduce_sum.rsadded
@@ -0,0 +1,92 @@
1
+use std::collections::BTreeSet;
2
+use std::path::PathBuf;
3
+
4
+use armfortas::driver::OptLevel;
5
+use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
+
7
/// Builds the path of the fixture called `name` inside the `test_programs`
/// directory.
///
/// The path is relative, so it is resolved against the process's current
/// working directory.
///
/// # Panics
///
/// Panics with a "missing test fixture" message if nothing exists at the
/// resulting path.
fn fixture(name: &str) -> PathBuf {
    let mut candidate = PathBuf::from("test_programs");
    candidate.push(name);
    // Fail here so a bad fixture name is reported clearly instead of
    // surfacing later as a capture error.
    if !candidate.exists() {
        panic!("missing test fixture {}", candidate.display());
    }
    candidate
}
12
+
13
+fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14
+    let result = capture_from_path(&request).expect("capture should succeed");
15
+    match result.get(stage) {
16
+        Some(CapturedStage::Text(text)) => text.clone(),
17
+        Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18
+        None => panic!("missing requested stage {}", stage.as_str()),
19
+    }
20
+}
21
+
22
+fn capture_run_stdout(request: CaptureRequest) -> String {
23
+    let result = capture_from_path(&request).expect("capture should succeed");
24
+    match result.get(Stage::Run) {
25
+        Some(CapturedStage::Run(run)) => run.stdout.clone(),
26
+        _ => panic!("missing run stage"),
27
+    }
28
+}
29
+
30
/// Checks the `do_loop_vectorize_reduce_sum.f90` fixture at `O3` in three
/// ways: the optimized IR mentions the vector reduction operations, the
/// assembly contains the expected NEON mnemonics, and the program prints 528.
#[test]
fn o3_vectorizes_manual_sum_reduction_loop() {
    let source = fixture("do_loop_vectorize_reduce_sum.f90");

    // Each capture below targets the same fixture at O3 and asks for exactly
    // one stage, so build the request in one place.
    let request_for = |stage: Stage| CaptureRequest {
        input: source.clone(),
        requested: BTreeSet::from([stage]),
        opt_level: OptLevel::O3,
    };

    // Optimized IR: the reduction path must produce a VBroadcast in the
    // preheader, a VAdd of two <V x i32> vectors in the body, and a
    // VReduceSum after the loop.
    let o3_ir = capture_text(request_for(Stage::OptIr), Stage::OptIr);
    let has_reduction_shape = ["vbroadcast", "vadd", "vreduce_sum"]
        .iter()
        .all(|needle| o3_ir.contains(*needle));
    assert!(
        has_reduction_shape,
        "expected NeonVectorize reduction shape (vbroadcast + vadd + vreduce_sum):\n{}",
        o3_ir
    );

    // Assembly: the loop-param transfer must use `mov.16b` rather than
    // `fmov d`, which would clobber the upper lanes of the V128 accumulator
    // and produce a wrong sum.
    let o3_asm = capture_text(request_for(Stage::Asm), Stage::Asm);
    assert!(
        o3_asm.contains("mov.16b"),
        "regalloc must materialise V128 block-param transfers via `mov.16b`, not `fmov d`:\n{}",
        o3_asm
    );
    assert!(
        o3_asm.contains("addv.4s"),
        "VReduceSum should lower to `addv.4s` for an i32 accumulator:\n{}",
        o3_asm
    );

    // Runtime: sum(1..32) = 32*33/2 = 528. Compare only the non-blank,
    // whitespace-trimmed output lines so padding does not affect the check.
    let stdout = capture_run_stdout(request_for(Stage::Run));
    let mut trimmed: Vec<&str> = Vec::new();
    for line in stdout.lines() {
        let line = line.trim();
        if !line.is_empty() {
            trimmed.push(line);
        }
    }
    assert_eq!(
        trimmed,
        vec!["528"],
        "vectorized sum reduction should produce 528:\n{}",
        stdout
    );
}