fortrangoingonforty/armfortas / f7ae8d0

Browse files

Test O3 hoists VBroadcast and produces 8 / 39 at runtime

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
f7ae8d02d7c3358ce7c712ec18f626de030f4c89
Parents
c555ff2
Tree
23944a4

1 changed file

Status File + -
A tests/vectorize_invariant_scalar.rs 80 0
tests/vectorize_invariant_scalar.rs added
@@ -0,0 +1,80 @@
1
+use std::collections::BTreeSet;
2
+use std::path::PathBuf;
3
+
4
+use armfortas::driver::OptLevel;
5
+use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
+
7
/// Resolves `name` inside the `test_programs` directory and returns the path.
///
/// # Panics
/// Panics with `missing test fixture <path>` when nothing exists at the
/// resolved location.
fn fixture(name: &str) -> PathBuf {
    let mut candidate = PathBuf::from("test_programs");
    candidate.push(name);
    // Fail loudly here so a missing fixture is not mistaken for a compiler bug.
    if !candidate.exists() {
        panic!("missing test fixture {}", candidate.display());
    }
    candidate
}
12
+
13
+fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14
+    let result = capture_from_path(&request).expect("capture should succeed");
15
+    match result.get(stage) {
16
+        Some(CapturedStage::Text(text)) => text.clone(),
17
+        Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18
+        None => panic!("missing requested stage {}", stage.as_str()),
19
+    }
20
+}
21
+
22
+fn capture_run_stdout(request: CaptureRequest) -> String {
23
+    let result = capture_from_path(&request).expect("capture should succeed");
24
+    match result.get(Stage::Run) {
25
+        Some(CapturedStage::Run(run)) => run.stdout.clone(),
26
+        _ => panic!("missing run stage"),
27
+    }
28
+}
29
+
30
/// Checks that `do_loop_vectorize_scalar.f90` is vectorized at O3 and that the
/// compiled program still prints the expected values.
#[test]
fn o3_vectorizes_array_plus_invariant_scalar_loop() {
    let source = fixture("do_loop_vectorize_scalar.f90");

    // Both captures use the same input and opt level; only the single
    // requested stage differs.
    let request_for = |stage: Stage| CaptureRequest {
        input: source.clone(),
        requested: BTreeSet::from([stage]),
        opt_level: OptLevel::O3,
    };

    let o3_ir = capture_text(request_for(Stage::OptIr), Stage::OptIr);

    // Two lowerings count as "vectorized" for the scalar+invariant pattern:
    //   * NeonVectorize: vbroadcast in the preheader plus vload/vadd/vstore
    //     in the body (detected via the vbroadcast/vadd/vstore substrings).
    //   * Vectorize fallback: a call to the afs_array_add_scalar_i32 kernel.
    let neon = ["vbroadcast", "vadd", "vstore"]
        .iter()
        .all(|&needle| o3_ir.contains(needle));
    let kernel = o3_ir.contains("call @afs_array_add_scalar_i32(");
    assert!(
        neon || kernel,
        "O3 should vectorize a(i) = b(i) + scale (vbroadcast/vadd/vstore or afs_array_add_scalar_i32):\n{}",
        o3_ir
    );

    // The real NEON path is preferred because it avoids the runtime call
    // entirely, so the fallback alone is not enough to pass.
    assert!(
        neon,
        "O3 should pick the NeonVectorize broadcast path over the runtime kernel:\n{}",
        o3_ir
    );

    // Runtime correctness: a(1) = 1+7 = 8 and a(32) = 32+7 = 39.
    let stdout = capture_run_stdout(request_for(Stage::Run));
    let mut trimmed: Vec<&str> = Vec::new();
    for line in stdout.lines() {
        let line = line.trim();
        // Blank lines are ignored; only printed values are compared.
        if !line.is_empty() {
            trimmed.push(line);
        }
    }
    assert_eq!(
        trimmed,
        vec!["8", "39"],
        "vectorized a(i) = b(i) + scale should produce 8 then 39:\n{}",
        stdout
    );
}