fortrangoingonforty/armfortas / 9f699c4

Browse files

Test f32 NEON path produces 2.5/64.0 and avoids gp-form dup

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
9f699c418f80dca47aadf0b9295684a50cd0eeb0
Parents
c58f8be
Tree
59348b6

1 changed file

StatusFile+-
A tests/vectorize_f32.rs 93 0
tests/vectorize_f32.rsadded
@@ -0,0 +1,93 @@
1
+use std::collections::BTreeSet;
2
+use std::path::PathBuf;
3
+
4
+use armfortas::driver::OptLevel;
5
+use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
+
7
/// Builds the path of a test program named `name` under `test_programs/`.
///
/// The path is relative, so it is resolved against the current working
/// directory of the test process.
///
/// # Panics
///
/// Panics with `missing test fixture <path>` when nothing exists at the
/// resulting path.
fn fixture(name: &str) -> PathBuf {
    let mut candidate = PathBuf::from("test_programs");
    candidate.push(name);
    if !candidate.exists() {
        panic!("missing test fixture {}", candidate.display());
    }
    candidate
}
12
+
13
+fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14
+    let result = capture_from_path(&request).expect("capture should succeed");
15
+    match result.get(stage) {
16
+        Some(CapturedStage::Text(text)) => text.clone(),
17
+        Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18
+        None => panic!("missing requested stage {}", stage.as_str()),
19
+    }
20
+}
21
+
22
+fn capture_run_stdout(request: CaptureRequest) -> String {
23
+    let result = capture_from_path(&request).expect("capture should succeed");
24
+    match result.get(Stage::Run) {
25
+        Some(CapturedStage::Run(run)) => run.stdout.clone(),
26
+        _ => panic!("missing run stage"),
27
+    }
28
+}
29
+
30
/// Parses the first whitespace-separated token of `line` as an `f32`.
///
/// Returns `None` when the line has no token or the token is not accepted by
/// Rust's float parser. Fixed (`64.0000000`) and exponent (`6.40000000E+01`)
/// spellings both parse to the same value, so callers can compare
/// numerically instead of matching on leading digits.
fn leading_f32(line: &str) -> Option<f32> {
    line.split_whitespace().next()?.parse().ok()
}

/// End-to-end check of the `do_loop_vectorize_f32.f90` fixture at `O3`:
///
/// 1. the optimized IR mentions `vbroadcast` and `<4 x f32>`;
/// 2. no `dup.4s` / `dup.2d` assembly line takes an `s` or `d` register as
///    its source operand;
/// 3. the program prints exactly two non-empty lines, whose leading values
///    are 2.5 and 64.0.
#[test]
fn o3_vectorizes_f32_body_and_assembles_clean() {
    let source = fixture("do_loop_vectorize_f32.f90");

    // The IR should contain v-ops over <4 x f32>.
    let o3_ir = capture_text(
        CaptureRequest {
            input: source.clone(),
            requested: BTreeSet::from([Stage::OptIr]),
            opt_level: OptLevel::O3,
        },
        Stage::OptIr,
    );
    assert!(
        o3_ir.contains("vbroadcast") && o3_ir.contains("<4 x f32>"),
        "expected NeonVectorize to emit f32 vbroadcast/vops:\n{}",
        o3_ir
    );

    // Assembly must NOT contain `dup.4s vN, sM` (the invalid gp-form
    // for an FP scalar source) — that's what the DupEl-vs-DupGen fix
    // prevents.
    let o3_asm = capture_text(
        CaptureRequest {
            input: source.clone(),
            requested: BTreeSet::from([Stage::Asm]),
            opt_level: OptLevel::O3,
        },
        Stage::Asm,
    );
    for line in o3_asm.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with("dup.4s") || trimmed.starts_with("dup.2d") {
            assert!(
                !(trimmed.contains(", s") || trimmed.contains(", d")),
                "FP-scalar VBroadcast must use the lane-dup form, not gp-dup:\n{}",
                trimmed,
            );
        }
    }

    // Runtime: a(1) = 1.0 + 1.5 = 2.5, c(32) = 32.0 * 2.0 = 64.0.
    let stdout = capture_run_stdout(CaptureRequest {
        input: source,
        requested: BTreeSet::from([Stage::Run]),
        opt_level: OptLevel::O3,
    });
    let lines: Vec<&str> = stdout
        .lines()
        .map(str::trim)
        .filter(|l| !l.is_empty())
        .collect();
    assert_eq!(lines.len(), 2, "expected two output lines:\n{}", stdout);

    // Compare parsed values rather than digit prefixes: a prefix such as
    // "6.4" ignores magnitude, so it would accept 6.4 or 0.64 and reject a
    // fixed-notation "64.0000000".
    let close_to = |line: &str, expected: f32| {
        leading_f32(line).map_or(false, |value| (value - expected).abs() <= 1e-4)
    };
    assert!(
        close_to(lines[0], 2.5),
        "a(1) should be 2.5, got {:?}",
        lines[0]
    );
    assert!(
        close_to(lines[1], 64.0),
        "c(32) should be 64.0, got {:?}",
        lines[1]
    );
}