Rust · 2825 bytes Raw Blame History
1 use std::collections::BTreeSet;
2 use std::path::PathBuf;
3
4 use armfortas::driver::OptLevel;
5 use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
/// Builds the path of the test program `name` inside the `test_programs`
/// directory.
///
/// # Panics
///
/// Panics with a "missing test fixture" message when nothing exists at the
/// resulting path.
fn fixture(name: &str) -> PathBuf {
    let mut location = PathBuf::from("test_programs");
    location.push(name);
    // Fail here with the offending path rather than later inside the capture call.
    if !location.exists() {
        panic!("missing test fixture {}", location.display());
    }
    location
}
12
13 fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14 let result = capture_from_path(&request).expect("capture should succeed");
15 match result.get(stage) {
16 Some(CapturedStage::Text(text)) => text.clone(),
17 Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18 None => panic!("missing requested stage {}", stage.as_str()),
19 }
20 }
21
22 fn capture_run_stdout(request: CaptureRequest) -> String {
23 let result = capture_from_path(&request).expect("capture should succeed");
24 match result.get(Stage::Run) {
25 Some(CapturedStage::Run(run)) => run.stdout.clone(),
26 _ => panic!("missing run stage"),
27 }
28 }
29
/// Checks the `do_loop_vectorize_fma.f90` fixture at `O3`: the optimized IR
/// must contain exactly three `vfma` occurrences, and the program output
/// must consist of three non-empty lines carrying the expected values.
#[test]
fn o3_vectorizes_elementwise_fma() {
    let source = fixture("do_loop_vectorize_fma.f90");

    // First pass: inspect the optimized IR only.
    let ir_request = CaptureRequest {
        input: source.clone(),
        requested: BTreeSet::from([Stage::OptIr]),
        opt_level: OptLevel::O3,
    };
    let o3_ir = capture_text(ir_request, Stage::OptIr);

    // The fixture has three FMA loops (f32 with three loads, f64 with three
    // loads, and f32 with one load plus two invariant scalar broadcasts);
    // each one is expected to fuse into a single vfma.
    let vfma_count = o3_ir.matches("vfma").count();
    assert_eq!(vfma_count, 3, "expected three vfma:\n{}", o3_ir);

    // Second pass: run the program and look at what it prints.
    let run_request = CaptureRequest {
        input: source,
        requested: BTreeSet::from([Stage::Run]),
        opt_level: OptLevel::O3,
    };
    let stdout = capture_run_stdout(run_request);

    // Ignore surrounding whitespace and blank lines in the program output.
    let trimmed: Vec<&str> = stdout
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect();
    assert_eq!(trimmed.len(), 3, "expected three output lines:\n{}", stdout);

    // The length check above guarantees these three indices exist.
    let (f32_line, f64_line, broadcast_line) = (trimmed[0], trimmed[1], trimmed[2]);

    // f32 results: c(1) = 3, c(16) = 33, c(32) = 65.
    assert!(
        f32_line.starts_with("3.0000000E0"),
        "f32 c(1) wrong: {:?}",
        f32_line
    );
    assert!(
        f32_line.contains("3.3000000E1"),
        "f32 c(16) wrong: {:?}",
        f32_line
    );
    assert!(
        f32_line.contains("6.5000000E1"),
        "f32 c(32) wrong: {:?}",
        f32_line
    );

    // f64 results: c(1) and c(16).
    assert!(
        f64_line.starts_with("3.000000000000000E0"),
        "f64 c(1) wrong: {:?}",
        f64_line
    );
    assert!(
        f64_line.contains("3.300000000000000E1"),
        "f64 c(16) wrong: {:?}",
        f64_line
    );

    // Broadcast FMA result: e32(4) = 4 * 2.5 + 10 = 20.
    assert!(
        broadcast_line.starts_with("2.0000000E1"),
        "broadcast FMA wrong: {:?}",
        broadcast_line
    );
}
96