Rust · 2989 bytes Raw Blame History
1 use std::collections::BTreeSet;
2 use std::path::PathBuf;
3
4 use armfortas::driver::OptLevel;
5 use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
/// Resolves `name` to a path under the `test_programs` directory.
///
/// # Panics
///
/// Panics if the resulting path does not exist, so a missing fixture fails
/// here with a clear message instead of later inside the capture call.
fn fixture(name: &str) -> PathBuf {
    let mut candidate = PathBuf::from("test_programs");
    candidate.push(name);
    if !candidate.exists() {
        panic!("missing test fixture {}", candidate.display());
    }
    candidate
}
12
13 fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14 let result = capture_from_path(&request).expect("capture should succeed");
15 match result.get(stage) {
16 Some(CapturedStage::Text(text)) => text.clone(),
17 Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18 None => panic!("missing requested stage {}", stage.as_str()),
19 }
20 }
21
22 fn capture_run_stdout(request: CaptureRequest) -> String {
23 let result = capture_from_path(&request).expect("capture should succeed");
24 match result.get(Stage::Run) {
25 Some(CapturedStage::Run(run)) => run.stdout.clone(),
26 _ => panic!("missing run stage"),
27 }
28 }
29
/// Checks the manual sum-reduction fixture at `OptLevel::O3` in three ways:
/// the optimized IR, the emitted assembly, and the program's stdout.
#[test]
fn o3_vectorizes_manual_sum_reduction_loop() {
    let source = fixture("do_loop_vectorize_reduce_sum.f90");

    // Each capture below differs only in the single stage it requests.
    let request_for = |stage: Stage| CaptureRequest {
        input: source.clone(),
        requested: BTreeSet::from([stage]),
        opt_level: OptLevel::O3,
    };

    // IR: the reduction path must produce a VBroadcast in the preheader,
    // a VAdd of two <V x i32> vectors in the body, and a VReduceSum after
    // the loop.
    let o3_ir = capture_text(request_for(Stage::OptIr), Stage::OptIr);
    let ir_has_reduction_shape = ["vbroadcast", "vadd", "vreduce_sum"]
        .iter()
        .all(|needle| o3_ir.contains(needle));
    assert!(
        ir_has_reduction_shape,
        "expected NeonVectorize reduction shape (vbroadcast + vadd + vreduce_sum):\n{}",
        o3_ir
    );

    // Assembly: the loop-param transfer must go through `mov.16b`. Using
    // `fmov d` instead would clobber the upper lanes of the V128 accumulator
    // and yield a wrong sum.
    let o3_asm = capture_text(request_for(Stage::Asm), Stage::Asm);
    assert!(
        o3_asm.contains("mov.16b"),
        "regalloc must materialise V128 block-param transfers via `mov.16b`, not `fmov d`:\n{}",
        o3_asm
    );
    assert!(
        o3_asm.contains("addv.4s"),
        "VReduceSum should lower to `addv.4s` for an i32 accumulator:\n{}",
        o3_asm
    );

    // Runtime: sum(1..32) = 32*33/2 = 528.
    let stdout = capture_run_stdout(request_for(Stage::Run));
    // Compare only the non-blank lines, ignoring surrounding whitespace.
    let printed: Vec<&str> = stdout
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect();
    assert_eq!(
        printed,
        ["528"],
        "vectorized sum reduction should produce 528:\n{}",
        stdout
    );
}
93