Rust · 3890 bytes Raw Blame History
1 use std::collections::BTreeSet;
2 use std::path::PathBuf;
3
4 use armfortas::driver::OptLevel;
5 use armfortas::testing::{capture_from_path, CaptureRequest, CapturedStage, Stage};
6
/// Builds the path to the fixture called `name` under the `test_programs`
/// directory (relative to the current working directory).
///
/// # Panics
///
/// Panics with a "missing test fixture" message if the resulting path does
/// not exist.
fn fixture(name: &str) -> PathBuf {
    let mut candidate = PathBuf::from("test_programs");
    candidate.push(name);
    if !candidate.exists() {
        panic!("missing test fixture {}", candidate.display());
    }
    candidate
}
12
13 fn capture_text(request: CaptureRequest, stage: Stage) -> String {
14 let result = capture_from_path(&request).expect("capture should succeed");
15 match result.get(stage) {
16 Some(CapturedStage::Text(text)) => text.clone(),
17 Some(CapturedStage::Run(_)) => panic!("expected text stage for {}", stage.as_str()),
18 None => panic!("missing requested stage {}", stage.as_str()),
19 }
20 }
21
/// Captures the `do_loop_vectorize.f90` fixture at O0, O2 and O3 and checks
/// that the raw and O2 IR keep the scalar loop, that the O3 IR and assembly
/// show one of the two accepted vectorized shapes, and that two independent
/// O3 object captures compare equal.
#[test]
fn o3_vectorizes_full_extent_do_loop_and_keeps_objects_deterministic() {
    let source = fixture("do_loop_vectorize.f90");

    // Every capture targets the same fixture; only the requested stage set,
    // the optimization level, and the stage that is read back vary.
    let capture = |requested: BTreeSet<Stage>, opt_level: OptLevel, stage: Stage| -> String {
        capture_text(
            CaptureRequest {
                input: source.clone(),
                requested,
                opt_level,
            },
            stage,
        )
    };

    let raw_ir = capture(BTreeSet::from([Stage::Ir]), OptLevel::O0, Stage::Ir);
    let o2_ir = capture(BTreeSet::from([Stage::OptIr]), OptLevel::O2, Stage::OptIr);
    // This request asks for three stages at once but only the optimized IR
    // is read back from it.
    let o3_ir = capture(
        BTreeSet::from([Stage::OptIr, Stage::Asm, Stage::Obj]),
        OptLevel::O3,
        Stage::OptIr,
    );
    let o3_asm = capture(BTreeSet::from([Stage::Asm]), OptLevel::O3, Stage::Asm);
    // Two separate object captures of the same input, compared at the end.
    let o3_obj_a = capture(BTreeSet::from([Stage::Obj]), OptLevel::O3, Stage::Obj);
    let o3_obj_b = capture(BTreeSet::from([Stage::Obj]), OptLevel::O3, Stage::Obj);

    let raw_is_scalar = raw_ir.contains("do_check_") && raw_ir.contains("store %");
    assert!(
        raw_is_scalar,
        "raw IR should keep the explicit scalar loop shape:\n{}",
        raw_ir
    );

    let o2_is_scalar = o2_ir.contains("do_check_") && !o2_ir.contains("call @afs_array_add_i32(");
    assert!(
        o2_is_scalar,
        "O2 should keep the scalar loop for this ordinary DO map:\n{}",
        o2_ir
    );

    // O3 may vectorize in either of two shapes:
    //   * NeonVectorize rewrites the inner body to vload/vadd/vstore on
    //     128-bit lanes (preferred, no call overhead).
    //   * The older Vectorize pass replaces the loop with a single
    //     afs_array_add_i32 kernel call (fallback).
    let uses_kernel_call = o3_ir.contains("call @afs_array_add_i32(");
    let uses_neon_ops = o3_ir.contains("vstore") && o3_ir.contains("vadd");
    assert!(
        uses_neon_ops || uses_kernel_call,
        "O3 should vectorize the scalar loop (vload/vadd/vstore or bulk kernel call):\n{}",
        o3_ir
    );

    if uses_kernel_call {
        let loop_cfg_removed = !o3_ir.contains("do_check_");
        assert!(
            loop_cfg_removed,
            "kernel-form O3 should remove the loop CFG entirely:\n{}",
            o3_ir
        );
        assert!(
            o3_asm.contains("_afs_array_add_i32"),
            "kernel-form O3 assembly should reference the bulk add kernel:\n{}",
            o3_asm
        );
    } else {
        // Any one of these mnemonics is accepted as evidence of vector code.
        let has_vector_op = ["ldr q", "add.4s", "str q"]
            .iter()
            .any(|needle| o3_asm.contains(*needle));
        assert!(
            has_vector_op,
            "neon-form O3 assembly should reference 128-bit vector ops:\n{}",
            o3_asm
        );
    }

    assert_eq!(
        o3_obj_a, o3_obj_b,
        "O3 vectorized object snapshot should stay deterministic"
    );
}
120