@@ -39,18 +39,21 @@ fn o3_vectorizes_sum_reductions_with_scalar_tail() { |
| 39 | 39 | }, |
| 40 | 40 | Stage::OptIr, |
| 41 | 41 | ); |
| 42 | | - // Both reductions should leave a VAdd in the body and a |
| 43 | | - // vreduce_sum at exit followed by peeled scalar iadd ops that |
| 44 | | - // chain from the reduce result. |
| 42 | + // All four reductions should leave a vreduce_sum at exit |
| 43 | + // followed by peeled scalar iadd/fadd ops chaining from the |
| 44 | + // reduce result. |
| 45 | 45 | assert_eq!( |
| 46 | 46 | o3_ir.matches("vreduce_sum").count(), |
| 47 | | - 2, |
| 48 | | - "expected two vreduce_sum (i32 + i64):\n{}", |
| 47 | + 4, |
| 48 | + "expected four vreduce_sum (i32, i64, f32, f64):\n{}", |
| 49 | 49 | o3_ir |
| 50 | 50 | ); |
| 51 | 51 | assert!( |
| 52 | | - o3_ir.contains("<4 x i32>") && o3_ir.contains("<2 x i64>"), |
| 53 | | - "expected i32 and i64 vector accumulators in IR:\n{}", |
| 52 | + o3_ir.contains("<4 x i32>") |
| 53 | + && o3_ir.contains("<2 x i64>") |
| 54 | + && o3_ir.contains("<4 x f32>") |
| 55 | + && o3_ir.contains("<2 x f64>"), |
| 56 | + "expected i32/i64/f32/f64 vector accumulators in IR:\n{}", |
| 54 | 57 | o3_ir |
| 55 | 58 | ); |
| 56 | 59 | |
@@ -64,8 +67,18 @@ fn o3_vectorizes_sum_reductions_with_scalar_tail() { |
| 64 | 67 | .map(|l| l.trim()) |
| 65 | 68 | .filter(|l| !l.is_empty()) |
| 66 | 69 | .collect(); |
| 67 | | - assert_eq!(trimmed.len(), 2, "expected two output lines:\n{}", stdout); |
| 70 | + assert_eq!(trimmed.len(), 4, "expected four output lines:\n{}", stdout); |
| 68 | 71 | // 1 + 2 + ... + 31 = 31 * 32 / 2 = 496. |
| 69 | 72 | assert_eq!(trimmed[0], "496", "i32 sum wrong: got {:?}", trimmed[0]); |
| 70 | 73 | assert_eq!(trimmed[1], "496", "i64 sum wrong: got {:?}", trimmed[1]); |
| 74 | + assert!( |
| 75 | + trimmed[2].starts_with("4.96"), |
| 76 | + "f32 sum wrong: got {:?}", |
| 77 | + trimmed[2] |
| 78 | + ); |
| 79 | + assert!( |
| 80 | + trimmed[3].starts_with("4.96"), |
| 81 | + "f64 sum wrong: got {:?}", |
| 82 | + trimmed[3] |
| 83 | + ); |
| 71 | 84 | } |