@@ -39,22 +39,15 @@ fn o3_vectorizes_sum_with_unary_load() { |
| 39 | 39 | }, |
| 40 | 40 | Stage::OptIr, |
| 41 | 41 | ); |
| 42 | | - // Expect the unary lifted into the vector lane (vneg / vabs) |
| 43 | | - // inside the body, plus a vreduce_sum at exit. |
| 44 | 42 | assert!( |
| 45 | | - o3_ir.contains("vneg"), |
| 46 | | - "expected vneg in IR:\n{}", |
| 47 | | - o3_ir |
| 48 | | - ); |
| 49 | | - assert!( |
| 50 | | - o3_ir.contains("vabs"), |
| 51 | | - "expected vabs in IR:\n{}", |
| 43 | + o3_ir.contains("vneg") && o3_ir.contains("vabs"), |
| 44 | + "expected both vneg and vabs in IR:\n{}", |
| 52 | 45 | o3_ir |
| 53 | 46 | ); |
| 54 | 47 | assert_eq!( |
| 55 | 48 | o3_ir.matches("vreduce_sum").count(), |
| 56 | | - 2, |
| 57 | | - "expected two vreduce_sum:\n{}", |
| 49 | + 4, |
| 50 | + "expected four vreduce_sum:\n{}", |
| 58 | 51 | o3_ir |
| 59 | 52 | ); |
| 60 | 53 | |
@@ -68,11 +61,24 @@ fn o3_vectorizes_sum_with_unary_load() { |
| 68 | 61 | .map(|l| l.trim()) |
| 69 | 62 | .filter(|l| !l.is_empty()) |
| 70 | 63 | .collect(); |
| 71 | | - assert_eq!(trimmed.len(), 2, "expected two output lines:\n{}", stdout); |
| 64 | + assert_eq!(trimmed.len(), 4, "expected four output lines:\n{}", stdout); |
| 65 | + // sum(-i for i=1..32) = -528. |
| 72 | 66 | assert_eq!(trimmed[0], "-528", "neg sum wrong: got {:?}", trimmed[0]); |
| 67 | + // sum(|i-16| for i=1..32) = 240 (i=1..31) + 16 (i=32) = 256. |
| 73 | 68 | assert!( |
| 74 | 69 | trimmed[1].starts_with("2.56"), |
| 75 | | - "abs sum wrong: got {:?}", |
| 70 | + "f32 abs sum wrong: got {:?}", |
| 76 | 71 | trimmed[1] |
| 77 | 72 | ); |
| 73 | + assert!( |
| 74 | + trimmed[2].starts_with("2.56"), |
| 75 | + "f64 abs sum wrong: got {:?}", |
| 76 | + trimmed[2] |
| 77 | + ); |
| 78 | + // Trip count = 31 (head 28 + tail 3); sum(|i-16| for i=1..31) = 240. |
| 79 | + assert!( |
| 80 | + trimmed[3].starts_with("2.4"), |
| 81 | + "f32 abs+tail wrong: got {:?}", |
| 82 | + trimmed[3] |
| 83 | + ); |
| 78 | 84 | } |