Compute column-major canonical strides in afs_allocate_array
Committed by
mfwolffe
- SHA: 8807b8862897bfc2f09a61c871fcaa89d4ea5be0
- Parents: 5537df7
- Tree: c2ed164
8807b88
8807b8862897bfc2f09a61c871fcaa89d4ea5be0
5537df7
c2ed164

| Status | File | + | - |
|---|---|---|---|
| M | runtime/src/array.rs | 16 | 0 |
runtime/src/array.rs (modified)
@@ -990,6 +990,22 @@ pub extern "C" fn afs_allocate_array( | ||
| 990 | 990 | } |
| 991 | 991 | } |
| 992 | 992 | |
| 993 | + // ALLOCATE always produces a single contiguous block, so the | |
| 994 | + // descriptor's per-dim memory stride must be the column-major | |
| 995 | + // canonical step (1 for dim 0, then product of preceding extents). | |
| 996 | + // Compiler-generated dim_buf entries pass stride=1 across the | |
| 997 | + // board (the rank-1 case happens to be correct, multi-dim wasn't), | |
| 998 | + // so fix it up here. Without this, allocatable rank-N section | |
| 999 | + // assignments fed afs_create_section a stride=1 source and the | |
| 1000 | + // produced section descriptor stepped through memory contiguously, | |
| 1001 | + // collapsing column-major rows into a single linear walk and | |
| 1002 | + // corrupting both the LHS write and any subsequent RHS read. | |
| 1003 | + let mut running_stride: i64 = 1; | |
| 1004 | + for i in 0..rank as usize { | |
| 1005 | + desc.dims[i].stride = running_stride; | |
| 1006 | + running_stride = running_stride.saturating_mul(desc.dims[i].extent().max(1)); | |
| 1007 | + } | |
| 1008 | + | |
| 993 | 1009 | // Compute total bytes. |
| 994 | 1010 | let total = desc.total_elements(); |
| 995 | 1011 | let bytes = total * elem_size; |