Implement array-of-structs-of-arrays (AoSoA) data containers whose inner arrays are sized to the SIMD width of the executing architecture. This will allow better caching and coalescing of kernel memory accesses and will improve vectorization.
/* Array-of-structs (AoS) element: the x, y and z fields of a single
 * element are stored side by side in one struct. */
struct AoS {
    int x;
    int y;
    int z;
};
/* Pseudo-declaration: the AoS layout is an array of n AoS elements. */
AoS[n];
/* Struct-of-arrays (SoA) layout: each field is kept in its own
 * n-element array, so all x values are contiguous, then all y values,
 * then all z values. */
struct SoA {
    int x[n];
    int y[n];
    int z[n];
};
/* Array-of-structs-of-arrays (AoSoA) block: each field is a short
 * inner array of VEC_WIDTH entries, where VEC_WIDTH is meant to be the
 * SIMD width of the executing architecture. */
struct AoSoA {
    int x[VEC_WIDTH];
    int y[VEC_WIDTH];
    int z[VEC_WIDTH];
};
AoSoA[ N / VEC_WIDTH];