@@ -99,31 +99,19 @@ static inline void STA(R *x, V v, INT ovs, const R *aligned_like) {
/* LDu: strided vector load. Gathers eight 64-bit values from x at 32-bit indices k*ivs (k = 0..7), each index scaled by 4 bytes, and returns them as a V. aligned_like is ignored. */
/* NOTE(review): this is the FFTW_SINGLE branch, so R is presumably a 4-byte float and each 64-bit value covers two adjacent R elements -- confirm against the definition of R. */
9999static inline V LDu (const R * x , INT ivs , const R * aligned_like )
100100{
101101 (void )aligned_like ; /* UNUSED: kept only so the signature matches the other load/store helpers */
102- __m512i index = _mm512_set_epi32 (7 * ivs + 1 , 7 * ivs ,
103- 6 * ivs + 1 , 6 * ivs ,
104- 5 * ivs + 1 , 5 * ivs ,
105- 4 * ivs + 1 , 4 * ivs ,
106- 3 * ivs + 1 , 3 * ivs ,
107- 2 * ivs + 1 , 2 * ivs ,
108- 1 * ivs + 1 , 1 * ivs ,
109- 0 * ivs + 1 , 0 * ivs );
102+ /* Treat each adjacent pair of singles as one double: eight 64-bit gathers at index k*ivs (scale 4) replace the sixteen 32-bit gathers at k*ivs and k*ivs+1 used by the removed lines above. The last argument of _mm256_set_epi32 is lane 0, so lane k holds k*ivs. */
103+ const __m256i index = _mm256_set_epi32 (7 * ivs , 6 * ivs , 5 * ivs , 4 * ivs , 3 * ivs , 2 * ivs , 1 * ivs , 0 * ivs );
110104
111- return _mm512_i32gather_ps (index , x , 4 );
105+ return ( V ) _mm512_i32gather_pd (index , x , 4 ); /* NOTE(review): relies on a direct cast from the __m512d result to V; confirm every supported compiler accepts vector-type casts (the _mm512_castpd_ps intrinsic is the portable spelling). */
112106}
113107
/* STu: strided vector store. Scatters the eight 64-bit lanes of v to x at 32-bit indices k*ovs (k = 0..7), each index scaled by 4 bytes. aligned_like is ignored. */
/* NOTE(review): this is the FFTW_SINGLE branch, so R is presumably a 4-byte float and each 64-bit lane covers two adjacent R elements -- confirm against the definition of R. */
114108static inline void STu (R * x , V v , INT ovs , const R * aligned_like )
115109{
116110 (void )aligned_like ; /* UNUSED: kept only so the signature matches the other load/store helpers */
117- __m512i index = _mm512_set_epi32 (7 * ovs + 1 , 7 * ovs ,
118- 6 * ovs + 1 , 6 * ovs ,
119- 5 * ovs + 1 , 5 * ovs ,
120- 4 * ovs + 1 , 4 * ovs ,
121- 3 * ovs + 1 , 3 * ovs ,
122- 2 * ovs + 1 , 2 * ovs ,
123- 1 * ovs + 1 , 1 * ovs ,
124- 0 * ovs + 1 , 0 * ovs );
125-
126- _mm512_i32scatter_ps (x , index , v , 4 );
111+ /* Treat each adjacent pair of singles as one double: eight 64-bit scatters at index k*ovs (scale 4) replace the sixteen 32-bit scatters at k*ovs and k*ovs+1 used by the removed lines above. The last argument of _mm256_set_epi32 is lane 0, so lane k holds k*ovs. */
112+ const __m256i index = _mm256_set_epi32 (7 * ovs , 6 * ovs , 5 * ovs , 4 * ovs , 3 * ovs , 2 * ovs , 1 * ovs , 0 * ovs );
113+
114+ _mm512_i32scatter_pd (x , index , (__m512d )v , 4 ); /* NOTE(review): relies on a direct cast from V to __m512d; confirm every supported compiler accepts vector-type casts (the _mm512_castps_pd intrinsic is the portable spelling). */
127115}
128116
129117#else /* !FFTW_SINGLE */
0 commit comments