diff --git a/src/plan.jl b/src/plan.jl index efa8333..8a72045 100644 --- a/src/plan.jl +++ b/src/plan.jl @@ -145,10 +145,17 @@ function LinearAlgebra.mul!(y::AbstractArray{U,N}, p::FFTAPlan_cx{T,1}, x::Abstr end Rpre = CartesianIndices(size(x)[1:p.region[]-1]) Rpost = CartesianIndices(size(x)[p.region[]+1:end]) - for Ipre in Rpre - for Ipost in Rpost - @views fft!(y[Ipre,:,Ipost], x[Ipre,:,Ipost], 1, 1, p.dir, p.callgraph[1][1].type, p.callgraph[1], 1) - end + _mul_loop!(y, x, Rpre, Rpost, p) +end + +function _mul_loop!( + y::AbstractArray{U,N}, + x::AbstractArray{T,N}, + Rpre::CartesianIndices, + Rpost::CartesianIndices, + p::FFTAPlan_cx{T,1}) where {T,U,N} + for Ipost in Rpost, Ipre in Rpre + @views fft!(y[Ipre,:,Ipost], x[Ipre,:,Ipost], 1, 1, p.dir, p.callgraph[1][1].type, p.callgraph[1], 1) end end @@ -172,10 +179,10 @@ function LinearAlgebra.mul!(y::AbstractArray{U,N}, p::FFTAPlan_cx{T,2}, x::Abstr # Introduce function barrier here since the variables used in the loop ranges aren't inferred. This # is partly because the region field of the plan is abstractly typed but even if that wasn't the case, # it might be a bit tricky to construct the Rxs in an inferred way. - _mul_loop(y_tmp, y, x, p, R1, R2, R3, rows, cols) + _mul_loop!(y_tmp, y, x, p, R1, R2, R3, rows, cols) end -function _mul_loop( +function _mul_loop!( y_tmp::AbstractArray, y::AbstractArray, x::AbstractArray, @@ -186,17 +193,13 @@ function _mul_loop( rows::Int, cols::Int ) - for I1 in R1 - for I2 in R2 - for I3 in R3 - for k in 1:cols - @views fft!(y_tmp[:,k], x[I1,:,I2,k,I3], 1, 1, p.dir, p.callgraph[1][1].type, p.callgraph[1], 1) - end + for I3 in R3, I2 in R2, I1 in R1 + for k in 1:cols + @views fft!(y_tmp[:,k], x[I1,:,I2,k,I3], 1, 1, p.dir, p.callgraph[1][1].type, p.callgraph[1], 1) + end - for k in 1:rows - @views fft!(y[I1,k,I2,:,I3], y_tmp[k,:], 1, 1, p.dir, p.callgraph[2][1].type, p.callgraph[2], 1) - end - end + for k in 1:rows + @views fft!(y[I1,k,I2,:,I3], y_tmp[k,:], 1, 1, p.dir, p.callgraph[2][1].type, p.callgraph[2], 1) end end end