From 658c4a3448117fd0bbe271f158ac6655f2abdaee Mon Sep 17 00:00:00 2001 From: AhmedSalih3d <36305327+AhmedSalih3d@users.noreply.github.com> Date: Fri, 23 Jan 2026 13:18:39 +0100 Subject: [PATCH 1/2] Add SIMD helpers to cell list --- Project.toml | 2 ++ src/SPHCellList.jl | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/Project.toml b/Project.toml index 3eae26b8..0607a527 100644 --- a/Project.toml +++ b/Project.toml @@ -16,6 +16,7 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +SIMD = "fdea26ae-647d-5447-a871-4b548cad5224" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" @@ -31,6 +32,7 @@ InteractiveUtils = "1.11.0" LibGit2 = "1.11.0" LoggingExtras = "^1.2.0" Parameters = "^0.12.3" +SIMD = "3.7.2" StaticArrays = "^1.9.15" StructArrays = "^0.7.2" TimerOutputs = "^0.5.29" diff --git a/src/SPHCellList.jl b/src/SPHCellList.jl index 1cb20127..ccd4866e 100644 --- a/src/SPHCellList.jl +++ b/src/SPHCellList.jl @@ -30,6 +30,7 @@ using TimerOutputs using HDF5 using Base.Threads using LinearAlgebra +using SIMD: Vec using Bumper function NeighborLoopPerParticle!(SimDensityDiffusion::SDD, SimViscosity::SV, SimKernel, @@ -350,6 +351,22 @@ using LinearAlgebra # in favour of the per-particle variants (ComputeInteractionsPerParticle! etc.). # It has been removed to reduce code size and avoid dead code. + @inline function DotSimd(a::SVector{2,T}, b::SVector{2,T}) where {T<:AbstractFloat} + veca = Vec{2,T}(Tuple(a)) + vecb = Vec{2,T}(Tuple(b)) + return sum(veca * vecb) + end + + @inline function DotSimd(a::SVector{3,T}, b::SVector{3,T}) where {T<:AbstractFloat} + veca = Vec{4,T}(a[1], a[2], a[3], zero(T)) + vecb = Vec{4,T}(b[1], b[2], b[3], zero(T)) + return sum(veca * vecb) + end + + @inline function DotSimd(a::SVector{D,T}, b::SVector{D,T}) where {D,T} + return dot(a, b) + end + @inline function compute_kernel_output_local(::SimulationMetaData{D,T,S,NoKernelOutput,B,L}, kernel_acc, kernel_grad_acc, SimKernel, q, ∇ᵢWᵢⱼ) where {D,T,S<:ShiftingMode, @@ -381,7 +398,7 @@ using LinearAlgebra @unpack h⁻¹, H², h = SimKernel xᵢⱼ = Position[i] - Position[j] - xᵢⱼ² = dot(xᵢⱼ, xᵢⱼ) + xᵢⱼ² = DotSimd(xᵢⱼ, xᵢⱼ) if xᵢⱼ² <= H² dᵢⱼ = sqrt(abs(xᵢⱼ²)) dᵢⱼ² = dᵢⱼ^2 @@ -394,7 +411,7 @@ using LinearAlgebra vᵢ = Velocity[i] vⱼ = Velocity[j] vᵢⱼ = vᵢ - vⱼ - density_symmetric_term = dot(-vᵢⱼ, ∇ᵢWᵢⱼ) + density_symmetric_term = DotSimd(-vᵢⱼ, ∇ᵢWᵢⱼ) dρdt⁺ = -ρᵢ * (m₀ / ρⱼ) * density_symmetric_term Dᵢ, _ = compute_density_diffusion(SimDensityDiffusion, SimKernel, SimConstants, SimParticles, xᵢⱼ, ∇ᵢWᵢⱼ, dᵢⱼ², i, j, ParticleType) @@ -428,7 +445,7 @@ using LinearAlgebra @unpack h⁻¹, H², h = SimKernel xᵢⱼ = Position[i] - Position[j] - xᵢⱼ² = dot(xᵢⱼ, xᵢⱼ) + xᵢⱼ² = DotSimd(xᵢⱼ, xᵢⱼ) if xᵢⱼ² <= H² dᵢⱼ = sqrt(abs(xᵢⱼ²)) dᵢⱼ² = dᵢⱼ^2 @@ -441,7 +458,7 @@ using LinearAlgebra vᵢ = Velocity[i] vⱼ = Velocity[j] vᵢⱼ = vᵢ - vⱼ - density_symmetric_term = dot(-vᵢⱼ, ∇ᵢWᵢⱼ) + density_symmetric_term = DotSimd(-vᵢⱼ, ∇ᵢWᵢⱼ) dρdt⁺ = -ρᵢ * (m₀ / ρⱼ) * density_symmetric_term Dᵢ, _ = compute_density_diffusion(SimDensityDiffusion, SimKernel, SimConstants, SimParticles, xᵢⱼ, ∇ᵢWᵢⱼ, dᵢⱼ², i, j, ParticleType) @@ -477,7 +494,7 @@ using LinearAlgebra @unpack h⁻¹, H², h = SimKernel xᵢⱼ = Position[i] - Position[j] - xᵢⱼ² = dot(xᵢⱼ, xᵢⱼ) + xᵢⱼ² = DotSimd(xᵢⱼ, xᵢⱼ) if xᵢⱼ² <= H² dᵢⱼ = sqrt(abs(xᵢⱼ²)) dᵢⱼ² = dᵢⱼ^2 @@ -490,7 +507,7 @@ using LinearAlgebra vᵢ = Velocity[i] vⱼ = Velocity[j] vᵢⱼ = vᵢ - vⱼ - density_symmetric_term = dot(-vᵢⱼ, ∇ᵢWᵢⱼ) + density_symmetric_term = DotSimd(-vᵢⱼ, ∇ᵢWᵢⱼ) dρdt⁺ = -ρᵢ * (m₀ / ρⱼ) * density_symmetric_term Dᵢ, _ = compute_density_diffusion(SimDensityDiffusion, SimKernel, SimConstants, SimParticles, xᵢⱼ, ∇ᵢWᵢⱼ, dᵢⱼ², i, j, ParticleType) @@ -511,7 +528,7 @@ using LinearAlgebra MotionLimiterCondition = MotionLimiterValue(eltype(ρᵢ), ParticleType[i]) * MotionLimiterValue(eltype(ρᵢ), ParticleType[j]) shift_c_acc += (m₀ / ρᵢ) * ∇ᵢWᵢⱼ - shift_r_acc += (m₀ / ρⱼ) * dot(-xᵢⱼ, ∇ᵢWᵢⱼ) * MotionLimiterCondition + shift_r_acc += (m₀ / ρⱼ) * DotSimd(-xᵢⱼ, ∇ᵢWᵢⱼ) * MotionLimiterCondition end return dρdt_acc, acc_acc, kernel_acc, kernel_grad_acc, shift_c_acc, shift_r_acc @@ -531,7 +548,7 @@ using LinearAlgebra @unpack h⁻¹, H², h = SimKernel xᵢⱼ = Position[i] - Position[j] - xᵢⱼ² = dot(xᵢⱼ, xᵢⱼ) + xᵢⱼ² = DotSimd(xᵢⱼ, xᵢⱼ) if xᵢⱼ² <= H² dᵢⱼ = sqrt(abs(xᵢⱼ²)) dᵢⱼ² = dᵢⱼ^2 @@ -544,7 +561,7 @@ using LinearAlgebra vᵢ = Velocity[i] vⱼ = Velocity[j] vᵢⱼ = vᵢ - vⱼ - density_symmetric_term = dot(-vᵢⱼ, ∇ᵢWᵢⱼ) + density_symmetric_term = DotSimd(-vᵢⱼ, ∇ᵢWᵢⱼ) dρdt⁺ = -ρᵢ * (m₀ / ρⱼ) * density_symmetric_term Dᵢ, _ = compute_density_diffusion(SimDensityDiffusion, SimKernel, SimConstants, SimParticles, xᵢⱼ, ∇ᵢWᵢⱼ, dᵢⱼ², i, j, ParticleType) @@ -563,7 +580,7 @@ using LinearAlgebra MotionLimiterCondition = MotionLimiterValue(eltype(ρᵢ), ParticleType[i]) * MotionLimiterValue(eltype(ρᵢ), ParticleType[j]) shift_c_acc += (m₀ / ρᵢ) * ∇ᵢWᵢⱼ - shift_r_acc += (m₀ / ρⱼ) * dot(-xᵢⱼ, ∇ᵢWᵢⱼ) * MotionLimiterCondition + shift_r_acc += (m₀ / ρⱼ) * DotSimd(-xᵢⱼ, ∇ᵢWᵢⱼ) * MotionLimiterCondition end return dρdt_acc, acc_acc, shift_c_acc, shift_r_acc @@ -585,7 +602,7 @@ using LinearAlgebra xᵢⱼ = GhostPoints[i] - Position[j] - xᵢⱼ² = dot(xᵢⱼ, xᵢⱼ) + xᵢⱼ² = DotSimd(xᵢⱼ, xᵢⱼ) if xᵢⱼ² <= H² dᵢⱼ = sqrt(abs(xᵢⱼ²)) q = clamp(dᵢⱼ * h⁻¹, 0.0, 2.0) From 5409ece7bb65ff6b535414fb6ed6c28c281258cc Mon Sep 17 00:00:00 2001 From: AhmedSalih3d <36305327+AhmedSalih3d@users.noreply.github.com> Date: Fri, 23 Jan 2026 14:04:51 +0100 Subject: [PATCH 2/2] Expand SIMD dot usage --- src/AuxiliaryFunctions.jl | 26 +++++++++++++++++++++++++- src/SPHCellList.jl | 17 ----------------- src/SPHDensityDiffusionModels.jl | 7 ++++--- src/SPHViscosityModels.jl | 5 +++-- 4 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/AuxiliaryFunctions.jl b/src/AuxiliaryFunctions.jl index c9e8fc2f..d2dc82ef 100644 --- a/src/AuxiliaryFunctions.jl +++ b/src/AuxiliaryFunctions.jl @@ -2,8 +2,10 @@ module AuxiliaryFunctions using StaticArrays using Base.Threads using HDF5 +using SIMD: Vec +import LinearAlgebra: dot -export ResetArrays!, to_3d, CloseHDFVTKManually, CleanUpSimulationFolder +export ResetArrays!, to_3d, CloseHDFVTKManually, CleanUpSimulationFolder, DotSimd """ ResetArrays!(arrays...) @@ -19,6 +21,28 @@ Convert a vector of 2D `SVector`s to 3D by appending a zero z-component. """ @inline to_3d(vec_2d) = [SVector(v..., 0.0) for v in vec_2d] +""" + DotSimd(a, b) + +SIMD-accelerated dot product for `SVector` inputs where it is practical, +falling back to `dot` for other sizes and types. +""" +@inline function DotSimd(a::SVector{2,T}, b::SVector{2,T}) where {T<:AbstractFloat} + veca = Vec{2,T}(Tuple(a)) + vecb = Vec{2,T}(Tuple(b)) + return sum(veca * vecb) +end + +@inline function DotSimd(a::SVector{3,T}, b::SVector{3,T}) where {T<:AbstractFloat} + veca = Vec{4,T}(a[1], a[2], a[3], zero(T)) + vecb = Vec{4,T}(b[1], b[2], b[3], zero(T)) + return sum(veca * vecb) +end + +@inline function DotSimd(a::SVector{D,T}, b::SVector{D,T}) where {D,T} + return dot(a, b) +end + """ to_3d!(dest, src) diff --git a/src/SPHCellList.jl b/src/SPHCellList.jl index ccd4866e..6fa6b5ef 100644 --- a/src/SPHCellList.jl +++ b/src/SPHCellList.jl @@ -30,7 +30,6 @@ using TimerOutputs using HDF5 using Base.Threads using LinearAlgebra -using SIMD: Vec using Bumper function NeighborLoopPerParticle!(SimDensityDiffusion::SDD, SimViscosity::SV, SimKernel, @@ -351,22 +350,6 @@ using SIMD: Vec # in favour of the per-particle variants (ComputeInteractionsPerParticle! etc.). # It has been removed to reduce code size and avoid dead code. - @inline function DotSimd(a::SVector{2,T}, b::SVector{2,T}) where {T<:AbstractFloat} - veca = Vec{2,T}(Tuple(a)) - vecb = Vec{2,T}(Tuple(b)) - return sum(veca * vecb) - end - - @inline function DotSimd(a::SVector{3,T}, b::SVector{3,T}) where {T<:AbstractFloat} - veca = Vec{4,T}(a[1], a[2], a[3], zero(T)) - vecb = Vec{4,T}(b[1], b[2], b[3], zero(T)) - return sum(veca * vecb) - end - - @inline function DotSimd(a::SVector{D,T}, b::SVector{D,T}) where {D,T} - return dot(a, b) - end - @inline function compute_kernel_output_local(::SimulationMetaData{D,T,S,NoKernelOutput,B,L}, kernel_acc, kernel_grad_acc, SimKernel, q, ∇ᵢWᵢⱼ) where {D,T,S<:ShiftingMode, diff --git a/src/SPHDensityDiffusionModels.jl b/src/SPHDensityDiffusionModels.jl index 71651654..58cee3f4 100644 --- a/src/SPHDensityDiffusionModels.jl +++ b/src/SPHDensityDiffusionModels.jl @@ -3,6 +3,7 @@ module SPHDensityDiffusionModels using StaticArrays, LinearAlgebra, Parameters using ..SimulationEquations using ..SimulationGeometry +using ..AuxiliaryFunctions #--------------------------------------------------------------- # Exported #--------------------------------------------------------------- @@ -80,7 +81,7 @@ struct ZeroGravityLinearDensityDiffusion <: SPHDensityDiffusion end ρⱼᵢ = ρⱼ - ρᵢ ψᵢⱼ = 2 * ρⱼᵢ * (-xᵢⱼ) * invdᵢⱼ²η² - Dᵢ = δᵩ * h * c₀ * (m₀/ρⱼ) * dot(ψᵢⱼ, ∇ᵢWᵢⱼ) + Dᵢ = δᵩ * h * c₀ * (m₀/ρⱼ) * DotSimd(ψᵢⱼ, ∇ᵢWᵢⱼ) Dⱼ = -Dᵢ @@ -130,7 +131,7 @@ struct LinearDensityDiffusion <: SPHDensityDiffusion end MotionLimiterCondition = MotionLimiterValue(eltype(ρᵢ), ParticleType[i]) * MotionLimiterValue(eltype(ρᵢ), ParticleType[j]) - Dᵢ = δᵩ * h * c₀ * (m₀/ρⱼ) * dot(ψᵢⱼ, ∇ᵢWᵢⱼ) * MotionLimiterCondition + Dᵢ = δᵩ * h * c₀ * (m₀/ρⱼ) * DotSimd(ψᵢⱼ, ∇ᵢWᵢⱼ) * MotionLimiterCondition Dⱼ = -Dᵢ return Dᵢ, Dⱼ @@ -182,7 +183,7 @@ struct ComplexDensityDiffusion <: SPHDensityDiffusion end MotionLimiterCondition = MotionLimiterValue(eltype(ρᵢ), ParticleType[i]) * MotionLimiterValue(eltype(ρᵢ), ParticleType[j]) - Dᵢ = δᵩ * h * c₀ * (m₀/ρⱼ) * dot(ψᵢⱼ, ∇ᵢWᵢⱼ) * MotionLimiterCondition + Dᵢ = δᵩ * h * c₀ * (m₀/ρⱼ) * DotSimd(ψᵢⱼ, ∇ᵢWᵢⱼ) * MotionLimiterCondition Dⱼ = -Dᵢ return Dᵢ, Dⱼ diff --git a/src/SPHViscosityModels.jl b/src/SPHViscosityModels.jl index 318b0419..2c6acba4 100644 --- a/src/SPHViscosityModels.jl +++ b/src/SPHViscosityModels.jl @@ -1,6 +1,7 @@ module SPHViscosityModels using StaticArrays, LinearAlgebra, Parameters +using ..AuxiliaryFunctions export SPHViscosity, ZeroViscosity, ArtificialViscosity, Laminar, LaminarSPS, compute_viscosity @@ -61,7 +62,7 @@ end ρᵢ = SimParticles.Density[i] ρⱼ = SimParticles.Density[j] - v_dot_x = dot(vᵢⱼ, xᵢⱼ) + v_dot_x = DotSimd(vᵢⱼ, xᵢⱼ) if v_dot_x < 0 ρ̄ = 0.5 * (ρᵢ + ρⱼ) μᵢⱼ = h * v_dot_x / (d² + η²) @@ -82,7 +83,7 @@ end ρᵢ = SimParticles.Density[i] ρⱼ = SimParticles.Density[j] - term = (4 * m₀ * ν₀ * dot(xᵢⱼ, ∇ᵢWᵢⱼ)) / ((ρᵢ + ρⱼ) + (d² + η²)) + term = (4 * m₀ * ν₀ * DotSimd(xᵢⱼ, ∇ᵢWᵢⱼ)) / ((ρᵢ + ρⱼ) + (d² + η²)) return term * vᵢⱼ, -term * vᵢⱼ end