From 61deef313bec6b2587f3a26c597d8a277e8d7c81 Mon Sep 17 00:00:00 2001 From: AhmedSalih3d <36305327+AhmedSalih3d@users.noreply.github.com> Date: Thu, 15 Jan 2026 22:42:55 +0100 Subject: [PATCH 1/2] Add CUDA neighbor loop path --- README.md | 15 ++ example/StillWedgeMDBC.jl | 39 ++-- src/SPHCUDA.jl | 470 ++++++++++++++++++++++++++++++++++++++ src/SPHExample.jl | 11 + 4 files changed, 522 insertions(+), 13 deletions(-) create mode 100644 src/SPHCUDA.jl diff --git a/README.md b/README.md index c23bc265..9e184d45 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ src/ ├── PreProcess.jl # Load inputs and allocate arrays ├── ProduceHDFVTK.jl # Write simulation data in HDF5/VTK format ├── SPHCellList.jl # Custom neighbour search and time stepping +├── SPHCUDA.jl # Optional CUDA interaction path ├── SPHDensityDiffusionModels.jl # Density diffusion implementations ├── SPHExample.jl # Glue module re-exporting all functions ├── SPHKernels.jl # SPH kernel definitions @@ -91,6 +92,20 @@ one plus the particles in its neighbor stencil. Neighbor rebuilds use a per-cell ordering buffer without reordering particle storage. +### CUDA Acceleration (Optional) + +The solver can offload the particle interaction loops to a CUDA GPU when +`CUDA.jl` is available in your Julia environment. The helper `CUDAAvailable()` +reports whether CUDA is functional, and the `RunSimulationCUDA` entry point +mirrors `RunSimulation` while delegating the interaction loops to the GPU. +Currently, the CUDA path supports the `NoShifting` and `NoKernelOutput` +configurations with `LinearDensityDiffusion` or `ZeroDensityDiffusion` plus +`ArtificialViscosity` or `ZeroViscosity`. + +To try the GPU workflow, install CUDA.jl in your environment and run +`example/StillWedgeMDBC.jl`, which will automatically pick `RunSimulationCUDA` +when CUDA is available. + ## Help Questions or issues can be posted on the GitHub issue tracker. Response times may vary but all feedback is welcome. diff --git a/example/StillWedgeMDBC.jl b/example/StillWedgeMDBC.jl index 560021b6..727d42d4 100644 --- a/example/StillWedgeMDBC.jl +++ b/example/StillWedgeMDBC.jl @@ -7,7 +7,7 @@ let SimConstantsWedge = SimulationConstants{FloatType}(dx=0.02,c₀=42.48576250492629, δᵩ = 0.1, CFL=0.5) # SimConstantsWedge = SimulationConstants{FloatType}(dx=0.01,c₀=43.4, δᵩ = 0.1, CFL=0.2) # - SimMetaDataWedge = SimulationMetaData{Dimensions,FloatType,NoShifting,NoKernelOutput,NoMDBC,StoreLog}( + SimMetaDataWedge = SimulationMetaData{Dimensions,FloatType,NoShifting,NoKernelOutput,SimpleMDBC,StoreLog}( SimulationName="StillWedge", SaveLocation="W:/Simulations/StillWedge2D_MDBC", SimulationTime=4.0, @@ -49,17 +49,31 @@ let SimKernel = SPHKernelInstance{Dimensions, FloatType}(WendlandC2(); dx = SimConstantsWedge.dx) - @profview RunSimulation( - SimGeometry = SimulationGeometry, - SimMetaData = SimMetaDataWedge, - SimConstants = SimConstantsWedge, - SimKernel = SimKernel, - SimLogger = SimLogger, - SimParticles = SimParticles, - SimViscosity = ArtificialViscosity(), - SimDensityDiffusion = LinearDensityDiffusion(), - ParticleNormalsPath = "./input/still_wedge_mdbc/StillWedge_Dp$(SimConstantsWedge.dx)_GhostNodes_Correct.csv" - ) + if CUDAAvailable() + @profview RunSimulationCUDA( + SimGeometry = SimulationGeometry, + SimMetaData = SimMetaDataWedge, + SimConstants = SimConstantsWedge, + SimKernel = SimKernel, + SimLogger = SimLogger, + SimParticles = SimParticles, + SimViscosity = ArtificialViscosity(), + SimDensityDiffusion = LinearDensityDiffusion(), + ParticleNormalsPath = "./input/still_wedge_mdbc/StillWedge_Dp$(SimConstantsWedge.dx)_GhostNodes_Correct.csv" + ) + else + @profview RunSimulation( + SimGeometry = SimulationGeometry, + SimMetaData = SimMetaDataWedge, + SimConstants = SimConstantsWedge, + SimKernel = SimKernel, + SimLogger = SimLogger, + SimParticles = SimParticles, + SimViscosity = ArtificialViscosity(), + SimDensityDiffusion = LinearDensityDiffusion(), + ParticleNormalsPath = "./input/still_wedge_mdbc/StillWedge_Dp$(SimConstantsWedge.dx)_GhostNodes_Correct.csv" + ) + end # This can be used to plot pressure profile results after simulation # using Plots @@ -97,4 +111,3 @@ let # display(plt) end - diff --git a/src/SPHCUDA.jl b/src/SPHCUDA.jl new file mode 100644 index 00000000..3f6107f8 --- /dev/null +++ b/src/SPHCUDA.jl @@ -0,0 +1,470 @@ +module SPHCUDA + +export RunSimulationCUDA, CUDAAvailable + +using CUDA +using LinearAlgebra +using Parameters +using StaticArrays + +using ..SPHKernels +using ..SPHViscosityModels +using ..SPHDensityDiffusionModels +using ..SimulationMetaDataConfiguration +using ..SimulationConstantsConfiguration +using ..SimulationLoggerConfiguration +using ..SimulationGeometry +using ..SimulationEquations +using ..OpenExternalPrograms +using ..PreProcess +using ..ProduceHDFVTK +using ..TimeStepping +using ..SPHNeighborList +using ..SPHCellList + +using StructArrays: StructArray + +CUDAAvailable() = CUDA.functional() + +@inline function BuildNeighborCellRanges(NeighborCellLists, cell_count) + starts = zeros(Int, cell_count + 1) + total = 0 + @inbounds for index in 1:cell_count + starts[index] = total + 1 + total += length(NeighborCellLists[index]) + end + starts[cell_count + 1] = total + 1 + + entries = Vector{Int}(undef, total) + cursor = 1 + @inbounds for index in 1:cell_count + for neighbor_index in NeighborCellLists[index] + entries[cursor] = neighbor_index + cursor += 1 + end + end + return starts, entries +end + +@inline function BuildCellListIndices(Cells, CellLookup) + indices = similar(Cells, Int) + @inbounds for i in eachindex(Cells) + indices[i] = CellLookupIndex(CellLookup, Cells[i], 1) + end + return indices +end + +@inline function ComputeDensityDiffusionGPU(::ZeroDensityDiffusion, _SimKernel, _SimConstants, + _Density, _MotionLimiter, _xᵢⱼ, _∇ᵢWᵢⱼ, d², _i, _j) + return zero(d²) +end + +@inline function ComputeDensityDiffusionGPU(::LinearDensityDiffusion, SimKernel, SimConstants, + Density, MotionLimiter, xᵢⱼ, ∇ᵢWᵢⱼ, d², i, j) + @unpack ρ₀, m₀, c₀, δᵩ, Cb, γ, g = SimConstants + @unpack h, η² = SimKernel + + Linear_ρ_factor = (1 / (Cb * γ)) * ρ₀ + ρᵢ = Density[i] + ρⱼ = Density[j] + + Pᵢⱼᴴ = ρ₀ * (-g) * -xᵢⱼ[end] + ρᵢⱼᴴ = Pᵢⱼᴴ * Linear_ρ_factor + + invdᵢⱼ²η² = one(eltype(ρᵢ)) / (d² + η²) + ρⱼᵢ = ρⱼ - ρᵢ + ψᵢⱼ = 2 * (ρⱼᵢ - ρᵢⱼᴴ) * (-xᵢⱼ) * invdᵢⱼ²η² + + MLcond = MotionLimiter[i] * MotionLimiter[j] + Dᵢ = δᵩ * h * c₀ * (m₀ / ρⱼ) * dot(ψᵢⱼ, ∇ᵢWᵢⱼ) * MLcond + + return Dᵢ +end + +@inline function ComputeViscosityGPU(::ZeroViscosity, _SimKernel, _SimConstants, + _Density, _xᵢⱼ, _vᵢⱼ, _∇ᵢWᵢⱼ, _d², _i, _j) + return zero(_xᵢⱼ) +end + +@inline function ComputeViscosityGPU(::ArtificialViscosity, SimKernel, SimConstants, + Density, xᵢⱼ, vᵢⱼ, ∇ᵢWᵢⱼ, d², i, j) + @unpack m₀, α, c₀ = SimConstants + @unpack h, η² = SimKernel + + ρᵢ = Density[i] + ρⱼ = Density[j] + + v_dot_x = dot(vᵢⱼ, xᵢⱼ) + if v_dot_x < 0 + ρ̄ = 0.5 * (ρᵢ + ρⱼ) + μᵢⱼ = h * v_dot_x / (d² + η²) + Π = -m₀ * (-α * c₀ * μᵢⱼ) / ρ̄ * ∇ᵢWᵢⱼ + return Π + end + + return zero(xᵢⱼ) +end + +function NeighborLoopCUDAKernel!(dρdtI, Acceleration, Position, Density, Pressure, Velocity, + MotionLimiter, CellListIndices, ParticleRanges, ParticleOrder, + NeighborCellStarts, NeighborCellEntries, SimKernel, SimConstants, + SimDensityDiffusion, SimViscosity) + i = (blockIdx().x - 1) * blockDim().x + threadIdx().x + if i <= length(Position) + dρdt_acc = zero(eltype(dρdtI)) + acc_acc = zero(Position[i]) + cell_list_index = CellListIndices[i] + same_cell_start = ParticleRanges[cell_list_index] + same_cell_end = ParticleRanges[cell_list_index + 1] - 1 + + @inbounds for j in same_cell_start:same_cell_end + j_index = ParticleOrder[j] + if j_index != i + xᵢⱼ = Position[i] - Position[j_index] + xᵢⱼ² = dot(xᵢⱼ, xᵢⱼ) + if xᵢⱼ² <= SimKernel.H² + dᵢⱼ = sqrt(abs(xᵢⱼ²)) + q = clamp(dᵢⱼ * SimKernel.h⁻¹, zero(eltype(dᵢⱼ)), one(eltype(dᵢⱼ)) * 2) + ∇ᵢWᵢⱼ = ∇Wᵢⱼ(SimKernel, q, xᵢⱼ) + + ρᵢ = Density[i] + ρⱼ = Density[j_index] + vᵢⱼ = Velocity[i] - Velocity[j_index] + + density_symmetric_term = dot(-vᵢⱼ, ∇ᵢWᵢⱼ) + dρdt⁺ = -ρᵢ * (SimConstants.m₀ / ρⱼ) * density_symmetric_term + + Dᵢ = ComputeDensityDiffusionGPU(SimDensityDiffusion, SimKernel, SimConstants, + Density, MotionLimiter, xᵢⱼ, ∇ᵢWᵢⱼ, + dᵢⱼ^2, i, j_index) + + dρdt_acc += dρdt⁺ + Dᵢ + + Pᵢ = Pressure[i] + Pⱼ = Pressure[j_index] + Pfac = (Pᵢ + Pⱼ) / (ρᵢ * ρⱼ) + f_ab = tensile_correction(SimKernel, Pᵢ, ρᵢ, Pⱼ, ρⱼ, q, SimConstants.dx) + dvdt⁺ = -SimConstants.m₀ * (Pfac + f_ab) * ∇ᵢWᵢⱼ + + visc_term = ComputeViscosityGPU(SimViscosity, SimKernel, SimConstants, + Density, xᵢⱼ, vᵢⱼ, ∇ᵢWᵢⱼ, dᵢⱼ^2, i, j_index) + + acc_acc += dvdt⁺ + visc_term + end + end + end + + neighbor_start = NeighborCellStarts[cell_list_index] + neighbor_end = NeighborCellStarts[cell_list_index + 1] - 1 + @inbounds for neighbor_cursor in neighbor_start:neighbor_end + neighbor_index = NeighborCellEntries[neighbor_cursor] + start_index = ParticleRanges[neighbor_index] + end_index = ParticleRanges[neighbor_index + 1] - 1 + for j in start_index:end_index + j_index = ParticleOrder[j] + xᵢⱼ = Position[i] - Position[j_index] + xᵢⱼ² = dot(xᵢⱼ, xᵢⱼ) + if xᵢⱼ² <= SimKernel.H² + dᵢⱼ = sqrt(abs(xᵢⱼ²)) + q = clamp(dᵢⱼ * SimKernel.h⁻¹, zero(eltype(dᵢⱼ)), one(eltype(dᵢⱼ)) * 2) + ∇ᵢWᵢⱼ = ∇Wᵢⱼ(SimKernel, q, xᵢⱼ) + + ρᵢ = Density[i] + ρⱼ = Density[j_index] + vᵢⱼ = Velocity[i] - Velocity[j_index] + + density_symmetric_term = dot(-vᵢⱼ, ∇ᵢWᵢⱼ) + dρdt⁺ = -ρᵢ * (SimConstants.m₀ / ρⱼ) * density_symmetric_term + + Dᵢ = ComputeDensityDiffusionGPU(SimDensityDiffusion, SimKernel, SimConstants, + Density, MotionLimiter, xᵢⱼ, ∇ᵢWᵢⱼ, + dᵢⱼ^2, i, j_index) + + dρdt_acc += dρdt⁺ + Dᵢ + + Pᵢ = Pressure[i] + Pⱼ = Pressure[j_index] + Pfac = (Pᵢ + Pⱼ) / (ρᵢ * ρⱼ) + f_ab = tensile_correction(SimKernel, Pᵢ, ρᵢ, Pⱼ, ρⱼ, q, SimConstants.dx) + dvdt⁺ = -SimConstants.m₀ * (Pfac + f_ab) * ∇ᵢWᵢⱼ + + visc_term = ComputeViscosityGPU(SimViscosity, SimKernel, SimConstants, + Density, xᵢⱼ, vᵢⱼ, ∇ᵢWᵢⱼ, dᵢⱼ^2, i, j_index) + + acc_acc += dvdt⁺ + visc_term + end + end + end + + dρdtI[i] = dρdt_acc + Acceleration[i] = acc_acc + end + return nothing +end + +function NeighborLoopPerParticleCUDA!(SimDensityDiffusion::SDD, SimViscosity::SV, SimKernel, + SimMetaData::SimulationMetaData{D,T,NoShifting,NoKernelOutput,B,L}, + SimConstants, SimParticles, ParticleRanges, + CellLookup, NeighborCellLists, dρdtI, + Acceleration, ∇Cᵢ, + ∇◌rᵢ, max_visc = nothing, + min_dt_force = nothing; + Position = SimParticles.Position, + Density = SimParticles.Density, + Pressure = SimParticles.Pressure, + Velocity = SimParticles.Velocity, + ParticleOrder) where {D,T, + B<:MDBCMode,L<:LogMode, + SDD<:SPHDensityDiffusion, + SV<:SPHViscosity} + if !CUDAAvailable() + error("CUDA is not available; cannot run GPU neighbor loop.") + end + + if !(SimDensityDiffusion isa LinearDensityDiffusion || SimDensityDiffusion isa ZeroDensityDiffusion) + error("CUDA neighbor loop supports LinearDensityDiffusion or ZeroDensityDiffusion.") + end + if !(SimViscosity isa ArtificialViscosity || SimViscosity isa ZeroViscosity) + error("CUDA neighbor loop supports ArtificialViscosity or ZeroViscosity.") + end + + cell_list_indices = BuildCellListIndices(SimParticles.Cells, CellLookup) + cell_count = length(NeighborCellLists) + neighbor_cell_starts, neighbor_cell_entries = BuildNeighborCellRanges(NeighborCellLists, cell_count) + + dρdtI_gpu = CuArray(dρdtI) + acceleration_gpu = CuArray(Acceleration) + position_gpu = CuArray(Position) + density_gpu = CuArray(Density) + pressure_gpu = CuArray(Pressure) + velocity_gpu = CuArray(Velocity) + motion_limiter_gpu = CuArray(SimParticles.MotionLimiter) + cell_list_indices_gpu = CuArray(cell_list_indices) + particle_ranges_gpu = CuArray(ParticleRanges) + particle_order_gpu = CuArray(ParticleOrder) + neighbor_cell_starts_gpu = CuArray(neighbor_cell_starts) + neighbor_cell_entries_gpu = CuArray(neighbor_cell_entries) + + threads = 256 + blocks = cld(length(Position), threads) + @cuda threads=threads blocks=blocks NeighborLoopCUDAKernel!( + dρdtI_gpu, acceleration_gpu, position_gpu, density_gpu, pressure_gpu, + velocity_gpu, motion_limiter_gpu, cell_list_indices_gpu, particle_ranges_gpu, + particle_order_gpu, neighbor_cell_starts_gpu, neighbor_cell_entries_gpu, + SimKernel, SimConstants, SimDensityDiffusion, SimViscosity, + ) + + CUDA.copyto!(dρdtI, dρdtI_gpu) + CUDA.copyto!(Acceleration, acceleration_gpu) + + if max_visc !== nothing || min_dt_force !== nothing + @inbounds for i in eachindex(Position) + UpdateTimeStepBuffers!(max_visc, min_dt_force, i, Position[i], Velocity[i], Acceleration[i], SimKernel) + end + end + + return nothing +end + +@inbounds function SimulationLoopCUDA(SimDensityDiffusion::SDD, SimViscosity::SV, SimKernel, + SimMetaData::SimulationMetaData{Dimensions, FloatType, SMode, KMode, BMode, LMode}, + SimConstants, SimParticles, FullStencil, + ParticleRanges, UniqueCells, CellLookup, + ParticleOrder, CellOffsets, + NeighborCellLists, dρdtI, Velocityₙ⁺, + Positionₙ⁺, ρₙ⁺, ∇Cᵢ, ∇◌rᵢ, + MotionDefinition::Union{ + Nothing, + AbstractVector{ + Union{ + Nothing, + MotionDetails{Dimensions, FloatType}, + }, + }, + }) where { + Dimensions, FloatType, SMode, KMode, + BMode, LMode, + SDD<:SPHDensityDiffusion, + SV<:SPHViscosity} + @unpack Position, Density, Pressure, Velocity, Acceleration, MotionLimiter, + GroupMarker = SimParticles + ParticleType = SimParticles.Type + ParticleMarker = GroupMarker + GhostPoints = hasproperty(SimParticles, :GhostPoints) ? SimParticles.GhostPoints : nothing + GhostNormals = hasproperty(SimParticles, :GhostNormals) ? SimParticles.GhostNormals : nothing + + UniqueCellsView = view(UniqueCells, 1:SimMetaData.IndexCounter) + dt = Δt(Position, Velocity, Acceleration, SimConstants, SimKernel) + + @no_escape begin + max_visc = @alloc(FloatType, length(Position)) + min_dt_force = @alloc(FloatType, length(Position)) + + dt₂ = dt * 0.5 + + while SimMetaData.TotalTime <= next_output_time(SimMetaData) + @timeit SimMetaData.HourGlass "01 Calculate IndexCounter" begin + SimMetaData.Δx = UpdateΔx!(SimMetaData.Δx, Positionₙ⁺, SimParticles.Position) + ShouldRebuild = SimMetaData.Δx >= SimKernel.h + + if ShouldRebuild + @timeit SimMetaData.HourGlass "01a Actual Calculate IndexCounter" begin + SimMetaData.IndexCounter = UpdateNeighbors!(SimParticles, SimKernel.H⁻¹, ParticleRanges, UniqueCells, CellLookup, ParticleOrder, CellOffsets) + end + SimMetaData.Δx = zero(eltype(dρdtI)) + UniqueCellsView = view(UniqueCells, 1:SimMetaData.IndexCounter) + BuildNeighborCellLists!(NeighborCellLists, FullStencil, UniqueCellsView, ParticleRanges, CellLookup) + end + end + + @timeit SimMetaData.HourGlass "Motion" ProgressMotion(SimParticles, dt₂, MotionDefinition, SimMetaData) + + @timeit SimMetaData.HourGlass "02 Pressure" Pressure!(SimParticles.Pressure, SimParticles.Density, SimConstants) + if SimMetaData isa SimulationMetaData{Dimensions, FloatType, SMode, KMode, NoMDBC, LMode} where {SMode, KMode, LMode} + @timeit SimMetaData.HourGlass "03 Apply MDBC before Half TimeStep" SPHCellList.ApplyMDBCBeforeHalf!(SimMetaData) + else + @timeit SimMetaData.HourGlass "03 Apply MDBC before Half TimeStep" SPHCellList.ApplyMDBCBeforeHalf!( + SimMetaData, SimKernel, SimConstants, SimParticles, ParticleRanges, CellLookup, Position, + Density, GhostPoints, GhostNormals, ParticleType; ParticleOrder = ParticleOrder, + ) + end + + @timeit SimMetaData.HourGlass "04 First NeighborLoop" NeighborLoopPerParticleCUDA!( + SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, + SimConstants, SimParticles, ParticleRanges, CellLookup, + NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, + ParticleOrder = ParticleOrder, + ) + + @timeit SimMetaData.HourGlass "05 Update To Half TimeStep" HalfTimeStep(SimMetaData, SimConstants, SimParticles, Positionₙ⁺, Velocityₙ⁺, ρₙ⁺, dρdtI, dt₂) + + @timeit SimMetaData.HourGlass "06 Half LimitDensityAtBoundary" LimitDensityAtBoundary!(ρₙ⁺, SimConstants.ρ₀, MotionLimiter) + + @timeit SimMetaData.HourGlass "Motion" ProgressMotion(SimParticles, dt₂, MotionDefinition, SimMetaData) + + @timeit SimMetaData.HourGlass "07 Pressure" Pressure!(SimParticles.Pressure, ρₙ⁺, SimConstants) + @timeit SimMetaData.HourGlass "08 Second NeighborLoop" NeighborLoopPerParticleCUDA!( + SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, + SimConstants, SimParticles, ParticleRanges, CellLookup, + NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, + max_visc, min_dt_force, + ParticleOrder = ParticleOrder, + Position = Positionₙ⁺, + Density = ρₙ⁺, + Velocity = Velocityₙ⁺, + ) + + @timeit SimMetaData.HourGlass "09 Final LimitDensityAtBoundary" LimitDensityAtBoundary!(Density, SimConstants.ρ₀, MotionLimiter) + + @timeit SimMetaData.HourGlass "10 Final Density" DensityEpsi!(Density, dρdtI, ρₙ⁺, dt) + + @timeit SimMetaData.HourGlass "11 Update To Final TimeStep" FullTimeStep(SimMetaData, SimKernel, SimConstants, SimParticles, ∇Cᵢ, ∇◌rᵢ, dt) + + @timeit SimMetaData.HourGlass "12 Update MetaData" UpdateMetaData!(SimMetaData, dt) + + @timeit SimMetaData.HourGlass "13 Update TimeStep" dt = FinalizeTimeStep(max_visc, min_dt_force, SimConstants, SimKernel) + end + end + + return nothing +end + +function RunSimulationCUDA(;SimGeometry::Vector{Geometry{Dimensions, FloatType}}, + SimMetaData::SimulationMetaData{Dimensions, FloatType, SMode, KMode, BMode, LMode}, + SimConstants::SimulationConstants, + SimKernel::SPHKernelInstance, + SimLogger::SimulationLogger, + SimParticles::StructArray, + SimViscosity::SV, + SimDensityDiffusion::SDD, + ParticleNormalsPath::Union{Nothing,String} = nothing + ) where {Dimensions,FloatType,SMode,KMode,BMode,LMode,SV<:SPHViscosity,SDD<:SPHDensityDiffusion} + if !CUDAAvailable() + error("CUDA is not available; cannot run RunSimulationCUDA.") + end + if !(SimMetaData isa SimulationMetaData{Dimensions, FloatType, NoShifting, NoKernelOutput, BMode, LMode} where {BMode, LMode}) + error("RunSimulationCUDA currently supports NoShifting and NoKernelOutput configurations.") + end + + NumberOfPoints = length(SimParticles) + + dρdtI, Velocityₙ⁺, Positionₙ⁺, ρₙ⁺, ∇Cᵢ, ∇◌rᵢ = AllocateSupportDataStructures(SimMetaData, SimParticles.Position) + + LoadMDBCNormals!(SimMetaData, SimParticles, ParticleNormalsPath) + + InitializeLog!(SimMetaData, SimLogger, SimConstants, SimKernel, SimViscosity, SimDensityDiffusion, SimGeometry, SimParticles) + + Pressure!(SimParticles.Pressure, SimParticles.Density, SimConstants) + + ParticleRanges = zeros(Int, NumberOfPoints + 1 + 1) + UniqueCells = zeros(CartesianIndex{Dimensions}, NumberOfPoints) + CellLookup = InitializeCellIndexLookup(Val(Dimensions)) + FullStencil = ConstructStencil(Val(Dimensions)) + NeighborCellLists = [Int[] for _ in 1:length(UniqueCells)] + ParticleOrder = zeros(Int, NumberOfPoints) + CellOffsets = zeros(Int, length(ParticleRanges)) + + output = SetupVTKOutput(SimMetaData, SimParticles, SimKernel, Dimensions) + + SimMetaData.OutputIterationCounter = 1 + output.enqueue_particles(SimMetaData.OutputIterationCounter) + if SimMetaData.IndexCounter > 0 + unique_cells_view = view(UniqueCells, 1:SimMetaData.IndexCounter) + cell_particle_counts = nothing + cell_neighbor_counts = nothing + if SimMetaData.ExportGridCellParticleCounts + cell_particle_counts = ComputeCellParticleCounts(ParticleRanges, SimMetaData.IndexCounter) + cell_neighbor_counts = ComputeCellNeighborCounts(ParticleRanges, NeighborCellLists, SimMetaData.IndexCounter) + end + output.enqueue_grid( + SimMetaData.OutputIterationCounter, + unique_cells_view, + cell_particle_counts=cell_particle_counts, + cell_neighbor_counts=cell_neighbor_counts, + ) + end + + MotionDefinition = SPHCellList.GenerateMotionDetails(SimParticles, SimGeometry, Dimensions, FloatType) + + @inbounds while true + @timeit SimMetaData.HourGlass "00 SimulationLoop" SimulationLoopCUDA( + SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, + SimConstants, SimParticles, FullStencil, ParticleRanges, + UniqueCells, CellLookup, ParticleOrder, CellOffsets, + NeighborCellLists, dρdtI, Velocityₙ⁺, Positionₙ⁺, ρₙ⁺, + ∇Cᵢ, ∇◌rᵢ, MotionDefinition, + ) + push!(SimMetaData.TimeSteps, SimMetaData.CurrentTimeStep) + + LogStep!(SimMetaData, SimLogger) + + SimMetaData.OutputIterationCounter += 1 + + UniqueCellsView = view(UniqueCells, 1:SimMetaData.IndexCounter) + + @timeit SimMetaData.HourGlass "13 Determine Output" SPHCellList.DetermineOutput( + Val(SimMetaData.ExportGridCellParticleCounts), + SimMetaData, + output, + ParticleRanges, + NeighborCellLists, + UniqueCellsView, + ) + + if SimMetaData.TotalTime > SimMetaData.SimulationTime + @timeit SimMetaData.HourGlass "13B Close Data Streams" output.close_files() + + show(SimMetaData.HourGlass, sortby=:name) + show(SimMetaData.HourGlass) + + FinalizeLog!(SimMetaData, SimLogger) + + AutoOpenLogFile(SimLogger, SimMetaData) + AutoOpenParaview(SimMetaData, output.variable_names) + + break + end + end +end + +end diff --git a/src/SPHExample.jl b/src/SPHExample.jl index a31df067..65b8da5d 100644 --- a/src/SPHExample.jl +++ b/src/SPHExample.jl @@ -15,6 +15,9 @@ module SPHExample include("SPHDensityDiffusionModels.jl") include("SPHNeighborList.jl") include("SPHCellList.jl") #Must be last + if Base.find_package("CUDA") !== nothing + include("SPHCUDA.jl") + end # Re-export desired functions from each submodule using .AuxiliaryFunctions @@ -64,6 +67,14 @@ module SPHExample using .SPHCellList export NeighborLoop!, ComputeInteractions!, RunSimulation + if Base.find_package("CUDA") !== nothing + using .SPHCUDA + export RunSimulationCUDA, CUDAAvailable + else + CUDAAvailable() = false + export CUDAAvailable + end + using .OpenExternalPrograms export AutoOpenLogFile, AutoOpenParaview From 45f9242b7fe9d6237c6d1f14088168d2c83eebc5 Mon Sep 17 00:00:00 2001 From: AhmedSalih3d <36305327+AhmedSalih3d@users.noreply.github.com> Date: Thu, 15 Jan 2026 23:07:26 +0100 Subject: [PATCH 2/2] Remove no_escape usage in CUDA loop --- src/SPHCUDA.jl | 106 ++++++++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 54 deletions(-) diff --git a/src/SPHCUDA.jl b/src/SPHCUDA.jl index 3f6107f8..6b9b31bb 100644 --- a/src/SPHCUDA.jl +++ b/src/SPHCUDA.jl @@ -296,74 +296,72 @@ end UniqueCellsView = view(UniqueCells, 1:SimMetaData.IndexCounter) dt = Δt(Position, Velocity, Acceleration, SimConstants, SimKernel) - @no_escape begin - max_visc = @alloc(FloatType, length(Position)) - min_dt_force = @alloc(FloatType, length(Position)) - - dt₂ = dt * 0.5 - - while SimMetaData.TotalTime <= next_output_time(SimMetaData) - @timeit SimMetaData.HourGlass "01 Calculate IndexCounter" begin - SimMetaData.Δx = UpdateΔx!(SimMetaData.Δx, Positionₙ⁺, SimParticles.Position) - ShouldRebuild = SimMetaData.Δx >= SimKernel.h - - if ShouldRebuild - @timeit SimMetaData.HourGlass "01a Actual Calculate IndexCounter" begin - SimMetaData.IndexCounter = UpdateNeighbors!(SimParticles, SimKernel.H⁻¹, ParticleRanges, UniqueCells, CellLookup, ParticleOrder, CellOffsets) - end - SimMetaData.Δx = zero(eltype(dρdtI)) - UniqueCellsView = view(UniqueCells, 1:SimMetaData.IndexCounter) - BuildNeighborCellLists!(NeighborCellLists, FullStencil, UniqueCellsView, ParticleRanges, CellLookup) - end - end + max_visc = Vector{FloatType}(undef, length(Position)) + min_dt_force = Vector{FloatType}(undef, length(Position)) - @timeit SimMetaData.HourGlass "Motion" ProgressMotion(SimParticles, dt₂, MotionDefinition, SimMetaData) + dt₂ = dt * 0.5 - @timeit SimMetaData.HourGlass "02 Pressure" Pressure!(SimParticles.Pressure, SimParticles.Density, SimConstants) - if SimMetaData isa SimulationMetaData{Dimensions, FloatType, SMode, KMode, NoMDBC, LMode} where {SMode, KMode, LMode} - @timeit SimMetaData.HourGlass "03 Apply MDBC before Half TimeStep" SPHCellList.ApplyMDBCBeforeHalf!(SimMetaData) - else - @timeit SimMetaData.HourGlass "03 Apply MDBC before Half TimeStep" SPHCellList.ApplyMDBCBeforeHalf!( - SimMetaData, SimKernel, SimConstants, SimParticles, ParticleRanges, CellLookup, Position, - Density, GhostPoints, GhostNormals, ParticleType; ParticleOrder = ParticleOrder, - ) + while SimMetaData.TotalTime <= next_output_time(SimMetaData) + @timeit SimMetaData.HourGlass "01 Calculate IndexCounter" begin + SimMetaData.Δx = UpdateΔx!(SimMetaData.Δx, Positionₙ⁺, SimParticles.Position) + ShouldRebuild = SimMetaData.Δx >= SimKernel.h + + if ShouldRebuild + @timeit SimMetaData.HourGlass "01a Actual Calculate IndexCounter" begin + SimMetaData.IndexCounter = UpdateNeighbors!(SimParticles, SimKernel.H⁻¹, ParticleRanges, UniqueCells, CellLookup, ParticleOrder, CellOffsets) + end + SimMetaData.Δx = zero(eltype(dρdtI)) + UniqueCellsView = view(UniqueCells, 1:SimMetaData.IndexCounter) + BuildNeighborCellLists!(NeighborCellLists, FullStencil, UniqueCellsView, ParticleRanges, CellLookup) end + end + + @timeit SimMetaData.HourGlass "Motion" ProgressMotion(SimParticles, dt₂, MotionDefinition, SimMetaData) - @timeit SimMetaData.HourGlass "04 First NeighborLoop" NeighborLoopPerParticleCUDA!( - SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, - SimConstants, SimParticles, ParticleRanges, CellLookup, - NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, - ParticleOrder = ParticleOrder, + @timeit SimMetaData.HourGlass "02 Pressure" Pressure!(SimParticles.Pressure, SimParticles.Density, SimConstants) + if SimMetaData isa SimulationMetaData{Dimensions, FloatType, SMode, KMode, NoMDBC, LMode} where {SMode, KMode, LMode} + @timeit SimMetaData.HourGlass "03 Apply MDBC before Half TimeStep" SPHCellList.ApplyMDBCBeforeHalf!(SimMetaData) + else + @timeit SimMetaData.HourGlass "03 Apply MDBC before Half TimeStep" SPHCellList.ApplyMDBCBeforeHalf!( + SimMetaData, SimKernel, SimConstants, SimParticles, ParticleRanges, CellLookup, Position, + Density, GhostPoints, GhostNormals, ParticleType; ParticleOrder = ParticleOrder, ) + end - @timeit SimMetaData.HourGlass "05 Update To Half TimeStep" HalfTimeStep(SimMetaData, SimConstants, SimParticles, Positionₙ⁺, Velocityₙ⁺, ρₙ⁺, dρdtI, dt₂) + @timeit SimMetaData.HourGlass "04 First NeighborLoop" NeighborLoopPerParticleCUDA!( + SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, + SimConstants, SimParticles, ParticleRanges, CellLookup, + NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, + ParticleOrder = ParticleOrder, + ) - @timeit SimMetaData.HourGlass "06 Half LimitDensityAtBoundary" LimitDensityAtBoundary!(ρₙ⁺, SimConstants.ρ₀, MotionLimiter) + @timeit SimMetaData.HourGlass "05 Update To Half TimeStep" HalfTimeStep(SimMetaData, SimConstants, SimParticles, Positionₙ⁺, Velocityₙ⁺, ρₙ⁺, dρdtI, dt₂) - @timeit SimMetaData.HourGlass "Motion" ProgressMotion(SimParticles, dt₂, MotionDefinition, SimMetaData) + @timeit SimMetaData.HourGlass "06 Half LimitDensityAtBoundary" LimitDensityAtBoundary!(ρₙ⁺, SimConstants.ρ₀, MotionLimiter) - @timeit SimMetaData.HourGlass "07 Pressure" Pressure!(SimParticles.Pressure, ρₙ⁺, SimConstants) - @timeit SimMetaData.HourGlass "08 Second NeighborLoop" NeighborLoopPerParticleCUDA!( - SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, - SimConstants, SimParticles, ParticleRanges, CellLookup, - NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, - max_visc, min_dt_force, - ParticleOrder = ParticleOrder, - Position = Positionₙ⁺, - Density = ρₙ⁺, - Velocity = Velocityₙ⁺, - ) + @timeit SimMetaData.HourGlass "Motion" ProgressMotion(SimParticles, dt₂, MotionDefinition, SimMetaData) - @timeit SimMetaData.HourGlass "09 Final LimitDensityAtBoundary" LimitDensityAtBoundary!(Density, SimConstants.ρ₀, MotionLimiter) + @timeit SimMetaData.HourGlass "07 Pressure" Pressure!(SimParticles.Pressure, ρₙ⁺, SimConstants) + @timeit SimMetaData.HourGlass "08 Second NeighborLoop" NeighborLoopPerParticleCUDA!( + SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, + SimConstants, SimParticles, ParticleRanges, CellLookup, + NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, + max_visc, min_dt_force, + ParticleOrder = ParticleOrder, + Position = Positionₙ⁺, + Density = ρₙ⁺, + Velocity = Velocityₙ⁺, + ) - @timeit SimMetaData.HourGlass "10 Final Density" DensityEpsi!(Density, dρdtI, ρₙ⁺, dt) + @timeit SimMetaData.HourGlass "09 Final LimitDensityAtBoundary" LimitDensityAtBoundary!(Density, SimConstants.ρ₀, MotionLimiter) - @timeit SimMetaData.HourGlass "11 Update To Final TimeStep" FullTimeStep(SimMetaData, SimKernel, SimConstants, SimParticles, ∇Cᵢ, ∇◌rᵢ, dt) + @timeit SimMetaData.HourGlass "10 Final Density" DensityEpsi!(Density, dρdtI, ρₙ⁺, dt) - @timeit SimMetaData.HourGlass "12 Update MetaData" UpdateMetaData!(SimMetaData, dt) + @timeit SimMetaData.HourGlass "11 Update To Final TimeStep" FullTimeStep(SimMetaData, SimKernel, SimConstants, SimParticles, ∇Cᵢ, ∇◌rᵢ, dt) - @timeit SimMetaData.HourGlass "13 Update TimeStep" dt = FinalizeTimeStep(max_visc, min_dt_force, SimConstants, SimKernel) - end + @timeit SimMetaData.HourGlass "12 Update MetaData" UpdateMetaData!(SimMetaData, dt) + + @timeit SimMetaData.HourGlass "13 Update TimeStep" dt = FinalizeTimeStep(max_visc, min_dt_force, SimConstants, SimKernel) end return nothing