Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2021f6c
datadeps: Fix views and implement remainder copies
jpsamaroo Aug 10, 2025
876b82d
Add type-stable spawn code paths
jpsamaroo Oct 16, 2025
f152819
datadeps: Optimize ainfo aliasing lookups
jpsamaroo Oct 16, 2025
8b53c35
datadeps: Optimize remote ArgumentWrapper lookup
jpsamaroo Oct 16, 2025
e0bc71a
thunk: Remove unnecessary scope allocations
jpsamaroo Oct 16, 2025
9dff381
test/datadeps: Remove aliasing=false tests
jpsamaroo Nov 11, 2025
bab76c3
datadeps: ainfo_arg must track ainfo -> multiple arg_w
jpsamaroo Nov 15, 2025
2ef6c26
datadeps: Fix broken ChunkView unwrapping
jpsamaroo Nov 15, 2025
13e4945
datadeps: Signature fixups and small cleanups
jpsamaroo Nov 15, 2025
293f333
datadeps: Fix aliased object detection around Chunks
jpsamaroo Dec 9, 2025
87cdbe9
datadeps: Validate ManyMemorySpan inner span lengths
jpsamaroo Dec 9, 2025
d4d9330
datadeps: Optimize RemainderAliasing move! copies
jpsamaroo Dec 9, 2025
9996206
datadeps: Overhaul Datadeps tests
jpsamaroo Dec 9, 2025
f745dbe
datadeps: Validate further that RemainderAliasing is not empty
jpsamaroo Dec 10, 2025
f2381a8
datadeps: Fix aliasing for degenerate views
jpsamaroo Dec 12, 2025
321f08a
datadeps: Fix GPU execution
jpsamaroo Sep 23, 2025
c671d24
Sch: Skip set_failed! store when result already set
jpsamaroo Dec 14, 2025
0c19fa0
scopes: Disallow constructing empty UnionScope
jpsamaroo Dec 14, 2025
6a1bf16
datadeps: Consolidate aliasing rewrap code
jpsamaroo Dec 15, 2025
e59fdd7
HaloArray: Add aliasing methods
jpsamaroo Dec 15, 2025
36b25dc
CI: Extend CUDA job time
jpsamaroo Dec 15, 2025
563caef
datadeps: Make IntervalTree find_overlapping non-recursive
jpsamaroo Dec 16, 2025
6f9f98e
datadeps: Add TID to dagdebug statements
jpsamaroo Dec 16, 2025
357a2d6
fixup! scopes: Disallow constructing empty UnionScope
jpsamaroo Dec 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ steps:
codecov: true

- label: Julia 1.11 (CUDA)
timeout_in_minutes: 20
timeout_in_minutes: 30
<<: *gputest
plugins:
- JuliaCI/julia#v1:
Expand Down
9 changes: 9 additions & 0 deletions ext/CUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ function Dagger.memory_space(x::CuArray)
device_uuid = CUDA.uuid(dev)
return CUDAVRAMMemorySpace(myid(), device_id, device_uuid)
end
# Describe the memory occupied by a `CuArray` as one contiguous span, tagged
# with the memory space that `Dagger.memory_space` reports for the array.
function Dagger.aliasing(x::CuArray{T}) where T
    space = Dagger.memory_space(x)
    # Address returned by `pointer(x)`, carried as an untyped remote pointer.
    rptr = Dagger.RemotePtr{Cvoid}(UInt64(pointer(x)), space)
    # Span length in bytes: element size times element count.
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

# Memory spaces of a CUDA device processor: a one-element set built from the
# processor's owner, device, and device UUID.
function Dagger.memory_spaces(proc::CuArrayDeviceProc)
    space = CUDAVRAMMemorySpace(proc.owner, proc.device, proc.device_uuid)
    return Set([space])
end
# Processors of a CUDA memory space: a one-element set built from the space's
# owner, device, and device UUID.
function Dagger.processors(space::CUDAVRAMMemorySpace)
    proc = CuArrayDeviceProc(space.owner, space.device, space.device_uuid)
    return Set([proc])
end
Expand Down Expand Up @@ -75,6 +82,8 @@ function with_context!(space::CUDAVRAMMemorySpace)
@assert Dagger.root_worker_id(space) == myid()
with_context!(space.device)
end
# Make the `with_context!` methods reachable through `Dagger.with_context!`.
# NOTE(review): assumes the unqualified `with_context!` below is this module's
# own function and not `Dagger.with_context!` itself; if it were the latter,
# these methods would call themselves. Confirm against the module's imports.
function Dagger.with_context!(proc::CuArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::CUDAVRAMMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
old_ctx = context()
old_stream = stream()
Expand Down
9 changes: 9 additions & 0 deletions ext/IntelExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ function Dagger.memory_space(x::oneArray)
return IntelVRAMMemorySpace(myid(), device_id)
end
# Index of `dev` (compared by identity) within the collected `oneAPI.devices()`
# list, or `nothing` when no entry matches.
function _device_id(dev::ZeDevice)
    known_devs = collect(oneAPI.devices())
    return findfirst(candidate -> candidate === dev, known_devs)
end
# Describe the memory occupied by a `oneArray` as one contiguous span, tagged
# with the memory space that `Dagger.memory_space` reports for the array.
function Dagger.aliasing(x::oneArray{T}) where T
    space = Dagger.memory_space(x)
    # Address returned by `pointer(x)`, carried as an untyped remote pointer.
    rptr = Dagger.RemotePtr{Cvoid}(UInt64(pointer(x)), space)
    # Span length in bytes: element size times element count.
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

# Memory spaces of an Intel device processor: a one-element set built from the
# processor's owner and device id.
function Dagger.memory_spaces(proc::oneArrayDeviceProc)
    space = IntelVRAMMemorySpace(proc.owner, proc.device_id)
    return Set([space])
end
# Processors of an Intel memory space: a one-element set built from the space's
# owner and device id.
function Dagger.processors(space::IntelVRAMMemorySpace)
    proc = oneArrayDeviceProc(space.owner, space.device_id)
    return Set([proc])
end
Expand All @@ -68,6 +75,8 @@ function with_context!(space::IntelVRAMMemorySpace)
@assert Dagger.root_worker_id(space) == myid()
with_context!(space.device_id)
end
# Make the `with_context!` methods reachable through `Dagger.with_context!`.
# NOTE(review): assumes the unqualified `with_context!` below is this module's
# own function and not `Dagger.with_context!` itself; if it were the latter,
# these methods would call themselves. Confirm against the module's imports.
function Dagger.with_context!(proc::oneArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::IntelVRAMMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
old_drv = driver()
old_dev = device()
Expand Down
9 changes: 9 additions & 0 deletions ext/MetalExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ function Dagger.memory_space(x::MtlArray)
return MetalVRAMMemorySpace(myid(), device_id)
end
# Index of `dev` (compared by identity) within `Metal.devices()`, or `nothing`
# when no entry matches.
function _device_id(dev::MtlDevice)
    known_devs = Metal.devices()
    return findfirst(candidate -> candidate === dev, known_devs)
end
# Describe the memory occupied by an `MtlArray` as one contiguous span, tagged
# with the memory space that `Dagger.memory_space` reports for the array.
function Dagger.aliasing(x::MtlArray{T}) where T
    space = Dagger.memory_space(x)
    # Address returned by `pointer(x)`, carried as an untyped remote pointer.
    rptr = Dagger.RemotePtr{Cvoid}(UInt64(pointer(x)), space)
    # Span length in bytes: element size times element count.
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

# Memory spaces of a Metal device processor: a one-element set built from the
# processor's owner and device id.
function Dagger.memory_spaces(proc::MtlArrayDeviceProc)
    space = MetalVRAMMemorySpace(proc.owner, proc.device_id)
    return Set([space])
end
# Processors of a Metal memory space: a one-element set built from the space's
# owner and device id.
function Dagger.processors(space::MetalVRAMMemorySpace)
    proc = MtlArrayDeviceProc(space.owner, space.device_id)
    return Set([proc])
end
Expand All @@ -66,6 +73,8 @@ end
# Context switch for a Metal memory space. The only work done here is checking
# that the space's root worker is the current process; no device state is
# changed in this method.
function with_context!(space::MetalVRAMMemorySpace)
    @assert Dagger.root_worker_id(space) == myid()
    return nothing
end
# Make the `with_context!` methods reachable through `Dagger.with_context!`.
# NOTE(review): assumes the unqualified `with_context!` below is this module's
# own function and not `Dagger.with_context!` itself; if it were the latter,
# these methods would call themselves. Confirm against the module's imports.
function Dagger.with_context!(proc::MtlArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::MetalVRAMMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
with_context!(x)
return f()
Expand Down
9 changes: 9 additions & 0 deletions ext/OpenCLExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ function Dagger.memory_space(x::CLArray)
idx = findfirst(==(queue), QUEUES)
return CLMemorySpace(myid(), idx)
end
# Describe the memory occupied by a `CLArray` as one contiguous span, tagged
# with the memory space that `Dagger.memory_space` reports for the array.
function Dagger.aliasing(x::CLArray{T}) where T
    space = Dagger.memory_space(x)
    # Address returned by `pointer(x)`, carried as an untyped remote pointer.
    rptr = Dagger.RemotePtr{Cvoid}(UInt64(pointer(x)), space)
    # Span length in bytes: element size times element count.
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

# Memory spaces of an OpenCL device processor: a one-element set built from the
# processor's owner and device.
function Dagger.memory_spaces(proc::CLArrayDeviceProc)
    space = CLMemorySpace(proc.owner, proc.device)
    return Set([space])
end
# Processors of an OpenCL memory space: a one-element set built from the
# space's owner and device.
function Dagger.processors(space::CLMemorySpace)
    proc = CLArrayDeviceProc(space.owner, space.device)
    return Set([proc])
end
Expand Down Expand Up @@ -71,6 +78,8 @@ function with_context!(space::CLMemorySpace)
@assert Dagger.root_worker_id(space) == myid()
with_context!(space.device)
end
# Make the `with_context!` methods reachable through `Dagger.with_context!`.
# NOTE(review): assumes the unqualified `with_context!` below is this module's
# own function and not `Dagger.with_context!` itself; if it were the latter,
# these methods would call themselves. Confirm against the module's imports.
function Dagger.with_context!(proc::CLArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::CLMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
old_ctx = cl.context()
old_queue = cl.queue()
Expand Down
9 changes: 9 additions & 0 deletions ext/ROCExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ end
# The root worker of a ROC memory space is the worker stored in its `owner` field.
function Dagger.root_worker_id(space::ROCVRAMMemorySpace)
    return space.owner
end
# Memory space of a `ROCArray`: owned by the current process and identified by
# the `device_id` of the device that `AMDGPU.device(x)` returns.
function Dagger.memory_space(x::ROCArray)
    dev_id = AMDGPU.device(x).device_id
    return ROCVRAMMemorySpace(myid(), dev_id)
end
# Describe the memory occupied by a `ROCArray` as one contiguous span, tagged
# with the memory space that `Dagger.memory_space` reports for the array.
function Dagger.aliasing(x::ROCArray{T}) where T
    space = Dagger.memory_space(x)
    # Address returned by `pointer(x)`, carried as an untyped remote pointer.
    rptr = Dagger.RemotePtr{Cvoid}(UInt64(pointer(x)), space)
    # Span length in bytes: element size times element count.
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

# Memory spaces of a ROC device processor: a one-element set built from the
# processor's owner and device id.
function Dagger.memory_spaces(proc::ROCArrayDeviceProc)
    space = ROCVRAMMemorySpace(proc.owner, proc.device_id)
    return Set([space])
end
# Processors of a ROC memory space: a one-element set built from the space's
# owner and device id.
function Dagger.processors(space::ROCVRAMMemorySpace)
    proc = ROCArrayDeviceProc(space.owner, space.device_id)
    return Set([proc])
end
Expand Down Expand Up @@ -67,6 +74,8 @@ function with_context!(space::ROCVRAMMemorySpace)
@assert Dagger.root_worker_id(space) == myid()
with_context!(space.device_id)
end
# Make the `with_context!` methods reachable through `Dagger.with_context!`.
# NOTE(review): assumes the unqualified `with_context!` below is this module's
# own function and not `Dagger.with_context!` itself; if it were the latter,
# these methods would call themselves. Confirm against the module's imports.
function Dagger.with_context!(proc::ROCArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::ROCVRAMMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
old_ctx = context()
old_device = AMDGPU.device()
Expand Down
10 changes: 9 additions & 1 deletion src/Dagger.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ include("utils/fetch.jl")
include("utils/chunks.jl")
include("utils/logging.jl")
include("submission.jl")
# Abstract supertype for memory spaces. It is declared here, ahead of the three
# includes that follow.
# NOTE(review): presumably placed first so those files can refer to
# `MemorySpace` while they are being loaded — confirm before reordering.
abstract type MemorySpace end
include("utils/memory-span.jl")
include("utils/interval_tree.jl")
include("memory-spaces.jl")

# Task scheduling
Expand All @@ -83,7 +86,12 @@ include("utils/caching.jl")
include("sch/Sch.jl"); using .Sch

# Data dependency task queue
include("datadeps.jl")
include("datadeps/aliasing.jl")
include("datadeps/chunkview.jl")
include("datadeps/remainders.jl")
include("datadeps/queue.jl")

# Stencils
include("utils/haloarray.jl")
include("stencil.jl")

Expand Down
32 changes: 31 additions & 1 deletion src/argument.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ function pos_kw(pos::ArgPosition)
@assert pos.kw != :NULL
return pos.kw
end

mutable struct Argument
pos::ArgPosition
value
Expand All @@ -41,6 +42,35 @@ function Base.iterate(arg::Argument, state::Bool)
return nothing
end
end

# Copy an `Argument`: the position is rebuilt through `ArgPosition(arg.pos)`,
# while the stored value is passed along as-is (not copied).
function Base.copy(arg::Argument)
    pos_copy = ArgPosition(arg.pos)
    return Argument(pos_copy, arg.value)
end
# The chunk type of an `Argument` is the chunk type of the value it holds.
function chunktype(arg::Argument)
    held = value(arg)
    return chunktype(held)
end

# Counterpart of `Argument` whose value type is carried as the type parameter
# `T` rather than being stored in an untyped field.
# Field assignment (`arg.field = x`) is rejected by the `Base.setproperty!`
# method defined for this type.
mutable struct TypedArgument{T}
# Where the argument sits in the call (positional index or keyword name).
pos::ArgPosition
# The argument's value, stored with its concrete type `T`.
value::T
end
# Build a positional `TypedArgument`: index `pos`, keyword slot set to `:NULL`.
function TypedArgument(pos::Integer, value::T) where T
    position = ArgPosition(true, pos, :NULL)
    return TypedArgument{T}(position, value)
end
# Build a keyword `TypedArgument`: keyword `kw`, positional index set to `0`.
function TypedArgument(kw::Symbol, value::T) where T
    position = ArgPosition(false, 0, kw)
    return TypedArgument{T}(position, value)
end
# Reject every attempt to assign a field of a `TypedArgument`, whatever the
# field name or the new value.
function Base.setproperty!(arg::TypedArgument, name::Symbol, value::T) where T
    throw(ArgumentError("Cannot set properties of TypedArgument"))
end
# Position queries: each one forwards to the same-named function applied to
# the argument's `pos` field.
function ispositional(arg::TypedArgument)
    return ispositional(arg.pos)
end
function iskw(arg::TypedArgument)
    return iskw(arg.pos)
end
function pos_idx(arg::TypedArgument)
    return pos_idx(arg.pos)
end
function pos_kw(arg::TypedArgument)
    return pos_kw(arg.pos)
end
function raw_position(arg::TypedArgument)
    return raw_position(arg.pos)
end
# The stored value.
function value(arg::TypedArgument)
    return arg.value
end
# The type parameter `T` of the argument, taken from its type rather than from
# the stored value.
function valuetype(arg::TypedArgument{T}) where T
    return T
end
# Iteration yields exactly two items: first `pos`, then `value`. This allows
# destructuring such as `pos, val = arg`. The `Bool` state records whether the
# value is still to be produced.
function Base.iterate(arg::TypedArgument)
    return (arg.pos, true)
end
function Base.iterate(arg::TypedArgument, state::Bool)
    # `false` means both items have already been produced.
    state || return nothing
    return (arg.value, false)
end
# Copy a `TypedArgument`, keeping its type parameter: the position is rebuilt
# through `ArgPosition(arg.pos)`, while the stored value is passed along as-is.
function Base.copy(arg::TypedArgument{T}) where T
    pos_copy = ArgPosition(arg.pos)
    return TypedArgument{T}(pos_copy, arg.value)
end
# The chunk type of a `TypedArgument` is the chunk type of the value it holds.
function chunktype(arg::TypedArgument)
    held = value(arg)
    return chunktype(held)
end

# Convert a `TypedArgument` into an `Argument`. Both fields are handed over
# directly; unlike `copy`, the position is not rebuilt with `ArgPosition(...)`.
function Argument(arg::TypedArgument)
    return Argument(arg.pos, arg.value)
end

# Either argument representation.
const AnyArgument = Union{Argument, TypedArgument}
Loading
Loading