diff --git a/src/array.jl b/src/array.jl index 40a6bfa1d..a73627f75 100644 --- a/src/array.jl +++ b/src/array.jl @@ -31,7 +31,7 @@ function check_eltype(T) Base.isbitsunion(T) && error("MtlArray does not yet support isbits-union arrays") contains_eltype(T, Float64) && error("Metal does not support Float64 values, try using Float32 instead") contains_eltype(T, Int128) && error("Metal does not support Int128 values, try using Int64 instead") - contains_eltype(T, UInt128) && error("Metal does not support UInt128 values, try using UInt64 instead") + return contains_eltype(T, UInt128) && error("Metal does not support UInt128 values, try using UInt64 instead") end """ @@ -43,14 +43,14 @@ end See the Array Programming section of the Metal.jl docs for more details. """ -mutable struct MtlArray{T,N,S} <: AbstractGPUArray{T,N} +mutable struct MtlArray{T, N, S} <: AbstractGPUArray{T, N} data::DataRef{<:MTLBuffer} maxsize::Int # maximum data size in bytes; excluding any selector bytes offset::Int # offset of the data in the buffer, in number of elements dims::Dims{N} - function MtlArray{T,N,S}(::UndefInitializer, dims::Dims{N}) where {T,N,S} + function MtlArray{T, N, S}(::UndefInitializer, dims::Dims{N}) where {T, N, S} check_eltype(T) maxsize = prod(dims) * sizeof(T) @@ -75,44 +75,48 @@ mutable struct MtlArray{T,N,S} <: AbstractGPUArray{T,N} end data[].label = "MtlArray{$(T),$(N),$(S)}(dims=$dims)" - obj = new{T,N,S}(data, maxsize, 0, dims) - finalizer(unsafe_free!, obj) + obj = new{T, N, S}(data, maxsize, 0, dims) + return finalizer(unsafe_free!, obj) end - function MtlArray{T,N,S}(data::DataRef{<:MTLBuffer}, dims::Dims{N}; - maxsize::Int=prod(dims) * sizeof(T), offset::Int=0) where {T,N,S} + function MtlArray{T, N, S}( + data::DataRef{<:MTLBuffer}, dims::Dims{N}; + maxsize::Int = prod(dims) * sizeof(T), offset::Int = 0 + ) where {T, N, S} check_eltype(T) storagemode = convert(MTL.MTLStorageMode, S) if storagemode != data[].storageMode error("Storage mode mismatch: expected $S, got $(data[].storageMode)") end obj = new{T, N, S}(copy(data), maxsize, offset, dims) - finalizer(unsafe_free!, obj) + return finalizer(unsafe_free!, obj) end - function MtlArray{T,N}(data::DataRef{<:MTLBuffer}, dims::Dims{N}; - maxsize::Int=prod(dims) * sizeof(T), offset::Int=0) where {T,N} + function MtlArray{T, N}( + data::DataRef{<:MTLBuffer}, dims::Dims{N}; + maxsize::Int = prod(dims) * sizeof(T), offset::Int = 0 + ) where {T, N} check_eltype(T) storagemode = data[].storageMode obj = if storagemode == MTL.MTLStorageModeShared - new{T,N,SharedStorage}(copy(data), maxsize, offset, dims) + new{T, N, SharedStorage}(copy(data), maxsize, offset, dims) elseif storagemode == MTL.MTLStorageModeManaged @warn "`ManagedStorage` is no longer supported with `MtlArray`s. Instead, use `SharedStorage` or use the Metal api directly from `Metal.MTL`." - new{T,N,ManagedStorage}(copy(data), maxsize, offset, dims) + new{T, N, ManagedStorage}(copy(data), maxsize, offset, dims) elseif storagemode == MTL.MTLStorageModePrivate - new{T,N,PrivateStorage}(copy(data), maxsize, offset, dims) + new{T, N, PrivateStorage}(copy(data), maxsize, offset, dims) elseif storagemode == MTL.MTLStorageModeMemoryless - new{T,N,Memoryless}(copy(data), maxsize, offset, dims) + new{T, N, Memoryless}(copy(data), maxsize, offset, dims) end - finalizer(unsafe_free!, obj) + return finalizer(unsafe_free!, obj) end end # Create MtlArray from MTLBuffer -function MtlArray{T,N}(buf::B, dims::Dims{N}; kwargs...) 
where {B<:MTLBuffer,T,N} +function MtlArray{T, N}(buf::B, dims::Dims{N}; kwargs...) where {B <: MTLBuffer, T, N} data = DataRef(buf) do buf free(buf) end - return MtlArray{T,N}(data, dims; kwargs...) + return MtlArray{T, N}(data, dims; kwargs...) end GPUArrays.storage(a::MtlArray) = a.data @@ -125,7 +129,7 @@ Get the Metal device for an MtlArray. device(A::MtlArray) = A.data[].device storagemode(x::MtlArray) = storagemode(typeof(x)) -storagemode(::Type{<:MtlArray{<:Any,<:Any,S}}) where {S} = S +storagemode(::Type{<:MtlArray{<:Any, <:Any, S}}) where {S} = S """ is_shared(A::MtlArray)::Bool @@ -168,7 +172,7 @@ for MtlArray{T,1,S}. See also `Vector`(@ref), and the Array Programming section of the Metal.jl docs for more details. """ -const MtlVector{T,S} = MtlArray{T,1,S} +const MtlVector{T, S} = MtlArray{T, 1, S} """ MtlMatrix{T,S} <: AbstractGPUMatrix{T} @@ -178,7 +182,7 @@ for MtlArray{T,2,S}. See also `Matrix`(@ref), and the Array Programming section of the Metal.jl docs for more details. """ -const MtlMatrix{T,S} = MtlArray{T,2,S} +const MtlMatrix{T, S} = MtlArray{T, 2, S} """ MtlVecOrMat{T,S} @@ -188,54 +192,63 @@ MtlMatrix or an MtlVector. See also `VecOrMat`(@ref) for examples. """ -const MtlVecOrMat{T,S} = Union{MtlVector{T,S},MtlMatrix{T,S}} +const MtlVecOrMat{T, S} = Union{MtlVector{T, S}, MtlMatrix{T, S}} -# default to private memory -const DefaultStorageMode = let str = @load_preference("default_storage", "private") +# default storage mode: "auto" selects based on unified memory architecture +# - UMA devices (Apple Silicon): SharedStorage (zero-copy CPU access) +# - Non-UMA devices (Intel discrete GPU): PrivateStorage +# - Non-Apple platforms: PrivateStorage (Metal not available) +const DefaultStorageMode = let str = @load_preference("default_storage", "auto") if str == "private" PrivateStorage elseif str == "shared" SharedStorage + elseif str == "auto" + if Sys.isapple() && !isempty(devices()) + MTLDevice(1).hasUnifiedMemory ? 
SharedStorage : PrivateStorage + else + PrivateStorage + end else - error("unknown default storage mode: $default_storage") + error("unknown default storage mode: $str") end end -MtlArray{T,N}(::UndefInitializer, dims::Dims{N}) where {T,N} = - MtlArray{T,N,DefaultStorageMode}(undef, dims) +MtlArray{T, N}(::UndefInitializer, dims::Dims{N}) where {T, N} = + MtlArray{T, N, DefaultStorageMode}(undef, dims) # storage, type and dimensionality specified -MtlArray{T,N,S}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N,S} = - MtlArray{T,N,S}(undef, convert(Tuple{Vararg{Int}}, dims)) -MtlArray{T,N,S}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N,S} = - MtlArray{T,N,S}(undef, convert(Tuple{Vararg{Int}}, dims)) +MtlArray{T, N, S}(::UndefInitializer, dims::NTuple{N, Integer}) where {T, N, S} = + MtlArray{T, N, S}(undef, convert(Tuple{Vararg{Int}}, dims)) +MtlArray{T, N, S}(::UndefInitializer, dims::Vararg{Integer, N}) where {T, N, S} = + MtlArray{T, N, S}(undef, convert(Tuple{Vararg{Int}}, dims)) # type and dimensionality specified -MtlArray{T,N}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N} = - MtlArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims)) -MtlArray{T,N}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N} = - MtlArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims)) +MtlArray{T, N}(::UndefInitializer, dims::NTuple{N, Integer}) where {T, N} = + MtlArray{T, N}(undef, convert(Tuple{Vararg{Int}}, dims)) +MtlArray{T, N}(::UndefInitializer, dims::Vararg{Integer, N}) where {T, N} = + MtlArray{T, N}(undef, convert(Tuple{Vararg{Int}}, dims)) # only type specified -MtlArray{T}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N} = - MtlArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims)) -MtlArray{T}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N} = - MtlArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims)) +MtlArray{T}(::UndefInitializer, dims::NTuple{N, Integer}) where {T, N} = + MtlArray{T, N}(undef, convert(Tuple{Vararg{Int}}, dims)) +MtlArray{T}(::UndefInitializer, dims::Vararg{Integer, N}) where {T, N} = + MtlArray{T, N}(undef, convert(Tuple{Vararg{Int}}, dims)) # empty vector constructor -MtlArray{T,1,S}() where {T,S} = MtlArray{T,1,S}(undef, 0) -MtlArray{T,1}() where {T} = MtlArray{T,1}(undef, 0) +MtlArray{T, 1, S}() where {T, S} = MtlArray{T, 1, S}(undef, 0) +MtlArray{T, 1}() where {T} = MtlArray{T, 1}(undef, 0) -Base.similar(a::MtlArray{T,N,S}; storage=S) where {T,N,S} = - MtlArray{T,N,storage}(undef, size(a)) -Base.similar(::MtlArray{T,<:Any,S}, dims::Base.Dims{N}; storage=S) where {T,N,S} = - MtlArray{T,N,storage}(undef, dims) -Base.similar(::MtlArray{<:Any,<:Any,S}, ::Type{T}, dims::Base.Dims{N}; storage=S) where {T,N,S} = - MtlArray{T,N,storage}(undef, dims) +Base.similar(a::MtlArray{T, N, S}; storage = S) where {T, N, S} = + MtlArray{T, N, storage}(undef, size(a)) +Base.similar(::MtlArray{T, <:Any, S}, dims::Base.Dims{N}; storage = S) where {T, N, S} = + MtlArray{T, N, storage}(undef, dims) +Base.similar(::MtlArray{<:Any, <:Any, S}, ::Type{T}, dims::Base.Dims{N}; storage = S) where {T, N, S} = + MtlArray{T, N, storage}(undef, dims) function Base.copy(a::MtlArray) b = similar(a) - @inbounds copyto!(b, a) + return @inbounds copyto!(b, a) end @@ -246,7 +259,7 @@ Base.elsize(::Type{<:MtlArray{T}}) where {T} = sizeof(T) Base.size(x::MtlArray) = x.dims Base.sizeof(x::MtlArray) = Base.elsize(x) * length(x) -@inline function Base.pointer(x::MtlArray{T}, i::Integer=1; storage=PrivateStorage) where {T} +@inline function Base.pointer(x::MtlArray{T}, 
i::Integer = 1; storage = PrivateStorage) where {T} PT = if storage == PrivateStorage MtlPtr{T} elseif storage == SharedStorage @@ -254,73 +267,73 @@ Base.sizeof(x::MtlArray) = Base.elsize(x) * length(x) else error("unknown memory type") end - Base.unsafe_convert(PT, x) + Base._memory_offset(x, i) + return Base.unsafe_convert(PT, x) + Base._memory_offset(x, i) end function Base.unsafe_convert(::Type{MtlPtr{T}}, x::MtlArray) where {T} buf = x.data[] - MtlPtr{T}(buf, x.offset * Base.elsize(x)) + return MtlPtr{T}(buf, x.offset * Base.elsize(x)) end -function Base.unsafe_convert(::Type{Ptr{S}}, x::MtlArray{T}) where {S,T} +function Base.unsafe_convert(::Type{Ptr{S}}, x::MtlArray{T}) where {S, T} if is_private(x) throw(ArgumentError("cannot take the CPU address of a $(typeof(x))")) end synchronize() buf = x.data[] - convert(Ptr{T}, buf) + x.offset * Base.elsize(x) + return convert(Ptr{T}, buf) + x.offset * Base.elsize(x) end ## indexing -function Base.getindex(x::MtlArray{T,N,S}, I::Int) where {T,N,S<:SharedStorage} +function Base.getindex(x::MtlArray{T, N, S}, I::Int) where {T, N, S <: SharedStorage} @boundscheck checkbounds(x, I) - unsafe_load(pointer(x, I; storage=S)) + return unsafe_load(pointer(x, I; storage = S)) end -function Base.setindex!(x::MtlArray{T,N,S}, v, I::Int) where {T,N,S<:SharedStorage} +function Base.setindex!(x::MtlArray{T, N, S}, v, I::Int) where {T, N, S <: SharedStorage} @boundscheck checkbounds(x, I) - unsafe_store!(pointer(x, I; storage=S), v) + return unsafe_store!(pointer(x, I; storage = S), v) end ## interop with other arrays -@inline function MtlArray{T,N}(xs::AbstractArray{T,N}) where {T,N} - A = MtlArray{T,N}(undef, size(xs)) +@inline function MtlArray{T, N}(xs::AbstractArray{T, N}) where {T, N} + A = MtlArray{T, N}(undef, size(xs)) @inline copyto!(A, convert(Array{T}, xs)) return A end -@inline function MtlArray{T,N,S}(xs::AbstractArray{T,N}) where {T,N,S} - A = MtlArray{T,N,S}(undef, size(xs)) +@inline function MtlArray{T, N, S}(xs::AbstractArray{T, N}) where {T, N, S} + A = MtlArray{T, N, S}(undef, size(xs)) @inline copyto!(A, convert(Array{T}, xs)) return A end -MtlArray{T,N}(xs::AbstractArray{OT,N}) where {T,N,OT} = MtlArray{T,N}(map(T, xs)) -MtlArray{T,N,S}(xs::AbstractArray{OT,N}) where {T,N,S,OT} = MtlArray{T,N,S}(map(T, xs)) +MtlArray{T, N}(xs::AbstractArray{OT, N}) where {T, N, OT} = MtlArray{T, N}(map(T, xs)) +MtlArray{T, N, S}(xs::AbstractArray{OT, N}) where {T, N, S, OT} = MtlArray{T, N, S}(map(T, xs)) # underspecified constructors -MtlArray{T}(xs::AbstractArray{OT,N}) where {T,N,OT} = MtlArray{T,N}(xs) -(::Type{MtlArray{T,N} where T})(x::AbstractArray{OT,N}) where {OT,N} = MtlArray{OT,N}(x) -MtlArray(A::AbstractArray{T,N}) where {T,N} = MtlArray{T,N}(A) +MtlArray{T}(xs::AbstractArray{OT, N}) where {T, N, OT} = MtlArray{T, N}(xs) +(::Type{MtlArray{T, N} where {T}})(x::AbstractArray{OT, N}) where {OT, N} = MtlArray{OT, N}(x) +MtlArray(A::AbstractArray{T, N}) where {T, N} = MtlArray{T, N}(A) # copy xs to match Array behavior with same storage mode -MtlArray{T,N,S}(xs::MtlArray{T,N,S}) where {T,N,S} = copy(xs) +MtlArray{T, N, S}(xs::MtlArray{T, N, S}) where {T, N, S} = copy(xs) ## derived types # wrapped arrays: can be used in kernels -const WrappedMtlArray{T,N} = Union{MtlArray{T,N},WrappedArray{T,N,MtlArray,MtlArray{T,N}}} -const WrappedMtlVector{T} = WrappedMtlArray{T,1} -const WrappedMtlMatrix{T} = WrappedMtlArray{T,2} -const WrappedMtlVecOrMat{T} = Union{WrappedMtlVector{T},WrappedMtlMatrix{T}} +const WrappedMtlArray{T, N} = Union{MtlArray{T, N}, 
WrappedArray{T, N, MtlArray, MtlArray{T, N}}} +const WrappedMtlVector{T} = WrappedMtlArray{T, 1} +const WrappedMtlMatrix{T} = WrappedMtlArray{T, 2} +const WrappedMtlVecOrMat{T} = Union{WrappedMtlVector{T}, WrappedMtlMatrix{T}} ## conversions -Base.convert(::Type{T}, x::T) where T <: MtlArray = x +Base.convert(::Type{T}, x::T) where {T <: MtlArray} = x ## interop with C libraries @@ -338,14 +351,16 @@ Base.cconvert(::Type{<:id}, x::MtlArray) = x.data[] ## interop with CPU arrays -Base.collect(x::MtlArray{T,N}) where {T,N} = copyto!(Array{T,N}(undef, size(x)), x) +Base.collect(x::MtlArray{T, N}) where {T, N} = copyto!(Array{T, N}(undef, size(x)), x) ## memory copying # CPU -> GPU -function Base.copyto!(dest::MtlArray{T}, doffs::Integer, src::Array{T}, soffs::Integer, - n::Integer) where T +function Base.copyto!( + dest::MtlArray{T}, doffs::Integer, src::Array{T}, soffs::Integer, + n::Integer + ) where {T} (n == 0 || sizeof(T) == 0) && return dest @boundscheck checkbounds(dest, doffs) @boundscheck checkbounds(dest, doffs + n - 1) @@ -359,8 +374,10 @@ Base.copyto!(dest::MtlArray{T}, src::Array{T}) where {T} = copyto!(dest, 1, src, 1, length(src)) # GPU -> CPU -function Base.copyto!(dest::Array{T}, doffs::Integer, src::MtlArray{T}, soffs::Integer, - n::Integer) where T +function Base.copyto!( + dest::Array{T}, doffs::Integer, src::MtlArray{T}, soffs::Integer, + n::Integer + ) where {T} (n == 0 || sizeof(T) == 0) && return dest @boundscheck checkbounds(dest, doffs) @boundscheck checkbounds(dest, doffs + n - 1) @@ -374,8 +391,10 @@ Base.copyto!(dest::Array{T}, src::MtlArray{T}) where {T} = copyto!(dest, 1, src, 1, length(src)) # GPU -> GPU -function Base.copyto!(dest::MtlArray{T}, doffs::Integer, src::MtlArray{T}, soffs::Integer, - n::Integer) where T +function Base.copyto!( + dest::MtlArray{T}, doffs::Integer, src::MtlArray{T}, soffs::Integer, + n::Integer + ) where {T} (n == 0 || sizeof(T) == 0) && return dest @boundscheck checkbounds(dest, doffs) @boundscheck checkbounds(dest, doffs + n - 1) @@ -394,7 +413,7 @@ Base.copyto!(dest::MtlArray{T}, src::MtlArray{T}) where {T} = copyto!(dest, 1, src, 1, length(src)) # CPU -> GPU -function Base.unsafe_copyto!(dev::MTLDevice, dest::MtlArray{T}, doffs, src::Array{T}, soffs, n) where T +function Base.unsafe_copyto!(dev::MTLDevice, dest::MtlArray{T}, doffs, src::Array{T}, soffs, n) where {T} # these copies are implemented using pure memcpy's, not API calls, so aren't ordered. synchronize() GC.@preserve src dest unsafe_copyto!(dev, pointer(dest, doffs), pointer(src, soffs), n) @@ -404,15 +423,15 @@ function Base.unsafe_copyto!(dev::MTLDevice, dest::MtlArray{T}, doffs, src::Arra end return dest end -function Base.unsafe_copyto!(::MTLDevice, dest::MtlArray{T,<:Any,Metal.SharedStorage}, doffs, src::Array{T}, soffs, n) where T +function Base.unsafe_copyto!(::MTLDevice, dest::MtlArray{T, <:Any, Metal.SharedStorage}, doffs, src::Array{T}, soffs, n) where {T} # these copies are implemented using pure memcpy's, not API calls, so aren't ordered. 
synchronize() - GC.@preserve src dest unsafe_copyto!(pointer(unsafe_wrap(Array,dest), doffs), pointer(src, soffs), n) + GC.@preserve src dest unsafe_copyto!(pointer(unsafe_wrap(Array, dest), doffs), pointer(src, soffs), n) return dest end # GPU -> CPU -function Base.unsafe_copyto!(dev::MTLDevice, dest::Array{T}, doffs, src::MtlArray{T}, soffs, n) where T +function Base.unsafe_copyto!(dev::MTLDevice, dest::Array{T}, doffs, src::MtlArray{T}, soffs, n) where {T} # these copies are implemented using pure memcpy's, not API calls, so aren't ordered. synchronize() GC.@preserve src dest unsafe_copyto!(dev, pointer(dest, doffs), pointer(src, soffs), n) @@ -422,15 +441,15 @@ function Base.unsafe_copyto!(dev::MTLDevice, dest::Array{T}, doffs, src::MtlArra end return dest end -function Base.unsafe_copyto!(::MTLDevice, dest::Array{T}, doffs, src::MtlArray{T,<:Any,Metal.SharedStorage}, soffs, n) where T +function Base.unsafe_copyto!(::MTLDevice, dest::Array{T}, doffs, src::MtlArray{T, <:Any, Metal.SharedStorage}, soffs, n) where {T} # these copies are implemented using pure memcpy's, not API calls, so aren't ordered. synchronize() - GC.@preserve src dest unsafe_copyto!(pointer(dest, doffs), pointer(unsafe_wrap(Array,src), soffs), n) + GC.@preserve src dest unsafe_copyto!(pointer(dest, doffs), pointer(unsafe_wrap(Array, src), soffs), n) return dest end # GPU -> GPU -function Base.unsafe_copyto!(dev::MTLDevice, dest::MtlArray{T}, doffs, src::MtlArray{T}, soffs, n) where T +function Base.unsafe_copyto!(dev::MTLDevice, dest::MtlArray{T}, doffs, src::MtlArray{T}, soffs, n) where {T} # these copies are implemented using pure memcpy's, not API calls, so aren't ordered. synchronize() GC.@preserve src dest unsafe_copyto!(dev, pointer(dest, doffs), pointer(src, soffs), n) @@ -440,10 +459,10 @@ function Base.unsafe_copyto!(dev::MTLDevice, dest::MtlArray{T}, doffs, src::MtlA end return dest end -function Base.unsafe_copyto!(::MTLDevice, dest::MtlArray{T,<:Any,Metal.SharedStorage}, doffs, src::MtlArray{T,<:Any,Metal.SharedStorage}, soffs, n) where T +function Base.unsafe_copyto!(::MTLDevice, dest::MtlArray{T, <:Any, Metal.SharedStorage}, doffs, src::MtlArray{T, <:Any, Metal.SharedStorage}, soffs, n) where {T} # these copies are implemented using pure memcpy's, not API calls, so aren't ordered. synchronize() - GC.@preserve src dest unsafe_copyto!(pointer(unsafe_wrap(Array,dest), doffs), pointer(unsafe_wrap(Array,src), soffs), n) + GC.@preserve src dest unsafe_copyto!(pointer(unsafe_wrap(Array, dest), doffs), pointer(unsafe_wrap(Array, src), soffs), n) return dest end @@ -453,16 +472,16 @@ end # We don't convert isbits types in `adapt`, since they are already # considered GPU-compatible. -Adapt.adapt_storage(::Type{MtlArray}, xs::AT) where {AT<:AbstractArray} = +Adapt.adapt_storage(::Type{MtlArray}, xs::AT) where {AT <: AbstractArray} = isbitstype(AT) ? xs : convert(MtlArray, xs) # if specific type parameters are specified, preserve those -Adapt.adapt_storage(::Type{<:MtlArray{T}}, xs::AT) where {T,AT<:AbstractArray} = +Adapt.adapt_storage(::Type{<:MtlArray{T}}, xs::AT) where {T, AT <: AbstractArray} = isbitstype(AT) ? xs : convert(MtlArray{T}, xs) -Adapt.adapt_storage(::Type{<:MtlArray{T,N}}, xs::AT) where {T,N,AT<:AbstractArray} = - isbitstype(AT) ? xs : convert(MtlArray{T,N}, xs) -Adapt.adapt_storage(::Type{<:MtlArray{T,N,S}}, xs::AT) where {T,N,S,AT<:AbstractArray} = - isbitstype(AT) ? 
xs : convert(MtlArray{T,N,S}, xs) +Adapt.adapt_storage(::Type{<:MtlArray{T, N}}, xs::AT) where {T, N, AT <: AbstractArray} = + isbitstype(AT) ? xs : convert(MtlArray{T, N}, xs) +Adapt.adapt_storage(::Type{<:MtlArray{T, N, S}}, xs::AT) where {T, N, S, AT <: AbstractArray} = + isbitstype(AT) ? xs : convert(MtlArray{T, N, S}, xs) ## opinionated gpu array adaptor @@ -471,14 +490,14 @@ Adapt.adapt_storage(::Type{<:MtlArray{T,N,S}}, xs::AT) where {T,N,S,AT<:Abstract struct MtlArrayAdaptor{S} end -Adapt.adapt_storage(::MtlArrayAdaptor{S}, xs::AbstractArray{T,N}) where {T,N,S} = - isbits(xs) ? xs : MtlArray{T,N,S}(xs) +Adapt.adapt_storage(::MtlArrayAdaptor{S}, xs::AbstractArray{T, N}) where {T, N, S} = + isbits(xs) ? xs : MtlArray{T, N, S}(xs) -Adapt.adapt_storage(::MtlArrayAdaptor{S}, xs::AbstractArray{T,N}) where {T<:Float64,N,S} = - isbits(xs) ? xs : MtlArray{Float32,N,S}(xs) +Adapt.adapt_storage(::MtlArrayAdaptor{S}, xs::AbstractArray{T, N}) where {T <: Float64, N, S} = + isbits(xs) ? xs : MtlArray{Float32, N, S}(xs) -Adapt.adapt_storage(::MtlArrayAdaptor{S}, xs::AbstractArray{T,N}) where {T<:Complex{<:Float64},N,S} = - isbits(xs) ? xs : MtlArray{ComplexF32,N,S}(xs) +Adapt.adapt_storage(::MtlArrayAdaptor{S}, xs::AbstractArray{T, N}) where {T <: Complex{<:Float64}, N, S} = + isbits(xs) ? xs : MtlArray{ComplexF32, N, S}(xs) """ mtl(A; storage=Metal.PrivateStorage) @@ -516,34 +535,34 @@ julia> MtlArray(1:3) 3 ``` """ -@inline mtl(xs; storage=DefaultStorageMode) = adapt(MtlArrayAdaptor{storage}(), xs) +@inline mtl(xs; storage = DefaultStorageMode) = adapt(MtlArrayAdaptor{storage}(), xs) ## utilities for (fname, felt) in ((:zeros, :zero), (:ones, :one)) @eval begin - $fname(::Type{T}, dims::Base.Dims{N}; storage=DefaultStorageMode) where {T,N} = fill!(MtlArray{T,N,storage}(undef, dims), $felt(T)) - $fname(::Type{T}, dims...; storage=DefaultStorageMode) where {T} = fill!(MtlArray{T,length(dims),storage}(undef, dims), $felt(T)) - $fname(dims...; storage=DefaultStorageMode) = fill!(MtlArray{Float32,length(dims),storage}(undef, dims), $felt(Float32)) + $fname(::Type{T}, dims::Base.Dims{N}; storage = DefaultStorageMode) where {T, N} = fill!(MtlArray{T, N, storage}(undef, dims), $felt(T)) + $fname(::Type{T}, dims...; storage = DefaultStorageMode) where {T} = fill!(MtlArray{T, length(dims), storage}(undef, dims), $felt(T)) + $fname(dims...; storage = DefaultStorageMode) = fill!(MtlArray{Float32, length(dims), storage}(undef, dims), $felt(Float32)) end end -fill(v::T, dims::Base.Dims{N}; storage=DefaultStorageMode) where {T,N} = fill!(MtlArray{T,N,storage}(undef, dims), v) -fill(v::T, dims...; storage=DefaultStorageMode) where T = fill!(MtlArray{T,length(dims),storage}(undef, dims), v) +fill(v::T, dims::Base.Dims{N}; storage = DefaultStorageMode) where {T, N} = fill!(MtlArray{T, N, storage}(undef, dims), v) +fill(v::T, dims...; storage = DefaultStorageMode) where {T} = fill!(MtlArray{T, length(dims), storage}(undef, dims), v) # optimized implementation of `fill!` for types that are directly supported by fillbuffer -function Base.fill!(A::MtlArray{T}, val) where T <: Union{UInt8,Int8} +function Base.fill!(A::MtlArray{T}, val) where {T <: Union{UInt8, Int8}} B = convert(T, val) unsafe_fill!(device(A), pointer(A), B, length(A)) - A + return A end ## derived arrays -function GPUArrays.derive(::Type{T}, a::MtlArray{<:Any,<:Any,S}, dims::Dims{N}, offset::Int) where {T,N,S} +function GPUArrays.derive(::Type{T}, a::MtlArray{<:Any, <:Any, S}, dims::Dims{N}, offset::Int) where {T, N, S} offset = (a.offset * 
Base.elsize(a)) ÷ sizeof(T) + offset - MtlArray{T,N,S}(a.data, dims; a.maxsize, offset) + return MtlArray{T, N, S}(a.data, dims; a.maxsize, offset) end @@ -552,13 +571,13 @@ end device(a::SubArray) = device(parent(a)) # pointer conversions -function Base.unsafe_convert(::Type{MTL.MTLBuffer}, V::SubArray{T,N,P,<:Tuple{Vararg{Base.RangeIndex}}}) where {T,N,P} +function Base.unsafe_convert(::Type{MTL.MTLBuffer}, V::SubArray{T, N, P, <:Tuple{Vararg{Base.RangeIndex}}}) where {T, N, P} return Base.unsafe_convert(MTL.MTLBuffer, parent(V)) + - Base._memory_offset(V.parent, map(first, V.indices)...) + Base._memory_offset(V.parent, map(first, V.indices)...) end -function Base.unsafe_convert(::Type{MTL.MTLBuffer}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{Base.RangeIndex,Base.ReshapedUnitRange}}}}) where {T,N,P} +function Base.unsafe_convert(::Type{MTL.MTLBuffer}, V::SubArray{T, N, P, <:Tuple{Vararg{Union{Base.RangeIndex, Base.ReshapedUnitRange}}}}) where {T, N, P} return Base.unsafe_convert(MTL.MTLBuffer, parent(V)) + - (Base.first_index(V) - 1) * sizeof(T) + (Base.first_index(V) - 1) * sizeof(T) end @@ -577,22 +596,24 @@ function Base.unsafe_wrap( arr::MtlArray{T, N}, dims = size(arr); own::Bool = false ) where {T, N} - return unsafe_wrap(Array{T,N}, pointer(arr), dims; own) + return unsafe_wrap(Array{T, N}, pointer(arr), dims; own) end -function Base.unsafe_wrap(t::Type{<:Array{T}}, buf::MTLBuffer, dims; own=false) where T +function Base.unsafe_wrap(t::Type{<:Array{T}}, buf::MTLBuffer, dims; own = false) where {T} ptr = convert(Ptr{T}, buf) return unsafe_wrap(t, ptr, dims; own) end -function Base.unsafe_wrap(t::Type{<:Array{T}}, ptr::MtlPtr{T}, dims; own=false) where T +function Base.unsafe_wrap(t::Type{<:Array{T}}, ptr::MtlPtr{T}, dims; own = false) where {T} return unsafe_wrap(t, convert(Ptr{T}, ptr), dims; own) end -function Base.unsafe_wrap(A::Type{<:MtlArray{T,N}}, arr::Array, dims=size(arr); - dev=device(), kwargs...) where {T,N} +function Base.unsafe_wrap( + A::Type{<:MtlArray{T, N}}, arr::Array, dims = size(arr); + dev = device(), kwargs... + ) where {T, N} GC.@preserve arr begin - buf = MTLBuffer(dev, prod(dims) * sizeof(T), pointer(arr); nocopy=true, kwargs...) + buf = MTLBuffer(dev, prod(dims) * sizeof(T), pointer(arr); nocopy = true, kwargs...) return A(buf, Dims(dims)) end end @@ -606,7 +627,7 @@ Resize `a` to contain `n` elements. If `n` is smaller than the current collectio the first `n` elements will be retained. If `n` is larger, the new elements are not guaranteed to be initialized. """ -function Base.resize!(A::MtlVector{T}, n::Integer) where T +function Base.resize!(A::MtlVector{T}, n::Integer) where {T} # TODO: add additional space to allow for quicker resizing maxsize = n * sizeof(T) bufsize = if isbitstype(T) @@ -618,7 +639,7 @@ function Base.resize!(A::MtlVector{T}, n::Integer) where T # replace the data with a new one. this 'unshares' the array. # as a result, we can safely support resizing unowned buffers. 
- buf = alloc(device(A), bufsize; storage=storagemode(A)) + buf = alloc(device(A), bufsize; storage = storagemode(A)) ptr = MtlPtr{T}(buf) m = min(length(A), n) if m > 0 @@ -634,5 +655,5 @@ function Base.resize!(A::MtlVector{T}, n::Integer) where T A.maxsize = maxsize A.offset = 0 - A + return A end diff --git a/test/array.jl b/test/array.jl index e64dbf9bb..9111f35e1 100644 --- a/test/array.jl +++ b/test/array.jl @@ -2,604 +2,632 @@ STORAGEMODES = [Metal.PrivateStorage, Metal.SharedStorage] @testset "array" begin -let arr = MtlVector{Int}(undef, 1) - @test sizeof(arr) == 8 - @test length(arr) == 1 - @test eltype(arr) == Int -end - -let arr = MtlVector{Int}(undef, 0) - @test sizeof(arr) == 0 -end - -@testset "constructors" begin - xs = MtlArray{Int8}(undef, 2, 3) - @test device(xs) == device() - @test Base.elsize(xs) == sizeof(Int8) - @test xs.data[].length == 6 - xs2 = MtlArray{Int8, 2}(xs) - @test xs2.data[].length == 6 - @test pointer(xs2) != pointer(xs) - - @test collect(MtlArray([1 2; 3 4])) == [1 2; 3 4] - @test collect(mtl([1, 2, 3])) == [1, 2, 3] - @test testf(vec, rand(Float32, 5,3)) - @test mtl(1:3) === 1:3 - - - # Page 22 of https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf - # Only bfloat missing - supported_number_types = [Float16 => Float16, - Float32 => Float32, - Float64 => Float32, - Bool => Bool, - Int16 => Int16, - Int32 => Int32, - Int64 => Int64, - Int8 => Int8, - UInt16 => UInt16, - UInt32 => UInt32, - UInt64 => UInt64, - UInt8 => UInt8] - # Test supported types and ensure only Float64 get converted to Float32 - for (SrcType, TargType) in supported_number_types - @test mtl(SrcType[1]) isa MtlArray{TargType} - @test mtl(Complex{SrcType}[1+1im]) isa MtlArray{Complex{TargType}} - end + @testset "default storage mode" begin + # Test that default storage mode respects UMA detection + if Metal.device().hasUnifiedMemory + @test Metal.DefaultStorageMode == Metal.SharedStorage + else + @test Metal.DefaultStorageMode == Metal.PrivateStorage + end - # test the regular adaptor - @test Adapt.adapt(MtlArray, [1 2;3 4]) isa MtlArray{Int, 2, Metal.DefaultStorageMode} - @test Adapt.adapt(MtlArray{Float32}, [1 2;3 4]) isa MtlArray{Float32, 2, Metal.DefaultStorageMode} - @test Adapt.adapt(MtlArray{Float32, 2}, [1 2;3 4]) isa MtlArray{Float32, 2, Metal.DefaultStorageMode} - @test Adapt.adapt(MtlArray{Float32, 2, Metal.SharedStorage}, [1 2;3 4]) isa MtlArray{Float32, 2, Metal.SharedStorage} - @test Adapt.adapt(MtlMatrix{ComplexF32, Metal.SharedStorage}, [1 2;3 4]) isa MtlArray{ComplexF32, 2, Metal.SharedStorage} - @test Adapt.adapt(MtlArray{Float16}, Float64[1]) isa MtlArray{Float16} - - # Test a few explicitly unsupported types - @test_throws "MtlArray only supports element types that are stored inline" MtlArray(BigInt[1]) - @test_throws "Metal does not support Float64 values" MtlArray(Float64[1]) - @test_throws "Metal does not support Int128 values" MtlArray(Int128[1]) - @test_throws "Metal does not support UInt128 values" MtlArray(UInt128[1]) - - @test collect(Metal.zeros(2, 2)) == zeros(Float32, 2, 2) - @test collect(Metal.ones(2, 2)) == ones(Float32, 2, 2) - - @test collect(Metal.fill(0, 2, 2)) == zeros(Float32, 2, 2) - @test collect(Metal.fill(1, 2, 2)) == ones(Float32, 2, 2) -end + # Test that arrays created without explicit storage use the default + arr = MtlArray{Float32}(undef, 100) + @test Metal.storagemode(arr) == Metal.DefaultStorageMode + + # Test that explicit storage mode overrides the default + arr_private = MtlArray{Float32, 1, 
Metal.PrivateStorage}(undef, 100) + arr_shared = MtlArray{Float32, 1, Metal.SharedStorage}(undef, 100) + @test Metal.storagemode(arr_private) == Metal.PrivateStorage + @test Metal.storagemode(arr_shared) == Metal.SharedStorage + end + + let arr = MtlVector{Int}(undef, 1) + @test sizeof(arr) == 8 + @test length(arr) == 1 + @test eltype(arr) == Int + end + + let arr = MtlVector{Int}(undef, 0) + @test sizeof(arr) == 0 + end + + @testset "constructors" begin + xs = MtlArray{Int8}(undef, 2, 3) + @test device(xs) == device() + @test Base.elsize(xs) == sizeof(Int8) + @test xs.data[].length == 6 + xs2 = MtlArray{Int8, 2}(xs) + @test xs2.data[].length == 6 + @test pointer(xs2) != pointer(xs) + + @test collect(MtlArray([1 2; 3 4])) == [1 2; 3 4] + @test collect(mtl([1, 2, 3])) == [1, 2, 3] + @test testf(vec, rand(Float32, 5, 3)) + @test mtl(1:3) === 1:3 + + + # Page 22 of https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf + # Only bfloat missing + supported_number_types = [ + Float16 => Float16, + Float32 => Float32, + Float64 => Float32, + Bool => Bool, + Int16 => Int16, + Int32 => Int32, + Int64 => Int64, + Int8 => Int8, + UInt16 => UInt16, + UInt32 => UInt32, + UInt64 => UInt64, + UInt8 => UInt8, + ] + # Test supported types and ensure only Float64 get converted to Float32 + for (SrcType, TargType) in supported_number_types + @test mtl(SrcType[1]) isa MtlArray{TargType} + @test mtl(Complex{SrcType}[1 + 1im]) isa MtlArray{Complex{TargType}} + end -@testset "copyto!" begin - @testset "$T, $S" for S in [Metal.PrivateStorage, Metal.SharedStorage], - T in [Float16, Float32, Bool, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8] - dim = (1000,17,10) - A = rand(T,dim) - mtlA = mtl(A;storage=S) - - #cpu -> gpu - res = Metal.zeros(T,dim;storage=S) - copyto!(res,A) - @test Array(res) == Array(A) - - #gpu -> cpu - res = zeros(T,dim) - copyto!(res,mtlA) - @test Array(res) == Array(mtlA) - - #gpu -> gpu - res = Metal.zeros(T,dim;storage=S) - copyto!(res,mtlA) - @test Array(res) == Array(mtlA) + # test the regular adaptor + @test Adapt.adapt(MtlArray, [1 2;3 4]) isa MtlArray{Int, 2, Metal.DefaultStorageMode} + @test Adapt.adapt(MtlArray{Float32}, [1 2;3 4]) isa MtlArray{Float32, 2, Metal.DefaultStorageMode} + @test Adapt.adapt(MtlArray{Float32, 2}, [1 2;3 4]) isa MtlArray{Float32, 2, Metal.DefaultStorageMode} + @test Adapt.adapt(MtlArray{Float32, 2, Metal.SharedStorage}, [1 2;3 4]) isa MtlArray{Float32, 2, Metal.SharedStorage} + @test Adapt.adapt(MtlMatrix{ComplexF32, Metal.SharedStorage}, [1 2;3 4]) isa MtlArray{ComplexF32, 2, Metal.SharedStorage} + @test Adapt.adapt(MtlArray{Float16}, Float64[1]) isa MtlArray{Float16} + + # Test a few explicitly unsupported types + @test_throws "MtlArray only supports element types that are stored inline" MtlArray(BigInt[1]) + @test_throws "Metal does not support Float64 values" MtlArray(Float64[1]) + @test_throws "Metal does not support Int128 values" MtlArray(Int128[1]) + @test_throws "Metal does not support UInt128 values" MtlArray(UInt128[1]) + + @test collect(Metal.zeros(2, 2)) == zeros(Float32, 2, 2) + @test collect(Metal.ones(2, 2)) == ones(Float32, 2, 2) + + @test collect(Metal.fill(0, 2, 2)) == zeros(Float32, 2, 2) + @test collect(Metal.fill(1, 2, 2)) == ones(Float32, 2, 2) + end + + @testset "copyto!" 
begin + @testset "$T, $S" for S in [Metal.PrivateStorage, Metal.SharedStorage], + T in [Float16, Float32, Bool, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8] + dim = (1000, 17, 10) + A = rand(T, dim) + mtlA = mtl(A; storage = S) + + #cpu -> gpu + res = Metal.zeros(T, dim; storage = S) + copyto!(res, A) + @test Array(res) == Array(A) + + #gpu -> cpu + res = zeros(T, dim) + copyto!(res, mtlA) + @test Array(res) == Array(mtlA) + + #gpu -> gpu + res = Metal.zeros(T, dim; storage = S) + copyto!(res, mtlA) + @test Array(res) == Array(mtlA) + end end -end - -check_storagemode(arr, smode) = Metal.storagemode(arr) == smode - -# There is some repetition to the GPUArrays tests to test for different storagemodes -@testset "$SM storageMode $dim" for SM in STORAGEMODES, dim in [(10,10,10), (1000,17,10)] # The second one purposefully made to always be bigger than 16KiB - N = length(dim) + check_storagemode(arr, smode) = Metal.storagemode(arr) == smode - # mtl - let arr = mtl(rand(2,2); storage= SM) - @test check_storagemode(arr, SM) - end - - # type and dimensionality specified, accepting dims as series of Ints - let arr = MtlArray{Int,3,SM}(undef, dim[1],dim[2],dim[3]) - @test check_storagemode(arr, SM) - end - let arr = MtlArray{Int,2,SM}(undef, dim[1],dim[2]) - @test check_storagemode(arr, SM) - end + # There is some repetition to the GPUArrays tests to test for different storagemodes + @testset "$SM storageMode $dim" for SM in STORAGEMODES, dim in [(10, 10, 10), (1000, 17, 10)] # The second one purposefully made to always be bigger than 16KiB - # empty vector constructor - let arr = MtlArray{Int,1,SM}(undef, 0) - @test check_storagemode(arr, SM) - end - let arr = MtlVector{Int,SM}() - @test check_storagemode(arr, SM) - end + N = length(dim) - ## interop with other arrays - let arr = MtlArray{Float32,N,SM}(rand(Float32,dim)) - @test check_storagemode(arr, SM) - end - let arr = MtlArray{Float32,N,SM}(rand(Int,dim)) - @test check_storagemode(arr, SM) - end + # mtl + let arr = mtl(rand(2, 2); storage = SM) + @test check_storagemode(arr, SM) + end - # constructing new MtlArray from MtlArray - let arr = MtlArray{Int,N,SM}(rand(Int,dim)) - arr2 = MtlArray{Int,N,SM}(arr) - @test check_storagemode(arr2, SM) - end + # type and dimensionality specified, accepting dims as series of Ints + let arr = MtlArray{Int, 3, SM}(undef, dim[1], dim[2], dim[3]) + @test check_storagemode(arr, SM) + end + let arr = MtlArray{Int, 2, SM}(undef, dim[1], dim[2]) + @test check_storagemode(arr, SM) + end - # fill, zeros, ones - let arr = Metal.fill(rand(Float32), dim; storage=SM) - @test check_storagemode(arr, SM) - end + # empty vector constructor + let arr = MtlArray{Int, 1, SM}(undef, 0) + @test check_storagemode(arr, SM) + end + let arr = MtlVector{Int, SM}() + @test check_storagemode(arr, SM) + end - let arr = Metal.zeros(Float32, dim; storage=SM) - @test check_storagemode(arr, SM) - end + ## interop with other arrays + let arr = MtlArray{Float32, N, SM}(rand(Float32, dim)) + @test check_storagemode(arr, SM) + end + let arr = MtlArray{Float32, N, SM}(rand(Int, dim)) + @test check_storagemode(arr, SM) + end - let arr = Metal.ones(Float32, dim; storage=SM) - @test check_storagemode(arr, SM) - end + # constructing new MtlArray from MtlArray + let arr = MtlArray{Int, N, SM}(rand(Int, dim)) + arr2 = MtlArray{Int, N, SM}(arr) + @test check_storagemode(arr2, SM) + end - for SM2 in STORAGEMODES - let arr = MtlArray{Int,N,SM}(rand(Int,dim)) - arr2 = MtlArray{Int,N,SM2}(arr) - @test check_storagemode(arr2, SM2) + # fill, 
zeros, ones + let arr = Metal.fill(rand(Float32), dim; storage = SM) + @test check_storagemode(arr, SM) end - end - # private storage errors. - if SM == Metal.PrivateStorage - let arr_mtl = Metal.zeros(Float32, dim...; storage=Metal.PrivateStorage) - @test is_private(arr_mtl) && !is_shared(arr_mtl) - @test_throws "Cannot access the contents of a private buffer" arr_cpu = unsafe_wrap(Array{Float32}, arr_mtl, dim) + let arr = Metal.zeros(Float32, dim; storage = SM) + @test check_storagemode(arr, SM) end - let b = rand(Float32, 10) - arr_mtl = mtl(b; storage=Metal.PrivateStorage) - @test_throws ErrorException arr_mtl[1] - @test Metal.@allowscalar arr_mtl[1] == b[1] + let arr = Metal.ones(Float32, dim; storage = SM) + @test check_storagemode(arr, SM) end - elseif SM == Metal.SharedStorage - let arr_mtl = Metal.zeros(Float32, dim...; storage=Metal.SharedStorage) - @test !is_private(arr_mtl) && is_shared(arr_mtl) - @test unsafe_wrap(Array{Float32}, arr_mtl) isa Array{Float32} + + for SM2 in STORAGEMODES + let arr = MtlArray{Int, N, SM}(rand(Int, dim)) + arr2 = MtlArray{Int, N, SM2}(arr) + @test check_storagemode(arr2, SM2) + end end - let b = rand(Float32, 10) - arr_mtl = mtl(b; storage=Metal.SharedStorage) - @test arr_mtl[1] == b[1] + # private storage errors. + if SM == Metal.PrivateStorage + let arr_mtl = Metal.zeros(Float32, dim...; storage = Metal.PrivateStorage) + @test is_private(arr_mtl) && !is_shared(arr_mtl) + @test_throws "Cannot access the contents of a private buffer" arr_cpu = unsafe_wrap(Array{Float32}, arr_mtl, dim) + end + + let b = rand(Float32, 10) + arr_mtl = mtl(b; storage = Metal.PrivateStorage) + @test_throws ErrorException arr_mtl[1] + @test Metal.@allowscalar arr_mtl[1] == b[1] + end + elseif SM == Metal.SharedStorage + let arr_mtl = Metal.zeros(Float32, dim...; storage = Metal.SharedStorage) + @test !is_private(arr_mtl) && is_shared(arr_mtl) + @test unsafe_wrap(Array{Float32}, arr_mtl) isa Array{Float32} + end + + let b = rand(Float32, 10) + arr_mtl = mtl(b; storage = Metal.SharedStorage) + @test arr_mtl[1] == b[1] + end end end -end -# Also tests changing storagemode -@testset "similar" begin - check_similar(::MtlArray{T,N,S}, typ, dim, sm) where {T,N,S} = - T == typ && N == dim && S == sm - # similar - typ1 = Int - typ2 = Float32 - dim1 = (10,10,10) - n1 = length(dim1) - dim2 = dim1[1:2] - n2 = length(dim2) - sm1 = Metal.SharedStorage - sm2 = Metal.PrivateStorage + # Also tests changing storagemode + @testset "similar" begin + check_similar(::MtlArray{T, N, S}, typ, dim, sm) where {T, N, S} = + T == typ && N == dim && S == sm + # similar + typ1 = Int + typ2 = Float32 + dim1 = (10, 10, 10) + n1 = length(dim1) + dim2 = dim1[1:2] + n2 = length(dim2) + sm1 = Metal.SharedStorage + sm2 = Metal.PrivateStorage - arr = MtlArray{typ1, n1, sm1}(undef, dim1) + arr = MtlArray{typ1, n1, sm1}(undef, dim1) - s1 = similar(arr) - @test check_similar(s1,typ1,n1,sm1) + s1 = similar(arr) + @test check_similar(s1, typ1, n1, sm1) - s2 = similar(arr, dim2) - @test check_similar(s2,typ1,n2,sm1) + s2 = similar(arr, dim2) + @test check_similar(s2, typ1, n2, sm1) - s3 = similar(arr, typ2, dim2) - @test check_similar(s3,typ2,n2,sm1) + s3 = similar(arr, typ2, dim2) + @test check_similar(s3, typ2, n2, sm1) - # s4-s6 test for changing storagemode - s4 = similar(arr; storage=sm2) - @test check_similar(s4,typ1,n1,sm2) + # s4-s6 test for changing storagemode + s4 = similar(arr; storage = sm2) + @test check_similar(s4, typ1, n1, sm2) - s5 = similar(arr, dim2; storage=sm2) - @test 
check_similar(s5,typ1,n2,sm2) + s5 = similar(arr, dim2; storage = sm2) + @test check_similar(s5, typ1, n2, sm2) - s6 = similar(arr, typ2, dim2; storage=sm2) - @test check_similar(s6,typ2,n2,sm2) + s6 = similar(arr, typ2, dim2; storage = sm2) + @test check_similar(s6, typ2, n2, sm2) -end + end -@testset "fill($T)" for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, - Float16, Float32] - b = rand(T) + @testset "fill($T)" for T in [ + Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, + Float16, Float32, + ] + b = rand(T) - # Dims in tuple - let A = Metal.fill(b, (10, 10, 10, 1000)) - B = fill(b, (10, 10, 10, 1000)) - @test Array(A) == B - end + # Dims in tuple + let A = Metal.fill(b, (10, 10, 10, 1000)) + B = fill(b, (10, 10, 10, 1000)) + @test Array(A) == B + end - let M = Metal.fill(b, (10, 10)) - B = fill(b, (10, 10)) - @test Array(M) == B - end + let M = Metal.fill(b, (10, 10)) + B = fill(b, (10, 10)) + @test Array(M) == B + end - let V = Metal.fill(b, (10,)) - B = fill(b, (10,)) - @test Array(V) == B - end + let V = Metal.fill(b, (10,)) + B = fill(b, (10,)) + @test Array(V) == B + end - #Dims already unpacked - let A = Metal.fill(b, 10, 1000, 1000) - B = fill(b, 10, 1000, 1000) - @test Array(A) == B - end + #Dims already unpacked + let A = Metal.fill(b, 10, 1000, 1000) + B = fill(b, 10, 1000, 1000) + @test Array(A) == B + end - let M = Metal.fill(b, 10, 10) - B = fill(b, 10, 10) - @test Array(M) == B - end + let M = Metal.fill(b, 10, 10) + B = fill(b, 10, 10) + @test Array(M) == B + end - let V = Metal.fill(b, 10) - B = fill(b, 10) - @test Array(V) == B + let V = Metal.fill(b, 10) + B = fill(b, 10) + @test Array(V) == B + end end -end -@testset "fill!($T)" for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, - Float16, Float32] - b = rand(T) + @testset "fill!($T)" for T in [ + Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, + Float16, Float32, + ] + b = rand(T) - # Dims in tuple - let A = MtlArray{T,3}(undef, (10, 1000, 1000)) - fill!(A, b) - @test all(Array(A) .== b) - end + # Dims in tuple + let A = MtlArray{T, 3}(undef, (10, 1000, 1000)) + fill!(A, b) + @test all(Array(A) .== b) + end - let M = MtlMatrix{T}(undef, (10, 10)) - fill!(M, b) - @test all(Array(M) .== b) - end + let M = MtlMatrix{T}(undef, (10, 10)) + fill!(M, b) + @test all(Array(M) .== b) + end - let V = MtlVector{T}(undef, (10,)) - fill!(V, b) - @test all(Array(V) .== b) - end + let V = MtlVector{T}(undef, (10,)) + fill!(V, b) + @test all(Array(V) .== b) + end - # Dims already unpacked - let A = MtlArray{T,4}(undef, 10, 10, 10, 1000) - fill!(A, b) - @test all(Array(A) .== b) - end + # Dims already unpacked + let A = MtlArray{T, 4}(undef, 10, 10, 10, 1000) + fill!(A, b) + @test all(Array(A) .== b) + end - let M = MtlMatrix{T}(undef, 10, 10) - fill!(M, b) - @test all(Array(M) .== b) - end + let M = MtlMatrix{T}(undef, 10, 10) + fill!(M, b) + @test all(Array(M) .== b) + end - let V = MtlVector{T}(undef, 10) - fill!(V, b) - @test all(Array(V) .== b) - end + let V = MtlVector{T}(undef, 10) + fill!(V, b) + @test all(Array(V) .== b) + end - # 0-length array - let A = MtlArray{T}(undef, 0) - b = rand(T) - fill!(A, b) - @test A isa MtlArray{T,1} - @test Array(A) == fill(b, 0) + # 0-length array + let A = MtlArray{T}(undef, 0) + b = rand(T) + fill!(A, b) + @test A isa MtlArray{T, 1} + @test Array(A) == fill(b, 0) + end end -end -# https://github.com/JuliaGPU/CUDA.jl/issues/2191 -@testset "preserving storage mode" begin - a = mtl([1]; storage=Metal.SharedStorage) - @test 
Metal.storagemode(a) == Metal.SharedStorage + # https://github.com/JuliaGPU/CUDA.jl/issues/2191 + @testset "preserving storage mode" begin + a = mtl([1]; storage = Metal.SharedStorage) + @test Metal.storagemode(a) == Metal.SharedStorage - # storage mode should be preserved - b = a .+ 1 - @test Metal.storagemode(b) == Metal.SharedStorage + # storage mode should be preserved + b = a .+ 1 + @test Metal.storagemode(b) == Metal.SharedStorage - # when there's a conflict, we should defer to shared memory - c = mtl([1]; storage=Metal.PrivateStorage) - d = mtl([1]; storage=Metal.SharedStorage) - e = c .+ d - @test Metal.storagemode(e) == Metal.SharedStorage -end + # when there's a conflict, we should defer to shared memory + c = mtl([1]; storage = Metal.PrivateStorage) + d = mtl([1]; storage = Metal.SharedStorage) + e = c .+ d + @test Metal.storagemode(e) == Metal.SharedStorage + end -@testset "resizing" begin - a = MtlArray([1,2,3]) + @testset "resizing" begin + a = MtlArray([1, 2, 3]) - resize!(a, 3) - @test length(a) == 3 - @test Array(a) == [1,2,3] + resize!(a, 3) + @test length(a) == 3 + @test Array(a) == [1, 2, 3] - resize!(a, 5) - @test length(a) == 5 - @test Array(a)[1:3] == [1,2,3] + resize!(a, 5) + @test length(a) == 5 + @test Array(a)[1:3] == [1, 2, 3] - resize!(a, 2) - @test length(a) == 2 - @test Array(a)[1:2] == [1,2] + resize!(a, 2) + @test length(a) == 2 + @test Array(a)[1:2] == [1, 2] - b = MtlArray{Int}(undef, 0) - @test length(b) == 0 - resize!(b, 1) - @test length(b) == 1 -end + b = MtlArray{Int}(undef, 0) + @test length(b) == 0 + resize!(b, 1) + @test length(b) == 1 + end -function _alignedvec(::Type{T}, n::Integer, alignment::Integer = 16384) where {T} - ispow2(alignment) || throw(ArgumentError("$alignment is not a power of 2")) - alignment ≥ sizeof(Int) || throw(ArgumentError("$alignment is not a multiple of $(sizeof(Int))")) - isbitstype(T) || throw(ArgumentError("$T is not a bitstype")) - p = Ref{Ptr{T}}() - err = ccall(:posix_memalign, Cint, (Ref{Ptr{T}}, Csize_t, Csize_t), p, alignment, n * sizeof(T)) - iszero(err) || throw(OutOfMemoryError()) - return unsafe_wrap(Array, p[], n, own = true) -end + function _alignedvec(::Type{T}, n::Integer, alignment::Integer = 16384) where {T} + ispow2(alignment) || throw(ArgumentError("$alignment is not a power of 2")) + alignment ≥ sizeof(Int) || throw(ArgumentError("$alignment is not a multiple of $(sizeof(Int))")) + isbitstype(T) || throw(ArgumentError("$T is not a bitstype")) + p = Ref{Ptr{T}}() + err = ccall(:posix_memalign, Cint, (Ref{Ptr{T}}, Csize_t, Csize_t), p, alignment, n * sizeof(T)) + iszero(err) || throw(OutOfMemoryError()) + return unsafe_wrap(Array, p[], n, own = true) + end -@testset "unsafe_wrap" begin - @testset "cpu array incremented" begin - @testset "wrap cpu" begin - @testset "check cpu" begin # cpu array checked first - arr = _alignedvec(Float32, 16384 * 2) - fill!(arr, one(eltype(arr))) - marr = Metal.@sync unsafe_wrap(MtlVector{Float32}, arr) + @testset "unsafe_wrap" begin + @testset "cpu array incremented" begin + @testset "wrap cpu" begin + @testset "check cpu" begin # cpu array checked first + arr = _alignedvec(Float32, 16384 * 2) + fill!(arr, one(eltype(arr))) + marr = Metal.@sync unsafe_wrap(MtlVector{Float32}, arr) - @test all(arr .== 1) - @test all(marr .== 1) + @test all(arr .== 1) + @test all(marr .== 1) - arr .+= 1 - @test all(arr .== 2) - @test all(marr .== 2) - end + arr .+= 1 + @test all(arr .== 2) + @test all(marr .== 2) + end - @testset "check gpu" begin # gpu array checked first - arr = 
_alignedvec(Float32, 16384 * 2) - fill!(arr, one(eltype(arr))) - marr = Metal.@sync unsafe_wrap(MtlVector{Float32}, arr) + @testset "check gpu" begin # gpu array checked first + arr = _alignedvec(Float32, 16384 * 2) + fill!(arr, one(eltype(arr))) + marr = Metal.@sync unsafe_wrap(MtlVector{Float32}, arr) - @test all(marr .== 1) - @test all(arr .== 1) + @test all(marr .== 1) + @test all(arr .== 1) - arr .+= 1 - @test all(marr .== 2) - @test all(arr .== 2) + arr .+= 1 + @test all(marr .== 2) + @test all(arr .== 2) + end end - end - @testset "wrap gpu" begin - @testset "check cpu" begin # cpu array checked first - marr = Metal.@sync Metal.ones(Float32, 18000; storage = Metal.SharedStorage) - arr = unsafe_wrap(Vector{Float32}, marr) + @testset "wrap gpu" begin + @testset "check cpu" begin # cpu array checked first + marr = Metal.@sync Metal.ones(Float32, 18000; storage = Metal.SharedStorage) + arr = unsafe_wrap(Vector{Float32}, marr) - @test all(arr .== 1) - @test all(marr .== 1) + @test all(arr .== 1) + @test all(marr .== 1) - arr .+= 1 - @test all(arr .== 2) - @test all(marr .== 2) - end + arr .+= 1 + @test all(arr .== 2) + @test all(marr .== 2) + end - @testset "check gpu" begin # gpu array checked first - marr = Metal.@sync Metal.ones(Float32, 18000; storage = Metal.SharedStorage) - arr = unsafe_wrap(Vector{Float32}, marr) + @testset "check gpu" begin # gpu array checked first + marr = Metal.@sync Metal.ones(Float32, 18000; storage = Metal.SharedStorage) + arr = unsafe_wrap(Vector{Float32}, marr) - @test all(marr .== 1) - @test all(arr .== 1) + @test all(marr .== 1) + @test all(arr .== 1) - arr .+= 1 - @test all(marr .== 2) - @test all(arr .== 2) + arr .+= 1 + @test all(marr .== 2) + @test all(arr .== 2) + end end end - end - @testset "gpu array incremented" begin - @testset "wrap cpu" begin - @testset "check cpu" begin # cpu array checked first - arr = _alignedvec(Float32, 16384 * 2) - fill!(arr, one(eltype(arr))) - marr = Metal.@sync unsafe_wrap(MtlVector{Float32}, arr) - - @test all(arr .== 1) - @test all(marr .== 1) - - Metal.@sync marr .+= 1 - @test all(arr .== 2) - @test all(marr .== 2) + @testset "gpu array incremented" begin + @testset "wrap cpu" begin + @testset "check cpu" begin # cpu array checked first + arr = _alignedvec(Float32, 16384 * 2) + fill!(arr, one(eltype(arr))) + marr = Metal.@sync unsafe_wrap(MtlVector{Float32}, arr) + + @test all(arr .== 1) + @test all(marr .== 1) + + Metal.@sync marr .+= 1 + @test all(arr .== 2) + @test all(marr .== 2) + end + + @testset "check gpu" begin # gpu array checked first + arr = _alignedvec(Float32, 16384 * 2) + fill!(arr, one(eltype(arr))) + marr = Metal.@sync unsafe_wrap(MtlVector{Float32}, arr) + + @test all(marr .== 1) + @test all(arr .== 1) + + marr .+= 1 + @test all(marr .== 2) + @test all(arr .== 2) + end end - @testset "check gpu" begin # gpu array checked first - arr = _alignedvec(Float32, 16384 * 2) - fill!(arr, one(eltype(arr))) - marr = Metal.@sync unsafe_wrap(MtlVector{Float32}, arr) + @testset "wrap gpu" begin + @testset "check cpu" begin # cpu array checked first + marr = Metal.@sync Metal.ones(Float32, 18000; storage = Metal.SharedStorage) + arr = unsafe_wrap(Vector{Float32}, marr) - @test all(marr .== 1) - @test all(arr .== 1) + @test all(arr .== 1) + @test all(marr .== 1) - marr .+= 1 - @test all(marr .== 2) - @test all(arr .== 2) - end - end + Metal.@sync marr .+= 1 + @test all(arr .== 2) + @test all(marr .== 2) + end - @testset "wrap gpu" begin - @testset "check cpu" begin # cpu array checked first - marr = Metal.@sync 
Metal.ones(Float32, 18000; storage = Metal.SharedStorage) - arr = unsafe_wrap(Vector{Float32}, marr) + @testset "check gpu" begin # gpu array checked first + marr = Metal.@sync Metal.ones(Float32, 18000; storage = Metal.SharedStorage) + arr = unsafe_wrap(Vector{Float32}, marr) - @test all(arr .== 1) - @test all(marr .== 1) + @test all(marr .== 1) + @test all(arr .== 1) - Metal.@sync marr .+= 1 - @test all(arr .== 2) - @test all(marr .== 2) + marr .+= 1 + @test all(marr .== 2) + @test all(arr .== 2) + end end + end - @testset "check gpu" begin # gpu array checked first - marr = Metal.@sync Metal.ones(Float32, 18000; storage = Metal.SharedStorage) - arr = unsafe_wrap(Vector{Float32}, marr) - - @test all(marr .== 1) - @test all(arr .== 1) + @testset "Issue #451" begin + a = mtl(reshape(Float32.(1:60), 5, 4, 3); storage = Metal.SharedStorage) + view_a = @view a[:, 1:4, 2] + b = copy(unsafe_wrap(Array, view_a)) + c = Array(view_a) - marr .+= 1 - @test all(marr .== 2) - @test all(arr .== 2) - end + @test b == c end - end - @testset "Issue #451" begin - a = mtl(reshape(Float32.(1:60), 5,4,3);storage=Metal.SharedStorage) - view_a = @view a[:,1:4,2] - b = copy(unsafe_wrap(Array, view_a)) - c = Array(view_a) + # test that you cannot create an array with a different eltype + marr3 = mtl(zeros(Float32, 10); storage = Metal.SharedStorage) + @test_throws MethodError unsafe_wrap(Array{Float16}, marr3) + end - @test b == c + @testset "ReshapedArray" begin + @test Array(sum(reshape(Metal.ones(3, 10)', (5, 3, 2)); dims = 1)) == fill(5, (1, 3, 2)) + @test Array(sum(reshape(PermutedDimsArray(reshape(mtl(collect(Float32, 1:30)), 5, 3, 2), (3, 1, 2)), (10, 3)); dims = 1)) == + sum(reshape(PermutedDimsArray(reshape(Float32.(1:30), 5, 3, 2), (3, 1, 2)), (10, 3)); dims = 1) end - # test that you cannot create an array with a different eltype - marr3 = mtl(zeros(Float32, 10); storage = Metal.SharedStorage) - @test_throws MethodError unsafe_wrap(Array{Float16}, marr3) -end + @testset "accumulate" begin + for n in (0, 1, 2, 3, 10, 10_000, 16384, 16384 + 1) # small, large, odd & even, pow2 and not + @test testf(x -> accumulate(+, x), rand(Float32, n)) + @test testf(x -> accumulate(+, x), rand(Float32, n, 2)) + @test testf(Base.Fix2((x, y) -> accumulate(+, x; init = y), rand(Float32)), rand(Float32, n)) + end -@testset "ReshapedArray" begin - @test Array(sum(reshape(Metal.ones(3, 10)', (5, 3, 2)); dims=1)) == fill(5, (1,3,2)) - @test Array(sum(reshape(PermutedDimsArray(reshape(mtl(collect(Float32, 1:30)), 5, 3, 2), (3, 1, 2)), (10, 3)); dims=1)) == - sum(reshape(PermutedDimsArray(reshape(Float32.(1:30), 5, 3, 2), (3, 1, 2)), (10, 3)); dims=1) -end + # multidimensional + for (sizes, dims) in ( + (2,) => 2, + (3, 4, 5) => 2, + (1, 70, 50, 20) => 3, + ) + @test testf(x -> accumulate(+, x; dims = dims), rand(-10:10, sizes)) + @test testf(x -> accumulate(+, x), rand(-10:10, sizes)) + end -@testset "accumulate" begin - for n in (0, 1, 2, 3, 10, 10_000, 16384, 16384+1) # small, large, odd & even, pow2 and not - @test testf(x->accumulate(+, x), rand(Float32, n)) - @test testf(x->accumulate(+, x), rand(Float32, n, 2)) - @test testf(Base.Fix2((x,y)->accumulate(+, x; init=y), rand(Float32)), rand(Float32, n)) - end + # using initializer + for (sizes, dims) in ( + (2,) => 2, + (3, 4, 5) => 2, + (1, 70, 50, 20) => 3, + ) + @test testf(Base.Fix2((x, y) -> accumulate(+, x; dims = dims, init = y), rand(-10:10)), rand(-10:10, sizes)) + @test testf(Base.Fix2((x, y) -> accumulate(+, x; init = y), rand(-10:10)), rand(-10:10, sizes)) + end 
- # multidimensional - for (sizes, dims) in ((2,) => 2, - (3,4,5) => 2, - (1, 70, 50, 20) => 3,) - @test testf(x->accumulate(+, x; dims=dims), rand(-10:10, sizes)) - @test testf(x->accumulate(+, x), rand(-10:10, sizes)) - end + # in place + @test testf(x -> (accumulate!(+, x, copy(x)); x), rand(Float32, 2)) - # using initializer - for (sizes, dims) in ((2,) => 2, - (3,4,5) => 2, - (1, 70, 50, 20) => 3) - @test testf(Base.Fix2((x,y)->accumulate(+, x; dims=dims, init=y), rand(-10:10)), rand(-10:10, sizes)) - @test testf(Base.Fix2((x,y)->accumulate(+, x; init=y), rand(-10:10)), rand(-10:10, sizes)) + # specialized + @test testf(cumsum, rand(Float32, 2)) + @test testf(cumprod, rand(Float32, 2)) end - # in place - @test testf(x->(accumulate!(+, x, copy(x)); x), rand(Float32, 2)) - - # specialized - @test testf(cumsum, rand(Float32, 2)) - @test testf(cumprod, rand(Float32, 2)) -end - -@testset "findall" begin - # 1D - @test testf(x->findall(x), rand(Bool, 1000)) - @test testf(x->findall(y->y>Float32(0.5), x), rand(Float32,1000)) + @testset "findall" begin + # 1D + @test testf(x -> findall(x), rand(Bool, 1000)) + @test testf(x -> findall(y -> y > Float32(0.5), x), rand(Float32, 1000)) - # Set storage mode to a different one than the default - let storage=Metal.DefaultStorageMode == Metal.PrivateStorage ? Metal.SharedStorage : Metal.PrivateStorage - x = mtl(rand(Float32,100); storage) - out = findall(y->y>Float32(0.5), x) - @test Metal.storagemode(x) == Metal.storagemode(out) - end + # Set storage mode to a different one than the default + let storage = Metal.DefaultStorageMode == Metal.PrivateStorage ? Metal.SharedStorage : Metal.PrivateStorage + x = mtl(rand(Float32, 100); storage) + out = findall(y -> y > Float32(0.5), x) + @test Metal.storagemode(x) == Metal.storagemode(out) + end - # ND - let x = rand(Bool, 0, 0) - @test findall(x) == Array(findall(MtlArray(x))) - end - let x = rand(Bool, 1000, 1000) - @test findall(x) == Array(findall(MtlArray(x))) - end - let x = rand(Float32, 1000, 1000) - @test findall(y->y>Float32(0.5), x) == Array(findall(y->y>Float32(0.5), MtlArray(x))) - end + # ND + let x = rand(Bool, 0, 0) + @test findall(x) == Array(findall(MtlArray(x))) + end + let x = rand(Bool, 1000, 1000) + @test findall(x) == Array(findall(MtlArray(x))) + end + let x = rand(Float32, 1000, 1000) + @test findall(y -> y > Float32(0.5), x) == Array(findall(y -> y > Float32(0.5), MtlArray(x))) + end - # ambiguity - let f = in(3) - x = MtlArray([1, 2, 3, 4, 5, 3]) - @test Array(findall(f, x)) == [3, 6] + # ambiguity + let f = in(3) + x = MtlArray([1, 2, 3, 4, 5, 3]) + @test Array(findall(f, x)) == [3, 6] + end end -end -@testset "broadcast" begin - testf(f, x) = Array(f(MtlArray(x))) ≈ f(x) + @testset "broadcast" begin + testf(f, x) = Array(f(MtlArray(x))) ≈ f(x) - @test testf(x->max.(x, zero(Float32)), randn(Float32, 1000)) - @test testf(x->min.(x, one(Float32)), randn(Float32, 1000)) - @test testf(x->min.(max.(x, zero(Float32)), one(Float32)), randn(Float32, 1000)) - @test testf(x->max.(min.(x, one(Float32)), zero(Float32)), randn(Float32, 1000)) + @test testf(x -> max.(x, zero(Float32)), randn(Float32, 1000)) + @test testf(x -> min.(x, one(Float32)), randn(Float32, 1000)) + @test testf(x -> min.(max.(x, zero(Float32)), one(Float32)), randn(Float32, 1000)) + @test testf(x -> max.(min.(x, one(Float32)), zero(Float32)), randn(Float32, 1000)) - # preserving buffer types - let x = Metal.zeros(Float32, 1; storage=Metal.SharedStorage) - y = x .+ 1 - @test is_shared(y) - end + # preserving buffer types + 
let x = Metal.zeros(Float32, 1; storage = Metal.SharedStorage) + y = x .+ 1 + @test is_shared(y) + end - # when storages are different, choose shared - let x = Metal.zeros(Float32, 1; storage=Metal.SharedStorage), y = Metal.zeros(Float32, 1; storage=Metal.PrivateStorage) - z = x .+ y - @test is_shared(z) - end + # when storages are different, choose shared + let x = Metal.zeros(Float32, 1; storage = Metal.SharedStorage), y = Metal.zeros(Float32, 1; storage = Metal.PrivateStorage) + z = x .+ y + @test is_shared(z) + end - let x = Metal.zeros(Float32, 2, 2; storage=Metal.SharedStorage), y = Metal.zeros(Float32, 2; storage=Metal.PrivateStorage) - z = x .+ y - @test is_shared(z) + let x = Metal.zeros(Float32, 2, 2; storage = Metal.SharedStorage), y = Metal.zeros(Float32, 2; storage = Metal.PrivateStorage) + z = x .+ y + @test is_shared(z) + end end -end end @testset "large map reduce" begin - dev = device() + dev = device() - big_size = Metal.serial_mapreduce_threshold(dev) + 5 - a = rand(Float32, big_size, 31) - c = MtlArray(a) + big_size = Metal.serial_mapreduce_threshold(dev) + 5 + a = rand(Float32, big_size, 31) + c = MtlArray(a) - expected = minimum(a, dims=2) - actual = minimum(c, dims=2) - @test expected == Array(actual) + expected = minimum(a, dims = 2) + actual = minimum(c, dims = 2) + @test expected == Array(actual) - expected = findmax(a, dims=2) - actual = findmax(c, dims=2) - @test expected == map(Array, actual) + expected = findmax(a, dims = 2) + actual = findmax(c, dims = 2) + @test expected == map(Array, actual) - expected = sum(a, dims=2) - actual = sum(c, dims=2) - @test expected == Array(actual) + expected = sum(a, dims = 2) + actual = sum(c, dims = 2) + @test expected == Array(actual) - a = rand(Int, big_size, 31) - c = MtlArray(a) + a = rand(Int, big_size, 31) + c = MtlArray(a) - expected = minimum(a, dims=2) - actual = minimum(c, dims=2) - @test expected == Array(actual) + expected = minimum(a, dims = 2) + actual = minimum(c, dims = 2) + @test expected == Array(actual) - expected = findmax(a, dims=2) - actual = findmax(c, dims=2) - @test expected == map(Array, actual) + expected = findmax(a, dims = 2) + actual = findmax(c, dims = 2) + @test expected == map(Array, actual) - expected = sum(a, dims=2) - actual = sum(c, dims=2) - @test expected == Array(actual) + expected = sum(a, dims = 2) + actual = sum(c, dims = 2) + @test expected == Array(actual) end -
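
A minimal usage sketch of the "auto" default storage mode introduced above, assuming an Apple Silicon (unified-memory) device and the "default_storage" preference left at its new "auto" default; the commented `set_preferences!` call is an assumption about how the `@load_preference`-backed setting would be overridden, not something this patch itself adds.

using Metal

# With "default_storage" == "auto", DefaultStorageMode is derived from the device:
# SharedStorage on unified-memory (Apple Silicon) GPUs, PrivateStorage otherwise.
@show Metal.DefaultStorageMode

# Arrays constructed without an explicit storage parameter pick up the default ...
a = MtlArray{Float32}(undef, 16)
@assert Metal.storagemode(a) == Metal.DefaultStorageMode

# ... while an explicit storage parameter (or the `storage` keyword of `mtl`) overrides it.
b = MtlArray{Float32, 1, Metal.PrivateStorage}(undef, 16)
c = mtl(rand(Float32, 16); storage = Metal.SharedStorage)
@assert Metal.storagemode(b) == Metal.PrivateStorage
@assert Metal.storagemode(c) == Metal.SharedStorage

# To force the pre-"auto" behaviour, the preference can be pinned back to "private"
# (assumed invocation via Preferences.jl; takes effect after restarting Julia):
# using Preferences; set_preferences!(Metal, "default_storage" => "private")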