Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 42 additions & 24 deletions lib/cusparse/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ function sort_coo(A::CuSparseMatrixCOO{Tv,Ti}, type::SparseChar='R') where {Tv,T
# and we have the following error in the tests:
# "Out of GPU memory trying to allocate 127.781 TiB".
# We set 0 as default value to avoid it.
out = Ref{Csize_t}(0)
out = Ref{Csize_t}(1)
cusparseXcoosort_bufferSizeExt(handle(), m, n, nnz(A), A.rowInd, A.colInd, out)
return out[]
end
Expand Down Expand Up @@ -332,9 +332,13 @@ for elty in (:Float32, :Float64, :ComplexF32, :ComplexF64)
@eval begin
function CuSparseMatrixCSC{$elty, Ti}(csr::CuSparseMatrixCSR{$elty, Ti}; index::SparseChar='O', action::cusparseAction_t=CUSPARSE_ACTION_NUMERIC, algo::cusparseCsr2CscAlg_t=CUSPARSE_CSR2CSC_ALG1) where {Ti}
m,n = size(csr)
colPtr = CUDA.zeros(Cint, n+1)
rowVal = CUDA.zeros(Cint, nnz(csr))
nzVal = CUDA.zeros($elty, nnz(csr))
colPtr = CuArray{Cint}(undef, n+1)
rowVal = CuArray{Cint}(undef, nnz(csr))
nzVal = CuArray{$elty}(undef, nnz(csr))
if version() <= v"12.6-"
# JuliaGPU/CUDA.jl#2806 (NVBUG 5384319)
colPtr .= (index == 'O' ? 1 : 0)
end
function bufferSize()
out = Ref{Csize_t}(1)
cusparseCsr2cscEx2_bufferSize(handle(), m, n, nnz(csr), nonzeros(csr),
Expand All @@ -355,9 +359,13 @@ for elty in (:Float32, :Float64, :ComplexF32, :ComplexF64)
CuSparseMatrixCSC{$elty, Ti}(csr; index=index, action=action, algo=algo)
function CuSparseMatrixCSR{$elty, Ti}(csc::CuSparseMatrixCSC{$elty, Ti}; index::SparseChar='O', action::cusparseAction_t=CUSPARSE_ACTION_NUMERIC, algo::cusparseCsr2CscAlg_t=CUSPARSE_CSR2CSC_ALG1) where {Ti}
m,n = size(csc)
rowPtr = CUDA.zeros(Cint,m+1)
colVal = CUDA.zeros(Cint,nnz(csc))
nzVal = CUDA.zeros($elty,nnz(csc))
rowPtr = CuArray{Cint}(undef, m+1)
colVal = CuArray{Cint}(undef, nnz(csc))
nzVal = CuArray{$elty}(undef, nnz(csc))
if version() <= v"12.6-"
# JuliaGPU/CUDA.jl#2806 (NVBUG 5384319)
rowPtr .= (index == 'O' ? 1 : 0)
end
function bufferSize()
out = Ref{Csize_t}(1)
cusparseCsr2cscEx2_bufferSize(handle(), n, m, nnz(csc), nonzeros(csc),
Expand Down Expand Up @@ -386,9 +394,13 @@ for (elty, welty) in ((:Float16, :Float32),
@eval begin
function CuSparseMatrixCSC{$elty, Ti}(csr::CuSparseMatrixCSR{$elty, Ti}; index::SparseChar='O', action::cusparseAction_t=CUSPARSE_ACTION_NUMERIC, algo::cusparseCsr2CscAlg_t=CUSPARSE_CSR2CSC_ALG1) where {Ti}
m,n = size(csr)
colPtr = CUDA.zeros(Cint, n+1)
rowVal = CUDA.zeros(Cint, nnz(csr))
nzVal = CUDA.zeros($elty, nnz(csr))
colPtr = CuArray{Cint}(undef, n+1)
rowVal = CuArray{Cint}(undef, nnz(csr))
nzVal = CuArray{$elty}(undef, nnz(csr))
if version() <= v"12.6-"
# JuliaGPU/CUDA.jl#2806 (NVBUG 5384319)
colPtr .= (index == 'O' ? 1 : 0)
end
if $elty == Float16 #broken for ComplexF16?
function bufferSize()
out = Ref{Csize_t}(1)
Expand All @@ -415,9 +427,13 @@ for (elty, welty) in ((:Float16, :Float32),
CuSparseMatrixCSC{$elty, Ti}(csr; index=index, action=action, algo=algo)
function CuSparseMatrixCSR{$elty, Ti}(csc::CuSparseMatrixCSC{$elty, Ti}; index::SparseChar='O', action::cusparseAction_t=CUSPARSE_ACTION_NUMERIC, algo::cusparseCsr2CscAlg_t=CUSPARSE_CSR2CSC_ALG1) where {Ti}
m,n = size(csc)
rowPtr = CUDA.zeros(Cint,m+1)
colVal = CUDA.zeros(Cint,nnz(csc))
nzVal = CUDA.zeros($elty,nnz(csc))
rowPtr = CuArray{Cint}(undef, m+1)
colVal = CuArray{Cint}(undef, nnz(csc))
nzVal = CuArray{$elty}(undef, nnz(csc))
if version() <= v"12.6-"
# JuliaGPU/CUDA.jl#2806 (NVBUG 5384319)
rowPtr .= (index == 'O' ? 1 : 0)
end
if $elty == Float16 #broken for ComplexF16?
function bufferSize()
out = Ref{Csize_t}(1)
Expand Down Expand Up @@ -544,9 +560,9 @@ for (fname,elty) in ((:cusparseSbsr2csr, :Float32),
nb = cld(n, bsr.blockDim)
cudesca = CuMatrixDescriptor('G', 'L', 'N', index)
cudescc = CuMatrixDescriptor('G', 'L', 'N', indc)
csrRowPtr = CUDA.zeros(Ti, m + 1)
csrColInd = CUDA.zeros(Ti, nnz(bsr))
csrNzVal = CUDA.zeros($elty, nnz(bsr))
csrRowPtr = CuArray{Ti}(undef, m + 1)
csrColInd = CuArray{Ti}(undef, nnz(bsr))
csrNzVal = CuArray{$elty}(undef, nnz(bsr))
$fname(handle(), bsr.dir, mb, nb,
cudesca, nonzeros(bsr), bsr.rowPtr, bsr.colVal,
bsr.blockDim, cudescc, csrNzVal, csrRowPtr,
Expand Down Expand Up @@ -609,10 +625,12 @@ end

# need both typevars for compatibility with GPUArrays
function CuSparseMatrixCSR{Tv, Ti}(coo::CuSparseMatrixCOO{Tv, Ti}; index::SparseChar='O') where {Tv, Ti}
m,n = size(coo)
nnz(coo) == 0 && return CuSparseMatrixCSR{Tv,Cint}(CUDA.ones(Cint, m+1), coo.colInd, nonzeros(coo), size(coo))
m, n = size(coo)
csrRowPtr = CuArray{Ti}(undef, m + 1)
if iszero(m)
csrRowPtr .= (index == 'O') ? 1 : 0 # cusparseXcoo2csr does not initialize csrRowPtr correctly if m == 0
end
coo = sort_coo(coo, 'R')
csrRowPtr = CuVector{Cint}(undef, m+1)
cusparseXcoo2csr(handle(), coo.rowInd, nnz(coo), m, csrRowPtr, index)
CuSparseMatrixCSR{Tv,Cint}(csrRowPtr, coo.colInd, nonzeros(coo), size(coo))
end
Expand All @@ -621,7 +639,6 @@ CuSparseMatrixCSR(coo::CuSparseMatrixCOO{Tv, Ti}; index::SparseChar='O') where {

function CuSparseMatrixCOO{Tv, Ti}(csr::CuSparseMatrixCSR{Tv}; index::SparseChar='O') where {Tv, Ti}
m,n = size(csr)
nnz(csr) == 0 && return CuSparseMatrixCOO{Tv,Cint}(CUDA.zeros(Cint, 0), CUDA.zeros(Cint, 0), nonzeros(csr), size(csr))
cooRowInd = CuVector{Cint}(undef, nnz(csr))
cusparseXcsr2coo(handle(), csr.rowPtr, nnz(csr), m, cooRowInd, index)
CuSparseMatrixCOO{Tv,Cint}(cooRowInd, csr.colVal, nonzeros(csr), size(csr))
Expand All @@ -632,10 +649,12 @@ CuSparseMatrixCOO(csr::CuSparseMatrixCSR{Tv, Ti}; index::SparseChar='O') where {
### CSC to COO and viceversa

function CuSparseMatrixCSC{Tv, Ti}(coo::CuSparseMatrixCOO{Tv, Ti}; index::SparseChar='O') where {Tv, Ti}
m,n = size(coo)
nnz(coo) == 0 && return CuSparseMatrixCSC{Tv}(CUDA.ones(Cint, n+1), coo.rowInd, nonzeros(coo), size(coo))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kshyatt do you remember the edgecase you added these shortcuts for? In #2725

If something you needed is not covered by the tests added here, we should probably add them.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly I don't remember 😓

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the problem was just that the CUDA internal methods don't work with length 0 arrays at all

m, n = size(coo)
cscColPtr = CuArray{Ti}(undef, n + 1)
if iszero(n)
cscColPtr .= (index == 'O') ? 1 : 0 # cusparseXcoo2csr does not initialize cscColPtr correctly if n == 0
end
coo = sort_coo(coo, 'C')
cscColPtr = CuVector{Cint}(undef, n+1)
cusparseXcoo2csr(handle(), coo.colInd, nnz(coo), n, cscColPtr, index)
CuSparseMatrixCSC{Tv,Cint}(cscColPtr, coo.rowInd, nonzeros(coo), size(coo))
end
Expand All @@ -644,7 +663,6 @@ CuSparseMatrixCSC(coo::CuSparseMatrixCOO{Tv, Ti}; index::SparseChar='O') where {

function CuSparseMatrixCOO{Tv, Ti}(csc::CuSparseMatrixCSC{Tv, Ti}; index::SparseChar='O') where {Tv, Ti}
m,n = size(csc)
nnz(csc) == 0 && return CuSparseMatrixCOO{Tv,Cint}(CUDA.zeros(Cint, 0), CUDA.zeros(Cint, 0), nonzeros(csc), size(csc))
cooColInd = CuVector{Cint}(undef, nnz(csc))
cusparseXcsr2coo(handle(), csc.colPtr, nnz(csc), n, cooColInd, index)
coo = CuSparseMatrixCOO{Tv,Cint}(csc.rowVal, cooColInd, nonzeros(csc), size(csc))
Expand Down
23 changes: 22 additions & 1 deletion test/libraries/cusparse/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,20 +113,41 @@ if !(v"12.0" <= CUSPARSE.version() < v"12.1")
end
end

for (n, bd, p) in [(100, 5, 0.02), (5, 1, 0.8), (4, 2, 0.5)]
function __test_valid_ptr(A::CuSparseMatrixCSC)
colPtr = collect(A.colPtr)
@test length(A.colPtr) >= 1
@test colPtr[1] == 1 # assuming one based indexing (index='O')
@test issorted(colPtr)
end

function __test_valid_ptr(A::CuSparseMatrixCSR)
rowPtr = collect(A.rowPtr)
@test length(A.rowPtr) >= 1
@test rowPtr[1] == 1 # assuming one based indexing (index='O')
@test issorted(rowPtr)
end

__test_valid_ptr(::CuSparseMatrixCOO) = nothing
__test_valid_ptr(::CuSparseMatrixBSR) = nothing

for (n, bd, p) in [(100, 5, 0.02), (5, 1, 0.8), (4, 2, 0.5), (0, 1, 0.0)]
v"12.0" <= CUSPARSE.version() < v"12.1" && n == 4 && continue
@testset "conversions between CuSparseMatrices (n, bd, p) = ($n, $bd, $p)" begin
A = sprand(n, n, p)
blockdim = bd
for CuSparseMatrixType1 in (CuSparseMatrixCSC, CuSparseMatrixCSR, CuSparseMatrixCOO, CuSparseMatrixBSR)
if CuSparseMatrixType1 == CuSparseMatrixBSR && n == 0 continue end # TODO: conversion to CuSparseMatrixBSR breaks with (0x0) matrices
dA1 = CuSparseMatrixType1 == CuSparseMatrixBSR ? CuSparseMatrixType1(A, blockdim) : CuSparseMatrixType1(A)
@testset "conversion $CuSparseMatrixType1 --> SparseMatrixCSC" begin
__test_valid_ptr(dA1)
@test SparseMatrixCSC(dA1) ≈ A
end
for CuSparseMatrixType2 in (CuSparseMatrixCSC, CuSparseMatrixCSR, CuSparseMatrixCOO, CuSparseMatrixBSR)
if CuSparseMatrixType2 == CuSparseMatrixBSR && n == 0 continue end # TODO: conversion to CuSparseMatrixBSR breaks with (0x0) matrices
CuSparseMatrixType1 == CuSparseMatrixType2 && continue
dA2 = CuSparseMatrixType2 == CuSparseMatrixBSR ? CuSparseMatrixType2(dA1, blockdim) : CuSparseMatrixType2(dA1)
@testset "conversion $CuSparseMatrixType1 --> $CuSparseMatrixType2" begin
__test_valid_ptr(dA2)
@test collect(dA1) ≈ collect(dA2)
end
end
Expand Down