Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
104 commits
Select commit Hold shift + click to select a range
5a58e6a
Added nvtx for baseline
May 28, 2025
3b3525e
minor change
May 29, 2025
b26db25
more nvtx
May 29, 2025
a9ba5d1
Initial impl
Jun 6, 2025
6960ab8
HOpefully fixed?
zalbanob Jun 6, 2025
d5eb1a8
Atleast generates bindings
Jun 6, 2025
9b49119
Fixed binding issue due to namespace generation
Jun 8, 2025
2d3eb27
Major fix to the api strucutre
Jun 8, 2025
3e3ff42
Fix
Jun 8, 2025
d6b8c5d
CPU backend functional
Jun 8, 2025
266d25e
GPU backend working
Jun 9, 2025
67a29d7
Cmake fix tests
zalbanob Jun 10, 2025
06718c0
Add nvbench as a submodule under bench/nvbench
zalbanob Jun 10, 2025
b9dada1
Added catch as submodule
zalbanob Jun 10, 2025
131b2ce
Tests link
zalbanob Jun 10, 2025
690f860
Unfinalized tests
zalbanob Jun 10, 2025
1699a8a
Wish I had known Rcpp cannot be used without R runtime (https://stack…
Jun 11, 2025
637a584
Added testall cpp interface to CMAKE
zalbanob Jun 11, 2025
3f136cb
Made Testthat work through the cpp interface
zalbanob Jun 11, 2025
83b46f6
Fix
Jun 11, 2025
b5fc805
Hindsight is 20/20
zalbanob Jun 11, 2025
284a869
Fixed bindings
Jun 11, 2025
0e1b50e
Initial test integration works
Jun 11, 2025
9ed7369
Fixed a couple problems with the scripts
Jun 11, 2025
28676dd
Fixed the problem with alignemnet
Jun 12, 2025
8829219
Added Ctz,comparision kernels, fixed utils copy
zalbanob Jun 15, 2025
43a6200
Added thrust to cmake
zalbanob Jun 15, 2025
fc2a360
Minor fixes to cmake, ctz not working
Jun 16, 2025
75f4e4f
Fixed ctz
Jun 16, 2025
d7cb0fa
Fixed the bindings
Jun 16, 2025
1799695
inlining dispatchers
Jun 18, 2025
da613d8
un-inlining dispatchers
Jun 18, 2025
20b18dd
implemented graflex::getFDR cuda
Jun 30, 2025
46ae4c9
added tests for GPU getFDR and count
Jun 30, 2025
552674d
inital implementation of odd ratio computation
Jun 30, 2025
8379927
Fixed deadlock issue
Jul 1, 2025
3d163db
merged init and compute into one kernel
Jul 1, 2025
c6c49af
Fixed race
Jul 2, 2025
579fe0a
added more kernels to graflex
Jul 3, 2025
9c9cd7b
Added non-optimal graflex cuda dispatch impl
Jul 3, 2025
db564f9
Fixed minor allocation issues
Jul 9, 2025
7e0942e
minor change
Jul 10, 2025
7dedd5c
Minor fixes to prevent potential overflow
Jul 10, 2025
1232f61
minor Style fix
Jul 10, 2025
a5369e2
Fixed a bug in lrm alpha unweighted
Jul 14, 2025
9008147
intial impl of cutlass - syncing for ide
Jul 21, 2025
a43159d
Cutlass omega
zalbanob Jul 22, 2025
d503ad3
Fix
Jul 23, 2025
b78b3fb
Save
Jul 24, 2025
6932666
Working Omega cutlass kernel but without overflow handleing
Aug 1, 2025
380dd60
local save
Aug 8, 2025
99efc12
Update omega kernel
Aug 25, 2025
1595661
Cleanup
Aug 26, 2025
13a1ebc
Cleanup
Aug 26, 2025
ec20536
Fix ctz
Aug 28, 2025
fda4f9e
Fix lrv
Aug 28, 2025
7dc3a4e
Inital impl of backend
Aug 29, 2025
f3360ac
further backend kernels
Sep 11, 2025
f255fbb
deprecated cutlass
Sep 11, 2025
1f3d148
More backend kernels and fix copyToNumericVector
Sep 11, 2025
acc6a1f
added alr
Sep 11, 2025
fe8a539
more backend impl and tests
Sep 12, 2025
c017094
Minor fixes
Sep 12, 2025
5824469
Fixes backend cpu code, implements more backend kernels (as well as f…
Oct 3, 2025
7410210
impl linRcpp
Oct 8, 2025
306177e
Kernel Fixes
Oct 10, 2025
d629dfc
cleanup
Oct 15, 2025
ec261c5
implemented phiRcpp but we deadlock (as expected on large matrices)
Oct 15, 2025
45272b6
added symRcpp kernel
Oct 16, 2025
95fbbfe
Added nvtx for baseline
May 28, 2025
25fd741
added launch bounds to better inform the compiler
Oct 17, 2025
931e46b
intial movement into traits
Oct 17, 2025
04bdc87
more cleanup
Oct 17, 2025
c5738c0
made nvtx optional by setting propr.enable_nvtx to true/false
Oct 17, 2025
b534393
Minor refactor
Oct 28, 2025
dae3f3d
add singularity def to here
Oct 31, 2025
735db18
Added CI finally
Oct 31, 2025
51d2705
Added CI finally and Fixed lg2 to lg_e
Oct 31, 2025
4b2f93a
More clear test message
Oct 31, 2025
2a110ce
Fix numerical issue
Nov 3, 2025
fe1c8ff
Sync
Nov 4, 2025
6ee73ce
Added cub load store
Nov 4, 2025
9a72e7f
cosmetics
Nov 4, 2025
f171922
cosmetics 2
Nov 4, 2025
9a83ab2
Added CC detection
Nov 5, 2025
433cad8
CPU Harmonic
Nov 5, 2025
889f059
Updated R files to 60bd9da
Nov 5, 2025
4734be0
More optimizations
Nov 10, 2025
ea4bf15
Added Kernel Traits
Nov 11, 2025
c028368
W-LRM: arithmetic mean to harmonic mean and fixes
Nov 17, 2025
49e6df4
W-LRV moved to harmonic mean
Nov 18, 2025
242ad69
Fixed CPU backend
Nov 19, 2025
d6e885b
Added fix from dev/3ecdb11
Nov 19, 2025
60ade44
Added lrv and lrm tests based on real input
Nov 19, 2025
466569e
split phase phi
Nov 19, 2025
3ed0226
Added nvtx markers
Nov 23, 2025
96a97bc
Fixes
Nov 24, 2025
356d62b
Fixes
Nov 24, 2025
7e7acb8
Added support for integration with nvbenchr
Nov 24, 2025
6f1da66
Added benchmarking and profiling code
Nov 24, 2025
bcf0311
removed singularity
Jan 29, 2026
4772236
removed catch2
Jan 29, 2026
7afb08f
Merge pull request #84 from zalbanob/initial-gpu-impl
suzannejin Feb 5, 2026
a693491
Merge branch 'dev' into dev-cuda
suzannejin Feb 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@
.Rhistory
.RData
inst/doc
.r-lib
.ci
ci_artifacts
bin
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[submodule "bench/nvbench"]
path = bench/nvbench
url = https://github.com/NVIDIA/nvbench.git
[submodule "thirdparty/Catch2"]
path = thirdparty/Catch2
url = https://github.com/catchorg/Catch2.git
81 changes: 81 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
cmake_minimum_required(VERSION 3.20)
cmake_policy(SET CMP0048 NEW)

project(PROPR CXX)

# ---------------------------------------------------------------------------
# Build type
# ---------------------------------------------------------------------------
# Normalise the build type once. Every comparison below is quoted, so an
# empty CMAKE_BUILD_TYPE can no longer expand to a missing if() argument, and
# the match is case-insensitive ("Release", "RELEASE", "Debug", "DEBUG", ...).
string(TOUPPER "${CMAKE_BUILD_TYPE}" PROPR_BUILD_TYPE_UPPER)

if ("${PROPR_BUILD_TYPE_UPPER}" STREQUAL "" OR "${PROPR_BUILD_TYPE_UPPER}" STREQUAL "NOMODE")
    message("WORKING ON NO MODE")
elseif ("${PROPR_BUILD_TYPE_UPPER}" STREQUAL "RELEASE")
    message("WORKING ON RELEASE MODE")
    # NOTE(review): this replaces any user-supplied CMAKE_CXX_FLAGS/CMAKE_C_FLAGS
    # with the release defaults plus debug symbols; kept as in the original.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_RELEASE} -g")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS_RELEASE} -g")
elseif ("${PROPR_BUILD_TYPE_UPPER}" STREQUAL "DEBUG")
    message("WORKING ON DEBUG MODE")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_DEBUG}")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS_DEBUG}")
else ()
    message(FATAL_ERROR "Unrecognized build type")
endif ()

include(CheckLanguage)

# Project-local find modules (used by find_package(R) below).
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

# ---------------------------------------------------------------------------
# R
# ---------------------------------------------------------------------------
find_package(R REQUIRED)
if (R_FOUND)
    message("R : FOUND")
    list(APPEND LIBS "R")
else ()
    message("R : NOT FOUND")
endif ()

# ---------------------------------------------------------------------------
# CUDA (mandatory)
# ---------------------------------------------------------------------------
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
    set(USE_CUDA ON)
    message("Build CUDA Support")
    enable_language(CUDA)
    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
    set(CMAKE_CUDA_STANDARD 17)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr -lineinfo -O3 --use_fast_math --extra-device-vectorization")
    message("CUDA : FOUND")
    find_package(CUDAToolkit REQUIRED)

    # Ask the driver for the compute capability of the last listed GPU and
    # strip the dot, e.g. "9.0" -> "90".
    execute_process(
        COMMAND nvidia-smi --query-gpu=compute_cap --format=csv
        COMMAND tail -n 1
        COMMAND tr -d .
        OUTPUT_VARIABLE NV_CC
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    message("Detected Compute Capability : " ${NV_CC})

    # Only trust the probe when it produced a plain number. On hosts without
    # a GPU/driver (CI, containers) the output is empty or an error message,
    # and writing that into CMAKE_CUDA_ARCHITECTURES breaks generation.
    if ("${NV_CC}" MATCHES "^[0-9]+$")
        # NOTE(review): this defines a variable literally named PROPERTIES; it
        # does not set a target property. Kept in case src/ reads it - confirm
        # and remove if unused.
        set(PROPERTIES CUDA_ARCHITECTURES ${NV_CC})
        set(CMAKE_CUDA_ARCHITECTURES ${NV_CC})
    else ()
        message(WARNING
            "Could not detect a GPU compute capability via nvidia-smi; "
            "keeping CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'. "
            "Pass -DCMAKE_CUDA_ARCHITECTURES=<arch> to choose one explicitly.")
    endif ()
else ()
    message(FATAL_ERROR "CUDA : NOT FOUND")
endif ()

# ---------------------------------------------------------------------------
# Install layout and sources
# ---------------------------------------------------------------------------
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
    set(CMAKE_INSTALL_PREFIX "/usr")
endif ()
message("Install Prefix : " ${CMAKE_INSTALL_PREFIX})
include_directories(${CMAKE_SOURCE_DIR}/inst/include)

# set(CUTLASS_NVCC_ARCHS "90")
# add_subdirectory(${CMAKE_SOURCE_DIR}/thirdparty/cutlass)
# include_directories(${CMAKE_SOURCE_DIR}/thirdparty/cutlass/include/)

option(PROPR_BUILD_TESTS "Option to enable building tests" OFF)

add_subdirectory(src)
message("Install Path : ${CMAKE_INSTALL_PREFIX}")
message("PROPR Tests : ${PROPR_BUILD_TESTS}")

# Ship only the public headers.
install(DIRECTORY inst/include DESTINATION ${CMAKE_INSTALL_PREFIX}/PROPR
        FILES_MATCHING
        PATTERN "*.hpp"
        PATTERN "*.h"
        PATTERN "*.cuh")
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Imports:
corpcor,
ppcor,
Rcpp,
stats
stats,
Suggests:
ALDEx2,
fastcluster,
Expand All @@ -43,7 +43,8 @@ Suggests:
parallel,
rmarkdown,
testthat (>= 3.0.0),
vegan
vegan,
nvtxR
LinkingTo:
Rcpp
Config/testthat/edition: 3
122 changes: 122 additions & 0 deletions OLDCMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
cmake_minimum_required(VERSION 3.15)

# Resolve the project's find-modules relative to this file instead of a
# developer-specific absolute path, so the script works from any checkout.
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")

# The Rcpp include directories are derived from Rcpp:::CxxFlags() further
# down; the machine-specific RCPP_INCLUDE_DIRS / RCPP_LIB_PATH defaults that
# used to live here were overwritten / never read in this file and have been
# dropped.

set(CMAKE_CUDA_ARCHITECTURES 80 90)
# NOTE(review): CMake consults CUDACXX as an *environment* variable; a cache
# entry with this name most likely has no effect on compiler selection.
# Left unchanged - confirm and replace with CMAKE_CUDA_COMPILER if intended.
set(CUDACXX "/usr/local/cuda/bin/nvcc" CACHE FILEPATH "Path to nvcc compiler")
project(propr LANGUAGES CXX CUDA)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

find_package(R REQUIRED)

# Query Rcpp for its compile flags and turn "-I<dir> -I<dir>" into a list.
# NOTE(review): if R_BINARY is the interactive `R` front end (not Rscript),
# the captured output may include the startup banner/echo - verify.
execute_process(
    COMMAND "${R_BINARY}" -e "cat(Rcpp:::CxxFlags())"
    OUTPUT_VARIABLE RCPP_CXX_FLAGS_OUTPUT
    OUTPUT_STRIP_TRAILING_WHITESPACE
)
string(REPLACE "-I" "" RCPP_INCLUDE_DIRS_RAW "${RCPP_CXX_FLAGS_OUTPUT}")
string(REPLACE " " ";" RCPP_INCLUDE_DIRS "${RCPP_INCLUDE_DIRS_RAW}")

# Query Rcpp for its linker flags and split them into -L dirs and -l names.
execute_process(
    COMMAND "${R_BINARY}" -e "cat(Rcpp:::LdFlags())"
    OUTPUT_VARIABLE RCPP_LDFLAGS_OUTPUT
    OUTPUT_STRIP_TRAILING_WHITESPACE
)
set(RCPP_LINK_DIRS "")
set(RCPP_LIBRARIES_LIST "")
string(REGEX MATCHALL "-L[^ ]+" RCPP_LIB_DIRS_MATCHES "${RCPP_LDFLAGS_OUTPUT}")
foreach(MATCH ${RCPP_LIB_DIRS_MATCHES})
    string(REPLACE "-L" "" LIB_DIR ${MATCH})
    list(APPEND RCPP_LINK_DIRS ${LIB_DIR})
endforeach()

string(REGEX MATCHALL "-l[^ ]+" RCPP_LIB_NAMES_MATCHES "${RCPP_LDFLAGS_OUTPUT}")
foreach(MATCH ${RCPP_LIB_NAMES_MATCHES})
    string(REPLACE "-l" "" LIB_NAME ${MATCH})
    list(APPEND RCPP_LIBRARIES_LIST ${LIB_NAME})
endforeach()

find_package(CUDAToolkit 11.0 REQUIRED)


# Each translation unit is listed exactly once (the original list repeated
# src/dispatch/cpu/lrm.cpp and src/dispatch/cpu/backend.cpp).
set(PROPR_LIB_SOURCES
    src/kernels/cuda/lrm.cu
    src/kernels/cuda/lrv.cu

    src/dispatch/cpu/backend.cpp
    src/dispatch/cpu/comparison.cpp
    src/dispatch/cpu/ctzRcpp.cpp
    src/dispatch/cpu/graflex.cpp
    src/dispatch/cpu/lr2propr.cpp
    src/dispatch/cpu/lrm.cpp
    src/dispatch/cpu/lrv.cpp
    src/dispatch/cpu/omega.cpp

    src/RcppExports.cpp
    inst/include/propr/context.h
    inst/include/propr/interface/device_selector.hpp
    src/dispatch/device_selector.cpp
    src/interface/lrv.cpp
    src/interface/lrm.cpp
    inst/include/propr/kernels/cpu/dispatch/backend.hpp
    src/interface/backend.cpp
    src/interface/comparison.cpp
    src/interface/graflex.cpp
    src/interface/lr2propr.cpp
    src/interface/omega.cpp
)

set(PROPR_BENCHMARK_SOURCES bench/src/main_benchmark.cu )

add_library(propr SHARED ${PROPR_LIB_SOURCES})

target_compile_features(propr PUBLIC cxx_std_14)
set_target_properties(propr PROPERTIES
    CUDA_STANDARD 14
    CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}"
)

# Specify include directories for the library.
target_include_directories(propr PUBLIC
    # R includes (from find_package(R))
    ${R_INCLUDE_DIRS}
    # Rcpp includes (parsed from Rcpp:::CxxFlags())
    ${RCPP_INCLUDE_DIRS}
    # CUDA Toolkit includes (from find_package(CUDAToolkit))
    ${CUDAToolkit_INCLUDE_DIRS}
    # Local project headers (e.g., src/include/interface, src/include/kernels etc.)
    inst/include
    # Prereq headers (if they are part of the main library)
    # The original CMakeLists.txt had `include_directories(prereq)`.
    ${CMAKE_CURRENT_SOURCE_DIR}/prereq
)

# Link libraries for the main shared library.
target_link_libraries(propr PUBLIC
    # R runtime libraries (from find_package(R))
    ${R_LIBRARIES}
    # Rcpp libraries (parsed from Rcpp:::LdFlags())
    ${RCPP_LIBRARIES_LIST}
    # CUDA runtime libraries (from find_package(CUDAToolkit))
    ${CUDAToolkit_LIBRARIES}
)

target_link_directories(propr PUBLIC ${RCPP_LINK_DIRS})
# R loads the shared object as "propr.so".
set_target_properties(propr PROPERTIES OUTPUT_NAME "propr" SUFFIX ".so")

add_executable(propr_benchmark ${PROPR_BENCHMARK_SOURCES})
target_link_libraries(propr_benchmark PRIVATE propr)

target_include_directories(propr_benchmark PRIVATE inst/include)

# Set compile features for benchmark.
target_compile_features(propr_benchmark PUBLIC cxx_std_14)
set_target_properties(propr_benchmark PROPERTIES
    CUDA_STANDARD 14
    CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}"
)
34 changes: 23 additions & 11 deletions R/1-propr.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ propr <- function(counts,
alpha = NA,
p = 0,
permutation_option = c("feature-wise", "sample-wise")) {
NVTX_PUSH("propr", 0)
##############################################################################
### CLEAN UP ARGS
##############################################################################
Expand Down Expand Up @@ -141,46 +142,57 @@ propr <- function(counts,
lambda <- NULL

if (metric == "rho") {
NVTX_PUSH("metric == rho", 0)
mat <- lr2rho(lr)

NVTX_POP()
} else if (metric == "phi") {
NVTX_PUSH("metric == phi", 0)
mat <- lr2phi(lr)
if (symmetrize)
symRcpp(mat) # optionally force symmetry

if (symmetrize) symRcpp(mat) # optionally force symmetry
NVTX_POP()
} else if (metric == "phs") {
NVTX_PUSH("metric == phs", 0)
mat <- lr2phs(lr)

NVTX_POP()
} else if (metric == "cor") {
NVTX_PUSH("metric == cor", 0)
mat <- stats::cor(lr)

NVTX_POP()
} else if (metric == "vlr") {
mat <- lrv

} else if (metric == "ppcor") {
packageCheck("ppcor")
NVTX_PUSH("metric == ppcor", 0)
mat <- ppcor::pcor(lr)$estimate

NVTX_POP()
} else if (metric == "pcor") {
packageCheck("corpcor")

NVTX_PUSH("metric == pcor")
cov <- cov(lr)
mat <- corpcor::cor2pcor(cov)
class(mat) <- "matrix"

NVTX_POP()
} else if (metric == "pcor.shrink") {
packageCheck("corpcor")

NVTX_PUSH("metric == pcor.shrink", 0)
cov <- corpcor::cov.shrink(lr)
mat <- corpcor::cor2pcor(cov)
mat <- matrix(mat, ncol=ncol(lr), nrow=ncol(lr))
class(mat) <- "matrix"
lambda <- attr(cov, "lambda")
NVTX_POP()

} else if (metric == "pcor.bshrink") {
NVTX_PUSH("metric == pcor.bshrink", 0)

with(pcor.bshrink(ct, outtype = ivar_pcor), {
mat <<- matrix
lambda <<- lambda
})

NVTX_POP()
} else {
stop("Provided 'metric' not recognized.")
}
Expand Down Expand Up @@ -232,6 +244,6 @@ propr <- function(counts,
##############################################################################

message("Alert: Use 'updateCutoffs' to calculate FDR.")

NVTX_POP()
return(result)
}
}
20 changes: 20 additions & 0 deletions R/10-nvtx-shim.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Report whether NVTX range markers should be emitted.
#
# Markers are opt-in: the "propr.enable_nvtx" option must be exactly TRUE and
# the optional 'nvtxR' package must be loadable. Returns a single logical.
.nvtx_enabled <- function() {
  opted_in <- isTRUE(getOption("propr.enable_nvtx", FALSE))
  if (!opted_in) {
    return(FALSE)
  }
  # Only probe for the package once the user has opted in.
  isTRUE(requireNamespace("nvtxR", quietly = TRUE))
}

# Open an NVTX range named `label`; does nothing when NVTX is disabled.
# `color` is coerced to integer before it is passed to nvtxR.
# Always returns NULL invisibly.
NVTX_PUSH <- function(label, color = 0L) {
  if (!.nvtx_enabled()) {
    return(invisible(NULL))
  }
  nvtxR::nvtx_push_range(label, as.integer(color))
  invisible(NULL)
}
# Close the most recently opened NVTX range; does nothing when NVTX is
# disabled. Always returns NULL invisibly.
NVTX_POP <- function() {
  if (!.nvtx_enabled()) {
    return(invisible(NULL))
  }
  nvtxR::nvtx_pop_range()
  invisible(NULL)
}

# Evaluate `expr` in the caller's frame, wrapped in an NVTX range when NVTX
# is enabled. The range is closed via on.exit(), so it is popped even if
# `expr` signals an error. Returns the value of `expr`.
WITH_NVTX <- function(label, color = 0L, expr) {
  tracing <- .nvtx_enabled()
  if (tracing) {
    nvtxR::nvtx_push_range(label, as.integer(color))
    on.exit(nvtxR::nvtx_pop_range(), add = TRUE)
  }
  # Capture the unevaluated expression and run it where the caller wrote it.
  code <- substitute(expr)
  eval.parent(code)
}
9 changes: 7 additions & 2 deletions R/1a-propr-backend.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ logratio <- function(counts, ivar, alpha=NA) {
#'
#' @export
pcor.bshrink <- function(ct, outtype = c("clr", "alr")) {
NVTX_PUSH("pcor.bshrink", 0)
packageCheck("corpcor")
outtype <- match.arg(outtype)

Expand All @@ -225,6 +226,7 @@ pcor.bshrink <- function(ct, outtype = c("clr", "alr")) {
lambda <- attr(covB, "lambda")

# convert basis covariance matrix to clr/alr covariance matrix
NVTX_PUSH("basis covariance matrix to clr/alr", 0)
D <- ncol(ct)
if (outtype == "alr") {
F <- cbind(diag(rep(1, D - 1)), rep(-1, D - 1))
Expand All @@ -233,9 +235,12 @@ pcor.bshrink <- function(ct, outtype = c("clr", "alr")) {
G <- diag(rep(1, D)) - matrix(1 / D, D, D)
cov <- G %*% covB %*% G
}
NVTX_POP()

# partial correlation
NVTX_PUSH("corpcor::cor2pcor", 0)
pcor <- corpcor::cor2pcor(cov)
NVTX_POP()

# make output to have same dimensions as input
# alr partial correlation has one less dimension,
Expand All @@ -245,6 +250,6 @@ pcor.bshrink <- function(ct, outtype = c("clr", "alr")) {
pcor <- rbind(pcor, 0)
pcor[ncol(pcor), ncol(pcor)] <- 1
}

NVTX_POP()
return(list(matrix = pcor, lambda = lambda))
}
}
Loading