diff --git a/.Rbuildignore b/.Rbuildignore
index e05151613..b66ab8982 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -17,8 +17,8 @@
^R/secure.global.ranking.md$
^_pkgdown\.yml$
^docs$
-^dsBase_6.3.4.tar.gz$
-^dsBase_6.3.4-permissive.tar.gz$
+^dsBase_6.3.5.tar.gz$
+^dsBase_6.3.5-permissive.tar.gz$
^dsDanger_6.3.4.tar.gz$
^\.circleci$
^\.circleci/config\.yml$
diff --git a/.github/workflows/dsBaseClient_test_suite.yaml b/.github/workflows/dsBaseClient_test_suite.yaml
new file mode 100644
index 000000000..17e13f1dc
--- /dev/null
+++ b/.github/workflows/dsBaseClient_test_suite.yaml
@@ -0,0 +1,247 @@
+################################################################################
+# DataSHIELD GHA test suite - dsBaseClient
+# Adapted from `armadillo_azure-pipelines.yml` by Roberto Villegas-Diaz
+#
+# Inside the root directory $GITHUB_WORKSPACE will be a file tree like:
+# /dsBaseClient <- Checked out version of datashield/dsBaseClient
+# /dsBaseClient/logs <- Where results of tests and logs are collated
+# /testStatus <- Checked out version of datashield/testStatus
+#
+# As of Sept. 2025 this takes ~ 95 mins to run.
+################################################################################
+name: dsBaseClient test suite
+
+on:
+ push:
+ schedule:
+ - cron: '0 0 * * 6' # Weekly (on Saturdays @ 0.00)
+
+jobs:
+ dsBaseClient_test_suite:
+ runs-on: ubuntu-latest
+ timeout-minutes: 180
+ permissions:
+ contents: read
+
+ # These should all be constant, except TEST_FILTER. This can be used to test
+ # subsets of test files in the testthat directory. Options are like:
+ # '*' <- Run all tests.
+ # 'asNumericDS*' <- Run all asNumericDS tests, i.e. all the arg, etc. tests.
+ # '*_smk_*' <- Run all the smoke tests for all functions.
+ env:
+ TEST_FILTER: '_-|datachk-|smk-|arg-|disc-|perf-|smk_expt-|expt-|math-'
+ _R_CHECK_SYSTEM_CLOCK_: 0
+ WORKFLOW_ID: ${{ github.run_id }}-${{ github.run_attempt }}
+ PROJECT_NAME: dsBaseClient
+ BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+ REPO_OWNER: ${{ github.repository_owner }}
+ R_KEEP_PKG_SOURCE: yes
+ GITHUB_TOKEN: ${{ github.token || 'placeholder-token' }}
+
+ steps:
+ - name: Checkout dsBaseClient
+ uses: actions/checkout@v4
+ with:
+ path: dsBaseClient
+
+ - name: Checkout testStatus
+ if: ${{ github.actor != 'nektos/act' }} # for local deployment only
+ uses: actions/checkout@v4
+ with:
+ repository: ${{ env.REPO_OWNER }}/testStatus
+ ref: master
+ path: testStatus
+ persist-credentials: false
+ token: ${{ env.GITHUB_TOKEN }}
+
+ - name: Uninstall default MySQL
+ run: |
+ curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add -
+ sudo service mysql stop || true
+ sudo apt-get update
+ sudo apt-get remove --purge mysql-client mysql-server mysql-common -y
+ sudo apt-get autoremove -y
+ sudo apt-get autoclean -y
+ sudo rm -rf /var/lib/mysql/
+
+ - uses: r-lib/actions/setup-pandoc@v2
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ r-version: release
+ http-user-agent: release
+ use-public-rspm: true
+
+ - name: Install R and dependencies
+ run: |
+ sudo apt-get install --no-install-recommends software-properties-common dirmngr -y
+ wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sudo tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
+ sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
+ sudo apt-get update -qq
+ sudo apt-get upgrade -y
+ sudo apt-get install -qq libxml2-dev libcurl4-openssl-dev libssl-dev libgsl-dev libgit2-dev r-base -y
+ sudo apt-get install -qq libharfbuzz-dev libfribidi-dev libmagick++-dev xml-twig-tools -y
+ sudo R -q -e "install.packages(c('devtools','covr','fields','meta','metafor','ggplot2','gridExtra','data.table','DSI','DSOpal','DSLite','MolgenisAuth','MolgenisArmadillo','DSMolgenisArmadillo','DescTools','e1071'), repos='https://cloud.r-project.org')"
+ sudo R -q -e "devtools::install_github(repo='datashield/dsDangerClient', ref=Sys.getenv('BRANCH_NAME'))"
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ dependencies: 'c("Imports")'
+ extra-packages: |
+ any::rcmdcheck
+ cran::devtools
+ cran::git2r
+ cran::RCurl
+ cran::readr
+ cran::magrittr
+ cran::xml2
+ cran::purrr
+ cran::dplyr
+ cran::stringr
+ cran::tidyr
+ cran::quarto
+ cran::knitr
+ cran::kableExtra
+ cran::rmarkdown
+ cran::downlit
+ needs: check
+
+ - name: Check manual updated
+ run: |
+ orig_sum=$(find man -type f | sort -u | xargs cat | md5sum)
+ R -q -e "devtools::document()"
+ new_sum=$(find man -type f | sort -u | xargs cat | md5sum)
+ if [ "$orig_sum" != "$new_sum" ]; then
+ echo "Your committed man/*.Rd files are out of sync with the R headers."
+ exit 1
+ fi
+ working-directory: dsBaseClient
+ continue-on-error: true
+
+ - name: Devtools checks
+ run: |
+ R -q -e "devtools::check(args = c('--no-examples', '--no-tests'))" | tee azure-pipelines_check.Rout
+ grep --quiet "^0 errors" azure-pipelines_check.Rout && grep --quiet " 0 warnings" azure-pipelines_check.Rout && grep --quiet " 0 notes" azure-pipelines_check.Rout
+ working-directory: dsBaseClient
+ continue-on-error: true
+
+ - name: Start Armadillo docker-compose
+ run: docker compose -f docker-compose_armadillo.yml up -d --build
+ working-directory: dsBaseClient
+
+ - name: Install test datasets
+ run: |
+ sleep 60
+ R -q -f "molgenis_armadillo-upload_testing_datasets.R"
+ working-directory: dsBaseClient/tests/testthat/data_files
+
+ - name: Install dsBase to Armadillo
+ run: |
+ curl -u admin:admin -X GET http://localhost:8080/packages
+ curl -u admin:admin -H 'Content-Type: multipart/form-data' -F "file=@dsBase_6.3.5-permissive.tar.gz" -X POST http://localhost:8080/install-package
+ sleep 60
+ docker restart dsbaseclient-armadillo-1
+ sleep 30
+ curl -u admin:admin -X POST http://localhost:8080/whitelist/dsBase
+ working-directory: dsBaseClient
+
+ - name: Run tests with coverage & JUnit report
+ run: |
+ mkdir -p logs
+ R -q -e "devtools::reload();"
+ R -q -e '
+ write.csv(
+ covr::coverage_to_list(
+ covr::package_coverage(
+ type = c("none"),
+ code = c('"'"'
+ output_file <- file("test_console_output.txt");
+ sink(output_file);
+ sink(output_file, type = "message");
+ junit_rep <- testthat::JunitReporter$new(file = file.path(getwd(), "test_results.xml"));
+ progress_rep <- testthat::ProgressReporter$new(max_failures = 999999);
+ multi_rep <- testthat::MultiReporter$new(reporters = list(progress_rep, junit_rep));
+ options("datashield.return_errors" = FALSE, "default_driver" = "ArmadilloDriver");
+ testthat::test_package("${{ env.PROJECT_NAME }}", filter = "${{ env.TEST_FILTER }}", reporter = multi_rep, stop_on_failure = FALSE)'"'"'
+ )
+ )
+ ),
+ "coveragelist.csv"
+ )'
+
+ mv coveragelist.csv logs/
+ mv test_* logs/
+ working-directory: dsBaseClient
+
+ - name: Check for JUnit errors
+ run: |
+ issue_count=$(sed 's/failures="0" errors="0"//' test_results.xml | grep -c errors= || true)
+ echo "Number of testsuites with issues: $issue_count"
+ sed 's/failures="0" errors="0"//' test_results.xml | grep errors= > issues.log || true
+ cat issues.log || true
+ # continue with workflow even when some tests fail
+ exit 0
+ working-directory: dsBaseClient/logs
+
+ - name: Write versions to file
+ run: |
+ echo "branch:${{ env.BRANCH_NAME }}" > ${{ env.WORKFLOW_ID }}.txt
+ echo "os:$(lsb_release -ds)" >> ${{ env.WORKFLOW_ID }}.txt
+ echo "R:$(R --version | head -n1)" >> ${{ env.WORKFLOW_ID }}.txt
+ Rscript --vanilla -e 'sessionInfo()' >> session_info_${{ env.WORKFLOW_ID }}.txt
+ working-directory: dsBaseClient/logs
+
+ - name: Parse results from testthat and covr
+ run: |
+ Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/ logs/ https://github.com/datashield/${{ env.PROJECT_NAME }}/blob/${{ env.BRANCH_NAME }} '([^:]+)' '(?<=::)[^:]+(?=::)'
+ working-directory: dsBaseClient
+ env:
+ PROJECT_NAME: ${{ env.PROJECT_NAME }}
+ BRANCH_NAME: ${{ env.BRANCH_NAME }}
+
+ - name: Render report
+ run: |
+ cd testStatus
+
+ mkdir -p new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+ mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+ mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/
+
+ # Copy logs to new logs directory location
+ cp -rv ../dsBaseClient/logs/* new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+ cp -rv ../dsBaseClient/logs/${{ env.WORKFLOW_ID }}.txt new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+
+ R -e 'input_dir <- file.path("../new/logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
+ mv source/test_report.html new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/index.html
+ cp -r new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/* new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest
+
+ env:
+ PROJECT_NAME: ${{ env.PROJECT_NAME }}
+ BRANCH_NAME: ${{ env.BRANCH_NAME }}
+ WORKFLOW_ID: ${{ env.WORKFLOW_ID }}
+
+ - name: Upload test logs
+ uses: actions/upload-artifact@v4
+ with:
+ name: dsbaseclient-logs
+ path: testStatus/new
+
+ - name: Dump environment info
+ run: |
+ echo -e "\n#############################"
+ echo -e "ls /: ######################"
+ ls -al .
+ echo -e "\n#############################"
+ echo -e "lscpu: ######################"
+ lscpu
+ echo -e "\n#############################"
+ echo -e "memory: #####################"
+ free -m
+ echo -e "\n#############################"
+ echo -e "env: ########################"
+ env
+ echo -e "\n#############################"
+ echo -e "R sessionInfo(): ############"
+ R -e 'sessionInfo()'
+ sudo apt install tree -y
+ tree .
diff --git a/DESCRIPTION b/DESCRIPTION
index cc110b7f7..e0f278a52 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,11 +1,11 @@
Package: dsBaseClient
Title: 'DataSHIELD' Client Side Base Functions
-Version: 6.3.4
+Version: 6.3.5
Description: Base 'DataSHIELD' functions for the client side. 'DataSHIELD' is a software package which allows
you to do non-disclosive federated analysis on sensitive data. 'DataSHIELD' analytic functions have
been designed to only share non disclosive summary statistics, with built in automated output
checking based on statistical disclosure control. With data sites setting the threshold values for
- the automated output checks. For more details, see 'citation("dsBaseClient")'.
+ the automated output checks. For more details, see citation('dsBaseClient').
Authors@R: c(person(given = "Paul",
family = "Burton",
role = c("aut"),
@@ -36,10 +36,6 @@ Authors@R: c(person(given = "Paul",
family = "Avraam",
role = c("aut"),
comment = c(ORCID = "0000-0001-8908-2441")),
- person(given = "Demetris",
- family = "Avraam",
- role = c("aut"),
- comment = c(ORCID = "0000-0001-8908-2441")),
person(given = "Yannick",
family = "Marcon",
role = c("aut"),
diff --git a/NAMESPACE b/NAMESPACE
index ec905eb6e..289592525 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand
-export(computeWeightedMeans)
export(ds.Boole)
export(ds.abs)
export(ds.asCharacter)
@@ -73,6 +72,7 @@ export(ds.matrixDimnames)
export(ds.matrixInvert)
export(ds.matrixMult)
export(ds.matrixTranspose)
+export(ds.mdPattern)
export(ds.mean)
export(ds.meanByClass)
export(ds.meanSdGp)
@@ -118,6 +118,7 @@ export(ds.unList)
export(ds.unique)
export(ds.var)
export(ds.vectorCalc)
+export(subsetHelper)
import(DSI)
import(data.table)
importFrom(stats,as.formula)
diff --git a/R/computeWeightedMeans.R b/R/computeWeightedMeans.R
index 0f04fc915..1284ffc08 100644
--- a/R/computeWeightedMeans.R
+++ b/R/computeWeightedMeans.R
@@ -9,9 +9,11 @@
#' @param variables character name of the variable(s) to focus on. The variables must be in the data.table
#' @param weight character name of the data.table column that contains a weight.
#' @param by character vector of the columns to group by
+#' @return Returns a data table object with computed weighted means.
+#'
#' @import data.table
#' @importFrom stats as.formula na.omit ts weighted.mean
-#' @export
+#' @keywords internal
computeWeightedMeans <- function(data_table, variables, weight, by) {
if (is.null(weight)) {
diff --git a/R/ds.asFactor.R b/R/ds.asFactor.R
index 476f00f85..8e5fbd090 100644
--- a/R/ds.asFactor.R
+++ b/R/ds.asFactor.R
@@ -48,7 +48,7 @@
#' \code{baseline.level = 1} and \code{forced.factor.levels = c(1,2,3,4,5)}.
#' The input vector is converted to the following matrix of dummy variables:
#'
-#' \tabular{rrrrr}{
+#' \tabular{rrrr}{
#' \strong{DV2} \tab \strong{DV3} \tab \strong{DV4} \tab \strong{DV5} \cr
#' 0 \tab 0 \tab 0 \tab 0\cr
#' 1 \tab 0 \tab 0 \tab 0\cr
diff --git a/R/ds.boxPlot.R b/R/ds.boxPlot.R
index d89c54709..7d86a79b1 100644
--- a/R/ds.boxPlot.R
+++ b/R/ds.boxPlot.R
@@ -21,7 +21,7 @@
#' ## Version 6, for version 5 see the Wiki
#'
#' ### Please ensure you have a training Virtual Machine running,
-#' or that you have a live connection to a server.
+#' # or that you have a live connection to a server.
#'
#' # Connecting to the Opal servers
#'
@@ -48,37 +48,37 @@
#' symbol = "D")
#'
#' ## Create a boxplot of one variable
-#' ds.boxPlot("D", "LAB_HDL", datasources = connections)
+#' ds.boxPlot("D", "LAB_HDL", datasources = connections)
#'
#' ## Create a boxplot that is split by study:
-#' ds.boxPlot("D", "LAB_HDL", type= "split", datasources = connections)
+#' ds.boxPlot("D", "LAB_HDL", type= "split", datasources = connections)
#'
#' ## Create a boxplot of two variables variable
-#' ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG", type="pooled",
-#' datasources = connections)
+#' ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), type="pooled",
+#' datasources = connections)
#' # only one plot is created (of the aggregated results of all servers)
#'
#' ## Create a boxplot of two variables, which are split by a factor
-#' ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), group = "GENDER",
+#' ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), group = "GENDER",
#' datasources = connections)
#'
#' ## Create a boxplot with x- and y-axis labels
-#' ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), group = "GENDER",
+#' ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), group = "GENDER",
#' xlabel = "Variable", ylabel = "Measurement", datasources = connections)
#'
#' ## Improve the presentation of ds.boxplot output using ggplot:
#' ### User must save the output, which is in a ggplot format already:
-#' a <- ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), group = "GENDER",
+#' a <- ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), group = "GENDER",
#' xlabel = "Variable", ylabel = "Measurement", datasources = connections)
#'
#' ### Then customise output "a" using ggplot tools:
#' a + ggplot2::scale_fill_discrete(name = "Gender", labels = c("Male", "Female"))
#'
#' ### Or use an alternative way, to maintain the aesthetics:
-#' a + ggplot2::scale_fill_brewer(name = "Gender", labels = c("Male", "Female"))
+#' a + ggplot2::scale_fill_brewer(name = "Gender", labels = c("Male", "Female"))
#'
#' # Clear the Datashield R sessions and logout
-#' datashield.logout(connections)
+#' datashield.logout(connections)
#'
#' }
#'
diff --git a/R/ds.cbind.R b/R/ds.cbind.R
index d943e0175..e21cb961c 100644
--- a/R/ds.cbind.R
+++ b/R/ds.cbind.R
@@ -157,7 +157,7 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob
}
colNames <- unlist(colNames)
if(anyDuplicated(colNames) != 0){
- cat("\n Warning: Some column names in study", j, "are duplicated and a suffix '.k' will be added to the kth replicate \n")
+ message("\n Warning: Some column names in study", j, "are duplicated and a suffix '.k' will be added to the kth replicate \n")
}
}
}
@@ -198,7 +198,7 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob
next.class <- DSI::datashield.aggregate(datasources[std], calltext1)
class.vector <- c(class.vector, next.class[[1]])
if (notify.of.progress){
- cat("\n",j," of ", length(x), " elements to combine in step 1 of 2 in study ", std, "\n")
+ message("\n",j," of ", length(x), " elements to combine in step 1 of 2 in study ", std, "\n")
}
}
for(j in 1:length(x)){
@@ -206,14 +206,14 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob
if(class.vector[j]!="data.frame" && class.vector[j]!="matrix"){
colname.vector <- c(colname.vector, test.df)
if (notify.of.progress){
- cat("\n",j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
+ message("\n",j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
}
}else{
calltext2 <- call('colnamesDS', test.df)
df.names <- DSI::datashield.aggregate(datasources[std], calltext2)
colname.vector <- c(colname.vector, df.names[[1]])
if (notify.of.progress){
- cat("\n", j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
+ message("\n", j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
}
}
}
@@ -221,7 +221,7 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob
}
if (notify.of.progress){
- cat("\nBoth steps in all studies completed\n")
+ message("\nBoth steps in all studies completed\n")
}
# prepare name vectors for transmission
diff --git a/R/ds.colnames.R b/R/ds.colnames.R
index a4b98b1ad..a9e802523 100644
--- a/R/ds.colnames.R
+++ b/R/ds.colnames.R
@@ -1,51 +1,51 @@
#'
#' @title Produces column names of the R object in the server-side
-#' @description Retrieves column names of an R object on the server-side.
+#' @description Retrieves column names of an R object on the server-side.
#' This function is similar to R function \code{colnames}.
-#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}.
-#'
+#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}.
+#'
#' Server function called: \code{colnamesDS}
#' @param x a character string providing the name of the input data frame or matrix.
-#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
+#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
#' If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
-#' @return \code{ds.colnames} returns the column names of
-#' the specified server-side data frame or matrix.
+#' @return \code{ds.colnames} returns the column names of
+#' the specified server-side data frame or matrix.
#' @author DataSHIELD Development Team
#' @seealso \code{\link{ds.dim}} to obtain the dimensions of a matrix or a data frame.
-#' @examples
+#' @examples
#' \dontrun{
-#'
+#'
#' ## Version 6, for version 5 see the Wiki
#' # Connecting to the Opal servers
-#'
+#'
#' require('DSI')
#' require('DSOpal')
#' require('dsBaseClient')
-#'
+#'
#' builder <- DSI::newDSLoginBuilder()
-#' builder$append(server = "study1",
-#' url = "http://192.168.56.100:8080/",
-#' user = "administrator", password = "datashield_test&",
+#' builder$append(server = "study1",
+#' url = "http://192.168.56.100:8080/",
+#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM1", driver = "OpalDriver")
-#' builder$append(server = "study2",
-#' url = "http://192.168.56.100:8080/",
-#' user = "administrator", password = "datashield_test&",
+#' builder$append(server = "study2",
+#' url = "http://192.168.56.100:8080/",
+#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM2", driver = "OpalDriver")
#' builder$append(server = "study3",
-#' url = "http://192.168.56.100:8080/",
-#' user = "administrator", password = "datashield_test&",
+#' url = "http://192.168.56.100:8080/",
+#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM3", driver = "OpalDriver")
#' logindata <- builder$build()
-#'
+#'
#' # Log onto the remote Opal training servers
-#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
-#'
+#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+#'
#' # Getting column names of the R objects stored in the server-side
#' ds.colnames(x = "D",
#' datasources = connections[1]) #only the first server ("study1") is used
#' # Clear the Datashield R sessions and logout
-#' datashield.logout(connections)
+#' datashield.logout(connections)
#' }
#' @export
#'
@@ -65,17 +65,6 @@ ds.colnames <- function(x=NULL, datasources=NULL) {
stop("Please provide the name of a data.frame or matrix!", call.=FALSE)
}
- # check if the input object(s) is(are) defined in all the studies
- defined <- isDefined(datasources, x)
-
- # call the internal function that checks the input object is of the same class in all studies.
- typ <- checkClass(datasources, x)
-
- # if the input object is not a matrix or a dataframe stop
- if(!('data.frame' %in% typ) & !('matrix' %in% typ)){
- stop("The input vector must be of type 'data.frame' or a 'matrix'!", call.=FALSE)
- }
-
cally <- call("colnamesDS", x)
column_names <- DSI::datashield.aggregate(datasources, cally)
diff --git a/R/ds.contourPlot.R b/R/ds.contourPlot.R
index 4e195e48b..f1fbb3bd8 100644
--- a/R/ds.contourPlot.R
+++ b/R/ds.contourPlot.R
@@ -120,6 +120,10 @@ ds.contourPlot <- function(x=NULL, y=NULL, type='combine', show='all', numints=2
stop("y=NULL. Please provide the names of two numeric vectors!", call.=FALSE)
}
+ # Save par and setup reseting of par values
+ old_par <- graphics::par(no.readonly = TRUE)
+ on.exit(graphics::par(old_par), add = TRUE)
+
# check if the input objects are defined in all the studies
isDefined(datasources, x)
isDefined(datasources, y)
diff --git a/R/ds.dataFrame.R b/R/ds.dataFrame.R
index 5837747c6..eeddcdd90 100644
--- a/R/ds.dataFrame.R
+++ b/R/ds.dataFrame.R
@@ -137,7 +137,7 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T
}
colNames <- unlist(colNames)
if(anyDuplicated(colNames) != 0){
- cat("\n Warning: Some column names in study", j, "are duplicated and a suffix '.k' will be added to the kth replicate \n")
+ message("\n Warning: Some column names in study", j, "are duplicated and a suffix '.k' will be added to the kth replicate \n")
}
}
}
@@ -178,7 +178,7 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T
next.class <- DSI::datashield.aggregate(datasources[std], calltext1)
class.vector <- c(class.vector, next.class[[1]])
if (notify.of.progress){
- cat("\n",j," of ", length(x), " elements to combine in step 1 of 2 in study ", std, "\n")
+ message("\n",j," of ", length(x), " elements to combine in step 1 of 2 in study ", std, "\n")
}
}
for(j in 1:length(x)){
@@ -186,14 +186,14 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T
if(class.vector[j]!="data.frame" && class.vector[j]!="matrix"){
colname.vector <- c(colname.vector, test.df)
if (notify.of.progress){
- cat("\n",j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
+ message("\n",j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
}
}else{
calltext2 <- call('colnamesDS', test.df)
df.names <- DSI::datashield.aggregate(datasources[std], calltext2)
colname.vector <- c(colname.vector, df.names[[1]])
if (notify.of.progress){
- cat("\n", j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
+ message("\n", j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
}
}
}
@@ -201,7 +201,7 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T
}
if (notify.of.progress){
- cat("\nBoth steps in all studies completed\n")
+ message("\nBoth steps in all studies completed\n")
}
# prepare vectors for transmission
diff --git a/R/ds.dataFrameSubset.R b/R/ds.dataFrameSubset.R
index 1c5ff6a00..1ae6278db 100644
--- a/R/ds.dataFrameSubset.R
+++ b/R/ds.dataFrameSubset.R
@@ -231,13 +231,13 @@ if(!is.null(rm.cols)){
if (notify.of.progress)
{
if(num.messages==1){
- cat("\nSource",s,"\n",return.warning.message[[s]][[1]],"\n")
+ message("\nSource",s,"\n",return.warning.message[[s]][[1]],"\n")
}else{
- cat("\nSource",s,"\n")
+ message("\nSource",s,"\n")
for(m in 1:(num.messages-1)){
- cat(return.warning.message[[s]][[m]],"\n")
+ message(return.warning.message[[s]][[m]],"\n")
}
- cat(return.warning.message[[s]][[num.messages]],"\n")
+ message(return.warning.message[[s]][[num.messages]],"\n")
}
}
}
diff --git a/R/ds.densityGrid.R b/R/ds.densityGrid.R
index b0766418a..fbd5b909b 100644
--- a/R/ds.densityGrid.R
+++ b/R/ds.densityGrid.R
@@ -66,11 +66,11 @@
#' # Example2: generate a grid density object for each study separately
#' ds.densityGrid(x="D$LAB_TSC",
#' y="D$LAB_HDL",
-#' type="split"
+#' type="split",
#' datasources = connections[1])#only the first Opal server is used ("study1")
#'
#' # Example3: generate a grid density object where the number of intervals is set to 15, for
-#' each study separately
+#' # each study separately
#' ds.densityGrid(x="D$LAB_TSC",
#' y="D$LAB_HDL",
#' type="split",
diff --git a/R/ds.dmtC2S.R b/R/ds.dmtC2S.R
index ee2e4be67..085d198fb 100644
--- a/R/ds.dmtC2S.R
+++ b/R/ds.dmtC2S.R
@@ -93,7 +93,7 @@ if(dplyr::is.tbl(dfdata))
if(!is.matrix(dfdata) && !is.data.frame(dfdata) && !dplyr::is.tbl(dfdata))
{
- cat("\n FAILED: Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà-Montagut X, Wheater S (2025).
+ Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà-Montagut X, Wheater S (????).
dsBaseClient: 'DataSHIELD' Client Side Base Functions.
-R package version 6.3.4.
+R package version 6.3.5.
Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014).
“DataSHIELD: taking the analysis to the data, not the data to the analysis.”
@@ -168,11 +164,11 @@ Internal function to pool md.pattern results from multiple studies Pooled pattern matrixPage not found (404)
diff --git a/docs/LICENSE.html b/docs/LICENSE.html
index b495f5487..b18449a17 100644
--- a/docs/LICENSE.html
+++ b/docs/LICENSE.html
@@ -17,7 +17,7 @@
@@ -256,11 +256,11 @@ NA
diff --git a/docs/authors.html b/docs/authors.html
index 177847f60..827877cec 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -17,7 +17,7 @@
@@ -71,10 +71,6 @@ Authors and Citation
- Citation
- @Manual{,
title = {dsBaseClient: 'DataSHIELD' Client Side Base Functions},
- author = {Paul Burton and Rebecca Wilson and Olly Butters and Patricia Ryser-Welch and Alex Westerberg and Leire Abarrategui and Roberto Villegas-Diaz and Demetris Avraam and Demetris Avraam and Yannick Marcon and Tom Bishop and Amadou Gaye and Xavier Escribà-Montagut and Stuart Wheater},
- note = {R package version 6.3.4},
+ author = {Paul Burton and Rebecca Wilson and Olly Butters and Patricia Ryser-Welch and Alex Westerberg and Leire Abarrategui and Roberto Villegas-Diaz and Demetris Avraam and Yannick Marcon and Tom Bishop and Amadou Gaye and Xavier Escribà-Montagut and Stuart Wheater},
+ note = {R package version 6.3.5},
}
Citation
diff --git a/docs/index.html b/docs/index.html
index cc91e1494..b3d66be0a 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -12,7 +12,7 @@
-
+
Pool missing data patterns across studies
+
+ dot-pool_md_patterns.Rd.pool_md_patterns(patterns_list, study_names)Arguments
+
+
+Value
+ Examples
diff --git a/docs/reference/ds.abs.html b/docs/reference/ds.abs.html
index d986834af..3ca2945fd 100644
--- a/docs/reference/ds.abs.html
+++ b/docs/reference/ds.abs.html
@@ -18,7 +18,7 @@
@@ -150,11 +150,11 @@ Examples
diff --git a/docs/reference/ds.asCharacter.html b/docs/reference/ds.asCharacter.html
index 9b2b1fdaf..1ea041a6b 100644
--- a/docs/reference/ds.asCharacter.html
+++ b/docs/reference/ds.asCharacter.html
@@ -18,7 +18,7 @@
@@ -132,11 +132,11 @@ Examples
diff --git a/docs/reference/ds.asDataMatrix.html b/docs/reference/ds.asDataMatrix.html
index eb7b66e38..43145cc63 100644
--- a/docs/reference/ds.asDataMatrix.html
+++ b/docs/reference/ds.asDataMatrix.html
@@ -18,7 +18,7 @@
@@ -135,11 +135,11 @@ Examples
diff --git a/docs/reference/ds.asFactor.html b/docs/reference/ds.asFactor.html
index a2db6d261..7c3ea953c 100644
--- a/docs/reference/ds.asFactor.html
+++ b/docs/reference/ds.asFactor.html
@@ -17,7 +17,7 @@
@@ -144,7 +144,7 @@ Details
If we set the argument fixed.dummy.vars = TRUE,
baseline.level = 1 and forced.factor.levels = c(1,2,3,4,5).
The input vector is converted to the following matrix of dummy variables:
| DV2 | DV3 | DV4 | DV5 | 0 |
| 0 | 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 | 0 |
| 0 | 0 | 1 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 1 | 0 |
For the same example if the baseline.level = 3 then the matrix is:
| DV2 | DV3 | DV4 | DV5 |
| 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 1 |
For the same example if the baseline.level = 3 then the matrix is:
| DV1 | DV2 | DV4 | DV5 |
| 1 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 1 |
In the first instance the first row of the matrix has zeros in all entries indicating
that the first data point belongs to level 1 (as the baseline level is equal to 1).
The second row has 1 at the first (DV2) column and zeros elsewhere,
@@ -229,11 +229,11 @@
if (FALSE) { # \dontrun{
+ ## Version 6, for version 5 see the Wiki
+
+ ### Please ensure you have a training Virtual Machine running,
+ # or that you have a live connection to a server.
+
+ # Connecting to the Opal servers
+
+ require('DSI')
+ require('DSOpal')
+ require('dsBaseClient')
+
+ builder <- DSI::newDSLoginBuilder()
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM1", driver = "OpalDriver")
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM2", driver = "OpalDriver")
+ builder$append(server = "study3",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM3", driver = "OpalDriver")
+ logindata <- builder$build()
+
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE,
+ symbol = "D")
+
+ ## Create a boxplot of one variable
+ ds.boxPlot("D", "LAB_HDL", datasources = connections)
+
+ ## Create a boxplot that is split by study:
+ ds.boxPlot("D", "LAB_HDL", type= "split", datasources = connections)
+
+  ## Create a boxplot of two variables
+ ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), type="pooled",
+ datasources = connections)
+ # only one plot is created (of the aggregated results of all servers)
+
+ ## Create a boxplot of two variables, which are split by a factor
+ ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), group = "GENDER",
+ datasources = connections)
+
+ ## Create a boxplot with x- and y-axis labels
+ ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), group = "GENDER",
+ xlabel = "Variable", ylabel = "Measurement", datasources = connections)
+
+ ## Improve the presentation of ds.boxplot output using ggplot:
+ ### User must save the output, which is in a ggplot format already:
+ a <- ds.boxPlot("D", c("LAB_HDL", "LAB_TRIG"), group = "GENDER",
+ xlabel = "Variable", ylabel = "Measurement", datasources = connections)
+
+ ### Then customise output "a" using ggplot tools:
+ a + ggplot2::scale_fill_discrete(name = "Gender", labels = c("Male", "Female"))
+
+ ### Or use an alternative way, to maintain the aesthetics:
+ a + ggplot2::scale_fill_brewer(name = "Gender", labels = c("Male", "Female"))
+
+ # Clear the Datashield R sessions and logout
+ datashield.logout(connections)
+
+} # }
+
+if (FALSE) { # \dontrun{
+
+ ## Version 6, for version 5 see the Wiki
+ # Connecting to the Opal servers
+
+ require('DSI')
+ require('DSOpal')
+ require('dsBaseClient')
+
+ builder <- DSI::newDSLoginBuilder()
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM1", driver = "OpalDriver")
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM2", driver = "OpalDriver")
+ builder$append(server = "study3",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM3", driver = "OpalDriver")
+ logindata <- builder$build()
+
+ # Log onto the remote Opal training servers
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+
+ #Generate the density grid
+ # Example1: generate a combined grid density object (default)
+ ds.densityGrid(x="D$LAB_TSC",
+ y="D$LAB_HDL",
+ datasources = connections)#all opal servers are used
+
+ # Example2: generate a grid density object for each study separately
+ ds.densityGrid(x="D$LAB_TSC",
+ y="D$LAB_HDL",
+ type="split",
+ datasources = connections[1])#only the first Opal server is used ("study1")
+
+ # Example3: generate a grid density object where the number of intervals is set to 15, for
+ # each study separately
+ ds.densityGrid(x="D$LAB_TSC",
+ y="D$LAB_HDL",
+ type="split",
+ numints=15,
+ datasources = connections)
+
+ # clear the Datashield R sessions and logout
+ datashield.logout(connections)
+
+} # }
+
+See ?meta::metagen for the different options.
+  Returns a forest plot object created with `meta::forest`.
+if (FALSE) { # \dontrun{
+
+## Version 6, for version 5 see the Wiki
+ # Connecting to the Opal servers
+
+ require('DSI')
+ require('DSOpal')
+ require('dsBaseClient')
+
+ builder <- DSI::newDSLoginBuilder()
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM1", driver = "OpalDriver")
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM2", driver = "OpalDriver")
+ builder$append(server = "study3",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM3", driver = "OpalDriver")
+ logindata <- builder$build()
+
+ # Log onto the remote Opal training servers
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+
+ # Compute the heat map plot
+ # Example 1: Plot a combined (default) heat map plot of the variables 'LAB_TSC'
+ # and 'LAB_HDL' using the method 'smallCellsRule' (default)
+ ds.heatmapPlot(x = 'D$LAB_TSC',
+ y = 'D$LAB_HDL',
+ datasources = connections) #all servers are used
+
+ # Example 2: Plot a split heat map plot of the variables 'LAB_TSC'
+ # and 'LAB_HDL' using the method 'smallCellsRule' (default)
+ ds.heatmapPlot(x = 'D$LAB_TSC',
+ y = 'D$LAB_HDL',
+ method = 'smallCellsRule',
+ type = 'split',
+ datasources = connections[1]) #only the first server is used (study1)
+
+  # Example 3: Plot a split heat map plot using the 'deterministic' method
+  # (centroids of each k = 7 nearest neighbours) with numints = 40
+ ds.heatmapPlot(x = 'D$LAB_TSC',
+ y = 'D$LAB_HDL',
+ numints = 40,
+ method = 'deterministic',
+ k = 7,
+ type = 'split',
+ datasources = connections[2]) #only the second server is used (study2)
+
+
+ # clear the Datashield R sessions and logout
+ datashield.logout(connections)
+
+} # }
+
+if (FALSE) { # \dontrun{
+
+## Version 6, for version 5 see the Wiki
+ # Connecting to the Opal servers
+
+ require('DSI')
+ require('DSOpal')
+ require('dsBaseClient')
+
+ builder <- DSI::newDSLoginBuilder()
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM1", driver = "OpalDriver")
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM2", driver = "OpalDriver")
+ builder$append(server = "study3",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM3", driver = "OpalDriver")
+ logindata <- builder$build()
+
+ # Log onto the remote Opal training servers
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+
+ # Compute the histogram
+ # Example 1: generate a histogram for each study separately
+ ds.histogram(x = 'D$PM_BMI_CONTINUOUS',
+ type = "split",
+ datasources = connections) #all studies are used
+
+ # Example 2: generate a combined histogram with the default small cells counts
+ # suppression rule
+ ds.histogram(x = 'D$PM_BMI_CONTINUOUS',
+ method = 'smallCellsRule',
+ type = 'combine',
+ datasources = connections[1]) #only the first study is used (study1)
+
+ # Example 3: if a variable is of type factor the function returns an error
+ ds.histogram(x = 'D$PM_BMI_CATEGORICAL',
+ datasources = connections)
+
+ # Example 4: generate a combined histogram with the deterministic method for k=50
+ ds.histogram(x = 'D$PM_BMI_CONTINUOUS',
+ k = 50,
+ method = 'deterministic',
+ type = 'combine',
+ datasources = connections[2])#only the second study is used (study2)
+
+
+ # Example 5: create a histogram and the probability density on the plot
+ hist <- ds.histogram(x = 'D$PM_BMI_CONTINUOUS',
+ method = 'probabilistic', type='combine',
+ num.breaks = 30,
+ vertical.axis = 'Density',
+ datasources = connections)
+ lines(hist$mids, hist$density)
+
+ # clear the Datashield R sessions and logout
+ datashield.logout(connections)
+ } # }
+
+
+if (FALSE) { # \dontrun{
+
+ ## Version 6, for version 5 see the Wiki
+
+ # connecting to the Opal servers
+
+ require('DSI')
+ require('DSOpal')
+ require('dsBaseClient')
+
+ builder <- DSI::newDSLoginBuilder()
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM1", driver = "OpalDriver")
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM2", driver = "OpalDriver")
+ builder$append(server = "study3",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM3", driver = "OpalDriver")
+ logindata <- builder$build()
+
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+
+ #Example 1: Create a square matrix with the server-side vector as its diagonal
+ #and all the other values = 0
+
+ # Create a vector in the server-side
+
+ ds.rUnif(samp.size = 9,
+ min = -10.5,
+ max = 10.5,
+ newobj = "ss.vector.9",
+ seed.as.integer = 5575,
+ force.output.to.k.decimal.places = 0,
+ datasources = connections)
+
+
+ #Calculate the diagonal of the matrix
+
+ ds.matrixDiag(x1 = "ss.vector.9",
+ aim = "serverside.vector.2.matrix",
+ nrows.scalar = NULL,
+ newobj = "matrix.diag1",
+ datasources = connections)
+
+ #Example 2: Create a square matrix with the server-side scalar as all diagonal values
+ #and all the other values = 0
+
+ #Create a scalar in the server-side
+
+ ds.rUnif(samp.size = 1,
+ min = -10.5,
+ max = 10.5,
+ newobj = "ss.scalar",
+ seed.as.integer = 5575,
+ force.output.to.k.decimal.places = 0,
+ datasources = connections)
+
+ #Calculate the diagonal of the matrix
+
+ ds.matrixDiag(x1 = "ss.scalar",
+ aim = "serverside.scalar.2.matrix",
+ nrows.scalar = 4,
+ newobj = "matrix.diag2",
+ datasources = connections)
+
+ #Example 3: Create a vector that contains the server-side matrix diagonal values
+
+ #Create a matrix in the server-side
+
+ ds.matrix(mdata = 10,
+ from = "clientside.scalar",
+ nrows.scalar = 3,
+ ncols.scalar = 8,
+ newobj = "ss.matrix",
+ datasources = connections)
+
+ #Extract the diagonal of the matrix
+
+ ds.matrixDiag(x1 = "ss.matrix",
+ aim = "serverside.matrix.2.vector",
+ nrows.scalar = NULL,
+ newobj = "vector.diag3",
+ datasources = connections)
+
+ #Example 4: Create a square matrix with the client-side vector as a diagonal
+ #and all the other values = 0
+
+ ds.matrixDiag(x1 = c(2,6,9,10),
+ aim = "clientside.vector.2.matrix",
+ nrows.scalar = NULL,
+ newobj = "matrix.diag4",
+ datasources = connections)
+
+ #Example 5: Create a square matrix with the client-side scalar as all diagonal values
+ #and all the other values = 0
+
+ ds.matrixDiag(x1 = 4,
+ aim = "clientside.scalar.2.matrix",
+ nrows.scalar = 5,
+ newobj = "matrix.diag5",
+ datasources = connections)
+
+
+ # clear the Datashield R sessions and logout
+ datashield.logout(connections)
+} # }
+ds.mdPattern.Rd
+This function is a client-side wrapper for the server-side mdPatternDS function. It generates a missing data pattern matrix similar to mice::md.pattern but with disclosure control applied to prevent revealing small cell counts.
+ds.mdPattern(x = NULL, type = "split", datasources = NULL)
+x: a character string specifying the name of a data frame or matrix on the server-side containing the data to analyze.
+type: a character string specifying the output type. If 'split' (default), returns separate patterns for each study. If 'combine', attempts to pool patterns across studies.
a list of DSConnection-class objects obtained
+after login. If the datasources argument is not specified, the default set of
+connections will be used: see datashield.connections_default.
For type='split': A list with one element per study, each containing:
The missing data pattern matrix for that study
Logical indicating if all patterns meet disclosure requirements
A message describing the validity status
For type='combine': A list containing:
The pooled missing data pattern matrix across all studies
Logical indicating if all pooled patterns meet disclosure requirements
A message describing the validity status
The function calls the server-side mdPatternDS function which uses
mice::md.pattern to analyze missing data patterns. Patterns with counts below the
disclosure threshold (default: nfilter.tab = 3) are suppressed to maintain privacy.
+
+Output Format:
+- Each row represents a missing data pattern
+- Pattern counts are shown in row names (e.g., "150", "25")
+- Columns show 1 if the variable is observed, 0 if missing
+- Last column shows the total number of missing values per pattern
+- Last row shows the total number of missing values per variable
+
+Disclosure Control:
+Suppressed patterns (count below threshold) are indicated by:
+- Row name: "suppressed(<N>)" where N is the threshold
+- All pattern values set to NA
+- Summary row also suppressed to prevent back-calculation
+
+Pooling Behavior (type='combine'):
+When pooling across studies, the function uses a conservative approach
+for disclosure control:
+1. Identifies identical missing patterns across studies
+2. EXCLUDES suppressed patterns from pooling - patterns suppressed in
+   ANY study are not included in the pooled count
+3. Sums counts only for non-suppressed identical patterns
+4. Re-validates pooled counts against disclosure threshold
+
+Important: This conservative approach means:
+- Pooled counts may be underestimates if some studies had suppressed patterns
+- This prevents disclosure through subtraction (e.g., if study A shows count=5
+  and pool shows count=7, one could deduce study B has count=2, violating disclosure)
+- Different patterns across studies are preserved separately in the pooled result
+if (FALSE) { # \dontrun{
+ ## Version 6, for version 5 see the Wiki
+
+ # Connecting to the Opal servers
+
+ require('DSI')
+ require('DSOpal')
+ require('dsBaseClient')
+
+ builder <- DSI::newDSLoginBuilder()
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM1", driver = "OpalDriver")
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM2", driver = "OpalDriver")
+ logindata <- builder$build()
+
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+
+ # Get missing data patterns for each study separately
+ patterns_split <- ds.mdPattern(x = "D", type = "split", datasources = connections)
+
+ # View results for study1
+ print(patterns_split$study1$pattern)
+ # var1 var2 var3
+ # 150 1 1 1 0 <- 150 obs complete
+ # 25 0 1 1 1 <- 25 obs missing var1
+ # 25 0 0 25 <- Summary: 25 missing per variable
+
+ # Get pooled missing data patterns across studies
+ patterns_pooled <- ds.mdPattern(x = "D", type = "combine", datasources = connections)
+ print(patterns_pooled$pattern)
+
+ # Example with suppressed patterns:
+ # If study1 has a pattern with count=2 (suppressed) and study2 has same pattern
+ # with count=5 (valid), the pooled result will show count=5 (conservative approach)
+ # A warning will indicate: "Pooled counts may underestimate the true total"
+
+ # Clear the Datashield R sessions and logout
+ datashield.logout(connections)
+} # }
+
+if (FALSE) { # \dontrun{
+
+ ## Version 6, for version 5 see the Wiki
+
+ # connecting to the Opal servers
+
+ require('DSI')
+ require('DSOpal')
+ require('dsBaseClient')
+
+ builder <- DSI::newDSLoginBuilder()
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM1", driver = "OpalDriver")
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM2", driver = "OpalDriver")
+ builder$append(server = "study3",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM3", driver = "OpalDriver")
+ logindata <- builder$build()
+
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+
+ # Example 1: recode the levels of D$GENDER
+ ds.recodeValues(var.name = "D$GENDER",
+ values2replace.vector = c(0,1),
+ new.values.vector = c(10,20),
+ newobj = 'gender_recoded',
+ datasources = connections)
+
+ # Example 2: recode NAs in D$PM_BMI_CATEGORICAL
+ ds.recodeValues(var.name = "D$PM_BMI_CATEGORICAL",
+ values2replace.vector = c(1,2),
+ new.values.vector = c(1,2),
+ missing = 99,
+ newobj = 'bmi_recoded',
+ datasources = connections)
+
+ # Clear the Datashield R sessions and logout
+ datashield.logout(connections)
+
+} # }
+if (FALSE) { # \dontrun{
+
+ # load the file that contains the login details
+ data(logindata)
+
+ # login and assign all the variables to R
+ conns <- datashield.login(logins=logindata,assign=TRUE)
+
+ # Example 1: generate a two dimensional table, outputting combined contingency
+ # tables - default behaviour
+ output <- ds.table2D(x='D$DIS_DIAB', y='D$GENDER')
+ # display the 5 results items, one at a time to avoid having too much information
+ # displayed at the same time
+ output$counts
+ output$rowPercent
+ output$colPercent
+ output$chi2Test
+ output$validity
+
+ # Example 2: generate a two dimensional table, outputting study specific contingency tables
+ output <- ds.table2D(x='D$DIS_DIAB', y='D$GENDER', type='split')
+ # display the 5 results items, one at a time to avoid having too much information displayed
+ # at the same time
+ output$counts
+ output$rowPercent
+ output$colPercent
+ output$chi2Test
+ output$validity
+
+ # Example 3: generate a two dimensional table, outputting combined contingency tables
+ # *** this example shows what happens when one or studies return an invalid table ***
+ output <- ds.table2D(x='D$DIS_CVA', y='D$GENDER', type='combine')
+ output$counts
+ output$rowPercent
+ output$colPercent
+ output$chi2Test
+ output$validity
+
+ # Example 4: same example as above but output is given for each study,
+ # separately (i.e. type='split')
+ # *** this example shows what happens when one or studies return an invalid table ***
+ output <- ds.table2D(x='D$DIS_CVA', y='D$GENDER', type='split')
+ output$counts
+ output$rowPercent
+ output$colPercent
+ output$chi2Test
+ output$validity
+
+ # clear the Datashield R sessions and logout
+ datashield.logout(conns)
+
+} # }
+
+Compute Weighted Mean by Group
Converts a server-side R object into Boolean indicators
ds.matrixTranspose()
Transposes a server-side matrix
Display missing data patterns with disclosure control
Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Demetris Avraam, Yannick Marcon, Tom Bishop, Amadou Gaye, Xavier Escribà-Montagut, Stuart Wheater.
+Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Yannick Marcon, Tom Bishop, Amadou Gaye, Xavier Escribà-Montagut, Stuart Wheater.