diff --git a/.Rbuildignore b/.Rbuildignore
index 819b13428..f0d597f85 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -19,6 +19,7 @@
^docs$
^dsBase_6.4.0.tar.gz$
^dsBase_6.4.0-permissive.tar.gz$
-^dsDanger_6.3.1.tar.gz$
+^dsDanger_6.3.4.tar.gz$
^\.circleci$
^\.circleci/config\.yml$
+^\.github$
diff --git a/.github/.gitignore b/.github/.gitignore
new file mode 100644
index 000000000..2d19fc766
--- /dev/null
+++ b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/dsBaseClient_test_suite.yaml b/.github/workflows/dsBaseClient_test_suite.yaml
new file mode 100644
index 000000000..17e13f1dc
--- /dev/null
+++ b/.github/workflows/dsBaseClient_test_suite.yaml
@@ -0,0 +1,247 @@
+################################################################################
+# DataSHIELD GHA test suite - dsBaseClient
+# Adapted from `armadillo_azure-pipelines.yml` by Roberto Villegas-Diaz
+#
+# Inside the root directory $(Pipeline.Workspace) will be a file tree like:
+# /dsBaseClient <- Checked out version of datashield/dsBaseClient
+# /dsBaseClient/logs <- Where results of tests and logs are collated
+# /testStatus <- Checked out version of datashield/testStatus
+#
+# As of Sept. 2025 this takes ~ 95 mins to run.
+################################################################################
+name: dsBaseClient tests' suite
+
+on:
+ push:
+ schedule:
+ - cron: '0 0 * * 6' # Weekly (on Saturdays @ 0.00)
+
+jobs:
+ dsBaseClient_test_suite:
+ runs-on: ubuntu-latest
+ timeout-minutes: 180
+ permissions:
+ contents: read
+
+ # These should all be constant, except TEST_FILTER. This can be used to test
+ # subsets of test files in the testthat directory. Options are like:
+ # '*' <- Run all tests.
+ # 'asNumericDS*' <- Run all asNumericDS tests, i.e. all the arg, etc. tests.
+ # '*_smk_*' <- Run all the smoke tests for all functions.
+ env:
+ TEST_FILTER: '_-|datachk-|smk-|arg-|disc-|perf-|smk_expt-|expt-|math-'
+ _R_CHECK_SYSTEM_CLOCK_: 0
+ WORKFLOW_ID: ${{ github.run_id }}-${{ github.run_attempt }}
+ PROJECT_NAME: dsBaseClient
+ BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+ REPO_OWNER: ${{ github.repository_owner }}
+ R_KEEP_PKG_SOURCE: yes
+ GITHUB_TOKEN: ${{ github.token || 'placeholder-token' }}
+
+ steps:
+ - name: Checkout dsBaseClient
+ uses: actions/checkout@v4
+ with:
+ path: dsBaseClient
+
+ - name: Checkout testStatus
+ if: ${{ github.actor != 'nektos/act' }} # for local deployment only
+ uses: actions/checkout@v4
+ with:
+ repository: ${{ env.REPO_OWNER }}/testStatus
+ ref: master
+ path: testStatus
+ persist-credentials: false
+ token: ${{ env.GITHUB_TOKEN }}
+
+ - name: Uninstall default MySQL
+ run: |
+ curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add -
+ sudo service mysql stop || true
+ sudo apt-get update
+ sudo apt-get remove --purge mysql-client mysql-server mysql-common -y
+ sudo apt-get autoremove -y
+ sudo apt-get autoclean -y
+ sudo rm -rf /var/lib/mysql/
+
+ - uses: r-lib/actions/setup-pandoc@v2
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ r-version: release
+ http-user-agent: release
+ use-public-rspm: true
+
+ - name: Install R and dependencies
+ run: |
+ sudo apt-get install --no-install-recommends software-properties-common dirmngr -y
+ wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sudo tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
+ sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
+ sudo apt-get update -qq
+ sudo apt-get upgrade -y
+ sudo apt-get install -qq libxml2-dev libcurl4-openssl-dev libssl-dev libgsl-dev libgit2-dev r-base -y
+ sudo apt-get install -qq libharfbuzz-dev libfribidi-dev libmagick++-dev xml-twig-tools -y
+ sudo R -q -e "install.packages(c('devtools','covr','fields','meta','metafor','ggplot2','gridExtra','data.table','DSI','DSOpal','DSLite','MolgenisAuth','MolgenisArmadillo','DSMolgenisArmadillo','DescTools','e1071'), repos='https://cloud.r-project.org')"
+ sudo R -q -e "devtools::install_github(repo='datashield/dsDangerClient', ref=Sys.getenv('BRANCH_NAME'))"
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ dependencies: 'c("Imports")'
+ extra-packages: |
+ any::rcmdcheck
+ cran::devtools
+ cran::git2r
+ cran::RCurl
+ cran::readr
+ cran::magrittr
+ cran::xml2
+ cran::purrr
+ cran::dplyr
+ cran::stringr
+ cran::tidyr
+ cran::quarto
+ cran::knitr
+ cran::kableExtra
+ cran::rmarkdown
+ cran::downlit
+ needs: check
+
+ - name: Check manual updated
+ run: |
+ orig_sum=$(find man -type f | sort -u | xargs cat | md5sum)
+ R -q -e "devtools::document()"
+ new_sum=$(find man -type f | sort -u | xargs cat | md5sum)
+ if [ "$orig_sum" != "$new_sum" ]; then
+ echo "Your committed man/*.Rd files are out of sync with the R headers."
+ exit 1
+ fi
+ working-directory: dsBaseClient
+ continue-on-error: true
+
+ - name: Devtools checks
+ run: |
+ R -q -e "devtools::check(args = c('--no-examples', '--no-tests'))" | tee azure-pipelines_check.Rout
+ grep --quiet "^0 errors" azure-pipelines_check.Rout && grep --quiet " 0 warnings" azure-pipelines_check.Rout && grep --quiet " 0 notes" azure-pipelines_check.Rout
+ working-directory: dsBaseClient
+ continue-on-error: true
+
+ - name: Start Armadillo docker-compose
+ run: docker compose -f docker-compose_armadillo.yml up -d --build
+ working-directory: dsBaseClient
+
+ - name: Install test datasets
+ run: |
+ sleep 60
+ R -q -f "molgenis_armadillo-upload_testing_datasets.R"
+ working-directory: dsBaseClient/tests/testthat/data_files
+
+ - name: Install dsBase to Armadillo
+ run: |
+ curl -u admin:admin -X GET http://localhost:8080/packages
+ curl -u admin:admin -H 'Content-Type: multipart/form-data' -F "file=@dsBase_6.3.5-permissive.tar.gz" -X POST http://localhost:8080/install-package
+ sleep 60
+ docker restart dsbaseclient-armadillo-1
+ sleep 30
+ curl -u admin:admin -X POST http://localhost:8080/whitelist/dsBase
+ working-directory: dsBaseClient
+
+ - name: Run tests with coverage & JUnit report
+ run: |
+ mkdir -p logs
+ R -q -e "devtools::reload();"
+ R -q -e '
+ write.csv(
+ covr::coverage_to_list(
+ covr::package_coverage(
+ type = c("none"),
+ code = c('"'"'
+ output_file <- file("test_console_output.txt");
+ sink(output_file);
+ sink(output_file, type = "message");
+ junit_rep <- testthat::JunitReporter$new(file = file.path(getwd(), "test_results.xml"));
+ progress_rep <- testthat::ProgressReporter$new(max_failures = 999999);
+ multi_rep <- testthat::MultiReporter$new(reporters = list(progress_rep, junit_rep));
+ options("datashield.return_errors" = FALSE, "default_driver" = "ArmadilloDriver");
+ testthat::test_package("${{ env.PROJECT_NAME }}", filter = "${{ env.TEST_FILTER }}", reporter = multi_rep, stop_on_failure = FALSE)'"'"'
+ )
+ )
+ ),
+ "coveragelist.csv"
+ )'
+
+ mv coveragelist.csv logs/
+ mv test_* logs/
+ working-directory: dsBaseClient
+
+ - name: Check for JUnit errors
+ run: |
+ issue_count=$(sed 's/failures="0" errors="0"//' test_results.xml | grep -c errors= || true)
+ echo "Number of testsuites with issues: $issue_count"
+ sed 's/failures="0" errors="0"//' test_results.xml | grep errors= > issues.log || true
+ cat issues.log || true
+ # continue with workflow even when some tests fail
+ exit 0
+ working-directory: dsBaseClient/logs
+
+ - name: Write versions to file
+ run: |
+ echo "branch:${{ env.BRANCH_NAME }}" > ${{ env.WORKFLOW_ID }}.txt
+ echo "os:$(lsb_release -ds)" >> ${{ env.WORKFLOW_ID }}.txt
+ echo "R:$(R --version | head -n1)" >> ${{ env.WORKFLOW_ID }}.txt
+ Rscript --vanilla -e 'sessionInfo()' >> session_info_${{ env.WORKFLOW_ID }}.txt
+ working-directory: dsBaseClient/logs
+
+ - name: Parse results from testthat and covr
+ run: |
+ Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/ logs/ https://github.com/datashield/${{ env.PROJECT_NAME }}/blob/${{ env.BRANCH_NAME }} '([^:]+)' '(?<=::)[^:]+(?=::)'
+ working-directory: dsBaseClient
+ env:
+ PROJECT_NAME: ${{ env.PROJECT_NAME }}
+ BRANCH_NAME: ${{ env.BRANCH_NAME }}
+
+ - name: Render report
+ run: |
+ cd testStatus
+
+ mkdir -p new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+ mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+ mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/
+
+ # Copy logs to new logs directory location
+ cp -rv ../dsBaseClient/logs/* new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+ cp -rv ../dsBaseClient/logs/${{ env.WORKFLOW_ID }}.txt new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+
+ R -e 'input_dir <- file.path("../new/logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
+ mv source/test_report.html new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/index.html
+ cp -r new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/* new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest
+
+ env:
+ PROJECT_NAME: ${{ env.PROJECT_NAME }}
+ BRANCH_NAME: ${{ env.BRANCH_NAME }}
+ WORKFLOW_ID: ${{ env.WORKFLOW_ID }}
+
+ - name: Upload test logs
+ uses: actions/upload-artifact@v4
+ with:
+ name: dsbaseclient-logs
+ path: testStatus/new
+
+ - name: Dump environment info
+ run: |
+ echo -e "\n#############################"
+ echo -e "ls /: ######################"
+ ls -al .
+ echo -e "\n#############################"
+ echo -e "lscpu: ######################"
+ lscpu
+ echo -e "\n#############################"
+ echo -e "memory: #####################"
+ free -m
+ echo -e "\n#############################"
+ echo -e "env: ########################"
+ env
+ echo -e "\n#############################"
+ echo -e "R sessionInfo(): ############"
+ R -e 'sessionInfo()'
+ sudo apt install tree -y
+ tree .
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
new file mode 100644
index 000000000..bfc9f4db3
--- /dev/null
+++ b/.github/workflows/pkgdown.yaml
@@ -0,0 +1,49 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ release:
+ types: [published]
+ workflow_dispatch:
+
+name: pkgdown.yaml
+
+permissions: read-all
+
+jobs:
+ pkgdown:
+ runs-on: ubuntu-latest
+ # Only restrict concurrency for non-PR jobs
+ concurrency:
+ group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ permissions:
+ contents: write
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/setup-pandoc@v2
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::pkgdown, local::.
+ needs: website
+
+ - name: Build site
+ run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
+ shell: Rscript {0}
+
+ - name: Deploy to GitHub pages 🚀
+ if: github.event_name != 'pull_request'
+ uses: JamesIves/github-pages-deploy-action@v4.5.0
+ with:
+ clean: false
+ branch: gh-pages
+ folder: docs
diff --git a/DESCRIPTION b/DESCRIPTION
index 882df32ca..0a3e7fbb5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,23 +1,27 @@
Package: dsBaseClient
-Title: DataSHIELD Client Functions
+Title: 'DataSHIELD' Client Side Base Functions
Version: 6.4.0.9000
-Description: Base DataSHIELD functions for the client side. DataSHIELD is a software package which allows
- you to do non-disclosive federated analysis on sensitive data. DataSHIELD analytic functions have
+Description: Base 'DataSHIELD' functions for the client side. 'DataSHIELD' is a software package which allows
+ you to do non-disclosive federated analysis on sensitive data. 'DataSHIELD' analytic functions have
been designed to only share non disclosive summary statistics, with built in automated output
checking based on statistical disclosure control. With data sites setting the threshold values for
- the automated output checks.
+ the automated output checks. For more details, see citation("dsBaseClient").
Authors@R: c(person(given = "Paul",
family = "Burton",
- role = c("aut")),
+ role = c("aut"),
+ comment = c(ORCID = "0000-0001-5799-9634")),
person(given = "Rebecca",
family = "Wilson",
- role = c("aut")),
+ role = c("aut"),
+ comment = c(ORCID = "0000-0003-2294-593X")),
person(given = "Olly",
family = "Butters",
- role = c("aut")),
+ role = c("aut"),
+ comment = c(ORCID = "0000-0003-0354-8461")),
person(given = "Patricia",
family = "Ryser-Welch",
- role = c("aut")),
+ role = c("aut"),
+ comment = c(ORCID = "0000-0002-0070-0264")),
person(given = "Alex",
family = "Westerberg",
role = c("aut")),
@@ -37,6 +41,17 @@ Authors@R: c(person(given = "Paul",
role = c("aut"),
email = "yannick.marcon@obiba.org",
comment = c(ORCID = "0000-0003-0138-2023")),
+ person(given = "Tom",
+ family = "Bishop",
+ role = c("aut")),
+ person(given = "Amadou",
+ family = "Gaye",
+ role = c("aut"),
+ comment = c(ORCID = "0000-0002-1180-2792")),
+ person(given = "Xavier",
+ family = "Escribà-Montagut",
+ role = c("aut"),
+ comment = c(ORCID = "0000-0003-2888-8948")),
person(given = "Stuart",
family = "Wheater",
role = c("aut", "cre"),
@@ -58,6 +73,7 @@ Imports:
Suggests:
lme4,
httr,
+ spelling,
tibble,
testthat,
e1071,
@@ -65,5 +81,6 @@ Suggests:
DSOpal,
DSMolgenisArmadillo,
DSLite
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
Encoding: UTF-8
+Language: en-GB
diff --git a/NAMESPACE b/NAMESPACE
index d737d5e6a..3bfe6e6e5 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand
-export(computeWeightedMeans)
export(ds.Boole)
export(ds.abs)
export(ds.asCharacter)
@@ -73,6 +72,7 @@ export(ds.matrixDimnames)
export(ds.matrixInvert)
export(ds.matrixMult)
export(ds.matrixTranspose)
+export(ds.mdPattern)
export(ds.mean)
export(ds.meanByClass)
export(ds.meanSdGp)
@@ -104,6 +104,8 @@ export(ds.seq)
export(ds.setSeed)
export(ds.skewness)
export(ds.sqrt)
+export(ds.subset)
+export(ds.subsetByClass)
export(ds.summary)
export(ds.table)
export(ds.table1D)
diff --git a/R/computeWeightedMeans.R b/R/computeWeightedMeans.R
index 0f04fc915..1284ffc08 100644
--- a/R/computeWeightedMeans.R
+++ b/R/computeWeightedMeans.R
@@ -9,9 +9,11 @@
#' @param variables character name of the variable(s) to focus on. The variables must be in the data.table
#' @param weight character name of the data.table column that contains a weight.
#' @param by character vector of the columns to group by
+#' @return Returns a data table object with computed weighted means.
+#'
#' @import data.table
#' @importFrom stats as.formula na.omit ts weighted.mean
-#' @export
+#' @keywords internal
computeWeightedMeans <- function(data_table, variables, weight, by) {
if (is.null(weight)) {
diff --git a/R/ds.asFactor.R b/R/ds.asFactor.R
index 476f00f85..8e5fbd090 100644
--- a/R/ds.asFactor.R
+++ b/R/ds.asFactor.R
@@ -48,7 +48,7 @@
#' \code{baseline.level = 1} and \code{forced.factor.levels = c(1,2,3,4,5)}.
#' The input vector is converted to the following matrix of dummy variables:
#'
-#' \tabular{rrrrr}{
+#' \tabular{rrrr}{
#' \strong{DV2} \tab \strong{DV3} \tab \strong{DV4} \tab \strong{DV5} \cr
#' 0 \tab 0 \tab 0 \tab 0\cr
#' 1 \tab 0 \tab 0 \tab 0\cr
diff --git a/R/ds.asNumeric.R b/R/ds.asNumeric.R
index 7b4da435e..3e2b445fa 100644
--- a/R/ds.asNumeric.R
+++ b/R/ds.asNumeric.R
@@ -5,7 +5,7 @@
#' @details This function is based on the native R function \code{as.numeric}.
#' However, it behaves differently with some specific classes of variables. For example, if the input
#' object is of class factor, it first converts its values into characters and then convert those to
-#' numerics. This behavior is important for the case where the input object is of class factor having
+#' numerics. This behaviour is important for the case where the input object is of class factor having
#' numbers as levels. In that case, the native R
#' \code{as.numeric} function returns the underlying level codes and not the values as numbers.
#' For example \code{as.numeric} in R converts the factor vector: \cr
diff --git a/R/ds.cbind.R b/R/ds.cbind.R
index d943e0175..e21cb961c 100644
--- a/R/ds.cbind.R
+++ b/R/ds.cbind.R
@@ -157,7 +157,7 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob
}
colNames <- unlist(colNames)
if(anyDuplicated(colNames) != 0){
- cat("\n Warning: Some column names in study", j, "are duplicated and a suffix '.k' will be added to the kth replicate \n")
+ message("\n Warning: Some column names in study", j, "are duplicated and a suffix '.k' will be added to the kth replicate \n")
}
}
}
@@ -198,7 +198,7 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob
next.class <- DSI::datashield.aggregate(datasources[std], calltext1)
class.vector <- c(class.vector, next.class[[1]])
if (notify.of.progress){
- cat("\n",j," of ", length(x), " elements to combine in step 1 of 2 in study ", std, "\n")
+ message("\n",j," of ", length(x), " elements to combine in step 1 of 2 in study ", std, "\n")
}
}
for(j in 1:length(x)){
@@ -206,14 +206,14 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob
if(class.vector[j]!="data.frame" && class.vector[j]!="matrix"){
colname.vector <- c(colname.vector, test.df)
if (notify.of.progress){
- cat("\n",j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
+ message("\n",j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
}
}else{
calltext2 <- call('colnamesDS', test.df)
df.names <- DSI::datashield.aggregate(datasources[std], calltext2)
colname.vector <- c(colname.vector, df.names[[1]])
if (notify.of.progress){
- cat("\n", j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
+ message("\n", j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
}
}
}
@@ -221,7 +221,7 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob
}
if (notify.of.progress){
- cat("\nBoth steps in all studies completed\n")
+ message("\nBoth steps in all studies completed\n")
}
# prepare name vectors for transmission
diff --git a/R/ds.colnames.R b/R/ds.colnames.R
index a4b98b1ad..a9e802523 100644
--- a/R/ds.colnames.R
+++ b/R/ds.colnames.R
@@ -1,51 +1,51 @@
#'
#' @title Produces column names of the R object in the server-side
-#' @description Retrieves column names of an R object on the server-side.
+#' @description Retrieves column names of an R object on the server-side.
#' This function is similar to R function \code{colnames}.
-#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}.
-#'
+#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}.
+#'
#' Server function called: \code{colnamesDS}
#' @param x a character string providing the name of the input data frame or matrix.
-#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
+#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
#' If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
-#' @return \code{ds.colnames} returns the column names of
-#' the specified server-side data frame or matrix.
+#' @return \code{ds.colnames} returns the column names of
+#' the specified server-side data frame or matrix.
#' @author DataSHIELD Development Team
#' @seealso \code{\link{ds.dim}} to obtain the dimensions of a matrix or a data frame.
-#' @examples
+#' @examples
#' \dontrun{
-#'
+#'
#' ## Version 6, for version 5 see the Wiki
#' # Connecting to the Opal servers
-#'
+#'
#' require('DSI')
#' require('DSOpal')
#' require('dsBaseClient')
-#'
+#'
#' builder <- DSI::newDSLoginBuilder()
-#' builder$append(server = "study1",
-#' url = "http://192.168.56.100:8080/",
-#' user = "administrator", password = "datashield_test&",
+#' builder$append(server = "study1",
+#' url = "http://192.168.56.100:8080/",
+#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM1", driver = "OpalDriver")
-#' builder$append(server = "study2",
-#' url = "http://192.168.56.100:8080/",
-#' user = "administrator", password = "datashield_test&",
+#' builder$append(server = "study2",
+#' url = "http://192.168.56.100:8080/",
+#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM2", driver = "OpalDriver")
#' builder$append(server = "study3",
-#' url = "http://192.168.56.100:8080/",
-#' user = "administrator", password = "datashield_test&",
+#' url = "http://192.168.56.100:8080/",
+#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM3", driver = "OpalDriver")
#' logindata <- builder$build()
-#'
+#'
#' # Log onto the remote Opal training servers
-#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
-#'
+#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+#'
#' # Getting column names of the R objects stored in the server-side
#' ds.colnames(x = "D",
#' datasources = connections[1]) #only the first server ("study1") is used
#' # Clear the Datashield R sessions and logout
-#' datashield.logout(connections)
+#' datashield.logout(connections)
#' }
#' @export
#'
@@ -65,17 +65,6 @@ ds.colnames <- function(x=NULL, datasources=NULL) {
stop("Please provide the name of a data.frame or matrix!", call.=FALSE)
}
- # check if the input object(s) is(are) defined in all the studies
- defined <- isDefined(datasources, x)
-
- # call the internal function that checks the input object is of the same class in all studies.
- typ <- checkClass(datasources, x)
-
- # if the input object is not a matrix or a dataframe stop
- if(!('data.frame' %in% typ) & !('matrix' %in% typ)){
- stop("The input vector must be of type 'data.frame' or a 'matrix'!", call.=FALSE)
- }
-
cally <- call("colnamesDS", x)
column_names <- DSI::datashield.aggregate(datasources, cally)
diff --git a/R/ds.contourPlot.R b/R/ds.contourPlot.R
index 4e195e48b..f1fbb3bd8 100644
--- a/R/ds.contourPlot.R
+++ b/R/ds.contourPlot.R
@@ -120,6 +120,10 @@ ds.contourPlot <- function(x=NULL, y=NULL, type='combine', show='all', numints=2
stop("y=NULL. Please provide the names of two numeric vectors!", call.=FALSE)
}
+ # Save par and setup reseting of par values
+ old_par <- graphics::par(no.readonly = TRUE)
+ on.exit(graphics::par(old_par), add = TRUE)
+
# check if the input objects are defined in all the studies
isDefined(datasources, x)
isDefined(datasources, y)
diff --git a/R/ds.corTest.R b/R/ds.corTest.R
index 38fffceb9..3c9e42a81 100644
--- a/R/ds.corTest.R
+++ b/R/ds.corTest.R
@@ -10,7 +10,7 @@
#' used for the test. One of "pearson", "kendall", or "spearman", can be abbreviated.
#' Default is set to "pearson".
#' @param exact a logical indicating whether an exact p-value should be computed. Used for
-#' Kendall's tau and Spearman's rho. See ‘Details’ of R stats function \code{cor.test} for
+#' Kendall's tau and Spearman's rho. See \emph{Details} of R stats function \code{cor.test} for
#' the meaning of NULL (the default).
#' @param conf.level confidence level for the returned confidence interval. Currently
#' only used for the Pearson product moment correlation coefficient if there are at least
diff --git a/R/ds.dataFrame.R b/R/ds.dataFrame.R
index 5837747c6..eeddcdd90 100644
--- a/R/ds.dataFrame.R
+++ b/R/ds.dataFrame.R
@@ -137,7 +137,7 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T
}
colNames <- unlist(colNames)
if(anyDuplicated(colNames) != 0){
- cat("\n Warning: Some column names in study", j, "are duplicated and a suffix '.k' will be added to the kth replicate \n")
+ message("\n Warning: Some column names in study", j, "are duplicated and a suffix '.k' will be added to the kth replicate \n")
}
}
}
@@ -178,7 +178,7 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T
next.class <- DSI::datashield.aggregate(datasources[std], calltext1)
class.vector <- c(class.vector, next.class[[1]])
if (notify.of.progress){
- cat("\n",j," of ", length(x), " elements to combine in step 1 of 2 in study ", std, "\n")
+ message("\n",j," of ", length(x), " elements to combine in step 1 of 2 in study ", std, "\n")
}
}
for(j in 1:length(x)){
@@ -186,14 +186,14 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T
if(class.vector[j]!="data.frame" && class.vector[j]!="matrix"){
colname.vector <- c(colname.vector, test.df)
if (notify.of.progress){
- cat("\n",j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
+ message("\n",j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
}
}else{
calltext2 <- call('colnamesDS', test.df)
df.names <- DSI::datashield.aggregate(datasources[std], calltext2)
colname.vector <- c(colname.vector, df.names[[1]])
if (notify.of.progress){
- cat("\n", j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
+ message("\n", j," of ", length(x), " elements to combine in step 2 of 2 in study ", std, "\n")
}
}
}
@@ -201,7 +201,7 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T
}
if (notify.of.progress){
- cat("\nBoth steps in all studies completed\n")
+ message("\nBoth steps in all studies completed\n")
}
# prepare vectors for transmission
diff --git a/R/ds.dataFrameSubset.R b/R/ds.dataFrameSubset.R
index 46878520b..1ae6278db 100644
--- a/R/ds.dataFrameSubset.R
+++ b/R/ds.dataFrameSubset.R
@@ -12,7 +12,7 @@
#'
#' Server functions called: \code{dataFrameSubsetDS1} and \code{dataFrameSubsetDS2}
#'
-#' @param df.name a character string providing the name of the data frame to be subseted.
+#' @param df.name a character string providing the name of the data frame to be subset.
#' @param V1.name A character string specifying the name of the vector
#' to which the Boolean operator is to be applied to define the subset.
#' For more information see details.
@@ -231,13 +231,13 @@ if(!is.null(rm.cols)){
if (notify.of.progress)
{
if(num.messages==1){
- cat("\nSource",s,"\n",return.warning.message[[s]][[1]],"\n")
+ message("\nSource",s,"\n",return.warning.message[[s]][[1]],"\n")
}else{
- cat("\nSource",s,"\n")
+ message("\nSource",s,"\n")
for(m in 1:(num.messages-1)){
- cat(return.warning.message[[s]][[m]],"\n")
+ message(return.warning.message[[s]][[m]],"\n")
}
- cat(return.warning.message[[s]][[num.messages]],"\n")
+ message(return.warning.message[[s]][[num.messages]],"\n")
}
}
}
diff --git a/R/ds.dmtC2S.R b/R/ds.dmtC2S.R
index 1f91efbfe..085d198fb 100644
--- a/R/ds.dmtC2S.R
+++ b/R/ds.dmtC2S.R
@@ -29,7 +29,7 @@
#' wish to change the connections you wish to use by default the call
#' datashield.connections_default('opals.a') will set 'default.connections'
#' to be 'opals.a' and so in the absence of specific instructions to the contrary
#' (e.g. by specifying a particular dataset to be used via the Paul Burton. Author.
+ Rebecca Wilson. Author.
+ Olly Butters. Author.
+ Patricia Ryser-Welch. Author.
+ Tom Bishop. Author.
+ Stuart Wheater. Author, maintainer.
Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Wheater S (2025).
-dsBaseClient: DataSHIELD Client Functions.
+ Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà-Montagut X, Wheater S (????).
+dsBaseClient: 'DataSHIELD' Client Side Base Functions.
R package version 6.4.0.9000.
 Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014).
+“DataSHIELD: taking the analysis to the data, not the data to the analysis.”
+International Journal of Epidemiology, 43(6), 1929–1944.
+doi:10.1093/ije/dyu188.
+ Wilson R, Butters O, Avraam D, Baker J, Tedds J, Turner A, Murtagh M, Burton P (2017).
+“DataSHIELD – New Directions and Dimensions.”
+Data Science Journal, 16(21), 1–21.
+doi:10.5334/dsj-2017-021.
+ Avraam D, Wilson R, Aguirre Chan N, Banerjee S, Bishop T, Butters O, Cadman T, Cederkvist L, Duijts L, Escribà Montagut X, Garner H, Gonçalves G, Gonzålez J, Haakma S, Hartlev M, Hasenauer J, Huth M, Hyde E, Jaddoe V, Marcon Y, Mayrhofer M, Molnar-Gabor F, Morgan A, Murtagh M, Nestor M, Nybo Andersen A, Parker S, Pinot de Moira A, Schwarz F, Strandberg-Larsen K, Swertz M, Welten M, Wheater S, Burton P (2024).
+“DataSHIELD: mitigating disclosure risk in a multi-site federated analysis platform.”
+Bioinformatics Advances, 5(1), 1–21.
+doi:10.1093/bioadv/vbaf046.
+Page not found (404)
diff --git a/docs/LICENSE.html b/docs/LICENSE.html
index 40bc80ee8..515536140 100644
--- a/docs/LICENSE.html
+++ b/docs/LICENSE.html
@@ -1,5 +1,5 @@
-NA
diff --git a/docs/authors.html b/docs/authors.html
index d662e9c57..a4be467aa 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -1,5 +1,5 @@
-Authors and Citation
Authors and Citation
Citation
- @Manual{,
- title = {dsBaseClient: DataSHIELD Client Functions},
- author = {Paul Burton and Rebecca Wilson and Olly Butters and Patricia Ryser-Welch and Alex Westerberg and Leire Abarrategui and Roberto Villegas-Diaz and Demetris Avraam and Yannick Marcon and Stuart Wheater},
- year = {2025},
+ title = {dsBaseClient: 'DataSHIELD' Client Side Base Functions},
+ author = {Paul Burton and Rebecca Wilson and Olly Butters and Patricia Ryser-Welch and Alex Westerberg and Leire Abarrategui and Roberto Villegas-Diaz and Demetris Avraam and Yannick Marcon and Tom Bishop and Amadou Gaye and Xavier Escribà-Montagut and Stuart Wheater},
note = {R package version 6.4.0.9000},
}
+ @Article{,
+ title = {{DataSHIELD: taking the analysis to the data, not the data to the analysis}},
+ author = {Amadou Gaye and Yannick Marcon and Julia Isaeva and Philippe {LaFlamme} and Andrew Turner and Elinor M Jones and Joel Minion and Andrew W Boyd and Christopher J Newby and Marja-Liisa Nuotio and Rebecca Wilson and Oliver Butters and Barnaby Murtagh and Ipek Demir and Dany Doiron and Lisette Giepmans and Susan E Wallace and Isabelle Budin-Lj{\o}sne and Carsten O. Schmidt and Paolo Boffetta and Mathieu Boniol and Maria Bota and Kim W Carter and Nick {deKlerk} and Chris Dibben and Richard W Francis and Tero Hiekkalinna and Kristian Hveem and Kirsti Kval{\o}y and Sean Millar and Ivan J Perry and Annette Peters and Catherine M Phillips and Frank Popham and Gillian Raab and Eva Reischl and Nuala Sheehan and Melanie Waldenberger and Markus Perola and Edwin {{van den Heuvel}} and John Macleod and Bartha M Knoppers and Ronald P Stolk and Isabel Fortier and Jennifer R Harris and Bruce H R Woffenbuttel and Madeleine J Murtagh and Vincent Ferretti and Paul R Burton},
+ journal = {International Journal of Epidemiology},
+ year = {2014},
+ volume = {43},
+ number = {6},
+ pages = {1929--1944},
+ doi = {10.1093/ije/dyu188},
+}
+ @Article{,
+ title = {{DataSHIELD – New Directions and Dimensions}},
+ author = {Rebecca C. Wilson and Oliver W. Butters and Demetris Avraam and James Baker and Jonathan A. Tedds and Andrew Turner and Madeleine Murtagh and Paul R. Burton},
+ journal = {Data Science Journal},
+ year = {2017},
+ volume = {16},
+ number = {21},
+ pages = {1--21},
+ doi = {10.5334/dsj-2017-021},
+}
+ @Article{,
+ title = {{DataSHIELD: mitigating disclosure risk in a multi-site federated analysis platform}},
+ author = {Demetris Avraam and Rebecca C Wilson and Noemi {{Aguirre Chan}} and Soumya Banerjee and Tom R P Bishop and Olly Butters and Tim Cadman and Luise Cederkvist and Liesbeth Duijts and Xavier {{Escrib{\a`a} Montagut}} and Hugh Garner and Gon{\c c}alo {Gon{\c c}alves} and Juan R Gonz{\a'a}lez and Sido Haakma and Mette Hartlev and Jan Hasenauer and Manuel Huth and Eleanor Hyde and Vincent W V Jaddoe and Yannick Marcon and Michaela Th Mayrhofer and Fruzsina Molnar-Gabor and Andrei Scott Morgan and Madeleine Murtagh and Marc Nestor and Anne-Marie {{Nybo Andersen}} and Simon Parker and Angela {{Pinot de Moira}} and Florian Schwarz and Katrine Strandberg-Larsen and Morris A Swertz and Marieke Welten and Stuart Wheater and Paul R Burton},
+ journal = {Bioinformatics Advances},
+ year = {2024},
+ volume = {5},
+ number = {1},
+ pages = {1--21},
+ doi = {10.1093/bioadv/vbaf046},
+ editor = {Thomas Lengauer},
+ publisher = {Oxford University Press (OUP)},
+}
@@ -106,11 +164,11 @@ Citation
diff --git a/docs/index.html b/docs/index.html
index 83ad68969..42c7f7f3c 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -1,18 +1,18 @@
-
+
-
+install.packages("remotes")
+remotes::install_github("datashield/dsBaseClient", "<BRANCH>")
+
+# Install v6.3.5 with the following
+remotes::install_github("datashield/dsBaseClient", "6.3.5") For a full list of development branches, check out https://github.com/datashield/dsBaseClient/branches
+ +DataSHIELD is a software package which allows you to do non-disclosive federated analysis on sensitive data. Our website (https://www.datashield.org) has in depth descriptions of what it is, how it works and how to install it. A key point to highlight is that DataSHIELD has a client-server infrastructure, so the dsBase package (https://github.com/datashield/dsBase) needs to be used in conjunction with the dsBaseClient package (https://github.com/datashield/dsBaseClient) - trying to use one without the other makes no sense.
Detailed instructions on how to install DataSHIELD are at https://www.datashield.org/wiki.
Discussion and help with using DataSHIELD can be obtained from The DataSHIELD Forum https://datashield.discourse.group/
The code here is organised as:
@@ -93,6 +107,21 @@[1] Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà Montagut X, Wheater S (2025). dsBaseClient: ‘DataSHIELD’ Client Side Base Functions. R package version 6.3.5.
+[2] Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, Oliver Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014). “DataSHIELD: taking the analysis to the data, not the data to the analysis.” International Journal of Epidemiology, 43(6), 1929-1944. https://doi.org/10.1093/ije/dyu188.
+[3] Wilson R, W. Butters O, Avraam D, Baker J, Tedds J, Turner A, Murtagh M, R. Burton P (2017). “DataSHIELD – New Directions and Dimensions.” Data Science Journal, 16(21), 1-21. https://doi.org/10.5334/dsj-2017-021.
+[4] Avraam D, Wilson R, Aguirre Chan N, Banerjee S, Bishop T, Butters O, Cadman T, Cederkvist L, Duijts L, Escribà Montagut X, Garner H, Gonçalves G, González J, Haakma S, Hartlev M, Hasenauer J, Huth M, Hyde E, Jaddoe V, Marcon Y, Mayrhofer M, Molnar-Gabor F, Morgan A, Murtagh M, Nestor M, Nybo Andersen A, Parker S, Pinot de Moira A, Schwarz F, Strandberg-Larsen K, Swertz M, Welten M, Wheater S, Burton P (2024). “DataSHIELD: mitigating disclosure risk in a multi-site federated analysis platform.” Bioinformatics Advances, 5(1), 1-21. https://doi.org/10.1093/bioadv/vbaf046.
++Note: Apple Mx architecture users, please be aware that there are some numerical limitations on this platform, which lead to unexpected results when using base R packages, like stats.
+x <- c(0, 3, 7)
+1 - cor(x, x)
+The above should result in a value of zero.
+Also See: For more details see https://cran.r-project.org/doc/FAQ/R-FAQ.html#Why-doesn_0027t-R-think-these-numbers-are-equal_003f and the bug report: https://bugs.r-project.org/show_bug.cgi?id=18941
+
dot-pool_md_patterns.RdInternal function to pool md.pattern results from multiple studies
+.pool_md_patterns(patterns_list, study_names)Pooled pattern matrix
+fixed.dummy.vars = TRUE,
baseline.level = 1 and forced.factor.levels = c(1,2,3,4,5).
The input vector is converted to the following matrix of dummy variables:
-| DV2 | DV3 | DV4 | DV5 | 0 |
| 0 | 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 | 0 |
| 0 | 0 | 1 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 1 | 0 |
For the same example if the baseline.level = 3 then the matrix is:
| DV2 | DV3 | DV4 | DV5 |
| 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 1 |
For the same example if the baseline.level = 3 then the matrix is:
| DV1 | DV2 | DV4 | DV5 |
| 1 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 1 |
In the first instance the first row of the matrix has zeros in all entries indicating
that the first data point belongs to level 1 (as the baseline level is equal to 1).
The second row has 1 at the first (DV2) column and zeros elsewhere,
@@ -229,11 +229,11 @@
a logical indicating whether an exact p-value should be computed. Used for
-Kendall's tau and Spearman's rho. See âDetailsâ of R stats function cor.test for
+Kendall's tau and Spearman's rho. See Details of R stats function cor.test for
the meaning of NULL (the default).
a character string providing the name of the data frame to be subseted.
a character string providing the name of the data frame to be subset.
Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Yannick Marcon, Stuart Wheater.
+Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Yannick Marcon, Tom Bishop, Amadou Gaye, Xavier Escribà-Montagut, Stuart Wheater.
diff --git a/docs/reference/ds.forestplot.html b/docs/reference/ds.forestplot.html index 68c8d1dd3..81fa806d7 100644 --- a/docs/reference/ds.forestplot.html +++ b/docs/reference/ds.forestplot.html @@ -1,5 +1,5 @@ -International standards for newborn weight, length, and head circumference by -gestational age and sex: the Newborn Cross-Sectional Study of the INTERGROWTH-21st Project -Villar, José et al. The Lancet, Volume 384, Issue 9946, 857-868 -INTERGROWTH-21st very preterm size at birth reference charts. Lancet 2016 -doi.org/10.1016/S0140-6736(16) 00384-6. Villar, José et al.
-Villar, J., Ismail, L.C., Victora, C.G., Ohuma, E.O., Bertino, E., + Altman, D.G., Lambert, A., Papageorghiou, A.T., Carvalho, M., Jaffer, Y.A., + Gravett, M.G., Purwar, M., Frederick, I.O., Noble, A.J., Pang, R., Barros, + F.C., Chumlea, C., Bhutta, Z.A., Kennedy, S.H., 2014. International + standards for newborn weight, length, and head circumference by gestational + age and sex: the Newborn Cross-Sectional Study of the INTERGROWTH-21st + Project. The Lancet 384, 857–868. https://doi.org/10.1016/S0140-6736(14)60932-6
Villar, J., Giuliani, F., Fenton, T.R., Ohuma, E.O., Ismail, L.C., + Kennedy, S.H., 2016. INTERGROWTH-21st very preterm size at birth reference + charts. The Lancet 387, 844–845. https://doi.org/10.1016/S0140-6736(16)00384-6
ds.mdPattern.RdThis function is a client-side wrapper for the server-side mdPatternDS +function. It generates a missing data pattern matrix similar to mice::md.pattern but +with disclosure control applied to prevent revealing small cell counts.
+ds.mdPattern(x = NULL, type = "split", datasources = NULL)a character string specifying the name of a data frame or matrix on the +server-side containing the data to analyze.
a character string specifying the output type. If 'split' (default), +returns separate patterns for each study. If 'combine', attempts to pool patterns +across studies.
a list of DSConnection-class objects obtained
+after login. If the datasources argument is not specified, the default set of
+connections will be used: see datashield.connections_default.
For type='split': A list with one element per study, each containing:
The missing data pattern matrix for that study
Logical indicating if all patterns meet disclosure requirements
A message describing the validity status
For type='combine': A list containing:
The pooled missing data pattern matrix across all studies
Logical indicating if all pooled patterns meet disclosure requirements
A message describing the validity status
The function calls the server-side mdPatternDS function which uses +mice::md.pattern to analyze missing data patterns. Patterns with counts below the +disclosure threshold (default: nfilter.tab = 3) are suppressed to maintain privacy.
+Output Format: +- Each row represents a missing data pattern +- Pattern counts are shown in row names (e.g., "150", "25") +- Columns show 1 if the variable is observed, 0 if missing +- Last column shows the total number of missing values per pattern +- Last row shows the total number of missing values per variable
+Disclosure Control:
+Suppressed patterns (count below threshold) are indicated by: +- Row name: "suppressed(<N>)" where N is the threshold +- All pattern values set to NA +- Summary row also suppressed to prevent back-calculation
+Pooling Behavior (type='combine'):
+When pooling across studies, the function uses a conservative approach +for disclosure control:
+1. Identifies identical missing patterns across studies +2. EXCLUDES suppressed patterns from pooling - patterns suppressed in + ANY study are not included in the pooled count +3. Sums counts only for non-suppressed identical patterns +4. Re-validates pooled counts against disclosure threshold
+Important: This conservative approach means: +- Pooled counts may be underestimates if some studies had suppressed patterns +- This prevents disclosure through subtraction (e.g., if study A shows count=5 + and pool shows count=7, one could deduce study B has count=2, violating disclosure) +- Different patterns across studies are preserved separately in the pooled result
+if (FALSE) { # \dontrun{
+ ## Version 6, for version 5 see the Wiki
+
+ # Connecting to the Opal servers
+
+ require('DSI')
+ require('DSOpal')
+ require('dsBaseClient')
+
+ builder <- DSI::newDSLoginBuilder()
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM1", driver = "OpalDriver")
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM2", driver = "OpalDriver")
+ logindata <- builder$build()
+
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+
+ # Get missing data patterns for each study separately
+ patterns_split <- ds.mdPattern(x = "D", type = "split", datasources = connections)
+
+ # View results for study1
+ print(patterns_split$study1$pattern)
+ # var1 var2 var3
+ # 150 1 1 1 0 <- 150 obs complete
+ # 25 0 1 1 1 <- 25 obs missing var1
+ # 25 0 0 25 <- Summary: 25 missing per variable
+
+ # Get pooled missing data patterns across studies
+ patterns_pooled <- ds.mdPattern(x = "D", type = "combine", datasources = connections)
+ print(patterns_pooled$pattern)
+
+ # Example with suppressed patterns:
+ # If study1 has a pattern with count=2 (suppressed) and study2 has same pattern
+ # with count=5 (valid), the pooled result will show count=5 (conservative approach)
+ # A warning will indicate: "Pooled counts may underestimate the true total"
+
+ # Clear the Datashield R sessions and logout
+ datashield.logout(connections)
+} # }
+
+Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Yannick Marcon, Stuart Wheater.
+Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Yannick Marcon, Tom Bishop, Amadou Gaye, Xavier Escribà-Montagut, Stuart Wheater.
diff --git a/docs/reference/ds.names.html b/docs/reference/ds.names.html index 312b4e817..25e3ca7f6 100644 --- a/docs/reference/ds.names.html +++ b/docs/reference/ds.names.html @@ -1,5 +1,5 @@ -Securely generate the ranks of a numeric vector and estimate -true qlobal quantiles across all data sources simultaneously
+true global quantiles across all data sources simultaneouslyThe function uses the R classical subsetting with squared brackets '[]' and allows also to -subset using a logical oprator and a threshold. The object to subset from must be a vector (factor, numeric -or charcater) or a table (data.frame or matrix).
+subset using a logical operator and a threshold. The object to subset from must be a vector (factor, numeric +or character) or a table (data.frame or matrix).(1) If the input data is a table the user specifies the rows and/or columns to include in the subset; the columns can be -refered to by their names. Table subsetting can also be done using the name of a variable and a threshold (see example 3). +referred to by their names. Table subsetting can also be done using the name of a variable and a threshold (see example 3). (2) If the input data is a vector and the parameters 'rows', 'logical' and 'threshold' are all provided the last two are ignored (i.e. 'rows' has precedence over the other two parameters then). IMPORTANT NOTE: If the requested subset is not valid (i.e. contains less than the allowed number of observations) all the values are @@ -115,7 +115,7 @@
ds.subsetByClass to subset by the classes of factor vector(s).
+ds.subsetByClass to subset by the classes of factor vector(s).
ds.meanByClass to compute mean and standard deviation across categories of a factor vectors.
This is an internal function required by the client function ds.glm
-to verify all the variables and ensure the process does not halt inadvertanly.
null or a numreric vector that can be used to specify an a priori known component to be +
null or a numeric vector that can be used to specify an a priori known component to be included in the linear predictor during fitting.
the variables are checked to ensure they are defined, not empty (i.e. are not missing -at complete) and evantually (if 'offset' or 'weights') are of 'numeric' with non negative value +at complete) and eventually (if 'offset' or 'weights') are of 'numeric' with non negative value (if 'weights').
Compute Weighted Mean by Group
Converts a server-side R object into Boolean indicators
ds.matrixTranspose()
Transposes a server-side matrix
Display missing data patterns with disclosure control
ds.sqrt()
Computes the square root values of a variable
Generates a valid subset of a table or a vector
Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Yannick Marcon, Stuart Wheater.
+Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Yannick Marcon, Tom Bishop, Amadou Gaye, Xavier Escribà-Montagut, Stuart Wheater.
This function is called to turn a logical oprator given as a +
This function is called to turn a logical operator given as a character into an integer: '>' is turned into 1, '>=' into 2, '<' into 3, '<=' into 4, '==' into 5 and '!=' into 6.
a character which represents the type of analysis to carry out. If type is set to
'combine', a pooled table of results is generated. If type is set to 'split', a table of results
-is genrated for each study.
a character which represents the type of analysis to carry out. If type is set to
'combine', a pooled table of results is generated. If type is set to 'split', a table of results
-is genrated for each study.
This function is called by the function 'ds.meanByClass' to produce the final tables -if the user soecify a table structure.
+if the user specify a table structure.a list, holds informations about invalid subsets in each study.
a list, holds information about invalid subsets in each study.
This function is called by the function 'ds.meanByClass' to produce the final table -if the user sets the parmater 'type' to combine (the default behaviour of 'ds.meanByClass').
+if the user sets the parameter 'type' to combine (the default behaviour of 'ds.meanByClass').a list, holds informations about invalid subsets in each study
a list, holds information about invalid subsets in each study
This function is called by the function 'ds.meanByClass' to produce the final tables -if the user sets the parmater 'type' to 'split'.
+if the user sets the parameter 'type' to 'split'.