Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 33 additions & 21 deletions R/recordSwap.R
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ recordSwap.default <- function(data, hid, hierarchy, similar,
N <- NULL
vhid <- names(data)[hid + 1]
vidx <- names(data)[idx + 1]
agg <- data[, .(N = length(unique(get(vidx)))), by = vhid]
agg <- unique(subset(data, select = c(vhid, vidx)))
agg <- agg[, .N, by = c(vhid)]
agg <- agg[N != 1]
if (nrow(agg) > 0) {
msg <- paste(
Expand Down Expand Up @@ -349,7 +350,7 @@ recordSwap.default <- function(data, hid, hierarchy, similar,
stop("risk and hierarchy need to address the same number of columns!")
}
risk <- checkIndexString(risk,cnames,minLength = 1)
risk <- data[,c(risk+1)]
risk <- subset(data, select = risk+1)
}else{
if(all(!class(risk)%in%c("data.table","data.frame","matrix"))){
stop("If risk is not a vector containing column indices or column names in data then risk must be either a data.table, data.frame or matrix!")
Expand All @@ -364,12 +365,12 @@ recordSwap.default <- function(data, hid, hierarchy, similar,

cnamesrisk <- copy(colnames(risk))
risk <- data.table(risk)

if(nrow(risk)>0){
if(is.null(cnamesrisk)){
message("risk does not contain column names; the first column in risk will be used for the first hierarchy level, e.g ",cnames[hierarchy[1]+1]," and so on.")
}else{
if(!any(cnamesrisk%in%cnames[hierarchy+1])){
if(!any(cnamesrisk%in%cnames)){
stop("the columnnames of risk do not appear in data")
}
}
Expand All @@ -384,21 +385,16 @@ recordSwap.default <- function(data, hid, hierarchy, similar,
# every member of the household the highest value in the household.
risk_variables_names <- copy(colnames(risk))
risk[,hid_help:=data[[hid+1]]]
tryCatch(
expr = risk[,lapply(.SD,
function(z){
if( (length(unique(z)) > 1)) {stop()} else {0}
}), # error when value not equal 0
.SDcols=c(risk_variables_names),
by=.(hid_help)], # calculate if each household have unique values
error = function(e){
message("risk was adjusted in order to give each household member the maximum household risk value")
risk[,c(risk_variables_names):=lapply(.SD,max),
.SDcols=c(risk_variables_names),
by=.(hid_help)] # assign to each household its max value
risk[,hid_help:=NULL]
}
)

risk_check <- unique(subset(risk, select = c("hid_help", risk_variables_names)))
risk_check <- risk_check[,.N,by=.(hid_help)]
if(any(risk_check[["N"]]>1)){
warning("risk was adjusted in order to give each household member the maximum household risk value")
risk[,c(risk_variables_names):=lapply(.SD,max),
.SDcols=c(risk_variables_names),
by=.(hid_help)] # assign to each household its max value
risk[,hid_help:=NULL]
}
}

# check seed
Expand Down Expand Up @@ -452,10 +448,19 @@ recordSwap.default <- function(data, hid, hierarchy, similar,
which(original_cols %in% z) -1
})
}


# check if any factors are present and cast to integer
is_factor <- sapply(data_sw, is.factor)
factor_variables <- NULL
if(any(is_factor)){
factor_variables <- colnames(data_sw)[is_factor]
factor_variables_levels <- lapply(subset(data_sw, select = factor_variables),levels)
data_sw[,c(factor_variables) := lapply(.SD, as.integer),.SDcols=c(factor_variables)]
}

# check if any non numeric values are present in data
if(any(!unlist(apply(data_sw,2,is.numeric)))){
stop("Columns specified in hid, hierarchy, similar and carry_along must contain only integer values at this point")
stop("Columns specified in hid, hierarchy, similar, risk_variables and carry_along must contain only integer values at this point")
}

# check if any values with NA values are present in data
Expand Down Expand Up @@ -508,6 +513,13 @@ recordSwap.default <- function(data, hid, hierarchy, similar,
rm(data_sw)
setcolorder(data,cnames)
data[,helpVariableforMergingAfterTRS:=NULL]

if(!is.null(factor_variables)){
for(s in factor_variables){
data[, c(s) := factor(get(s), labels = factor_variables_levels[[s]])]
}
}


return(data)
}
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_recordSwap_inputs.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ test_that("test para - data, hid, hierarchy, similar, risk_variables, carry_alon
risk_variables = risk_variables,
carry_along = NULL,
return_swapped_id = TRUE,
seed=seed),"Columns specified in hid, hierarchy, similar and carry\\_along must contain only integer values at this point")
seed=seed),"Columns specified in hid, hierarchy, similar, risk_variables and carry\\_along must contain only integer values at this point")
dat[,c("h_extra","h_extra2"):=NULL]
#################################

Expand Down
18 changes: 9 additions & 9 deletions tests/testthat/test_recordSwap_outputs.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,15 +220,15 @@ test_that("test similarity profiles",{

test_that("test risk parameter - number of hd swaps)",{

dat_s <- recordSwap(data = dat, hid = hid, hierarchy = hier,
similar,
risk = risk, # TESTING
risk_threshold = risk_threshold, # TESTING
carry_along = NULL,
return_swapped_id = TRUE,
seed=seed,
swaprate = 0 # to test swaping based only on risk values
)
dat_s <- expect_warning(recordSwap(data = dat, hid = hid, hierarchy = hier,
similar,
risk = risk, # TESTING
risk_threshold = risk_threshold, # TESTING
carry_along = NULL,
return_swapped_id = TRUE,
seed=seed,
swaprate = 0 # to test swaping based only on risk values
),"risk was adjusted in order to give each household member the maximum household risk value")

# check that swapped regions are identical for each household
dat_check <- dat_s[,lapply(.SD,uniqueN),by=.(hid),.SDcols=c(hier)]
Expand Down