From 6f1491b121d40e45b0c1cf7a229b1dd5adea6bc6 Mon Sep 17 00:00:00 2001
From: aliaksah
Date: Fri, 21 Nov 2025 12:23:05 +0100
Subject: [PATCH 01/15] cleaned up the repo for resubmission

---
 {tests_current => R_script}/JSS_Script.R | 32 +-
 .../oldvers}/Ex12_Sec6_5.R | 0
 .../oldvers}/Ex2_Sec4_1.R | 0
 .../oldvers}/Ex3_Sec4_2.R | 0
 R_script/oldvers/Ex4_Sec4_2.R | 84 ++
 .../oldvers}/Ex4_Sec4_3.R | 0
 .../oldvers}/Ex5_Sec4_3.R | 0
 R_script/oldvers/Ex6_Sec4_4.R | 226 +++++
 .../oldvers}/Ex6_Sec5_1.R | 0
 R_script/oldvers/Ex7_Sec5.1.R | 311 +++++++
 R_script/oldvers/Ex7_Sec5.2.R | 297 +++++++
 .../oldvers}/Ex7_Sec5_2.R | 0
 .../oldvers}/Ex8_Sec6_1.R | 8 +-
 .../oldvers}/Ex9_Sec6_2.R | 0
 R_script/oldvers/FBMS-guide.R | 354 ++++++++
 R_script/oldvers/bacteremia.R | 155 ++++
 R_script/oldvers/fix impute jon.R | 144 ++++
 R_script/oldvers/gg.txt | 48 ++
 R_script/oldvers/kristoffer.R | 80 ++
 R_script/oldvers/likelihoods2.R | 800 ++++++++++++++++++
 R_script/oldvers/new general estimators.R | 262 ++++++
 R_script/oldvers/synthetic_gaussian_data.csv | 48 ++
 tests_current/Ex10_Sec6_3.R | 102 ---
 tests_current/Ex11_Sec6_4.R | 185 ----
 tests_current/Ex1_Sec3.R | 213 -----
 25 files changed, 2829 insertions(+), 520 deletions(-)
 rename {tests_current => R_script}/JSS_Script.R (94%)
 rename {tests_current => R_script/oldvers}/Ex12_Sec6_5.R (100%)
 rename {tests_current => R_script/oldvers}/Ex2_Sec4_1.R (100%)
 rename {tests_current => R_script/oldvers}/Ex3_Sec4_2.R (100%)
 create mode 100644 R_script/oldvers/Ex4_Sec4_2.R
 rename {tests_current => R_script/oldvers}/Ex4_Sec4_3.R (100%)
 rename {tests_current => R_script/oldvers}/Ex5_Sec4_3.R (100%)
 create mode 100644 R_script/oldvers/Ex6_Sec4_4.R
 rename {tests_current => R_script/oldvers}/Ex6_Sec5_1.R (100%)
 create mode 100644 R_script/oldvers/Ex7_Sec5.1.R
 create mode 100644 R_script/oldvers/Ex7_Sec5.2.R
 rename {tests_current => R_script/oldvers}/Ex7_Sec5_2.R (100%)
 rename {tests_current => R_script/oldvers}/Ex8_Sec6_1.R (91%)
 rename {tests_current => R_script/oldvers}/Ex9_Sec6_2.R (100%)
 create mode 100644 R_script/oldvers/FBMS-guide.R
 create mode 100644 R_script/oldvers/bacteremia.R
 create mode 100644 R_script/oldvers/fix impute jon.R
 create mode 100644 R_script/oldvers/gg.txt
 create mode 100644 R_script/oldvers/kristoffer.R
 create mode 100644 R_script/oldvers/likelihoods2.R
 create mode 100644 R_script/oldvers/new general estimators.R
 create mode 100644 R_script/oldvers/synthetic_gaussian_data.csv
 delete mode 100644 tests_current/Ex10_Sec6_3.R
 delete mode 100644 tests_current/Ex11_Sec6_4.R
 delete mode 100644 tests_current/Ex1_Sec3.R

diff --git a/tests_current/JSS_Script.R b/R_script/JSS_Script.R
similarity index 94%
rename from tests_current/JSS_Script.R
rename to R_script/JSS_Script.R
index 6340913..17705a7 100644
--- a/tests_current/JSS_Script.R
+++ b/R_script/JSS_Script.R
@@ -4,7 +4,7 @@
 #
 # Kepler Example with the most recent database update
 #
 # Basic introduction of the FBMS package
 #
 ##################################################

@@ -82,7 +82,7 @@

 set.seed(123)

-result_parallel <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms,
+result.parallel <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms,
                         runs = 40, cores = parallel::detectCores()-1, P = 25)

@@ -105,10 +105,10 @@ summary(result.P50, pop = "last", labels = paste0("x",1:length(df.train[,-1])))
 summary(result.P50, pop = "last", tol = 0.01,
labels = paste0("x",1:length(df.train[,-1]))) summary(result.P50, pop = "all") -summary(result_parallel) +summary(result.parallel) library(tictoc) tic() -summary(result_parallel, tol = 0.01, pop = "all",data = df.train) +summary(result.parallel, tol = 0.01, pop = "all",data = df.train) toc() @@ -133,11 +133,11 @@ plot(result.P50) -pdf("result_parallel.pdf") -plot(result_parallel) +pdf("result.parallel.pdf") +plot(result.parallel) dev.off() -plot(result_parallel) +plot(result.parallel) #################################################### @@ -185,7 +185,7 @@ plot(predmean(preds.P50), df.test$semimajoraxis) ############################### -preds.multi <- predict(result_parallel , df.test[,-1], link = function(x) x) +preds.multi <- predict(result.parallel , df.test[,-1], link = function(x) x) rmse.parallel <- sqrt(mean((predmean(preds.multi) - df.test$semimajoraxis)^2)) pdf("pred_parallel.pdf") @@ -211,8 +211,8 @@ sqrt(mean((preds.mpm - df.test$semimajoraxis)^2)) -get.best.model(result = result_parallel) -preds.best_parallel <- predict(get.best.model(result_parallel), df.test[, -1]) +get.best.model(result = result.parallel) +preds.best_parallel <- predict(get.best.model(result.parallel), df.test[, -1]) sqrt(mean((preds.best_parallel - df.test$semimajoraxis)^2)) @@ -222,7 +222,7 @@ sqrt(mean((preds.best_parallel - df.test$semimajoraxis)^2)) coef(result.default) coef(result.P50) -coef(result_parallel) +coef(result.parallel) #################################################### @@ -245,10 +245,10 @@ diagn_plot(result.P50, ylim = c(600,1500), FUN = max) pdf("diagn_par.pdf") -diagn_plot(result_parallel, ylim = c(600,1500),FUN = max) +diagn_plot(result.parallel, ylim = c(600,1500),FUN = max) dev.off() -diagn_plot(result_parallel, ylim = c(600,1500),FUN = max) +diagn_plot(result.parallel, ylim = c(600,1500),FUN = max) @@ -287,7 +287,7 @@ df <- as.data.frame(sapply(Zambia[1:5],scale)) transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2") probs <- gen.probs.gmjmcmc(transforms) -probs$gen <- c(1,1,0,1) # Modifications and interactions! +probs$gen <- c(1/3,1/3,0,1/3) # Modifications and interactions! params <- gen.params.gmjmcmc(ncol(df) - 1) params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions) @@ -468,7 +468,7 @@ plot(result2a) # Analysis with fractional polynomials probs <- gen.probs.gmjmcmc(transforms) -probs$gen <- c(1,1,0,1) # Modifications and interactions! +probs$gen <- c(1/3,1/3,0,1/3) # Modifications and interactions! 
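# Note on reading probs$gen: its four entries are the generation probabilities
# for, in order, interactions, modifications, projections and mutations. This
# ordering is inferred from the comments used throughout these scripts, not
# quoted from the FBMS documentation. A minimal sketch of the vector set above:
setNames(c(1/3, 1/3, 0, 1/3),
         c("interaction", "modification", "projection", "mutation"))
# i.e. c(1/3,1/3,0,1/3) disables projections and splits the mass evenly.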
params <- gen.params.gmjmcmc(ncol(df) - 1)
 params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions)

@@ -488,7 +488,7 @@ summary(result2b,tol = 0.05,labels=names(df)[-1])
 # Analysis with non-linear projections
 transforms <- c("sigmoid")
 probs <- gen.probs.gmjmcmc(transforms)
-probs$gen <- c(0,0,1,1)
+probs$gen <- c(0,0,0.5,0.5)

 params <- gen.params.gmjmcmc(ncol(df) - 1)
 params$feat$pop.max = 10

diff --git a/tests_current/Ex12_Sec6_5.R b/R_script/oldvers/Ex12_Sec6_5.R
similarity index 100%
rename from tests_current/Ex12_Sec6_5.R
rename to R_script/oldvers/Ex12_Sec6_5.R
diff --git a/tests_current/Ex2_Sec4_1.R b/R_script/oldvers/Ex2_Sec4_1.R
similarity index 100%
rename from tests_current/Ex2_Sec4_1.R
rename to R_script/oldvers/Ex2_Sec4_1.R
diff --git a/tests_current/Ex3_Sec4_2.R b/R_script/oldvers/Ex3_Sec4_2.R
similarity index 100%
rename from tests_current/Ex3_Sec4_2.R
rename to R_script/oldvers/Ex3_Sec4_2.R
diff --git a/R_script/oldvers/Ex4_Sec4_2.R b/R_script/oldvers/Ex4_Sec4_2.R
new file mode 100644
index 0000000..13f4f8d
--- /dev/null
+++ b/R_script/oldvers/Ex4_Sec4_2.R
@@ -0,0 +1,84 @@
+#######################################################
+#
+# Example 4 (Section 4.2):
+#
+# Simulated data with interactions, using only fbms
+#
+# This is the valid version for the JSS Paper
+#
+#######################################################
+
+library(mvtnorm)
+library(FBMS)
+
+n <- 100 # sample size
+p <- 20  # number of covariates
+
+# Model:
+# X1: Pure main effect
+# X2 * X3: Pure interaction effect
+# X4 * X5: Main effects plus interaction effect
+
+
+set.seed(1003)
+
+x = rmvnorm(n, rep(0, p))
+X <- as.matrix(x)
+X <- scale(X)/sqrt(n)
+
+y <- (1.2 * x[,1] + 1.5 * x[,2]* x[,3] - x[,4] + 1.1*x[,5] - 1.3 * x[,4]*x[,5])+ rnorm(n)
+y <- scale(y)
+
+df <- data.frame(y = y, X)
+
+
+transforms <- c("")
+probs <- gen.probs.gmjmcmc(transforms)
+probs$gen <- c(1,0,0,1) # Include interactions and mutations
+
+####################################################
+#
+# single thread analysis (two different runs)
+#
+####################################################
+
+set.seed(123)
+result <- fbms(data = df, method = "gmjmcmc", transforms = transforms,
+               probs = probs)
+summary(result)
+
+
+set.seed(123)
+result2 <- fbms(data = df, method = "gmjmcmc", transforms = transforms,
+                N = 1000, probs = probs, P=40)
+summary(result2, tol = 0.01)
+
+
+####################################################
+#
+# multiple thread analysis
+#
+####################################################
+
+
+set.seed(123)
+
+result_parallel <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
+                        runs = 40, cores = 40,
+                        probs = probs, P=25)
+
+summary(result_parallel, tol = 0.01)
+
+
+
+# Using more iterations of the MJMCMC chains does not change the results substantially
+set.seed(123)
+
+result_parallel2 <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
+                         runs = 40, cores = 10, N=1000, N.final=2000,
+                         probs = probs, P=25)
+summary(result_parallel2, tol = 0.01)
+
+#summary(result_parallel2, pop = "all", tol = 0.01)
+
+
diff --git a/tests_current/Ex4_Sec4_3.R b/R_script/oldvers/Ex4_Sec4_3.R
similarity index 100%
rename from tests_current/Ex4_Sec4_3.R
rename to R_script/oldvers/Ex4_Sec4_3.R
diff --git a/tests_current/Ex5_Sec4_3.R b/R_script/oldvers/Ex5_Sec4_3.R
similarity index 100%
rename from tests_current/Ex5_Sec4_3.R
rename to R_script/oldvers/Ex5_Sec4_3.R
diff --git a/R_script/oldvers/Ex6_Sec4_4.R b/R_script/oldvers/Ex6_Sec4_4.R
new file mode 100644
index
0000000..370691c
--- /dev/null
+++ b/R_script/oldvers/Ex6_Sec4_4.R
@@ -0,0 +1,226 @@
+#######################################################
+#
+# Example 6 (Section 4.4):
+#
+# Prediction using non-linear projections, using only the fbms function
+#
+# DATA - abalone data set
+#
+# Data set is available at https://www.kaggle.com/datasets/rodolfomendes/abalone-dataset
+#
+# For convenience we provide the file abalone.csv, which already contains the variable names
+#
+#
+# This is the valid version for the JSS Paper
+#
+#######################################################
+
+
+library(FBMS)
+
+data("abalone")
+
+df = abalone
+df$Sex_F_vs_I = as.numeric(df$Sex == "F")
+df$Sex_M_vs_I = as.numeric(df$Sex == "M")
+df$Sex = as.factor(df$Sex)
+df$Rings = as.numeric(df$Rings)
+
+summary(df)
+
+
+# number of observations in the data
+
+n = dim(df)[1]
+
+# Create training and test datasets
+
+# Sam Waugh (1995) "Extending and benchmarking Cascade-Correlation", PhD
+# thesis, Computer Science Department, University of Tasmania.
+
+#-- Test set performance (final 1044 examples, first 3133 used for training):
+# 24.86% Cascade-Correlation (no hidden nodes)
+# 26.25% Cascade-Correlation (5 hidden nodes)
+# 21.5% C4.5
+# 0.0% Linear Discriminate Analysis
+# 3.57% k=5 Nearest Neighbour
+# (Problem encoded as a classification task)
+
+
+# remove variable sex because gmjmcmc cannot handle factor variables
+df.training = df[1:3133,-2]
+df.test = df[3134:n,-2]
+
+
+summary(df.training)
+
+
+pred.RMSE = rep(0,5) # To collect the prediction RMSEs of the five different methods
+
+pred.RMSE.mpm = rep(0,5)  # Same for MPM
+pred.RMSE.best = rep(0,5) # Same for posterior modes in the model space
+
+transforms = c("sigmoid")
+probs = gen.probs.gmjmcmc(transforms)
+probs$gen = c(0,0,1,1) #Only projections!
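+# What a projection feature computes, schematically: a nonlinear transform of a
+# linear combination of inputs, e.g. sigmoid(a0 + a1*x1 + a2*x2). A minimal
+# sketch (the weights below are illustrative, not values generated by FBMS):
+proj_sigmoid <- function(x1, x2, a = c(0, 1, 1)) {
+  1 / (1 + exp(-(a[1] + a[2] * x1 + a[3] * x2)))
+}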
+ + +############################################################################# +# +# Using method 0 for alpha (simply set to 1, default) +# +############################################################################# + +set.seed(5001) +result = fbms(data = df.training, method = "gmjmcmc", transforms = transforms, + probs = probs) +summary(result) + +pred = predict(result, x = df.test[,-1]) +pred.RMSE[1] = sqrt(mean((pred$aggr$mean - df.test$Rings)^2)) + +preds = predict(get.best.model(result), df.test[, -1]) +pred.RMSE.best[1] = sqrt(mean((preds - df.test$Rings)^2)) + +preds = predict(get.mpm.model(result, y = df.training$Rings, x = df.training[, -1]), df.test[, -1]) +pred.RMSE.mpm[1] = sqrt(mean((preds - df.test$Rings)^2)) + + +plot(pred$aggr$mean, df.test$Rings) + + + + + +############################################################################# +# +# Parallel version +# +############################################################################# + +set.seed(5003) +result_parallel = fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40, + transforms = transforms, probs = probs, P=25) +summary(result_parallel, tol = 0.05) + + + +pred_parallel = predict(result_parallel, x = df.test[,-1], link = function(x)(x)) +pred.RMSE[2] = sqrt(mean((pred_parallel$aggr$mean - df.test$Rings)^2)) + +preds = predict(get.best.model(result_parallel), df.test[, -1]) +pred.RMSE.best[2] = sqrt(mean((preds - df.test$Rings)^2)) + +preds = predict(get.mpm.model(result_parallel, y = df.training$Rings, x = df.training[, -1]), df.test[, -1]) +pred.RMSE.mpm[2] = sqrt(mean((preds - df.test$Rings)^2)) + + + +plot(pred_parallel$aggr$mean, df.test$Rings) +abline(0,1) + + + +############################################################################# +# +# Using method 3 to estimate alpha +# +############################################################################# + + +params = gen.params.gmjmcmc(ncol(df.training) - 1) +params$feat$alpha = "deep" + + +set.seed(5003) +result.a3 = fbms(data = df.training, method = "gmjmcmc", transforms = transforms, + probs = probs, params = params) +summary(result.a3) + + + +pred.a3 = predict(result.a3, x = df.test[,-1], link = function(x)(x)) +pred.RMSE[3] = sqrt(mean((pred.a3$aggr$mean - df.test$Rings)^2)) + +plot(pred.a3$aggr$mean, df.test$Rings) + + +preds = predict(get.best.model(result.a3), df.test[, -1]) +pred.RMSE.best[3] = sqrt(mean((preds - df.test$Rings)^2)) + +#not yet applicable to the deep method +#preds = predict(get.mpm.model(result.a3, y = df.training$Rings, x = df.training[, -1]), df.test[, -1]) +pred.RMSE.mpm[3] = NA#sqrt(mean((preds - df.test$Rings)^2)) + + + + +############################################################################# +# +# Parallel version params$feat$alpha = "random" +# +############################################################################# + +params$feat$alpha = "random" + +set.seed(5004) +result_parallel.a3 = fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40, + transforms = transforms, probs = probs, params = params, P=25) +summary(result_parallel.a3, tol = 0.05) + + + +pred_parallel.a3 = predict(result_parallel.a3, x = df.test[,-1], link = function(x)(x)) +pred.RMSE[4] = sqrt(mean((pred_parallel.a3$aggr$mean - df.test$Rings)^2)) + +preds = predict(get.best.model(result_parallel.a3), df.test[, -1]) +pred.RMSE.best[4] = sqrt(mean((preds - df.test$Rings)^2)) + +#preds = predict(get.mpm.model(result_parallel, y = df.training$Rings, x = df.training[, -1]), df.test[, -1]) 
+pred.RMSE.mpm[4] = NA #sqrt(mean((preds - df.test$Rings)^2))
+
+
+plot(pred_parallel.a3$aggr$mean, df.test$Rings)
+abline(0,1)
+
+
+
+#############################################################################
+#
+# Parallel version with fractional polynomials
+#
+#############################################################################
+
+transforms = c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2")
+probs = gen.probs.gmjmcmc(transforms)
+probs$gen = c(0,1,0,1) #Only modifications!
+
+set.seed(50005)
+result.fp = fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40,
+                 transforms = transforms, probs = probs, P=25)
+summary(result.fp)
+
+
+pred_fp = predict(result.fp, x = df.test[,-1], link = function(x)(x))
+pred.RMSE[5] = sqrt(mean((pred_fp$aggr$mean - df.test$Rings)^2))
+
+plot(pred_fp$aggr$mean, df.test$Rings)
+
+preds = predict(get.best.model(result.fp), df.test[, -1])
+pred.RMSE.best[5] = sqrt(mean((preds - df.test$Rings)^2))
+
+preds = predict(get.mpm.model(result.fp, y = df.training$Rings, x = df.training[, -1]), df.test[, -1])
+pred.RMSE.mpm[5] = sqrt(mean((preds - df.test$Rings)^2))
+
+
+
+#############################################################################
+#
+# Summary of predictions
+#
+#############################################################################
+
+round(pred.RMSE,3)
+round(pred.RMSE.best,3)
+round(pred.RMSE.mpm,3)
+
diff --git a/tests_current/Ex6_Sec5_1.R b/R_script/oldvers/Ex6_Sec5_1.R
similarity index 100%
rename from tests_current/Ex6_Sec5_1.R
rename to R_script/oldvers/Ex6_Sec5_1.R
diff --git a/R_script/oldvers/Ex7_Sec5.1.R b/R_script/oldvers/Ex7_Sec5.1.R
new file mode 100644
index 0000000..512bd9a
--- /dev/null
+++ b/R_script/oldvers/Ex7_Sec5.1.R
@@ -0,0 +1,311 @@
+#######################################################
+#
+# Example 7 (Section 5.1):
+#
+# Logic regression with a different model prior
+#
+# DATA - simulated
+#
+#
+#
+# This is the valid version for the JSS Paper
+#
+#######################################################
+
+#library(devtools)
+#devtools::install_github("jonlachmann/GMJMCMC@FBMS", force=T, build_vignettes=F)
+
+library(FBMS)
+use.fbms <- TRUE
+
+set.seed(1)
+X2 <- as.data.frame(array(data = rbinom(n = 50*2000,size = 1,prob = runif(n = 50*2000,0,1)),dim = c(2000,50)))
+Y2 <- rnorm(n = 2000,mean = 1+7*(X2$V4*X2$V17*X2$V30*X2$V10) + 9*(X2$V7*X2$V20*X2$V12)+ 3.5*(X2$V9*X2$V2)+1.5*(X2$V37),sd = 1)
+df <- data.frame(Y2,X2)
+summary(df)
+
+str(df)
+
+# number of observations in the data
+
+n = dim(df)[1]
+
+
+
+# Split the data into training and test datasets
+df.training <- df[1:1000,]
+df.test <- df[1001:n,]
+df.test$Mean <- (1+7*(X2$V4*X2$V17*X2$V30*X2$V10) + 9*(X2$V7*X2$V20*X2$V12)+ 3.5*(X2$V9*X2$V2)+1.5*(X2$V37))[1001:n]
+
+# FBMS - unlike the EMJMCMC package - does not explicitly provide GMJMCMC for logic
+# regression, but we can easily run it without an "or" operator, since "and" and
+# "not" handle "or" automatically via De Morgan's laws.
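+# A quick check of the De Morgan identity this relies on,
+# x OR y == NOT(NOT(x) AND NOT(y)), on binary vectors (illustrative only):
+a.demo <- c(0, 1, 0, 1); b.demo <- c(0, 0, 1, 1)
+all((a.demo | b.demo) == !((!a.demo) & (!b.demo)))  # TRUE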
+transforms <- c("not")
+probs <- gen.probs.gmjmcmc(transforms)
+probs$gen <- c(1,1,0,1) #No projections allowed
+
+
+params <- gen.params.gmjmcmc(ncol(df.training) - 1)
+params$feat$pop.max <- 51
+params$feat$L <- 15
+
+#############################################################################
+#
+# FBMS logic regression with a Jeffreys parameter prior
+#
+#############################################################################
+
+
+estimate.logic.lm = function(y, x, model, complex, mlpost_params)
+{
+
+  if (length(mlpost_params) == 0)
+    mlpost_params <- list(p = dim(x)[2]) # fallback: take p as the number of columns of x
+  suppressWarnings({
+    mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())
+  })
+
+  wj <- complex$width
+
+  lp <- sum(log(factorial(wj))) - sum(wj*log(mlpost_params$p) + (2*wj-2)*log(2))
+
+  mloglik <- -(mod$aic + (log(length(y))-2) * (mod$rank))/2
+
+  logpost <- mloglik + lp
+
+  if(logpost==-Inf)
+    logpost = -10000
+
+  return(list(crit = logpost, coefs = mod$coefficients))
+}
+
+
+
+#############################################################################
+#
+# Logic regression training
+#
+#############################################################################
+
+set.seed(5001)
+
+if (use.fbms) {
+  result <- fbms(formula = Y2~1+., data = df.training, family = "custom", loglik.pi = estimate.logic.lm, N = 500, N.final = 500, P = 25,
+                 method = "gmjmcmc", model_prior = list(p = 50), beta_prior = NULL, transforms = transforms,
+                 probs = probs, params = params)
+} else {
+  # result <- gmjmcmc(df.training, transforms = transforms, probs = probs)
+
+  result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.lm, N = 500, N.final = 500, P = 25,
+                    transforms = transforms, mlpost_params = list(p = 50), params = params, probs = probs)
+
+}
+summary(result)
+mpm <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm, params = list(p = 50))
+mbest <- get.best.model(result)
+
+
+pred <- predict(result, x = df.test[,-1], link = function(x)(x))
+pred_mpm <- predict(mpm, x = df.test[,-1], link = function(x)(x))
+pred_best <- predict(mbest, x = df.test[,-1], link = function(x)(x))
+
+
+#prediction errors
+sqrt(mean((pred$aggr$mean - df.test$Y2)^2))
+sqrt(mean((pred_best - df.test$Y2)^2))
+sqrt(mean((pred_mpm - df.test$Y2)^2))
+sqrt(mean((df.test$Mean - df.test$Y2)^2))
+
+#prediction errors to the true means
+sqrt(mean((pred$aggr$mean - df.test$Mean)^2))
+sqrt(mean((pred_best - df.test$Mean)^2))
+sqrt(mean((pred_mpm - df.test$Mean)^2))
+
+
+
+plot(pred$aggr$mean, df.test$Y2)
+points(pred$aggr$mean,df.test$Mean,col = 2)
+points(pred_best,df.test$Mean,col = 3)
+points(pred_mpm,df.test$Mean,col = 4)
+
+
+
+
+#############################################################################
+#
+# Parallel version: 16 chains on 8 cores
+#
+#############################################################################
+
+
+set.seed(5002)
+
+if (use.fbms) {
+  result_parallel <- fbms(formula = Y2~1+., data = df.training, family = "custom", loglik.pi = estimate.logic.lm, N = 500, N.final = 500,
+                          method = "gmjmcmc.parallel", model_prior = list(p = 50), beta_prior = NULL, runs = 16, cores = 8,
+                          transforms = transforms, probs = probs, params = params, P=25)
+} else {
+  result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, x = df.training[, -1], y = df.training[, 1],
+                                     loglik.pi = estimate.logic.lm, mlpost_params = list(p = 50), N = 500, N.final = 500,
+                                     transforms = transforms, probs =
probs, params = params, P=25)
+}
+summary(result_parallel)
+mpm <- get.mpm.model(result_parallel, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm, params = list(p = 50))
+mbest <- get.best.model(result_parallel)
+
+
+pred_parallel <- predict(result_parallel, x = df.test[,-1], link = function(x)(x))
+pred_par_mpm <- predict(mpm, x = df.test[,-1], link = function(x)(x))
+pred_par_best <- predict(mbest, x = df.test[,-1], link = function(x)(x))
+
+
+#prediction errors
+sqrt(mean((pred_parallel$aggr$mean - df.test$Y2)^2))
+sqrt(mean((pred_par_best - df.test$Y2)^2))
+sqrt(mean((pred_par_mpm - df.test$Y2)^2))
+sqrt(mean((df.test$Mean - df.test$Y2)^2))
+
+#prediction errors to the true means
+sqrt(mean((pred_parallel$aggr$mean - df.test$Mean)^2))
+sqrt(mean((pred_par_best - df.test$Mean)^2))
+sqrt(mean((pred_par_mpm - df.test$Mean)^2))
+
+
+
+plot(pred_parallel$aggr$mean, df.test$Y2)
+points(pred_parallel$aggr$mean,df.test$Mean,col = 2)
+points(pred_par_best,df.test$Mean,col = 3)
+points(pred_par_mpm,df.test$Mean,col = 4)
+
+
+
+#############################################################################
+#
+# FBMS logic regression with a tCCH parameter prior
+#
+#############################################################################
+
+transforms <- c("not")
+probs <- gen.probs.gmjmcmc(transforms)
+probs$gen <- c(1,1,0,1) #No projections allowed
+probs$filter <- 0.6
+params <- gen.params.gmjmcmc(ncol(df.training) - 1)
+params$feat$pop.max <- 51
+
+library(BAS) #needed for hypergeometric functions
+estimate.logic.tcch = function(y, x, model, complex, mlpost_params)
+{
+
+  if (length(mlpost_params) == 0)
+    mlpost_params <- list(r = 1 / dim(x)[1])
+  suppressWarnings({
+    mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())
+  })
+
+  wj <- complex$width
+
+  lp <- sum(log(factorial(wj))) - sum(wj*log(mlpost_params$p) + (2*wj-2)*log(2))
+
+  p.v <- (mlpost_params$n+1)/(mod$rank+1)
+
+  y_mean <- mean(y)
+  TSS <- sum((y - y_mean)^2)
+  RSS <- sum(mod$residuals^2)
+  R.2 <- 1 - (RSS / TSS)
+  p <- mod$rank
+
+  mloglik = (-0.5*p*log(p.v) -0.5*(mlpost_params$n-1)*log(1-(1-1/p.v)*R.2) + log(beta((mlpost_params$p.a+p)/2,mlpost_params$p.b/2)) - log(beta(mlpost_params$p.a/2,mlpost_params$p.b/2)) + log(phi1(mlpost_params$p.b/2,(mlpost_params$n-1)/2,(mlpost_params$p.a+mlpost_params$p.b+p)/2,mlpost_params$p.s/2/p.v,R.2/(p.v-(p.v-1)*R.2))) - hypergeometric1F1(mlpost_params$p.b/2,(mlpost_params$p.a+mlpost_params$p.b)/2,mlpost_params$p.s/2/p.v,log = T))
+  if (mloglik == -Inf || is.na(mloglik) || is.nan(mloglik))
+    mloglik = -10000
+
+  logpost <- mloglik + lp + mlpost_params$n
+
+  if(logpost==-Inf)
+    logpost = -10000
+
+  return(list(crit = logpost, coefs = mod$coefficients))
+}
+
+
+set.seed(5001)
+
+if (use.fbms) {
+  result <- fbms(formula = Y2~1+., data = df.training, family = "custom", loglik.pi = estimate.logic.tcch, N = 500, N.final = 500, P = 25,
+                 method = "gmjmcmc", transforms = transforms,
+                 probs = probs, model_prior = list(p = 50, n = n), beta_prior = list(p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1), params = params)
+} else {
+  # result <- gmjmcmc(df.training, transforms = transforms, probs = probs)
+
+  result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.tcch, N = 500, N.final = 500, P = 25,
+                    transforms = transforms, mlpost_params = list(p = 50, n = n, p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1), params = params, probs = probs)
+
+}
+summary(result)
+mpm <- get.mpm.model(result, y = df.training$Y2, x =
df.training[,-1], family = "custom", loglik.pi = estimate.logic.tcch, params = list(p = 50, n = n, p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1))
+mbest <- get.best.model(result)
+
+
+pred <- predict(result, x = df.test[,-1], link = function(x)(x))
+pred_mpm <- predict(mpm, x = df.test[,-1], link = function(x)(x))
+pred_best <- predict(mbest, x = df.test[,-1], link = function(x)(x))
+
+
+#prediction errors
+sqrt(mean((pred$aggr$mean - df.test$Y2)^2))
+sqrt(mean((pred_best - df.test$Y2)^2))
+sqrt(mean((pred_mpm - df.test$Y2)^2))
+sqrt(mean((df.test$Mean - df.test$Y2)^2))
+
+#prediction errors to the true means
+sqrt(mean((pred$aggr$mean - df.test$Mean)^2))
+sqrt(mean((pred_best - df.test$Mean)^2))
+sqrt(mean((pred_mpm - df.test$Mean)^2))
+
+
+
+plot(pred$aggr$mean, df.test$Y2)
+points(pred$aggr$mean,df.test$Mean,col = 2)
+points(pred_best,df.test$Mean,col = 3)
+points(pred_mpm,df.test$Mean,col = 4)
+
+
+# Now parallel inference
+
+set.seed(5002)
+
+if (use.fbms) {
+  result_parallel <- fbms(formula = Y2~1+., data = df.training, family = "custom", loglik.pi = estimate.logic.tcch, N = 500, N.final = 500,
+                          method = "gmjmcmc.parallel", runs = 16, cores = 8, model_prior = list(p = 50, n = n), beta_prior = list(p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1),
+                          transforms = transforms, probs = probs, params = params, P=25)
+} else {
+  result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, x = df.training[, -1], y = df.training[, 1],
+                                     loglik.pi = estimate.logic.tcch, N = 500, N.final = 500,
+                                     mlpost_params = list(p = 50, n = n, p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1),
+                                     transforms = transforms, probs = probs, params = params, P=25)
+}
+summary(result_parallel)
+mpm <- get.mpm.model(result_parallel, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.tcch, params = list(p = 50, n = n, p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1))
+mbest <- get.best.model(result_parallel)
+
+
+pred_parallel <- predict(result_parallel, x = df.test[,-1], link = function(x)(x))
+pred_par_mpm <- predict(mpm, x = df.test[,-1], link = function(x)(x))
+pred_par_best <- predict(mbest, x = df.test[,-1], link = function(x)(x))
+
+
+#prediction errors
+sqrt(mean((pred_parallel$aggr$mean - df.test$Y2)^2))
+sqrt(mean((pred_par_best - df.test$Y2)^2))
+sqrt(mean((pred_par_mpm - df.test$Y2)^2))
+sqrt(mean((df.test$Mean - df.test$Y2)^2))
+
+#prediction errors to the true means
+sqrt(mean((pred_parallel$aggr$mean - df.test$Mean)^2))
+sqrt(mean((pred_par_best - df.test$Mean)^2))
+sqrt(mean((pred_par_mpm - df.test$Mean)^2))
+
+
+
+plot(pred_parallel$aggr$mean, df.test$Y2)
+points(pred_parallel$aggr$mean,df.test$Mean,col = 2)
+points(pred_par_best,df.test$Mean,col = 3)
+points(pred_par_mpm,df.test$Mean,col = 4)
\ No newline at end of file
diff --git a/R_script/oldvers/Ex7_Sec5.2.R b/R_script/oldvers/Ex7_Sec5.2.R
new file mode 100644
index 0000000..a1d7d17
--- /dev/null
+++ b/R_script/oldvers/Ex7_Sec5.2.R
@@ -0,0 +1,297 @@
+#######################################################
+#
+# Example 7 (Section 5.2):
+#
+# Logic regression with a different model prior
+#
+# DATA - simulated
+#
+#
+#
+# This is the valid version for the JSS Paper
+#
+#######################################################
+
+#library(devtools)
+#devtools::install_github("jonlachmann/GMJMCMC@FBMS", force=T, build_vignettes=F)
+
+library(FBMS)
+
+n = 2000
+p = 50
+
+set.seed(1)
+X2 <- as.data.frame(array(data = rbinom(n = n*p,size = 1,prob = runif(n = n*p,0,1)),dim = c(n,p)))
+y2.Mean = 1+7*(X2$V4*X2$V17*X2$V30*X2$V10) +
9*(X2$V7*X2$V20*X2$V12)+ 3.5*(X2$V9*X2$V2)+1.5*(X2$V37)
+Y2 <- rnorm(n = n,mean = y2.Mean,sd = 1)
+df <- data.frame(Y2,X2)
+summary(df)
+
+str(df)
+
+
+# Split data into training and test datasets
+df.training <- df[1:(n/2),]
+df.test <- df[(n/2 + 1):n,]
+df.test$Mean <- y2.Mean[(n/2 + 1):n]
+
+
+
+#############################################################################
+#
+# FBMS logic regression with a Jeffreys parameter prior
+#
+#############################################################################
+
+
+
+# FBMS - unlike the EMJMCMC package - does not explicitly provide GMJMCMC for
+# logic regression, but we can easily run it without an "or" operator, since
+# "and" and "not" allow computing "or" via De Morgan's laws.
+
+transforms <- c("not")
+probs <- gen.probs.gmjmcmc(transforms)
+probs$gen <- c(1,1,0,1) #No projections allowed
+
+params <- gen.params.gmjmcmc(p)
+params$feat$pop.max <- 50
+params$feat$L <- 15
+
+
+
+estimate.logic.lm = function(y, x, model, complex, mlpost_params)
+{
+  # Computation of marginal log-likelihood using Jeffreys prior
+  suppressWarnings({
+    mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())
+  })
+  mloglik <- -(mod$aic + (log(length(y))-2) * (mod$rank))/2
+
+  # Computation of log of model prior
+  wj <- complex$width
+  lp <- sum(log(factorial(wj))) - sum(wj*log(4*mlpost_params$p) - log(4))
+
+  # log posterior up to a constant
+  logpost <- mloglik + lp
+
+  if(logpost==-Inf)
+    logpost = -10000
+
+  return(list(crit = logpost, coefs = mod$coefficients))
+}
+
+
+
+#############################################################################
+#
+# Logic regression training
+#
+#############################################################################
+
+set.seed(5001)
+
+result <- fbms(formula = Y2~1+., data = df.training, probs = probs, params = params,
+               method = "gmjmcmc", transforms = transforms, N = 500, P = 25,
+               family = "custom", loglik.pi = estimate.logic.lm,
+               model_prior = list(p = p), beta_prior = NULL)
+summary(result)
+mpm <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm, params = list(p = 50))
+mpm$coefs
+mpm <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1])
+mpm$coefs
+mbest <- get.best.model(result)
+mbest$coefs
+
+
+pred <- predict(result, x = df.test[,-1], link = function(x)(x))
+pred_mpm <- predict(mpm, x = df.test[,-1], link = function(x)(x))
+pred_best <- predict(mbest, x = df.test[,-1], link = function(x)(x))
+
+
+#prediction errors
+sqrt(mean((pred$aggr$mean - df.test$Y2)^2))
+sqrt(mean((pred_mpm - df.test$Y2)^2))
+sqrt(mean((pred_best - df.test$Y2)^2))
+sqrt(mean((df.test$Mean - df.test$Y2)^2))
+
+#prediction errors to the true means
+sqrt(mean((pred$aggr$mean - df.test$Mean)^2))
+sqrt(mean((pred_best - df.test$Mean)^2))
+sqrt(mean((pred_mpm - df.test$Mean)^2))
+
+
+
+plot(pred$aggr$mean, df.test$Y2)
+points(pred$aggr$mean,df.test$Mean,col = 2)
+points(pred_best,df.test$Mean,col = 3)
+points(pred_mpm,df.test$Mean,col = 4)
+
+
+
+
+#############################################################################
+#
+# Parallel version: 16 chains on 8 cores
+#
+#############################################################################
+
+
+set.seed(5002)
+
+result_parallel <- fbms(formula = Y2~1+., data = df.training, probs = probs, params = params,
+                        method = "gmjmcmc.parallel", transforms = transforms, N = 500, P=25,
+                        family = "custom", loglik.pi = estimate.logic.lm,
+                        model_prior = list(p = p), beta_prior = NULL, runs = 16, cores =
8)
+summary(result_parallel)
+mpm <- get.mpm.model(result_parallel, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm, params = list(p = 50))
+mbest <- get.best.model(result_parallel)
+
+
+pred_parallel <- predict(result_parallel, x = df.test[,-1], link = function(x)(x))
+pred_par_mpm <- predict(mpm, x = df.test[,-1], link = function(x)(x))
+pred_par_best <- predict(mbest, x = df.test[,-1], link = function(x)(x))
+
+
+#prediction errors
+sqrt(mean((pred_parallel$aggr$mean - df.test$Y2)^2))
+sqrt(mean((pred_par_best - df.test$Y2)^2))
+sqrt(mean((pred_par_mpm - df.test$Y2)^2))
+sqrt(mean((df.test$Mean - df.test$Y2)^2))
+
+#prediction errors to the true means
+sqrt(mean((pred_parallel$aggr$mean - df.test$Mean)^2))
+sqrt(mean((pred_par_best - df.test$Mean)^2))
+sqrt(mean((pred_par_mpm - df.test$Mean)^2))
+
+
+
+plot(pred_parallel$aggr$mean, df.test$Y2)
+points(pred_parallel$aggr$mean,df.test$Mean,col = 2)
+points(pred_par_best,df.test$Mean,col = 3)
+points(pred_par_mpm,df.test$Mean,col = 4)
+
+
+
+#############################################################################
+#
+# FBMS logic regression with a tCCH parameter prior
+#
+#############################################################################
+
+transforms <- c("not")
+probs <- gen.probs.gmjmcmc(transforms)
+probs$gen <- c(1,1,0,1) #No projections allowed
+probs$filter <- 0.6
+params <- gen.params.gmjmcmc(ncol(df.training) - 1)
+params$feat$pop.max <- 51
+
+library(BAS) #needed for hypergeometric functions
+estimate.logic.tcch = function(y, x, model, complex, mlpost_params)
+{
+  # Computation of marginal log likelihood
+
+  suppressWarnings({
+    mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())
+  })
+
+  p.v <- (mlpost_params$n+1)/(mod$rank+1)
+
+  y_mean <- mean(y)
+  TSS <- sum((y - y_mean)^2)
+  RSS <- sum(mod$residuals^2)
+  R.2 <- 1 - (RSS / TSS)
+  p <- mod$rank
+
+  mloglik = (-0.5*p*log(p.v) -0.5*(mlpost_params$n-1)*log(1-(1-1/p.v)*R.2) + log(beta((mlpost_params$p.a+p)/2,mlpost_params$p.b/2)) - log(beta(mlpost_params$p.a/2,mlpost_params$p.b/2)) + log(phi1(mlpost_params$p.b/2,(mlpost_params$n-1)/2,(mlpost_params$p.a+mlpost_params$p.b+p)/2,mlpost_params$p.s/2/p.v,R.2/(p.v-(p.v-1)*R.2))) - hypergeometric1F1(mlpost_params$p.b/2,(mlpost_params$p.a+mlpost_params$p.b)/2,mlpost_params$p.s/2/p.v,log = T))
+  if (mloglik == -Inf || is.na(mloglik) || is.nan(mloglik))
+    mloglik = -10000
+
+  # Computation of log of model prior
+
+  wj <- complex$width
+  lp <- sum(log(factorial(wj))) - sum(wj*log(mlpost_params$p) + (2*wj-2)*log(2))
+
+
+  logpost <- mloglik + lp + mlpost_params$n
+
+  if(logpost==-Inf)
+    logpost = -10000
+
+  return(list(crit = logpost, coefs = mod$coefficients))
+}
+
+
+set.seed(5001)
+
+
+
+result.tcch <- fbms(formula = Y2~1+., data = df.training, probs = probs, params = params,
+                    method = "gmjmcmc", transforms = transforms, N = 500, P = 25,
+                    family = "custom", loglik.pi = estimate.logic.tcch,
+                    model_prior = list(p = p, n = n),
+                    beta_prior = list(p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1))
+summary(result.tcch)
+mpm.tcch <- get.mpm.model(result.tcch, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.tcch, params = list(p = 50, n = n, p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1))
+mbest.tcch <- get.best.model(result.tcch)
+
+
+pred.tcch <- predict(result.tcch, x = df.test[,-1], link = function(x)(x))
+pred_mpm.tcch <- predict(mpm.tcch, x = df.test[,-1], link = function(x)(x))
+pred_best.tcch <- predict(mbest.tcch, x = df.test[,-1],
link = function(x)(x))
+
+
+#prediction errors
+sqrt(mean((pred.tcch$aggr$mean - df.test$Y2)^2))
+sqrt(mean((pred_best.tcch - df.test$Y2)^2))
+sqrt(mean((pred_mpm.tcch - df.test$Y2)^2))
+sqrt(mean((df.test$Mean - df.test$Y2)^2))
+
+#prediction errors to the true means
+sqrt(mean((pred.tcch$aggr$mean - df.test$Mean)^2))
+sqrt(mean((pred_best.tcch - df.test$Mean)^2))
+sqrt(mean((pred_mpm.tcch - df.test$Mean)^2))
+
+
+
+plot(pred.tcch$aggr$mean, df.test$Y2)
+points(pred.tcch$aggr$mean,df.test$Mean,col = 2)
+points(pred_best.tcch,df.test$Mean,col = 3)
+points(pred_mpm.tcch,df.test$Mean,col = 4)
+
+
+# Now parallel inference
+
+set.seed(5002)
+
+result_parallel.tcch <- fbms(formula = Y2~1+., data = df.training, probs = probs, params = params,
+                             method = "gmjmcmc.parallel", transforms = transforms, N = 500, P = 25,
+                             family = "custom", loglik.pi = estimate.logic.tcch,
+                             runs = 16, cores = 8, model_prior = list(p = p, n = n),
+                             beta_prior = list(p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1))
+summary(result_parallel.tcch)
+mpm <- get.mpm.model(result_parallel.tcch, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.tcch, params = list(p = 50, n = n, p.a = 1, p.b = 1, p.r = 1.5, p.s = 0, p.k = 1))
+mbest <- get.best.model(result_parallel.tcch)
+
+
+pred_parallel.tcch <- predict(result_parallel.tcch, x = df.test[,-1], link = function(x)(x))
+pred_par_mpm.tcch <- predict(mpm, x = df.test[,-1], link = function(x)(x))
+pred_par_best.tcch <- predict(mbest, x = df.test[,-1], link = function(x)(x))
+
+
+#prediction errors
+sqrt(mean((pred_parallel.tcch$aggr$mean - df.test$Y2)^2))
+sqrt(mean((pred_par_best.tcch - df.test$Y2)^2))
+sqrt(mean((pred_par_mpm.tcch - df.test$Y2)^2))
+sqrt(mean((df.test$Mean - df.test$Y2)^2))
+
+#prediction errors to the true means
+sqrt(mean((pred_parallel.tcch$aggr$mean - df.test$Mean)^2))
+sqrt(mean((pred_par_best.tcch - df.test$Mean)^2))
+sqrt(mean((pred_par_mpm.tcch - df.test$Mean)^2))
+
+
+
+plot(pred_parallel.tcch$aggr$mean, df.test$Y2)
+points(pred_parallel.tcch$aggr$mean,df.test$Mean,col = 2)
+points(pred_par_best.tcch,df.test$Mean,col = 3)
+points(pred_par_mpm.tcch,df.test$Mean,col = 4)
\ No newline at end of file
diff --git a/tests_current/Ex7_Sec5_2.R b/R_script/oldvers/Ex7_Sec5_2.R
similarity index 100%
rename from tests_current/Ex7_Sec5_2.R
rename to R_script/oldvers/Ex7_Sec5_2.R
diff --git a/tests_current/Ex8_Sec6_1.R b/R_script/oldvers/Ex8_Sec6_1.R
similarity index 91%
rename from tests_current/Ex8_Sec6_1.R
rename to R_script/oldvers/Ex8_Sec6_1.R
index 76df6b6..a744afd 100644
--- a/tests_current/Ex8_Sec6_1.R
+++ b/R_script/oldvers/Ex8_Sec6_1.R
@@ -99,13 +99,13 @@ summary(result_parallel)
 #
 # Model averaging
-pred_parallel = predict(result_parallel, x = df[,-1], link = function(x)(1/(1+exp(-x))))
+pred_parallel <- predict(result_parallel, x = df[,-1], link = function(x)(1/(1+exp(-x))))
 mean(round(pred_parallel$aggr$mean)==df$y)

 # Best Model
-#bm_parallel <- get.best.model(result_parallel)
-#pred_bm_parallel <- predict(bm_parallel, df[,-1],link = function(x)(1/(1+exp(-x))))
-#mean(round(pred_bm_parallel)==df$y)
+bm_parallel <- get.best.model(result_parallel)
+pred_bm_parallel <- predict(bm_parallel, df[,-1],link = function(x)(1/(1+exp(-x))))
+mean(round(pred_bm_parallel)==df$y)

 # Median Probability Model
 mpm_parallel <- predict(get.mpm.model(result = result_parallel,family = "binomial",y = df$y,x=df[,-1]), df[,-1],link = function(x)(1/(1+exp(-x))))
diff --git a/tests_current/Ex9_Sec6_2.R b/R_script/oldvers/Ex9_Sec6_2.R
similarity index
100%
rename from tests_current/Ex9_Sec6_2.R
rename to R_script/oldvers/Ex9_Sec6_2.R
diff --git a/R_script/oldvers/FBMS-guide.R b/R_script/oldvers/FBMS-guide.R
new file mode 100644
index 0000000..435c2da
--- /dev/null
+++ b/R_script/oldvers/FBMS-guide.R
@@ -0,0 +1,354 @@
+## ----include=FALSE------------------------------------------------------------
+knitr::opts_chunk$set(
+  message = TRUE,   # show package startup and other messages
+  warning = FALSE,  # suppress warnings
+  echo = TRUE,      # show code
+  results = "hide"  # hide default printed results unless printed via printn()
+)
+
+# For careful printing of only what I explicitly ask for
+printn <- function(x) {
+  # Capture the *exact* console print output as a character vector
+  txt <- capture.output(print(x))
+  # Combine lines with newline, send as a message to be shown in output
+  message(paste(txt, collapse = "\n"))
+}
+
+library(FBMS)
+
+## ----eval=FALSE, include=FALSE------------------------------------------------
+# library(FBMS)
+
+## -----------------------------------------------------------------------------
+
+# Parameters for parallel runs are set to a single thread and a single core to comply with CRAN requirements (please tune for your machine if you have more capacity)
+runs <- 1  # set to 1 for simplicity; use 16 or more if possible
+cores <- 1 # set to 1 for simplicity; use 8 or more if possible
+
+## -----------------------------------------------------------------------------
+# Load example
+data <- FBMS::exoplanet
+
+# Choose a small but expressive transform set for a quick demo
+transforms <- c("sigmoid", "sin_deg", "exp_dbl", "p0", "troot", "p3")
+
+# ---- fbms() call (simple GMJMCMC) ----
+# Key parameters (explicit):
+#   - formula     : semimajoraxis ~ 1 + .       # response and all predictors
+#   - data        : data                        # dataset
+#   - beta_prior  : list(type = "g-prior")      # parameter prior
+#   - model_prior : list(r = 1/dim(data)[1])    # model prior
+#   - method      : "gmjmcmc"                   # exploration strategy
+#   - transforms  : transforms                  # nonlinear feature dictionary
+#   - P           : population size per generation (search breadth)
+result_single <- fbms(
+  formula = semimajoraxis ~ 1 + .,
+  data = data,
+  beta_prior = list(type = "g-prior", alpha = dim(data)[1]),
+  model_prior = list(r = 1/dim(data)[1]),
+  method = "gmjmcmc",
+  transforms = transforms,
+  P = 20
+)
+
+# Summarize
+printn(summary(result_single))
+
+## -----------------------------------------------------------------------------
+
+# ---- fbms() call (parallel GMJMCMC) ----
+# Key parameters (explicit):
+#   - formula     : semimajoraxis ~ 1 + .
# response and all predictors +# - data : data # dataset +# - beta_prior : list(type = "g-prior") # parameter prior +# - model_prior : list(r = 1/dim(data)[1]) # model prior +# - method : "gmjmcmc" # exploration strategy +# - transforms : transforms # nonlinear feature dictionary +# - runs, cores : parallelization controls +# - P : population size per generation (search breadth) +result_parallel <- fbms( + formula = semimajoraxis ~ 1 + ., + data = data, + beta_prior = list(type = "g-prior", alpha = dim(data)[1]), + model_prior = list(r = 1/dim(data)[1]), + method = "gmjmcmc.parallel", + transforms = transforms, + runs = runs*10, # by default the rmd has runs = 1; increase for convergence + cores = cores, # by default the rmd has cores = 1; increase for convergence + P = 20 +) + +# Summarize +printn(summary(result_parallel)) + +## ----------------------------------------------------------------------------- +plot(result_parallel) + +## ----------------------------------------------------------------------------- +diagn_plot(result_parallel) + +## ----------------------------------------------------------------------------- +library(mvtnorm) + +n <- 100 # sample size +p <- 20 # number of covariates +k <- 5 # size of true submodel + +correct.model <- 1:k +beta.k <- (1:5)/5 + +beta <- rep(0, p) +beta[correct.model] <- beta.k + +set.seed(123) +x <- rmvnorm(n, rep(0, p)) +y <- x %*% beta + rnorm(n) + +# Standardize +y <- scale(y) +X <- scale(x) / sqrt(n) + +df <- as.data.frame(cbind(y, X)) +colnames(df) <- c("Y", paste0("X", seq_len(ncol(df) - 1))) + +printn(correct.model) +printn(beta.k) + +## ----------------------------------------------------------------------------- +# ---- fbms() call (MJMCMC) ---- +# Explicit prior choice: +# beta_prior = list(type = "g-prior", alpha = 100) +# To switch to another prior, e.g. robust: +# beta_prior = list(type = "robust") +result.lin <- fbms( + formula = Y ~ 1 + ., + data = df, + method = "mjmcmc", + N = 5000, # number of iterations + beta_prior = list(type = "g-prior", alpha = 100) +) + +## ----------------------------------------------------------------------------- +plot(result.lin) + +## ----------------------------------------------------------------------------- +# 'effects' specifies quantiles for posterior modes of effects across models +printn(summary(result.lin, effects = c(0.5, 0.025, 0.975))) + +## ----------------------------------------------------------------------------- +# ---- fbms() call (parallel MJMCMC) ---- +# Explicit prior choice: +# beta_prior = list(type = "g-prior", alpha = 100) +# To switch to another prior, e.g. 
robust: +# beta_prior = list(type = "robust") +# method = mjmcmc.parallel +# runs, cores : parallelization controls +result.lin.par <- fbms( + formula = Y ~ 1 + ., + data = df, + method = "mjmcmc.parallel", + N = 5000, # number of iterations + beta_prior = list(type = "g-prior", alpha = 100), + runs = runs, + cores = cores +) +printn(summary(result.lin.par, effects = c(0.5, 0.025, 0.975))) + +## ----------------------------------------------------------------------------- +# Create FP-style response with known structure, covariates are from previous example +df$Y <- p05(df$X1) + df$X1 + pm05(df$X3) + p0pm05(df$X3) + df$X4 + + pm1(df$X5) + p0(df$X6) + df$X8 + df$X10 + rnorm(nrow(df)) + +# Allow common FP transforms +transforms <- c( + "p0", "p2", "p3", "p05", "pm05", "pm1", "pm2", "p0p0", + "p0p05", "p0p1", "p0p2", "p0p3", "p0p05", "p0pm05", "p0pm1", "p0pm2" +) + +# Generation probabilities — here only modifications and mutations +probs <- gen.probs.gmjmcmc(transforms) +probs$gen <- c(0, 1, 0, 1) + +# Feature-generation parameters +params <- gen.params.gmjmcmc(ncol(df) - 1) +params$feat$D <- 1 # max depth 1 features + +## ----------------------------------------------------------------------------- +result <- fbms( + formula = Y ~ 1 + ., + data = df, + method = "gmjmcmc", + transforms = transforms, + beta_prior = list(type = "Jeffreys-BIC"), + probs = probs, + params = params, + P = 25 +) + +printn(summary(result)) + +## ----------------------------------------------------------------------------- +result_parallel <- fbms( + formula = Y ~ 1 + ., + data = df, + method = "gmjmcmc.parallel", + transforms = transforms, + beta_prior = list(type = "Jeffreys-BIC"), + probs = probs, + params = params, + P = 25, + runs = runs, + cores = cores +) + +printn(summary(result_parallel)) + +## ----------------------------------------------------------------------------- +# Custom approximate log marginal likelihood for mixed model using Laplace approximation +mixed.model.loglik.lme4 <- function (y, x, model, complex, mlpost_params) { + if (sum(model) > 1) { + x.model <- x[, model] + data <- data.frame(y, x = x.model[, -1], dr = mlpost_params$dr) + mm <- lmer(as.formula(paste0("y ~ 1 +", + paste0(names(data)[2:(ncol(data)-1)], collapse = "+"), + " + (1 | dr)")), data = data, REML = FALSE) + } else { + data <- data.frame(y, dr = mlpost_params$dr) + mm <- lmer(y ~ 1 + (1 | dr), data = data, REML = FALSE) + } + # log marginal likelihood (Laplace approx) + log model prior + mloglik <- as.numeric(logLik(mm)) - 0.5 * log(length(y)) * (ncol(data) - 2) + if (length(mlpost_params$r) == 0) mlpost_params$r <- 1 / nrow(x) + lp <- log_prior(mlpost_params, complex) + list(crit = mloglik + lp, coefs = fixef(mm)) +} + +## ----------------------------------------------------------------------------- +library(lme4) +data(Zambia, package = "cAIC4") + +df <- as.data.frame(sapply(Zambia[1:5], scale)) + +transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2", + "p0p0","p0p05","p0p1","p0p2","p0p3", + "p0p05","p0pm05","p0pm1","p0pm2") + +probs <- gen.probs.gmjmcmc(transforms) +probs$gen <- c(1, 1, 0, 1) # include modifications and interactions + +params <- gen.params.gmjmcmc(ncol(df) - 1) +params$feat$D <- 1 +params$feat$pop.max <- 10 + +result2a <- fbms( + formula = z ~ 1 + ., + data = df, + method = "gmjmcmc.parallel", + transforms = transforms, + probs = probs, + params = params, + P = 25, + N = 100, + runs = runs, + cores = cores, + family = "custom", + loglik.pi = mixed.model.loglik.lme4, + model_prior = list(r = 1 / 
nrow(df)), # model_prior is passed to mlpost_params + extra_params = list(dr = droplevels(Zambia$dr)) # extra_params are passed to mlpost_params +) + +printn(summary(result2a, tol = 0.05, labels = names(df)[-1])) + +## ----------------------------------------------------------------------------- +n <- 2000 +p <- 50 + +set.seed(1) +X2 <- as.data.frame(matrix(rbinom(n * p, size = 1, prob = runif(n * p, 0, 1)), n, p)) +y2.Mean <- 1 + 7*(X2$V4*X2$V17*X2$V30*X2$V10) + 9*(X2$V7*X2$V20*X2$V12) + + 3.5*(X2$V9*X2$V2) + 1.5*(X2$V37) + +Y2 <- rnorm(n, mean = y2.Mean, sd = 1) +df <- data.frame(Y2, X2) + +# Train/test split +df.training <- df[1:(n/2), ] +df.test <- df[(n/2 + 1):n, ] +df.test$Mean <- y2.Mean[(n/2 + 1):n] + +## ----------------------------------------------------------------------------- +estimate.logic.lm <- function(y, x, model, complex, mlpost_params) { + suppressWarnings({ + mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) + }) + mloglik <- -(mod$aic + (log(length(y)) - 2) * (mod$rank)) / 2 + wj <- complex$width + lp <- sum(log(factorial(wj))) - sum(wj * log(4 * mlpost_params$p) - log(4)) + logpost <- mloglik + lp + if (logpost == -Inf) logpost <- -10000 + list(crit = logpost, coefs = mod$coefficients) +} + +## ----------------------------------------------------------------------------- +set.seed(5001) + +# Only "not" operator; "or" is implied by De Morgan via "and" + "not" +transforms <- c("not") +probs <- gen.probs.gmjmcmc(transforms) +probs$gen <- c(1, 1, 0, 1) # no projections + +params <- gen.params.gmjmcmc(p) +params$feat$pop.max <- 50 +params$feat$L <- 15 + +result <- fbms( + formula = Y2 ~ 1 + ., + data = df.training, + method = "gmjmcmc", + transforms = transforms, + N = 500, + P = 25, + family = "custom", + loglik.pi = estimate.logic.lm, + probs = probs, + params = params, + model_prior = list(p = p) +) + +printn(summary(result)) + +# Extract models +mpm <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1], + family = "custom", loglik.pi = estimate.logic.lm, params = list(p = 50)) +printn(mpm$coefs) + +mpm2 <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1]) +printn(mpm2$coefs) + +mbest <- get.best.model(result) +printn(mbest$coefs) + +## ----------------------------------------------------------------------------- +# Correct link is identity for Gaussian +pred <- predict(result, x = df.test[,-1], link = function(x) x) +pred_mpm <- predict(mpm, x = df.test[,-1], link = function(x) x) +pred_best <- predict(mbest, x = df.test[,-1], link = function(x) x) + +# RMSEs +printn(sqrt(mean((pred$aggr$mean - df.test$Y2)^2))) +printn(sqrt(mean((pred_mpm - df.test$Y2)^2))) +printn(sqrt(mean((pred_best - df.test$Y2)^2))) +printn(sqrt(mean((df.test$Mean - df.test$Y2)^2))) + +# Errors to the true mean (oracle) +printn(sqrt(mean((pred$aggr$mean - df.test$Mean)^2))) +printn(sqrt(mean((pred_best - df.test$Mean)^2))) +printn(sqrt(mean((pred_mpm - df.test$Mean)^2))) + +# Quick diagnostic plot +plot(pred$aggr$mean, df.test$Y2, + xlab = "Predicted (BMA)", ylab = "Observed") +points(pred$aggr$mean, df.test$Mean, col = 2) +points(pred_best, df.test$Mean, col = 3) +points(pred_mpm, df.test$Mean, col = 4) + diff --git a/R_script/oldvers/bacteremia.R b/R_script/oldvers/bacteremia.R new file mode 100644 index 0000000..3aef69b --- /dev/null +++ b/R_script/oldvers/bacteremia.R @@ -0,0 +1,155 @@ +#devtools::install_github("jonlachmann/GMJMCMC@FBMS", force=T, build_vignettes=F) + +library(Metrics)# For C-index +library(FBMS) +set.seed(123) + +# Function to 
calculate C-index manually for binary classification
+cindex_manual <- function(predictions, labels) {
+  # Ensure predictions and labels are numeric
+  n <- length(labels)
+
+  # Initialize counters for concordant, discordant, and ties
+  concordant <- 0
+  discordant <- 0
+  ties <- 0
+
+  # Loop through all possible pairs
+  for (i in 1:(n-1)) {
+    for (j in (i+1):n) {
+      if (labels[i] != labels[j]) { # Only consider pairs with different labels
+        if (predictions[i] == predictions[j]) {
+          ties <- ties + 1
+        } else if ((predictions[i] > predictions[j] && labels[i] > labels[j]) ||
+                   (predictions[i] < predictions[j] && labels[i] < labels[j])) {
+          concordant <- concordant + 1
+        } else {
+          discordant <- discordant + 1
+        }
+      }
+    }
+  }
+
+  # Calculate the C-index
+  total_pairs <- concordant + discordant + ties
+  c_index <- (concordant + 0.5 * ties) / total_pairs
+  return(c_index)
+}
+
+
+df = read.csv("https://zenodo.org/records/7554815/files/Bacteremia_public_S2.csv",
+              header = TRUE, sep = ",", dec = ".")
+
+df = df[,!(names(df) %in% c("MONOR", "LYMR", "NEUR", "EOSR", "BASOR", "WBC", "MCV", "HCT"))]
+df$BloodCulture = ifelse(df$BloodCulture == "yes", 1, 0)
+
+#df = na.omit(df)
+trid = sample.int(dim(df)[1],round(dim(df)[1]*2/3))
+
+df.train = (df[trid,])
+df.test = (df[-trid,])
+
+
+# Number of bootstrap iterations
+n_bootstrap <- 100
+
+# Store results for each bootstrap iteration
+accuracy_oob <- numeric(n_bootstrap)
+accuracy_boot <- numeric(n_bootstrap)
+cindex_oob <- numeric(n_bootstrap)
+cindex_boot <- numeric(n_bootstrap)
+auc_oob <- numeric(n_bootstrap)
+auc_boot <- numeric(n_bootstrap)
+
+# Full (non-linear) model performance
+result.nonlinear = fbms(formula = BloodCulture ~ 1 + ., family = "binomial", data = df.train, beta_prior = list(type = "Jeffreys-BIC"), impute = T, method = "gmjmcmc.parallel", P = 10, cores = 6, runs = 6, transforms = c("sigmoid","sin","cos","exp_dbl"))
+summary(result.nonlinear)
+
+# Linear (MJMCMC) model for comparison
+result = fbms(formula = BloodCulture ~ 1 + ., family = "binomial", data = df.train, beta_prior = list(type = "Jeffreys-BIC"), impute = T, method = "mjmcmc")
+summary(result)
+preds = predict(result.nonlinear, df.test[,-45])
+prob_full <- sigmoid(preds$aggr$mean)
+
+# Accuracy on full model
+accuracy_full <- mean((prob_full > 0.5) == df.test$BloodCulture)
+
+# AUC on full model
+auc_full <- auc(df.test$BloodCulture, prob_full)
+
+# C-index on full model
+cindex_full <- cindex_manual(prob_full, df.test$BloodCulture)
+
+# Theoretical performance (no effect) for AUC and C-index
+p_M0_auc <- 0.5      # For AUC
+p_M0_cindex <- 0.5   # For C-index
+p_M0_accuracy <- 0.5 # For accuracy (random guessing)
+
+# Bootstrap procedure
+for (i in 1:n_bootstrap) {
+
+  # Bootstrap resample from training data
+  boot_indices <- sample(1:nrow(df.train), replace = TRUE)
+  df_boot <- df.train[boot_indices, ]
+
+  # Fit model on bootstrap sample
+  result_boot <- fbms(formula = BloodCulture ~ 1 + ., family = "binomial", data = df_boot, impute = TRUE, method = "mjmcmc")
+
+  # Predictions on bootstrap sample (in-sample performance)
+  preds_boot <- predict(result_boot, df_boot[,-45])
+  prob_boot <- sigmoid(preds_boot$mean)
+
+  # Predictions on the original data (out-of-bag performance)
+  preds_oob <- predict(result_boot, df.test[,-45])
+  prob_oob <- sigmoid(preds_oob$mean)
+
+  # Accuracy
+  accuracy_boot[i] <- mean((prob_boot > 0.5) == df_boot$BloodCulture)
+  accuracy_oob[i] <- mean((prob_oob > 0.5) == df.test$BloodCulture)
+
+  # AUC
+  auc_boot[i] <- auc(df_boot$BloodCulture, prob_boot)
+  auc_oob[i] <- auc(df.test$BloodCulture, prob_oob)
+
+  # C-index
+  cindex_boot[i] <- cindex_manual(prob_boot, df_boot$BloodCulture)
+  cindex_oob[i] <- cindex_manual(prob_oob, df.test$BloodCulture)
+}
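+# Worked illustration of the .632+ combination, mirroring the formulas used
+# below (all numbers are made up): apparent bootstrap performance 0.90,
+# out-of-bag 0.80, full-model 0.88, no-information rate 0.5.
+R_demo <- (0.90 - 0.80) / (0.5 - 0.88)   # relative overfitting rate
+w_demo <- 0.632 / (1 - 0.368 * R_demo)   # .632+ weight
+(1 - w_demo) * 0.88 + w_demo * 0.90      # .632+ performance estimate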
+
+# Calculate overfitting rate R for accuracy
+R_accuracy <- mean(accuracy_boot - accuracy_oob) / (p_M0_accuracy - accuracy_full)
+
+# Calculate overfitting rate R for AUC
+R_auc <- mean(auc_boot - auc_oob) / (p_M0_auc - auc_full)
+
+# Calculate overfitting rate R for C-index
+R_cindex <- mean(cindex_boot - cindex_oob) / (p_M0_cindex - cindex_full)
+
+# .632+ weight for accuracy
+w_accuracy <- 0.632 / (1 - 0.368 * R_accuracy)
+
+# .632+ weight for AUC
+w_auc <- 0.632 / (1 - 0.368 * R_auc)
+
+# .632+ weight for C-index
+w_cindex <- 0.632 / (1 - 0.368 * R_cindex)
+
+# .632+ estimates
+cindex_632plus <- (1 - w_cindex) * cindex_full + w_cindex * mean(cindex_boot)
+accuracy_632plus <- (1 - w_accuracy) * accuracy_full + w_accuracy * mean(accuracy_boot)
+auc_632plus <- (1 - w_auc) * auc_full + w_auc * mean(auc_boot)
+
+# Confidence intervals (95% CI using bootstrap percentiles)
+cindex_ci <- quantile(cindex_boot, probs = c(0.025, 0.975))
+accuracy_ci <- quantile(accuracy_boot, probs = c(0.025, 0.975))
+auc_ci <- quantile(auc_boot, probs = c(0.025, 0.975))
+
+# Print results
+cat("Full model C-index:", cindex_full, "\n")
+cat(".632+ C-index estimate:", cindex_632plus, "\n")
+cat("C-index 95% CI from bootstrap:", cindex_ci, "\n\n")
+
+cat("Full model Accuracy:", accuracy_full, "\n")
+cat(".632+ Accuracy estimate:", accuracy_632plus, "\n")
+cat("Accuracy 95% CI from bootstrap:", accuracy_ci, "\n\n")
+
+cat("Full model AUC:", auc_full, "\n")
+cat(".632+ AUC estimate:", auc_632plus, "\n")
+cat("AUC 95% CI from bootstrap:", auc_ci, "\n")
\ No newline at end of file
diff --git a/R_script/oldvers/fix impute jon.R b/R_script/oldvers/fix impute jon.R
new file mode 100644
index 0000000..abad342
--- /dev/null
+++ b/R_script/oldvers/fix impute jon.R
@@ -0,0 +1,144 @@
+library(mgcv)
+library(foreign)
+#install.packages("XLConnect")
+#library(XLConnect)
+library(readxl)
+# install.packages("e1071")
+library(e1071)
+
+#install.packages("knitr")
+library(knitr)
+library(foreign)
+library(readxl)
+## tableone package itself
+#install.packages("tableone")
+#library(tableone)
+## plotting
+library(ggplot2)
+#install.packages("pROC")
+library(pROC)
+#install.packages("memisc")
+#library(memisc)
+library(sjlabelled)
+
+#library(gmodels) # For CrossTable
+library(plyr)
+library(dplyr)
+#install.packages("devtools")
+library(devtools)
+library(survival)
+## Load rms package
+library(rms)
+
+#library(DescTools)
+
+#install.packages("Hmisc")
+library(Hmisc)
+#install.packages("Gmisc")
+#library(Gmisc)
+library(corrplot)
+
+library(tidyr)
+
+library(lubridate)
+library(mice)
+#library("VIM")
+
+library(glmnet)
+
+
+Excel.File = "/Users/aliaksandrhome/Rprojects/FBMS impute Bones data/Data4.xlsx"
+
+#df <- read.csv2("C:/CC/Projekte/P24110_Kanz/Data3.csv")
+#df <- as.data.frame(read_excel(Excel.File))
+
+
+df <- as.data.frame(read_excel(Excel.File, col_types = c(
+  "text", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric", "numeric", "numeric", "numeric",
+  "numeric",
"numeric", "numeric", "numeric", + "numeric", "numeric", "numeric", "numeric", + "numeric", "numeric", "numeric", "numeric", + "numeric", "numeric", "numeric", "numeric", + "numeric", "numeric", "numeric", "numeric", + "numeric", "numeric", "numeric"))) +str(df) + +#df$Bef_Nr = as.factor(df$Bef_Nr) +df$TestData = is.na(df$Sex_Molekular) + + + +df.Training = df[df$TestData == F,] +df.Training = df.Training %>% dplyr::select(- C14_mean) + +df.Training = df.Training %>% dplyr::select(- c(Clavicula_4, Clavicula_5,Femur_18, Tibia_71, Tibia_74,Fibula_1, Fibula_4a )) +m = dim(df.Training)[2] + +x.indx = 7:(m-1) + +MV = rowSums(!is.na(df.Training[,x.indx[-1]])) + +df.Training = df.Training[MV>1,] +n = dim(df.Training)[1] + + +devtools::load_all() + + +transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2") +#transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2") + +probs <- gen.probs.gmjmcmc(transforms) +probs$gen <- c(1,1,0,1) + +df.Training$Bef_Nr <- as.integer(df.Training$Bef_Nr) +df.Training$Sex_Molekular = as.integer(df.Training$Sex_Molekular) - 1 + +df.test <- df.Training[201:285,] +df.Training <- df.Training[1:200,] + +#df.Training$TestData <- NULL + +params <- gen.params.gmjmcmc(ncol(df.Training)- 1 + sum(sapply(names(df.Training)[-1],function(name)sum(is.na(df.Training[[name]]))>0))) + +params$feat$pop.max <- 150 + + +result_parallel = fbms(formula = Sex_Molekular ~ 1 + .,runs = 8, cores = 8, data = df.Training, transforms = transforms, probs = probs,params = params, P=25,impute = T, beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc.parallel") + +pred.obj = predict(result_parallel, x = df.test[,-2], link = function(x)(1/(1+exp(-x))),x_train = df.Training[,-2]) + +auc(df.test$Sex_Molekular, pred.obj$aggr$mean) +auc(df.Training$Sex_Molekular, df.Training$Sex_Morpho) +auc(df.test$Sex_Morpho, pred.obj$aggr$mean) + + +bm <- get.best.model(result_parallel) +pred = predict(object = bm, x = df.test[,-2], link = function(x)(1/(1+exp(-x))),x_train = df.Training[,-2]) + +auc(df.test$Sex_Molekular, pred) +auc(df.Training$Sex_Molekular, df.Training$Sex_Morpho) +auc(df.test$Sex_Morpho, pred) + + +mpm <- get.mpm.model(result_parallel,family = "binomial",y = df.Training$Sex_Molekular,x = df.Training[,-2]) +pred = predict(object = mpm, x = df.test[,-2], link = function(x)(1/(1+exp(-x))),x_train = df.Training[,-2]) + +auc(df.test$Sex_Molekular, pred) +auc(df.Training$Sex_Molekular, df.Training$Sex_Morpho) +auc(df.test$Sex_Morpho, pred) + + + + diff --git a/R_script/oldvers/gg.txt b/R_script/oldvers/gg.txt new file mode 100644 index 0000000..d114ca0 --- /dev/null +++ b/R_script/oldvers/gg.txt @@ -0,0 +1,48 @@ +M So Ed Po1 Po2 LF M.F Pop NW U1 U2 Wealth Ineq Prob Time Crime +15.1 1 9.1 5.8 5.6 0.51 95 33 30.1 0.108 4.1 3940 26.1 0.084602 26.2011 791 +14.3 0 11.3 10.3 9.5 0.583 101.2 13 10.2 0.096 3.6 5570 19.4 0.029599 25.2999 1635 +14.2 1 8.9 4.5 4.4 0.533 96.9 18 21.9 0.094 3.3 3180 25 0.083401 24.3006 578 +13.6 0 12.1 14.9 14.1 0.577 99.4 157 8 0.102 3.9 6730 16.7 0.015801 29.9012 1969 +14.1 0 12.1 10.9 10.1 0.591 98.5 18 3 0.091 2 5780 17.4 0.041399 21.2998 1234 +12.1 0 11 11.8 11.5 0.547 96.4 25 4.4 0.084 2.9 6890 12.6 0.034201 20.9995 682 +12.7 1 11.1 8.2 7.9 0.519 98.2 4 13.9 0.097 3.8 6200 16.8 0.0421 20.6993 963 +13.1 1 10.9 11.5 10.9 0.542 96.9 50 17.9 0.079 3.5 4720 20.6 0.040099 24.5988 1555 +15.7 1 9 6.5 6.2 0.553 95.5 39 28.6 0.081 2.8 4210 23.9 0.071697 29.4001 856 +14 0 11.8 7.1 6.8 0.632 102.9 7 1.5 0.1 2.4 
5260 17.4 0.044498 19.5994 705 +12.4 0 10.5 12.1 11.6 0.58 96.6 101 10.6 0.077 3.5 6570 17 0.016201 41.6 1674 +13.4 0 10.8 7.5 7.1 0.595 97.2 47 5.9 0.083 3.1 5800 17.2 0.031201 34.2984 849 +12.8 0 11.3 6.7 6 0.624 97.2 28 1 0.077 2.5 5070 20.6 0.045302 36.2993 511 +13.5 0 11.7 6.2 6.1 0.595 98.6 22 4.6 0.077 2.7 5290 19 0.0532 21.501 664 +15.2 1 8.7 5.7 5.3 0.53 98.6 30 7.2 0.092 4.3 4050 26.4 0.0691 22.7008 798 +14.2 1 8.8 8.1 7.7 0.497 95.6 33 32.1 0.116 4.7 4270 24.7 0.052099 26.0991 946 +14.3 0 11 6.6 6.3 0.537 97.7 10 0.6 0.114 3.5 4870 16.6 0.076299 19.1002 539 +13.5 1 10.4 12.3 11.5 0.537 97.8 31 17 0.089 3.4 6310 16.5 0.119804 18.1996 929 +13 0 11.6 12.8 12.8 0.536 93.4 51 2.4 0.078 3.4 6270 13.5 0.019099 24.9008 750 +12.5 0 10.8 11.3 10.5 0.567 98.5 78 9.4 0.13 5.8 6260 16.6 0.034801 26.401 1225 +12.6 0 10.8 7.4 6.7 0.602 98.4 34 1.2 0.102 3.3 5570 19.5 0.0228 37.5998 742 +15.7 1 8.9 4.7 4.4 0.512 96.2 22 42.3 0.097 3.4 2880 27.6 0.089502 37.0994 439 +13.2 0 9.6 8.7 8.3 0.564 95.3 43 9.2 0.083 3.2 5130 22.7 0.0307 25.1989 1216 +13.1 0 11.6 7.8 7.3 0.574 103.8 7 3.6 0.142 4.2 5400 17.6 0.041598 17.6 968 +13 0 11.6 6.3 5.7 0.641 98.4 14 2.6 0.07 2.1 4860 19.6 0.069197 21.9003 523 +13.1 0 12.1 16 14.3 0.631 107.1 3 7.7 0.102 4.1 6740 15.2 0.041698 22.1005 1993 +13.5 0 10.9 6.9 7.1 0.54 96.5 6 0.4 0.08 2.2 5640 13.9 0.036099 28.4999 342 +15.2 0 11.2 8.2 7.6 0.571 101.8 10 7.9 0.103 2.8 5370 21.5 0.038201 25.8006 1216 +11.9 0 10.7 16.6 15.7 0.521 93.8 168 8.9 0.092 3.6 6370 15.4 0.0234 36.7009 1043 +16.6 1 8.9 5.8 5.4 0.521 97.3 46 25.4 0.072 2.6 3960 23.7 0.075298 28.3011 696 +14 0 9.3 5.5 5.4 0.535 104.5 6 2 0.135 4 4530 20 0.041999 21.7998 373 +12.5 0 10.9 9 8.1 0.586 96.4 97 8.2 0.105 4.3 6170 16.3 0.042698 30.9014 754 +14.7 1 10.4 6.3 6.4 0.56 97.2 23 9.5 0.076 2.4 4620 23.3 0.049499 25.5005 1072 +12.6 0 11.8 9.7 9.7 0.542 99 18 2.1 0.102 3.5 5890 16.6 0.040799 21.6997 923 +12.3 0 10.2 9.7 8.7 0.526 94.8 113 7.6 0.124 5 5720 15.8 0.0207 37.4011 653 +15 0 10 10.9 9.8 0.531 96.4 9 2.4 0.087 3.8 5590 15.3 0.0069 44.0004 1272 +17.7 1 8.7 5.8 5.6 0.638 97.4 24 34.9 0.076 2.8 3820 25.4 0.045198 31.6995 831 +13.3 0 10.4 5.1 4.7 0.599 102.4 7 4 0.099 2.7 4250 22.5 0.053998 16.6999 566 +14.9 1 8.8 6.1 5.4 0.515 95.3 36 16.5 0.086 3.5 3950 25.1 0.047099 27.3004 826 +14.5 1 10.4 8.2 7.4 0.56 98.1 96 12.6 0.088 3.1 4880 22.8 0.038801 29.3004 1151 +14.8 0 12.2 7.2 6.6 0.601 99.8 9 1.9 0.084 2 5900 14.4 0.0251 30.0001 880 +14.1 0 10.9 5.6 5.4 0.523 96.8 4 0.2 0.107 3.7 4890 17 0.088904 12.1996 542 +16.2 1 9.9 7.5 7 0.522 99.6 40 20.8 0.073 2.7 4960 22.4 0.054902 31.9989 823 +13.6 0 12.1 9.5 9.6 0.574 101.2 29 3.6 0.111 3.7 6220 16.2 0.0281 30.0001 1030 +13.9 1 8.8 4.6 4.1 0.48 96.8 19 4.9 0.135 5.3 4570 24.9 0.056202 32.5996 455 +12.6 0 10.4 10.6 9.7 0.599 98.9 40 2.4 0.078 2.5 5930 17.1 0.046598 16.6999 508 +13 0 12.1 9 9.1 0.623 104.9 3 2.2 0.113 4 5880 16 0.052802 16.0997 849 \ No newline at end of file diff --git a/R_script/oldvers/kristoffer.R b/R_script/oldvers/kristoffer.R new file mode 100644 index 0000000..c363692 --- /dev/null +++ b/R_script/oldvers/kristoffer.R @@ -0,0 +1,80 @@ + +library(BAS) + +x <- read.csv("https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/refs/heads/master/supplementaries/Mode%20Jumping%20MCMC/supplementary/examples/US%20Data/simcen-x1.txt",header = F) +y <- read.csv( + "https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/refs/heads/master/supplementaries/Mode%20Jumping%20MCMC/supplementary/examples/US%20Data/simcen-y1.txt",header = F) + + + +data <- 
data.frame(x,y) + +names(data)[16] = "Crime" + + +res <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",alpha = 47,n.models = 32768,method = "deterministic", modelprior = uniform()) + +strue <- summary(res) + +strue[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] + +res.bas1 <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",method = "BAS",alpha = 47,n.models=3276,update=500,modelprior= uniform(),initprobs="Uniform") + +sbas1 <- summary(res.bas1) + +sbas1[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] + + +res.bas2 <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",method = "BAS",alpha = 47,n.models=3276,update=NULL,modelprior= uniform(),initprobs="Uniform") + +sbas2 <- summary(res.bas2) + +sbas2[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] + + +res.bas3 <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",method = "BAS",alpha = 47,n.models=3276,update=500,modelprior= uniform(),initprobs="eplogp") + +sbas3 <- summary(res.bas3) + +sbas3[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] + + +res.mc31 <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",alpha = 47,method = "MCMC",n.models = 327, update=NULL,modelprior= uniform(),initprobs="Uniform") + +smc31 <- summary(res.mc31) + +smc31[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] + + + + +params <- gen.params.mjmcmc(15) +probs <- gen.probs.mjmcmc() + + + +res100 <- mjmcmc(x = x,y = y,loglik.pi = fbms.mlik.master.temp,mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior", g = 47), temp = 100, r = 0.5),N = 100000,params = params,probs = probs) + +res1 <- mjmcmc(x = x, y = y,loglik.pi = fbms.mlik.master.temp,mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior", g = 47), temp = 1, r = 0.5), N = 100000,params = params,probs = probs) + +res0.1 <- mjmcmc(x = x,y = y,loglik.pi = fbms.mlik.master.temp,mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior", g = 47), temp = 0.5, r = 0.5), N = 100000,params = params,probs = probs) + + +results <- data.frame(true = strue[2:16,1], t100 = res100$marg.probs[1,],t1 = res1$marg.probs[1,], t01 = res0.1$marg.probs[1,], BAS1 = sbas1[2:16,1],BAS2 = sbas2[2:16,1],BAS3 = sbas3[2:16,1],mc31 = smc31[2:16,1])[c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),] + +abs(results$true - results)*100 + + + + + + + + + + + + + + + diff --git a/R_script/oldvers/likelihoods2.R b/R_script/oldvers/likelihoods2.R new file mode 100644 index 0000000..0cbe708 --- /dev/null +++ b/R_script/oldvers/likelihoods2.R @@ -0,0 +1,800 @@ +# Title : Log likelihood functions +# Objective : Log likelihood functions with priors to be used as templates or directly in GMJMCMC +# Created by: jonlachmann +# Created on: 2021-02-24 + +#' Log likelihood function for glm regression with parameter priors from BAS package +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and family that BAS uses in glm models +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). 
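+#'
+#' @details The log marginal likelihood is obtained from the deterministic
+#' enumeration routine of the BAS package for the single supplied model; the log
+#' model prior, log(r) * sum(complex$oc), is then added to it to form crit.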
+#'
+#' @examples
+#' glm.logpost.bas2(as.integer(rnorm(100) > 0), cbind(1, matrix(rnorm(100))), c(TRUE, TRUE), list(oc = 1))
+#'
+#' @importFrom BAS uniform Jeffreys g.prior
+#' @importFrom stats poisson Gamma glm.control
+#' @export glm.logpost.bas2
+glm.logpost.bas2 <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = FALSE)) {
+  if (length(mlpost_params) == 0)
+    mlpost_params <- list(r = 1/dim(x)[1], family = "binomial", prior_beta = g.prior(max(dim(x)[1], sum(model)-1)), laplace = FALSE)
+  p <- sum(model) - 1
+  if(p==0)
+  {
+    probinit <- as.numeric(c(1,0.99))
+    model[2] <- T
+  }else{
+    probinit <- as.numeric(c(1,rep(0.99,p)))
+  }
+
+  mod <- NULL
+
+  tryCatch({
+    if(mlpost_params$family == "binomial")
+      suppressWarnings({
+        mod <- .Call(BAS:::C_glm_deterministic,
+                     y = as.numeric(y), X = as.matrix(x[,model]),
+                     Roffset = as.numeric(rep(0, length(y))),
+                     Rweights = as.numeric(rep(1, length(y))),
+                     Rprobinit = probinit,
+                     Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))),
+                     modelprior = uniform(),
+                     betaprior = mlpost_params$prior_beta,
+                     family = binomial(),
+                     Rcontrol = glm.control(),
+                     Rlaplace = as.integer(mlpost_params$laplace))
+      })
+    else if(mlpost_params$family == "poisson")
+      suppressWarnings({
+        mod <- .Call(BAS:::C_glm_deterministic,
+                     y = as.numeric(y), X = as.matrix(x[,model]),
+                     Roffset = as.numeric(rep(0, length(y))),
+                     Rweights = as.numeric(rep(1, length(y))),
+                     Rprobinit = probinit,
+                     Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))),
+                     modelprior = uniform(),
+                     betaprior = mlpost_params$prior_beta,
+                     family = poisson(),
+                     Rcontrol = glm.control(),
+                     Rlaplace = as.integer(mlpost_params$laplace))
+      })
+    else{
+      suppressWarnings({
+        mod <- .Call(BAS:::C_glm_deterministic,
+                     y = as.numeric(y), X = as.matrix(x[,model]),
+                     Roffset = as.numeric(rep(0, length(y))),
+                     Rweights = as.numeric(rep(1, length(y))),
+                     Rprobinit = probinit,
+                     Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))),
+                     modelprior = uniform(),
+                     betaprior = mlpost_params$prior_beta,
+                     family = Gamma(),
+                     Rcontrol = glm.control(),
+                     Rlaplace = as.integer(mlpost_params$laplace))
+      })}
+  }, error = function(e) {
+    # Leave mod as NULL so the fallback return below is used
+    mod <- NULL
+    # Report the error message for debugging
+    cat("An error occurred:", conditionMessage(e), "\n")
+  })
+
+  if(length(mod)==0) {
+    return(list(crit = -.Machine$double.xmax + log(mlpost_params$r * sum(complex$oc)), coefs = rep(0, p+1)))
+  }
+
+  if(p == 0)
+  {
+    ret <- mod$logmarg[2] + log(mlpost_params$r) * sum(complex$oc)
+    return(list(crit=ret, coefs=mod$mle[[2]]))
+  }
+  ret <- mod$logmarg + log(mlpost_params$r) * sum(complex$oc)
+  return(list(crit=ret, coefs=mod$mle[[1]]))
+}
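+
+# A minimal usage sketch (kept inside if (FALSE) so sourcing this file has no
+# side effects; y.ex and x.ex are made-up names): a one-covariate logistic model
+# under the default Jeffreys coefficient prior, mirroring the roxygen example.
+if (FALSE) {
+  set.seed(1)
+  y.ex <- as.integer(rnorm(100) > 0)
+  x.ex <- cbind(1, matrix(rnorm(100)))
+  glm.logpost.bas2(y.ex, x.ex, model = c(TRUE, TRUE), complex = list(oc = 1))
+}
+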
+#' Log likelihood function for Gaussian regression with parameter priors from BAS package
+#' This function is created as an example of how to create an estimator that is used
+#' to calculate the marginal likelihood of a model.
+#'
+#' @param y A vector containing the dependent variable
+#' @param x The matrix containing the precalculated features
+#' @param model The model to estimate as a logical vector
+#' @param complex A list of complexity measures for the features
+#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user. For Gaussian models, prior_beta must be given as the integer code that BAS uses for the coefficient prior: "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9
+#'
+#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs).
+#'
+#' @examples
+#' lm.logpost.bas2(rnorm(100), cbind(1,matrix(rnorm(100))), c(TRUE,TRUE), list(oc = 1))
+#'
+#'
+#' @export lm.logpost.bas2
+lm.logpost.bas2 <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5), prior_beta = 0, alpha = 4)) { # prior_beta as BAS integer code, 0 = g-prior
+  if (length(mlpost_params) == 0)
+    mlpost_params <- list(r = 1/dim(x)[1], prior_beta = 0, alpha = max(dim(x)[1], sum(model)^2))
+
+
+  p <- sum(model) - 1
+  if(p==0)
+  {
+    probinit <- as.numeric(c(1,0.99))
+    model[2] <- T
+  }else{
+    probinit <- as.numeric(c(1,rep(0.99,p)))
+  }
+
+  mod <- NULL
+
+  tryCatch({
+    suppressWarnings({
+      mod <- .Call(BAS:::C_deterministic,
+                   y = y, X = as.matrix(x[,model]),
+                   as.numeric(rep(1, length(y))),
+                   probinit,
+                   as.integer(rep(0,ifelse(p==0,2,1))),
+                   incint = as.integer(F),
+                   alpha = ifelse(length(mlpost_params$alpha)>0, as.numeric(mlpost_params$alpha), NULL),
+                   method = as.integer(mlpost_params$prior_beta),
+                   modelprior = uniform(),
+                   Rpivot = TRUE,
+                   Rtol = 1e-7)
+    })
+  }, error = function(e) {
+    # Leave mod as NULL so the fallback return below is used
+    mod <- NULL
+    # Report the error message for debugging
+    cat("An error occurred:", conditionMessage(e), "\n")
+  })
+
+  if(length(mod)==0) {
+    return(list(crit = -.Machine$double.xmax + log(mlpost_params$r * sum(complex$oc)), coefs = rep(0, p+1)))
+  }
+
+  if(p == 0)
+  {
+    ret <- mod$logmarg[2] + log(mlpost_params$r) * sum(complex$oc)
+    return(list(crit=ret, coefs=mod$mle[[2]]))
+  }
+  ret <- mod$logmarg + log(mlpost_params$r) * sum(complex$oc)
+  return(list(crit=ret, coefs=mod$mle[[1]]))
+}
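+
+# A minimal usage sketch (illustrative; y.ex and x.ex are made-up names): the
+# coefficient prior is passed as the BAS integer code listed above, here 0 for
+# the g-prior, with alpha playing the role of g.
+if (FALSE) {
+  set.seed(1)
+  y.ex <- rnorm(100)
+  x.ex <- cbind(1, matrix(rnorm(100)))
+  lm.logpost.bas2(y.ex, x.ex, model = c(TRUE, TRUE), complex = list(oc = 1),
+                  mlpost_params = list(r = exp(-0.5), prior_beta = 0, alpha = 47))
+}
+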
+#' Log likelihood function for logistic regression with a Jeffreys parameter prior and BIC approximations of the posterior
+#' This function is created as an example of how to create an estimator that is used
+#' to calculate the marginal likelihood of a model.
+#'
+#' @param y A vector containing the dependent variable
+#' @param x The matrix containing the precalculated features
+#' @param model The model to estimate as a logical vector
+#' @param complex A list of complexity measures for the features
+#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user
+#'
+#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs).
+#'
+#' @examples
+#' logistic.loglik2(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1))
+#'
+#'
+#' @export logistic.loglik2
+logistic.loglik2 <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5))) {
+  if (length(mlpost_params) == 0)
+    mlpost_params <- list(r = 1/dim(x)[1])
+  suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial())})
+  ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(mlpost_params$r) * sum(complex$oc))) / 2
+  return(list(crit=ret, coefs=mod$coefficients))
+}
+
+#' Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior
+#' This function is created as an example of how to create an estimator that is used
+#' to calculate the marginal likelihood of a model.
+#'
+#' @param y A vector containing the dependent variable
+#' @param x The matrix containing the precalculated features
+#' @param model The model to estimate as a logical vector
+#' @param complex A list of complexity measures for the features
+#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user, family must be specified
+#'
+#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs).
+#'
+#' @examples
+#' glm.loglik2(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1))
+#'
+#'
+#' @export glm.loglik2
+glm.loglik2 <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5), family = "Gamma")) {
+  if (length(mlpost_params) == 0)
+    mlpost_params <- list(r = 1/dim(x)[1], family = "Gamma") # family kept in the fallback so the branch below is well defined
+
+  if(mlpost_params$family == "binomial")
+  {
+    fam = binomial()
+  }else if(mlpost_params$family == "poisson"){
+    fam = poisson()
+  }else
+  {
+    fam = Gamma()
+  }
+
+  suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)})
+
+  if (length(mod) == 0 || is.nan(mod$deviance)) {
+    return(list(crit = -.Machine$double.xmax + log_prior(mlpost_params, complex), coefs = rep(0, sum(model))))
+  }
+
+  ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(mlpost_params$r) * sum(complex$oc))) / 2
+  return(list(crit=ret, coefs=mod$coefficients))
+}
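+
+# A small consistency sketch (illustrative): for a fixed model, the criterion of
+# logistic.loglik2 equals -(deviance + log(n) * (rank - 1)) / 2 plus the log
+# model prior log(r) * sum(oc), which can be reproduced with a plain glm() fit.
+if (FALSE) {
+  set.seed(1)
+  y.ex <- as.integer(rnorm(100) > 0)
+  x.ex <- cbind(1, matrix(rnorm(100)))
+  out <- logistic.loglik2(y.ex, x.ex, model = c(TRUE, TRUE), complex = list(oc = 1))
+  fit <- glm(y.ex ~ x.ex[, 2], family = binomial())
+  by.hand <- -(deviance(fit) + log(100) * (2 - 1) - 2 * log(exp(-0.5)) * 1) / 2
+  all.equal(out$crit, by.hand)
+}
+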
+#' Log likelihood function for Gaussian regression with a Jeffreys prior and BIC approximation of MLIK with both known and unknown variance of the responses
+#'
+#' @param y A vector containing the dependent variable
+#' @param x The matrix containing the precalculated features
+#' @param model The model to estimate as a logical vector
+#' @param complex A list of complexity measures for the features
+#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user
+#'
+#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs).
+#'
+#' @examples
+#' gaussian.loglik2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL)
+#'
+#'
+#' @export gaussian.loglik2
+gaussian.loglik2 <- function (y, x, model, complex, mlpost_params) {
+  if(length(mlpost_params)==0)
+    mlpost_params <- list()
+  if (length(mlpost_params$r) == 0)
+    mlpost_params$r <- 1/dim(x)[1]
+  if(length(mlpost_params$var) == 0)
+    mlpost_params$var <- 1
+  suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())})
+
+  if(mlpost_params$var == "unknown")
+    ret <- (-(mod$aic + (log(length(y))-2) * (mod$rank) - 2 * log(mlpost_params$r) * (sum(complex$oc)))) / 2
+  else
+    ret <- (-(mod$deviance/mlpost_params$var + log(length(y)) * (mod$rank - 1) - 2 * log_prior(mlpost_params, complex))) / 2
+
+  return(list(crit=ret, coefs=mod$coefficients))
+}
+
+
+#' Log likelihood function for linear regression using Zellner's g-prior
+#'
+#' @param y A vector containing the dependent variable
+#' @param x The matrix containing the precalculated features
+#' @param model The model to estimate as a logical vector
+#' @param complex A list of complexity measures for the features
+#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user
+#'
+#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs).
+#'
+#' @examples
+#' gaussian.loglik2.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1))
+#'
+#' @export gaussian.loglik2.g
+gaussian.loglik2.g <- function (y, x, model, complex, mlpost_params = NULL)
+{
+  if(length(mlpost_params)==0)
+    mlpost_params <- list()
+  if (length(mlpost_params$r) == 0)
+    mlpost_params$r <- 1/dim(x)[1]
+  suppressWarnings({
+    mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())
+  })
+  # Calculate R-squared
+  y_mean <- mean(y)
+  TSS <- sum((y - y_mean)^2)
+  RSS <- sum(mod$residuals^2)
+  Rsquare <- 1 - (RSS / TSS)
+
+  if (length(mlpost_params$r) == 0 || length(mlpost_params$g) == 0)
+  {
+    mlpost_params$r <- 1/dim(x)[1]
+    mlpost_params$g <- max(mod$rank^2, length(y))
+  }
+
+  # logarithm of marginal likelihood
+  mloglik <- 0.5*(log(1.0 + mlpost_params$g) * (dim(x)[1] - mod$rank) - log(1.0 + mlpost_params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1))*(mod$rank!=1)
+
+  # logarithm of model prior (uses the default value of r set above when none was supplied)
+  lp <- log_prior(mlpost_params, complex)
+
+  return(list(crit = mloglik + lp, coefs = mod$coefficients))
+}
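+
+# A minimal usage sketch (illustrative; y.ex and x.ex are made-up names): the
+# Zellner g-prior marginal likelihood with both r and g supplied explicitly.
+if (FALSE) {
+  set.seed(1)
+  y.ex <- rnorm(100)
+  x.ex <- cbind(1, matrix(rnorm(100)))
+  gaussian.loglik2.g(y.ex, x.ex, model = c(TRUE, TRUE), complex = list(oc = 1),
+                     mlpost_params = list(r = exp(-0.5), g = 100))
+}
+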
+#' Log likelihood function for Gaussian regression with parameter priors from BAS package
+#'
+#' This function computes the marginal likelihood of a Gaussian regression model under different priors.
+#'
+#' @param y A numeric vector containing the dependent variable.
+#' @param x A matrix containing the independent variables, including an intercept column.
+#' @param model A logical vector indicating which variables to include in the model.
+#' @param complex A list containing complexity measures for the features.
+#' @param mlpost_params A list of parameters for the log likelihood, specifying the tuning parameters of beta priors.
+#'
+#' @return A list with elements:
+#' \item{crit}{Log marginal likelihood combined with the log prior.}
+#' \item{coefs}{Posterior mode of the coefficients.}
+#'
+#' @examples
+#' gaussian_tcch_log_likelihood2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1))
+#'
+#' @importFrom BAS phi1 hypergeometric1F1 hypergeometric2F1
+#' @importFrom tolerance F1
+#' @export
+gaussian_tcch_log_likelihood2 <- function(y, x, model, complex, mlpost_params = list(r = exp(-0.5), prior_beta = "intrinsic")) {
+
+  # Fit the linear model using fastglm
+  fitted_model <- fastglm(as.matrix(x[, model]), y, family = gaussian())
+  log_likelihood <- -(fitted_model$aic -2 * (fitted_model$rank))/2
+  # Compute R-squared manually
+  y_mean <- mean(y)
+  TSS <- sum((y - y_mean)^2)
+  RSS <- sum(fitted_model$residuals^2)
+  R2_M <- 1 - (RSS / TSS)
+
+  p_M <- fitted_model$rank
+  n <- length(y)
+
+  # Switch-like structure to assign hyperparameters based on prior
+  if (mlpost_params$prior_beta[[1]] == "CH") {
+    # CH prior: a, b and s should be user-specified, with defaults if not provided
+    a <- ifelse(!is.null(mlpost_params$prior_beta$a), mlpost_params$prior_beta$a, 1) # Default to 1 if not specified
+    b <- ifelse(!is.null(mlpost_params$prior_beta$b), mlpost_params$prior_beta$b, 2) # Default to 2 if not specified
+    r <- 0
+    s <- ifelse(!is.null(mlpost_params$prior_beta$s), mlpost_params$prior_beta$s, 1) # Default to 1 if not specified
+    v <- 1
+    k <- 1
+
+  } else if (mlpost_params$prior_beta[[1]] == "hyper-g") {
+    a <- 1
+    b <- 2
+    r <- 0
+    s <- 0
+    v <- 1
+    k <- 1
+
+  } else if (mlpost_params$prior_beta[[1]] == "uniform") {
+    a <- 2
+    b <- 2
+    r <- 0
+    s <- 0
+    v <- 1
+    k <- 1
+
+  } else if (mlpost_params$prior_beta[[1]] == "Jeffreys") {
+    a <- 0.0001
+    b <- 2
+    r <- 0
+    s <- 0
+    v <- 1
+    k <- 1
+  } else if (mlpost_params$prior_beta[[1]] == "beta.prime") {
+    a <- 1/2
+    b <- n - p_M - 1.5
+    r <- 0
+    s <- 0
+    v <- 1
+    k <- 1
+
+  } else if (mlpost_params$prior_beta[[1]] == "benchmark") {
+    a <- 0.02
+    b <- 0.02 * max(n, p_M^2)
+    r <- 0
+    s <- 0
+    v <- 1
+    k <- 1
+
+  } else if (mlpost_params$prior_beta[[1]] == "TG") {
+
+    a <- 2 * ifelse(!is.null(mlpost_params$prior_beta$a), mlpost_params$prior_beta$a, 1)
+    b <- 2
+    r <- 0
+    s <- 2 * ifelse(!is.null(mlpost_params$prior_beta$s), mlpost_params$prior_beta$s, 1)
+    v <- 1
+    k <- 1
+
+  } else if (mlpost_params$prior_beta[[1]] == "ZS-adapted") {
+    a <- 1
+    b <- 2
+    r <- 0
+    s <- n + 3
+    v <- 1
+    k <- 1
+  } else if (mlpost_params$prior_beta[[1]] == "robust") {
+    a <- 1
+    b <- 2
+    r <- 1.5
+    s <- 0
+    v <- (n + 1) / (p_M + 1)
+    k <- 1
+
+  } else if (mlpost_params$prior_beta[[1]] == "hyper-g-n") {
+    a <- 1
+    b <- 2
+    r <- 1.5
+    s <- 0
+    v <- 1
+    k <- 1
+
+  } else if (mlpost_params$prior_beta[[1]] == "intrinsic") {
+    a <- 1
+    b <- 1
+    r <- 1
+    s <- 0
+    v <- (n + p_M + 1) / (p_M + 1)
+    k <- (n + p_M + 1) / n
+
+  }else if (mlpost_params$prior_beta[[1]] == "tCCH") {
+    a <- mlpost_params$prior_beta$a
+    b <- mlpost_params$prior_beta$b
+    r <- mlpost_params$prior_beta$rho
+    s <- mlpost_params$prior_beta$s
+    v <- mlpost_params$prior_beta$v
+    k <- mlpost_params$prior_beta$k
+  }else {
+    stop("Unknown prior name: ", mlpost_params$prior_beta)
+  }
+
+  #
+  if (!is.null(r) & r == 0) {
+
+    term1 <- lbeta((a + p_M) / 2, b / 2)
+    term2 <- phi1(b / 2, (n - 1) / 2, (a + b + p_M) / 2, s / (2 * v), min(0.8, R2_M / (v - (v - 1) * R2_M)), log = TRUE)
+
+    if(R2_M/(v - (v - 1) * R2_M)>0.8)
+    {
+      warning("Infinite marginal log likelihood! phi1 last argument reduced to 0.8.
Use a different prior_beta (Robust, Hyper-g/n, Intrinsic, or g-prior)")
+    }
+
+    term3 <- lbeta(a / 2, b / 2)
+    term4 <- hypergeometric1F1(b / 2, (a + b) / 2, s / (2 * v), log = T)
+    marginal_likelihood <- log_likelihood + (term1) + (term2) - (p_M / 2) * log(v) - ((n - 1) / 2)*log(1 - (1 - 1 / v) * R2_M) - (term3) - (term4)
+  } else if (!is.null(s) & s == 0) {
+    term1 <- lbeta((a + p_M) / 2, b / 2)
+    term2 <- hypergeometric2F1(r, b / 2, (a + b) / 2, 1 - k, log = T)
+    term3 <- F1((a + p_M) / 2, (a + b + p_M + 1 - n - 2 * r) / 2, (n - 1) / 2, (a + b + p_M) / 2, 1 - k, 1 - k - (R2_M^2 * k) / ((1 - R2_M) * v))
+    marginal_likelihood <- log_likelihood + (a+p_M-2*r)/2*log(k) + (term1) - (term2) - (term3) - (p_M / 2) * log(v) - log(1 - R2_M) * ((n - 1) / 2) - lbeta(a / 2, b / 2)
+
+  } else {
+    stop("Invalid inputs: either r = 0 or s = 0 must be specified.")
+  }
+
+  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value of parameter r
+
+  lp <- log_prior(mlpost_params, complex)
+
+  return(list(crit = marginal_likelihood + lp, coefs = fitted_model$coefficients))
+}
+
+
+
+#' Log likelihood function for logistic regression with approximate Laplace approximations (ALA)
+#' This function is created as an example of how to create an estimator that is used
+#' to calculate the marginal likelihood of a model.
+#'
+#' @param y A vector containing the dependent variable
+#' @param x The matrix containing the precalculated features
+#' @param model The model to estimate as a logical vector
+#' @param complex A list of complexity measures for the features
+#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user
+#'
+#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs).
+#'
+#' @examples
+#' logistic.loglik2.ala(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1))
+#'
+#'
+#' @export logistic.loglik2.ala
+logistic.loglik2.ala <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5))) {
+  if (length(mlpost_params) == 0)
+    mlpost_params <- list(r = 1/dim(x)[1])
+  suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial(), maxit = 1)})
+  ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(mlpost_params$r) * sum(complex$oc))) / 2
+  return(list(crit=ret, coefs=mod$coefficients))
+}
+
+
+
+#' Log likelihood function for logistic regression for alpha calculation
+#' This function is just the bare likelihood function
+#'
+#' @param a A vector of the alphas to be used
+#' @param data The data to be used for calculation
+#' @param mu_func The function linking the mean to the covariates,
+#' as a string with the alphas as a\[i\].
+#'
+#' @return A numeric with the negative log likelihood (the quantity to be minimized).
+#'
+#' @export logistic.loglik2.alpha
+logistic.loglik2.alpha <- function (a, data, mu_func) {
+  m <- 1 / (1 + exp(-eval(parse(text = mu_func))))
+  -sum((data[,1] * log(m) + (1 - data[, 1]) * log(1 - m)))
+}
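+
+# A minimal sketch of the alpha-likelihood interface (the exact form of the
+# mu_func string is an assumption here): column 1 of data is the response, and
+# mu_func references the alphas as a[i] and the remaining columns of data.
+if (FALSE) {
+  set.seed(1)
+  dat <- cbind(as.integer(rnorm(50) > 0), rnorm(50))
+  logistic.loglik2.alpha(a = c(0.5), data = dat, mu_func = "a[1] * data[, 2]")
+}
+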
+#' Log likelihood function for gaussian regression for alpha calculation
+#' This function is just the bare likelihood function
+#' Note that it only gives a proportional value and is equivalent to least squares
+#'
+#' @param a A vector of the alphas to be used
+#' @param data The data to be used for calculation
+#' @param mu_func The function linking the mean to the covariates,
+#' as a string with the alphas as a\[i\].
+#'
+#' @return A numeric proportional to the negative log likelihood (the residual sum of squares).
+#' @examples
+#'\dontrun{
+#'gaussian.loglik2.alpha(my_alpha,my_data,my_mu)
+#'}
+#' @export gaussian.loglik2.alpha
+gaussian.loglik2.alpha <- function (a, data, mu_func) {
+  m <- eval(parse(text=mu_func))
+  sum((data[,1]-m)^2)
+}
+
+
+#' Log model prior function
+#' @param mlpost_params list of passed parameters of the likelihood in GMJMCMC
+#' @param complex list of complexity measures of the features included into the model
+#'
+#' @return A numeric with the log model prior.
+#'
+#' @examples
+#' log_prior(mlpost_params = list(r=2), complex = list(oc = 2))
+#'
+#' @export log_prior
+log_prior <- function (mlpost_params, complex) {
+  pl <- log(mlpost_params$r) * (sum(complex$oc))
+  return(pl)
+}
+
+
+#' Master Log Marginal Likelihood Function
+#'
+#' This function serves as a unified interface to compute the log marginal likelihood
+#' for different regression models and priors by calling specific log likelihood functions.
+#'
+#' @param y A numeric vector containing the dependent variable.
+#' @param x A matrix containing the precalculated features (independent variables).
+#' @param model A logical vector indicating which variables to include in the model.
+#' @param complex A list of complexity measures for the features.
+#' @param mlpost_params A list of parameters controlling the model family, prior, and tuning parameters.
+#' Key elements include:
+#' - family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian")
+#' - prior_beta: Type of prior as a string (default: "g-prior"). Possible values include:
+#'   - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args)
+#'   - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`)
+#'   - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian)
+#'   - "EB-global": Empirical Bayes global prior (Gaussian, requires `a` for Gaussian)
+#'   - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`)
+#'   - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`)
+#'   - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`)
+#'   - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`)
+#'   - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args)
+#'   - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`)
+#'   - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args)
+#'   - "uniform": Uniform prior (GLM/Gaussian, no additional args)
+#'   - "benchmark": Benchmark prior (Gaussian/GLM, no additional args)
+#'   - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args)
+#'   - "robust": Robust prior (Gaussian/GLM, no additional args)
+#'   - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM)
+#'   - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`)
+#'   - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`)
+#'   - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`)
+#'   - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`)
+#'   - "BIC": BIC prior from BAS (Gaussian/GLM)
+#'   - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`)
+#' - r: Model complexity penalty (default: 1/n)
+#' - g: Tuning parameter for g-prior (default: max(n, p^2))
+#' - a, b, s, v, rho, k: Hyperparameters for various priors
+#' - n: Sample size for some priors (default: length(y))
+#' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown")
+#' - laplace: Logical for Laplace
approximation in GLM only (default: FALSE)
+#'
+#' @return A list with elements:
+#' \item{crit}{Log marginal likelihood combined with the log prior.}
+#' \item{coefs}{Posterior mode of the coefficients.}
+#'
+#' @examples
+#' fbms.mlik.master_old(rnorm(100), matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1))
+#'
+#' @importFrom BAS robust beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform
+#' @export
+fbms.mlik.master_old <- function(y, x, model, complex, mlpost_params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5))) {
+  # Extract dimensions
+  n <- length(y)
+  p <- sum(model) - 1 # Number of predictors excluding intercept
+
+  # Set default parameters if not fully specified
+  if (is.null(mlpost_params$family)) mlpost_params$family <- "gaussian"
+  if (is.null(mlpost_params$prior_beta)) mlpost_params$prior_beta <- "g-prior"
+  if (is.null(mlpost_params$g)) mlpost_params$g <- max(p^2, n)
+  if (is.null(mlpost_params$n)) mlpost_params$n <- n
+  if (is.null(mlpost_params$r)) mlpost_params$r <- 1/n
+
+  # Ensure complex has oc; if missing, set it to 0 so it is ignored
+  if (is.null(complex$oc)) complex$oc <- 0
+
+  # Homogenize and prepare mlpost_params for nested calls
+  params_master <- mlpost_params
+  params_nested <- list(r = params_master$r)
+
+  # Define valid priors for each family
+  #glm_only_priors <- c("CCH", "tCCH", "TG")
+  glm_and_gaussian_priors <- c("CH", "tCCH", "TG", "beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n",
+                               "intrinsic", "ZS-adapted", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC")
+  gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace", "BIC", "AIC", "JZS", "EB-global")
+
+  #review a bit
+  gaussian_not_robust <- c("CH", "tCCH", "ZS-adapted", "TG", "beta.prime", "benchmark", "Jeffreys")
+  gaussian_robust <- c("g-prior", "hyper-g", "EB-local", "BIC", "Jeffreys-BIC", "ZS-null", "ZS-full", "hyper-g-laplace",
+                       "AIC", "hyper-g-n", "JZS")
+  gaussian_tcch <- c("CH", "tCCH", "TG", "beta.prime", "intrinsic", "ZS-adapted", "uniform", "Jeffreys", "benchmark", "robust")
+  gaussian_bas <- c("g-prior", "hyper-g", "EB-local", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS")
+
+  all_priors <- c(glm_and_gaussian_priors, gaussian_only_priors)
+  #browser()
+  # Validate prior_beta
+  if (!params_master$prior_beta %in% all_priors) {
+    stop(sprintf("Prior '%s' is not supported. Supported priors: %s",
+                 params_master$prior_beta, paste(all_priors, collapse = ", ")))
+  }
+
+  # Decision logic based on family and prior_beta
+  if (params_master$family %in% c("binomial", "poisson", "gamma")) {
+    if (params_master$prior_beta %in% gaussian_only_priors) {
+      stop(sprintf("Prior '%s' is not supported for GLM family '%s'.
Supported GLM priors: %s",
+                   params_master$prior_beta, params_master$family,
+                   paste(c(glm_and_gaussian_priors), collapse = ", ")))
+    }
+
+    params_nested$family <- params_master$family
+    if (is.null(params_master$laplace)) params_nested$laplace <- FALSE else params_nested$laplace <- params_master$laplace
+
+    #if(params_nested$laplace)
+    #  print("Laplace approximations will be used")
+
+    if (params_master$prior_beta == "Jeffreys-BIC") {
+      if(params_nested$family == "binomial")
+        result <- logistic.loglik2(y, x, model, complex, params_nested)
+      else if(params_nested$family %in% c("poisson", "gamma"))
+        result <- glm.loglik2(y, x, model, complex, params_nested)
+
+    } else {
+      params_nested$prior_beta <- switch(
+        params_master$prior_beta,
+        "beta.prime" = beta.prime(n = n),
+        "CH" = CCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
+                   beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b,
+                   s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s),
+        "EB-local" = EB.local(),
+        "g-prior" = g.prior(g = params_master$g),
+        "hyper-g" = hyper.g(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a),
+        "tCCH" = tCCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
+                      beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b,
+                      s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s,
+                      r = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho,
+                      v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v,
+                      theta = if (is.null(params_master$k)) stop("k must be provided") else params_master$k),
+        "intrinsic" = intrinsic(n = params_master$n),
+        "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a),
+        "Jeffreys" = Jeffreys(),
+        "uniform" = tCCH(alpha = 2,
+                         beta = 2,
+                         s = 0,
+                         r = 0,
+                         v = 1,
+                         theta = 1),
+        "benchmark" = tCCH(alpha = 0.02,
+                           beta = 0.02*max(n,p^2),
+                           s = 0,
+                           r = 0,
+                           v = 1,
+                           theta = 1),
+        "ZS-adapted" = tCCH(alpha = 1,
+                            beta = 2,
+                            s = n + 3,
+                            r = 0,
+                            v = 1,
+                            theta = 1),
+        "robust" = robust(n = if (is.null(params_master$n)) as.numeric(n) else as.numeric(params_master$n)),
+        "hyper-g-n" = hyper.g.n(alpha = if (is.null(params_master$a)) 3 else params_master$a,
+                                n = params_master$n),
+        "BIC" = bic.prior(n = if (is.null(params_master$n)) n else params_master$n),
+        stop("Unrecognized prior_beta for GLM: ", params_master$prior_beta)
+      )
+      result <- glm.logpost.bas2(y, x, model, complex, params_nested)
+    }
+  } else if (params_master$family == "gaussian") {
+
+    if (params_master$prior_beta %in% gaussian_not_robust) {
+      warning(sprintf("Prior '%s' is not recommended for Gaussian family '%s'. It can be unstable for strong signals (R^2 > 0.9).
Recommended priors under Gaussian family: %s",
+                      params_master$prior_beta, params_master$family,
+                      paste(gaussian_robust, collapse = ", ")))
+    }
+
+    params_nested$r <- params_master$r
+
+    if (params_master$prior_beta %in% gaussian_tcch) {
+
+      params_nested$prior_beta <- switch(
+        params_master$prior_beta,
+        "beta.prime" = list("beta.prime"),
+        "CH" = list("CH", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
+                    b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b,
+                    s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s),
+        "tCCH" = list("tCCH", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
+                      b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b,
+                      s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s,
+                      rho = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho,
+                      v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v,
+                      k = if (is.null(params_master$k)) stop("k must be provided") else params_master$k),
+        "intrinsic" = list("intrinsic"),
+        "TG" = list("TG", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
+                    s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s),
+        "Jeffreys" = list("Jeffreys"),
+        "ZS-adapted" = list("ZS-adapted"),
+        "benchmark" = list("benchmark"),
+        "robust" = list("robust"),
+        "uniform" = list("uniform"),
+        stop("Unrecognized prior_beta for Gaussian: ", params_master$prior_beta)
+      )
+      result <- gaussian_tcch_log_likelihood2(y, x, model, complex, params_nested)
+
+    }else if (params_master$prior_beta == "Jeffreys-BIC") {
+      if (is.null(params_master$var)) params_nested$var <- "unknown" else params_nested$var <- params_master$var
+      result <- gaussian.loglik2(y, x, model, complex, params_nested)
+    } else if (params_master$prior_beta %in% gaussian_bas) {
+
+      params_nested$prior_beta <- switch(
+        params_master$prior_beta,
+        "g-prior" = 0,
+        "hyper-g" = 1,
+        "EB-local" = 2,
+        "BIC" = 3,
+        "ZS-null" = 4,
+        "ZS-full" = 5,
+        "hyper-g-laplace" = 6,
+        "AIC" = 7,
+        "EB-global" = 2,
+        "hyper-g-n" = 8,
+        "JZS" = 9
+      )
+      if(params_master$prior_beta == "g-prior")
+      {
+        if (!is.null(params_master$g)) params_nested$g <- params_master$g else stop("g must be provided")
+        result <- gaussian.loglik2.g(y, x, model, complex, params_nested)
+      }
+      else{
+        if (!is.null(params_master$a)) params_nested$alpha <- params_master$a else params_nested$alpha = -1
+        result <- lm.logpost.bas2(y, x, model, complex, params_nested)
+      }
+
+    } else {
+      stop("Unexpected error in prior_beta logic for Gaussian.")
+    }
+  } else {
+    stop("Unsupported family: ", params_master$family, ".
Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'.") + } + + # Ensure consistent return structure + if (is.null(result$crit) || is.null(result$coefs)) { + stop("Error in computation: Returned result does not contain 'crit' and 'coefs'.") + } + + return(list(crit = result$crit, coefs = result$coefs)) +} \ No newline at end of file diff --git a/R_script/oldvers/new general estimators.R b/R_script/oldvers/new general estimators.R new file mode 100644 index 0000000..eec656e --- /dev/null +++ b/R_script/oldvers/new general estimators.R @@ -0,0 +1,262 @@ +library(microbenchmark) +library(Rcpp) +library(RcppArmadillo) +library(fastglm) +library(BAS) + +# Original R function +estimate.logic.tcch.general <- function(y, x, model, complex, params) { + + if (length(params) == 0) + params <- list(r = 1/dim(x)[1]) + + suppressWarnings({ + mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) + }) + + # Compute the general complexity prior + log_prior <- log(params$r) * sum(complex$oc) + + # Compute other terms for tCCH prior on g + p.v <- (params$n + 1) / (mod$rank + 1) + + y_mean <- mean(y) + TSS <- sum((y - y_mean)^2) + RSS <- sum(mod$residuals^2) + R.2 <- 1 - (RSS / TSS) + p <- mod$rank + + # Marginal log-likelihood using tCCH prior for g + mloglik = (-0.5 * p * log(p.v) + - 0.5 * (params$n - 1) * log(1 - (1 - 1/p.v) * R.2) + + log(beta((params$p.a + p) / 2, params$p.b / 2)) + - log(beta(params$p.a / 2, params$p.b / 2)) + + log(phi1(params$p.b / 2, + (params$n - 1) / 2, + (params$p.a + params$p.b + p) / 2, + params$p.s / (2 * p.v), + R.2 / (p.v - (p.v - 1) * R.2))) + - hypergeometric1F1(params$p.b / 2, + (params$p.a + params$p.b) / 2, + params$p.s / (2 * p.v), log = T)) + + # Stability check + if (is.na(mloglik) || is.nan(mloglik) || mloglik == -Inf) { + mloglik = -10000 + } + + logpost <- mloglik + log_prior + + # Stability check for final log-posterior + if (logpost == -Inf) { + logpost = -10000 + } + + return(list(crit = logpost, coefs = mod$coefficients)) +} + + +# Rcpp implementation of the function +cppFunction(depends = "RcppArmadillo", code = ' +double compute_log_posterior(NumericVector residuals, int p, int n, double r, double p_a, double p_b, double p_s, NumericVector complexity_oc) { + // Compute R^2 + double RSS = sum(residuals * residuals); + double TSS = sum((residuals - mean(residuals)) * (residuals - mean(residuals))); + double R2 = 1 - (RSS / TSS); + + // Compute log prior complexity term + double log_prior_complexity = log(r) * sum(complexity_oc); + + // Compute p.v + double p_v = (n + 1.0) / (p + 1.0); + + // Compute marginal log likelihood + double mloglik = (-0.5 * p * log(p_v) + - 0.5 * (n - 1) * log(1 - (1 - 1 / p_v) * R2) + + R::lbeta((p_a + p) / 2.0, p_b / 2.0) + - R::lbeta(p_a / 2.0, p_b / 2.0) + + log(R::pbeta(R2 / (p_v - (p_v - 1) * R2), p_b / 2.0, (n - 1) / 2.0, 1, 0)) + - R::pbeta(p_s / (2.0 * p_v), p_b / 2.0, (p_a + p_b) / 2.0, 1, 1)); + + // Stability check + if (std::isnan(mloglik) || std::isinf(mloglik)) { + mloglik = -10000; + } + + double logpost = mloglik + log_prior_complexity + n; + + if (std::isinf(logpost)) { + logpost = -10000; + } + + return logpost; +}') + + + +estimate_logic_tcch_rcpp <- function(y, x, model, complex, params) { + + if (length(params) == 0) + params <- list(r = 1 / nrow(x)) + + # Fit the model using fastglm + suppressWarnings({ + mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) + }) + + # Call the Rcpp function for log-posterior computation + logpost <- compute_log_posterior( + residuals = 
mod$residuals, + p = mod$rank, + n = params$n, + r = params$r, + p_a = params$p.a, + p_b = params$p.b, + p_s = params$p.s, + complexity_oc = complex$oc + ) + + return(list(crit = logpost, coefs = mod$coefficients)) +} + +# Generate test data +set.seed(42) +n <- 100 +p <- 5 +X <- matrix(rnorm(n * p), n, p) +beta <- c(1, -1, 0.5, 0, 0) +y <- X %*% beta + rnorm(n) + +params <- list(n = n, p.a = 1, p.b = 1, p.s = 0.1, r = 0.01) # Prior hyperparameters +model <- c(1, 2, 3) # Assume we select variables 1, 2, 3 +complex <- list(oc = c(1, 1, 1)) # Example complexity for selected model + +# Convert for Rcpp function +model_vec <- as.integer(model) # Convert to unsigned integer vector for Rcpp +complex_vec <- as.numeric(complex$oc) + +# Run both implementations +result_r <- estimate.logic.tcch.general(y, X, model, complex, params) +result_rcpp <- estimate_logic_tcch_rcpp(y, X, model, complex, params) + +# Check if results match +print("Checking results...") +print(all.equal(result_r$crit, result_rcpp$crit, tolerance = 1e-6)) +print(all.equal(result_r$coefs, as.numeric(result_rcpp$coefs), tolerance = 1e-6)) + +# Benchmarking +bench <- microbenchmark( + R = estimate.logic.tcch.general(y, X, model, complex, params), + Rcpp = estimate_logic_tcch_rcpp(y, X, model_vec, complex_vec, params$r, params$n, params$p.a, params$p.b, params$p.s), + times = 100 +) + +# Print benchmark results +print(bench) + +# Calculate speedup +speedup <- median(bench$time[bench$expr == "R"]) / median(bench$time[bench$expr == "Rcpp"]) +print(paste("Speedup: ", round(speedup, 2), "x")) + + + +?BAS::bayesglm.fit(x = X, y = y>mean(y),family = binomial(),coefprior = aic.prior) + + +library(BAS) + +set.seed(42) +n <- 100 +p <- 5 +X <- matrix(rnorm(n * p), n, p) +beta <- c(1, -1, 0.5, 0, 0) +y <- X %*% beta + rnorm(n) + +data <- data.frame(y = y>mean(y), x = X) + + +suppressWarnings({mod <- bas.glm(y ~ 1+x.1, data = data, betaprior = CCH(alpha = 0.5, beta = as.numeric(nrow(data)), s = 0), method = "deterministic", family = binomial(), modelprior = uniform(), n.models = 2, initprobs = 'eplogp', laplace = T)}) +mod$logmarg + + +result <- tryCatch({ +result <- .Call(BAS:::C_glm_deterministic, + y = as.numeric(y>mean(y)), X = cbind(1,X[,1]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = as.numeric(c(1,0.99)), + Rmodeldim = as.integer(c(0,0)), + modelprior = uniform(), + betaprior = CCH(alpha = 0.5, beta = as.numeric(nrow(data)), s = 0), + family = binomial(), + Rcontrol = glm.control(), + Rlaplace = as.integer(T)) +lm = result$logmarg +print(lm) +}, error = function(e) { + warning("Error in C call: ", e$message) + list(logmarg = NA) +}) + +for (i in 1:1500) { + result <- .Call(BAS:::C_glm_deterministic, + y = as.numeric(y > mean(y)), + X = cbind(1, X), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = as.numeric(c(1, 0.99)), + Rmodeldim = as.integer(0), + modelprior = BAS::uniform(), + betaprior = betaprior, + family = family, + Rcontrol = control, + Rlaplace = as.integer(FALSE)) + print(result$logmarg) +} + +data <- data.frame(y = y, x = X) +mod <- bas.lm(y ~ 1+x.1, data = data, prior = "ZS-null", alpha = 4, method = "deterministic", modelprior = uniform(), n.models = 2, initprobs = 'eplogp') +mod$logmarg + + +result <- .Call(BAS:::C_glm_deterministic, + y = as.numeric(y>mean(y)), X = cbind(1,X[,1]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = as.numeric(c(1,0.99)), + Rmodeldim 
= as.integer(c(0,0)), + modelprior = uniform(), + betaprior = CCH(alpha = 0.5, beta = as.numeric(nrow(data)), s = 0), + family = binomial(), + Rcontrol = glm.control(), + Rlaplace = as.integer(T)) + +result <- .Call(BAS:::C_deterministic, + y = y, + X = cbind(1,X[,1]), + as.numeric(rep(1, length(y))), + as.numeric(c(1,0.99)), + as.integer(c(0,0)), + incint = as.integer(TRUE), + alpha = as.numeric(4), + method = as.integer(0), + modelprior = uniform(), + Rpivot = TRUE, + Rtol = 1e-7 + ) +result$logmarg + +method.num <- switch( + prior, + "g-prior" = 0, + "hyper-g" = 1, + "EB-local" = 2, + "BIC" = 3, + "ZS-null" = 4, + "ZS-full" = 5, + "hyper-g-laplace" = 6, + "AIC" = 7, + "EB-global" = 2, + "hyper-g-n" = 8, + "JZS" = 9 +) \ No newline at end of file diff --git a/R_script/oldvers/synthetic_gaussian_data.csv b/R_script/oldvers/synthetic_gaussian_data.csv new file mode 100644 index 0000000..c9a4f0c --- /dev/null +++ b/R_script/oldvers/synthetic_gaussian_data.csv @@ -0,0 +1,48 @@ +X1,X2,X3,X4,X5,Y +0.4967141530112327,-0.13826430117118466,0.6476885381006925,1.5230298564080254,-0.23415337472333597,0.4288730369987499 +-0.23413695694918055,1.5792128155073915,0.7674347291529088,-0.4694743859349521,0.5425600435859647,-1.4417490630610725 +-0.46341769281246226,-0.46572975357025687,0.24196227156603412,-1.913280244657798,-1.7249178325130328,1.5408190135238018 +-0.5622875292409727,-1.0128311203344238,0.3142473325952739,-0.9080240755212109,-1.4123037013352915,2.236874253556987 +1.465648768921554,-0.22577630048653566,0.06752820468792384,-1.4247481862134568,-0.5443827245251827,2.757722204323676 +0.11092258970986608,-1.1509935774223028,0.37569801834567196,-0.600638689918805,-0.2916937497932768,3.5866875225449184 +-0.6017066122293969,1.8522781845089378,-0.013497224737933921,-1.0577109289559004,0.822544912103189,-1.7814283691741235 +-1.2208436499710222,0.2088635950047554,-1.9596701238797756,-1.3281860488984305,0.19686123586912352,0.4191701822203946 +0.7384665799954104,0.1713682811899705,-0.11564828238824053,-0.3011036955892888,-1.4785219903674274,0.21499003036051567 +-0.7198442083947086,-0.4606387709597875,1.0571222262189157,0.3436182895684614,-1.763040155362734,-0.016136705356076468 +0.324083969394795,-0.38508228041631654,-0.6769220003059587,0.6116762888408679,1.030999522495951,2.2070453676729884 +0.9312801191161986,-0.8392175232226385,-0.3092123758512146,0.33126343140356396,0.9755451271223592,3.9363850721912477 +-0.47917423784528995,-0.18565897666381712,-1.1063349740060282,-1.1962066240806708,0.812525822394198,2.562420375354225 +1.356240028570823,-0.07201012158033385,1.0035328978920242,0.36163602504763415,-0.6451197546051243,1.5066892713929099 +0.36139560550841393,1.5380365664659692,-0.03582603910995154,1.5646436558140062,-2.6197451040897444,-5.312882766840247 +0.8219025043752238,0.08704706823817121,-0.2990073504658674,0.0917607765355023,-1.9875689146008928,-0.5144337487656874 +-0.21967188783751193,0.3571125715117464,1.477894044741516,-0.5182702182736474,-0.8084936028931876,0.19359008863520835 +-0.5017570435845365,0.9154021177020741,0.32875110965968446,-0.5297602037670388,0.5132674331133561,-0.39324862858882215 +0.09707754934804039,0.9686449905328892,-0.7020530938773524,-0.3276621465977682,-0.39210815313215763,-1.104027489184881 +-1.4635149481321186,0.29612027706457605,0.26105527217988933,0.00511345664246089,-0.23458713337514692,-0.6046513704872727 +-1.4153707420504142,-0.42064532276535904,-0.3427145165267695,-0.8022772692216189,-0.16128571166600914,1.191493593205777 
+0.4040508568145384,1.8861859012105302,0.17457781283183896,0.25755039072276437,-0.07444591576616721,-2.7709651627312986 +-1.9187712152990415,-0.026513875449216878,0.06023020994102644,2.463242112485286,-0.19236096478112252,-2.2696629342119454 +0.30154734233361247,-0.03471176970524331,-1.168678037619532,1.1428228145150205,0.7519330326867741,0.82577716502127 +0.7910319470430469,-0.9093874547947389,1.4027943109360992,-1.4018510627922809,0.5868570938002703,5.007372306331659 +2.1904556258099785,-0.9905363251306883,-0.5662977296027719,0.09965136508764122,-0.5034756541161992,3.669054455635204 +-1.5506634310661327,0.06856297480602733,-1.0623037137261049,0.4735924306351816,-0.9194242342338032,-1.25383975343152 +1.5499344050175394,-0.7832532923362371,-0.3220615162056756,0.8135172173696698,-1.2308643164339552,1.778064146771105 +0.22745993460412942,1.307142754282428,-1.6074832345612275,0.1846338585323042,0.25988279424842353,-1.747341344407948 +0.7818228717773104,-1.236950710878082,-1.3204566130842763,0.5219415656168976,0.29698467323318606,3.6752289493140053 +0.25049285034587654,0.3464482094969757,-0.6800247215784908,0.23225369716100355,0.29307247329868125,0.5706413376611352 +-0.7143514180263678,1.8657745111447566,0.4738329209117875,-1.1913034972026486,0.6565536086338297,-1.9384816827002367 +-0.9746816702273214,0.787084603742452,1.158595579007404,-0.8206823183517105,0.9633761292443218,0.4848440948657714 +0.4127809269364983,0.82206015999449,1.8967929826539474,-0.2453881160028705,-0.7537361643574896,-0.5285461135761962 +-0.8895144296255233,-0.8158102849654383,-0.0771017094141042,0.3411519748166439,0.27669079933001905,1.985815923067319 +0.8271832490360238,0.01300189187790702,1.4535340771573169,-0.2646568332379561,2.720169166589619,4.30917374326559 +0.6256673477650062,-0.8571575564162826,-1.0708924980611123,0.4824724152431853,-0.2234627853258509,2.4429413056749008 +0.714000494092092,0.47323762457354485,-0.07282891265687277,-0.846793718068405,-1.5148472246858646,-0.3102657750810609 +-0.4465149520670211,0.8563987943234723,0.21409374413020396,-1.245738778711988,0.173180925851182,0.0015531759809442412 +0.3853173797288368,-0.883857436201133,0.1537251059455279,0.058208718445999896,-1.142970297830623,2.011500976278653 +0.3577873603482833,0.5607845263682344,1.083051243175277,1.053802052034903,-1.377669367957091,-1.5673323288417744 +-0.9378250399151228,0.5150352672086598,0.5137859509122088,0.5150476863060479,3.852731490654721,2.467830854350032 +0.570890510693167,1.135565640180599,0.9540017634932023,0.651391251305798,-0.3152692446403456,-1.3011324837202884 +0.7589692204932674,-0.7728252145375718,-0.23681860674000887,-0.48536354782910346,0.08187413938632256,3.2692681514162683 +2.3146585666735087,-1.867265192591748,0.6862601903745135,-1.6127158711896517,-0.47193186578943347,7.2550269496210715 +1.088950596967366,0.06428001909546277,-1.0777447779293061,-0.7153037092599682,0.6795977489346758,2.2166614380546554 +-0.7303666317171367,0.21645858958197486,0.045571839903813784,-0.6516003476058171,2.1439440893253257,2.3507554477205126 diff --git a/tests_current/Ex10_Sec6_3.R b/tests_current/Ex10_Sec6_3.R deleted file mode 100644 index 433b8db..0000000 --- a/tests_current/Ex10_Sec6_3.R +++ /dev/null @@ -1,102 +0,0 @@ -####################################################### -# -# Example 10 (Section 6.3): Epil data set from the INLA package -# -# Mixed Effect Poisson Model with Fractional Polynomials, using only fbms -# -# This is the valid version for the JSS Paper -# -####################################################### - 
-library(FBMS) -library(INLA) -library(tictoc) - -data <- INLA::Epil -data <- data[,-c(5,6)] - -df <- data[1:5] -df$V2 <- rep(c(0,1,0,0),59) -df$V3 <- rep(c(0,0,1,0),59) -df$V4 <- rep(c(0,0,0,1),59) - -transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2") -probs <- gen.probs.gmjmcmc(transforms) -probs$gen <- c(1,1,0,1) # Only modifications! - -params <- gen.params.gmjmcmc(ncol(df) - 1) -params$feat$D <- 2 # Set depth of features to 2 (allow for interactions) -params$feat$keep.min <- 0.2 -params$greedy$steps <- 2 -params$greedy$tries <- 1 -params$sa$t.min <- 0.1 -params$sa$dt <- 10 - -# function to estimate log posterior -poisson.loglik.inla <- function (y, x, model, complex, mlpost_params) -{ - if(sum(model)>1) - { - data1 <- data.frame(y, as.matrix(x[,model]), mlpost_params$PID) - formula1 <- as.formula(paste0(names(data1)[1],"~",paste0(names(data1)[3:(dim(data1)[2]-1)],collapse = "+"),"+ f(mlpost_params.PID,model = \"iid\")")) - } else - { - data1 <- data.frame(y, mlpost_params$PID) - formula1 <- as.formula(paste0(names(data1)[1],"~","1 + f(mlpost_params.PID,model = \"iid\")")) - } - - #to make sure inla is not stuck - inla.setOption(inla.timeout=30) - inla.setOption(num.threads=mlpost_params$INLA.num.threads) - - mod<-NULL - - #error handling for unstable libraries that might crash - tryCatch({ - mod <- inla(family = "poisson",silent = 1L,safe = F,data = data1,formula = formula1) - }, error = function(e) { - # Handle the error by setting result to NULL - mod <- NULL - # Print a message or log the error if needed - cat("An error occurred:", conditionMessage(e), "\n") - }) - - # logarithm of model prior - if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value or parameter r - lp <- log_prior(mlpost_params, complex) - - if(length(mod)<3||length(mod$mlik[1])==0) { - return(list(crit = -10000 + lp,coefs = rep(0,dim(data1)[2]-2))) - } else { - mloglik <- mod$mlik[1] - return(list(crit = mloglik + lp, coefs = mod$summary.fixed$mode)) - } -} - -set.seed(03052024) -#specify indices for a random effect - -result <- fbms(formula = y ~ 1+., data = df, transforms = transforms, - method = "gmjmcmc", probs = probs, params = params, P=25, N = 100, - family = "custom", loglik.pi = poisson.loglik.inla, - model_prior = list(r = 1/dim(df)[1]), - extra_params = list(PID = data$Ind, INLA.num.threads = 1)) - -plot(result) -summary(result) - - -set.seed(23052024) - -tic() -# Number of threads used by INLA set to 1 to avoid conflicts between two layers of parallelization -result2 <- fbms(formula = y ~ 1+., data = df, transforms = transforms, - probs = probs, params = params, P=25, N = 100, - method = "gmjmcmc.parallel", runs = 40, cores = 40, - family = "custom", loglik.pi = poisson.loglik.inla, - model_prior = list(r = 1/dim(df)[1]), - extra_params = list(PID = data$Ind, INLA.num.threads = 1)) -time.inla <- toc() - -plot(result2) -summary(result2, labels = names(df)[-1], tol = 0.01) \ No newline at end of file diff --git a/tests_current/Ex11_Sec6_4.R b/tests_current/Ex11_Sec6_4.R deleted file mode 100644 index 92b10fb..0000000 --- a/tests_current/Ex11_Sec6_4.R +++ /dev/null @@ -1,185 +0,0 @@ -####################################################### -# -# Example 11 (Section 6.4): -# -# Subsampling, using only fbms -# -# Heart Disease Health Indicators Dataset” -# -# This is the valid version for the JSS Paper -# -####################################################### - -library(tictoc) -library(FBMS) 
-#library(devtools)
-#devtools::install_github("jonlachmann/irls.sgd", force=T, build_vignettes=F)
-library(irls.sgd)
-#Kaggle API
-library(RKaggle)
-
-# Download latest version
-df <- RKaggle::get_dataset("alexteboul/heart-disease-health-indicators-dataset")
-
-summary(df)
-dim(df)
-
-
-
-#number of observations and covariates in the data
-
-n <- dim(df)[1]
-p <- dim(df)[2] - 1
-
-params <- gen.params.gmjmcmc(p)
-transforms <- c("sigmoid","pm1","p0","p05","p2","p3")
-
-r = 0.01 # Parameter for the model prior
-
-logistic.posterior.bic.irlssgd <- function (y, x, model, complex, mlpost_params)
-{
-  if (!is.null(mlpost_params$crit)) {
-    mod <- glm.sgd(x[,model], y, binomial(),
-                   sgd.ctrl = list(start=mlpost_params$coefs, subs=mlpost_params$subs,
-                                   maxit=10, alpha=0.00008, decay=0.99, histfreq=10))
-    mod$deviance <- get_deviance(mod$coefficients, x[,model], y, binomial())
-    mod$rank <- length(mod$coefficients)
-  } else {
-    mod <- irls.sgd(as.matrix(x[,model]), y, binomial(),
-                    irls.control=list(subs=mlpost_params$subs, maxit=20, tol=1e-7,
-                                      cooling = c(1,0.9,0.75), expl = c(3,1.5,1)),
-                    sgd.control=list(subs=mlpost_params$subs, maxit=250, alpha=0.001,
-                                     decay=0.99, histfreq=10))
-  }
-
-  # logarithm of marginal likelihood
-  mloglik <- -mod$deviance / 2 - 0.5 * log(length(y)) * (mod$rank - 1)
-
-  # logarithm of model prior
-  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value of parameter r
-  lp <- log_prior(mlpost_params, complex)
-  crit <- mloglik + lp
-
-  if (!is.null(mlpost_params$crit) && mlpost_params$crit > crit) {
-    return(list(crit = mlpost_params$crit, coefs = mlpost_params$coefs))
-  }
-
-  return(list(crit = crit, coefs = mod$coefficients))
-}
-
-
-
-############################
-#
-# Testing runtime
-#
-############################
-
-set.seed(100001)
-tic()
-result1 <- fbms(formula = HeartDiseaseorAttack ~ 1 + ., data = df, P = 2,
-                transforms = transforms, params = params, method = "gmjmcmc",
-                family = "custom", loglik.pi = logistic.posterior.bic.irlssgd,
-                model_prior = list(r = r, subs = 0.01), sub = T)
-time1 <- toc()
-
-set.seed(100002)
-# regular analysis
-tic()
-result2 <- fbms(formula = HeartDiseaseorAttack ~ 1 + ., data = df, P = 2,
-                transforms = transforms, params = params, method = "gmjmcmc",
-                family = "binomial", beta_prior = list(type = "Jeffreys-BIC"),
-                model_prior = list(r = r))
-time2 <- toc()
-
-c(time1, time2)
-
-############################
-#
-# More serious analysis
-#
-############################
-
-
-# with subsampling
-
-set.seed(100003)
-
-tic()
-result_parallel_1 <- fbms(formula = HeartDiseaseorAttack ~ 1 + ., data = df, P = 3,
-                          transforms = transforms, params = params,
-                          method = "gmjmcmc.parallel", runs = 10, cores = 10,
-                          family = "custom", loglik.pi = logistic.posterior.bic.irlssgd,
-                          model_prior = list(r = r, subs = 0.01), sub = T)
-time3 <- toc()
-
-summary(result_parallel_1)
-
-# without subsampling
-
-
-set.seed(100004)
-
-tic()
-result_parallel_2 <- fbms(formula = HeartDiseaseorAttack ~ 1 + ., data = df, P = 3,
-                          transforms = transforms, params = params, family = "binomial",
-                          method = "gmjmcmc.parallel", runs = 10, cores = 10,
-                          model_prior = list(r = r),
-                          beta_prior = list(type = "Jeffreys-BIC"))
-time4 <- toc()
-
-summary(result_parallel_2)
-
-filename = paste0("Ex11_Results_",r,"_4.RData")
-save.image(filename)
-
-############################
-#
-# Final analysis
-#
-############################
-
-# with subsampling
-
-set.seed(100005)
-tic()
-result_parallel_long_1 <- fbms(formula = HeartDiseaseorAttack ~ 1
+ ., data = df, P = 10, - transforms = transforms, params = params, N = 500, - method = "gmjmcmc.parallel", runs = 40, cores = 40, - family = "custom", loglik.pi = logistic.posterior.bic.irlssgd, - model_prior = list(r = r, subs = 0.01), sub = T) - -time5 <- toc() -summary(result_parallel_long_1) - -filename = paste0("Ex11_Results_",r,"_5.RData") -save.image(filename) - -# regular analysis - - -set.seed(100006) - -tic() -result_parallel_long_2 <- fbms(formula = HeartDiseaseorAttack ~ 1 + ., data = df, P = 10, - transforms = transforms, params = params, family = "binomial", - method = "gmjmcmc.parallel", runs = 40, cores = 40, N = 500, - model_prior = list(r = r), - beta_prior = list(type = "Jeffreys-BIC")) -time6 <- toc() - - -summary(result_parallel_long_2) - - -############################################################################ - -C = cor(df, use = "everything", - method = "spearman") - -corrplot::corrplot(C) - -apply((abs(C - diag(diag(C)))), 2, max) - -filename = paste0("Ex11_Results_",r,".RData") -save.image(filename) \ No newline at end of file diff --git a/tests_current/Ex1_Sec3.R b/tests_current/Ex1_Sec3.R deleted file mode 100644 index c3c0cf1..0000000 --- a/tests_current/Ex1_Sec3.R +++ /dev/null @@ -1,213 +0,0 @@ -################################################# -# -# Example 1: -# -# Kepler Example with the most recent database update, only using fbms function -# -# This is the valid version for the JSS paper -# -################################################## - - -#install.packages("FBMS") -library(FBMS) - -data(exoplanet) - -train.indx <- 1:500 -df.train = exoplanet[train.indx, ] -df.test = exoplanet[-train.indx, ] - - -to3 <- function(x) x^3 -transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") - - -#################################################### -# -# single thread analysis (default values, Section 3.1) -# -#################################################### - - -set.seed(123) - -result.default <- fbms(formula = semimajoraxis ~ 1 + . 
, data = df.train, method = "gmjmcmc", transforms = transforms)
-
-get.best.model(result.default)
-
-####################################################
-#
-# single thread analysis (more iterations, Section 3.2)
-#
-####################################################
-
-
-set.seed(123)
-
-result.P50 <- fbms(data = df.train, method = "gmjmcmc", transforms = transforms,
-                   P = 50, N = 1000, N.final = 5000)
-
-
-# coef: posterior modes of the effects for the best model (sentence still to be added in the paper)
-# note: result_parallel is only created in Section 3.3 below
-coef(result_parallel)
-
-# for predictions: rename 'mean' to 'postmean' (same for 'postquantile')
-
-
-
-####################################################
-#
-# multiple thread analysis (Section 3.3)
-#
-####################################################
-
-set.seed(123)
-
-result_parallel <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms,
-                        runs = 40, cores = parallel::detectCores()-1, P = 25)
-
-
-####################################################
-#
-# Inspection of Results (Section 3.4)
-#
-####################################################
-
-######################
-# summary
-
-summary(result.default)
-summary(result.default, pop = "all", labels = paste0("x",1:length(df.train[,-1])))
-
-
-summary(result.P50)
-summary(result.P50, pop = "best", labels = paste0("x",1:length(df.train[,-1])))
-summary(result.P50, pop = "last", labels = paste0("x",1:length(df.train[,-1])))
-summary(result.P50, pop = "last", tol = 0.01, labels = paste0("x",1:length(df.train[,-1])))
-
-
-summary(result_parallel)
-library(tictoc)
-tic()
-summary(result_parallel, tol = 0.01, pop = "all")
-toc()
-
-
-
-
-######################
-# plot
-
-pdf("result.pdf")
-plot(result.default)
-dev.off()
-
-plot(result.default)
-
-
-
-pdf("result.P50.pdf")
-plot(result.P50)
-dev.off()
-
-plot(result.P50)
-
-
-
-pdf("result_parallel.pdf")
-plot(result_parallel)
-dev.off()
-
-plot(result_parallel)
-
-
-######################
-# Prediction
-
-
-#preds <- predict(result.default, df.test[,-1], link = function(x) x)
-preds <- predict(result.default, df.test[,-1])
-rmse.default <- sqrt(mean((predmean(preds) - df.test$semimajoraxis)^2))
-
-pdf("prediction.pdf")
-plot(predmean(preds), df.test$semimajoraxis)
-dev.off()
-
-plot(predmean(preds), df.test$semimajoraxis)
-
-
-
-
-
-
-###############################
-
-
-#preds.P50 = predict(result.P50, df.test[,-1], link = function(x) x)
-preds.P50 = predict(result.P50, df.test[,-1])
-rmse.P50 <- sqrt(mean((predmean(preds.P50) - df.test$semimajoraxis)^2))
-
-pdf("prediction.P50.pdf")
-plot(predmean(preds.P50), df.test$semimajoraxis)
-dev.off()
-
-plot(predmean(preds.P50), df.test$semimajoraxis)
-
-
-
-###############################
-
-
-preds.multi <- predict(result_parallel, df.test[,-1], link = function(x) x)
-rmse.parallel <- sqrt(mean((predmean(preds.multi) - df.test$semimajoraxis)^2))
-
-pdf("pred_parallel.pdf")
-plot(predmean(preds.multi), df.test$semimajoraxis)
-dev.off()
-
-
-round(c(rmse.default, rmse.P50, rmse.parallel),2)
-
-
-###############################
-
-
-#Prediction based on the best model or the MPM (Median Probability Model)
-
-get.best.model(result = result.default)
-preds.best <- predict(get.best.model(result.default), df.test[, -1])
-sqrt(mean((preds.best - df.test$semimajoraxis)^2))
-
-get.mpm.model(result = result.default, y = df.train$semimajoraxis, x = df.train[, -1])
-preds.mpm <- predict(get.mpm.model(result.default, y = df.train$semimajoraxis, x = df.train[, -1]), df.test[, -1])
-sqrt(mean((preds.mpm - df.test$semimajoraxis)^2))
-
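# Illustration (not part of the original script): the three prediction
# strategies above (model averaging, best model, MPM) can be compared side by
# side. This sketch only reuses objects already computed above; the name
# rmse.compare is new and purely illustrative.
rmse.compare <- c(averaged   = rmse.default,
                  best.model = sqrt(mean((preds.best - df.test$semimajoraxis)^2)),
                  mpm        = sqrt(mean((preds.mpm - df.test$semimajoraxis)^2)))
round(rmse.compare, 2)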
-####################################################
-#
-# Diagnostic plots (Section 3.5)
-#
-####################################################
-
-
-pdf("diagn_default.pdf")
-diagn_plot(result.default, ylim = c(600,1500), FUN = max)
-dev.off()
-diagn_plot(result.default, ylim = c(600,1500), FUN = max)
-
-
-pdf("diagn_long.pdf")
-diagn_plot(result.P50, ylim = c(600,1500), FUN = max)
-dev.off()
-diagn_plot(result.P50, ylim = c(600,1500), FUN = max)
-
-
-pdf("diagn_par.pdf")
-diagn_plot(result_parallel, ylim = c(600,1500), FUN = max)
-dev.off()
-
-diagn_plot(result_parallel, ylim = c(600,1500), FUN = max)
-

From 551f2c0419fff87f874655eebdcd564be87edf6c Mon Sep 17 00:00:00 2001
From: aliaksah
Date: Fri, 21 Nov 2025 12:23:54 +0100
Subject: [PATCH 02/15] cleaned up further branch jss_v2

---
 R_script/oldvers/Ex12_Sec6_5.R               | 322 --------
 R_script/oldvers/Ex2_Sec4_1.R                |  91 ---
 R_script/oldvers/Ex3_Sec4_2.R                |  84 --
 R_script/oldvers/Ex4_Sec4_2.R                |  84 --
 R_script/oldvers/Ex4_Sec4_3.R                |  97 ---
 R_script/oldvers/Ex5_Sec4_3.R                |  97 ---
 R_script/oldvers/Ex6_Sec4_4.R                | 226 ------
 R_script/oldvers/Ex6_Sec5_1.R                | 118 ---
 R_script/oldvers/Ex7_Sec5.1.R                | 311 -------
 R_script/oldvers/Ex7_Sec5.2.R                | 297 -------
 R_script/oldvers/Ex7_Sec5_2.R                | 288 -------
 R_script/oldvers/Ex8_Sec6_1.R                | 112 ---
 R_script/oldvers/Ex9_Sec6_2.R                | 283 -------
 R_script/oldvers/FBMS-guide.R                | 354 --------
 R_script/oldvers/bacteremia.R                | 155 ----
 R_script/oldvers/fix impute jon.R            | 144 ----
 R_script/oldvers/gg.txt                      |  48 --
 R_script/oldvers/kristoffer.R                |  80 --
 R_script/oldvers/likelihoods2.R              | 800 ------------------
 R_script/oldvers/new general estimators.R    | 262 ------
 R_script/oldvers/synthetic_gaussian_data.csv |  48 --
 21 files changed, 4301 deletions(-)
 delete mode 100644 R_script/oldvers/Ex12_Sec6_5.R
 delete mode 100644 R_script/oldvers/Ex2_Sec4_1.R
 delete mode 100644 R_script/oldvers/Ex3_Sec4_2.R
 delete mode 100644 R_script/oldvers/Ex4_Sec4_2.R
 delete mode 100644 R_script/oldvers/Ex4_Sec4_3.R
 delete mode 100644 R_script/oldvers/Ex5_Sec4_3.R
 delete mode 100644 R_script/oldvers/Ex6_Sec4_4.R
 delete mode 100644 R_script/oldvers/Ex6_Sec5_1.R
 delete mode 100644 R_script/oldvers/Ex7_Sec5.1.R
 delete mode 100644 R_script/oldvers/Ex7_Sec5.2.R
 delete mode 100644 R_script/oldvers/Ex7_Sec5_2.R
 delete mode 100644 R_script/oldvers/Ex8_Sec6_1.R
 delete mode 100644 R_script/oldvers/Ex9_Sec6_2.R
 delete mode 100644 R_script/oldvers/FBMS-guide.R
 delete mode 100644 R_script/oldvers/bacteremia.R
 delete mode 100644 R_script/oldvers/fix impute jon.R
 delete mode 100644 R_script/oldvers/gg.txt
 delete mode 100644 R_script/oldvers/kristoffer.R
 delete mode 100644 R_script/oldvers/likelihoods2.R
 delete mode 100644 R_script/oldvers/new general estimators.R
 delete mode 100644 R_script/oldvers/synthetic_gaussian_data.csv

diff --git a/R_script/oldvers/Ex12_Sec6_5.R b/R_script/oldvers/Ex12_Sec6_5.R
deleted file mode 100644
index c159f0d..0000000
--- a/R_script/oldvers/Ex12_Sec6_5.R
+++ /dev/null
@@ -1,322 +0,0 @@
-#######################################################
-#
-# Example 12 (Section 6.5):
-#
-# Cox Regression (using only fbms)
-#
-# This is the valid version for the JSS Paper
-#
-#######################################################
-
-#install.packages("FBMS")
-library(FBMS)
-library(pec) #for the computation of cindex
-
-#install.packages("survival")
-library(survival)
-
-
-# Download data
-download.file('https://www.uniklinik-freiburg.de/fileadmin/mediapool/08_institute/biometrie-statistik/Dateien/Studium_und_Lehre/Lehrbuecher/Multivariable_Model-building/gbsg_br_ca.zip',
-              'gbsg_br_ca.zip')
-df1 <- read.csv(unz('gbsg_br_ca.zip',
-                    'gbsg_br_ca/gbsg_br_ca.csv'),
-                header = TRUE)
-file.remove("gbsg_br_ca.zip")
-
-# Prepare data
-df <- df1[, c(13, 14, 2:4, 6:8, 10:12)]
-names(df) = c("time","cens",names(df)[3:ncol(df)])
-
-# Split into training and test set
-set.seed(123)
-train <- c(sample((1:nrow(df))[df$cens == 1], sum(df$cens)*2/3),   # split separately events
-           sample((1:nrow(df))[df$cens == 0], sum(!df$cens)*2/3))  # and censored observations
-
-df.train <- df[train,]
-df.test <- df[-train,]
-
-# time will be used as an extra parameter in the custom function
-time <- df.train$time
-
-
-params <- gen.params.gmjmcmc(ncol(df.train) - 2)
-transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2")
-probs <- gen.probs.gmjmcmc(transforms)
-probs$gen <- c(1,1,0,0)
-
-
-# specify the custom function to estimate the log posterior for Cox regression
-surv.pseudo.loglik = function(y, x, model, complex, mlpost_params){
-
-  data <- data.frame(time = mlpost_params$time, cens = y, as.matrix(x[,model]))[,-3] # Removing intercept
-  if(dim(data)[2]==2)
-  {
-    return(list(crit=-.Machine$double.xmax, coefs=rep(0,1)))
-  } else {
-    formula1 <- as.formula(paste0("Surv(time,cens)","~ 1 + ."))
-
-    out <- coxph(formula1, data = data)
-
-    # logarithm of marginal likelihood
-    mloglik <- (out$loglik[2] - out$loglik[1]) - log(length(y)) * (dim(data)[2] - 2)/2
-
-    # logarithm of model prior
-    if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value of parameter r
-    lp <- log_prior(mlpost_params, complex)
-
-    # Compute criterion and consider special cases like multicollinearity
-
-    crit <- mloglik + lp
-    if(sum(is.na(out$coefficients))>0) #Get rid of models with collinearities (indicated by NA coefficients)
-      crit <- -.Machine$double.xmax
-
-    return(list(crit = crit, coefs = c(0,out$coefficients)))
-  }
-}
-
-
-
-#######################################################
-#
-# Analysis with 4 different modeling strategies
-#
-#
-
-
-# 1) Single chain analysis (just to illustrate how it works)
-set.seed(121)
-result1 <- fbms(formula = cens ~ 1 + .,data = df.train[,-1], params = params, P = 5,
-                transforms = transforms, method = "gmjmcmc",
-                family = "custom", loglik.pi = surv.pseudo.loglik,
-                model_prior = list(r = 0.5),
-                extra_params = list(time = time))
-
-
-summary(result1,labels = names(df.train)[-(1:2)])
-
-# 2) Parallel version, only linear terms
-
-
-set.seed(122)
-result2 <- fbms(formula = cens ~ 1 + .,data = df.train[,-1], params = params,
-                method = "mjmcmc.parallel", runs = 40, cores = 40,
-                family = "custom", loglik.pi = surv.pseudo.loglik,
-                model_prior = list(r = 0.5), extra_params = list(time = time))
-
-summary(result2,tol = 0.01,labels = names(df.train)[-(1:2)],effects = c(0.025,0.5,0.975))
-
-
-
-# 3) Parallel version, only fractional polynomials
-
-set.seed(123)
-probs$gen <- c(0,1,0,1)
-params$feat$D <- 1
-
-result3 <- fbms(formula = cens ~ 1 + .,data = df.train[,-1], params = params, probs = probs, P = 10,
-                transforms = transforms, method = "gmjmcmc.parallel", runs = 40, cores = 40,
-                family = "custom", loglik.pi = surv.pseudo.loglik,
-                model_prior = list(r = 0.5), extra_params = list(time = time))
-
-
-summary(result3,tol = 0.01, effects = c(0.025,0.5,0.975))
-
-
-
-# 4) Parallel version using all types of non-linear features
-set.seed(124)
-probs$gen <- c(1,1,1,1)
-params$feat$D <- 5
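# Clarifying note (not part of the original script): params$feat$D bounds the
# depth of generated features, i.e. how many times transformations and
# interactions may be nested when new features are generated. Strategy 3 above
# used D = 1 (only single transformations of the raw covariates), while D = 5
# in strategy 4 also admits nested features such as, hypothetically,
# p2(x1 * pm05(x3)).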
-result4 <- fbms(formula = cens ~ 1 + .,data = df.train[,-1], params = params, probs = probs,P = 20, - transforms = transforms, method = "gmjmcmc.parallel", runs = 40, cores = 40, - family = "custom", loglik.pi = surv.pseudo.loglik, - model_prior = list(r = 0.5), extra_params = list(time = time)) - - -summary(result4,tol = 0.01) - - - - - - -################################################ -# -# Prediction and C index using model averaging -# -################################################ - - -linpreds1.train <- predict(result1,df.train[,-(1:2)], link = function(x) x) -linpreds1 <- predict(result1,df.test[,-(1:2)], link = function(x) x) - -linpreds2.train <- predict(result2,df.train[,-(1:2)], link = function(x) x) -linpreds2 <- predict(result2,df.test[,-(1:2)], link = function(x) x) - -linpreds3.train <- predict(result3,df.train[,-(1:2)], link = function(x) x) -linpreds3 <- predict(result3,df.test[,-(1:2)], link = function(x) x) - -linpreds4.train <- predict(result4,df.train[,-(1:2)], link = function(x) x) -linpreds4 <- predict(result4,df.test[,-(1:2)], link = function(x) x) - - - -df.train$average.lin.pred1 <- linpreds1.train$aggr$mean -df.train$average.lin.pred2 <- linpreds2.train$aggr$mean -df.train$average.lin.pred3 <- linpreds3.train$aggr$mean -df.train$average.lin.pred4 <- linpreds4.train$aggr$mean - -df.test$average.lin.pred1 <- linpreds1$aggr$mean -df.test$average.lin.pred2 <- linpreds2$aggr$mean -df.test$average.lin.pred3 <- linpreds3$aggr$mean -df.test$average.lin.pred4 <- linpreds4$aggr$mean - - - -# Compute cindex using package pec - -mod1 <- coxph(Surv(time, cens) ~ average.lin.pred1, data = as.data.frame(df.train), x = TRUE) -cindex1 <- cindex(mod1, mod1$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -mod2 <- coxph(Surv(time, cens) ~ average.lin.pred2, data = as.data.frame(df.train), x = TRUE) -cindex2 <- cindex(mod2, mod2$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -mod3 <- coxph(Surv(time, cens) ~ average.lin.pred3, data = as.data.frame(df.train), x = TRUE) -cindex3 <- cindex(mod3, mod3$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -mod4 <- coxph(Surv(time, cens) ~ average.lin.pred4, data = as.data.frame(df.train), x = TRUE) -cindex4 <- cindex(mod4, mod4$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - - -#Full model without nonlinearities (for the sake of comparison) -mod5 <- coxph(Surv(time, cens) ~ 1+., data = as.data.frame(df.train[,1:11]),x = T) -cindex5 <- cindex(mod5, mod5$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -#Model without predictors (for the sake of comparison) -mod6 <- coxph(Surv(time, cens) ~ 1, data = as.data.frame(df.train[,1:11]),x = T) -cindex6 <- cindex(mod6, mod6$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -all.cindices = round(unlist(c(cindex1, cindex2, cindex3, cindex4, cindex5, cindex6)),3) -names(all.cindices) = c("Model 1", "Model 2", "Model 3", "Model 4", "Full Linear Model", "Null Model") - -# Clean the train and test data for the next type of predictions - -df.train <- df[train,] -df.test <- df[-train,] - -############################################## -# -# Prediction and C index using best model -# -############################################## - - -linpreds.train.best <- predict(get.best.model(result1),df.train[,-(1:2)], link = function(x) x) -linpreds.best <- predict(get.best.model(result1),df.test[,-(1:2)], link = function(x) x) - - -linpreds2.train.best <- 
predict(get.best.model(result2),df.train[,-(1:2)], link = function(x) x) -linpreds2.best <- predict(get.best.model(result2),df.test[,-(1:2)], link = function(x) x) - - -linpreds3.train.best <- predict(get.best.model(result3),df.train[,-(1:2)], link = function(x) x) -linpreds3.best <- predict(get.best.model(result3),df.test[,-(1:2)], link = function(x) x) - - -linpreds4.train.best <- predict(get.best.model(result4),df.train[,-(1:2)], link = function(x) x) -linpreds4.best <- predict(get.best.model(result4),df.test[,-(1:2)], link = function(x) x) - - -df.train$best.lin.pred1 <- linpreds.train.best -df.train$best.lin.pred2 <- linpreds2.train.best -df.train$best.lin.pred3 <- linpreds3.train.best -df.train$best.lin.pred4 <- linpreds4.train.best - -df.test$best.lin.pred1 <- linpreds.best -df.test$best.lin.pred2 <- linpreds2.best -df.test$best.lin.pred3 <- linpreds3.best -df.test$best.lin.pred4 <- linpreds4.best - -mod1 <- coxph(Surv(time, cens) ~ best.lin.pred1, data = as.data.frame(df.train), x = TRUE) -cindex1 <- cindex(mod1, mod1$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -mod2 <- coxph(Surv(time, cens) ~ best.lin.pred2, data = as.data.frame(df.train), x = TRUE) -cindex2 <- cindex(mod2, mod2$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -mod3 <- coxph(Surv(time, cens) ~ best.lin.pred3, data = as.data.frame(df.train), x = TRUE) -cindex3 <- cindex(mod3, mod3$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -mod4 <- coxph(Surv(time, cens) ~ best.lin.pred4, data = as.data.frame(df.train), x = TRUE) -cindex4 <- cindex(mod4, mod4$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -all.cindices <- rbind(all.cindices, round(unlist(c(cindex1, cindex2, cindex3, cindex4, cindex5, cindex6)),3)) - -# Clean the train and test data for the next type of predictions - -df.train <- df[train,] -df.test <- df[-train,] - -############################################## -# -# Prediction and C index using mpm model -# -############################################## - - -linpreds.train.mpm <- predict(get.mpm.model(result1, y = df.train$cens, x = df.train[, -c(1,2)],family = "custom", - loglik.pi = surv.pseudo.loglik,params = list(r = 0.5, time = time)), - df.train[,-(1:2)], link = function(x) x) - -linpreds.mpm <- predict(get.mpm.model(result1, y = df.train$cens, x = df.train[, -c(1,2)],family = "custom", - loglik.pi = surv.pseudo.loglik,params = list(r = 0.5, time = time)),df.test[,-(1:2)], link = function(x) x) - - -linpreds2.train.mpm <- predict(get.mpm.model(result2, y = df.train$cens, x = df.train[, -c(1,2)],family = "custom", - loglik.pi = surv.pseudo.loglik,params = list(r = 0.5, time = time)), - df.train[,-(1:2)], link = function(x) x) - -linpreds2.mpm <- predict(get.mpm.model(result2, y = df.train$cens, x = df.train[, -c(1,2)],family = "custom", - loglik.pi = surv.pseudo.loglik,params = list(r = 0.5, time = time)),df.test[,-(1:2)], link = function(x) x) - -linpreds3.train.mpm <- predict(get.mpm.model(result3, y = df.train$cens, x = df.train[, -c(1,2)],family = "custom", - loglik.pi = surv.pseudo.loglik,params = list(r = 0.5, time = time)), - df.train[,-(1:2)], link = function(x) x) -linpreds3.mpm <- predict(get.mpm.model(result3, y = df.train$cens, x = df.train[, -c(1,2)],family = "custom", - loglik.pi = surv.pseudo.loglik,params = list(r = 0.5, time = time)),df.test[,-(1:2)], link = function(x) x) - - -linpreds4.train.mpm <- predict(get.mpm.model(result4, y = df.train$cens, x = df.train[, -c(1,2)],family = 
"custom", - loglik.pi = surv.pseudo.loglik,params = list(r = 0.5, time = time)), - df.train[,-(1:2)], link = function(x) x) -linpreds4.mpm <- predict(get.mpm.model(result4, y = df.train$cens, x = df.train[, -c(1,2)],family = "custom", - loglik.pi = surv.pseudo.loglik,params = list(r = 0.5, time = time)),df.test[,-(1:2)], link = function(x) x) - - -df.train$mpm.lin.pred1 <- linpreds.train.mpm -df.train$mpm.lin.pred2 <- linpreds2.train.mpm -df.train$mpm.lin.pred3 <- linpreds3.train.mpm -df.train$mpm.lin.pred4 <- linpreds4.train.mpm - -df.test$mpm.lin.pred1 <- linpreds.mpm -df.test$mpm.lin.pred2 <- linpreds2.mpm -df.test$mpm.lin.pred3 <- linpreds3.mpm -df.test$mpm.lin.pred4 <- linpreds4.mpm - -mod1 <- coxph(Surv(time, cens) ~ mpm.lin.pred1, data = as.data.frame(df.train), x = TRUE) -cindex1 <- cindex(mod1, mod1$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -mod2 <- coxph(Surv(time, cens) ~ mpm.lin.pred2, data = as.data.frame(df.train), x = TRUE) -cindex2 <- cindex(mod2, mod2$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -mod3 <- coxph(Surv(time, cens) ~ mpm.lin.pred3, data = as.data.frame(df.train), x = TRUE) -cindex3 <- cindex(mod3, mod3$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - -mod4 <- coxph(Surv(time, cens) ~ mpm.lin.pred4, data = as.data.frame(df.train), x = TRUE) -cindex4 <- cindex(mod4, mod4$formula, data = as.data.frame(df.test), cens.model = 'cox')$AppCindex - - -all.cindices <- rbind(all.cindices, round(unlist(c(cindex1, cindex2, cindex3, cindex4, cindex5, cindex6)),3)) -rownames(all.cindices) = c("Model Averaging", "Best Model", "MPM") - -print(all.cindices) diff --git a/R_script/oldvers/Ex2_Sec4_1.R b/R_script/oldvers/Ex2_Sec4_1.R deleted file mode 100644 index afc16b7..0000000 --- a/R_script/oldvers/Ex2_Sec4_1.R +++ /dev/null @@ -1,91 +0,0 @@ -####################################################### -# -# Example 2 (Section 4.1): -# -# Simulated data without any nonlinearities, only using fbms function -# -# This is the valid version for the JSS Paper -# -####################################################### - -#install.packages("FBMS") - -library(mvtnorm) -library(FBMS) - - - -n <- 100 # sample size -p <- 20 # number of covariates -p.vec <- 1:p - - -k <- 5 #size of the data generating model - - -correct.model <- 1:k -beta.k <- (1:5)/5 # Coefficents of the correct submodel - -beta <- c(rep(0, p)) -beta[correct.model] <- beta.k - -set.seed(123) - -x <- rmvnorm(n, rep(0, p)) -y <- x %*% beta + rnorm(n) -X <- as.matrix(x) - -y<-scale(y) -X<-scale(X)/sqrt(n) - - -df <- as.data.frame(cbind(y, X)) -colnames(df) <- c("Y", paste0("X", seq_len(ncol(df) - 1))) - -correct.model -beta.k - -######################################################## -# -# Models with non-linear effects (gmjmcmc) -# -# - -to3 <- function(x) x^3 -transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") - -set.seed(1) - result <- fbms(data = df, method = "gmjmcmc", transforms = transforms) - summary(result) - plot(result) - - -set.seed(2) - result2 <- fbms(data = df, method = "gmjmcmc", transforms = transforms, - N = 1000, P = 40) - summary(result2, tol = 0.1) - plot(result) - - - -######################################################## -# -# Model which includes no non-linear effects (mjmcmc) -# -# - - # The default value of N = 1000 works relatively well here. 
-  set.seed(1)
-  result.lindef <- fbms(data = df)
-  summary(result.lindef)
-  plot(result.lindef)
-
-  # Check that this is actually the default
-  set.seed(1)
-  result.lin <- fbms(data = df, N = 1000)
-  summary(result.lin)
-  plot(result.lin)
-
-
-
-
diff --git a/R_script/oldvers/Ex3_Sec4_2.R b/R_script/oldvers/Ex3_Sec4_2.R
deleted file mode 100644
index f2f0b4f..0000000
--- a/R_script/oldvers/Ex3_Sec4_2.R
+++ /dev/null
@@ -1,84 +0,0 @@
-#######################################################
-#
-# Example 3 (Section 4.2):
-#
-# Simulated data with interactions, using only fbms
-#
-# This is the valid version for the JSS Paper
-#
-#######################################################
-
-library(mvtnorm)
-library(FBMS)
-
-n <- 100  # sample size
-p <- 20   # number of covariates
-
-# Model:
-# X1: Pure Main effect
-# X2 : X3: Pure interaction effect
-# X4 * X5: Main effects plus interaction effect
-
-
-set.seed(1003)
-
-x = rmvnorm(n, rep(0, p))
-X <- as.matrix(x)
-X <- scale(X)/sqrt(n)
-
-y <- (1.2 * x[,1] + 1.5 * x[,2]* x[,3] - x[,4] + 1.1*x[,5] - 1.3 * x[,4]*x[,5])+ rnorm(n)
-y<-scale(y)
-
-df <- data.frame(y = y, X)
-
-
-transforms <- c("")
-probs <- gen.probs.gmjmcmc(transforms)
-probs$gen <- c(1,0,0,1)  #Include interactions and mutations
-
-####################################################
-#
-# single thread analysis (two different runs)
-#
-####################################################
-
-set.seed(123)
-result <- fbms(data = df, method = "gmjmcmc", transforms = transforms,
-               probs = probs)
-summary(result)
-
-
-set.seed(123)
-result2 <- fbms(data = df, method = "gmjmcmc", transforms = transforms,
-                N = 1000, probs = probs, P=40)
-summary(result2, tol = 0.01)
-
-
-####################################################
-#
-# multiple thread analysis
-#
-####################################################
-
-
-set.seed(123)
-
-  result_parallel <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
-                          runs = 40, cores = 40,
-                          probs = probs, P=25)
-
-summary(result_parallel, tol = 0.01)
-
-
-
-# Using more iterations in the MJMCMC chains does not change results substantially
-set.seed(123)
-
-result_parallel2 <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
-                         runs = 40, cores = 10, N=1000, N.final=2000,
-                         probs = probs, P=25)
-summary(result_parallel2, tol = 0.01)
-
-#summary(result_parallel2, pop = "all", tol = 0.01)
-
-
diff --git a/R_script/oldvers/Ex4_Sec4_2.R b/R_script/oldvers/Ex4_Sec4_2.R
deleted file mode 100644
index 13f4f8d..0000000
--- a/R_script/oldvers/Ex4_Sec4_2.R
+++ /dev/null
@@ -1,84 +0,0 @@
-#######################################################
-#
-# Example 4 (Section 4.2):
-#
-# Simulated data with interactions, using only fbms
-#
-# This is the valid version for the JSS Paper
-#
-#######################################################
-
-library(mvtnorm)
-library(FBMS)
-
-n <- 100  # sample size
-p <- 20   # number of covariates
-
-# Model:
-# X1: Pure Main effect
-# X2 : X3: Pure interaction effect
-# X4 * X5: Main effects plus interaction effect
-
-
-set.seed(1003)
-
-x = rmvnorm(n, rep(0, p))
-X <- as.matrix(x)
-X <- scale(X)/sqrt(n)
-
-y <- (1.2 * x[,1] + 1.5 * x[,2]* x[,3] - x[,4] + 1.1*x[,5] - 1.3 * x[,4]*x[,5])+ rnorm(n)
-y<-scale(y)
-
-df <- data.frame(y = y, X)
-
-
-transforms <- c("")
-probs <- gen.probs.gmjmcmc(transforms)
-probs$gen <- c(1,0,0,1)  #Include interactions and mutations
-
-####################################################
-#
-# single thread analysis (two different runs)
-#
-####################################################
-
-set.seed(123)
-result <- fbms(data = df, method = "gmjmcmc", transforms = transforms,
-               probs = probs)
-summary(result)
-
-
-set.seed(123)
-result2 <- fbms(data = df, method = "gmjmcmc", transforms = transforms,
-                N = 1000, probs = probs, P=40)
-summary(result2, tol = 0.01)
-
-
-####################################################
-#
-# multiple thread analysis
-#
-####################################################
-
-
-set.seed(123)
-
-  result_parallel <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
-                          runs = 40, cores = 40,
-                          probs = probs, P=25)
-
-summary(result_parallel, tol = 0.01)
-
-
-
-# Using more iterations in the MJMCMC chains does not change results substantially
-set.seed(123)
-
-result_parallel2 <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
-                         runs = 40, cores = 10, N=1000, N.final=2000,
-                         probs = probs, P=25)
-summary(result_parallel2, tol = 0.01)
-
-#summary(result_parallel2, pop = "all", tol = 0.01)
-
-
diff --git a/R_script/oldvers/Ex4_Sec4_3.R b/R_script/oldvers/Ex4_Sec4_3.R
deleted file mode 100644
index 2ffdcb7..0000000
--- a/R_script/oldvers/Ex4_Sec4_3.R
+++ /dev/null
@@ -1,97 +0,0 @@
-#######################################################
-#
-# Example 4 (Section 4.3):
-#
-# Fractional Polynomials: Depth is set to 1, using only fbms
-#
-# This is the valid version for the JSS Paper
-#
-#######################################################
-
-
-library(FBMS)
-
-
-url <- "https://www.uniklinik-freiburg.de/fileadmin/mediapool/08_institute/biometrie-statistik/Dateien/Studium_und_Lehre/Lehrbuecher/Multivariable_Model-building/ART.zip"
-temp_dir <- tempfile()
-download.file(url, tf <- tempfile(fileext = ".zip"), mode = "wb")
-unzip(tf, exdir = temp_dir)
-
-df <- read.csv(file.path(temp_dir, "ART/art", "art.csv"))[,c(16,1:3,5:8,10:14)]
-
-summary(df)
-
-
-#number of observations in the data
-
-n = dim(df)[1]
-
-#number of covariates
-
-p = dim(df)[2] - 1
-
-
-set.seed(040590)
-
-
-mu = 0.1 + p05(df$x1) + df$x1 + pm05(df$x3) + p0pm05(df$x3) + df$x4a + pm1(df$x5) + p0(df$x6) + df$x8 + df$x10
-df$y = rnorm(n =n, mean = mu,sd = 1)
-
-
-transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2")
-probs <- gen.probs.gmjmcmc(transforms)
-probs$gen <- c(0,1,0,1) # Only modifications and mutations!
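# Clarifying note (not part of the original script): the four entries of
# probs$gen are the probabilities of generating new features by interactions,
# modifications, projections and mutations, respectively (cf. the comments in
# FBMS-guide.R later in this patch). For example,
# probs$gen <- c(1, 1, 0, 1)  # would additionally allow interactions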
-params <- gen.params.gmjmcmc(ncol(df) - 1)
-params$feat$D <- 1 # Set depth of features to 1
-
-
-####################################################
-#
-# single thread analysis
-#
-####################################################
-
-set.seed(123)
-result <- fbms(data = df, method = "gmjmcmc", transforms = transforms,
-               probs = probs, params = params)
-summary(result)
-
-
-
-####################################################
-#
-# multiple thread analysis
-#
-####################################################
-
-set.seed(101)
-result_parallel <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
-                        probs = probs, params = params, P=25, runs = 40, cores = 40)
-summary(result_parallel, tol = 0.05)
-
-diagn_plot(result_parallel, FUN = median)
-
-
-
-
-set.seed(102)
-  result_parallel2 <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
-                           probs = probs, params = params, P=25, N=1000, N.final=2000,
-                           runs = 40, cores = 40)
-
-summary(result_parallel2, tol = 0.05)
-
-diagn_plot(result_parallel2,FUN = median)
-
-
-##########################
-
-# Using Jeffreys-BIC prior
-
-set.seed(103)
-result_parallel3 <- fbms(data = df, method = "gmjmcmc.parallel", beta_prior = list(type = "Jeffreys-BIC"), transforms = transforms,
-                         probs = probs, params = params, P=25, N=1000, N.final=2000,
-                         runs = 40, cores = 40)
-
-summary(result_parallel3, tol = 0.05)
-
diff --git a/R_script/oldvers/Ex5_Sec4_3.R b/R_script/oldvers/Ex5_Sec4_3.R
deleted file mode 100644
index 36942d3..0000000
--- a/R_script/oldvers/Ex5_Sec4_3.R
+++ /dev/null
@@ -1,97 +0,0 @@
-#######################################################
-#
-# Example 5 (Section 4.3):
-#
-# Fractional Polynomials: Depth is set to 1, using only fbms
-#
-# This is the valid version for the JSS Paper
-#
-#######################################################
-
-
-library(FBMS)
-
-url <- "https://www.uniklinik-freiburg.de/fileadmin/mediapool/08_institute/biometrie-statistik/Dateien/Studium_und_Lehre/Lehrbuecher/Multivariable_Model-building/ART.zip"
-temp_dir <- tempfile()
-download.file(url, tf <- tempfile(fileext = ".zip"), mode = "wb")
-unzip(tf, exdir = temp_dir)
-
-df <- read.csv(file.path(temp_dir, "ART/art", "art.csv"))[,c(16,1:3,5:8,10:14)]
-
-summary(df)
-
-
-#number of observations in the data
-
-n = dim(df)[1]
-
-#number of covariates
-
-p = dim(df)[2] - 1
-
-
-set.seed(040590)
-
-
-mu = 0.1 + p05(df$x1) + df$x1 + pm05(df$x3) + p0pm05(df$x3) + df$x4a + pm1(df$x5) + p0(df$x6) + df$x8 + df$x10
-df$y = rnorm(n =n, mean = mu,sd = 1)
-
-
-transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2")
-probs <- gen.probs.gmjmcmc(transforms)
-probs$gen <- c(0,1,0,1) # Only modifications and mutations!
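# Clarifying note (not part of the original script; follows the usual
# fractional-polynomial naming convention): pK denotes the power x^K and p0
# denotes log(x), so that e.g. p05(x) = sqrt(x) and pm1(x) = 1/x, while p0pK
# denotes the repeated-power term log(x) * x^K; these match the terms used to
# build mu above.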
-params <- gen.params.gmjmcmc(ncol(df) - 1)
-params$feat$D <- 1 # Set depth of features to 1
-
-
-####################################################
-#
-# single thread analysis
-#
-####################################################
-
-set.seed(123)
-result <- fbms(data = df, method = "gmjmcmc", transforms = transforms,
-               probs = probs, params = params)
-summary(result)
-
-
-
-####################################################
-#
-# multiple thread analysis
-#
-####################################################
-
-set.seed(101)
-result_parallel <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
-                        probs = probs, params = params, P=25, runs = 40, cores = 40)
-summary(result_parallel, tol = 0.01)
-
-diagn_plot(result_parallel, FUN = median)
-
-
-
-
-set.seed(102)
-  result_parallel2 <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms,
-                           probs = probs, params = params, P=25, N=1000, N.final=2000,
-                           runs = 40, cores = 40)
-
-summary(result_parallel2, tol = 0.01)
-
-diagn_plot(result_parallel2,FUN = median)
-
-
-# Very large number of MJMCMC iterations (not needed for paper)
-set.seed(104)
-
-use.fbms <- TRUE # this flag was undefined in the old script; TRUE uses the fbms() wrapper, FALSE calls gmjmcmc.parallel() directly
-if (use.fbms) {
-  result_parallel3 <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms, beta_prior = list(type = "Jeffreys-BIC"),
-                           probs = probs, params = params, P=50, runs = 40, cores = 40, N.init=2000, N.final=4000)
-} else {
-  result_parallel3 = gmjmcmc.parallel(runs = 40, cores = 40, x = df[, -1], y = df[, 1], transforms = transforms, mlpost_params = list(family = "gaussian", beta_prior = list(type = "Jeffreys-BIC")),
-                                      probs = probs, params = params, P=50, N.init=2000, N.final=4000)
-}
-#summary(result_parallel3, labels = names(df[-1]))
-summary(result_parallel3, labels = names(df[-1]), tol = 0.01)
\ No newline at end of file
diff --git a/R_script/oldvers/Ex6_Sec4_4.R b/R_script/oldvers/Ex6_Sec4_4.R
deleted file mode 100644
index 370691c..0000000
--- a/R_script/oldvers/Ex6_Sec4_4.R
+++ /dev/null
@@ -1,226 +0,0 @@
-#######################################################
-#
-# Example 6 (Section 4.4):
-#
-# Prediction using non-linear Projections, only using fbms function
-#
-# DATA - abalone data set
-#
-# Data set is available at https://www.kaggle.com/datasets/rodolfomendes/abalone-dataset
-#
-# For convenience we provide the file abalone.csv which already contains the names of the variables
-#
-#
-# This is the valid version for the JSS Paper
-#
-#######################################################
-
-
-library(FBMS)
-
-data("abalone")
-
-df = abalone
-df$Sex_F_vs_I = as.numeric(df$Sex == "F")
-df$Sex_M_vs_I = as.numeric(df$Sex == "M")
-df$Sex = as.factor(df$Sex)
-df$Rings = as.numeric(df$Rings)
-
-summary(df)
-
-
-#number of observations in the data
-
-n = dim(df)[1]
-
-# Create Training and Test dataset
-
-# Sam Waugh (1995) "Extending and benchmarking Cascade-Correlation", PhD
-# thesis, Computer Science Department, University of Tasmania.
-
-#-- Test set performance (final 1044 examples, first 3133 used for training):
-#    24.86% Cascade-Correlation (no hidden nodes)
-#    26.25% Cascade-Correlation (5 hidden nodes)
-#    21.5%  C4.5
-#    0.0%   Linear Discriminant Analysis
-#    3.57%  k=5 Nearest Neighbour
-#   (Problem encoded as a classification task)
-
-
-# remove variable sex because gmjmcmc cannot handle factor variables
-df.training = df[1:3133,-2]
-df.test = df[3134:n,-2]
-
-
-summary(df.training)
-
-
-pred.RMSE = rep(0,5)      # To collect the results of prediction RMSE from the five different methods
-
-pred.RMSE.mpm = rep(0,5)  # Same for the MPM
-pred.RMSE.best = rep(0,5) # Same for the best model (posterior mode in the model space)
-
-transforms = c("sigmoid")
-probs = gen.probs.gmjmcmc(transforms)
-probs$gen = c(0,0,1,1)  #Only projections and mutations!
-
-
-#############################################################################
-#
-# Using the default strategy for alpha (simply set to 1)
-#
-#############################################################################
-
-set.seed(5001)
-result = fbms(data = df.training, method = "gmjmcmc", transforms = transforms,
-              probs = probs)
-summary(result)
-
-pred = predict(result, x = df.test[,-1])
-pred.RMSE[1] = sqrt(mean((pred$aggr$mean - df.test$Rings)^2))
-
-preds = predict(get.best.model(result), df.test[, -1])
-pred.RMSE.best[1] = sqrt(mean((preds - df.test$Rings)^2))
-
-preds = predict(get.mpm.model(result, y = df.training$Rings, x = df.training[, -1]), df.test[, -1])
-pred.RMSE.mpm[1] = sqrt(mean((preds - df.test$Rings)^2))
-
-
-plot(pred$aggr$mean, df.test$Rings)
-
-
-
-
-
-#############################################################################
-#
-# Parallel version
-#
-#############################################################################
-
-set.seed(5003)
-result_parallel = fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40,
-                       transforms = transforms, probs = probs, P=25)
-summary(result_parallel, tol = 0.05)
-
-
-
-pred_parallel = predict(result_parallel, x = df.test[,-1], link = function(x)(x))
-pred.RMSE[2] = sqrt(mean((pred_parallel$aggr$mean - df.test$Rings)^2))
-
-preds = predict(get.best.model(result_parallel), df.test[, -1])
-pred.RMSE.best[2] = sqrt(mean((preds - df.test$Rings)^2))
-
-preds = predict(get.mpm.model(result_parallel, y = df.training$Rings, x = df.training[, -1]), df.test[, -1])
-pred.RMSE.mpm[2] = sqrt(mean((preds - df.test$Rings)^2))
-
-
-
-plot(pred_parallel$aggr$mean, df.test$Rings)
-abline(0,1)
-
-
-
-#############################################################################
-#
-# Using the "deep" strategy to estimate alpha
-#
-#############################################################################
-
-
-params = gen.params.gmjmcmc(ncol(df.training) - 1)
-params$feat$alpha = "deep"
-
-
-set.seed(5003)
-result.a3 = fbms(data = df.training, method = "gmjmcmc", transforms = transforms,
-                 probs = probs, params = params)
-summary(result.a3)
-
-
-
-pred.a3 = predict(result.a3, x = df.test[,-1], link = function(x)(x))
-pred.RMSE[3] = sqrt(mean((pred.a3$aggr$mean - df.test$Rings)^2))
-
-plot(pred.a3$aggr$mean, df.test$Rings)
-
-
-preds = predict(get.best.model(result.a3), df.test[, -1])
-pred.RMSE.best[3] = sqrt(mean((preds - df.test$Rings)^2))
-
-#not yet applicable to the deep method
-#preds = predict(get.mpm.model(result.a3, y = df.training$Rings, x = df.training[, -1]), df.test[, -1])
-pred.RMSE.mpm[3] = NA #sqrt(mean((preds - df.test$Rings)^2))
-
-
-
-
-#############################################################################
-#
-# Parallel
 version with params$feat$alpha = "random"
-#
-#############################################################################
-
-params$feat$alpha = "random"
-
-set.seed(5004)
-result_parallel.a3 = fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40,
-                          transforms = transforms, probs = probs, params = params, P=25)
-summary(result_parallel.a3, tol = 0.05)
-
-
-
-pred_parallel.a3 = predict(result_parallel.a3, x = df.test[,-1], link = function(x)(x))
-pred.RMSE[4] = sqrt(mean((pred_parallel.a3$aggr$mean - df.test$Rings)^2))
-
-preds = predict(get.best.model(result_parallel.a3), df.test[, -1])
-pred.RMSE.best[4] = sqrt(mean((preds - df.test$Rings)^2))
-
-#preds = predict(get.mpm.model(result_parallel, y = df.training$Rings, x = df.training[, -1]), df.test[, -1])
-pred.RMSE.mpm[4] = NA #sqrt(mean((preds - df.test$Rings)^2))
-
-
-plot(pred_parallel.a3$aggr$mean, df.test$Rings)
-abline(0,1)
-
-
-
-#############################################################################
-#
-# Parallel version with fractional polynomials
-#
-#############################################################################
-
-transforms = c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2")
-probs = gen.probs.gmjmcmc(transforms)
-probs$gen = c(0,1,0,1)  #Only modifications and mutations!
-
-set.seed(50005)
-result.fp = fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40,
-                 transforms = transforms, probs = probs, P=25)
-summary(result.fp)
-
-
-pred_fp = predict(result.fp, x = df.test[,-1], link = function(x)(x))
-pred.RMSE[5] = sqrt(mean((pred_fp$aggr$mean - df.test$Rings)^2))
-
-plot(pred_fp$aggr$mean, df.test$Rings)
-
-preds = predict(get.best.model(result.fp), df.test[, -1])
-pred.RMSE.best[5] = sqrt(mean((preds - df.test$Rings)^2))
-
-preds = predict(get.mpm.model(result.fp, y = df.training$Rings, x = df.training[, -1]), df.test[, -1])
-pred.RMSE.mpm[5] = sqrt(mean((preds - df.test$Rings)^2))
-
-
-
-#############################################################################
-#
-# Summary of predictions
-#
-#############################################################################
-
-round(pred.RMSE,3)
-round(pred.RMSE.best,3)
-round(pred.RMSE.mpm,3)
-
diff --git a/R_script/oldvers/Ex6_Sec5_1.R b/R_script/oldvers/Ex6_Sec5_1.R
deleted file mode 100644
index 1781f63..0000000
--- a/R_script/oldvers/Ex6_Sec5_1.R
+++ /dev/null
@@ -1,118 +0,0 @@
-#######################################################
-#
-# Example 6 (Section 5.1): Sanger data again
-#
-# High dimensional analysis without nonlinearities, using only FBMS
-#
-# This is the valid version for the JSS Paper
-#
-#######################################################
-
-library(FBMS)
-library(xtable)
-library(tictoc)
-library(parallel) # needed for mclapply() below
-run.parallel <- TRUE # Flag to control whether to run gmjmcmc in parallel or just load results
-
-
-data(SangerData2)
-df <- SangerData2
-# Rename columns for clarity: response is "y", predictors "x1", "x2", ..., "xp"
-colnames(df) = c("y",paste0("x",1:(ncol(df)-1)))
-n = dim(df)[1]; p=dim(df)[2]-1
-
-#Use only linear terms and mutations
-transforms = c("")
-probs = gen.probs.gmjmcmc(transforms)
-probs$gen = c(0,0,0,1)
-
-
-# Select candidate features for the first MJMCMC round by correlation with response
-c.vec = unlist(mclapply(2:ncol(df), function(x)abs(cor(df[,1],df[,x]))))
-ids = sort(order(c.vec,decreasing=TRUE)[1:50])
-
-# Generate default parameters for GMJMCMC for the p predictors
-params = gen.params.gmjmcmc(p)
-# Restrict feature pre-filtering to
top 50 predictors selected by correlation
-params$feat$prel.filter <- ids
-
-params$feat$pop.max <- 50 # Maximum population size for the GMJMCMC search
-
-####################################################
-#
-# Three independent runs of gmjmcmc.parallel
-#
-####################################################
-
-
-if (run.parallel) {
-  set.seed(123)
-  result_parallel1 = fbms(data=df, transforms=transforms,
-                          beta_prior = list(type="g-prior", g=max(n,p^2)),
-                          probs=probs,params=params,
-                          method="gmjmcmc.parallel",
-                          P=50,N=1000,runs=10,cores=10)
-  save(result_parallel1,file="Ex6_parallel1_orig.RData")
-
-  set.seed(1234)
-  result_parallel2=fbms(data=df, transforms=transforms,
-                        beta_prior = list(type="g-prior", g=max(n,p^2)),
-                        probs=probs,params=params,
-                        method="gmjmcmc.parallel",
-                        P=50,N=1000,runs=10,cores=10)
-  save(result_parallel2,file="Ex6_parallel2_orig.RData")
-
-  set.seed(123456)
-  result_parallel3=fbms(data=df, transforms=transforms,
-                        beta_prior = list(type="g-prior", g=max(n,p^2)),
-                        probs=probs,params=params,
-                        method="gmjmcmc.parallel",
-                        P=50,N=1000,runs=10,cores=10)
-  save(result_parallel3,file="Ex6_parallel3_orig.RData")
-
-} else {
-
-  # If not running gmjmcmc.parallel again, load previously saved results
-  load("Ex6_parallel1.RData")
-  load("Ex6_parallel2.RData")
-  load("Ex6_parallel3.RData")
-
-}
-
-
-# Summarize results from each of the three parallel runs with tolerance of 0.01
-
-res1 = summary(result_parallel1,tol=0.01)
-res1$marg.probs = round(res1$marg.probs,3)
-res2 = summary(result_parallel2,tol=0.01)
-res2$marg.probs = round(res2$marg.probs,3)
-res3 = summary(result_parallel3,tol=0.01)
-res3$marg.probs = round(res3$marg.probs,3)
-
-# Combine unique feature names found in all three runs
-names.best = unique(c(res1$feats.strings,res2$feats.strings,res3$feats.strings))
-
-# Find maximum number of rows across summaries to equalize sizes for cbind
-m = max(nrow(res1),nrow(res2),nrow(res3))
-# Pad shorter summaries with empty rows to make them all length m
-while(nrow(res1) < m) res1 = rbind(res1, "")
-while(nrow(res2) < m) res2 = rbind(res2, "")
-while(nrow(res3) < m) res3 = rbind(res3, "")
-
-# function to estimate log posterior with lme4 (Laplace approximation)
-mixed.model.loglik.lme4 <- function (y, x, model, complex, mlpost_params)
-{
-  if(sum(model) > 1) {
-    x.model = x[,model]
-    data <- data.frame(y, x = x.model[,-1], dr = mlpost_params$dr)
-
-    mm <- lmer(as.formula(paste0("y ~ 1 +",paste0(names(data)[2:(dim(data)[2]-1)],collapse = "+"), "+ (1 | dr)")), data = data, REML = FALSE)
-  } else{ #model without fixed effects
-    data <- data.frame(y, dr = mlpost_params$dr)
-    mm <- lmer(as.formula(paste0("y ~ 1 + (1 | dr)")), data = data, REML = FALSE)
-  }
-
-  mloglik <- as.numeric(logLik(mm)) - 0.5*log(length(y)) * (dim(data)[2] - 2) #Laplace approximation for beta prior
-
-  # logarithm of model prior
-  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value of parameter r
-  lp <- log_prior(mlpost_params, complex)
-
-
-  return(list(crit = mloglik + lp, coefs = fixef(mm)))
-}
-
-
-# function to estimate log posterior with INLA
-
-mixed.model.loglik.inla <- function (y, x, model, complex, mlpost_params)
-{
-  if(sum(model)>1)
-  {
-    data1 = data.frame(y, as.matrix(x[,model]), mlpost_params$dr)
-    formula1 = as.formula(paste0(names(data1)[1],"~",paste0(names(data1)[3:(dim(data1)[2]-1)],collapse = "+"),"+ f(mlpost_params.dr,model = \"iid\")"))
-  } else
-  {
-    data1 = data.frame(y, mlpost_params$dr)
-    formula1 = as.formula(paste0(names(data1)[1],"~","1 + f(mlpost_params.dr,model = \"iid\")"))
-  }
-
-  # to make sure INLA is not stuck
-  inla.setOption(inla.timeout=30)
-  inla.setOption(num.threads=mlpost_params$INLA.num.threads)
-
-  mod<-NULL
-  # error handling for unstable libraries that one does not trust 100%
-
  tryCatch({
-    mod <- inla(family = "gaussian",silent = 1L,safe = F, data = data1,formula = formula1)
-  }, error = function(e) {
-
-    # Handle the error by setting result to NULL
-    mod <- NULL
-
-    # You can also print a message or log the error if needed
-    cat("An error occurred:", conditionMessage(e), "\n")
-  })
-
-  # logarithm of model prior
-  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value of parameter r
-  lp <- log_prior(mlpost_params, complex)
-
-  if(length(mod)<3||length(mod$mlik[1])==0) {
-    return(list(crit = -10000 + lp,coefs = rep(0,dim(data1)[2]-2)))
-  } else {
-    mloglik <- mod$mlik[1]
-    return(list(crit = mloglik + lp, coefs = mod$summary.fixed$mode))
-  }
-}
-
-
-# function to estimate log posterior with RTMB
-
-mixed.model.loglik.rtmb <- function (y, x, model, complex, mlpost_params)
-{
-  z = model.matrix(y~mlpost_params$dr) #Design matrix for random effect
-
-  msize = sum(model)
-  #Set up and estimate model
-  dat = list(y = y, xm = x[,model], z = z)
-  par = list(logsd_eps = 0,
-             logsd_dr = 0,
-             beta = rep(0,msize),
-             u = rep(0,mlpost_params$nr_dr))
-
-  nll = function(par){
-    getAll(par,dat)
-    sd_eps = exp(logsd_eps)
-    sd_dr = exp(logsd_dr)
-
-    nll = 0
-    # -log-likelihood of the random effect
-    nll = nll - sum(dnorm(u, 0, sd_dr, log = TRUE))
-    mu = as.vector(as.matrix(xm)%*%beta) + z%*%u
-    nll <- nll - sum(dnorm(y, mu, sd_eps, log = TRUE))
-
-    return(nll)
-  }
-  obj <- MakeADFun(nll, par, random = "u", silent = T)
-  opt <- nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 10))
-
-  # logarithm of model prior
-  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value of parameter r
-  lp <- log_prior(mlpost_params, complex)
-
-  mloglik <- -opt$objective - 0.5*log(dim(x)[1])*msize
-  return(list(crit = mloglik + lp, coefs = opt$par[-(1:2)]))
-}
-
-
-
-######################
-#
-# Compare runtime
-#
-
-set.seed(03052024)
-
-tic()
-result1a <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                 method = "gmjmcmc",probs = probs, params = params, P=3, N = 30,
-                 family = "custom", loglik.pi = mixed.model.loglik.lme4,
-                 model_prior = list(r = 1/dim(df)[1]),
-                 extra_params = list(dr = droplevels(Zambia$dr)))
-time.lme4 = toc()
-
-
-tic()
-result1b <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                 method = "gmjmcmc",probs = probs, params = params, P=3, N = 30,
-                 family = "custom", loglik.pi = mixed.model.loglik.inla,
-                 model_prior = list(r = 1/dim(df)[1]),
-                 extra_params = list(dr = droplevels(Zambia$dr),
-                                     INLA.num.threads = 10))
-time.inla = toc()
-
-tic()
-result1c <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                 method = "gmjmcmc",probs = probs, params = params, P=3, N = 30,
-                 family = "custom", loglik.pi = mixed.model.loglik.rtmb,
-                 model_prior = list(r = 1/dim(df)[1]),
-                 extra_params = list(dr = droplevels(Zambia$dr),
-                                     nr_dr = sum((table(Zambia$dr))>0)))
-time.rtmb = toc()
-plot(result1c)
-summary(result1c, labels = names(df)[-1])
-
-c(time.lme4$callback_msg, time.inla$callback_msg, time.rtmb$callback_msg)
-
-######################
-#
-# Analysis with lme4
-#
-#
-
-
-# Only this one is actually reported in the manuscript
-set.seed(20062024)
-params$feat$pop.max = 10
-
-result2a <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                 probs = probs, params = params, P=25, N = 100,
-                 method = "gmjmcmc.parallel", runs = 40, cores = 40,
-                 family = "custom", loglik.pi = mixed.model.loglik.lme4,
-                 model_prior = list(r = 1/dim(df)[1]),
-                 extra_params = list(dr =
 droplevels(Zambia$dr)))
-
-summary(result2a,tol = 0.05,labels=names(df)[-1])
-
-
-set.seed(21062024)
-
-result2b <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                 probs = probs, params = params, P=25, N = 100,
-                 method = "gmjmcmc.parallel", runs = 120, cores = 40,
-                 family = "custom", loglik.pi = mixed.model.loglik.lme4,
-                 model_prior = list(r = 1/dim(df)[1]),
-                 extra_params = list(dr = droplevels(Zambia$dr)))
-
-summary(result2b, labels = names(df)[-1])
-
-summary(result2b, labels = names(df)[-1], pop = "all")
-summary(result2b, labels = names(df)[-1], pop = "last")
-
-plot(result2b)
-
-
-set.seed(03072024)
-
-result2c <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                 probs = probs, params = params, P=25, N = 100,
-                 method = "gmjmcmc.parallel", runs = 200, cores = 40,
-                 family = "custom", loglik.pi = mixed.model.loglik.lme4,
-                 model_prior = list(r = 1/dim(df)[1]),
-                 extra_params = list(dr = droplevels(Zambia$dr)))
-
-summary(result2c, labels = names(df)[-1])
-summary(result2c, labels = names(df)[-1], pop = "last")
-summary(result2c, labels = names(df)[-1], pop = "all")
-summary(result2c, labels = names(df)[-1], pop = "best")
-
-
-summary(result2a, labels = names(df)[-1])
-summary(result2b, labels = names(df)[-1])
-summary(result2c, labels = names(df)[-1])
-
-
-######################
-#
-# Analysis with INLA (Not used for manuscript, very long runtime)
-#
-#
-
-set.seed(22052024)
-
-# Number of threads used by INLA set to 1
-result2aI <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                  probs = probs, params = params, P=25, N = 100,
-                  method = "gmjmcmc.parallel", runs = 40, cores = 40,
-                  family = "custom", loglik.pi = mixed.model.loglik.inla,
-                  model_prior = list(r = 1/dim(df)[1]),
-                  extra_params = list(dr = droplevels(Zambia$dr), INLA.num.threads = 1))
-
-plot(result2aI)
-summary(result2aI, labels = names(df)[-1])
-
-
-
-params$feat$check.col = F
-
-set.seed(20062024)
-# Number of threads used by INLA set to 1
-result2bI <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                  probs = probs, params = params, P=25, N = 100,
-                  method = "gmjmcmc.parallel", runs = 120, cores = 40,
-                  family = "custom", loglik.pi = mixed.model.loglik.inla,
-                  model_prior = list(r = 1/dim(df)[1]),
-                  extra_params = list(dr = droplevels(Zambia$dr), INLA.num.threads = 1))
-
-plot(result2bI)
-summary(result2bI, labels = names(df)[-1])
\ No newline at end of file
diff --git a/R_script/oldvers/FBMS-guide.R b/R_script/oldvers/FBMS-guide.R
deleted file mode 100644
index 435c2da..0000000
--- a/R_script/oldvers/FBMS-guide.R
+++ /dev/null
@@ -1,354 +0,0 @@
-## ----include=FALSE------------------------------------------------------------
-knitr::opts_chunk$set(
-  message = TRUE,    # show package startup and other messages
-  warning = FALSE,   # suppress warnings
-  echo = TRUE,       # show code
-  results = "hide"   # hide default printed results unless printed via printn()
-)
-
-# For careful printing of only what I explicitly ask for
-printn <- function(x) {
-  # Capture the *exact* console print output as a character vector
-  txt <- capture.output(print(x))
-  # Combine lines with newline, send as a message to be shown in output
-  message(paste(txt, collapse = "\n"))
-}
-
-library(FBMS)
-
-## ----eval=FALSE, include=FALSE------------------------------------------------
-# library(FBMS)
-
-## -----------------------------------------------------------------------------
-
-# Parameters for parallel runs are set to a single thread and single core to comply with CRAN requirements (please
tune for your machine if you have more capacity)
-runs <- 1  # set to 1 for simplicity; ideally use 16 or more
-cores <- 1 # set to 1 for simplicity; ideally use 8 or more
-
-## -----------------------------------------------------------------------------
-# Load example
-data <- FBMS::exoplanet
-
-# Choose a small but expressive transform set for a quick demo
-transforms <- c("sigmoid", "sin_deg", "exp_dbl", "p0", "troot", "p3")
-
-# ---- fbms() call (simple GMJMCMC) ----
-# Key parameters (explicit):
-#   - formula     : semimajoraxis ~ 1 + .      # response and all predictors
-#   - data        : data                       # dataset
-#   - beta_prior  : list(type = "g-prior")     # parameter prior
-#   - model_prior : list(r = 1/dim(data)[1])   # model prior
-#   - method      : "gmjmcmc"                  # exploration strategy
-#   - transforms  : transforms                 # nonlinear feature dictionary
-#   - P           : number of GMJMCMC generations (feature populations explored)
-result_single <- fbms(
-  formula = semimajoraxis ~ 1 + .,
-  data = data,
-  beta_prior = list(type = "g-prior", alpha = dim(data)[1]),
-  model_prior = list(r = 1/dim(data)[1]),
-  method = "gmjmcmc",
-  transforms = transforms,
-  P = 20
-)
-
-# Summarize
-printn(summary(result_single))
-
-## -----------------------------------------------------------------------------
-
-# ---- fbms() call (parallel GMJMCMC) ----
-# Key parameters (explicit):
-#   - formula     : semimajoraxis ~ 1 + .      # response and all predictors
-#   - data        : data                       # dataset
-#   - beta_prior  : list(type = "g-prior")     # parameter prior
-#   - model_prior : list(r = 1/dim(data)[1])   # model prior
-#   - method      : "gmjmcmc.parallel"         # exploration strategy
-#   - transforms  : transforms                 # nonlinear feature dictionary
-#   - runs, cores : parallelization controls
-#   - P           : number of GMJMCMC generations (feature populations explored)
-result_parallel <- fbms(
-  formula = semimajoraxis ~ 1 + .,
-  data = data,
-  beta_prior = list(type = "g-prior", alpha = dim(data)[1]),
-  model_prior = list(r = 1/dim(data)[1]),
-  method = "gmjmcmc.parallel",
-  transforms = transforms,
-  runs = runs*10, # by default the rmd has runs = 1; increase for convergence
-  cores = cores,  # by default the rmd has cores = 1; increase for convergence
-  P = 20
-)
-
-# Summarize
-printn(summary(result_parallel))
-
-## -----------------------------------------------------------------------------
-plot(result_parallel)
-
-## -----------------------------------------------------------------------------
-diagn_plot(result_parallel)
-
-## -----------------------------------------------------------------------------
-library(mvtnorm)
-
-n <- 100  # sample size
-p <- 20   # number of covariates
-k <- 5    # size of true submodel
-
-correct.model <- 1:k
-beta.k <- (1:5)/5
-
-beta <- rep(0, p)
-beta[correct.model] <- beta.k
-
-set.seed(123)
-x <- rmvnorm(n, rep(0, p))
-y <- x %*% beta + rnorm(n)
-
-# Standardize
-y <- scale(y)
-X <- scale(x) / sqrt(n)
-
-df <- as.data.frame(cbind(y, X))
-colnames(df) <- c("Y", paste0("X", seq_len(ncol(df) - 1)))
-
-printn(correct.model)
-printn(beta.k)
-
-## -----------------------------------------------------------------------------
-# ---- fbms() call (MJMCMC) ----
-# Explicit prior choice:
-#   beta_prior = list(type = "g-prior", alpha = 100)
-# To switch to another prior, e.g.
robust: -# beta_prior = list(type = "robust") -result.lin <- fbms( - formula = Y ~ 1 + ., - data = df, - method = "mjmcmc", - N = 5000, # number of iterations - beta_prior = list(type = "g-prior", alpha = 100) -) - -## ----------------------------------------------------------------------------- -plot(result.lin) - -## ----------------------------------------------------------------------------- -# 'effects' specifies quantiles for posterior modes of effects across models -printn(summary(result.lin, effects = c(0.5, 0.025, 0.975))) - -## ----------------------------------------------------------------------------- -# ---- fbms() call (parallel MJMCMC) ---- -# Explicit prior choice: -# beta_prior = list(type = "g-prior", alpha = 100) -# To switch to another prior, e.g. robust: -# beta_prior = list(type = "robust") -# method = mjmcmc.parallel -# runs, cores : parallelization controls -result.lin.par <- fbms( - formula = Y ~ 1 + ., - data = df, - method = "mjmcmc.parallel", - N = 5000, # number of iterations - beta_prior = list(type = "g-prior", alpha = 100), - runs = runs, - cores = cores -) -printn(summary(result.lin.par, effects = c(0.5, 0.025, 0.975))) - -## ----------------------------------------------------------------------------- -# Create FP-style response with known structure, covariates are from previous example -df$Y <- p05(df$X1) + df$X1 + pm05(df$X3) + p0pm05(df$X3) + df$X4 + - pm1(df$X5) + p0(df$X6) + df$X8 + df$X10 + rnorm(nrow(df)) - -# Allow common FP transforms -transforms <- c( - "p0", "p2", "p3", "p05", "pm05", "pm1", "pm2", "p0p0", - "p0p05", "p0p1", "p0p2", "p0p3", "p0p05", "p0pm05", "p0pm1", "p0pm2" -) - -# Generation probabilities — here only modifications and mutations -probs <- gen.probs.gmjmcmc(transforms) -probs$gen <- c(0, 1, 0, 1) - -# Feature-generation parameters -params <- gen.params.gmjmcmc(ncol(df) - 1) -params$feat$D <- 1 # max depth 1 features - -## ----------------------------------------------------------------------------- -result <- fbms( - formula = Y ~ 1 + ., - data = df, - method = "gmjmcmc", - transforms = transforms, - beta_prior = list(type = "Jeffreys-BIC"), - probs = probs, - params = params, - P = 25 -) - -printn(summary(result)) - -## ----------------------------------------------------------------------------- -result_parallel <- fbms( - formula = Y ~ 1 + ., - data = df, - method = "gmjmcmc.parallel", - transforms = transforms, - beta_prior = list(type = "Jeffreys-BIC"), - probs = probs, - params = params, - P = 25, - runs = runs, - cores = cores -) - -printn(summary(result_parallel)) - -## ----------------------------------------------------------------------------- -# Custom approximate log marginal likelihood for mixed model using Laplace approximation -mixed.model.loglik.lme4 <- function (y, x, model, complex, mlpost_params) { - if (sum(model) > 1) { - x.model <- x[, model] - data <- data.frame(y, x = x.model[, -1], dr = mlpost_params$dr) - mm <- lmer(as.formula(paste0("y ~ 1 +", - paste0(names(data)[2:(ncol(data)-1)], collapse = "+"), - " + (1 | dr)")), data = data, REML = FALSE) - } else { - data <- data.frame(y, dr = mlpost_params$dr) - mm <- lmer(y ~ 1 + (1 | dr), data = data, REML = FALSE) - } - # log marginal likelihood (Laplace approx) + log model prior - mloglik <- as.numeric(logLik(mm)) - 0.5 * log(length(y)) * (ncol(data) - 2) - if (length(mlpost_params$r) == 0) mlpost_params$r <- 1 / nrow(x) - lp <- log_prior(mlpost_params, complex) - list(crit = mloglik + lp, coefs = fixef(mm)) -} - -## 
----------------------------------------------------------------------------- -library(lme4) -data(Zambia, package = "cAIC4") - -df <- as.data.frame(sapply(Zambia[1:5], scale)) - -transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2", - "p0p0","p0p05","p0p1","p0p2","p0p3", - "p0p05","p0pm05","p0pm1","p0pm2") - -probs <- gen.probs.gmjmcmc(transforms) -probs$gen <- c(1, 1, 0, 1) # include modifications and interactions - -params <- gen.params.gmjmcmc(ncol(df) - 1) -params$feat$D <- 1 -params$feat$pop.max <- 10 - -result2a <- fbms( - formula = z ~ 1 + ., - data = df, - method = "gmjmcmc.parallel", - transforms = transforms, - probs = probs, - params = params, - P = 25, - N = 100, - runs = runs, - cores = cores, - family = "custom", - loglik.pi = mixed.model.loglik.lme4, - model_prior = list(r = 1 / nrow(df)), # model_prior is passed to mlpost_params - extra_params = list(dr = droplevels(Zambia$dr)) # extra_params are passed to mlpost_params -) - -printn(summary(result2a, tol = 0.05, labels = names(df)[-1])) - -## ----------------------------------------------------------------------------- -n <- 2000 -p <- 50 - -set.seed(1) -X2 <- as.data.frame(matrix(rbinom(n * p, size = 1, prob = runif(n * p, 0, 1)), n, p)) -y2.Mean <- 1 + 7*(X2$V4*X2$V17*X2$V30*X2$V10) + 9*(X2$V7*X2$V20*X2$V12) + - 3.5*(X2$V9*X2$V2) + 1.5*(X2$V37) - -Y2 <- rnorm(n, mean = y2.Mean, sd = 1) -df <- data.frame(Y2, X2) - -# Train/test split -df.training <- df[1:(n/2), ] -df.test <- df[(n/2 + 1):n, ] -df.test$Mean <- y2.Mean[(n/2 + 1):n] - -## ----------------------------------------------------------------------------- -estimate.logic.lm <- function(y, x, model, complex, mlpost_params) { - suppressWarnings({ - mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) - }) - mloglik <- -(mod$aic + (log(length(y)) - 2) * (mod$rank)) / 2 - wj <- complex$width - lp <- sum(log(factorial(wj))) - sum(wj * log(4 * mlpost_params$p) - log(4)) - logpost <- mloglik + lp - if (logpost == -Inf) logpost <- -10000 - list(crit = logpost, coefs = mod$coefficients) -} - -## ----------------------------------------------------------------------------- -set.seed(5001) - -# Only "not" operator; "or" is implied by De Morgan via "and" + "not" -transforms <- c("not") -probs <- gen.probs.gmjmcmc(transforms) -probs$gen <- c(1, 1, 0, 1) # no projections - -params <- gen.params.gmjmcmc(p) -params$feat$pop.max <- 50 -params$feat$L <- 15 - -result <- fbms( - formula = Y2 ~ 1 + ., - data = df.training, - method = "gmjmcmc", - transforms = transforms, - N = 500, - P = 25, - family = "custom", - loglik.pi = estimate.logic.lm, - probs = probs, - params = params, - model_prior = list(p = p) -) - -printn(summary(result)) - -# Extract models -mpm <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1], - family = "custom", loglik.pi = estimate.logic.lm, params = list(p = 50)) -printn(mpm$coefs) - -mpm2 <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1]) -printn(mpm2$coefs) - -mbest <- get.best.model(result) -printn(mbest$coefs) - -## ----------------------------------------------------------------------------- -# Correct link is identity for Gaussian -pred <- predict(result, x = df.test[,-1], link = function(x) x) -pred_mpm <- predict(mpm, x = df.test[,-1], link = function(x) x) -pred_best <- predict(mbest, x = df.test[,-1], link = function(x) x) - -# RMSEs -printn(sqrt(mean((pred$aggr$mean - df.test$Y2)^2))) -printn(sqrt(mean((pred_mpm - df.test$Y2)^2))) -printn(sqrt(mean((pred_best - df.test$Y2)^2))) -printn(sqrt(mean((df.test$Mean 
- df.test$Y2)^2)))
-
-# Errors to the true mean (oracle)
-printn(sqrt(mean((pred$aggr$mean - df.test$Mean)^2)))
-printn(sqrt(mean((pred_best - df.test$Mean)^2)))
-printn(sqrt(mean((pred_mpm - df.test$Mean)^2)))
-
-# Quick diagnostic plot
-plot(pred$aggr$mean, df.test$Y2,
-     xlab = "Predicted (BMA)", ylab = "Observed")
-points(pred$aggr$mean, df.test$Mean, col = 2)
-points(pred_best, df.test$Mean, col = 3)
-points(pred_mpm, df.test$Mean, col = 4)
-
diff --git a/R_script/oldvers/bacteremia.R b/R_script/oldvers/bacteremia.R
deleted file mode 100644
index 3aef69b..0000000
--- a/R_script/oldvers/bacteremia.R
+++ /dev/null
@@ -1,155 +0,0 @@
-#devtools::install_github("jonlachmann/GMJMCMC@FBMS", force=T, build_vignettes=F)
-
-library(Metrics) # For C-index
-library(FBMS)
-set.seed(123)
-
-# Function to calculate C-index manually for binary classification
-cindex_manual <- function(predictions, labels) {
-  # Ensure predictions and labels are numeric
-  n <- length(labels)
-
-  # Initialize counters for concordant, discordant, and ties
-  concordant <- 0
-  discordant <- 0
-  ties <- 0
-
-  # Loop through all possible pairs
-  for (i in 1:(n-1)) {
-    for (j in (i+1):n) {
-      if (labels[i] != labels[j]) { # Only consider pairs with different labels
-        if (predictions[i] == predictions[j]) {
-          ties <- ties + 1
-        } else if ((predictions[i] > predictions[j] && labels[i] > labels[j]) ||
-                   (predictions[i] < predictions[j] && labels[i] < labels[j])) {
-          concordant <- concordant + 1
-        } else {
-          discordant <- discordant + 1
-        }
-      }
-    }
-  }
-
-  # Calculate the C-index
-  total_pairs <- concordant + discordant + ties
-  c_index <- (concordant + 0.5 * ties) / total_pairs
-  return(c_index)
-}
-
-
-df = read.csv("https://zenodo.org/records/7554815/files/Bacteremia_public_S2.csv",
-              header = TRUE, sep = ",", dec = ".")
-
-df = df[,!(names(df) %in% c("MONOR", "LYMR", "NEUR", "EOSR", "BASOR", "WBC", "MCV", "HCT"))]
-df$BloodCulture = ifelse(df$BloodCulture == "yes", 1, 0)
-
-#df = na.omit(df)
-trid = sample.int(dim(df)[1],round(dim(df)[1]*2/3))
-
-df.train = (df[trid,])
-df.test = (df[-trid,])
-
-
-# Number of bootstrap iterations
-n_bootstrap <- 100
-
-# Store results for each bootstrap iteration
-accuracy_oob <- numeric(n_bootstrap)
-accuracy_boot <- numeric(n_bootstrap)
-cindex_oob <- numeric(n_bootstrap)
-cindex_boot <- numeric(n_bootstrap)
-auc_oob <- numeric(n_bootstrap)  # initialize AUC storage; assigned in the loop below but previously never created
-auc_boot <- numeric(n_bootstrap) # initialize AUC storage; assigned in the loop below but previously never created
-
-# Full model performance
-result.nonlinear = fbms(formula = BloodCulture ~ 1 + ., family = "binomial", data = df.train,beta_prior = list(type = "Jeffreys-BIC"), impute = T, method = "gmjmcmc.parallel",P = 10,cores = 6, runs = 6, transforms = c("sigmoid","sin","cos","exp_dbl"))
-summary(result.nonlinear)
-
-# Full model performance
-result = fbms(formula = BloodCulture ~ 1 + ., family = "binomial", data = df.train,beta_prior = list(type = "Jeffreys-BIC"), impute = T, method = "mjmcmc")
-summary(result)
-preds = predict(result.nonlinear, df.test[,-45])
-prob_full <- sigmoid(preds$aggr$mean)
-
-# Accuracy on full model
-accuracy_full <- mean((prob_full > 0.5) == df.test$BloodCulture)
-
-# AUC on full model
-auc_full <- auc(df.test$BloodCulture, prob_full)
-
-# C-index on full model
-cindex_full <- cindex_manual(prob_full, df.test$BloodCulture)
-
-# Theoretical performance (no effect) for AUC and C-index
-p_M0_auc <- 0.5      # For AUC
-p_M0_cindex <- 0.5   # For C-index
-p_M0_accuracy <- 0.5 # For accuracy (random guessing)
-
-# Bootstrap procedure
-for (i in 1:n_bootstrap) {
-
-  # Bootstrap resample from training data
-  boot_indices <- sample(1:nrow(df.train), replace = TRUE)
-  df_boot <- 
df.train[boot_indices, ] - - # Fit model on bootstrap sample - result_boot <- fbms(formula = BloodCulture ~ 1 + ., family = "binomial", data = df_boot, impute = TRUE, method = "mjmcmc") - - # Predictions on bootstrap sample (in-sample performance) - preds_boot <- predict(result_boot, df_boot[,-45]) - prob_boot <- sigmoid(preds_boot$mean) - - # Predictions on the original data (out-of-bag performance) - preds_oob <- predict(result_boot, df.test[,-45]) - prob_oob <- sigmoid(preds_oob$mean) - - # Accuracy - accuracy_boot[i] <- mean((prob_boot > 0.5) == df_boot$BloodCulture) - accuracy_oob[i] <- mean((prob_oob > 0.5) == df.test$BloodCulture) - - # AUC - auc_boot[i] <- auc(df_boot$BloodCulture, prob_boot) - auc_oob[i] <- auc(df.test$BloodCulture, prob_oob) - - # C-index - cindex_boot[i] <- cindex_manual(prob_boot, df_boot$BloodCulture) - cindex_oob[i] <- cindex_manual(prob_oob, df.test$BloodCulture) -} - -# Calculate overfitting rate R for accuracy -R_accuracy <- mean(accuracy_boot - accuracy_oob) / (p_M0_accuracy - accuracy_full) - -# Calculate overfitting rate R for AUC -R_auc <- mean(auc_boot - auc_oob) / (p_M0_auc - auc_full) - -# Calculate overfitting rate R for C-index -R_cindex <- mean(cindex_boot - cindex_oob) / (p_M0_cindex - cindex_full) - -# .632+ weight for accuracy -w_accuracy <- 0.632 / (1 - 0.368 * R_accuracy) - -# .632+ weight for AUC -w_auc <- 0.632 / (1 - 0.368 * R_auc) - -# .632+ weight for C-index -w_cindex <- 0.632 / (1 - 0.368 * R_cindex) -# .632+ estimate -cindex_632plus <- (1 - w_cindex) * cindex_full + w_cindex * mean(cindex_boot) -accuracy_632plus <- (1 - w_accuracy) * accuracy_full + w_accuracy * mean(accuracy_boot) -auc_632plus <- (1 - w_auc) * auc_full + w_auc * mean(auc_boot) - -# Confidence intervals (95% CI using bootstrap percentiles) -cindex_ci <- quantile(cindex_boot, probs = c(0.025, 0.975)) -accuracy_ci <- quantile(accuracy_boot, probs = c(0.025, 0.975)) -auc_ci <- quantile(auc_boot, probs = c(0.025, 0.975)) - -# Print results -cat("Full model C-index:", cindex_full, "\n") -cat(".632+ C-index estimate:", cindex_632plus, "\n") -cat("C-index 95% CI from bootstrap:", cindex_ci, "\n\n") - -cat("Full model Accuracy:", accuracy_full, "\n") -cat(".632+ Accuracy estimate:", accuracy_632plus, "\n") -cat("Accuracy 95% CI from bootstrap:", accuracy_ci, "\n\n") - -cat("Full model AUC:", auc_full, "\n") -cat(".632+ AUC estimate:", auc_632plus, "\n") -cat("AUC 95% CI from bootstrap:", auc_ci, "\n") \ No newline at end of file diff --git a/R_script/oldvers/fix impute jon.R b/R_script/oldvers/fix impute jon.R deleted file mode 100644 index abad342..0000000 --- a/R_script/oldvers/fix impute jon.R +++ /dev/null @@ -1,144 +0,0 @@ -library(mgcv) -library(foreign) -#install.packages("XLConnect") -#library(XLConnect) -library(readxl) -# install.packages("e1071") -library(e1071) - -#install.packages("knitr") -library(knitr) -library(foreign) -library(readxl) -## tableone package itself -#install.packages("tableone") -#library(tableone) -## plotting -library(ggplot2) -#install.packages("pROC") -library(pROC) -#install.packages("memisc") -#library(memisc) -library(sjlabelled) - -#library(gmodels) # Für CrossTable -library(plyr) -library(dplyr) -#install.packages("devtools") -library(devtools) -library(survival) -## Load rms package -library(rms) - -#library(DescTools) - -#install.packages("Hmisc") -library(Hmisc) -#install.packages("Gmisc") -#library(Gmisc) -library(corrplot) - -library(tidyr) - -library(lubridate) -library(mice) -#library("VIM") - -library(glmnet) - - -Excel.File = 
"/Users/aliaksandrhome/Rprojects/FBMS impute Bones data/Data4.xlsx" - -#df <- read.csv2("C:/CC/Projekte/P24110_Kanz/Data3.csv") -#df <- as.data.frame(read_excel(Excel.File)) - - -df <- as.data.frame(read_excel(Excel.File, col_types = c( - "text", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric", "numeric", - "numeric", "numeric", "numeric"))) -str(df) - -#df$Bef_Nr = as.factor(df$Bef_Nr) -df$TestData = is.na(df$Sex_Molekular) - - - -df.Training = df[df$TestData == F,] -df.Training = df.Training %>% dplyr::select(- C14_mean) - -df.Training = df.Training %>% dplyr::select(- c(Clavicula_4, Clavicula_5,Femur_18, Tibia_71, Tibia_74,Fibula_1, Fibula_4a )) -m = dim(df.Training)[2] - -x.indx = 7:(m-1) - -MV = rowSums(!is.na(df.Training[,x.indx[-1]])) - -df.Training = df.Training[MV>1,] -n = dim(df.Training)[1] - - -devtools::load_all() - - -transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2") -#transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2") - -probs <- gen.probs.gmjmcmc(transforms) -probs$gen <- c(1,1,0,1) - -df.Training$Bef_Nr <- as.integer(df.Training$Bef_Nr) -df.Training$Sex_Molekular = as.integer(df.Training$Sex_Molekular) - 1 - -df.test <- df.Training[201:285,] -df.Training <- df.Training[1:200,] - -#df.Training$TestData <- NULL - -params <- gen.params.gmjmcmc(ncol(df.Training)- 1 + sum(sapply(names(df.Training)[-1],function(name)sum(is.na(df.Training[[name]]))>0))) - -params$feat$pop.max <- 150 - - -result_parallel = fbms(formula = Sex_Molekular ~ 1 + .,runs = 8, cores = 8, data = df.Training, transforms = transforms, probs = probs,params = params, P=25,impute = T, beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc.parallel") - -pred.obj = predict(result_parallel, x = df.test[,-2], link = function(x)(1/(1+exp(-x))),x_train = df.Training[,-2]) - -auc(df.test$Sex_Molekular, pred.obj$aggr$mean) -auc(df.Training$Sex_Molekular, df.Training$Sex_Morpho) -auc(df.test$Sex_Morpho, pred.obj$aggr$mean) - - -bm <- get.best.model(result_parallel) -pred = predict(object = bm, x = df.test[,-2], link = function(x)(1/(1+exp(-x))),x_train = df.Training[,-2]) - -auc(df.test$Sex_Molekular, pred) -auc(df.Training$Sex_Molekular, df.Training$Sex_Morpho) -auc(df.test$Sex_Morpho, pred) - - -mpm <- get.mpm.model(result_parallel,family = "binomial",y = df.Training$Sex_Molekular,x = df.Training[,-2]) -pred = predict(object = mpm, x = df.test[,-2], link = function(x)(1/(1+exp(-x))),x_train = df.Training[,-2]) - -auc(df.test$Sex_Molekular, pred) -auc(df.Training$Sex_Molekular, df.Training$Sex_Morpho) -auc(df.test$Sex_Morpho, pred) - - - - diff --git a/R_script/oldvers/gg.txt b/R_script/oldvers/gg.txt deleted file mode 100644 index d114ca0..0000000 --- a/R_script/oldvers/gg.txt +++ /dev/null @@ -1,48 +0,0 @@ -M So Ed Po1 Po2 LF M.F Pop NW 
U1 U2 Wealth Ineq Prob Time Crime -15.1 1 9.1 5.8 5.6 0.51 95 33 30.1 0.108 4.1 3940 26.1 0.084602 26.2011 791 -14.3 0 11.3 10.3 9.5 0.583 101.2 13 10.2 0.096 3.6 5570 19.4 0.029599 25.2999 1635 -14.2 1 8.9 4.5 4.4 0.533 96.9 18 21.9 0.094 3.3 3180 25 0.083401 24.3006 578 -13.6 0 12.1 14.9 14.1 0.577 99.4 157 8 0.102 3.9 6730 16.7 0.015801 29.9012 1969 -14.1 0 12.1 10.9 10.1 0.591 98.5 18 3 0.091 2 5780 17.4 0.041399 21.2998 1234 -12.1 0 11 11.8 11.5 0.547 96.4 25 4.4 0.084 2.9 6890 12.6 0.034201 20.9995 682 -12.7 1 11.1 8.2 7.9 0.519 98.2 4 13.9 0.097 3.8 6200 16.8 0.0421 20.6993 963 -13.1 1 10.9 11.5 10.9 0.542 96.9 50 17.9 0.079 3.5 4720 20.6 0.040099 24.5988 1555 -15.7 1 9 6.5 6.2 0.553 95.5 39 28.6 0.081 2.8 4210 23.9 0.071697 29.4001 856 -14 0 11.8 7.1 6.8 0.632 102.9 7 1.5 0.1 2.4 5260 17.4 0.044498 19.5994 705 -12.4 0 10.5 12.1 11.6 0.58 96.6 101 10.6 0.077 3.5 6570 17 0.016201 41.6 1674 -13.4 0 10.8 7.5 7.1 0.595 97.2 47 5.9 0.083 3.1 5800 17.2 0.031201 34.2984 849 -12.8 0 11.3 6.7 6 0.624 97.2 28 1 0.077 2.5 5070 20.6 0.045302 36.2993 511 -13.5 0 11.7 6.2 6.1 0.595 98.6 22 4.6 0.077 2.7 5290 19 0.0532 21.501 664 -15.2 1 8.7 5.7 5.3 0.53 98.6 30 7.2 0.092 4.3 4050 26.4 0.0691 22.7008 798 -14.2 1 8.8 8.1 7.7 0.497 95.6 33 32.1 0.116 4.7 4270 24.7 0.052099 26.0991 946 -14.3 0 11 6.6 6.3 0.537 97.7 10 0.6 0.114 3.5 4870 16.6 0.076299 19.1002 539 -13.5 1 10.4 12.3 11.5 0.537 97.8 31 17 0.089 3.4 6310 16.5 0.119804 18.1996 929 -13 0 11.6 12.8 12.8 0.536 93.4 51 2.4 0.078 3.4 6270 13.5 0.019099 24.9008 750 -12.5 0 10.8 11.3 10.5 0.567 98.5 78 9.4 0.13 5.8 6260 16.6 0.034801 26.401 1225 -12.6 0 10.8 7.4 6.7 0.602 98.4 34 1.2 0.102 3.3 5570 19.5 0.0228 37.5998 742 -15.7 1 8.9 4.7 4.4 0.512 96.2 22 42.3 0.097 3.4 2880 27.6 0.089502 37.0994 439 -13.2 0 9.6 8.7 8.3 0.564 95.3 43 9.2 0.083 3.2 5130 22.7 0.0307 25.1989 1216 -13.1 0 11.6 7.8 7.3 0.574 103.8 7 3.6 0.142 4.2 5400 17.6 0.041598 17.6 968 -13 0 11.6 6.3 5.7 0.641 98.4 14 2.6 0.07 2.1 4860 19.6 0.069197 21.9003 523 -13.1 0 12.1 16 14.3 0.631 107.1 3 7.7 0.102 4.1 6740 15.2 0.041698 22.1005 1993 -13.5 0 10.9 6.9 7.1 0.54 96.5 6 0.4 0.08 2.2 5640 13.9 0.036099 28.4999 342 -15.2 0 11.2 8.2 7.6 0.571 101.8 10 7.9 0.103 2.8 5370 21.5 0.038201 25.8006 1216 -11.9 0 10.7 16.6 15.7 0.521 93.8 168 8.9 0.092 3.6 6370 15.4 0.0234 36.7009 1043 -16.6 1 8.9 5.8 5.4 0.521 97.3 46 25.4 0.072 2.6 3960 23.7 0.075298 28.3011 696 -14 0 9.3 5.5 5.4 0.535 104.5 6 2 0.135 4 4530 20 0.041999 21.7998 373 -12.5 0 10.9 9 8.1 0.586 96.4 97 8.2 0.105 4.3 6170 16.3 0.042698 30.9014 754 -14.7 1 10.4 6.3 6.4 0.56 97.2 23 9.5 0.076 2.4 4620 23.3 0.049499 25.5005 1072 -12.6 0 11.8 9.7 9.7 0.542 99 18 2.1 0.102 3.5 5890 16.6 0.040799 21.6997 923 -12.3 0 10.2 9.7 8.7 0.526 94.8 113 7.6 0.124 5 5720 15.8 0.0207 37.4011 653 -15 0 10 10.9 9.8 0.531 96.4 9 2.4 0.087 3.8 5590 15.3 0.0069 44.0004 1272 -17.7 1 8.7 5.8 5.6 0.638 97.4 24 34.9 0.076 2.8 3820 25.4 0.045198 31.6995 831 -13.3 0 10.4 5.1 4.7 0.599 102.4 7 4 0.099 2.7 4250 22.5 0.053998 16.6999 566 -14.9 1 8.8 6.1 5.4 0.515 95.3 36 16.5 0.086 3.5 3950 25.1 0.047099 27.3004 826 -14.5 1 10.4 8.2 7.4 0.56 98.1 96 12.6 0.088 3.1 4880 22.8 0.038801 29.3004 1151 -14.8 0 12.2 7.2 6.6 0.601 99.8 9 1.9 0.084 2 5900 14.4 0.0251 30.0001 880 -14.1 0 10.9 5.6 5.4 0.523 96.8 4 0.2 0.107 3.7 4890 17 0.088904 12.1996 542 -16.2 1 9.9 7.5 7 0.522 99.6 40 20.8 0.073 2.7 4960 22.4 0.054902 31.9989 823 -13.6 0 12.1 9.5 9.6 0.574 101.2 29 3.6 0.111 3.7 6220 16.2 0.0281 30.0001 1030 -13.9 1 8.8 4.6 4.1 0.48 96.8 19 4.9 0.135 5.3 4570 24.9 
0.056202 32.5996 455 -12.6 0 10.4 10.6 9.7 0.599 98.9 40 2.4 0.078 2.5 5930 17.1 0.046598 16.6999 508 -13 0 12.1 9 9.1 0.623 104.9 3 2.2 0.113 4 5880 16 0.052802 16.0997 849 \ No newline at end of file diff --git a/R_script/oldvers/kristoffer.R b/R_script/oldvers/kristoffer.R deleted file mode 100644 index c363692..0000000 --- a/R_script/oldvers/kristoffer.R +++ /dev/null @@ -1,80 +0,0 @@ - -library(BAS) - -x <- read.csv("https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/refs/heads/master/supplementaries/Mode%20Jumping%20MCMC/supplementary/examples/US%20Data/simcen-x1.txt",header = F) -y <- read.csv( - "https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/refs/heads/master/supplementaries/Mode%20Jumping%20MCMC/supplementary/examples/US%20Data/simcen-y1.txt",header = F) - - - -data <- data.frame(x,y) - -names(data)[16] = "Crime" - - -res <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",alpha = 47,n.models = 32768,method = "deterministic", modelprior = uniform()) - -strue <- summary(res) - -strue[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] - -res.bas1 <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",method = "BAS",alpha = 47,n.models=3276,update=500,modelprior= uniform(),initprobs="Uniform") - -sbas1 <- summary(res.bas1) - -sbas1[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] - - -res.bas2 <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",method = "BAS",alpha = 47,n.models=3276,update=NULL,modelprior= uniform(),initprobs="Uniform") - -sbas2 <- summary(res.bas2) - -sbas2[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] - - -res.bas3 <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",method = "BAS",alpha = 47,n.models=3276,update=500,modelprior= uniform(),initprobs="eplogp") - -sbas3 <- summary(res.bas3) - -sbas3[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] - - -res.mc31 <- BAS::bas.lm(formula = Crime ~.,data = data,prior = "g-prior",alpha = 47,method = "MCMC",n.models = 327, update=NULL,modelprior= uniform(),initprobs="Uniform") - -smc31 <- summary(res.mc31) - -smc31[1+c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),1] - - - - -params <- gen.params.mjmcmc(15) -probs <- gen.probs.mjmcmc() - - - -res100 <- mjmcmc(x = x,y = y,loglik.pi = fbms.mlik.master.temp,mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior", g = 47), temp = 100, r = 0.5),N = 100000,params = params,probs = probs) - -res1 <- mjmcmc(x = x, y = y,loglik.pi = fbms.mlik.master.temp,mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior", g = 47), temp = 1, r = 0.5), N = 100000,params = params,probs = probs) - -res0.1 <- mjmcmc(x = x,y = y,loglik.pi = fbms.mlik.master.temp,mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior", g = 47), temp = 0.5, r = 0.5), N = 100000,params = params,probs = probs) - - -results <- data.frame(true = strue[2:16,1], t100 = res100$marg.probs[1,],t1 = res1$marg.probs[1,], t01 = res0.1$marg.probs[1,], BAS1 = sbas1[2:16,1],BAS2 = sbas2[2:16,1],BAS3 = sbas3[2:16,1],mc31 = smc31[2:16,1])[c(8,13,14,12,5,9,7,4,6,1,3,2,11,10,15),] - -abs(results$true - results)*100 - - - - - - - - - - - - - - - diff --git a/R_script/oldvers/likelihoods2.R b/R_script/oldvers/likelihoods2.R deleted file mode 100644 index 0cbe708..0000000 --- a/R_script/oldvers/likelihoods2.R +++ /dev/null @@ -1,800 +0,0 @@ -# Title : Log likelihood functions -# Objective : Log likelihood functions with priors to be used as templates or directly in GMJMCMC -# Created by: jonlachmann -# Created on: 2021-02-24 - -#' Log likelihood 
function for glm regression with parameter priors from BAS package -#' This function is created as an example of how to create an estimator that is used -#' to calculate the marginal likelihood of a model. -#' -#' @param y A vector containing the dependent variable -#' @param x The matrix containing the precalculated features -#' @param model The model to estimate as a logical vector -#' @param complex A list of complexity measures for the features -#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and family that BAS uses in glm models -#' -#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). -#' -#' @examples -#' glm.logpost.bas(as.integer(rnorm(100) > 0),cbind(1,matrix(rnorm(100))),c(TRUE,TRUE),list(oc = 1)) -#' -#' @importFrom BAS uniform Jeffreys g.prior -#' @importFrom stats poisson Gamma glm.control -#' @export glm.logpost.bas -glm.logpost.bas2 <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = FALSE)) { - if (length(mlpost_params) == 0) - mlpost_params <- list(r = 1/dim(x)[1], family = "binomial", prior_beta = g.prior(max(dim(x)[1], sum(model)-1)), laplace = FALSE) - p <- sum(model) - 1 - if(p==0) - { - probinit <- as.numeric(c(1,0.99)) - model[2] <- T - }else{ - probinit <- as.numeric(c(1,rep(0.99,p))) - } - - mod<-NULL - - - tryCatch({ - if(mlpost_params$family == "binomial") - suppressWarnings({ - mod <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y), X = as.matrix(x[,model]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = probinit, - Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), - modelprior = uniform(), - betaprior = mlpost_params$prior_beta, - family = binomial(), - Rcontrol = glm.control(), - Rlaplace = as.integer(mlpost_params$laplace)) - }) - else if(mlpost_params$family == "poisson") - suppressWarnings({ - mod <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y), X = as.matrix(x[,model]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = probinit, - Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), - modelprior = uniform(), - betaprior = mlpost_params$prior_beta, - family = poisson(), - Rcontrol = glm.control(), - Rlaplace = as.integer(mlpost_params$laplace)) - }) - else{ - suppressWarnings({ - mod <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y), X = as.matrix(x[,model]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = probinit, - Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), - modelprior = uniform(), - betaprior = mlpost_params$prior_beta, - family = Gamma(), - Rcontrol = glm.control(), - Rlaplace = as.integer(mlpost_params$laplace)) - })} - }, error = function(e) { - # Handle the error by setting result to NULL - mod <- NULL - # You can also print a message or log the error if needed - cat("An error occurred:", conditionMessage(e), "\n") - }) - - if(length(mod)==0) { - return(list(crit = -.Machine$double.xmax + log(mlpost_params$r * sum(complex$oc)), coefs = rep(0, p+1))) - } - - if(p == 0) - { - ret <- mod$logmarg[2] + log(mlpost_params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[2]])) - } - ret <- mod$logmarg + log(mlpost_params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[1]])) -} - - -#' Log likelihood 
function for Gaussian regression with parameter priors from BAS package -#' This function is created as an example of how to create an estimator that is used -#' to calculate the marginal likelihood of a model. -#' -#' @param y A vector containing the dependent variable -#' @param x The matrix containing the precalculated features -#' @param model The model to estimate as a logical vector -#' @param complex A list of complexity measures for the features -#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors where the corresponding integers as prior_beta must be provided "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9 and in Gaussian models -#' -#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). -#' -#' @examples -#' lm.logpost.bas(rnorm(100), cbind(1,matrix(rnorm(100))), c(TRUE,TRUE), list(oc = 1)) -#' -#' -#' @export lm.logpost.bas -lm.logpost.bas2 <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5), prior_beta = "g-prior", alpha = 4)) { - if (length(mlpost_params) == 0) - mlpost_params <- list(r = 1/dim(x)[1], prior_beta = 0, alpha = max(dim(x)[1], sum(model)^2)) - - - p <- sum(model) - 1 - if(p==0) - { - probinit <- as.numeric(c(1,0.99)) - model[2] <- T - }else{ - probinit <- as.numeric(c(1,rep(0.99,p))) - } - - mod<-NULL - - tryCatch({ - suppressWarnings({ - mod <- .Call(BAS:::C_deterministic, - y = y, X = as.matrix(x[,model]), - as.numeric(rep(1, length(y))), - probinit, - as.integer(rep(0,ifelse(p==0,2,1))), - incint = as.integer(F), - alpha = ifelse(length(mlpost_params$alpha)>0, as.numeric(mlpost_params$alpha), NULL), - method = as.integer(mlpost_params$prior_beta), - modelprior = uniform(), - Rpivot = TRUE, - Rtol = 1e-7) - }) - }, error = function(e) { - # Handle the error by setting result to NULL - mod <- NULL - # You can also print a message or log the error if needed - cat("An error occurred:", conditionMessage(e), "\n") - }) - - if(length(mod)==0) { - return(list(crit = -.Machine$double.xmax + log(mlpost_params$r * sum(complex$oc)), coefs = rep(0, p+1))) - } - - if(p == 0) - { - ret <- mod$logmarg[2] + log(mlpost_params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[2]])) - } - ret <- mod$logmarg + log(mlpost_params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[1]])) -} - - -#' Log likelihood function for logistic regression with a Jeffreys parameter prior and BIC approximations of the posterior -#' This function is created as an example of how to create an estimator that is used -#' to calculate the marginal likelihood of a model. -#' -#' @param y A vector containing the dependent variable -#' @param x The matrix containing the precalculated features -#' @param model The model to estimate as a logical vector -#' @param complex A list of complexity measures for the features -#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user -#' -#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). 
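-#'
-#' @details A sketch of the criterion, read off the function body below: with
-#' n = length(y), p = mod$rank, r = mlpost_params$r and oc = complex$oc, the
-#' returned crit is -(deviance + log(n) * (p - 1) - 2 * log(r) * sum(oc)) / 2,
-#' i.e. a BIC-style approximation to the log marginal likelihood plus the
-#' log model prior log(r) * sum(oc).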
-#' -#' @examples -#' logistic.loglik2(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) -#' -#' -#' @export logistic.loglik2 -logistic.loglik2 <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5))) { - if (length(mlpost_params) == 0) - mlpost_params <- list(r = 1/dim(x)[1]) - suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial())}) - ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(mlpost_params$r) * sum(complex$oc))) / 2 - return(list(crit=ret, coefs=mod$coefficients)) -} - -#' Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior -#' This function is created as an example of how to create an estimator that is used -#' to calculate the marginal likelihood of a model. -#' -#' @param y A vector containing the dependent variable -#' @param x The matrix containing the precalculated features -#' @param model The model to estimate as a logical vector -#' @param complex A list of complexity measures for the features -#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user, family must be specified -#' -#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). -#' -#' @examples -#' glm.loglik(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1)) -#' -#' -#' @export glm.loglik -glm.loglik2 <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5), family = "Gamma")) { - if (length(mlpost_params) == 0) - mlpost_params <- list(r = 1/dim(x)[1]) - - if(mlpost_params$family == "binomial") - { - fam = binomial() - }else if(mlpost_params$family == "poisson"){ - fam = poisson() - }else - { - fam = Gamma() - } - - suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)}) - - if (length(mod) == 0 || is.nan(mod$deviance)) { - return(list(crit = -.Machine$double.xmax + log_prior(mlpost_params, complex), coefs = rep(0, sum(model)))) - } - - ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(mlpost_params$r) * sum(complex$oc))) / 2 - return(list(crit=ret, coefs=mod$coefficients)) -} - - -#' Log likelihood function for gaussian regression with a Jeffreys prior and BIC approximation of MLIK with both known and unknown variance of the responses -#' -#' @param y A vector containing the dependent variable -#' @param x The matrix containing the precalculated features -#' @param model The model to estimate as a logical vector -#' @param complex A list of complexity measures for the features -#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user -#' -#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). 
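-#'
-#' @details Two cases, as computed in the body below: with unknown variance
-#' the criterion is -(aic + (log(n) - 2) * rank - 2 * log(r) * sum(oc)) / 2,
-#' which turns the AIC of the fit into a BIC-style approximation; with known
-#' variance var it is -(deviance / var + log(n) * (rank - 1) - 2 * log_prior) / 2.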
-#' -#' @examples -#' gaussian.loglik(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL) -#' -#' -#' @export gaussian.loglik -gaussian.loglik2 <- function (y, x, model, complex, mlpost_params) { - if(length(mlpost_params)==0) - mlpost_params <- list() - if (length(mlpost_params$r) == 0) - mlpost_params$r <- 1/dim(x)[1] - if(length(mlpost_params$var) == 0) - mlpost_params$var <- 1 - suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())}) - - if(mlpost_params$var == "unknown") - ret <- (-(mod$aic + (log(length(y))-2) * (mod$rank) - 2 * log(mlpost_params$r) * (sum(complex$oc)))) / 2 - else - ret <- (-(mod$deviance/mlpost_params$var + log(length(y)) * (mod$rank - 1) - 2 * log_prior(mlpost_params, complex))) / 2 - - return(list(crit=ret, coefs=mod$coefficients)) -} - - -#' Log likelihood function for linear regression using Zellners g-prior -#' -#' @param y A vector containing the dependent variable -#' @param x The matrix containing the precalculated features -#' @param model The model to estimate as a logical vector -#' @param complex A list of complexity measures for the features -#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user -#' -#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). -#' -#' @examples -#' gaussian.loglik2.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) -#' -#' @export gaussian.loglik2.g -gaussian.loglik2.g <- function (y, x, model, complex, mlpost_params = NULL) -{ - if(length(mlpost_params)==0) - mlpost_params <- list() - if (length(mlpost_params$r) == 0) - mlpost_params$r <- 1/dim(x)[1] - suppressWarnings({ - mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) - }) - # Calculate R-squared - y_mean <- mean(y) - TSS <- sum((y - y_mean)^2) - RSS <- sum(mod$residuals^2) - Rsquare <- 1 - (RSS / TSS) - - if (length(mlpost_params$r) == 0 || length(mlpost_params$g) == 0) - { - mlpost_params$r <- 1/dim(x)[1] - mlpost_params$g <- max(mod$rank^2, length(y)) - } - - # logarithm of marginal likelihood - mloglik <- 0.5*(log(1.0 + mlpost_params$g) * (dim(x)[1] - mod$rank) - log(1.0 + mlpost_params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1))*(mod$rank!=1) - - # logarithm of model prior - # default value or parameter r - lp <- log_prior(mlpost_params, complex) - - return(list(crit = mloglik + lp, coefs = mod$coefficients)) -} - - -#' Log likelihood function for Gaussian regression with parameter priors from BAS package -#' -#' This function computes the marginal likelihood of a Gaussian regression model under different priors. -#' -#' @param y A numeric vector containing the dependent variable. -#' @param x A matrix containing the independent variables, including an intercept column. -#' @param model A logical vector indicating which variables to include in the model. -#' @param complex A list containing complexity measures for the features. -#' @param mlpost_params A list of parameters for the log likelihood, specifying the tuning parameters of beta priors. 
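-#'
-#' @details mlpost_params$prior_beta[[1]] selects the tCCH hyperparameters
-#' (a, b, r, s, v, k) in the body below; the recognized names are "CH",
-#' "hyper-g", "uniform", "Jeffreys", "beta.prime", "benchmark", "TG",
-#' "ZS-adapted", "robust", "hyper-g-n", "intrinsic" and "tCCH".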
-#' -#' @return A list with elements: -#' \item{crit}{Log marginal likelihood combined with the log prior.} -#' \item{coefs}{Posterior mode of the coefficients.} -#' -#' @examples -#' gaussian_tcch_log_likelihood2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) -#' -#' @importFrom BAS phi1 hypergeometric1F1 hypergeometric2F1 -#' @importFrom tolerance F1 -#' @export -gaussian_tcch_log_likelihood2 <- function(y, x, model, complex, mlpost_params = list(r = exp(-0.5), prior_beta = "intrinsic")) { - - # Fit the linear model using fastglm - fitted_model <- fastglm(as.matrix(x[, model]), y, family = gaussian()) - log_likelihood <- -(fitted_model$aic -2 * (fitted_model$rank))/2 - # Compute R-squared manually - y_mean <- mean(y) - TSS <- sum((y - y_mean)^2) - RSS <- sum(fitted_model$residuals^2) - R2_M <- 1 - (RSS / TSS) - - p_M <- fitted_model$rank - n <- length(y) - - # Switch-like structure to assign hyperparameters based on prior - if (mlpost_params$prior_beta[[1]] == "CH") { - # CH prior: b and s should be user-specified, with defaults if not provided - a <- ifelse(!is.null(mlpost_params$prior_beta$a), mlpost_params$prior_beta$a, 1) # Default to 1 if not specified - b <- ifelse(!is.null(mlpost_params$prior_beta$b), mlpost_params$prior_beta$b, 2) # Default to 1 if not specified - r <- 0 - s <- ifelse(!is.null(mlpost_params$prior_beta$s), mlpost_params$prior_beta$s, 1) # Default to 1 if not specified - v <- 1 - k <- 1 - - } else if (mlpost_params$prior_beta[[1]] == "hyper-g") { - a <- 1 - b <- 2 - r <- 0 - s <- 0 - v <- 1 - k <- 1 - - } else if (mlpost_params$prior_beta[[1]] == "uniform") { - a <- 2 - b <- 2 - r <- 0 - s <- 0 - v <- 1 - k <- 1 - - } else if (mlpost_params$prior_beta[[1]] == "Jeffreys") { - a <- 0.0001 - b <- 2 - r <- 0 - s <- 0 - v <- 1 - k <- 1 - } else if (mlpost_params$prior_beta[[1]] == "beta.prime") { - a <- 1/2 - b <- n - p_M - 1.5 - r <- 0 - s <- 0 - v <- 1 - k <- 1 - - } else if (mlpost_params$prior_beta[[1]] == "benchmark") { - a <- 0.02 - b <- 0.02 * max(n, p_M^2) - r <- 0 - s <- 0 - v <- 1 - k <- 1 - - } else if (mlpost_params$prior_beta[[1]] == "TG") { - - a <- 2 * ifelse(!is.null(mlpost_params$prior_beta$a), mlpost_params$prior_beta$a, 1) - b <- 2 - r <- 0 - s <- 2 * ifelse(!is.null(mlpost_params$prior_beta$s), mlpost_params$prior_beta$s, 1) - v <- 1 - k <- 1 - - } else if (mlpost_params$prior_beta[[1]] == "ZS-adapted") { - a <- 1 - b <- 2 - r <- 0 - s <- n + 3 - v <- 1 - k <- 1 - } else if (mlpost_params$prior_beta[[1]] == "robust") { - a <- 1 - b <- 2 - r <- 1.5 - s <- 0 - v <- (n + 1) / (p_M + 1) - k <- 1 - - } else if (mlpost_params$prior_beta[[1]] == "hyper-g-n") { - a <- 1 - b <- 2 - r <- 1.5 - s <- 0 - v <- 1 - k <- 1 - - } else if (mlpost_params$prior_beta[[1]] == "intrinsic") { - a <- 1 - b <- 1 - r <- 1 - s <- 0 - v <- (n + p_M + 1) / (p_M + 1) - k <- (n + p_M + 1) / n - - }else if (mlpost_params$prior_beta[[1]] == "tCCH") { - a <- mlpost_params$prior_beta$a - b <- mlpost_params$prior_beta$b - r <- mlpost_params$prior_beta$rho - s <- mlpost_params$prior_beta$s - v <- mlpost_params$prior_beta$v - k <- mlpost_params$prior_beta$k - }else { - stop("Unknown prior name: ", mlpost_params$prior_beta) - } - - # - if (!is.null(r) & r == 0) { - - term1 <- lbeta((a + p_M) / 2, b / 2) - term2 <- phi1(b / 2, (n - 1) / 2, (a + b + p_M) / 2, s / (2 * v), min(0.8,R2_M/(v - (v - 1) * R2_M),log = T)) - - if(R2_M/(v - (v - 1) * R2_M)>0.8) - { - warning("Infinite marginal log likelihood! phi1 last argument reduced to 0.8. 
Use a different prior_beta (Robust, Hyper-g/n, Intrinsic, or g-prior)") - } - - term3 <- lbeta(a / 2, b / 2) - term4 <- hypergeometric1F1(b / 2, (a + b) / 2, s / (2 * v),log = T) - marginal_likelihood <- log_likelihood + (term1) + (term2) - (p_M / 2) * log(v) - ((n - 1) / 2)*log(1 - (1 - 1 / v) * R2_M) - (term3) - (term4) - } else if (!is.null(s) & s == 0) { - term1 <- lbeta((a + p_M) / 2, b / 2) - term2 <- hypergeometric2F1(r, b / 2, (a + b) / 2, 1 - k,log = T) - term3 <- F1((a + p_M) / 2, (a + b + p_M + 1 - n - 2 * r) / 2, (n - 1) / 2, (a + b + p_M) / 2, 1 - k, 1 - k - (R2_M^2 * k) / ((1 - R2_M) * v)) - marginal_likelihood <- log_likelihood + (a+p_M-2*r)/2*log(k) + (term1) - (term2) - (term3) - (p_M / 2) * log(v) - log(1 - R2_M) * ((n - 1) / 2) - lbeta(a / 2, b / 2) - - } else { - stop("Invalid inputs: either r = 0 or s = 0 must be specified.") - } - - if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value or parameter r - - lp <- log_prior(mlpost_params, complex) - - return(list(crit = marginal_likelihood + lp, coefs = fitted_model$coefficients)) -} - - - -#' Log likelihood function for logistic regression with an approximate Laplace approximations used -#' This function is created as an example of how to create an estimator that is used -#' to calculate the marginal likelihood of a model. -#' -#' @param y A vector containing the dependent variable -#' @param x The matrix containing the precalculated features -#' @param model The model to estimate as a logical vector -#' @param complex A list of complexity measures for the features -#' @param mlpost_params A list of parameters for the log likelihood, supplied by the user -#' -#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). -#' -#' @examples -#' logistic.loglik2.ala(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) -#' -#' -#' @export logistic.loglik2.ala -logistic.loglik2.ala <- function (y, x, model, complex, mlpost_params = list(r = exp(-0.5))) { - if (length(mlpost_params) == 0) - mlpost_params <- list(r = 1/dim(x)[1]) - suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial(),maxit = 1)}) - ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) -2 * log(mlpost_params$r) * sum(complex$oc))) / 2 - return(list(crit=ret, coefs=mod$coefficients)) -} - - - -#' Log likelihood function for logistic regression for alpha calculation -#' This function is just the bare likelihood function -#' -#' @param a A vector of the alphas to be used -#' @param data The data to be used for calculation -#' @param mu_func The function linking the mean to the covariates, -#' as a string with the alphas as a\[i\]. -#' -#' @return A numeric with the log likelihood. -#' -#' @export logistic.loglik2.alpha -logistic.loglik2.alpha <- function (a, data, mu_func) { - m <- 1 / (1 + exp(-eval(parse(text = mu_func)))) - -sum((data[,1] * log(m) + (1 - data[, 1]) * log(1 - m))) -} - - -#' Log likelihood function for gaussian regression for alpha calculation -#' This function is just the bare likelihood function -#' Note that it only gives a proportional value and is equivalent to least squares -#' -#' @param a A vector of the alphas to be used -#' @param data The data to be used for calculation -#' @param mu_func The function linking the mean to the covariates, -#' as a string with the alphas as a\[i\]. -#' -#' @return A numeric with the log likelihood. 
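-#'
-#' @details With m = eval(parse(text = mu_func)), the value returned below is
-#' the residual sum of squares sum((data[, 1] - m)^2); minimising it over the
-#' alphas is equivalent to maximising a Gaussian log likelihood with known
-#' variance, so only this proportional value is needed.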
-#' @examples -#'\dontrun{ -#'gaussian.loglik2.alpha(my_alpha,my_data,my_mu) -#'} -#' @export gaussian.loglik2.alpha -gaussian.loglik2.alpha <- function (a, data, mu_func) { - m <- eval(parse(text=mu_func)) - sum((data[,1]-m)^2) -} - - -#' Log model prior function -#' @param mlpost_params list of passed parameters of the likelihood in GMJMCMC -#' @param complex list of complexity measures of the features included into the model -#' -#' @return A numeric with the log model prior. -#' -#' @examples -#' log_prior(mlpost_params = list(r=2), complex = list(oc = 2)) -#' -#' @export log_prior -log_prior <- function (mlpost_params, complex) { - pl <- log(mlpost_params$r) * (sum(complex$oc)) - return(pl) -} - - -#' Master Log Marginal Likelihood Function -#' -#' This function serves as a unified interface to compute the log marginal likelihood -#' for different regression models and priors by calling specific log likelihood functions. -#' -#' @param y A numeric vector containing the dependent variable. -#' @param x A matrix containing the precalculated features (independent variables). -#' @param model A logical vector indicating which variables to include in the model. -#' @param complex A list of complexity measures for the features. -#' @param mlpost_params A list of parameters controlling the model family, prior, and tuning parameters. -#' Key elements include: -#' - family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") -#' - prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: -#' - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) -#' - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`) -#' - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian) -#' - "EB-global": Empirical Bayes local prior (Gaussian, requires `a` for Gaussian) -#' - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`) -#' - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`) -#' - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`) -#' - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`) -#' - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) -#' - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`) -#' - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) -#' - "uniform": Uniform prior (GLM/Gaussian, no additional args) -#' - "benchmark": Benchmark prior (Gaussian/GLM, no additional args) -#' - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) -#' - "robust": Robust prior (Gaussian/GLM, no additional args) -#' - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) -#' - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`) -#' - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`) -#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`) -#' - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`) -#' - "BIC": BIC prior from BAS (Gaussian/GLM) -#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) -#' - r: Model complexity penalty (default: 1/n) -#' - g: Tuning parameter for g-prior (default: max(n, p^2)) -#' - a, b, s, v, rho, k: Hyperparameters for various priors -#' - n: Sample size for some priors (default: length(y)) -#' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") -#' - laplace: Logical for Laplace 
approximation in GLM only (default: FALSE) -#' -#' @return A list with elements: -#' \item{crit}{Log marginal likelihood combined with the log prior.} -#' \item{coefs}{Posterior mode of the coefficients.} -#' -#' @examples -#' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1)) -#' -#' @importFrom BAS robust beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform -#' @export -fbms.mlik.master_old <- function(y, x, model, complex, mlpost_params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5))) { - # Extract dimensions - n <- length(y) - p <- sum(model) - 1 # Number of predictors excluding intercept - - # Set default parameters if not fully specified - if (is.null(mlpost_params$family)) mlpost_params$family <- "gaussian" - if (is.null(mlpost_params$prior_beta)) mlpost_params$prior_beta <- "g-prior" - if (is.null(mlpost_params$g)) mlpost_params$g <- max(p^2, n) - if (is.null(mlpost_params$n)) mlpost_params$n <- n - if (is.null(mlpost_params$r)) mlpost_params$r <- 1/n - - # Ensure complex has oc if not provided, ignore by default - if (is.null(complex$oc)) complex$oc <- 0 - - # Homogenize and prepare mlpost_params for nested calls - params_master <- mlpost_params - params_nested <- list(r = params_master$r) - - # Define valid priors for each family - #glm_only_priors <- c("CCH", "tCCH", "TG") - glm_and_gaussian_priors <- c("CH", "tCCH", "TG","beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n", - "intrinsic", "ZS-adapted", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC") - gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace","BIC", "AIC", "JZS","EB-global") - - #review a bit - gaussian_not_robust <- c("CH", "tCCH", "ZS-adapted", "TG","beta.prime", "benchmark","Jeffreys") - gaussian_robust <- c("g-prior", "hyper-g", "EB-local","BIC", "Jeffreys-BIC", "ZS-null", "ZS-full", "hyper-g-laplace", - "AIC", "hyper-g-n", "JZS") - gaussian_tcch <- c("CH", "tCCH", "TG","beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust") - gaussian_bas <- c("g-prior", "hyper-g", "EB-local","ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS") - - all_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) - #browser() - # Validate prior_beta - if (!params_master$prior_beta %in% all_priors) { - stop(sprintf("Prior '%s' is not supported. Supported priors: %s", - params_master$prior_beta, paste(all_priors, collapse = ", "))) - } - - # Decision logic based on family and prior_beta - if (params_master$family %in% c("binomial", "poisson", "gamma")) { - if (params_master$prior_beta %in% gaussian_only_priors) { - stop(sprintf("Prior '%s' is not supported for GLM family '%s'. 
Supported GLM priors: %s",
-                 params_master$prior_beta, params_master$family,
-                 paste(c(glm_and_gaussian_priors), collapse = ", ")))
-    }
-
-    params_nested$family <- params_master$family
-    if (is.null(params_master$laplace)) params_nested$laplace <- FALSE else params_nested$laplace <- params_master$laplace
-
-    #if(params_nested$laplace)
-    #  print("Laplace approximations will be used")
-
-    if (params_master$prior_beta == "Jeffreys-BIC") {
-      if(params_nested$family == "binomial")
-        result <- logistic.loglik2(y, x, model, complex, params_nested)
-      else if(params_nested$family %in% c("poisson", "gamma"))
-        result <- glm.loglik2(y, x, model, complex, params_nested)
-
-    } else {
-      params_nested$prior_beta <- switch(
-        params_master$prior_beta,
-        "beta.prime" = beta.prime(n = n),
-        "CH" = CCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
-                   beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b,
-                   s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s),
-        "EB-local" = EB.local(),
-        "g-prior" = g.prior(g = params_master$g),
-        "hyper-g" = hyper.g(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a),
-        "tCCH" = tCCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
-                      beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b,
-                      s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s,
-                      r = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho,
-                      v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v,
-                      theta = if (is.null(params_master$k)) stop("k must be provided") else params_master$k),
-        "intrinsic" = intrinsic(n = params_master$n),
-        "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a),
-        "Jeffreys" = Jeffreys(),
-        "uniform" = tCCH(alpha = 2,
-                         beta = 2,
-                         s = 0,
-                         r = 0,
-                         v = 1,
-                         theta = 1),
-        "benchmark" = tCCH(alpha = 0.02,
-                           beta = 0.02*max(n,p^2),
-                           s = 0,
-                           r = 0,
-                           v = 1,
-                           theta = 1),
-        "ZS-adapted" = tCCH(alpha = 1,
-                            beta = 2,
-                            s = n + 3,
-                            r = 0,
-                            v = 1,
-                            theta = 1),
-        "robust" = robust(n = if (is.null(params_master$n)) as.numeric(n) else as.numeric(params_master$n)),
-        "hyper-g-n" = hyper.g.n(alpha = if (is.null(params_master$a)) 3 else params_master$a,
-                                n = params_master$n),
-        "BIC" = bic.prior(n = if (is.null(params_master$n)) n else params_master$n),
-        stop("Unrecognized prior_beta for GLM: ", params_master$prior_beta)
-      )
-      result <- glm.logpost.bas2(y, x, model, complex, params_nested)
-    }
-  } else if (params_master$family == "gaussian") {
-
-    if (params_master$prior_beta %in% gaussian_not_robust) {
-      warning(sprintf("Prior '%s' is not recommended for Gaussian family '%s'. It can be unstable for strong signals (R^2 > 0.9). Recommended priors for the Gaussian family: %s",
-                      params_master$prior_beta, params_master$family,
-                      paste(gaussian_robust, collapse = ", ")))
-    }
-
-    params_nested$r <- params_master$r
-
-    if (params_master$prior_beta %in% gaussian_tcch) {
-
-      params_nested$prior_beta <- switch(
-        params_master$prior_beta,
-        "beta.prime" = list("beta.prime"),
-        "CH" = list("CH", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
-                    b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b,
-                    s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s),
-        "tCCH" = list("tCCH", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
-                      b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b,
-                      s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s,
-                      rho = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho,
-                      v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v,
-                      k = if (is.null(params_master$k)) stop("k must be provided") else params_master$k),
-        "intrinsic" = list("intrinsic"),
-        "TG" = list("TG", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a,
-                    s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s),
-        "Jeffreys" = list("Jeffreys"),
-        "ZS-adapted" = list("ZS-adapted"),
-        "benchmark" = list("benchmark"),
-        "robust" = list("robust"),
-        "uniform" = list("uniform"),
-        stop("Unrecognized prior_beta for Gaussian: ", params_master$prior_beta)
-      )
-      result <- gaussian_tcch_log_likelihood2(y, x, model, complex, params_nested)
-
-    } else if (params_master$prior_beta == "Jeffreys-BIC") {
-      if (is.null(params_master$var)) params_nested$var <- "unknown" else params_nested$var <- params_master$var
-      result <- gaussian.loglik2(y, x, model, complex, params_nested)
-    } else if (params_master$prior_beta %in% gaussian_bas) {
-
-      params_nested$prior_beta <- switch(
-        params_master$prior_beta,
-        "g-prior" = 0,
-        "hyper-g" = 1,
-        "EB-local" = 2,
-        "BIC" = 3,
-        "ZS-null" = 4,
-        "ZS-full" = 5,
-        "hyper-g-laplace" = 6,
-        "AIC" = 7,
-        "EB-global" = 2,
-        "hyper-g-n" = 8,
-        "JZS" = 9
-      )
-      if(params_master$prior_beta == "g-prior")
-      {
-        if (!is.null(params_master$g)) params_nested$g <- params_master$g else stop("g must be provided")
-        result <- gaussian.loglik2.g(y, x, model, complex, params_nested)
-      }
-      else {
-        if (!is.null(params_master$a)) params_nested$alpha <- params_master$a else params_nested$alpha = -1
-        result <- lm.logpost.bas2(y, x, model, complex, params_nested)
-      }
-
-    } else {
-      stop("Unexpected error in prior_beta logic for Gaussian.")
-    }
-  } else {
-    stop("Unsupported family: ", params_master$family, ". 
Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'.") - } - - # Ensure consistent return structure - if (is.null(result$crit) || is.null(result$coefs)) { - stop("Error in computation: Returned result does not contain 'crit' and 'coefs'.") - } - - return(list(crit = result$crit, coefs = result$coefs)) -} \ No newline at end of file diff --git a/R_script/oldvers/new general estimators.R b/R_script/oldvers/new general estimators.R deleted file mode 100644 index eec656e..0000000 --- a/R_script/oldvers/new general estimators.R +++ /dev/null @@ -1,262 +0,0 @@ -library(microbenchmark) -library(Rcpp) -library(RcppArmadillo) -library(fastglm) -library(BAS) - -# Original R function -estimate.logic.tcch.general <- function(y, x, model, complex, params) { - - if (length(params) == 0) - params <- list(r = 1/dim(x)[1]) - - suppressWarnings({ - mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) - }) - - # Compute the general complexity prior - log_prior <- log(params$r) * sum(complex$oc) - - # Compute other terms for tCCH prior on g - p.v <- (params$n + 1) / (mod$rank + 1) - - y_mean <- mean(y) - TSS <- sum((y - y_mean)^2) - RSS <- sum(mod$residuals^2) - R.2 <- 1 - (RSS / TSS) - p <- mod$rank - - # Marginal log-likelihood using tCCH prior for g - mloglik = (-0.5 * p * log(p.v) - - 0.5 * (params$n - 1) * log(1 - (1 - 1/p.v) * R.2) - + log(beta((params$p.a + p) / 2, params$p.b / 2)) - - log(beta(params$p.a / 2, params$p.b / 2)) - + log(phi1(params$p.b / 2, - (params$n - 1) / 2, - (params$p.a + params$p.b + p) / 2, - params$p.s / (2 * p.v), - R.2 / (p.v - (p.v - 1) * R.2))) - - hypergeometric1F1(params$p.b / 2, - (params$p.a + params$p.b) / 2, - params$p.s / (2 * p.v), log = T)) - - # Stability check - if (is.na(mloglik) || is.nan(mloglik) || mloglik == -Inf) { - mloglik = -10000 - } - - logpost <- mloglik + log_prior - - # Stability check for final log-posterior - if (logpost == -Inf) { - logpost = -10000 - } - - return(list(crit = logpost, coefs = mod$coefficients)) -} - - -# Rcpp implementation of the function -cppFunction(depends = "RcppArmadillo", code = ' -double compute_log_posterior(NumericVector residuals, int p, int n, double r, double p_a, double p_b, double p_s, NumericVector complexity_oc) { - // Compute R^2 - double RSS = sum(residuals * residuals); - double TSS = sum((residuals - mean(residuals)) * (residuals - mean(residuals))); - double R2 = 1 - (RSS / TSS); - - // Compute log prior complexity term - double log_prior_complexity = log(r) * sum(complexity_oc); - - // Compute p.v - double p_v = (n + 1.0) / (p + 1.0); - - // Compute marginal log likelihood - double mloglik = (-0.5 * p * log(p_v) - - 0.5 * (n - 1) * log(1 - (1 - 1 / p_v) * R2) - + R::lbeta((p_a + p) / 2.0, p_b / 2.0) - - R::lbeta(p_a / 2.0, p_b / 2.0) - + log(R::pbeta(R2 / (p_v - (p_v - 1) * R2), p_b / 2.0, (n - 1) / 2.0, 1, 0)) - - R::pbeta(p_s / (2.0 * p_v), p_b / 2.0, (p_a + p_b) / 2.0, 1, 1)); - - // Stability check - if (std::isnan(mloglik) || std::isinf(mloglik)) { - mloglik = -10000; - } - - double logpost = mloglik + log_prior_complexity + n; - - if (std::isinf(logpost)) { - logpost = -10000; - } - - return logpost; -}') - - - -estimate_logic_tcch_rcpp <- function(y, x, model, complex, params) { - - if (length(params) == 0) - params <- list(r = 1 / nrow(x)) - - # Fit the model using fastglm - suppressWarnings({ - mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) - }) - - # Call the Rcpp function for log-posterior computation - logpost <- compute_log_posterior( - residuals = 
mod$residuals, - p = mod$rank, - n = params$n, - r = params$r, - p_a = params$p.a, - p_b = params$p.b, - p_s = params$p.s, - complexity_oc = complex$oc - ) - - return(list(crit = logpost, coefs = mod$coefficients)) -} - -# Generate test data -set.seed(42) -n <- 100 -p <- 5 -X <- matrix(rnorm(n * p), n, p) -beta <- c(1, -1, 0.5, 0, 0) -y <- X %*% beta + rnorm(n) - -params <- list(n = n, p.a = 1, p.b = 1, p.s = 0.1, r = 0.01) # Prior hyperparameters -model <- c(1, 2, 3) # Assume we select variables 1, 2, 3 -complex <- list(oc = c(1, 1, 1)) # Example complexity for selected model - -# Convert for Rcpp function -model_vec <- as.integer(model) # Convert to unsigned integer vector for Rcpp -complex_vec <- as.numeric(complex$oc) - -# Run both implementations -result_r <- estimate.logic.tcch.general(y, X, model, complex, params) -result_rcpp <- estimate_logic_tcch_rcpp(y, X, model, complex, params) - -# Check if results match -print("Checking results...") -print(all.equal(result_r$crit, result_rcpp$crit, tolerance = 1e-6)) -print(all.equal(result_r$coefs, as.numeric(result_rcpp$coefs), tolerance = 1e-6)) - -# Benchmarking -bench <- microbenchmark( - R = estimate.logic.tcch.general(y, X, model, complex, params), - Rcpp = estimate_logic_tcch_rcpp(y, X, model_vec, complex_vec, params$r, params$n, params$p.a, params$p.b, params$p.s), - times = 100 -) - -# Print benchmark results -print(bench) - -# Calculate speedup -speedup <- median(bench$time[bench$expr == "R"]) / median(bench$time[bench$expr == "Rcpp"]) -print(paste("Speedup: ", round(speedup, 2), "x")) - - - -?BAS::bayesglm.fit(x = X, y = y>mean(y),family = binomial(),coefprior = aic.prior) - - -library(BAS) - -set.seed(42) -n <- 100 -p <- 5 -X <- matrix(rnorm(n * p), n, p) -beta <- c(1, -1, 0.5, 0, 0) -y <- X %*% beta + rnorm(n) - -data <- data.frame(y = y>mean(y), x = X) - - -suppressWarnings({mod <- bas.glm(y ~ 1+x.1, data = data, betaprior = CCH(alpha = 0.5, beta = as.numeric(nrow(data)), s = 0), method = "deterministic", family = binomial(), modelprior = uniform(), n.models = 2, initprobs = 'eplogp', laplace = T)}) -mod$logmarg - - -result <- tryCatch({ -result <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y>mean(y)), X = cbind(1,X[,1]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = as.numeric(c(1,0.99)), - Rmodeldim = as.integer(c(0,0)), - modelprior = uniform(), - betaprior = CCH(alpha = 0.5, beta = as.numeric(nrow(data)), s = 0), - family = binomial(), - Rcontrol = glm.control(), - Rlaplace = as.integer(T)) -lm = result$logmarg -print(lm) -}, error = function(e) { - warning("Error in C call: ", e$message) - list(logmarg = NA) -}) - -for (i in 1:1500) { - result <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y > mean(y)), - X = cbind(1, X), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = as.numeric(c(1, 0.99)), - Rmodeldim = as.integer(0), - modelprior = BAS::uniform(), - betaprior = betaprior, - family = family, - Rcontrol = control, - Rlaplace = as.integer(FALSE)) - print(result$logmarg) -} - -data <- data.frame(y = y, x = X) -mod <- bas.lm(y ~ 1+x.1, data = data, prior = "ZS-null", alpha = 4, method = "deterministic", modelprior = uniform(), n.models = 2, initprobs = 'eplogp') -mod$logmarg - - -result <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y>mean(y)), X = cbind(1,X[,1]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = as.numeric(c(1,0.99)), - Rmodeldim 
= as.integer(c(0,0)), - modelprior = uniform(), - betaprior = CCH(alpha = 0.5, beta = as.numeric(nrow(data)), s = 0), - family = binomial(), - Rcontrol = glm.control(), - Rlaplace = as.integer(T)) - -result <- .Call(BAS:::C_deterministic, - y = y, - X = cbind(1,X[,1]), - as.numeric(rep(1, length(y))), - as.numeric(c(1,0.99)), - as.integer(c(0,0)), - incint = as.integer(TRUE), - alpha = as.numeric(4), - method = as.integer(0), - modelprior = uniform(), - Rpivot = TRUE, - Rtol = 1e-7 - ) -result$logmarg - -method.num <- switch( - prior, - "g-prior" = 0, - "hyper-g" = 1, - "EB-local" = 2, - "BIC" = 3, - "ZS-null" = 4, - "ZS-full" = 5, - "hyper-g-laplace" = 6, - "AIC" = 7, - "EB-global" = 2, - "hyper-g-n" = 8, - "JZS" = 9 -) \ No newline at end of file diff --git a/R_script/oldvers/synthetic_gaussian_data.csv b/R_script/oldvers/synthetic_gaussian_data.csv deleted file mode 100644 index c9a4f0c..0000000 --- a/R_script/oldvers/synthetic_gaussian_data.csv +++ /dev/null @@ -1,48 +0,0 @@ -X1,X2,X3,X4,X5,Y -0.4967141530112327,-0.13826430117118466,0.6476885381006925,1.5230298564080254,-0.23415337472333597,0.4288730369987499 --0.23413695694918055,1.5792128155073915,0.7674347291529088,-0.4694743859349521,0.5425600435859647,-1.4417490630610725 --0.46341769281246226,-0.46572975357025687,0.24196227156603412,-1.913280244657798,-1.7249178325130328,1.5408190135238018 --0.5622875292409727,-1.0128311203344238,0.3142473325952739,-0.9080240755212109,-1.4123037013352915,2.236874253556987 -1.465648768921554,-0.22577630048653566,0.06752820468792384,-1.4247481862134568,-0.5443827245251827,2.757722204323676 -0.11092258970986608,-1.1509935774223028,0.37569801834567196,-0.600638689918805,-0.2916937497932768,3.5866875225449184 --0.6017066122293969,1.8522781845089378,-0.013497224737933921,-1.0577109289559004,0.822544912103189,-1.7814283691741235 --1.2208436499710222,0.2088635950047554,-1.9596701238797756,-1.3281860488984305,0.19686123586912352,0.4191701822203946 -0.7384665799954104,0.1713682811899705,-0.11564828238824053,-0.3011036955892888,-1.4785219903674274,0.21499003036051567 --0.7198442083947086,-0.4606387709597875,1.0571222262189157,0.3436182895684614,-1.763040155362734,-0.016136705356076468 -0.324083969394795,-0.38508228041631654,-0.6769220003059587,0.6116762888408679,1.030999522495951,2.2070453676729884 -0.9312801191161986,-0.8392175232226385,-0.3092123758512146,0.33126343140356396,0.9755451271223592,3.9363850721912477 --0.47917423784528995,-0.18565897666381712,-1.1063349740060282,-1.1962066240806708,0.812525822394198,2.562420375354225 -1.356240028570823,-0.07201012158033385,1.0035328978920242,0.36163602504763415,-0.6451197546051243,1.5066892713929099 -0.36139560550841393,1.5380365664659692,-0.03582603910995154,1.5646436558140062,-2.6197451040897444,-5.312882766840247 -0.8219025043752238,0.08704706823817121,-0.2990073504658674,0.0917607765355023,-1.9875689146008928,-0.5144337487656874 --0.21967188783751193,0.3571125715117464,1.477894044741516,-0.5182702182736474,-0.8084936028931876,0.19359008863520835 --0.5017570435845365,0.9154021177020741,0.32875110965968446,-0.5297602037670388,0.5132674331133561,-0.39324862858882215 -0.09707754934804039,0.9686449905328892,-0.7020530938773524,-0.3276621465977682,-0.39210815313215763,-1.104027489184881 --1.4635149481321186,0.29612027706457605,0.26105527217988933,0.00511345664246089,-0.23458713337514692,-0.6046513704872727 --1.4153707420504142,-0.42064532276535904,-0.3427145165267695,-0.8022772692216189,-0.16128571166600914,1.191493593205777 
-0.4040508568145384,1.8861859012105302,0.17457781283183896,0.25755039072276437,-0.07444591576616721,-2.7709651627312986 --1.9187712152990415,-0.026513875449216878,0.06023020994102644,2.463242112485286,-0.19236096478112252,-2.2696629342119454 -0.30154734233361247,-0.03471176970524331,-1.168678037619532,1.1428228145150205,0.7519330326867741,0.82577716502127 -0.7910319470430469,-0.9093874547947389,1.4027943109360992,-1.4018510627922809,0.5868570938002703,5.007372306331659 -2.1904556258099785,-0.9905363251306883,-0.5662977296027719,0.09965136508764122,-0.5034756541161992,3.669054455635204 --1.5506634310661327,0.06856297480602733,-1.0623037137261049,0.4735924306351816,-0.9194242342338032,-1.25383975343152 -1.5499344050175394,-0.7832532923362371,-0.3220615162056756,0.8135172173696698,-1.2308643164339552,1.778064146771105 -0.22745993460412942,1.307142754282428,-1.6074832345612275,0.1846338585323042,0.25988279424842353,-1.747341344407948 -0.7818228717773104,-1.236950710878082,-1.3204566130842763,0.5219415656168976,0.29698467323318606,3.6752289493140053 -0.25049285034587654,0.3464482094969757,-0.6800247215784908,0.23225369716100355,0.29307247329868125,0.5706413376611352 --0.7143514180263678,1.8657745111447566,0.4738329209117875,-1.1913034972026486,0.6565536086338297,-1.9384816827002367 --0.9746816702273214,0.787084603742452,1.158595579007404,-0.8206823183517105,0.9633761292443218,0.4848440948657714 -0.4127809269364983,0.82206015999449,1.8967929826539474,-0.2453881160028705,-0.7537361643574896,-0.5285461135761962 --0.8895144296255233,-0.8158102849654383,-0.0771017094141042,0.3411519748166439,0.27669079933001905,1.985815923067319 -0.8271832490360238,0.01300189187790702,1.4535340771573169,-0.2646568332379561,2.720169166589619,4.30917374326559 -0.6256673477650062,-0.8571575564162826,-1.0708924980611123,0.4824724152431853,-0.2234627853258509,2.4429413056749008 -0.714000494092092,0.47323762457354485,-0.07282891265687277,-0.846793718068405,-1.5148472246858646,-0.3102657750810609 --0.4465149520670211,0.8563987943234723,0.21409374413020396,-1.245738778711988,0.173180925851182,0.0015531759809442412 -0.3853173797288368,-0.883857436201133,0.1537251059455279,0.058208718445999896,-1.142970297830623,2.011500976278653 -0.3577873603482833,0.5607845263682344,1.083051243175277,1.053802052034903,-1.377669367957091,-1.5673323288417744 --0.9378250399151228,0.5150352672086598,0.5137859509122088,0.5150476863060479,3.852731490654721,2.467830854350032 -0.570890510693167,1.135565640180599,0.9540017634932023,0.651391251305798,-0.3152692446403456,-1.3011324837202884 -0.7589692204932674,-0.7728252145375718,-0.23681860674000887,-0.48536354782910346,0.08187413938632256,3.2692681514162683 -2.3146585666735087,-1.867265192591748,0.6862601903745135,-1.6127158711896517,-0.47193186578943347,7.2550269496210715 -1.088950596967366,0.06428001909546277,-1.0777447779293061,-0.7153037092599682,0.6795977489346758,2.2166614380546554 --0.7303666317171367,0.21645858958197486,0.045571839903813784,-0.6516003476058171,2.1439440893253257,2.3507554477205126 From b858173432a2741cc7d495e6cf701b7f1dda74ea Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 21 Nov 2025 12:40:25 +0100 Subject: [PATCH 03/15] commit changes --- NAMESPACE | 2 +- man/fitted.fbms_predict.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6e27645..73f2986 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ S3method(coef,gmjmcmc) S3method(coef,gmjmcmc_merged) S3method(coef,mjmcmc) S3method(coef,mjmcmc_parallel) 
+S3method(fitted,fbms_predict) S3method(get.best.model,gmjmcmc) S3method(get.best.model,gmjmcmc_merged) S3method(get.best.model,mjmcmc) @@ -50,7 +51,6 @@ export(erf) export(exp_dbl) export(fbms) export(fbms.mlik.master) -export(fitted.fbms_predict) export(gaussian.loglik) export(gelu) export(gen.params.gmjmcmc) diff --git a/man/fitted.fbms_predict.Rd b/man/fitted.fbms_predict.Rd index dac0ade..a0e78c1 100644 --- a/man/fitted.fbms_predict.Rd +++ b/man/fitted.fbms_predict.Rd @@ -4,7 +4,7 @@ \alias{fitted.fbms_predict} \title{Access Fitted Values} \usage{ -fitted.fbms_predict(object, ...) +\method{fitted}{fbms_predict}(object, ...) } \arguments{ \item{object}{Object of class "fbms_predict".} From fbe8b57352d150f83b37826cad170aabad491c7a Mon Sep 17 00:00:00 2001 From: aliaksah Date: Tue, 25 Nov 2025 11:06:55 +0100 Subject: [PATCH 04/15] script updated --- R_script/JSS_Script.R | 2 +- R_script/JSS_script_v2.R | 554 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 555 insertions(+), 1 deletion(-) create mode 100644 R_script/JSS_script_v2.R diff --git a/R_script/JSS_Script.R b/R_script/JSS_Script.R index 17705a7..e9f2f0b 100644 --- a/R_script/JSS_Script.R +++ b/R_script/JSS_Script.R @@ -4,7 +4,7 @@ # # Kepler Example with the most recent database update # -# Basic introduction of the FBMS packagea +# Basic introduction of the FBMS package # ################################################## diff --git a/R_script/JSS_script_v2.R b/R_script/JSS_script_v2.R new file mode 100644 index 0000000..db04ab4 --- /dev/null +++ b/R_script/JSS_script_v2.R @@ -0,0 +1,554 @@ +############################################################### +# FBMS Reproducibility Script (JSS Submission) +# ------------------------------------------------------------- +# This script reproduces examples in: +# +# FBMS: Flexible Bayesian Model Selection and Model Averaging +# +# It installs the correct package versions and runs the two +# main examples used in the article. +# +# The script uses minimal, readable checks suitable for JSS: +# - Mandatory packages are installed if missing +# - Optional packages are installed if possible; otherwise skipped +# - FBMS is always installed from a dedicated GitHub branch "jss_v2" +############################################################### + + + +############################################################### +# 0. Helper: Install a mandatory package (stop if fails) +############################################################### + +install_mandatory <- function(pkg) { + if (!requireNamespace(pkg, quietly = TRUE)) { + message("Installing mandatory package: ", pkg) + tryCatch( + install.packages(pkg), + error = function(e) { + stop("Failed to install mandatory package ", pkg, call. = FALSE) + } + ) + } +} + +############################################################### +# 1. Install mandatory packages +############################################################### + +mandatory_pkgs <- c("devtools", "parallel", "tictoc", "lme4","cAIC4") + +for (p in mandatory_pkgs) install_mandatory(p) + +library(devtools) + + +############################################################### +# 2. Install FBMS (always from GitHub to enforce correct version) +############################################################### + +message("Installing FBMS from GitHub (branch jss_v2)...") +install_github("jonlachmann/FBMS@jss_v2", + force = TRUE, build_vignettes = FALSE) + +library(FBMS) +library(tictoc) + +#################################################################### +# 3. 
Install optional packages (continue even if installation fails,
+#    which may happen for INLA, as it is not on CRAN).
+#    INLA is not central to the package; it is only used in a custom
+#    implementation of marginal likelihood computations, to show how
+#    FBMS can be extended.
+###################################################
+
+optional_pkgs <- c("RTMB", "INLA")
+
+# Optional: RTMB (CRAN)
+if (!requireNamespace("RTMB", quietly = TRUE)) {
+  message("Trying to install optional package RTMB...")
+  try(install.packages("RTMB"), silent = TRUE)
+}
+
+# Optional: INLA (not on CRAN; installed from the INLA repository)
+if (!requireNamespace("INLA", quietly = TRUE)) {
+  message("Trying to install optional package INLA...")
+  tryCatch(
+    {
+      install.packages("INLA",
+                       repos = c(getOption("repos"),
+                                 INLA = "https://inla.r-inla-download.org/R/stable"),
+                       dep = TRUE)
+    },
+    error = function(e) {
+      message("INLA could not be installed; continuing without it.")
+    }
+  )
+}
+
+
+
+################################################################
+################################################################
+#
+#   EXAMPLE 1: EXOPLANET DATA
+#
+#   Section 3 of the article
+#
+################################################################
+################################################################
+
+library(FBMS)
+data(exoplanet)
+
+train.indx <- 1:500
+df.train = exoplanet[train.indx, ]
+df.test = exoplanet[-train.indx, ]
+
+to3 <- function(x) x^3
+transforms <- c("sigmoid", "sin_deg", "exp_dbl", "p0", "troot", "to3")
+
+
+###############################################################
+# Example 1.1 — Default single-thread GMJMCMC (Section 3)
+###############################################################
+set.seed(123)
+
+result.default <- fbms(
+  formula = semimajoraxis ~ 1 + .,
+  data = df.train,
+  method = "gmjmcmc",
+  transforms = transforms
+)
+
+
+###############################################################
+# Example 1.2 — Alternative priors
+###############################################################
+
+set.seed(234)
+result.BIC <- fbms(
+  formula = semimajoraxis ~ 1 + .,
+  data = df.train,
+  method = "gmjmcmc",
+  transforms = transforms,
+  beta_prior = list(type = "Jeffreys-BIC", Var = "unknown")
+)
+
+set.seed(345)
+result.EB <- fbms(
+  formula = semimajoraxis ~ 1 + .,
+  data = df.train,
+  method = "gmjmcmc",
+  transforms = transforms,
+  beta_prior = list(type = "EB-global", a = 1)
+)
+
+
+###############################################################
+# Example 1.3 — Longer single-thread run
+###############################################################
+set.seed(123)
+
+result.P50 <- fbms(
+  data = df.train,
+  method = "gmjmcmc",
+  transforms = transforms,
+  P = 50, N = 1000, N.final = 5000
+)
+
+
+###############################################################
+# Example 1.4 — Parallel GMJMCMC
+###############################################################
+set.seed(123)
+
+result.parallel <- fbms(
+  data = df.train,
+  method = "gmjmcmc.parallel",
+  transforms = transforms,
+  runs = 40,
+  cores = parallel::detectCores() - 1,
+  P = 25
+)
+
+
+###############################################################
+# Example 1.5 — Summaries and plotting
+###############################################################
+
+summary(result.default)
+summary(result.default, pop = "all", labels = paste0("x",1:length(df.train[,-1])))
+
+
+summary(result.P50)
+summary(result.P50, pop = "best", labels =
paste0("x",1:length(df.train[,-1]))) +summary(result.P50, pop = "last", labels = paste0("x",1:length(df.train[,-1]))) +summary(result.P50, pop = "last", tol = 0.01, labels = paste0("x",1:length(df.train[,-1]))) +summary(result.P50, pop = "all") + +summary(result.parallel) +library(tictoc) +tic() +summary(result.parallel, tol = 0.01, pop = "all",data = df.train) +toc() + + + +plot(result.default) +plot(result.P50) +plot(result.parallel) + + + +############################################################### +# Example 1.6 — Prediction +############################################################### +preds <- predict(result.default, df.test[,-1]) +str(aggr(preds)) + +rmse.default <- sqrt(mean((predmean(preds) - df.test$semimajoraxis)^2)) +plot(predmean(preds), df.test$semimajoraxis) + + +############################### + +preds.P50 = predict(result.P50, df.test[,-1]) +rmse.P50 <- sqrt(mean((predmean(preds.P50) - df.test$semimajoraxis)^2)) +plot(predmean(preds.P50), df.test$semimajoraxis) + + +############################### + + +preds.multi <- predict(result.parallel , df.test[,-1], link = function(x) x) +rmse.parallel <- sqrt(mean((predmean(preds.multi) - df.test$semimajoraxis)^2)) +plot(predmean(preds.multi), df.test$semimajoraxis) + + +round(c(rmse.default, rmse.P50, rmse.parallel),2) + + +############################################################### +# Example 1.7 — Best model & MPM +############################################################### + +best.default <- get.best.model(result.default) +mpm.default <- get.mpm.model(result.default, + y = df.train$semimajoraxis, + x = df.train[,-1]) + +sqrt(mean((predict(best.default, df.test[,-1]) - + df.test$semimajoraxis)^2)) + +sqrt(mean((predict(mpm.default, df.test[,-1]) - + df.test$semimajoraxis)^2)) + +################################################################ +################################################################ +# +# EXAMPLE 2: MIXED MODELS WITH FRACTIONAL POLYNOMIALS +# +# Section 4 of the article +# +################################################################ +################################################################ + +rm(list = ls()) +library(FBMS) + + + +############################################################### +# 2.0 Load Zambia data (requires cAIC4) +############################################################### +if (!requireNamespace("cAIC4", quietly = TRUE)) { + stop("Optional package 'cAIC4' is required for Example 2. Please install it.") +} + +data(Zambia, package = "cAIC4") +df <- as.data.frame(sapply(Zambia[1:5],scale)) + + +transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2", + "p0p0","p0p05","p0p1","p0p2","p0p3", + "p0p05","p0pm05","p0pm1","p0pm2") + + +probs <- gen.probs.gmjmcmc(transforms) +probs$gen <- c(1/3,1/3,0,1/3) # Modifications and interactions! 
+
+params <- gen.params.gmjmcmc(ncol(df) - 1)
+params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions)
+params$feat$pop.max = 10
+
+
+
+###############################################################
+# 2.1 Define custom log-likelihoods for lme4, INLA, RTMB
+###############################################################
+
+# lme4 version
+
+library(lme4)
+mixed.model.loglik.lme4 <- function (y, x, model, complex, mlpost_params)
+{
+
+  # logarithm of marginal likelihood (Laplace approximation)
+  if (sum(model) > 1) {
+    x.model = x[,model]
+    data <- data.frame(y, x = x.model[,-1], dr = mlpost_params$dr)
+
+    mm <- lmer(as.formula(paste0("y ~ 1 +",paste0(names(data)[2:(dim(data)[2]-1)],collapse = "+"), "+ (1 | dr)")), data = data, REML = FALSE)
+  } else{ # model without fixed effects
+    data <- data.frame(y, dr = mlpost_params$dr)
+    mm <- lmer(as.formula(paste0("y ~ 1 + (1 | dr)")), data = data, REML = FALSE)
+  }
+
+  mloglik <- as.numeric(logLik(mm)) - 0.5*log(length(y)) * (dim(data)[2] - 2) # Laplace approximation for beta prior
+
+  # logarithm of model prior
+  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value for parameter r
+  lp <- log_prior(mlpost_params, complex)
+
+
+  return(list(crit = mloglik + lp, coefs = fixef(mm)))
+}
+
+# ---------------------------------------------------------------
+# INLA version (only used if INLA is properly installed)
+mixed.model.loglik.inla <- function (y, x, model, complex, mlpost_params)
+{
+  if(sum(model)>1)
+  {
+    data1 = data.frame(y, as.matrix(x[,model]), mlpost_params$dr)
+    formula1 = as.formula(paste0(names(data1)[1],"~",paste0(names(data1)[3:(dim(data1)[2]-1)],collapse = "+"),"+ f(mlpost_params.dr,model = \"iid\")"))
+  } else
+  {
+    data1 = data.frame(y, mlpost_params$dr)
+    formula1 = as.formula(paste0(names(data1)[1],"~","1 + f(mlpost_params.dr,model = \"iid\")"))
+  }
+
+  # to make sure inla is not stuck
+  inla.setOption(inla.timeout=30)
+  inla.setOption(num.threads=mlpost_params$INLA.num.threads)
+
+  mod<-NULL
+  # fit with error handling, for unstable libraries that one does not trust 100%
+  tryCatch({
+    mod <- inla(family = "gaussian",silent = 1L,safe = F, data = data1,formula = formula1)
+  }, error = function(e) {
+
+    # Handle the error by setting the result to NULL
+    mod <- NULL
+
+    # You can also print a message or log the error if needed
+    cat("An error occurred:", conditionMessage(e), "\n")
+  })
+
+  # logarithm of model prior
+  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value for parameter r
+  lp <- log_prior(mlpost_params, complex)
+
+  if(length(mod)<3||length(mod$mlik[1])==0) {
+    return(list(crit = -10000 + lp,coefs = rep(0,dim(data1)[2]-2)))
+  } else {
+    mloglik <- mod$mlik[1]
+    return(list(crit = mloglik + lp, coefs = mod$summary.fixed$mode))
+  }
+}
+
+# ---------------------------------------------------------------
+# RTMB version (only used if RTMB is properly installed)
+mixed.model.loglik.rtmb <- function (y, x, model, complex, mlpost_params)
+{
+  z = model.matrix(y~mlpost_params$dr) # Design matrix for random effect
+
+  msize = sum(model)
+  # Set up and estimate model
+  dat = list(y = y, xm = x[,model], z = z)
+  par = list(logsd_eps = 0,
+             logsd_dr = 0,
+             beta = rep(0,msize),
+             u = rep(0,mlpost_params$nr_dr))
+
+  nll = function(par){
+    getAll(par,dat)
+    sd_eps = exp(logsd_eps)
+    sd_dr = exp(logsd_dr)
+
+    nll = 0
+    # negative log likelihood of the random effect
+    nll = nll - sum(dnorm(u, 0, sd_dr, log = TRUE))
+    mu = as.vector(as.matrix(xm)%*%beta) + z%*%u
+    nll <- nll - sum(dnorm(y, mu, sd_eps, log = TRUE))
+
+    return(nll)
+  }
+  obj <- MakeADFun(nll , par, random = "u", silent = T )
+  opt <- nlminb ( obj$par , obj$fn , obj$gr, control = list(iter.max = 10))
+
+  # logarithm of model prior
+  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value for parameter r
+  lp <- log_prior(mlpost_params, complex)
+
+  mloglik <- -opt$objective - 0.5*log(dim(x)[1])*msize
+  return(list(crit = mloglik + lp, coefs = opt$par[-(1:2)]))
+}
+
+
+###############################################################
+# 2.2 Small demonstration run for runtime comparisons
+###############################################################
+
+set.seed(3052024)
+
+library(tictoc)
+
+tic()
+result1a <- fbms(
+  formula = z ~ 1+., data = df,
+  transforms = transforms,
+  method = "gmjmcmc", P = 3, N = 30,
+  probs = gen.probs.gmjmcmc(transforms),
+  params = gen.params.gmjmcmc(ncol(df) - 1),
+  family = "custom",
+  loglik.pi = mixed.model.loglik.lme4,
+  model_prior = list(r = 1/nrow(df)),
+  extra_params = list(dr = droplevels(Zambia$dr))
+)
+time.lme4 <- toc()
+
+
+if (requireNamespace("INLA", quietly = TRUE)) {
+  library(INLA)
+  library(cAIC4)
+
+  data(Zambia, package = "cAIC4")
+  df <- as.data.frame(sapply(Zambia[1:5],scale))
+
+  tic()
+  result1b <- fbms(
+    formula = z ~ 1+., data = df,
+    transforms = transforms,
+    method = "gmjmcmc", P = 3, N = 30,
+    family = "custom",
+    loglik.pi = mixed.model.loglik.inla,
+    model_prior = list(r = 1/nrow(df)),
+    extra_params = list(dr = droplevels(Zambia$dr),
+                        INLA.num.threads = 4)
+  )
+  time.inla <- toc()
+}
+
+
+if (requireNamespace("RTMB", quietly = TRUE)) {
+  library(RTMB)
+
+  data(Zambia, package = "cAIC4")
+  df <- as.data.frame(sapply(Zambia[1:5],scale))
+
+
+  tic()
+  result1c <- fbms(
+    formula = z ~ 1+., data = df,
+    transforms = transforms,
+    method = "gmjmcmc", P = 3, N = 30,
+    family = "custom",
+    loglik.pi = mixed.model.loglik.rtmb,
+    model_prior = list(r = 1/nrow(df)),
+    extra_params = list(
+      dr = droplevels(Zambia$dr),
+      nr_dr = sum(table(Zambia$dr) > 0)
+    )
+  )
+  time.rtmb <- toc()
+}
+
+
+###############################################################
+# 2.3 Serious analysis with lme4 (Section 4). Runs within time
+#     constraints of JSS on Apple M1 Max 32 GB, but can be slower
+#     on older machines. Please set run.long.mixed = FALSE if the
+#     example exceeds a reasonable time.
+###############################################################
+
+# Specify whether to run the long chains for the mixed effect models.
+# These chains can run for longer than 20 minutes
+# depending on the eequipment used.
+run.long.mixed = TRUE
+
+if(run.long.mixed)
+{
+  probs <- gen.probs.gmjmcmc(transforms)
+  params <- gen.params.gmjmcmc(ncol(df) - 1)
+  params$feat$D <- 1
+  params$feat$pop.max <- 10
+
+
+  # No nonlinear features
+  result2a <- fbms(
+    formula = z ~ 1+., data = df,
+    N = 5000,
+    method = "mjmcmc.parallel",
+    runs = 40,
+    cores = parallel::detectCores() - 1,
+    family = "custom",
+    loglik.pi = mixed.model.loglik.lme4,
+    model_prior = list(r = 1/nrow(df)),
+    extra_params = list(dr = droplevels(Zambia$dr))
+  )
+
+  summary(result2a, labels = names(df)[-1])
+  plot(result2a)
+
+
+  # Fractional polynomials
+  result2b <- fbms(
+    formula = z ~ 1+., data = df,
+    transforms = transforms, probs = probs, params = params,
+    P = 25, N = 100,
+    method = "gmjmcmc.parallel",
+    runs = 40,
+    cores = parallel::detectCores() - 1,
+    family = "custom",
+    loglik.pi = mixed.model.loglik.lme4,
+    model_prior = list(r = 1/nrow(df)),
+    extra_params = list(dr = droplevels(Zambia$dr))
+  )
+
+  summary(result2b, tol = 0.05, labels = names(df)[-1])
+
+
+  # Non-linear projections
+  transforms.sigmoid <- c("sigmoid")
+  probs.sigmoid <- gen.probs.gmjmcmc(transforms.sigmoid)
+  probs.sigmoid$gen <- c(0, 0, 0.5, 0.5)
+
+  params.sigmoid <- gen.params.gmjmcmc(ncol(df) - 1)
+  params.sigmoid$feat$pop.max <- 10
+
+  result2c <- fbms(
+    formula = z ~ 1+., data = df,
+    transforms = transforms.sigmoid,
+    probs = probs.sigmoid,
+    params = params.sigmoid,
+    P = 25, N = 100,
+    method = "gmjmcmc.parallel",
+    runs = 40,
+    cores = parallel::detectCores() - 1,
+    family = "custom",
+    loglik.pi = mixed.model.loglik.lme4,
+    model_prior = list(r = 1/nrow(df)),
+    extra_params = list(dr = droplevels(Zambia$dr))
+  )
+
+  summary(result2c, tol = 0.05, labels = names(df)[-1])
+
+
+  # Comparison
+  summary(result2a, labels = names(df)[-1])
+  summary(result2b, labels = names(df)[-1])
+  summary(result2c, labels = names(df)[-1])
+}
+

From d7bb465ca2de878207b01fc15e70613bf308b59b Mon Sep 17 00:00:00 2001
From: aliaksah
Date: Tue, 25 Nov 2025 11:28:08 +0100
Subject: [PATCH 05/15] final script polished

---
 R_script/JSS_script_v2.R | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/R_script/JSS_script_v2.R b/R_script/JSS_script_v2.R
index db04ab4..7202ce7 100644
--- a/R_script/JSS_script_v2.R
+++ b/R_script/JSS_script_v2.R
@@ -419,7 +419,7 @@ result1a <- fbms(
 )
 time.lme4 <- toc()
 
-
+time.inla <- list(callback_msg = "INLA not run") # a list, so that time.inla$callback_msg below is valid even when INLA is skipped
 if (requireNamespace("INLA", quietly = TRUE)) {
   library(INLA)
   library(cAIC4)
@@ -441,7 +441,7 @@ if (requireNamespace("INLA", quietly = TRUE)) {
   time.inla <- toc()
 }
 
-
+time.rtmb <- list(callback_msg = "RTMB not run") # a list, so that time.rtmb$callback_msg below is valid even when RTMB is skipped
 if (requireNamespace("RTMB", quietly = TRUE)) {
   library(RTMB)
 
@@ -465,6 +465,8 @@ if (requireNamespace("RTMB", quietly = TRUE)) {
   time.rtmb <- toc()
 }
 
+cat(c(time.lme4$callback_msg, time.inla$callback_msg, time.rtmb$callback_msg)
+)
 
 ###############################################################
 # 2.3 Serious analysis with lme4 (Section 4). Runs within time
@@ -475,7 +475,7 @@ if (requireNamespace("RTMB", quietly = TRUE)) {
 
 # Specify whether to run the long chains for the mixed effect models.
 # These chains can run for longer than 20 minutes
-# depending on the eequipment used.
+# depending on the machines used.
 run.long.mixed = TRUE
 
 if(run.long.mixed)
 {

From 8158a0d9cf76a4d44ed9fb6e13e32e85f372c9b8 Mon Sep 17 00:00:00 2001
From: aliaksah
Date: Tue, 25 Nov 2025 11:53:55 +0100
Subject: [PATCH 06/15] prediction file and extra script (old version) removed

---
 R_script/JSS_Script.R | 5 ++---
 prediction.pdf        | Bin 0 -> 27980 bytes
 2 files changed, 2 insertions(+), 3 deletions(-)
 create mode 100644 prediction.pdf

diff --git a/R_script/JSS_Script.R b/R_script/JSS_Script.R
index e9f2f0b..9075736 100644
--- a/R_script/JSS_Script.R
+++ b/R_script/JSS_Script.R
@@ -9,11 +9,10 @@
 ##################################################
 
 
-#install.packages("FBMS")
-#install.packages("devtools")
+#install.packages("devtools")
 library(devtools)
 
-install_github("jonlachmann/FBMS", force=T, build_vignettes=F)
+install_github("jonlachmann/FBMS@jss_v2", force=T, build_vignettes=F)
 
 library(FBMS)
 
diff --git a/prediction.pdf b/prediction.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..e3128a119b334307e99d1c530cc8f217ebf1b9d9
GIT binary patch
literal 27980
[27980 bytes of base85-encoded binary data for prediction.pdf omitted]
literal 0
HcmV?d00001

From 90667425ae35c79ecd855d505a5a6fd62cabebee Mon Sep 17 00:00:00 2001
From: aliaksah
Date: Tue, 25 Nov 2025 11:54:16 +0100
Subject: [PATCH 07/15] prediction pdf and extra script removed

---
 R_script/JSS_Script.R | 512 ------------------------------------------
 prediction.pdf        | Bin 27980 -> 0 bytes
 2 files changed, 512 deletions(-)
 delete mode 100644 R_script/JSS_Script.R
 delete mode 100644 prediction.pdf
diff --git a/R_script/JSS_Script.R b/R_script/JSS_Script.R deleted file mode 100644 index 9075736..0000000 --- a/R_script/JSS_Script.R +++ /dev/null @@ -1,512 +0,0 @@ -################################################# -# -# Example 1 (Section 3): -# -# Kepler Example with the most recent database update -# -# Basic introduction of the FBMS package -# -################################################## - - - -#install.packages("devtools") -library(devtools) -install_github("jonlachmann/FBMS@jss_v2", force=T, build_vignettes=F) - -library(FBMS) - -data(exoplanet) - -train.indx <- 1:500 -df.train = exoplanet[train.indx, ] -df.test = exoplanet[-train.indx, ] - - -to3 <- function(x) x^3 -transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") - - -#################################################### -# -# single thread analysis (default values, Section 3.1) -# -#################################################### - - -set.seed(123) - -result.default <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, - method = "gmjmcmc", transforms = transforms) - - -#################################################### -# -# Choosing a different prior (more iterations, Section 3.3) -# -#################################################### - - -set.seed(234) - -result.BIC <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, - method = "gmjmcmc", transforms = transforms, - beta_prior = list(type = "Jeffreys-BIC", Var = "unknown")) - - -set.seed(345) - -result.EB <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, - method = "gmjmcmc", transforms = transforms, - beta_prior = list(type = "EB-global", a = 1)) - -#################################################### -# -# single thread analysis (more iterations, Section 3.4) -# -#################################################### - - -set.seed(123) - -result.P50 <- fbms(data = df.train, method = "gmjmcmc", transforms = transforms, - P = 50, N = 1000, N.final = 5000) - - -#################################################### -# -# multiple thread analysis (Section 3.5) -# -#################################################### - -set.seed(123) - -result.parallel <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms, - runs = 40, cores = parallel::detectCores()-1, P = 25) - - -#################################################### -# -# Inspection of Results (Section 3.6) -# -#################################################### - -###################### -# summary - -summary(result.default) -summary(result.default, pop = "all", labels = paste0("x",1:length(df.train[,-1]))) - - -summary(result.P50) -summary(result.P50, pop = "best", labels = paste0("x",1:length(df.train[,-1]))) -summary(result.P50, pop = "last", labels = paste0("x",1:length(df.train[,-1]))) -summary(result.P50, pop = "last", tol = 0.01, labels = paste0("x",1:length(df.train[,-1]))) -summary(result.P50, pop = "all") - -summary(result.parallel) -library(tictoc) -tic() -summary(result.parallel, tol = 0.01, pop = "all",data = df.train) -toc() - - - - -###################### -# plot - -pdf("result.pdf") -plot(result.default) -dev.off() - -plot(result.default) - - - -pdf("result.P50.pdf") -plot(result.P50) -dev.off() - -plot(result.P50) - - - -pdf("result.parallel.pdf") -plot(result.parallel) -dev.off() - -plot(result.parallel) - - -#################################################### -# -# Prediction (Section 3.7) -# -#################################################### - - -#preds <- predict(result.default, df.test[,-1], link = function(x) x) -preds <- 
predict(result.default, df.test[,-1]) - -str(aggr(preds)) - - - -rmse.default <- sqrt(mean((predmean(preds) - df.test$semimajoraxis)^2)) - -pdf("prediction.pdf") -plot(predmean(preds), df.test$semimajoraxis) -dev.off() - -plot(predmean(preds), df.test$semimajoraxis) - - - - - - -############################### - - -#preds.P50 = predict(result.P50, df.test[,-1], link = function(x) x) -preds.P50 = predict(result.P50, df.test[,-1]) -rmse.P50 <- sqrt(mean((predmean(preds.P50) - df.test$semimajoraxis)^2)) - -pdf("prediction.P50.pdf") -plot(predmean(preds.P50), df.test$semimajoraxis) -dev.off() - -plot(predmean(preds.P50), df.test$semimajoraxis) - - - -############################### - - -preds.multi <- predict(result.parallel , df.test[,-1], link = function(x) x) -rmse.parallel <- sqrt(mean((predmean(preds.multi) - df.test$semimajoraxis)^2)) - -pdf("pred_parallel.pdf") -plot(predmean(preds.multi), df.test$semimajoraxis) -dev.off() - - -round(c(rmse.default, rmse.P50, rmse.parallel),2) - - -############################### - - -#Prediction based on the best model () or the MPM (Median Probability Model) - -get.best.model(result = result.default) -preds.best <- predict(get.best.model(result.default), df.test[, -1]) -sqrt(mean((preds.best - df.test$semimajoraxis)^2)) - -get.mpm.model(result = result.default, y = df.train$semimajoraxis, x = df.train[, -1]) -preds.mpm <- predict(get.mpm.model(result.default, y = df.train$semimajoraxis, x = df.train[, -1]), df.test[, -1]) -sqrt(mean((preds.mpm - df.test$semimajoraxis)^2)) - - - -get.best.model(result = result.parallel) -preds.best_parallel <- predict(get.best.model(result.parallel), df.test[, -1]) -sqrt(mean((preds.best_parallel - df.test$semimajoraxis)^2)) - - - - -# Coefficients of the best model - -coef(result.default) -coef(result.P50) -coef(result.parallel) - - -#################################################### -# -# Diagnostic plots (Section 3.8) -# -#################################################### - - -pdf("diagn_default.pdf") -diagn_plot(result.default, ylim = c(600,1500), FUN = max) -dev.off() -diagn_plot(result.default, ylim = c(600,1500), FUN = max) - - -pdf("diagn_long.pdf") -diagn_plot(result.P50, ylim = c(600,1500), FUN = max) -dev.off() -diagn_plot(result.P50, ylim = c(600,1500), FUN = max) - - -pdf("diagn_par.pdf") -diagn_plot(result.parallel, ylim = c(600,1500),FUN = max) -dev.off() - -diagn_plot(result.parallel, ylim = c(600,1500),FUN = max) - - - -####################################################### -# -# Example 2 (Section 4): -# -# Zambia data set from the cAIC4 package -# -# Linear Mixed Model with Fractional Polynomials and other non-linear features -# -# Custom function to compute Marginal Likelihood with lme4, INLA and RTMB -# -####################################################### - -rm(list = ls()) - -library(FBMS) - -#install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE) -#options(repos=c( inlabruorg = "https://inlabru-org.r-universe.dev", INLA = "https://inla.r-inla-download.org/R/testing", CRAN = "https://cran.rstudio.com") ) -#install.packages("fmesher") - -library(INLA) - -library(tictoc) -library(lme4) -library(RTMB) - -#install.packages("cAIC4") -library(cAIC4) - -data(Zambia, package = "cAIC4") -df <- as.data.frame(sapply(Zambia[1:5],scale)) - - -transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2") -probs <- gen.probs.gmjmcmc(transforms) -probs$gen <- c(1/3,1/3,0,1/3) # 
Modifications and interactions! - -params <- gen.params.gmjmcmc(ncol(df) - 1) -params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions) -params$feat$pop.max = 10 - - -# function to estimate log posterior with lme4 - -mixed.model.loglik.lme4 <- function (y, x, model, complex, mlpost_params) -{ - - # logarithm of marginal likelihood (Laplace approximation) - if (sum(model) > 1) { - x.model = x[,model] - data <- data.frame(y, x = x.model[,-1], dr = mlpost_params$dr) - - mm <- lmer(as.formula(paste0("y ~ 1 +",paste0(names(data)[2:(dim(data)[2]-1)],collapse = "+"), "+ (1 | dr)")), data = data, REML = FALSE) - } else{ #model without fixed effects - data <- data.frame(y, dr = mlpost_params$dr) - mm <- lmer(as.formula(paste0("y ~ 1 + (1 | dr)")), data = data, REML = FALSE) - } - - mloglik <- as.numeric(logLik(mm)) - 0.5*log(length(y)) * (dim(data)[2] - 2) #Laplace approximation for beta prior - - # logarithm of model prior - if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value or parameter r - lp <- log_prior(mlpost_params, complex) - - - return(list(crit = mloglik + lp, coefs = fixef(mm))) -} - - -# function to estimate log posterior with INLA - -mixed.model.loglik.inla <- function (y, x, model, complex, mlpost_params) -{ - if(sum(model)>1) - { - data1 = data.frame(y, as.matrix(x[,model]), mlpost_params$dr) - formula1 = as.formula(paste0(names(data1)[1],"~",paste0(names(data1)[3:(dim(data1)[2]-1)],collapse = "+"),"+ f(mlpost_params.dr,model = \"iid\")")) - } else - { - data1 = data.frame(y, mlpost_params$dr) - formula1 = as.formula(paste0(names(data1)[1],"~","1 + f(mlpost_params.dr,model = \"iid\")")) - } - - #to make sure inla is not stuck - inla.setOption(inla.timeout=30) - inla.setOption(num.threads=mlpost_params$INLA.num.threads) - - mod<-NULL - #importance with error handling for unstable libraries that one does not trust 100% - tryCatch({ - mod <- inla(family = "gaussian",silent = 1L,safe = F, data = data1,formula = formula1) - }, error = function(e) { - - # Handle the error by setting result to NULL - mod <- NULL - - # You can also print a message or log the error if needed - cat("An error occurred:", conditionMessage(e), "\n") - }) - - # logarithm of model prior - if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value or parameter r - lp <- log_prior(mlpost_params, complex) - - if(length(mod)<3||length(mod$mlik[1])==0) { - return(list(crit = -10000 + lp,coefs = rep(0,dim(data1)[2]-2))) - } else { - mloglik <- mod$mlik[1] - return(list(crit = mloglik + lp, coefs = mod$summary.fixed$mode)) - } -} - - -# function to estimate log posterior with RTMB - -mixed.model.loglik.rtmb <- function (y, x, model, complex, mlpost_params) -{ - z = model.matrix(y~mlpost_params$dr) #Design matrix for random effect - - msize = sum(model) - #Set up and estimate model - dat = list(y = y, xm = x[,model], z = z) - par = list(logsd_eps = 0, - logsd_dr = 0, - beta = rep(0,msize), - u = rep(0,mlpost_params$nr_dr)) - - nll = function(par){ - getAll(par,dat) - sd_eps = exp(logsd_eps) - sd_dr = exp(logsd_dr) - - nll = 0 - #-log likelihood random effect - nll = nll - sum(dnorm(u, 0, sd_dr, log = TRUE)) - mu = as.vector(as.matrix(xm)%*%beta) + z%*%u - nll <- nll - sum(dnorm(y, mu, sd_eps, log = TRUE)) - - return(nll) - } - obj <- MakeADFun(nll , par, random = "u", silent = T ) - opt <- nlminb ( obj$par , obj$fn , obj$gr, control = list(iter.max = 10)) - - # logarithm of model prior - if (length(mlpost_params$r) == 0) mlpost_params$r <- 
1/dim(x)[1] # default value or parameter r - lp <- log_prior(mlpost_params, complex) - - mloglik <- -opt$objective - 0.5*log(dim(x)[1])*msize - return(list(crit = mloglik + lp, coefs = opt$par[-(1:2)])) -} - - - -###################### -# -# Compare runtime -# - -set.seed(03052024) - -tic() -result1a <- fbms(formula = z ~ 1+., data = df, transforms = transforms, - method = "gmjmcmc",probs = probs, params = params, P=3, N = 30, - family = "custom", loglik.pi = mixed.model.loglik.lme4, - model_prior = list(r = 1/dim(df)[1]), - extra_params = list(dr = droplevels(Zambia$dr))) -time.lme4 = toc() - - -tic() -result1b <- fbms(formula = z ~ 1+., data = df, transforms = transforms, - method = "gmjmcmc",probs = probs, params = params, P=3, N = 30, - family = "custom", loglik.pi = mixed.model.loglik.inla, - model_prior = list(r = 1/dim(df)[1]), - extra_params = list(dr = droplevels(Zambia$dr), - INLA.num.threads = 10)) -time.inla = toc() - -tic() -result1c <- fbms(formula = z ~ 1+., data = df, transforms = transforms, - method = "gmjmcmc",probs = probs, params = params, P=3, N = 30, - family = "custom", loglik.pi = mixed.model.loglik.rtmb, - model_prior = list(r = 1/dim(df)[1]), - extra_params = list(dr = droplevels(Zambia$dr), - nr_dr = sum((table(Zambia$dr))>0))) -time.rtmb = toc() - - -c(time.lme4$callback_msg, time.inla$callback_msg, time.rtmb$callback_msg) - - - -####################################################### -# -# Serious analysis with lme4 -# -# - - -# Analysis without non-linear features - - -result2a <- fbms(formula = z ~ 1+., data = df, N = 5000, - method = "mjmcmc.parallel", runs = 40, cores = parallel::detectCores()-1, - family = "custom", loglik.pi = mixed.model.loglik.lme4, - model_prior = list(r = 1/dim(df)[1]), - extra_params = list(dr = droplevels(Zambia$dr))) - -summary(result2a, labels = names(df)[-1]) - -plot(result2a) - - - -# Analysis with fractional polynomials -probs <- gen.probs.gmjmcmc(transforms) -probs$gen <- c(1/3,1/3,0,1/3) # Modifications and interactions! 
-
-params <- gen.params.gmjmcmc(ncol(df) - 1)
-params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions)
-params$feat$pop.max = 10
-
-result2b <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                 probs = probs, params = params, P=25, N = 100,
-                 method = "gmjmcmc.parallel", runs = 40, cores = parallel::detectCores()-1,
-                 family = "custom", loglik.pi = mixed.model.loglik.lme4,
-                 model_prior = list(r = 1/dim(df)[1]),
-                 extra_params = list(dr = droplevels(Zambia$dr)))
-
-summary(result2b,tol = 0.05,labels=names(df)[-1])
-
-
-
-# Analysis with non-linear projections
-transforms <- c("sigmoid")
-probs <- gen.probs.gmjmcmc(transforms)
-probs$gen <- c(0,0,0.5,0.5)
-
-params <- gen.params.gmjmcmc(ncol(df) - 1)
-params$feat$pop.max = 10
-
-
-result2c <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
-                 probs = probs, params = params, P=25, N = 100,
-                 method = "gmjmcmc.parallel", runs = 40, cores = parallel::detectCores()-1,
-                 family = "custom", loglik.pi = mixed.model.loglik.lme4,
-                 model_prior = list(r = 1/dim(df)[1]),
-                 extra_params = list(dr = droplevels(Zambia$dr)))
-
-summary(result2c,tol = 0.05,labels=names(df)[-1])
-
-
-# Comparison of results
-
-summary(result2a, labels = names(df)[-1])
-summary(result2b, labels = names(df)[-1])
-summary(result2c, labels = names(df)[-1])
-
-
diff --git a/prediction.pdf b/prediction.pdf
deleted file mode 100644
index e3128a119b334307e99d1c530cc8f217ebf1b9d9..0000000000000000000000000000000000000000
GIT binary patch
[binary patch data for prediction.pdf (27980 bytes) omitted]
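This patch drops the worked prediction example along with prediction.pdf; the idiom it used for held-out error is compact enough to keep in mind as a single helper. A minimal sketch, not package code: it assumes a fitted fbms() result and a test frame with the response in the first column, as in the deleted script, and the name fbms.rmse is hypothetical.

# Minimal sketch (hypothetical helper; `result` is a fitted fbms() object,
# `df.test` keeps the response in column 1, as in the deleted JSS_Script.R)
fbms.rmse <- function(result, df.test) {
  preds <- predict(result, df.test[, -1])          # model-averaged predictions
  sqrt(mean((predmean(preds) - df.test[, 1])^2))   # RMSE of the posterior mean prediction
}
# e.g. fbms.rmse(result.default, df.test) matches rmse.default in the deleted script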
From 261511fea0efa226f11f2dd68186a80d4aadac58 Mon Sep 17 00:00:00 2001
From: aliaksah
Date: Wed, 3 Dec 2025 13:57:18 +0100
Subject: [PATCH 08/15] script name changed

---
 R_script/{JSS_script_v2.R => reproducible_script.R} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename R_script/{JSS_script_v2.R => reproducible_script.R} (100%)

diff --git a/R_script/JSS_script_v2.R b/R_script/reproducible_script.R
similarity index 100%
rename from R_script/JSS_script_v2.R rename to R_script/reproducible_script.R From 492c2740048f8f066ef20c955af2c3d29737b22a Mon Sep 17 00:00:00 2001 From: Aliaksandr Hubin Date: Wed, 3 Dec 2025 14:27:06 +0100 Subject: [PATCH 09/15] Update linter.yml --- .github/workflows/linter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 0f3966e..39f5764 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -41,7 +41,7 @@ jobs: library(lintr) style_rules <- list( brace_linter(), todo_comment_linter(), equals_na_linter(), - function_left_parentheses_linter(), no_tab_linter(), + function_left_parentheses_linter(), whitespace_linter(), absolute_path_linter(), nonportable_path_linter(), pipe_continuation_linter(), semicolon_linter(), single_quotes_linter(), trailing_blank_lines_linter(), trailing_whitespace_linter(), From e53ad087425d602ac9beabc3cdf562b859ebb6dc Mon Sep 17 00:00:00 2001 From: Aliaksandr Hubin Date: Wed, 3 Dec 2025 14:30:26 +0100 Subject: [PATCH 10/15] Update linter.yml --- .github/workflows/linter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 39f5764..eb2591f 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -44,7 +44,7 @@ jobs: function_left_parentheses_linter(), whitespace_linter(), absolute_path_linter(), nonportable_path_linter(), pipe_continuation_linter(), semicolon_linter(), - single_quotes_linter(), trailing_blank_lines_linter(), trailing_whitespace_linter(), + quotes_linter(), trailing_blank_lines_linter(), trailing_whitespace_linter(), undesirable_function_linter(), undesirable_operator_linter() ) # TODO: expand style rules as package matures lint_package(linters = style_rules) From 83233d73173c32c50af5a9dc7968067b60e3471c Mon Sep 17 00:00:00 2001 From: aliaksah Date: Tue, 16 Dec 2025 17:09:29 +0100 Subject: [PATCH 11/15] scripts updated for SoftwareX --- R_script/appendix_script.R | 213 +++++++++++++++++++++++++++++++++ R_script/reproducible_script.R | 171 +------------------------- 2 files changed, 217 insertions(+), 167 deletions(-) create mode 100644 R_script/appendix_script.R diff --git a/R_script/appendix_script.R b/R_script/appendix_script.R new file mode 100644 index 0000000..37ce72d --- /dev/null +++ b/R_script/appendix_script.R @@ -0,0 +1,213 @@ + +#################################################################### +# Install INLA and RTMB packages (consult with IT if installation fails, +# which may occasionally happen for INLA as it is not on CRAN). 
+###################################################################
+
+
+
+if (!requireNamespace("RTMB", quietly = TRUE)) {
+  message("Trying to install optional package RTMB...")
+  try(install.packages("RTMB"), silent = TRUE)
+}
+
+if (!requireNamespace("INLA", quietly = TRUE)) {
+  message("Trying to install optional package INLA...")
+  tryCatch(
+    {
+      install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
+    },
+    error = function(e) {
+      message("INLA could not be installed; continuing without it.")
+    }
+  )
+}
+
+
+################################################################
+################################################################
+#
+# EXAMPLE 2: MIXED MODELS WITH FRACTIONAL POLYNOMIALS
+#
+# Section 4 of the article
+#
+################################################################
+################################################################
+
+
+library(FBMS)
+
+
+
+###############################################################
+# 2.0 Load Zambia data (requires cAIC4)
+###############################################################
+if (!requireNamespace("cAIC4", quietly = TRUE)) {
+  stop("Optional package 'cAIC4' is required for Example 2. Please install it.")
+}
+
+data(Zambia, package = "cAIC4")
+df <- as.data.frame(sapply(Zambia[1:5],scale))
+
+
+transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2",
+                "p0p0","p0p05","p0p1","p0p2","p0p3",
+                "p0p05","p0pm05","p0pm1","p0pm2")
+
+
+probs <- gen.probs.gmjmcmc(transforms)
+probs$gen <- c(1/3,1/3,0,1/3) # Modifications and interactions!
+
+params <- gen.params.gmjmcmc(ncol(df) - 1)
+params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions)
+params$feat$pop.max = 10
+
+
+
+###############################################################
+# 2.1 Define custom log-likelihoods for INLA and RTMB
+###############################################################
+
+# ---------------------------------------------------------------
+# INLA version (only used if INLA is properly installed)
+mixed.model.loglik.inla <- function (y, x, model, complex, mlpost_params)
+{
+  if(sum(model)>1)
+  {
+    data1 = data.frame(y, as.matrix(x[,model]), mlpost_params$dr)
+    formula1 = as.formula(paste0(names(data1)[1],"~",paste0(names(data1)[3:(dim(data1)[2]-1)],collapse = "+"),"+ f(mlpost_params.dr,model = \"iid\")"))
+  } else
+  {
+    data1 = data.frame(y, mlpost_params$dr)
+    formula1 = as.formula(paste0(names(data1)[1],"~","1 + f(mlpost_params.dr,model = \"iid\")"))
+  }
+
+  # to make sure inla is not stuck
+  inla.setOption(inla.timeout=30)
+  inla.setOption(num.threads=mlpost_params$INLA.num.threads)
+
+  mod<-NULL
+  # fit with error handling for unstable libraries that one does not trust 100%
+  tryCatch({
+    mod <- inla(family = "gaussian",silent = 1L,safe = F, data = data1,formula = formula1)
+  }, error = function(e) {
+
+    # Handle the error by setting result to NULL
+    mod <- NULL
+
+    # You can also print a message or log the error if needed
+    cat("An error occurred:", conditionMessage(e), "\n")
+  })
+
+  # logarithm of model prior
+  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value for parameter r
+  lp <- log_prior(mlpost_params, complex)
+
+  if(length(mod)<3||length(mod$mlik[1])==0) {
+    return(list(crit = -10000 + lp,coefs = rep(0,dim(data1)[2]-2)))
+  } else {
+    mloglik <- mod$mlik[1]
+    return(list(crit = mloglik + lp, coefs = mod$summary.fixed$mode))
+  }
+}
+
+# ---------------------------------------------------------------
+# RTMB version (only used if RTMB is properly installed)
+mixed.model.loglik.rtmb <- function (y, x, model, complex, mlpost_params)
+{
+  z = model.matrix(y~mlpost_params$dr) #Design matrix for random effect
+
+  msize = sum(model)
+  #Set up and estimate model
+  dat = list(y = y, xm = x[,model], z = z)
+  par = list(logsd_eps = 0,
+             logsd_dr = 0,
+             beta = rep(0,msize),
+             u = rep(0,mlpost_params$nr_dr))
+
+  nll = function(par){
+    getAll(par,dat)
+    sd_eps = exp(logsd_eps)
+    sd_dr = exp(logsd_dr)
+
+    nll = 0
+    # -log likelihood of the random effect
+    nll = nll - sum(dnorm(u, 0, sd_dr, log = TRUE))
+    mu = as.vector(as.matrix(xm)%*%beta) + z%*%u
+    nll <- nll - sum(dnorm(y, mu, sd_eps, log = TRUE))
+
+    return(nll)
+  }
+  obj <- MakeADFun(nll , par, random = "u", silent = T )
+  opt <- nlminb ( obj$par , obj$fn , obj$gr, control = list(iter.max = 10))
+
+  # logarithm of model prior
+  if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value for parameter r
+  lp <- log_prior(mlpost_params, complex)
+
+  mloglik <- -opt$objective - 0.5*log(dim(x)[1])*msize
+  return(list(crit = mloglik + lp, coefs = opt$par[-(1:2)]))
+}
+
+
+###############################################################
+# 2.2 Small demonstration run for runtime comparisons
+###############################################################
+
+set.seed(3052024)
+
+library(tictoc)
+
+
+time.inla <- NULL # NULL rather than -1: cat() below errors on (-1)$callback_msg when INLA is missing
+if (requireNamespace("INLA", quietly = TRUE)) {
+  library(INLA)
+  library(cAIC4)
+
+  data(Zambia, package = "cAIC4")
+  df <- as.data.frame(sapply(Zambia[1:5],scale))
+
+  tic()
+  result1b <- fbms(
+    formula = z ~ 1+., data = df,
+    transforms = transforms,
+    method = "gmjmcmc", P = 3, N = 30,
+    family = "custom",
+    loglik.pi = mixed.model.loglik.inla,
+    model_prior = list(r = 1/nrow(df)),
+    extra_params = list(dr = droplevels(Zambia$dr),
+                        INLA.num.threads = 4)
+  )
+  time.inla <- toc()
+}
+
+time.rtmb <- NULL # NULL rather than -1, for the same reason
+if (requireNamespace("RTMB", quietly = TRUE)) {
+  library(RTMB)
+
+  data(Zambia, package = "cAIC4")
+  df <- as.data.frame(sapply(Zambia[1:5],scale))
+
+
+  tic()
+  result1c <- fbms(
+    formula = z ~ 1+., data = df,
+    transforms = transforms,
+    method = "gmjmcmc", P = 3, N = 30,
+    family = "custom",
+    loglik.pi = mixed.model.loglik.rtmb,
+    model_prior = list(r = 1/nrow(df)),
+    extra_params = list(
+      dr = droplevels(Zambia$dr),
+      nr_dr = sum(table(Zambia$dr) > 0)
+    )
+  )
+  time.rtmb <- toc()
+}
+
+cat(c(time.inla$callback_msg, time.rtmb$callback_msg))
+

diff --git a/R_script/reproducible_script.R b/R_script/reproducible_script.R
index 7202ce7..2978d2b 100644
--- a/R_script/reproducible_script.R
+++ b/R_script/reproducible_script.R
@@ -47,47 +47,13 @@
 # 2. Install FBMS (always from GitHub to enforce correct version)
 ###############################################################
 
-message("Installing FBMS from GitHub (branch jss_v2)...")
-install_github("jonlachmann/FBMS@jss_v2",
+message("Installing FBMS from GitHub (branch softwareX)...")
+install_github("jonlachmann/FBMS@softwareX",
                force = TRUE,
                build_vignettes = FALSE)
 
 library(FBMS)
 library(tictoc)
 
-####################################################################
-# 3. Install optional packages (continue even if installation fails,
-# which may happen for INLA as it is not on CRAN).
-# INLA is not central but is only used for a custom implementation -# of marginal likelihood computations to show how to extend FBMS -################################################################### - -optional_pkgs <- c("RTMB", "INLA") - -# Optional: RTMB (CRAN) -if (!requireNamespace("RTMB", quietly = TRUE)) { - message("Trying to install optional package RTMB...") - try(install.packages("RTMB"), silent = TRUE) -} - -# Optional: INLA (not on CRAN) -if (!requireNamespace("INLA", quietly = TRUE)) { - message("Trying to install optional package INLA...") - - # Try to load the installer (only if previously installed) - if (!requireNamespace("INLA", quietly = TRUE)) { - tryCatch( - { - install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE) - }, - error = function(e) { - message("INLA could not be installed; continuing without it.") - } - ) - } -} - - - ################################################################ ################################################################ # @@ -258,7 +224,6 @@ rm(list = ls()) library(FBMS) - ############################################################### # 2.0 Load Zambia data (requires cAIC4) ############################################################### @@ -285,7 +250,7 @@ params$feat$pop.max = 10 ############################################################### -# 2.1 Define custom log-likelihoods for lme4, INLA, RTMB +# 2.1 Define custom log-likelihoods for lme4 ############################################################### # lme4 version @@ -315,88 +280,6 @@ mixed.model.loglik.lme4 <- function (y, x, model, complex, mlpost_params) return(list(crit = mloglik + lp, coefs = fixef(mm))) } -# --------------------------------------------------------------- -# INLA version (only used if INLA is properly installed) -mixed.model.loglik.inla <- function (y, x, model, complex, mlpost_params) -{ - if(sum(model)>1) - { - data1 = data.frame(y, as.matrix(x[,model]), mlpost_params$dr) - formula1 = as.formula(paste0(names(data1)[1],"~",paste0(names(data1)[3:(dim(data1)[2]-1)],collapse = "+"),"+ f(mlpost_params.dr,model = \"iid\")")) - } else - { - data1 = data.frame(y, mlpost_params$dr) - formula1 = as.formula(paste0(names(data1)[1],"~","1 + f(mlpost_params.dr,model = \"iid\")")) - } - - #to make sure inla is not stuck - inla.setOption(inla.timeout=30) - inla.setOption(num.threads=mlpost_params$INLA.num.threads) - - mod<-NULL - #importance with error handling for unstable libraries that one does not trust 100% - tryCatch({ - mod <- inla(family = "gaussian",silent = 1L,safe = F, data = data1,formula = formula1) - }, error = function(e) { - - # Handle the error by setting result to NULL - mod <- NULL - - # You can also print a message or log the error if needed - cat("An error occurred:", conditionMessage(e), "\n") - }) - - # logarithm of model prior - if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value or parameter r - lp <- log_prior(mlpost_params, complex) - - if(length(mod)<3||length(mod$mlik[1])==0) { - return(list(crit = -10000 + lp,coefs = rep(0,dim(data1)[2]-2))) - } else { - mloglik <- mod$mlik[1] - return(list(crit = mloglik + lp, coefs = mod$summary.fixed$mode)) - } -} - -# --------------------------------------------------------------- -# RTMB version (only used if RTMB is properly installed) -mixed.model.loglik.rtmb <- function (y, x, model, complex, mlpost_params) -{ - z = model.matrix(y~mlpost_params$dr) #Design matrix for random effect - - msize = sum(model) - 
#Set up and estimate model - dat = list(y = y, xm = x[,model], z = z) - par = list(logsd_eps = 0, - logsd_dr = 0, - beta = rep(0,msize), - u = rep(0,mlpost_params$nr_dr)) - - nll = function(par){ - getAll(par,dat) - sd_eps = exp(logsd_eps) - sd_dr = exp(logsd_dr) - - nll = 0 - #-log likelihood random effect - nll = nll - sum(dnorm(u, 0, sd_dr, log = TRUE)) - mu = as.vector(as.matrix(xm)%*%beta) + z%*%u - nll <- nll - sum(dnorm(y, mu, sd_eps, log = TRUE)) - - return(nll) - } - obj <- MakeADFun(nll , par, random = "u", silent = T ) - opt <- nlminb ( obj$par , obj$fn , obj$gr, control = list(iter.max = 10)) - - # logarithm of model prior - if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value or parameter r - lp <- log_prior(mlpost_params, complex) - - mloglik <- -opt$objective - 0.5*log(dim(x)[1])*msize - return(list(crit = mloglik + lp, coefs = opt$par[-(1:2)])) -} - - ############################################################### # 2.2 Small demonstration run for runtime comparisons ############################################################### @@ -419,54 +302,8 @@ result1a <- fbms( ) time.lme4 <- toc() -time.inla <- -1 -if (requireNamespace("INLA", quietly = TRUE)) { - library(INLA) - library(cAIC4) - - data(Zambia, package = "cAIC4") - df <- as.data.frame(sapply(Zambia[1:5],scale)) - - tic() - result1b <- fbms( - formula = z ~ 1+., data = df, - transforms = transforms, - method = "gmjmcmc", P = 3, N = 30, - family = "custom", - loglik.pi = mixed.model.loglik.inla, - model_prior = list(r = 1/nrow(df)), - extra_params = list(dr = droplevels(Zambia$dr), - INLA.num.threads = 4) - ) - time.inla <- toc() -} - -time.rtmb <- -1 -if (requireNamespace("RTMB", quietly = TRUE)) { - library(RTMB) - - data(Zambia, package = "cAIC4") - df <- as.data.frame(sapply(Zambia[1:5],scale)) - - - tic() - result1c <- fbms( - formula = z ~ 1+., data = df, - transforms = transforms, - method = "gmjmcmc", P = 3, N = 30, - family = "custom", - loglik.pi = mixed.model.loglik.rtmb, - model_prior = list(r = 1/nrow(df)), - extra_params = list( - dr = droplevels(Zambia$dr), - nr_dr = sum(table(Zambia$dr) > 0) - ) - ) - time.rtmb <- toc() -} -cat(c(time.lme4$callback_msg, time.inla$callback_msg, time.rtmb$callback_msg) -) +cat(c(time.lme4$callback_msg)) ############################################################### # 2.3 Serious analysis with lme4 (Section 4). Runs within time From e52604d03ac9295e8bd6e902643b87bb41a7741f Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 17 Dec 2025 09:35:07 +0100 Subject: [PATCH 12/15] documentation updated summary defaults for pop updated --- R/summary.R | 2 +- man/summary.gmjmcmc.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/summary.R b/R/summary.R index a947243..d9d8e3a 100644 --- a/R/summary.R +++ b/R/summary.R @@ -1,7 +1,7 @@ #' Function to Print a Quick Summary of the Results #' #' @param object The results to use -#' @param pop The population to print for, defaults to last +#' @param pop The population to print for, defaults to "best", other options are "last" and "all" #' @param tol The tolerance to use as a threshold when reporting the results. #' @param labels Should the covariates be named, or just referred to as their place in the data.frame. #' @param effects Quantiles for posterior modes of the effects across models to be reported, if either effects are NULL or if labels are NULL, no effects are reported. 
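Between the source documentation above and the man page below, the effect of the new default is easiest to see by example; a minimal sketch, assuming a fitted gmjmcmc result such as result.P50 from the reproducible script:

# Sketch of the three documented pop options (assumes result.P50 exists)
summary(result.P50)                # pop = "best", the new default
summary(result.P50, pop = "last")  # the last population of features
summary(result.P50, pop = "all")   # aggregated over all populations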
diff --git a/man/summary.gmjmcmc.Rd b/man/summary.gmjmcmc.Rd index 7a84158..e7a7d65 100644 --- a/man/summary.gmjmcmc.Rd +++ b/man/summary.gmjmcmc.Rd @@ -18,7 +18,7 @@ \arguments{ \item{object}{The results to use} -\item{pop}{The population to print for, defaults to last} +\item{pop}{The population to print for, defaults to "best", other options are "last" and "all"} \item{tol}{The tolerance to use as a threshold when reporting the results.} From e80957a8da0eb3ee86de37251438d95f12c85527 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 17 Dec 2025 10:39:53 +0100 Subject: [PATCH 13/15] libraries in the script updated --- R_script/reproducible_script.R | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/R_script/reproducible_script.R b/R_script/reproducible_script.R index 2978d2b..5500442 100644 --- a/R_script/reproducible_script.R +++ b/R_script/reproducible_script.R @@ -227,10 +227,20 @@ library(FBMS) ############################################################### # 2.0 Load Zambia data (requires cAIC4) ############################################################### +if (!requireNamespace("lme4", quietly = TRUE)) { + stop("Optional package 'lme4' is required for Example 2. Please install it.") +} +if (!requireNamespace("tictoc", quietly = TRUE)) { + stop("Optional package 'tictoc' is required for Example 2. Please install it.") +} if (!requireNamespace("cAIC4", quietly = TRUE)) { stop("Optional package 'cAIC4' is required for Example 2. Please install it.") } +library(tictoc) +library(lme4) + + data(Zambia, package = "cAIC4") df <- as.data.frame(sapply(Zambia[1:5],scale)) @@ -254,8 +264,6 @@ params$feat$pop.max = 10 ############################################################### # lme4 version - -library(lme4) mixed.model.loglik.lme4 <- function (y, x, model, complex, mlpost_params) { @@ -284,23 +292,16 @@ mixed.model.loglik.lme4 <- function (y, x, model, complex, mlpost_params) # 2.2 Small demonstration run for runtime comparisons ############################################################### -set.seed(3052024) -library(tictoc) +set.seed(03052024) tic() -result1a <- fbms( - formula = z ~ 1+., data = df, - transforms = transforms, - method = "gmjmcmc", P = 3, N = 30, - probs = gen.probs.gmjmcmc(transforms), - params = gen.params.gmjmcmc(ncol(df) - 1), - family = "custom", - loglik.pi = mixed.model.loglik.lme4, - model_prior = list(r = 1/nrow(df)), - extra_params = list(dr = droplevels(Zambia$dr)) -) -time.lme4 <- toc() +result1a <- fbms(formula = z ~ 1+., data = df, transforms = transforms, + method = "gmjmcmc",probs = probs, params = params, P=3, N = 30, + family = "custom", loglik.pi = mixed.model.loglik.lme4, + model_prior = list(r = 1/dim(df)[1]), + extra_params = list(dr = droplevels(Zambia$dr))) +time.lme4 = toc() cat(c(time.lme4$callback_msg)) From cde314789e27b989a90c8e853ded6af2d2117a59 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Thu, 18 Dec 2025 13:27:11 +0100 Subject: [PATCH 14/15] = replaced with <- in suppl script --- R_script/reproducible_script.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R_script/reproducible_script.R b/R_script/reproducible_script.R index 5500442..8b2fb51 100644 --- a/R_script/reproducible_script.R +++ b/R_script/reproducible_script.R @@ -179,7 +179,7 @@ plot(predmean(preds), df.test$semimajoraxis) ############################### -preds.P50 = predict(result.P50, df.test[,-1]) +preds.P50 <- predict(result.P50, df.test[,-1]) rmse.P50 <- sqrt(mean((predmean(preds.P50) - 
df.test$semimajoraxis)^2))
 
 plot(predmean(preds.P50), df.test$semimajoraxis)
 
@@ -255,7 +255,7 @@ probs$gen <- c(1/3,1/3,0,1/3) # Modifications and interactions!
 
 params <- gen.params.gmjmcmc(ncol(df) - 1)
 params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions)
-params$feat$pop.max = 10
+params$feat$pop.max <- 10
 
 
 
@@ -301,7 +301,7 @@ result1a <- fbms(formula = z ~ 1+., data = df, transforms = transforms,
                  family = "custom", loglik.pi = mixed.model.loglik.lme4,
                  model_prior = list(r = 1/dim(df)[1]),
                  extra_params = list(dr = droplevels(Zambia$dr)))
-time.lme4 = toc()
+time.lme4 <- toc()
 
 cat(c(time.lme4$callback_msg))
 
@@ -316,7 +316,7 @@ cat(c(time.lme4$callback_msg))
 # Specify whether to run long chains under mixed effect models.
 # Default is false as these chains can run longer than 20 minutes
 # depending on the machines used.
-run.long.mixed = TRUE
+run.long.mixed <- TRUE
 
 if(run.long.mixed) {

From 014f6eb1caf3c29b3c2230a4ffb6e4b7ead1685d Mon Sep 17 00:00:00 2001
From: aliaksah
Date: Thu, 18 Dec 2025 17:36:18 +0100
Subject: [PATCH 15/15] phrasing improved in lmm

---
 R_script/reproducible_script.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R_script/reproducible_script.R b/R_script/reproducible_script.R
index 8b2fb51..4037edc 100644
--- a/R_script/reproducible_script.R
+++ b/R_script/reproducible_script.R
@@ -278,7 +278,7 @@ mixed.model.loglik.lme4 <- function (y, x, model, complex, mlpost_params)
     mm <- lmer(as.formula(paste0("y ~ 1 + (1 | dr)")), data = data, REML = FALSE)
   }
 
-  mloglik <- as.numeric(logLik(mm)) - 0.5*log(length(y)) * (dim(data)[2] - 2) #Laplace approximation for beta prior
+  mloglik <- as.numeric(logLik(mm)) - 0.5*log(length(y)) * (dim(data)[2] - 2) #Laplace approximation
 
   # logarithm of model prior
   if (length(mlpost_params$r) == 0) mlpost_params$r <- 1/dim(x)[1] # default value for parameter r
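A closing note on the comment corrected in this last patch: the quantity is a Laplace (BIC-style) approximation to the log marginal likelihood, logLik minus 0.5 * log(n) per fixed-effect covariate, where dim(data)[2] - 2 counts the columns of data other than the response y and the grouping factor dr. A self-contained toy check of that arithmetic, on illustrative data and assuming only lme4:

# Toy check of the penalty used in mixed.model.loglik.lme4 (illustrative data)
library(lme4)
set.seed(1)
toy <- data.frame(y = rnorm(100), x1 = rnorm(100),
                  dr = factor(rep(1:10, each = 10)))
mm <- lmer(y ~ 1 + x1 + (1 | dr), data = toy, REML = FALSE)
p  <- ncol(toy) - 2                                # covariates besides y and dr: here x1
as.numeric(logLik(mm)) - 0.5 * log(nrow(toy)) * p  # the Laplace-approximated criterion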