## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup---------------------------------------------------------------------
library(SBMTrees)
library(mitml)
library(lme4)

## ----prediction_sim--------------------------------------------------------------
# Simulate longitudinal data for the prediction task
data <- simulation_prediction(
  n_subject = 100,
  seed = 1234,
  nonlinear = TRUE,
  nonrandeff = TRUE,
  nonresidual = TRUE
)

# Extract the training and testing datasets
X_train <- data$X_train
Y_train <- data$Y_train
Z_train <- data$Z_train
subject_id_train <- data$subject_id_train

X_test <- data$X_test
Z_test <- data$Z_test
subject_id_test <- data$subject_id_test

Y_test_true <- data$Y_test_true

## ----prediction------------------------------------------------------------------
# Fit the predictive model
model <- BMTrees_prediction(
  X_train, Y_train, Z_train, subject_id_train,
  X_test, Z_test, subject_id_test,
  model = "BMTrees",
  binary = FALSE,
  nburn = 3L,
  npost = 4L,
  skip = 1L,
  verbose = FALSE,
  seed = 1234
)

# Posterior expectation for the testing dataset
posterior_predictions <- model$post_predictive_y_test
head(colMeans(posterior_predictions))

## ----prediction_evaluation--------------------------------------------------------
point_predictions <- colMeans(posterior_predictions)

# Compute MAE
mae <- mean(abs(point_predictions - Y_test_true))
cat("Mean Absolute Error (MAE):", mae, "\n")

# Compute MSE
mse <- mean((point_predictions - Y_test_true)^2)
cat("Mean Squared Error (MSE):", mse, "\n")

# Compute 95% posterior predictive intervals
lower_bounds <- apply(posterior_predictions, 2, quantile, probs = 0.025)
upper_bounds <- apply(posterior_predictions, 2, quantile, probs = 0.975)

# Check whether the true values fall within the intervals
coverage <- mean(Y_test_true >= lower_bounds & Y_test_true <= upper_bounds)
cat("95% Posterior Predictive Interval Coverage:", coverage * 100, "%\n")

plot(Y_test_true, point_predictions,
     xlab = "True Values", ylab = "Predicted Values",
     main = "True vs Predicted Values")
abline(0, 1, col = "red")  # Add a 45-degree reference line

## ----imputation_sim----------------------------------------------------------------
# Simulate longitudinal data with missing values
data <- simulation_imputation(
  n_subject = 100,
  seed = 1234,
  nonrandeff = TRUE,
  nonresidual = TRUE,
  alligned = FALSE
)

# Extract the components of the dataset
X_mis <- data$X_mis            # Covariates with missing values
Y_mis <- data$Y_mis            # Outcomes with missing values
Z <- data$Z                    # Random-effect predictors
subject_id <- data$subject_id

## ----imputation---------------------------------------------------------------------
# Run the sequential imputation
imputed_model <- sequential_imputation(
  X_mis, Y_mis, Z, subject_id,
  type = rep(0, ncol(X_mis)),
  binary_outcome = FALSE,
  model = "BMTrees",
  nburn = 3L,
  npost = 40L,
  skip = 2L,
  verbose = FALSE,
  seed = 1234
)

# Extract the imputed data
imputed_data <- imputed_model$imputed_data
dim(imputed_data)  # Dimensions: posterior samples x observations x variables

## ----imputation_evaluation----------------------------------------------------------
# Assemble the imputed datasets into a list that mitml can work with
MI_data <- list()
for (i in seq_len(dim(imputed_data)[1])) {
  MI_data[[i]] <- cbind(as.data.frame(imputed_data[i, , ]), Z, subject_id)
  colnames(MI_data[[i]]) <- c(colnames(X_mis), "Y", "Z1", "Z2", "Z3", "subject_id")
}
MI_data <- as.mitml.list(MI_data)  # Convert to a mitml list for model fitting and pooling
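# Illustrative sanity check (not part of the original vignette): confirm that
# none of the completed datasets still contain missing values before fitting.
sapply(MI_data, anyNA)  # Should be FALSE for every imputed dataset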
# Fit the linear mixed model on each imputed dataset
lmm_results <- with(MI_data, lmer(
  Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
    (0 + Z1 + Z2 + Z3 | subject_id)
))

# Pool fixed effects using Rubin's rules
pooled_results <- testEstimates(lmm_results)

# Print pooled results
print(pooled_results)
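# Optional sketch, not part of the original vignette: contrast the pooled
# estimates above with a complete-case (listwise-deletion) fit on the
# unimputed data. This assumes X_mis has columns X1-X9 and Z has three
# columns, as in the simulation above, and that enough complete rows remain
# for lmer to fit; lmer's default na.action drops incomplete rows.
cc_data <- cbind(as.data.frame(X_mis), Y_mis, Z, subject_id)
colnames(cc_data) <- c(colnames(X_mis), "Y", "Z1", "Z2", "Z3", "subject_id")
cc_fit <- lmer(
  Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
    (0 + Z1 + Z2 + Z3 | subject_id),
  data = cc_data
)
fixef(cc_fit)  # Compare with the pooled estimates from testEstimates()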