## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set( tidy = TRUE, collapse = TRUE, comment = "#>" ) ## ----------------------------------------------------------------------------- # parameter vector betas <- c(1, 3, 2, 1.25, 3.25, 1.75, 0.75, 2.75, 2.25, 0, 0, 0) # sample size sample_size <- 500 # number of nominal response categories categories_no <- 4 # cluster size cluster_size <- 3 set.seed(1) # time-stationary covariate x_{i1} x1 <- rep(rnorm(sample_size), each = cluster_size) # time-varying covariate x_{it2} x2 <- rnorm(sample_size * cluster_size) # create covariates dataframe xdata <- data.frame(x1, x2) set.seed(321) library("SimCorMultRes") # latent correlation matrix for the NORTA method equicorrelation_matrix <- toeplitz(c(1, rep(0.95,cluster_size - 1))) identity_matrix <- diag(categories_no) latent_correlation_matrix <- kronecker(equicorrelation_matrix, identity_matrix) # simulation of clustered nominal responses simulated_nominal_dataset <- rmult.bcl(clsize = cluster_size, ncategories = categories_no, betas = betas, xformula = ~ x1 + x2, xdata = xdata, cor.matrix = latent_correlation_matrix) suppressPackageStartupMessages(library("multgee")) # fitting a GEE model nominal_gee_model <- nomLORgee(y ~ x1 + x2, data = simulated_nominal_dataset$simdata, id = id, repeated = time, LORstr = "time.exch") # checking regression coefficients round(coef(nominal_gee_model), 2) ## ----------------------------------------------------------------------------- set.seed(12345) # sample size sample_size <- 500 # cluster size cluster_size <- 4 # category-specific intercepts beta_intercepts <- c(-1.5, -0.5, 0.5, 1.5) # time-varying regression parameters associated with covariates beta_coefficients <- matrix(c(1, 2, 3, 4), 4, 1) # time-stationary covariate x <- rep(rnorm(sample_size), each = cluster_size) # latent correlation matrix for the NORTA method latent_correlation_matrix <- toeplitz(c(1, 0.85, 0.5, 0.15)) # simulation of ordinal responses simulated_ordinal_dataset <- rmult.clm(clsize = cluster_size, intercepts = beta_intercepts, betas = beta_coefficients, xformula = ~ x, cor.matrix = latent_correlation_matrix, link = "probit") # first eight rows of the simulated dataframe head(simulated_ordinal_dataset$simdata, n = 8) ## ----------------------------------------------------------------------------- set.seed(1) # sample size sample_size <- 500 # cluster size cluster_size <- 4 # category-specific intercepts beta_intercepts <- c(-1.5, -0.5, 0.5, 1.5) # regression parameters associated with covariates beta_coefficients <- 1 # time-varying covariate x <- rnorm(sample_size * cluster_size) # number of ordinal response categories categories_no <- 5 # correlation matrix for the NORTA method latent_correlation_matrix <- diag(1, (categories_no - 1) * cluster_size) + kronecker(toeplitz(c(0, rep(0.24, categories_no - 2))), matrix(1, cluster_size, cluster_size)) # simulation of ordinal responses simulated_ordinal_dataset <- rmult.crm(clsize = cluster_size, intercepts = beta_intercepts, betas = beta_coefficients, xformula = ~ x, cor.matrix = latent_correlation_matrix, link = "probit") # first six clusters with ordinal responses head(simulated_ordinal_dataset$Ysim) ## ----------------------------------------------------------------------------- # intercepts beta_intercepts <- c(3, 2, 1) # parameter vector beta_coefficients <- c(1, 1) # sample size sample_size <- 500 # cluster size cluster_size <- 3 set.seed(321) # time-stationary covariate x_{i1} x1 <- rep(rnorm(sample_size), each = cluster_size) # time-varying covariate x_{it2} x2 <- rnorm(sample_size * cluster_size) # create covariates dataframe xdata <- data.frame(x1, x2) # correlation matrix for the NORTA method equicorrelation_matrix <- toeplitz(c(1, rep(0.95, cluster_size - 1))) identity_matrix <- diag(4) latent_correlation_matrix <- kronecker(equicorrelation_matrix, identity_matrix) # simulation of clustered ordinal responses simulated_ordinal_dataset <- rmult.acl(clsize = cluster_size, intercepts = beta_intercepts, betas = beta_coefficients, xformula = ~ x1 + x2, xdata = xdata, cor.matrix = latent_correlation_matrix) suppressPackageStartupMessages(library("multgee")) # fitting a GEE model ordinal_gee_model <- ordLORgee(y ~ x1 + x2, data = simulated_ordinal_dataset$simdata, id = id, repeated = time, LORstr = "time.exch", link = "acl") # checking regression coefficients round(coef(ordinal_gee_model), 2) ## ----------------------------------------------------------------------------- set.seed(123) # sample size sample_size <- 100 # cluster size cluster_size <- 4 # intercept beta_intercepts <- 0 # regression parameter associated with the covariate beta_coefficients <- 0.2 # correlation matrix for the NORTA method latent_correlation_matrix <- toeplitz(c(1, 0.9, 0.9, 0.9)) # time-stationary covariate x <- rep(rnorm(sample_size), each = cluster_size) # simulation of clustered binary responses simulated_binary_dataset <- rbin(clsize = cluster_size, intercepts = beta_intercepts, betas = beta_coefficients, xformula = ~ x, cor.matrix = latent_correlation_matrix, link = "probit") library("gee") # fitting a GEE model binary_gee_model <- gee(y ~ x, family = binomial("probit"), id = id, data = simulated_binary_dataset$simdata) # checking the estimated coefficients summary(binary_gee_model)$coefficients ## ----------------------------------------------------------------------------- set.seed(8) # simulation of epsilon variables library("evd") simulated_latent_variables1 <- rmvevd(sample_size, dep = sqrt(1 - 0.9), model = "log", d = cluster_size) simulated_latent_variables2 <- rmvevd(sample_size, dep = sqrt(1 - 0.9), model = "log", d = cluster_size) simulated_latent_variables <- simulated_latent_variables1 - simulated_latent_variables2 # simulation of clustered binary responses simulated_binary_dataset <- rbin(clsize = cluster_size, intercepts = beta_intercepts, betas = beta_coefficients, xformula = ~ x, rlatent = simulated_latent_variables) # fitting a GEE model binary_gee_model <- gee(y ~ x, family = binomial("logit"), id = id, data = simulated_binary_dataset$simdata) # checking the estimated coefficients summary(binary_gee_model)$coefficients ## ----------------------------------------------------------------------------- set.seed(123) # sample size sample_size <- 5000 # cluster size cluster_size <- 4 # intercept beta_intercepts <- qnorm(0.8) # pseudo-covariate x <- rep(0, each = cluster_size * sample_size) # regression parameter associated with the covariate beta_coefficients <- 0 # correlation matrix for the NORTA method latent_correlation_matrix <- diag(cluster_size) # simulation of clustered binary responses simulated_binary_dataset <- rbin(clsize = cluster_size, intercepts = beta_intercepts, betas = beta_coefficients, xformula = ~x, cor.matrix = latent_correlation_matrix, link = "probit") # simulated marginal probabilities colMeans(simulated_binary_dataset$Ysim) ## ----------------------------------------------------------------------------- # sample size sample_size <- 5000 # cluster size cluster_size <- 3 # pseudo-covariate x <- rep(0, each = cluster_size * sample_size) # parameter vector betas <- c(log(0.1/0.4), 0, log(0.2/0.4), 0, log(0.3/0.4), 0, 0, 0) # number of nominal response categories categories_no <- 4 set.seed(1) # correlation matrix for the NORTA method latent_correlation_matrix <- kronecker(diag(cluster_size), diag(categories_no)) # simulation of clustered nominal responses simulated_nominal_dataset <- rmult.bcl(clsize = cluster_size, ncategories = categories_no, betas = betas, xformula = ~ x, cor.matrix = latent_correlation_matrix) # simulated marginal probabilities apply(simulated_nominal_dataset$Ysim, 2, table) / sample_size ## ----echo = FALSE, fig.cap= "Difference between the correlation parameters of the bivariate normal distribution and of the latent variables for three different marginal distributions."---- plot(rho - normal ~ rho, data = simulation, type = "l", col = "blue", ylim = c(0, 0.016), ylab = expression(rho - hat(rho)), xlab = expression(rho)) points(rho - logistic ~ rho, data = simulation, type = "l", col = "red", lty = 2) points(rho - gumbel ~ rho, data = simulation, type = "l", col = "grey", lty = 3) legend("topright", legend = c("Normal", "Logistic", "Gumbel"), col = c("blue", "red", "grey"), lwd = 1, lty = c(1, 2, 3)) title(main = paste("Difference between true and simulated correlation")) ## ----comment=""--------------------------------------------------------------- citation("SimCorMultRes")