## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
# install.packages("HTRX")

## ----setup--------------------------------------------------------------------
library(HTRX)

## -----------------------------------------------------------------------------
## load the data
data(example_data_nosnp)
data(example_hap1)
data(example_hap2)

## -----------------------------------------------------------------------------
## display rows 41-43 and the first 6 columns of 'example_data_nosnp'
example_data_nosnp[41:43, 1:6]

## -----------------------------------------------------------------------------
## display the first 3 rows of 'example_hap1'
head(example_hap1, 3)

## -----------------------------------------------------------------------------
## create haplotype data removing haplotypes rarer than 1%
## (only the first 2000 rows and the first 4 columns of each input are used)
HTRX_matrix_rmrare <- make_htrx(
  hap1 = example_hap1[1:2000, 1:4],
  hap2 = example_hap2[1:2000, 1:4],
  rareremove = TRUE,
  rare_threshold = 0.01
)

## display the created haplotype data
HTRX_matrix_rmrare[1:3, 47:50]

## create haplotype data without removing any haplotypes
HTRX_matrix_allhaps <- make_htrx(
  hap1 = example_hap1[1:2000, 1:4],
  hap2 = example_hap2[1:2000, 1:4]
)

## create haplotype data while at maximum 3 SNPs can interact
HTRX_matrix_3snphaps <- make_htrx(
  hap1 = example_hap1[1:2000, 1:4],
  hap2 = example_hap2[1:2000, 1:4],
  max_int = 3
)

## compare the numbers of haplotypes created under the different
## 'rareremove' and 'max_int' settings used above
cat(
  ncol(HTRX_matrix_rmrare),
  ncol(HTRX_matrix_allhaps),
  ncol(HTRX_matrix_3snphaps)
)

## ----warning=FALSE------------------------------------------------------------
## selecting the best haplotype model using "BIC" from all the haplotypes
## only column 1 of the data is passed, so no covariates are included;
## drop = FALSE keeps the single column as a data frame rather than a vector
CV_results_nocovar <- do_cv(
  data_nosnp = example_data_nosnp[1:2000, 1, drop = FALSE],
  featuredata = HTRX_matrix_rmrare,
  sim_times = 2,
  featurecap = 4,
  usebinary = 1,
  method = "simple",
  criteria = "BIC",
  gain = FALSE
)
cat(
  "The selected features", as.character(CV_results_nocovar[[2]]),
  "explains \n", mean(CV_results_nocovar[[1]]) * 100,
  "% average out-of-sample variance"
)

## ----warning=FALSE------------------------------------------------------------
## selecting the best haplotype model using "AIC" from all the haplotypes
## here we include the sex and age as fixed covariates
CV_results_withcovar <- do_cv(
  data_nosnp = example_data_nosnp[1:2000, 1:3],
  featuredata = HTRX_matrix_rmrare,
  sim_times = 2,
  featurecap = 8,
  usebinary = 1,
  method = "stratified",
  criteria = "AIC",
  gain = TRUE
)
cat(
  "The selected features", as.character(CV_results_withcovar[[2]]),
  "explains \n", mean(CV_results_withcovar[[1]]) * 100,
  "% extra average out-of-sample variance"
)

## ----warning=FALSE------------------------------------------------------------
## selecting the best haplotype model using "AIC"
## we include all the 8 SNPs, but specify at most 4 SNPs can interact
## we also include the sex and age as fixed covariates
cumu_CV_results <- do_cumulative_htrx(
  data_nosnp = example_data_nosnp[1:2000, 1:3],
  hap1 = example_hap1[1:2000, ],
  hap2 = example_hap2[1:2000, ],
  sim_times = 1,
  featurecap = 8,
  usebinary = 1,
  method = "stratified",
  criteria = "AIC",
  gain = TRUE,
  max_int = 4
)
## gain = TRUE here as in the previous example, so the same "extra" wording
## is used in the message
cat(
  "The selected features", as.character(cumu_CV_results[[2]]),
  "explains \n", mean(cumu_CV_results[[1]]) * 100,
  "% extra average out-of-sample variance"
)