## ----configure_knitr, eval = TRUE--------------------------------------------- knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4) ## ----clear_memory, eval = TRUE------------------------------------------------ rm(list=ls()) ## ----runchunks, eval = TRUE--------------------------------------------------- # Set whether or not the following chunks will be executed (run): execute.vignette <- FALSE ## ----setup, eval = execute.vignette------------------------------------------- # knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4) # library(httk) # library(ggplot2) # library(gridExtra) # library(cowplot) # library(ggrepel) # library(dplyr) # library(stringr) # library(forcats) # library(smatr) ## ----data, eval = execute.vignette-------------------------------------------- # met_data <- metabolism_data_Linakis2020 # conc_data <- concentration_data_Linakis2020 # #conc_data <- subset(concentration_data_Linakis2020, !(SAMPLING_MATRIX %in% # # c("EEB","MEB","EB"))) # #conc_data <- concentration_data_Linakis2020 # # subset(concentration_data_Linakis2020, !(SOURCE_CVT %in% c( # # "11453305"))) # conc_data[,"DOSE_U"] <- ifelse(conc_data[,"DOSE_U"] == "ppm", # yes = "ppmv", # conc_data[,"DOSE_U"]) # conc_data[,"ORIG_CONC_U"] <- ifelse(conc_data[,"ORIG_CONC_U"] == "ppm", # yes = "ppmv", # conc_data[,"ORIG_CONC_U"]) # # Not sure what to do with percent: # conc_data <- subset(conc_data,toupper(ORIG_CONC_U) != "PERCENT") # # Rename this column: # colnames(conc_data)[colnames(conc_data)=="ORIG_CONC_U"] <- "CONC_U" # conc_data$ORIGINAL_CONC_U <- conc_data$CONC_U # conc_data$ORIGINAL_CONC <- conc_data$CONCENTRATION # # Maybe Linakis et al translated all concentrations to uM? # conc_data$CONC_U <- "uM" ## ----convert_to_ppmweight, eval = FALSE--------------------------------------- # gas.media <- c("EB","MEB","EEB","EB (+W)") # gas.units <- unique(subset(conc_data, # SAMPLING_MATRIX %in% gas.media)$CONC_U) # target.unit <- "ppmv" # for (this.unit in gas.units) # if (this.unit != target.unit) # { # these.chems <- unique(subset(conc_data, # SAMPLING_MATRIX %in% gas.media & # CONC_U==this.unit)$DTXSID) # for (this.chem in these.chems) # { # this.factor <- convert_units( # input.units=this.unit, # output.units=target.unit, # dtxsid=this.chem, state="gas") # print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit)) # # # Scale the observation # conc_data[conc_data$DTXSID==this.chem & # conc_data$SAMPLING_MATRIX %in% gas.media & # conc_data$CONC_U==this.unit,"CONCENTRATION"] <- # this.factor * conc_data[ # conc_data$DTXSID==this.chem & # conc_data$SAMPLING_MATRIX %in% gas.media & # conc_data$CONC_U==this.unit,"CONCENTRATION"] # # Change the reported unit # conc_data[conc_data$DTXSID==this.chem & # conc_data$SAMPLING_MATRIX %in% gas.media & # conc_data$CONC_U==this.unit,"CONC_U"] <- # target.unit # # } # } ## ----normalize_other_units, eval = FALSE-------------------------------------- # tissue.media <- c("VBL","BL","ABL","PL","BL (+W)") # tissue.units <- unique(subset(conc_data, # SAMPLING_MATRIX %in% tissue.media)$CONC_U) # target.unit <- "uM" # for (this.unit in tissue.units) # if (this.unit != target.unit) # { # these.chems <- unique(subset(conc_data, # SAMPLING_MATRIX %in% tissue.media & # CONC_U==this.unit)$DTXSID) # for (this.chem in these.chems) # { # this.factor <- try(convert_units( # input.units=this.unit, # output.units=target.unit, # dtxsid=this.chem)) # print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit)) # # # Scale the observation # conc_data[conc_data$DTXSID==this.chem & # conc_data$SAMPLING_MATRIX %in% tissue.media & # conc_data$CONC_U==this.unit,"CONCENTRATION"] <- # this.factor * conc_data[ # conc_data$DTXSID==this.chem & # conc_data$SAMPLING_MATRIX %in% tissue.media & # conc_data$CONC_U==this.unit,"CONCENTRATION"] # # Change the reported unit # conc_data[conc_data$DTXSID==this.chem & # conc_data$SAMPLING_MATRIX %in% tissue.media & # conc_data$CONC_U==this.unit,"CONC_U"] <- # target.unit # # } # } ## ----summary, eval = execute.vignette----------------------------------------- # # Small molecule chemicals # summary(met_data$AVERAGE_MASS) # # Generally more lipophilic chemicals # summary(met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED) # # Unsurprisingly then, the chemicals are generally less water-soluble # summary(met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED) # # ~60% of samples in humans # table(conc_data$CONC_SPECIES)/nrow(conc_data)*100 # # ~72% of samples are from blood # table(conc_data$SAMPLING_MATRIX)/nrow(conc_data)*100 ## ----scenarios, eval = execute.vignette--------------------------------------- # # Create a dataframe with 1 row for each unique external exposure scenario # unique_scenarios <- conc_data[with(conc_data, # order(PREFERRED_NAME, # CONC_SPECIES, # SAMPLING_MATRIX, # as.numeric(as.character(DOSE)),EXP_LENGTH,-TIME)),] %>% # distinct(DTXSID,DOSE,DOSE_U,EXP_LENGTH,CONC_SPECIES,SAMPLING_MATRIX, .keep_all = TRUE) ## ----runmodel, eval = execute.vignette---------------------------------------- # # Store the output of each simulation: # simlist <- list() # # Store the Cvt data relevant to each simulation # obslist <- list() # # Conduct one simulation for each unique combination of chemical, species, dose: # for (i in 1:nrow(unique_scenarios)) # if (unique_scenarios$CASRN[i] %in% get_cheminfo(model="gas_pbtk", # suppress.messages = TRUE)) # { # # Identify relevant Cvt data: # relconc <- subset(conc_data,conc_data$DTXSID == unique_scenarios$DTXSID[i] & # conc_data$DOSE == unique_scenarios$DOSE[i] & # conc_data$EXP_LENGTH == unique_scenarios$EXP_LENGTH[i] & # conc_data$CONC_SPECIES == unique_scenarios$CONC_SPECIES[i] & # conc_data$SAMPLING_MATRIX == unique_scenarios$SAMPLING_MATRIX[i]) # obslist[[i]] <- relconc # # # # # # # # # # # # THE FOLLOWING CODE RUNS solve_gas_pbtk FOR EACH SCENARIO # # (UNIQUE COMBINATION OF CHEMICAL, SPECIES, DOSE, ETC.) # # # # # # # # # # # solver.out <- try(suppressWarnings(as.data.frame(solve_gas_pbtk( # chem.cas = unique_scenarios$CASRN[i], # days = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]), # # Make sure we get predicted conc's at the observed times: # times=unique(c(0,signif(obslist[[i]]$TIME,4))), # days # tsteps = 500, # exp.conc = as.numeric(unique_scenarios$DOSE[i]), # SED (06-22-2021) think this is ppmv for all scenarios # input.units = unique_scenarios$DOSE_U[i], # specify the units for exp.conc (ppmv) # exp.duration = unique_scenarios$EXP_LENGTH[i], # days # period = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]), # days # species = as.character(unique_scenarios$CONC_SPECIES[i]), # monitor.vars = c( # "Cven", # "Clung", # "Cart", # "Cplasma", # "Calvppmv", # "Cendexhppmv", # "Cmixexhppmv", # "Calv", # "Cendexh", # "Cmixexh", # "Cmuc", # "AUC"), # vmax.km = FALSE, # suppress.messages = TRUE)))) # # # # # # # # # # # # # # # # # # # # # if (class(solver.out) %in% "try-error") # solver.out <- data.frame(time=NA,Conc=NA) # # print(solver.out) # # Get the blood:plasma ratio: # this.Rb2p <- suppressWarnings(available_rblood2plasma( # chem.cas=unique_scenarios$CASRN[i], # species=as.character(unique_scenarios$CONC_SPECIES[i]))) # solver.out$Rb2p <- this.Rb2p # # # The column simconc holds the appropriate prediction for the sampling matrix # # BL : blood # # EEB : end-exhaled breath # # MEB : mixed exhaled breath # # VBL : venous blood # # ABL : arterial blood # # EB : unspecified exhaled breath sample (assumed to be EEB) # # PL: plasma # # +W with work/exercise # # # # The model outputs are provided in the following units: # # uM: Cven, Clung, Cart, Cplasma, Calv, Cendexh, Cmixexh, Cmuc # # ppmv: Calvppmv, Cendexhppmv, Cmixexhppmv # # uM*days: AUC # if (unique_scenarios$SAMPLING_MATRIX[i] == "VBL" | # unique_scenarios$SAMPLING_MATRIX[i] == "BL" | # unique_scenarios$SAMPLING_MATRIX[i] == "BL (+W)") # { # solver.out$simconc <- solver.out$Cven*this.Rb2p # solver.out$unit <- "uM" # } else if (unique_scenarios$SAMPLING_MATRIX[i] == "ABL") { # solver.out$simconc <- solver.out$Cart*this.Rb2p # solver.out$unit <- "uM" # } else if (unique_scenarios$SAMPLING_MATRIX[i] == "EB" | # unique_scenarios$SAMPLING_MATRIX[i] == "EEB" | # unique_scenarios$SAMPLING_MATRIX[i] == "EB (+W)") # { # solver.out$simconc <- solver.out$Cendexh # uM # solver.out$unit <- "uM" # } else if (unique_scenarios$SAMPLING_MATRIX[i] == "MEB") { # solver.out$simconc <- solver.out$Cmixexh # uM # solver.out$unit <- "uM" # } else if (unique_scenarios$SAMPLING_MATRIX[i] == "PL") { # solver.out$simconc <- solver.out$Cplasma # solver.out$unit <- "uM" # } else { # solver.out$simconc <- NA # solver.out$unit <- NA # } # simlist[[i]] <- solver.out # } ## ----Makestudyplots, eval = execute.vignette---------------------------------- # cvtlist <- list() # for(i in 1:nrow(unique_scenarios)) # { # plot.data <- simlist[[i]] # relconc <- obslist[[i]] # # if (!is.null(plot.data)) # { # #Right now this is only calculating real concentrations according to mg/L in blood # cvtlist[[i]] <- ggplot(data=plot.data, aes(time*24, simconc)) + # geom_line() + # xlab("Time (h)") + # ylab(paste0("Simulated ", # unique_scenarios$SAMPLING_MATRIX[i], # "\nConcentration (" , # solver.out$unit, ")")) + # ggtitle(paste0( # unique_scenarios$PREFERRED_NAME[i], # " (", # unique_scenarios$CONC_SPECIES[i], # ", ", # round(as.numeric(unique_scenarios$DOSE[i]), digits = 2), # unique_scenarios$DOSE_U[i], # " for ", # round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2), # "h in ", # unique_scenarios$SAMPLING_MATRIX[i], ")")) + # geom_point(data = relconc, aes(TIME*24,CONCENTRATION)) + # theme(plot.title = element_text(face = 'bold', size = 20), # axis.title.x = element_text(face = 'bold', size = 20), # axis.text.x = element_text(size=16), # axis.title.y = element_text(face = 'bold', size = 20), # axis.text.y = element_text(size = 16), # legend.title = element_text(face = 'bold', size = 16), # legend.text = element_text(face = 'bold',size = 14))+ # theme_bw() # } # } ## ----init_dataset, eval = execute.vignette------------------------------------ # # Creation of simulated vs. observed concentration dataset # unique_scenarios$RSQD <- 0 # unique_scenarios$RMSE <- 0 # unique_scenarios$AIC <- 0 # simobslist <- list() # obvpredlist <- list() ## ----merge_datasets, eval = execute.vignette---------------------------------- # for(i in 1:length(simlist)) # { # obsdata <- as.data.frame(obslist[[i]]) # simdata <- as.data.frame(simlist[[i]]) # # skips over anything for which there was no observed data or # # insufficient information to run simulation: # if (!is.null(simdata) & # !is.null(obsdata) & # dim(simdata)[1]>1) # { # # Make sure we are looking at consistent time points: # simobscomb <- simdata[simdata$time %in% signif(obsdata$TIME,4),] # obsdata <- subset(obsdata,signif(TIME,4) %in% simobscomb$time) # # Merge with obsdata # colnames(obsdata)[colnames(obsdata) == # "TIME"] <- # "obstime" # # Round to match sim time: # obsdata$time <- signif(obsdata$obstime,4) # colnames(obsdata)[colnames(obsdata) == # "CONCENTRATION"] <- # "obsconc" # colnames(obsdata)[colnames(obsdata) == # "PREFERRED_NAME"] <- # "chem" # colnames(obsdata)[colnames(obsdata) == # "DOSE"] <- # "dose" # colnames(obsdata)[colnames(obsdata) == # "EXP_LENGTH"] <- # "explen" # colnames(obsdata)[colnames(obsdata) == # "CONC_SPECIES"] <- # "species" # colnames(obsdata)[colnames(obsdata) == # "SAMPLING_MATRIX"] <- # "matrix" # colnames(obsdata)[colnames(obsdata) == # "AVERAGE_MASS"] <- # "mw" # colnames(obsdata)[colnames(obsdata) == # "CONC_U"] <- # "CONC_U" # simobscomb <- suppressWarnings(merge(obsdata[,c( # "time", # "obstime", # "obsconc", # "chem", # "dose", # "explen", # "species", # "matrix", # "mw", # "CONC_U", # "ORIGINAL_CONC_U" # )], simobscomb, by="time", all.x=TRUE)) # # # Merge with met_data # this.met_data <- subset(met_data, # PREFERRED_NAME == simobscomb[1,"chem"] & # SPECIES == simobscomb[1,"species"]) # colnames(this.met_data)[colnames(this.met_data)=="CHEM_CLASS"] <- # "chemclass" # colnames(this.met_data)[colnames(this.met_data) == # "OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED"] <- # "logp" # colnames(this.met_data)[colnames(this.met_data) == # "WATER_SOLUBILITY_MOL.L_OPERA_PRED"] <- # "sol" # colnames(this.met_data)[colnames(this.met_data) == # "HENRYS_LAW_ATM.M3.MOLE_OPERA_PRED"] <- # "henry" # colnames(this.met_data)[colnames(this.met_data) == # "VMAX"] <- # "vmax" # colnames(this.met_data)[colnames(this.met_data) == # "KM"] <- # "km" # simobscomb <- suppressWarnings(cbind(simobscomb,this.met_data[c( # "chemclass", # "logp", # "sol", # "henry", # "vmax", # "km")])) # simobslist[[i]] <- simobscomb # } # } ## ----time_quartile, eval = execute.vignette----------------------------------- # for(i in 1:length(simobslist)) # if (!is.null(simobslist[[i]])) # { # simobscomb <- simobslist[[i]] # for (j in 1:nrow(simobscomb)) # { # max.time <- max(simobscomb$time,na.rm=TRUE) # if (is.na(max.time)) simobscomb$tquart <- NA # else if (max.time == 0) simobscomb$tquart <- "1" # else if (!is.na(simobscomb$time[j])) # { # simobscomb$tquart[j] <- as.character(1 + # floor(simobscomb$time[j]/max.time/0.25)) # simobscomb$tquart[simobscomb$tquart=="5"] <- # "4" # } else simobscomb$tquart[j] >- NA # } # simobslist[[i]] <- simobscomb # } ## ----calc_AUC, eval = execute.vignette---------------------------------------- # for(i in 1:length(simobslist)) # if (!is.null(simobslist[[i]])) # { # simobscomb <- simobslist[[i]] # # Calculat the AUC with the trapezoidal rule: # if (nrow(simobscomb)>1) # { # for (k in 2:max(nrow(simobscomb)-1,2,na.rm=TRUE)) # { # simobscomb$obsAUCtrap[1] <- 0 # simobscomb$simAUCtrap[1] <- 0 # if (min(simobscomb$time) <= (simobscomb$explen[1]*1.03) & # nrow(simobscomb) >=2) # { # simobscomb$obsAUCtrap[k] <- simobscomb$obsAUCtrap[k-1] + # 0.5*(simobscomb$time[k] - simobscomb$time[k-1]) * # (simobscomb$obsconc[k] + simobscomb$obsconc[k-1]) # simobscomb$simAUCtrap[k] <- simobscomb$simAUCtrap[k-1] + # 0.5*(simobscomb$time[k]-simobscomb$time[k-1]) * # (simobscomb$simconc[k] + simobscomb$simconc[k-1]) # } else { # simobscomb$obsAUCtrap <- 0 # simobscomb$simAUCtrap <- 0 # } # } # } else { # simobscomb$obsAUCtrap <- 0 # simobscomb$simAUCtrap <- 0 # } # simobscomb$AUCobs <- max(simobscomb$obsAUCtrap) # simobscomb$AUCsim <- max(simobscomb$simAUCtrap) # simobscomb$calcAUC <- max(simobscomb$AUC) # if (min(simobscomb$time) <= simobscomb$explen[1]*1.03) # { # simobscomb$Cmaxobs <- max(simobscomb$obsconc) # simobscomb$Cmaxsim <- max(simobscomb$simconc) # } else { # simobscomb$Cmaxobs <- 0 # simobscomb$Cmaxsim <- 0 # } # simobslist[[i]] <- simobscomb # } ## ----calc_error, eval = execute.vignette-------------------------------------- # for(i in 1:length(simobslist)) # if (!is.null(simobslist[[i]])) # { # simobscomb <- simobslist[[i]] # unique_scenarios$RSQD[i] <- 1 - ( # sum((simobscomb$obsconc - simobscomb$simconc)^2) / # sum((simobscomb$obsconc-mean(simobscomb$obsconc))^2) # ) # unique_scenarios$RMSE[i] <- # sqrt(mean((simobscomb$simconc - simobscomb$obsconc)^2)) # unique_scenarios$AIC[i] <- # nrow(simobscomb)*( # log(2*pi) + 1 + # log((sum((simobscomb$obsconc-simobscomb$simconc)^2) / # nrow(simobscomb))) # ) + ((44+1)*2) #44 is the number of parameters from inhalation_inits.R # simobslist[[i]] <- simobscomb # } ## ----combine_studies, eval = execute.vignette--------------------------------- # obsvpredlist <- list() # for(i in 1:length(simobslist)) # if (!is.null(simobslist[[i]])) # { # simobscomb <- simobslist[[i]] # obsvpredlist[[i]] <- ggplot(simobscomb, aes(x = simconc, y = obsconc)) + # geom_point() + # geom_abline() + # xlab("Simulated Concentrations (uM)") + # ylab("Observed Concentrations (uM)") + # ggtitle(paste0( # unique_scenarios$PREFERRED_NAME[i], # " (", # unique_scenarios$CONC_SPECIES[i], # ", ", # round(as.numeric(unique_scenarios$DOSE[i]), digits = 2), # unique_scenarios$DOSE_U[i], # " for ", # round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2), # "h in ", # unique_scenarios$SAMPLING_MATRIX[i], ")")) + # theme_bw() + # theme(plot.title = element_text(face = 'bold', size = 20), # axis.title.x = element_text(face = 'bold', size = 20), # axis.text.x = element_text(size=16), # axis.title.y = element_text(face = 'bold', size = 20), # axis.text.y = element_text(size = 16), # legend.title = element_text(face = 'bold', size = 16), # legend.text = element_text(face = 'bold',size = 14)) # } ## ----obsvspredplots, eval = execute.vignette---------------------------------- # # Create pdfs of observed vs. predicted concentration plots # dir.create("Linakis2020") # pdf("Linakis2020/obvpredplots.pdf", width = 10, height = 10) # for (i in 1:length(obsvpredlist)) { # print(obsvpredlist[[i]]) # } # dev.off() ## ----annotate_study_stats, eval = execute.vignette---------------------------- # for (i in 1:length(cvtlist)) # if (!is.null(cvtlist[[i]])) # { # cvtlist[[i]] <- cvtlist[[i]] + # geom_text( # x = Inf, # y = Inf, # hjust = 1.3, # vjust = 1.3, # # size = 6, # label = paste0( # "RMSE: ", # round(unique_scenarios$RMSE[i],digits = 2), # "\nAIC: ", # round(unique_scenarios$AIC[i],digits = 2)))# + # # theme( # # plot.title = element_text(face = 'bold', size = 15), # # axis.title.x = element_text(face = 'bold', size = 20), # # axis.text.x = element_text(size=16), # # axis.title.y = element_text(face = 'bold', size = 20), # # axis.text.y = element_text(size = 16), # # legend.title = element_text(face = 'bold', size = 16), # # legend.text = element_text(face = 'bold',size = 14)) # } ## ----cvtplots, eval = execute.vignette---------------------------------------- # # Create pdfs of simulated concentration-time plots against observed c-t values # pdf("Linakis2020/simdataplots.pdf") # for (i in 1:length(cvtlist)) { # print(cvtlist[[i]]) # } # dev.off() ## ----combine_obs_vs_pred, eval = execute.vignette----------------------------- # simobsfull <- do.call("rbind",simobslist) # simobsfullrat <- subset(simobsfull, simobsfull$species == "Rat") # simobsfullhum <- subset(simobsfull, simobsfull$species == "Human") # unique_scenarios <- subset(unique_scenarios,!is.na(unique_scenarios$RSQD)) ## ----Standardize Units, eval = FALSE------------------------------------------ # these.chems <- unique(subset(simobsfull,unit=="ppmv")$chem) # for (this.chem in these.chems) # { # # Use HTTK unit conversion: # this.factor <- convert_units( # input.units="ppmv", output.units="um", chem.name=this.chem, state="gas") # # # Scale the observation # simobsfull[simobsfull$chem==this.chem & # simobsfull$unit=="ppmv","obsconc"] <- # this.factor * simobsfull[ # simobsfull$chem==this.chem & # simobsfull$unit=="ppmv","obsconc"] # # Scale the prediction # simobsfull[simobsfull$chem==this.chem & # simobsfull$unit=="ppmv","simconc"] <- # this.factor * simobsfull[ # simobsfull$chem==this.chem & # simobsfull$unit=="ppmv","simconc"] # # Change the reported unit # simobsfull[simobsfull$chem==this.chem & # simobsfull$unit=="ppmv","unit"] <- # "uM" # } ## ----regression, eval = execute.vignette-------------------------------------- # table(unique_scenarios$CONC_SPECIES) # nrow(simobsfull) - nrow(simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,]) # pmiss <- (nrow(simobsfull) - # nrow(simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,])) / # nrow(simobsfull) * 100 # missdata <- (simobsfull[ # is.na(simobsfull$simconc) | # simobsfull$simconc <= 0 | # simobsfull$obsconc <= 0,]) # t0df <- simobsfull[simobsfull$obstime == 0,] # lmall <- lm( # #log transforms: # log10(simobsfull$obsconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0]) ~ # #log transforms: # log10(simobsfull$simconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0])) # #Linear binned 1 # lmsub1 <- lm( # simobsfull$obsconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc < 0.1] ~ # simobsfull$simconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc < 0.1]) # #Linear binned 2 # lmsub2 <- lm( # simobsfull$obsconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc >= 0.1 & # simobsfull$obsconc < 10] ~ # simobsfull$simconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc >= 0.1 & # simobsfull$obsconc < 10]) # #Linear binned 3 # lmsub3 <- lm( # simobsfull$obsconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc >= 10] ~ # simobsfull$simconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc >= 10]) # lmrat <- lm( # log10(simobsfullrat$obsconc[ # !is.na(simobsfullrat$simconc) & # simobsfullrat$simconc > 0 & # simobsfullrat$obsconc > 0]) ~ # log10(simobsfullrat$simconc[ # !is.na(simobsfullrat$simconc) & # simobsfullrat$simconc > 0 & # simobsfullrat$obsconc > 0])) # unique(simobsfullrat$chem) # lmhum <- lm( # log10(simobsfullhum$obsconc[ # !is.na(simobsfullhum$simconc) & # simobsfullhum$simconc > 0 & # simobsfullhum$obsconc > 0]) ~ # log10(simobsfullhum$simconc[ # !is.na(simobsfullhum$simconc) & # simobsfullhum$simconc > 0 & # simobsfullhum$obsconc > 0])) # unique(simobsfullhum$chem) # concregslope <- summary(lmall)$coef[2,1] # concregr2 <- summary(lmall)$r.squared # concregrmse <- sqrt(mean(lmall$residuals^2)) # totalrmse <- sqrt(mean(( # log10(simobsfull$simconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0]) - # log10(simobsfull$obsconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0]))^2, # na.rm = TRUE)) # totalmae <- mean(abs( # log10(simobsfull$simconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0]) - # log10(simobsfull$obsconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0])), # na.rm = TRUE) # totalaic <- nrow( # simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc >0 & # simobsfull$obsconc > # 0,]) * # (log(2*pi) + # 1 + # log((sum( # (simobsfull$obsconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0] - # simobsfull$simconc[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0])^2, # na.rm=TRUE) / # nrow(simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,])))) + # ((44+1)*2) #44 is the number of parameters from inhalation_inits.R # mispred <- table(abs( # log10(simobsfull$simconc) - # log10(simobsfull$obsconc))>2 & # simobsfull$simconc>0) # mispred[2] # mispred[2] / nrow(simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc >0 & # simobsfull$obsconc > 0,])*100 # overpred <- table( # log10(simobsfull$simconc) - # log10(simobsfull$obsconc)>2 & # simobsfull$simconc>0) # overpred[2] # overpred[2] / nrow(simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc >0 & # simobsfull$obsconc > 0,])*100 # underpred <- table( # log10(simobsfull$obsconc) - # log10(simobsfull$simconc)>2 & # simobsfull$simconc>0) # underpred[2] # underpred[2] / nrow(simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc >0 & # simobsfull$obsconc > 0,])*100 # mispredhalf <- table(abs( # log10(simobsfull$simconc) - # log10(simobsfull$obsconc))>0.5 & # simobsfull$simconc>0) # mispredhalf[2] # mispredhalf[2] / nrow(simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc >0 & # simobsfull$obsconc > 0,])*100 # overpredhalf <- table( # log10(simobsfull$simconc) - # log10(simobsfull$obsconc)>0.5 & # simobsfull$simconc>0) # overpredhalf[2] # overpredhalf[2] / nrow(simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc >0 & # simobsfull$obsconc > 0,])*100 # underpredhalf <- table( # log10(simobsfull$obsconc) - # log10(simobsfull$simconc)>0.5 & # simobsfull$simconc>0) # underpredhalf[2] # underpredhalf[2] / nrow(simobsfull[ # !is.na(simobsfull$simconc) & # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,])*100 # chemunderpred <- subset(simobsfull, # log10(simobsfull$simconc) - # log10(simobsfull$obsconc) < 0 & # simobsfull$simconc > 0) # table(chemunderpred$chemclass) / table(simobsfull$chemclass)*100 ## ----obspredFig2, eval = execute.vignette------------------------------------- # fig2 <- ggplot( # data = simobsfull[ # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,], # aes(x = log10(simconc), y = log10(obsconc))) + # geom_point( # color = ifelse( # abs( # log10(simobsfull[ # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,]$simconc) - # log10(simobsfull[ # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,]$obsconc)) >2, # 'red', # 'black')) + # geom_abline() + # xlab("Log(Simulated Concentrations)") + # ylab("Log(Observed Concentrations)") + # theme_bw() + # geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall')) + # geom_smooth(method = 'lm', se = FALSE, aes(color = species)) + # geom_text( # x = Inf, # y = -Inf, # hjust = 1.05, # vjust = -0.15, # size = 8, # label = paste0( # # "Regression slope: ", # # round(summary(lmall)$coef[2,1],digits = 2), # "\nRegression R^2: ", # round(summary(lmall)$r.squared,digits = 2), # "\nRegression RMSE: ", # round(sqrt(mean(lmall$residuals^2)),digits = 2), # "\nRMSE (Identity): ", # round(totalrmse,digits = 2) # # "\n% Missing:", # # round(pmiss, digits = 2), "%") # )) + # #geom_text( # # data = simobsfull[ # # abs(log10(simobsfull$simconc) - log10(simobsfull$obsconc))>7 & # # simobsfull$simconc>0 & simobsfull$obsconc > 0,], # # aes(label = paste(chem,species,matrix)), # ## fontface = 'bold', # # check_overlap = TRUE, # # size = 3.5, # # hjust = 0.5, # # vjust = -0.8) + # scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) + # theme( # plot.title = element_text(face = 'bold', size = 15), # axis.title.x = element_text(face = 'bold', size = 30), # axis.text.x = element_text(size=16), # axis.title.y = element_text(face = 'bold', size = 30), # axis.text.y = element_text(size = 16), # legend.title = element_text(face = 'bold', size = 24), # legend.text = element_text(face = 'bold',size = 24)) # fig2 #Display plot in R ## ----make_how_to_add_models_version, eval=execute.vignette-------------------- # library(scales) # # Function for formatting tick labels: # scientific_10 <- function(x) { # out <- gsub("1e", "10^", scientific_format()(x)) # out <- gsub("\\+","",out) # out <- gsub("10\\^01","10",out) # out <- parse(text=gsub("10\\^00","1",out)) # } # # font.size.large <- 10 # font.size.small <- 8 # # figaddmodels <- ggplot( # data = simobsfull[ # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,], # aes(x = simconc, y = obsconc)) + # geom_point( # color = ifelse( # abs( # log10(simobsfull[ # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,]$simconc) - # log10(simobsfull[ # simobsfull$simconc > 0 & # simobsfull$obsconc > 0,]$obsconc)) >2, # 'red', # 'black'),alpha=0.15) + # geom_abline() + # scale_y_log10(label=scientific_10,limits=c(1e-4,1e4))+ # scale_x_log10(label=scientific_10,limits=c(1e-4,1e4))+ # xlab("Simulated Concentrations") + # ylab("Observed Concentrations") + # theme_bw() + # geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall', linetype="Overall")) + # geom_smooth(method = 'lm', se = FALSE, aes(color = species, linetype = species)) + # geom_text( # x = 2, # y = -1, # size = 4, # label = paste0("RMSLE: ", # round(totalrmse,digits = 2) # )) + # scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) + # scale_linetype_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) + # theme( # plot.title = element_text(face = 'bold', size = font.size.small), # axis.title.x = element_text(face = 'bold', size = font.size.large), # axis.text.x = element_text(size=font.size.small), # axis.title.y = element_text(face = 'bold', size = font.size.large), # axis.text.y = element_text(size = font.size.small), # legend.title = element_text(face = 'bold', size = font.size.large), # legend.text = element_text(face = 'bold',size = font.size.large)) # print(figaddmodels) # ggsave("c:/users/jwambaug/AddModelsFig1.tiff", width=6, height=4, dpi=300) # # counts <- simobsfull[,c("chem","dose","explen","species")] # counts <- subset(counts,!duplicated(counts)) # paste(length(unique(counts$chem)), # "chemicals across",dim(counts)[1], # "experimental conditions in", # length(unique(counts$species)),"species.") ## ----write_figure2, eval = FALSE---------------------------------------------- # pdf("Linakis2020/Figure2.pdf", width = 10, height = 10) # print(fig2) # dev.off() ## ----calculations, eval = execute.vignette------------------------------------ # # Create data and run calculations for populating plots # cmaxfull <- subset(simobsfull, !duplicated(simobsfull$AUCobs) & simobsfull$Cmaxobs != 0) # cmaxobs <- cmaxfull$Cmaxobs # cmaxsim <- cmaxfull$Cmaxsim # cmaxobs <- cmaxobs[!is.nan(cmaxsim)] # cmaxsim <- cmaxsim[!is.nan(cmaxsim)] # cmaxsim[!is.finite(log10(cmaxsim))] <- NA # cmaxlm <- lm(log10(cmaxobs)~log10(cmaxsim), na.action = na.exclude) # cmaxvcbkg <- subset(cmaxfull, # paste( # cmaxfull$chem, # cmaxfull$dose, # cmaxfull$explen, # cmaxfull$species, # cmaxfull$matrix) %in% # paste( # t0df$chem, # t0df$dose, # t0df$explen, # t0df$species, # t0df$matrix)) # cmaxvcbkg$cmaxcbkgratio <- cmaxvcbkg$Cmaxobs / cmaxvcbkg$obsconc # cmaxvcbkg$adjustedCmaxsim <- cmaxvcbkg$Cmaxsim - cmaxvcbkg$obsconc # aucfull <-subset(simobsfull, # !duplicated(simobsfull$AUCobs) & # simobsfull$AUCobs != 0) # aucobs <- aucfull$AUCobs # aucsim <- aucfull$AUCsim # aucobs <- aucobs[!is.nan(aucsim)] # aucsim <- aucsim[!is.nan(aucsim)] # aucsim[!is.finite(log10(aucsim))] <- NA # auclm <- lm(log10(aucobs)~log10(aucsim), na.action = na.exclude) # cmaxslope <- summary(cmaxlm)$coef[2,1] # cmaxrsq <- summary(cmaxlm)$r.squared # totalrmsecmax <- sqrt(mean((log10(cmaxfull$Cmaxsim) - # log10(cmaxfull$Cmaxobs))^2, na.rm = TRUE)) # cmaxmiss <- nrow(cmaxfull[ # abs(log10(cmaxfull$Cmaxsim) - # log10(cmaxfull$Cmaxobs)) > 1,]) # cmaxmissp <- nrow(cmaxfull[ # abs(log10(cmaxfull$Cmaxsim) - # log10(cmaxfull$Cmaxobs)) > 1,]) / # nrow(cmaxfull) * 100 # cmaxmisschem <- table(cmaxfull[ # abs(log10(cmaxfull$Cmaxsim) - # log10(cmaxfull$Cmaxobs)) > 1,]$chem) # aucslope <- summary(auclm)$coef[2,1] # aucrsq <- summary(auclm)$r.squared # totalrmseauc <- sqrt(mean(( # log10(aucfull$AUCsim) - # log10(aucfull$AUCobs))^2, na.rm = TRUE)) # aucmiss <- nrow(aucfull[ # abs(log10(aucfull$AUCsim) - # log10(aucfull$AUCobs)) > 1,]) # aucmissp <- nrow(aucfull[ # abs(log10(aucfull$AUCsim) - # log10(aucfull$AUCobs)) > 1,]) / # nrow(aucfull) * 100 # aucmisschem <- table(aucfull[ # abs(log10(aucfull$AUCsim) - # log10(aucfull$AUCobs)) > 1,]$chem) ## ----Fig4plot, eval = execute.vignette---------------------------------------- # cmaxp <- ggplot(data = cmaxfull, aes(x = log10(Cmaxsim), y = log10(Cmaxobs))) + # geom_point(color = # ifelse(abs(log10(cmaxfull$Cmaxsim) -log10(cmaxfull$Cmaxobs))>=2, "red","black")) + # geom_abline() + # xlab("Log(Simulated Max Concentration)") + # ylab("Log(Observed Max Concentration)") + # theme_bw() + # geom_smooth(method = 'lm', se = FALSE, aes(color = 'Overall')) + # geom_smooth(method = 'lm', se = FALSE, aes(color = species)) + # geom_text(x = Inf, # y = -Inf, # hjust = 1.05, # vjust = -0.15, # # size = 6, # label = paste0("Regression slope: ", # round(summary(cmaxlm)$coef[2,1],digits = 2), # "\nRegression R^2: ", # round(summary(cmaxlm)$r.squared,digits = 2))) + # geom_text_repel( # data = cmaxfull[ # (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 & # log10(cmaxfull$Cmaxsim) > 2,], # aes(label = paste(chem,species,matrix)), # force = 2, # # size = 5.3, # fontface = 'bold', # color = 'black', # hjust = -0.05, # vjust = 0.5) + # scale_y_continuous(lim = c (-1,5)) + # scale_x_continuous(lim = c(-1,5)) + # geom_text_repel( # data = cmaxfull[ # (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 & # log10(cmaxfull$Cmaxsim) <= 2,], # aes(label = paste(chem,species,matrix)), # nudge_x = 0.0, # nudge_y = -0.2, # force = 2, # # size = 5.3, # fontface = 'bold', # color = 'black', # hjust = -0.05, # vjust = 0.5) + # geom_text( # data = cmaxfull[ # (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))<=-2,], # aes(label = paste(chem,species,matrix)), # # size = 5.3, # fontface = 'bold', # color = 'black', # hjust = 0.5, # vjust = -0.7) + # scale_color_discrete( # name = 'Species', # breaks = c("Overall","Human","Rat")) #+ # # theme(plot.title = element_text(face = 'bold', size = 10), # # axis.title.x = element_text(face = 'bold', size = 10), # # axis.text.x = element_text(size=8), # # axis.title.y = element_text(face = 'bold', size = 10), # # axis.text.y = element_text(size = 8), # # legend.title = element_text(face = 'bold', size = 8), # # legend.text = element_text(face = 'bold',size = 8)) # cmaxp # aucp <- ggplot( # data = aucfull, # aes(x = log10(AUCsim), y = log10(AUCobs))) + # geom_point(color = # ifelse(abs(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2, "red","black")) + # geom_abline() + # xlab("Log(Simulated AUC)") + # ylab("Log(Observed AUC)") + # theme_bw() + # geom_smooth(method = 'lm', se = FALSE, aes(color = "Overall")) + # geom_smooth(method = 'lm', se = FALSE, aes(color = species)) + # geom_text( # x = Inf, # y = -Inf, # hjust = 1.05, # vjust = -0.15, # # size = 6, # label = paste0( # "Regression slope: ", # round(summary(auclm)$coef[2,1],digits = 2), # "\nRegression R^2: ", # round(summary(auclm)$r.squared,digits = 2))) + # geom_text_repel( # data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2,], # aes(label = paste(chem,species,matrix)), # # size = 5.3, # fontface = 'bold', # color = 'black', # hjust = -0.05, # vjust = 0.5) + # scale_y_continuous(lim = c (-2,4)) + # scale_x_continuous(lim = c(-2,4)) + # geom_text( # data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))<=-2,], # aes(label = paste(chem,species,matrix)), # # size = 5.3, # fontface = 'bold', # color = 'black', # hjust = 0.5, # vjust = -0.8) + # scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) #+ # # theme( # # plot.title = element_text(face = 'bold', size = 15), # # axis.title.x = element_text(face = 'bold', size = 20), # # axis.text.x = element_text(size=16), # # axis.title.y = element_text(face = 'bold', size = 20), # # axis.text.y = element_text(size = 16), # # legend.title = element_text(face = 'bold', size = 16), # # legend.text = element_text(face = 'bold',size = 14)) # aucp ## ----write_figure4, eval = execute.vignette----------------------------------- # pdf("Linakis2020/Figure4.pdf", width = 20, height = 10) # plot_grid(cmaxp,aucp,nrow = 1, labels = c('A','B'), label_size = 30) # dev.off() ## ----Fig3plot, eval = execute.vignette---------------------------------------- # simobsfull$aggscen <- as.factor(paste(simobsfull$chem, # simobsfull$species, # simobsfull$matrix)) # chem.lm <- lm( # log10(simconc) - log10(obsconc) ~ # aggscen, # data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,]) # chem.res <- resid(chem.lm) # # Number of observations per chemical class # numpt <- simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,] %>% # group_by(chemclass) %>% summarize(n = paste("n =", length(simconc))) # fig3 <- ggplot( # data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,], # aes(x = aggscen, y = log10(simconc)-log10(obsconc), fill = chemclass)) + # geom_boxplot() + # geom_hline(yintercept = 0) + # geom_hline(yintercept = 2, linetype = 2)+ # geom_hline(yintercept = -2, linetype = 2)+ # xlab("Exposure Scenario") + # ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") + # facet_wrap(vars(chemclass), scales = 'free_x', nrow = 1) + #35 by 12 for poster # theme_bw() + # geom_text( # data = numpt, # aes(x = Inf, y = -Inf, hjust = 1.05, vjust = -0.5, label = n), # size = 10, # colour = 'black', # inherit.aes = FALSE, # parse = FALSE) + # theme( # axis.text.x = element_text(angle = 90, hjust = 1,vjust=0.5,size = 20, face = 'bold'), # strip.text.x = element_text(face = 'bold', size = 24), # legend.position = 'none', # axis.title.x = element_text(face = 'bold', size = 30), # axis.title.y = element_text(face = 'bold', size = 30), # axis.text.y = element_text(face = 'bold',size = 25, color = 'black')) # fig3 ## ----write_figure3, eval = execute.vignette----------------------------------- # pdf("Linakis2020/Figure3.pdf", width = 40, height = 13) # print(fig3) # dev.off() ## ----FigS1, eval = execute.vignette------------------------------------------- # figs1a <- ggplot( # data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,], # aes(x = tquart, y = log10(simconc)-log10(obsconc))) + # geom_boxplot()+ # geom_hline(yintercept = 0)+ # geom_hline(yintercept = 2, linetype = 2)+ # geom_hline(yintercept = -2, linetype = 2)+ # xlab("\nTime Quartile\n") + # ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") + # theme_bw()+ # theme( # axis.text.x = element_text(size = 20, face = 'bold'), # strip.text.x = element_text(face = 'bold', size = 20), # legend.position = 'none', # axis.title.x = element_text(face = 'bold', size = 20), # axis.title.y = element_text(face = 'bold', size = 20), # axis.text.y = element_text(size = 20, face = 'bold')) # figs1a # figs1b <- ggplot( # data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,], # aes(x = mw, y = log10(simconc)-log10(obsconc))) + # geom_point()+ # geom_hline(yintercept = 0)+ # geom_hline(yintercept = 2, linetype = 2)+ # geom_hline(yintercept = -2, linetype = 2)+ # xlab("\nMolecular Weight (g/mol)\n") + # ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") + # theme_bw()+ # theme( # axis.text.x = element_text(size = 20, face = 'bold'), # strip.text.x = element_text(face = 'bold', size = 20), # legend.position = 'none', # axis.title.x = element_text(face = 'bold', size = 20), # axis.title.y = element_text(face = 'bold', size = 20), # axis.text.y = element_text(size = 20, face = 'bold')) # figs1b # figs1c <- ggplot( # data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,], # aes(x = logp, y = log10(simconc)-log10(obsconc))) + # geom_point()+ # geom_hline(yintercept = 0)+ # geom_hline(yintercept = 2, linetype = 2)+ # geom_hline(yintercept = -2, linetype = 2)+ # xlab("\nLog P") + # ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") + # theme_bw() + # theme( # axis.text.x = element_text(size = 20, face = 'bold'), # strip.text.x = element_text(face = 'bold', size = 20), # legend.position = 'none', # axis.title.x = element_text(face = 'bold', size = 20), # axis.title.y = element_text(face = 'bold', size = 20), # axis.text.y = element_text(size = 20, face = 'bold')) # figs1c # figs1d <- ggplot( # data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,], # aes(x = sol, y = log10(simconc)-log10(obsconc))) + # geom_point()+ # geom_hline(yintercept = 0)+ # geom_hline(yintercept = 2, linetype = 2)+ # geom_hline(yintercept = -2, linetype = 2)+ # xlab("\nSolubility (mol/L)") + # ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") + # scale_x_log10()+ # theme_bw() + # theme( # axis.text.x = element_text(size = 20, face = 'bold'), # strip.text.x = element_text(face = 'bold', size = 20), # legend.position = 'none', # axis.title.x = element_text(face = 'bold', size = 20), # axis.title.y = element_text(face = 'bold', size = 20), # axis.text.y = element_text(size = 20, face = 'bold')) # figs1d ## ----write_figures1, eval = execute.vignette---------------------------------- # pdf("Linakis2020/FigureS1.pdf", width = 20, height = 20) # plot_grid(figs1a,figs1b,figs1c,figs1d,nrow = 2, labels = c('A','B','C','D'), label_size = 30) # dev.off() ## ----SupTable2, eval = execute.vignette--------------------------------------- # senschem <- list() # sensslope <- list() # sensrsq <- list() # sensregrmse <- list() # senstotalrmse <- list() # senspmiss <- list() # senscmaxslope <- list() # senscmaxrsq <- list() # senstotalrmsecmax <- list() # sensaucslope <- list() # sensaucrsq <- list() # senstotalrmseauc <- list() # for (i in 1:nrow(simobsfull)) # { # simobsfullsens <- subset(simobsfull,simobsfull$chem != simobsfull$chem[i]) # senschem[i] <- as.character(simobsfull$chem[i]) # senslm <- lm( # log10(simobsfullsens$obsconc[ # !is.na(simobsfullsens$simconc) & # simobsfullsens$simconc > 0 & # simobsfullsens$obsconc > 0]) ~ # log10(simobsfullsens$simconc[ # !is.na(simobsfullsens$simconc) & # simobsfullsens$simconc >0 & # simobsfullsens$obsconc > 0])) # sensslope[i] <- round(summary(senslm)$coef[2,1],digits = 2) # sensrsq[i] <- round(summary(senslm)$r.squared,digits = 2) # sensregrmse[i] <- round(sqrt(mean(senslm$residuals^2)),digits = 2) # senstotalrmse[i] <- round(sqrt(mean(( # log10(simobsfullsens$simconc[ # !is.na(simobsfullsens$simconc) & # simobsfullsens$simconc >0 & # simobsfullsens$obsconc > 0]) - # log10(simobsfullsens$obsconc[ # !is.na(simobsfullsens$simconc) & # simobsfullsens$simconc >0 & # simobsfullsens$obsconc > 0]))^2, # na.rm = TRUE)), digits = 2) # senspmiss[i] <- round((nrow(simobsfullsens) - # nrow(simobsfullsens[ # !is.na(simobsfullsens$simconc) & # simobsfullsens$simconc >0 & # simobsfullsens$obsconc > 0,])) / nrow(simobsfullsens) * 100, # digits = 2) # senscmaxfull <- subset(simobsfullsens, !duplicated(simobsfullsens$Cmaxobs)) # senscmaxlm <- lm( # log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]) ~ # log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]), # na.action = na.exclude) # sensaucfull <-subset(simobsfullsens, !duplicated(simobsfullsens$AUCobs)) # sensauclm <- lm( # log10(aucfull$AUCobs[aucfull$AUCobs>0]) ~ # log10(aucfull$AUCsim[aucfull$AUCobs>0]), # na.action = na.exclude) # senscmaxslope[i] <- round(summary(senscmaxlm)$coef[2,1],digits = 2) # senscmaxrsq[i] <- round(summary(senscmaxlm)$r.squared,digits = 2) # senstotalrmsecmax[i] <- sqrt(mean((log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]) - log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]))^2, na.rm = TRUE)) # sensaucslope[i] <- round(summary(sensauclm)$coef[2,1],digits = 2) # sensaucrsq[i] <- round(summary(sensauclm)$r.squared,digits = 2) # senstotalrmseauc[i] <- sqrt(mean((log10(sensaucfull$AUCsim[sensaucfull$AUCobs>0]) - log10(sensaucfull$AUCobs[sensaucfull$AUCobs>0]))^2, na.rm = TRUE)) # } # sensitivitydf <- data.frame(Chemical <- as.character(senschem), # sensSlope <- as.numeric(sensslope), # sensRsq <- as.numeric(sensrsq), # sensRegrmse <- as.numeric(sensregrmse), # sensTotrmse <- as.numeric(senstotalrmse), # sensPmiss <- as.numeric(senspmiss), # sensCmaxslope <- as.numeric(senscmaxslope), # sensCmaxrsq <- as.numeric(senscmaxrsq), # sensCmaxrmse <- as.numeric(senstotalrmsecmax), # sensAUCslope <- as.numeric(sensaucslope), # sensAUCrsq <- as.numeric(sensaucrsq), # sensAUCrmse <- as.numeric(senstotalrmseauc), # stringsAsFactors=FALSE) # sensitivitydf <- subset(sensitivitydf,!duplicated(sensitivitydf$Chemical....as.character.senschem.)) # names(sensitivitydf) <- c('Chemical Dropped','Regression Slope','Regression R^2','Regression RMSE','Orthogonal RMSE', 'Percent Data Censored','Cmax Regression Slope','Cmax Regression R^2','Cmax Orthogonal RMSE','AUC Regression Slope','AUC Regression R^2','AUC Orthogonal RMSE') # notdropped <- c('None',concregslope,concregr2,concregrmse,totalrmse,pmiss,cmaxslope,cmaxrsq,totalrmsecmax,aucslope,aucrsq,totalrmseauc) # sensitivitydf <- rbind(notdropped, sensitivitydf) # sensitivitydf[,-1] <- sapply(sensitivitydf[,-1],as.numeric) # sensitivitydf[,-1] <- round(sensitivitydf[,-1],2) # # Clean up and write file # rm(chem.lm, obvpredplot, p, pdata1, plot.data, plots, relconc, sensaucfull, sensauclm, sensaucrsq, sensaucslope, senschem, senscmaxfull, senscmaxlm, senscmaxrsq, senscmaxslope, senslm, senspmiss, sensregrmse, sensrsq, sensslope, senstotalrmse, senstotalrmseauc, senstotalrmsecmax, solve, AUCrmse, AUCrsq, AUCslope, chem.res, Chemical, Cmaxrmse, Cmaxrsq, Cmaxslope, colors, count, i, j, k, met_col, name, name1, Pmiss, Regrmse, Rsq, Slope, rem, Totrmse) # # write.csv(sensitivitydf, 'supptab2.csv',row.names = FALSE) ## ----SupTable1, eval = execute.vignette--------------------------------------- # supptab1 <- subset(unique_scenarios, !duplicated(unique_scenarios$PREFERRED_NAME) | !duplicated(unique_scenarios$SOURCE_CVT) | !duplicated(unique_scenarios$CONC_SPECIES)) # for(i in 1:nrow(supptab1)){ # tryCatch({ # supptab1$Metabolism_Source[i] <- met_data$SOURCE_MET[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]] # supptab1$Log_P[i] <- met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]] # supptab1$Solubility[i] <- met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]] # supptab1$Blood_Air_Partition_Coefficient[i] <- met_data$CALC_PBA[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]] # supptab1$Chem_Class[i] <- met_data$CHEM_CLASS[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]] # supptab1$Species[i] <- met_data$SPECIES[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]] # supptab1$Vmax[i] <- met_data$VMAX[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]] # supptab1$Km[i] <- met_data$KM[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]] # }, error = function(e){}) # } # supptab1 <- supptab1[c('PREFERRED_NAME','DTXSID','CASRN','Chem_Class','AVERAGE_MASS','Log_P','Solubility','Blood_Air_Partition_Coefficient','Species','Vmax','Km','Metabolism_Source','SOURCE_CVT')] # names(supptab1) <- c('Chemical','DTXSID','CASRN','CAMEO Chemical Class','Molecular Weight (g/mol)','Log P','Solubility (mol/L)','Blood Air Partition Coefficient','Available Species Data','Vmax (pmol/min/10^6 cells)','KM (uM)','Metabolism Data Source','Concentration-Time Data Source') # write.csv(supptab1, "supptab1.csv", row.names = FALSE)