# Script form of an R Markdown walkthrough of the SmartEDA package applied to
# the Carseats data. Lines starting with `## ----` look like knitr chunk
# headers (label + chunk options) and are kept verbatim; chunks whose header
# says eval=F contain only commented-out code that is shown but never run.
#
# NOTE(review): `Carseats` is never assigned in evaluated code; it is
# presumably resolved through the attached ISLR package -- confirm.

## ----setup, include=FALSE---------------------------------------------------------------------------------------------------------------------------
library(rmarkdown)
library(SmartEDA)
library(knitr)
library(ISLR)
library(scales)
library(gridExtra)
library(ggplot2)

## ----eda-c3-r, warning=FALSE,eval=F-----------------------------------------------------------------------------------------------------------------
# #install.packages("ISLR")
# library("ISLR")
# #install.packages("SmartEDA")
# library("SmartEDA")
# ## Load sample dataset from ISLR package
# Carseats <- ISLR::Carseats

## ----od_1,warning=FALSE,eval=F,include=T------------------------------------------------------------------------------------------------------------
# # Overview of the data - Type = 1
# ExpData(data=Carseats,type=1)
#
# # Structure of the data - Type = 2
# ExpData(data=Carseats,type=2)

## ----od_2,warning=FALSE,eval=T,include=F------------------------------------------------------------------------------------------------------------
ovw_tabl <- ExpData(data = Carseats, type = 1)
ovw_tab2 <- ExpData(data = Carseats, type = 2)

## ----od_3,warning=FALSE,eval=T,render=ovw_tabl,echo=F-----------------------------------------------------------------------------------------------
kable(ovw_tabl, "html")

## ----od_31,warning=FALSE,eval=T,render=ovw_tab2,echo=F----------------------------------------------------------------------------------------------
kable(ovw_tab2, "html")

## ----od_du2,warning=FALSE,eval=T,include=F----------------------------------------------------------------------------------------------------------
ovw_tabl_du <- ExpData(data = Carseats, type = 2, fun = c("mean", "median", "var"))

## ----od_du1,warning=FALSE,eval=F,include=T----------------------------------------------------------------------------------------------------------
# # Metadata Information with additional statistics like mean, median and variance
# ExpData(data=Carseats,type=2, fun = c("mean", "median", "var"))

## ----od_du3,warning=FALSE,eval=T,render=ovw_tabl_du,echo=F------------------------------------------------------------------------------------------
kable(ovw_tabl_du, "html")

## ----od_du4,warning=FALSE,eval=T,include=T----------------------------------------------------------------------------------------------------------
# Derive Quantile
# User-defined summary functions; they are handed to ExpData() by name through
# its `fun` argument below. Each returns the requested quantile of `x`,
# ignoring missing values.
quantile_10 <- function(x) {
  quantile(x, probs = 0.1, na.rm = TRUE)
}

quantile_90 <- function(x) {
  quantile(x, probs = 0.9, na.rm = TRUE)
}

output_e1 <- ExpData(data = Carseats, type = 2, fun = c("quantile_10", "quantile_90"))

## ----od_du5,warning=FALSE,eval=T,render=output_e1,echo=F--------------------------------------------------------------------------------------------
kable(output_e1, "html")

## ----c1.1,warning=FALSE,eval=T,include=F------------------------------------------------------------------------------------------------------------
# NOTE(review): this evaluated call uses Nlim=3 while the displayed code in
# chunk c1.11 shows Nlim=10 -- confirm which value is intended.
ec1 <- ExpNumStat(
  Carseats,
  by = "A",
  gp = NULL,
  Qnt = seq(0, 1, 0.1),
  MesofShape = 2,
  Outlier = TRUE,
  round = 2,
  Nlim = 3
)
rownames(ec1) <- NULL

## ----c1.11, warning=FALSE,eval=F,include=T----------------------------------------------------------------------------------------------------------
# ExpNumStat(Carseats,by="A",gp=NULL,Qnt=seq(0,1,0.1),MesofShape=2,Outlier=TRUE,round=2,Nlim=10)
#

## ----c1.12,warning=FALSE,eval=T,render=ec1,echo=F---------------------------------------------------------------------------------------------------
paged_table(ec1)

## ----warning=FALSE,eval=T,include=T-----------------------------------------------------------------------------------------------------------------
carseat <- ISLR::Carseats
## Compute random weight
# NOTE(review): no set.seed() call is visible in this script, so these weights
# (and every weighted summary below) will differ from run to run.
carseat$wt <- stats::runif(nrow(carseat), 0.5, 1.5)
wt_summary <- ExpNumStat(carseat, by = "A", gp = NULL, round = 2, Nlim = 10, weight = "wt")
wt_summary[, c("Vname", "TN", "W_count", "mean", "W_Mean", "SD", "W_Sd")]

## ----warning=FALSE,eval=T,include=T-----------------------------------------------------------------------------------------------------------------
## With group by statement
wt_summary <- ExpNumStat(carseat, by = "GA", gp = "ShelveLoc", round = 2, Nlim = 10, weight = "wt")
wt_summary[, c("Vname", "Group", "TN", "W_count", "mean", "W_Mean", "SD", "W_Sd")]

## ----c1.2 ,warning=FALSE,eval=T,include=T,fig.align='center',fig.height=7,fig.width=7---------------------------------------------------------------
# Note: variables with 10 or fewer unique values are excluded [nlim=10]
plot1 <- ExpNumViz(Carseats, target = NULL, nlim = 10, Page = c(2, 2), sample = 4)
plot1[[1]]

## ----ec13, eval=T,include=F-------------------------------------------------------------------------------------------------------------------------
# NOTE(review): this evaluated call uses nlim=5 while the displayed code in
# chunk ec14 shows nlim=3 -- confirm which value is intended.
et1 <- ExpCTable(
  Carseats,
  Target = NULL,
  margin = 1,
  clim = 10,
  nlim = 5,
  round = 2,
  bin = NULL,
  per = TRUE
)
rownames(et1) <- NULL

## ----ec14, warning=FALSE,eval=F,include=T-----------------------------------------------------------------------------------------------------------
# ExpCTable(Carseats,Target=NULL,margin=1,clim=10,nlim=3,round=2,bin=NULL,per=TRUE)

## ----ec14.1,warning=FALSE,eval=T,render=et1,echo=F--------------------------------------------------------------------------------------------------
kable(et1, "html")

## ----bp1,warning=FALSE,eval=T,include=T,fig.align='center',fig.height=7,fig.width=7-----------------------------------------------------------------
plot2 <- ExpCatViz(
  Carseats,
  target = NULL,
  col = "slateblue4",
  clim = 10,
  margin = 2,
  Page = c(2, 2),
  sample = 4
)
plot2[[1]]

## ----tbd0,warning=FALSE,eval=T,include=T------------------------------------------------------------------------------------------------------------
summary(Carseats[, "Price"])

## ----con_1,warning=FALSE,eval=T,include=F-----------------------------------------------------------------------------------------------------------
cpp <- ExpNumStat(
  Carseats,
  by = "A",
  gp = "Price",
  Qnt = seq(0, 1, 0.1),
  MesofShape = 1,
  Outlier = TRUE,
  round = 2
)
rownames(cpp) <- NULL

## ----con_2, warning=FALSE,eval=F,include=T----------------------------------------------------------------------------------------------------------
# ExpNumStat(Carseats,by="A",gp="Price",Qnt=seq(0,1,0.1),MesofShape=1,Outlier=TRUE,round=2)

## ----con_3,warning=FALSE,eval=T,render=cpp,echo=F---------------------------------------------------------------------------------------------------
paged_table(cpp)

## ----snv1,warning=FALSE,eval=T,include=T,fig.align='center',fig.height=7,fig.width=7----------------------------------------------------------------
# Note: sample=8 means 8 randomly selected scatter plots
# Note: nlim=4 means numeric variables with more than 4 unique values are included
plot3 <- ExpNumViz(
  Carseats,
  target = "Price",
  nlim = 4,
  scatter = FALSE,
  fname = NULL,
  col = "green",
  Page = c(2, 2),
  sample = 8
)
plot3[[1]]

## ----snv1_1,warning=FALSE,eval=T,include=T,fig.align='center',fig.height=7,fig.width=7--------------------------------------------------------------
# Note: sample=4 means 4 randomly selected plots
# Note: nlim=4 means numeric variables with more than 4 unique values are included
plot31 <- ExpNumViz(
  Carseats,
  target = "US",
  nlim = 4,
  scatter = TRUE,
  fname = NULL,
  Page = c(2, 1),
  sample = 4
)
plot31[[1]]

## ----eda_41, eval=T,include=F-----------------------------------------------------------------------------------------------------------------------
et11 <- ExpCTable(
  Carseats,
  Target = "Price",
  margin = 1,
  clim = 10,
  round = 2,
  bin = 4,
  per = FALSE
)
rownames(et11) <- NULL

## ----e4.2, warning=FALSE,eval=F,include=T-----------------------------------------------------------------------------------------------------------
# ##bin=4, discretized into 4 categories based on quantiles
# ExpCTable(Carseats,Target="Price",margin=1,clim=10,round=2,bin=4,per=FALSE)

## ----e4.2.1,warning=FALSE,eval=T,render=et11,echo=F-------------------------------------------------------------------------------------------------
paged_table(et11)

## ----warning=FALSE,eval=T,include=T-----------------------------------------------------------------------------------------------------------------
carseat <- ISLR::Carseats
## Compute random weight
carseat$wt <- stats::runif(nrow(carseat), 0.5, 1.5)
wt_summary <- ExpCTable(
  carseat,
  margin = 1,
  clim = 10,
  round = 2,
  bin = 4,
  per = FALSE,
  weight = "wt"
)
wt_summary

## ----dd,warning=FALSE,eval=T,include=F--------------------------------------------------------------------------------------------------------------
# Frequency table of the Urban column with a fixed description column added.
tab_tar <- data.frame(table(Carseats[, "Urban"]))
tab_tar$Descriptions <- "Store location"
names(tab_tar) <- c("Urban", "Frequency", "Descriptions")
rownames(tab_tar) <- NULL

## ----dv-r,warning=FALSE,eval=T,render=tab_tar,echo=F------------------------------------------------------------------------------------------------
kable(tab_tar, "html")

## ----snc1,warning=FALSE,eval=T,include=F------------------------------------------------------------------------------------------------------------
snc <- ExpNumStat(
  Carseats,
  by = "GA",
  gp = "Urban",
  Qnt = seq(0, 1, 0.1),
  MesofShape = 2,
  Outlier = TRUE,
  round = 2
)
rownames(snc) <- NULL

## ----snc2, warning=FALSE,eval=F,include=T-----------------------------------------------------------------------------------------------------------
# ExpNumStat(Carseats,by="GA",gp="Urban",Qnt=seq(0,1,0.1),MesofShape=2,Outlier=TRUE,round=2)

## ----snc3,warning=FALSE,eval=T,render=snc,echo=F----------------------------------------------------------------------------------------------------
paged_table(snc)

## ----bp3.1,warning=FALSE,eval=T,include=T,fig.align='center',fig.height=7,fig.width=7---------------------------------------------------------------
plot4 <- ExpNumViz(
  Carseats,
  target = "Urban",
  type = 1,
  nlim = 3,
  fname = NULL,
  col = c("darkgreen", "springgreen3", "springgreen1"),
  Page = c(2, 2),
  sample = 8
)
plot4[[1]]

## ----ed3.3, eval=T,include=F------------------------------------------------------------------------------------------------------------------------
et100 <- ExpCTable(
  Carseats,
  Target = "Urban",
  margin = 1,
  clim = 10,
  nlim = 3,
  round = 2,
  bin = NULL,
  per = FALSE
)
rownames(et100) <- NULL

# NOTE(review): this evaluated call uses clim=3, nlim=3 while the displayed
# code in chunk ed3.8 shows clim=10, nlim=5 -- confirm which is intended.
et4 <- ExpCatStat(
  Carseats,
  Target = "Urban",
  result = "Stat",
  clim = 3,
  nlim = 3,
  bins = 10,
  Pclass = "Yes",
  plot = FALSE,
  top = 20,
  Round = 2
)
rownames(et4) <- NULL

et5 <- ExpCatStat(
  Carseats,
  Target = "Urban",
  result = "IV",
  clim = 10,
  nlim = 5,
  bins = 10,
  Pclass = "Yes",
  plot = FALSE,
  top = 20,
  Round = 2
)
rownames(et5) <- NULL
# Keep at most the first 15 rows. head() gives the same result as et5[1:15, ]
# when there are at least 15 rows, but does not append all-NA rows otherwise.
et5 <- head(et5, 15)

## ----ed3.4, warning=FALSE,eval=F,include=T----------------------------------------------------------------------------------------------------------
# ExpCTable(Carseats,Target="Urban",margin=1,clim=10,nlim=3,round=2,bin=NULL,per=FALSE)

## ----ed3.5,warning=FALSE,eval=T,render=et100,echo=F,out.height=8,out.width=8------------------------------------------------------------------------
kable(et100, "html")

## ----ed3.6, warning=FALSE,eval=F,include=T----------------------------------------------------------------------------------------------------------
# ExpCatStat(Carseats,Target="Urban",result = "IV",clim=10,nlim=5,bins=10,Pclass="Yes",plot=FALSE,top=20,Round=2)
#

## ----ed3.7,warning=FALSE,eval=T,render=et5,echo=F,out.height=8,out.width=8--------------------------------------------------------------------------
kable(et5, "html")

## ----ed3.8, warning=FALSE,eval=F,include=T----------------------------------------------------------------------------------------------------------
# et4 <- ExpCatStat(Carseats,Target="Urban",result = "Stat",clim=10,nlim=5,bins=10,Pclass="Yes",plot=FALSE,top=20,Round=2)

## ----ed3.9,warning=FALSE,eval=T,render=et4,echo=F,out.height=8,out.width=8--------------------------------------------------------------------------
kable(et4, "html")

## ----ed3.91,warning=FALSE,eval=T,fig.align='center',fig.height=7,fig.width=7------------------------------------------------------------------------
varimp <- ExpCatStat(
  Carseats,
  Target = "Urban",
  result = "Stat",
  clim = 10,
  nlim = 5,
  bins = 10,
  Pclass = "Yes",
  plot = TRUE,
  top = 10,
  Round = 2
)

## ----ed3.10,warning=FALSE,eval=T,include=T,fig.align='center',fig.height=7,fig.width=7--------------------------------------------------------------
plot5 <- ExpCatViz(
  Carseats,
  target = "Urban",
  fname = NULL,
  clim = 5,
  col = c("slateblue4", "slateblue1"),
  margin = 2,
  Page = c(2, 1),
  sample = 2
)
plot5[[1]]

## ----warning=FALSE,eval=T,include=T,fig.align='center',fig.height=7,fig.width=7---------------------------------------------------------------------
# Changes the session-wide console width; it is not restored afterwards.
options(width = 150)
CData <- ISLR::Carseats
qqp <- ExpOutQQ(CData, nlim = 10, fname = NULL, Page = c(2, 2), sample = 4)
qqp[[1]]

## ----warning=FALSE,eval=T,include=T,fig.align='center',fig.height=3,fig.width=7---------------------------------------------------------------------
ExpParcoord(
  CData,
  Group = NULL,
  Stsize = NULL,
  Nvar = c("Price", "Income", "Advertising", "Population", "Age", "Education")
)

## ----warning=FALSE,eval=T,include=T,fig.align='center',fig.height=3,fig.width=7---------------------------------------------------------------------
ExpParcoord(
  CData,
  Group = "ShelveLoc",
  Stsize = c(10, 15, 20),
  Nvar = c("Price", "Income"),
  Cvar = c("Urban", "US")
)

## ----warning=FALSE,eval=T,include=T,fig.align='center',fig.height=3,fig.width=7---------------------------------------------------------------------
ExpParcoord(
  CData,
  Group = "ShelveLoc",
  Nvar = c("Price", "Income"),
  Cvar = c("Urban", "US"),
  scale = NULL
)

## ----warning=FALSE,eval=T,include=T,fig.align='center',fig.height=3,fig.width=7---------------------------------------------------------------------
ExpParcoord(
  CData,
  Group = "US",
  Nvar = c("Price", "Income"),
  Cvar = c("ShelveLoc"),
  scale = "std"
)

## ----warning=FALSE,eval=T,include=T,fig.align='center',fig.height=3,fig.width=7---------------------------------------------------------------------
ExpParcoord(
  CData,
  Group = "ShelveLoc",
  Stsize = c(10, 15, 20),
  Nvar = c("Price", "Income", "Advertising", "Population", "Age", "Education")
)

## ----warning=FALSE,eval=T,include=T,fig.align='center',fig.height=3,fig.width=7---------------------------------------------------------------------
ExpParcoord(CData, Group = "US", Stsize = c(15, 50), Cvar = c("ShelveLoc", "Urban"))

## ----dudu, eval=T,include=F-------------------------------------------------------------------------------------------------------------------------
e1du <- ExpCustomStat(
  Carseats,
  Cvar = "Urban",
  Nvar = c("Age", "Price"),
  stat = c("mean", "count"),
  gpby = TRUE,
  dcast = FALSE
)
rownames(e1du) <- NULL

e1du1 <- ExpCustomStat(
  Carseats,
  Cvar = "Urban",
  Nvar = c("Age", "Price"),
  stat = c("mean", "count"),
  gpby = TRUE,
  dcast = TRUE
)
rownames(e1du1) <- NULL

e1du2 <- ExpCustomStat(
  Carseats,
  Cvar = c("Urban", "ShelveLoc"),
  Nvar = c("Age", "Price", "Advertising", "Sales"),
  stat = c("mean"),
  gpby = FALSE,
  dcast = TRUE
)
rownames(e1du2) <- NULL

## ----dud1, warning=FALSE,eval=F,include=T-----------------------------------------------------------------------------------------------------------
# ExpCustomStat(Carseats,Cvar="Urban",Nvar=c("Age","Price"),stat=c("mean","count"),gpby=TRUE,dcast=FALSE)
#

## ----dud12,warning=FALSE,eval=T,render=e1du,echo=F,out.height=8,out.width=8-------------------------------------------------------------------------
kable(e1du, "html")

## ----dud2, warning=FALSE,eval=F,include=T-----------------------------------------------------------------------------------------------------------
# ExpCustomStat(Carseats,Cvar="Urban",Nvar=c("Age","Price"),stat=c("mean","count"),gpby=TRUE,dcast=TRUE)
#

## ----dud21,warning=FALSE,eval=T,render=e1du1,echo=F,out.height=8,out.width=8------------------------------------------------------------------------
kable(e1du1, "html")

## ----dud3, warning=FALSE,eval=F,include=T-----------------------------------------------------------------------------------------------------------
# ExpCustomStat(Carseats,Cvar=c("Urban","ShelveLoc"),Nvar=c("Age","Price","Advertising","Sales"),stat=c("mean"),gpby=FALSE,dcast=TRUE)
#

## ----dud31,warning=FALSE,eval=T,render=e1du2,echo=F,out.height=8,out.width=8------------------------------------------------------------------------
kable(e1du2, "html")

## ----ktana, eval=T,include=F------------------------------------------------------------------------------------------------------------------------
# The first element of each ExpOutliers() result is stored as the summary and
# the second as the data, matching how they are rendered further below.
ana1 <- ExpOutliers(
  Carseats,
  varlist = c("Sales", "CompPrice", "Income"),
  method = "boxplot",
  treatment = "mean",
  capping = c(0.1, 0.9)
)
outlier_summ <- ana1[[1]]
outlier_data <- ana1[[2]]

ana2 <- ExpOutliers(
  Carseats,
  varlist = c("Sales", "CompPrice", "Income"),
  method = "3xStDev",
  treatment = "median",
  capping = c(0.1, 0.9)
)
outlier_summ1 <- ana2[[1]]
outlier_data1 <- ana2[[2]]

## ----out1, warning=FALSE,eval=F,include=T-----------------------------------------------------------------------------------------------------------
# ExpOutliers(Carseats, varlist = c("Sales","CompPrice","Income"), method = "boxplot", treatment = "mean", capping = c(0.1, 0.9))

## ----out11,warning=FALSE,eval=T,render=outlier_summ,echo=F,out.height=8,out.width=8-----------------------------------------------------------------
kable(outlier_summ, "html")

## ----out12,warning=FALSE,eval=T,render=outlier_data,echo=F,out.height=8,out.width=8-----------------------------------------------------------------
kable(head(outlier_data), "html")

## ----out2, warning=FALSE,eval=F,include=T-----------------------------------------------------------------------------------------------------------
# ExpOutliers(Carseats, varlist = c("Sales","CompPrice","Income"), method = "3xStDev", treatment = "median", capping = c(0.1, 0.9))
#

## ----out21,warning=FALSE,eval=T,render=outlier_summ1,echo=F,out.height=8,out.width=8----------------------------------------------------------------
kable(outlier_summ1, "html")

## ----out22,warning=FALSE,eval=T,render=outlier_data1,echo=F,out.height=8,out.width=8----------------------------------------------------------------
kable(head(outlier_data1), "html")