## ---- include = FALSE, echo=FALSE--------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.height= 5, fig.width=7 ) ## ----getcaretdata, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- library(caret) library(dplyr) library(mlbench) library(tidyr) library(e1071) library(randomForest) # Load in the iris data set for this problem data(iris) df <- iris # View the class distribution, as this is a multiclass problem, we can use the multi-uclassification data table builder table(iris$Species) ## ----split_data, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- train_split_idx <- caret::createDataPartition(df$Species, p = 0.75, list = FALSE) # Here we define a split index and we are now going to use a multiclass ML model to fit the data train <- df[train_split_idx, ] test <- df[-train_split_idx, ] str(train) ## ----train_data, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- rf_model <- caret::train(Species ~ ., data = df, method = "rf", metric = "Accuracy") rf_model ## ----conf_mat, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- # Make a prediction on the fitted model with the test data rf_class <- predict(rf_model, newdata = test, type = "raw") predictions <- cbind(data.frame(train_preds=rf_class, test$Species)) # Create a confusion matrix object cm <- caret::confusionMatrix(predictions$train_preds, predictions$test.Species) print(cm) ## ----using_multi_function, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- # Implementing function to collapse data library(ConfusionTableR) mc_df <- ConfusionTableR::multi_class_cm(predictions$train_preds, predictions$test.Species, mode="everything") # Access the reduced data for storage in databases print(mc_df$record_level_cm) glimpse(mc_df$record_level_cm) ## ----using_multi_function_cm1, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- mc_df$confusion_matrix ## ----using_multi_function_cm2, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- mc_df$cm_tbl ## ----load_cancer, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- # Load in the data library(dplyr) library(ConfusionTableR) library(caret) library(tidyr) library(mlbench) # Load in the data data("BreastCancer", package = "mlbench") breast <- BreastCancer[complete.cases(BreastCancer), ] #Create a copy breast <- breast[, -1] breast$Class <- factor(breast$Class) # Create as factor for(i in 1:9) { breast[, i] <- as.numeric(as.character(breast[, i])) } ## ----predict_cm, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- #Perform train / test split on the data train_split_idx <- caret::createDataPartition(breast$Class, p = 0.75, list = FALSE) train <- breast[train_split_idx, ] test <- breast[-train_split_idx, ] rf_fit <- caret::train(Class ~ ., data=train, method="rf") #Make predictions to expose class labels preds <- predict(rf_fit, newdata=test, type="raw") predicted <- cbind(data.frame(class_preds=preds), test) ## ----binary_df, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- bin_cm <- ConfusionTableR::binary_class_cm(predicted$class_preds, predicted$Class) # Get the record level data bin_cm$record_level_cm glimpse(bin_cm$record_level_cm) ## ----visual_confusion_matrix, warning=FALSE, error=FALSE, message=FALSE, fig.height= 5, fig.width=7---- ConfusionTableR::binary_visualiseR(train_labels = predicted$class_preds, truth_labels= predicted$Class, class_label1 = "Not Stranded", class_label2 = "Stranded", quadrant_col1 = "#28ACB4", quadrant_col2 = "#4397D2", custom_title = "Breast Cancer Confusion Matrix", text_col= "black")