Show table comparing RF with and without SMOTE and cutoff

Change-Id: I298effe9ef38e4099c1d58beb6d12d126af6b004
diff --git a/R/idiomclassification_mk_pf.R b/R/idiomclassification_mk_pf.R
index af96b27..f564231 100644
--- a/R/idiomclassification_mk_pf.R
+++ b/R/idiomclassification_mk_pf.R
@@ -58,7 +58,7 @@
 train <- ngramme[trainRows,]
 test <- ngramme[setdiff(1:nrow(ngramme),trainRows),]
 
-cat("Random Forest without SMOTE\n")
+cat("Random Forest\n")
 
 rf_classifier = randomForest(fmla, train, importance=TRUE)
 
@@ -69,25 +69,38 @@
 
 res <- confusionMatrix(prediction_for_table, test$CO_IDIOM, positive= "idiom")
 print(res)
+collected_results <- bind_cols("rf" = res$byClass)
 
 # Sensitivity is recall of class 1
 # Pos Pred Value is precision
 varImpPlot(rf_classifier)
 
-cat("With SMOTE resampled training data\n")
+cat("Random Forest with cutoff\n")
+prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM), cutoff = c(0.2, 0.8))
+res <- confusionMatrix(prediction_for_table,test$CO_IDIOM, positive = "idiom")
+collected_results <- bind_cols(collected_results, "rf with cutoff" = res$byClass)
+print(res)
 
+cat("With SMOTE resampled training data\n")
 smoted.data <- SMOTE(fmla, subset(train, select = c("CO_IDIOM", vars)), perc.over = 1200, perc.under = 100)
 rf_classifier = randomForest(fmla, smoted.data, importance=TRUE)
 prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM))
 res <- confusionMatrix(prediction_for_table,test$CO_IDIOM, positive = "idiom")
+collected_results <- bind_cols(collected_results, "rf with SMOTE" = res$byClass)
 print(res)
 
-cat("With SMOTE and detection task oriented cutoff for prediction\n")
-
+cat("With SMOTE and cutoff\n")
 prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM), cutoff = c(0.2, 0.8))
 res <- confusionMatrix(prediction_for_table,test$CO_IDIOM, positive = "idiom")
+collected_results <- bind_cols(collected_results, "rf with SMOTE and cutoff" = res$byClass)
 print(res)
 
+collected_results <- collected_results %>%
+  round(3) %>%
+  add_column(measure = names(res$byClass)) %>%
+  column_to_rownames("measure")
+
+View(collected_results)
 
 # Using estimates by random forest on entire dataset