Clarified data vs. reference arguments for caret::confusionMatrix calls.
Change-Id: Ieffefbf109668af49fe3d95b61469d594195953b
diff --git a/R/idiomclassification_mk_pf.R b/R/idiomclassification_mk_pf.R
index 31edf3a..3119d92 100644
--- a/R/idiomclassification_mk_pf.R
+++ b/R/idiomclassification_mk_pf.R
@@ -66,11 +66,10 @@
# different cutoff for prediction
# prediction_for_table <- predict(rf_classifier, test %>% select(-CO_IDIOM), cutoff = c(0.8, 0.2))
-confusion <- table(observed=test$CO_IDIOM,predicted=prediction_for_table)
-conf <- confusionMatrix(confusion, positive= "1")
-print(conf)
-# Sensitivity is precision of class 1
-# Pos Pred Value is recall
+confusionMatrix(prediction_for_table, test$CO_IDIOM,positive= "1")
+
+# Sensitivity is the recall of the positive class ("1")
+# Pos Pred Value is the precision of the positive class ("1")
varImpPlot(rf_classifier)
# optional resampling with smote
@@ -78,9 +77,7 @@
smoted.data <- SMOTE(fmla, subset(train, select = c("CO_IDIOM", vars)), perc.over = 1200, perc.under = 100)
rf_classifier = randomForest(fmla, smoted.data, ntree=100, mtry=10, importance=TRUE)
prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM))
-confusion <- table(observed=test$CO_IDIOM,predicted=prediction_for_table)
-conf <- confusionMatrix(confusion, positive = "1")
-print(conf)
+confusionMatrix(prediction_for_table,test$CO_IDIOM, positive = "1")
# Using estimates by random forest on entire dataset