Fix input for caret::confusionMatrix
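Sensitivity, Specificity, ... were not correctly computed: the table was
passed with observed values in rows and predictions in columns, and the
positive class was not set to "1".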
See examples in ?caret::confusionMatrix
Output with SMOTE:
print(conf)
Confusion Matrix and Statistics
         observed
predicted    0    1
        0 2238   15
        1   81   48
Accuracy : 0.9597
95% CI : (0.951, 0.9672)
No Information Rate : 0.9736
P-Value [Acc > NIR] : 1
Kappa : 0.4816
Mcnemar's Test P-Value : 3.266e-11
Sensitivity : 0.76190
Specificity : 0.96507
Pos Pred Value : 0.37209
Neg Pred Value : 0.99334
Prevalence : 0.02645
Detection Rate : 0.02015
Detection Prevalence : 0.05416
Balanced Accuracy : 0.86349
'Positive' Class : 1
Change-Id: I7009c6b3a1e81f4f912ce13cbbde825abc490ee7
diff --git a/R/idiomclassification_mk_pf.R b/R/idiomclassification_mk_pf.R
index 1310b6b..99025b5 100644
--- a/R/idiomclassification_mk_pf.R
+++ b/R/idiomclassification_mk_pf.R
@@ -53,8 +53,8 @@
# different cutoff for prediction
# prediction_for_table <- predict(rf_classifier, test %>% select(-CO_IDIOM), cutoff = c(0.8, 0.2))
-confusion <- table(observed=test$CO_IDIOM,predicted=prediction_for_table)
-conf <- confusionMatrix(confusion)
+confusion <- table(predicted=prediction_for_table, observed=test$CO_IDIOM)
+conf <- confusionMatrix(confusion, positive = "1")
print(conf)
varImpPlot(rf_classifier)
@@ -63,9 +63,9 @@
smoted.data <- SMOTE(fmla, subset(train, select = c("CO_IDIOM", vars)), perc.over = 1200, perc.under = 100)
rf_classifier = randomForest(fmla, smoted.data, ntree=100, mtry=4, importance=TRUE)
prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM))
-confusion <- table(observed=test$CO_IDIOM,predicted=prediction_for_table)
-confusionMatrix(confusion)
-
+confusion <- table(predicted=prediction_for_table, observed=test$CO_IDIOM)
+conf <- confusionMatrix(confusion, positive = "1")
+print(conf)
# Using estimates by random forest on entire dataset
library(randomForest)