Improve Sensitivity with SMOTE
Example evaluation output after this change:
Confusion Matrix and Statistics
observed
predicted 0 1
0 2055 7
1 246 74
Accuracy : 0.8938
95% CI : (0.8807, 0.9059)
No Information Rate : 0.966
P-Value [Acc > NIR] : 1
Kappa : 0.3329
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.91358
Specificity : 0.89309
Pos Pred Value : 0.23125
Neg Pred Value : 0.99661
Prevalence : 0.03401
Detection Rate : 0.03107
Detection Prevalence : 0.13434
Balanced Accuracy : 0.90334
'Positive' Class : 1
Change-Id: Ibf6457af340b0ca7a4e814f2d77927515a142ee0
diff --git a/R/idiomclassification_mk_pf.R b/R/idiomclassification_mk_pf.R
index 99025b5..82516a3 100644
--- a/R/idiomclassification_mk_pf.R
+++ b/R/idiomclassification_mk_pf.R
@@ -61,13 +61,12 @@
# optional resampling with smote
smoted.data <- SMOTE(fmla, subset(train, select = c("CO_IDIOM", vars)), perc.over = 1200, perc.under = 100)
-rf_classifier = randomForest(fmla, smoted.data, ntree=100, mtry=4, importance=TRUE)
-prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM))
+rf_classifier = randomForest(fmla, smoted.data, ntree=200, importance=TRUE)
+prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM), cutoff=c(0.8,0.2))
confusion <- table(predicted=prediction_for_table, observed=test$CO_IDIOM)
conf <- confusionMatrix(confusion, positive = "1")
print(conf)
# Using estimates by random forest on entire dataset
-
library(randomForest)
rf_classifier_full = randomForest(fmla, data=ngramme, ntree=100, mtry=2, importance=TRUE, cutoff=c(0.8,0.2))
rf_classifier_full