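Make random forest output more readable

Print a newline-terminated heading before each model run (with and
without SMOTE) instead of an unterminated label right before the
confusion matrix, and drop the commented-out cutoff experiment.

Change-Id: Ie0b6e674eeced39717e38ef3d132d902a4ceed0b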
diff --git a/R/idiomclassification_mk_pf.R b/R/idiomclassification_mk_pf.R
index 47d4ef9..c1f4dbc 100644
--- a/R/idiomclassification_mk_pf.R
+++ b/R/idiomclassification_mk_pf.R
@@ -58,6 +58,8 @@
train <- ngramme[trainRows,]
test <- ngramme[setdiff(1:nrow(ngramme),trainRows),]
+cat("Random Forest without SMOTE\n")
+
rf_classifier = randomForest(fmla, train, importance=TRUE)
# only SY features
@@ -65,24 +67,19 @@
prediction_for_table <- predict(rf_classifier, test %>% select(-CO_IDIOM))
-# different cutoff for prediction
-# prediction_for_table <- predict(rf_classifier, test %>% select(-CO_IDIOM), cutoff = c(0.8, 0.2))
-
res <- confusionMatrix(prediction_for_table, test$CO_IDIOM,positive= "1")
-cat("Without SMOTE")
print(res)
# Sensitivity is recall of class 1
# Pos Pred Value is precision
varImpPlot(rf_classifier)
-# optional resampling with smote
+cat("With SMOTE resampled training data\n")
smoted.data <- SMOTE(fmla, subset(train, select = c("CO_IDIOM", vars)), perc.over = 1200, perc.under = 100)
rf_classifier = randomForest(fmla, smoted.data, importance=TRUE)
prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM))
res <- confusionMatrix(prediction_for_table,test$CO_IDIOM, positive = "1")
-cat("With SMOTE")
print(res)
# Using estimates by random forest on entire dataset