Merge changes I02169a8a,I23fa3680,Iee2355e0,Iae35113f
* changes:
Restore original x axis direction for tradeoff plot
Also plot Sensitivity and Balanced Accuracy, and use ggplot
Fscore over the full range
Add Fscore plot for various cutoffs
diff --git a/R/idiomclassification_mk_pf.R b/R/idiomclassification_mk_pf.R
index 7f6451c..7521226 100644
--- a/R/idiomclassification_mk_pf.R
+++ b/R/idiomclassification_mk_pf.R
@@ -63,7 +63,7 @@
rf_classifier = randomForest(fmla, train, importance=TRUE)
# only SY features
-# rf_classifier = randomForest(fmlasy, train, ntree=100, mtry=10, importance=TRUE)
+# rf_classifier = randomForest(fmlasy, train, importance=TRUE)
prediction_for_table <- predict(rf_classifier, test %>% select(-CO_IDIOM))
@@ -142,7 +142,7 @@
# information gain
# multiply by 1000 to avoid undersized bins
-# features are ranked individually not matter their correlation
+# features are ranked individually no matter their correlation
igain<-information.gain(fmla, data=ngramme%>%mutate_at(vars, ~ . * 1000),unit="log2")
featureRanks<-cbind(rfranks,igain,ttestPvalues)
@@ -156,7 +156,7 @@
featureRanks<-matrix(0,4,length(vars))
for (i in 1:10) {
rfc =randomForest(fmla, data=ngramme, importance=TRUE)
- #rfc =randomForest(fmla, data=ngramme, ntree=100, importance=TRUE, cutoff=c(0.8,0.2))
+ #rfc =randomForest(fmla, data=ngramme, importance=TRUE, cutoff=c(0.2, 0.8))
errrate<-errrate+rfc$err.rate[100,1]
conf<-conf+rfc$confusion
featureRanks<-featureRanks+