Explicitly name the CO_IDIOM factor levels idiom and no_idiom

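Note that this also implicitly changes the order of the factor levels:
"idiom" (the "event") now sorts before "no_idiom" (the "no event"),
which follows the examples in ?caret::confusionMatrix. The
randomForest cutoff vector and the confusion table labels are swapped
accordingly.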

Change-Id: Ia33396fc104c190ffbd37eb08cc86d126311742e
diff --git a/R/idiomclassification_mk_pf.R b/R/idiomclassification_mk_pf.R
index 7a591b7..af96b27 100644
--- a/R/idiomclassification_mk_pf.R
+++ b/R/idiomclassification_mk_pf.R
@@ -35,7 +35,7 @@
   mutate(across(c("dice", "lfmd", "llr", "ld", "pmi"), ~ replace_na(.x, min(.x) - 1))) %>%
   rename_at(syfeaturenames$innames, ~ syfeaturenames[syfeaturenames$innames==.x,]$synames ) %>%
   mutate(across(everything(), ~ replace_na(.x, 0))) %>%
-  mutate(CO_IDIOM = as.factor(if_else(CO_IDIOM !=1, "0", "1"))) # just two classes: 0 no idiom, 1 idiom
+  mutate(CO_IDIOM = as.factor(if_else(CO_IDIOM == 1, "idiom", "no_idiom"))) # just two classes: "idiom" (CO_IDIOM == 1) and "no_idiom" (anything else)
 
 # Optional
 write.table(ngramme,file=paste("../data/",ngramfile,"_cosy.csv",sep=""), sep = "\t", quote=F)
@@ -67,7 +67,7 @@
 
 prediction_for_table <- predict(rf_classifier, test %>% select(-CO_IDIOM))
 
-res <- confusionMatrix(prediction_for_table, test$CO_IDIOM,positive= "1")
+res <- confusionMatrix(prediction_for_table, test$CO_IDIOM, positive= "idiom")
 print(res)
 
 # Sensitivity is recall of class 1
@@ -79,13 +79,13 @@
 smoted.data <- SMOTE(fmla, subset(train, select = c("CO_IDIOM", vars)), perc.over = 1200, perc.under = 100)
 rf_classifier = randomForest(fmla, smoted.data, importance=TRUE)
 prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM))
-res <- confusionMatrix(prediction_for_table,test$CO_IDIOM, positive = "1")
+res <- confusionMatrix(prediction_for_table,test$CO_IDIOM, positive = "idiom")
 print(res)
 
 cat("With SMOTE and detection task oriented cutoff for prediction\n")
 
-prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM), cutoff = c(0.8, 0.2))
-res <- confusionMatrix(prediction_for_table,test$CO_IDIOM, positive = "1")
+prediction_for_table <- predict(rf_classifier,test %>% select(-CO_IDIOM), cutoff = c(0.2, 0.8))
+res <- confusionMatrix(prediction_for_table,test$CO_IDIOM, positive = "idiom")
 print(res)
 
 
@@ -107,8 +107,8 @@
 
 # ttest
 
-idioms<-ngramme %>% filter(CO_IDIOM==1)
-nonidioms<-ngramme %>% filter(CO_IDIOM!=1)
+idioms<-ngramme %>% filter(CO_IDIOM == "idiom")
+nonidioms<-ngramme %>% filter(CO_IDIOM != "idiom")
 
 ttestPvalues<-sapply(vars,
                      function(sel) t.test(idioms[sel],nonidioms[sel])$p.value)
@@ -143,8 +143,8 @@
       cbind(conf[,1:2]/i,(1-conf[,3]/i)*100),
       c(100*diag(conf[,1:2])/colSums(conf[,1:2]),NA),
       c(rowSums(conf[,1:2]/i),NA)),digits=2)
-  colnames(conf1)<-c("0","1","rec")
-  rownames(conf1)<-c("0","1","prec","sum")
+  colnames(conf1)<-c("idiom","no_idiom","rec") # named like the CO_IDIOM classes; same order as the former "1","0"
+  rownames(conf1)<-c("idiom","no_idiom","prec","sum") # "prec" and "sum" label the two summary rows
   print(conf1)
 }
 featureRanks<-featureRanks/10