Fully remove incomplete idioms from data

Change-Id: I6bbad3d66b012d6e904d151903bdb860df69bc62
diff --git a/R/idiomclassification_mk_pf.R b/R/idiomclassification_mk_pf.R
index e8c0f87..1310b6b 100644
--- a/R/idiomclassification_mk_pf.R
+++ b/R/idiomclassification_mk_pf.R
@@ -16,6 +16,9 @@
   wl[!(wl %in% stopwords)]
 }
 
+oringramme <- oringramme %>%
+  filter(CO_IDIOM < 2)   # drop rows with CO_IDIOM >= 2 (filter() also drops NA) so just two classes remain: 0 no idiom, 1 idiom
+
 ngramme <- oringramme %>%
   add_column(NSTOPW = sapply(oringramme$tokens,function(x) length(deleteStopwords(tolower(unlist(strsplit(x," "))),stopwords)))) %>%
   # select(-matches("CO_TOKEN.*"), -tokens) %>%
@@ -24,7 +27,7 @@
   mutate(across(c("dice", "lfmd", "llr", "ld", "pmi"), ~ replace_na(.x, min(.x) - 1))) %>%
   rename_at(syfeatures$innames, ~ syfeatures[syfeatures$innames==.x,]$synames ) %>%
   mutate(across(everything(), ~ replace_na(.x, 0))) %>%
-  mutate(CO_IDIOM = as.factor(if_else(CO_IDIOM !=1, 0, 1))) # just two classes: 0 no idiom, 1 idiom
+  mutate(CO_IDIOM = as.factor(if_else(CO_IDIOM !=1, "0", "1")))
 
 covars <- c("CO_LL", "CO_Z", "CO_G", "CO_T", "CO_LOGDICE", "CO_PMI", "CO_MI3", "CO_DEREKO", "CO_SGT", "CO_WIN5_VEC","CO_WIN5_VEC_AUTOSEM")
 syvars <- c(syfeaturenames$synames,"NSTOPW")