collocatordb: fix window size influence on MI scores

The nPMI results are still not completely intuitive (the maximum is < 1
for window sizes > 1), but they are probably valid.
diff --git a/collocatordb.cc b/collocatordb.cc
index 074a0b5..2fbed98 100644
--- a/collocatordb.cc
+++ b/collocatordb.cc
@@ -111,30 +111,50 @@
   }
 
   static inline double ca_pmi(uint64_t f1, uint64_t f2, uint64_t f12, uint64_t total, double window_size) {
-    return log2( total * ((double) f12) / (window_size * ((double) f1) * ((double)f2) ));
+    double
+      r1 = f1 * window_size,
+      c1 = f2,
+      e = r1 * c1 / total,
+      o = f12;
+    return log2(o/e);
   }
 
   // Bouma, Gerlof (2009): <a href="https://svn.spraakdata.gu.se/repos/gerlof/pub/www/Docs/npmi-pfd.pdf">
   // Normalized (pointwise) mutual information in collocation extraction</a>. In Proceedings of GSCL. 
   static inline double ca_npmi(uint64_t f1, uint64_t f2, uint64_t f12, uint64_t total, double window_size) {
-    if(f12 == 0)
+    double
+      r1 = f1 * window_size,
+      c1 = f2,
+      e = r1 * c1 / total,
+      o = f12;
+    if(f12 < FREQUENCY_THRESHOLD)
       return -1.0;
     else
-      return log2( total * ((double) f12) / (window_size * ((double) f1) * ((double)f2) )) / (-log2(((double) f12 / window_size / total)));
+      return log2(o/e) / (-log2(o/total/window_size));
   }
 
   // Thanopoulos, A., Fakotakis, N., Kokkinakis, G.: Comparative evaluation of collocation extraction metrics.
   // In: International Conference on Language Resources and Evaluation (LREC-2002). (2002) 620–625
   // double md = log2(pow((double)max * window_size / total, 2) /  (window_size * ((double)_vocab[w1].freq/total) * ((double)_vocab[last_w2].freq/total)));
   static inline double ca_md(uint64_t f1, uint64_t f2, uint64_t f12, uint64_t total, double window_size) {
-    return log2((double)f12 * f12 /  ((double) total * window_size * window_size * f1 * f2));
+    double
+      r1 = f1 * window_size,
+      c1 = f2,
+      e = r1 * c1 / total,
+      o = f12;
+    return log2(o*o/e);
   }
 
   static inline double ca_lfmd(uint64_t f1, uint64_t f2, uint64_t f12, uint64_t total, double window_size) {
+    double
+      r1 = f1 * window_size,
+      c1 = f2,
+      e = r1 * c1 / total,
+      o = f12;
     if(f12 == 0)
       return 0;
     else
-      return log2((double)f12 * f12 /  ((double) total * window_size * window_size * f1 * f2)) + log2((double) f12 / window_size / total);
+      return log2(o*o*o/e);
   }
 
   // Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.