derekovecs/collocatordb: add MI, MI², dice, and log-dice scores
diff --git a/collocatordb.cc b/collocatordb.cc
index eff9c23..074a0b5 100644
--- a/collocatordb.cc
+++ b/collocatordb.cc
@@ -52,11 +52,13 @@
     double npmi;
     double llr;
     double lfmd;
-    double fpmi;
+    double md;
     double left_lfmd;
     double right_lfmd;
     double left_npmi;
     double right_npmi;
+    double dice;
+    double logdice;
   };
 
   size_t num_merge_operator_calls;
@@ -150,6 +152,23 @@
     return (2 * ( (o11>0? o11 * log(o11/e11):0) + (o12>0? o12 * log(o12/e12):0) + (o21>0? o21 * log(o21/e21):0) + (o22>0? o22 * log(o22/e22):0)));
   }
 
+  // Dice-style association score: 2*w12 / (w2 + w1*window_size). The parameter n is accepted but not used in this function.
+  static inline double ca_dice(uint64_t w1, uint64_t w2, uint64_t w12, uint64_t n, uint64_t window_size) {
+    double
+      r1 = (double) w1 * window_size, // w1 scaled by the window size, computed in double
+      c1 = w2; // w2 converted to double
+    return 2 * w12 / (c1+r1); // 2 * w12 is an integer product; the division happens in double. There is no guard for c1+r1 == 0.
+  }
+
+  // logDice-style score: 14 + log2 of a Dice-style ratio with e = 0.5 added to each count; n is not used here. Reference: Rychlý, Pavel (2008): <a href="http://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf">A lexicographer-friendly association score.</a> In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9.
+  static inline double ca_logdice(uint64_t w1, uint64_t w2, uint64_t w12, uint64_t n, uint64_t window_size) {
+    double
+      e = 0.5, // constant added to w12 and to each of the two denominator terms
+      r1 = (double) w1 * window_size, // w1 scaled by the window size, computed in double
+      c1 = w2; // w2 converted to double
+    return 14 + log2(2 * (w12+e) / (c1+e+r1+e)); // because of e, the log2 argument stays positive even when all counts are 0
+  }
+
   class CountMergeOperator : public AssociativeMergeOperator {
   public:
     CountMergeOperator() {
@@ -539,12 +558,15 @@
           double right_lfmd = ca_lfmd(_vocab[w1].freq, _vocab[last_w2].freq, right, total, 1);
           double left_npmi = ca_npmi(_vocab[w1].freq, _vocab[last_w2].freq, left, total, 1);
           double right_npmi = ca_npmi(_vocab[w1].freq, _vocab[last_w2].freq, right, total, 1);
-          collocators.push_back ( {last_w2, sum, pmi, pmi / (-log2(o)), /* normalize to [-1,1] */
+          collocators.push_back ( {last_w2, sum, pmi, pmi / (-log2(o/total/avg_window_size)), /* normalize to [-1,1] */
                 llr, lfmd, md,
                 left_lfmd,
                 right_lfmd,
                 left_npmi,
-                right_npmi}
+                right_npmi,
+                ca_dice((double)_vocab[w1].freq, (double)_vocab[last_w2].freq, sum, total, avg_window_size),
+                ca_logdice((double)_vocab[w1].freq, (double)_vocab[last_w2].freq, sum, total, avg_window_size)
+                }
             );
         }
         last_w2 = w2;
@@ -650,9 +672,12 @@
       "\"rank\":" << c.w2    << "," <<
       "\"f\":" << c.raw    << "," <<
       "\"npmi\":" << c.npmi  << "," <<
+      "\"pmi\":" << c.pmi  << "," <<
       "\"llr\":" << c.llr   << "," <<
       "\"lfmd\":" << c.lfmd  << "," <<
-      "\"fpmi\":" << c.fpmi  << "," <<
+      "\"md\":" << c.md  << "," <<
+      "\"dice\":" << c.dice  << "," <<
+      "\"ld\":" << c.logdice  << "," <<
       "\"llfmd\":" << c.left_lfmd  << "," <<
       "\"rlfmd\":" << c.right_lfmd  << "," <<
       "\"lnpmi\":" << c.left_npmi  << "," <<
diff --git a/collocatordb.h b/collocatordb.h
index ad9f0c6..70c6c11 100644
--- a/collocatordb.h
+++ b/collocatordb.h
@@ -20,11 +20,13 @@
     double npmi;
     double llr;
     double lfmd;
-    double fpmi;
+    double md;
     double left_lfmd;
     double right_lfmd;
     double left_npmi;
     double right_npmi;
+    double dice;
+    double logdice;
   };
 
     class CollocatorIterator : public Iterator  {