collocatordb: calculate association measures also for direct neighbours
diff --git a/collocatordb.cc b/collocatordb.cc
index 7aca8a5..af9cab9 100644
--- a/collocatordb.cc
+++ b/collocatordb.cc
@@ -51,9 +51,12 @@
     double pmi;
     double npmi;
     double llr;
-    double md;
     double lfmd;
     double fpmi;
+    double left_lfmd;
+    double right_lfmd;
+    double left_npmi;
+    double right_npmi;
   };
 
   size_t num_merge_operator_calls;
@@ -105,6 +108,25 @@
     }
   }
 
+  static inline double ca_pmi(uint64_t f1, uint64_t f2, uint64_t f12, uint64_t total, double window_size) {  // pointwise mutual information: log2(total * f12 / (window_size * f1 * f2))
+    return log2(static_cast<double>(total) * static_cast<double>(f12) / (window_size * static_cast<double>(f1) * static_cast<double>(f2)));
+  }
+
+  static inline double ca_npmi(uint64_t f1, uint64_t f2, uint64_t f12, uint64_t total, double window_size) {  // normalized PMI: pmi / -log2(f12 / window_size / total); returns -1 when f12 == 0
+    return f12 == 0 ? -1.0 : log2( total * ((double) f12) / (window_size * ((double) f1) * ((double)f2) )) / -log2((double) f12 / window_size / total);  // f12 == 0 guard avoids -inf / inf = NaN
+  }
+
+  // Thanopoulos, A., Fakotakis, N., Kokkinakis, G.: Comparative evaluation of collocation extraction metrics.
+  // In: International Conference on Language Resources and Evaluation (LREC-2002). (2002) 620–625
+  // double md = log2(pow((double)max * window_size / total, 2) /  (window_size * ((double)_vocab[w1].freq/total) * ((double)_vocab[last_w2].freq/total)));
+  static inline double ca_md(uint64_t f1, uint64_t f2, uint64_t f12, uint64_t total, double window_size) {  // mutual dependency score: log2(f12^2 / (total * window_size^2 * f1 * f2))
+    return log2(static_cast<double>(f12) * f12 / (static_cast<double>(total) * window_size * window_size * f1 * f2));
+  }
+
+  static inline double ca_lfmd(uint64_t f1, uint64_t f2, uint64_t f12, uint64_t total, double window_size) {  // log-frequency biased MD: the ca_md expression plus log2(f12 / window_size / total)
+    return log2(static_cast<double>(f12) / window_size / total) + log2(static_cast<double>(f12) * f12 / (static_cast<double>(total) * window_size * window_size * f1 * f2));
+  }
+
 
   class CountMergeOperator : public AssociativeMergeOperator {
   public:
@@ -516,7 +538,7 @@
     int i=0;
     for (Collocator c : collocators) {
       if(i++>10) break;
-      std::cout << "w1:" << _vocab[w1].word << ", w2:" << _vocab[c.w2].word
+      std::cout << "dont call me w1:" << _vocab[w1].word << ", w2:" << _vocab[c.w2].word
                 << "\t f(w1):" << _vocab[w1].freq
                 << "\t f(w2):" << _vocab[c.w2].freq
                 << "\t f(w1, x):" << total_w1
@@ -524,7 +546,6 @@
                 << "\t pmi:" << c.pmi
                 << "\t npmi:" << c.npmi
                 << "\t llr:" << c.llr
-                << "\t md:" << c.md
                 << "\t lfmd:" << c.lfmd
                 << "\t fpmi:" << c.fpmi
                 << "\t total:" << total
@@ -536,9 +557,8 @@
 	std::vector<Collocator> rocksdb::CollocatorDB::get_collocators(uint32_t w1) {
 		std::vector<Collocator> collocators;
     uint64_t w2, last_w2 = 0xffffffffffffffff;
-    uint64_t max = 0, total_w1 = 0;
+    uint64_t maxv = 0, left = 0, right = 0, total_w1 = 0;
     const double window_size = 1;
-
     for ( auto it = std::unique_ptr<CollocatorIterator>(SeekIterator(w1, 0, 0)); it->isValid(); it->Next()) {
       uint64_t value = it->intValue(),
         key = it->intKey();
@@ -546,19 +566,29 @@
       total_w1 += value;
       if(last_w2 == 0xffffffffffffffff) last_w2 = w2;
       if (w2 != last_w2) {
-				double pmi = log2( total * ((double) max) /
-													 (window_size * ((double)_vocab[w1].freq) * ((double)_vocab[last_w2].freq) ));
-        //  Thanopoulos, A., Fakotakis, N., Kokkinakis, G.: Comparative evaluation of collocation extraction metrics. In: International Conference on Language Resources and Evaluation (LREC-2002). (2002) 620–625
-        // double md = log2(pow((double)max * window_size / total, 2) /  (window_size * ((double)_vocab[w1].freq/total) * ((double)_vocab[last_w2].freq/total)));
-        double md = log2((double)max * max /  ((double) total * window_size * window_size * _vocab[w1].freq * _vocab[last_w2].freq));
-        collocators.push_back ( {last_w2, max, pmi, pmi / (-log2(((double) max / window_size / total))), /* normalize to [-1,1] */
-							calculateLLR(_vocab[w1].freq, total, max, _vocab[last_w2].freq), md, md + log2((double)max / window_size / total), pmi*max/total/window_size} );
+				double pmi = ca_pmi(_vocab[w1].freq, _vocab[last_w2].freq, maxv, total, window_size);
+        double lfmd = ca_lfmd(_vocab[w1].freq, _vocab[last_w2].freq, maxv, total, window_size);
+        double left_lfmd = ca_lfmd(_vocab[w1].freq, _vocab[last_w2].freq, left, total, 1);
+        double right_lfmd = ca_lfmd(_vocab[w1].freq, _vocab[last_w2].freq, right, total, 1);
+        double left_npmi = ca_npmi(_vocab[w1].freq, _vocab[last_w2].freq, left, total, 1);
+        double right_npmi = ca_npmi(_vocab[w1].freq, _vocab[last_w2].freq, right, total, 1);
+        collocators.push_back ( {last_w2, maxv, pmi, pmi / (-log2(((double) maxv / window_size / total))), /* normalize to [-1,1] */
+							calculateLLR(_vocab[w1].freq, total, maxv, _vocab[last_w2].freq), lfmd, pmi*maxv/total/window_size,
+              left_lfmd,
+              right_lfmd,
+              left_npmi,
+              right_npmi}
+          );
         last_w2 = w2;
-        max = value;
+        maxv = value;
       } else {
-        if(value > max)
-          max = value;
+        if(value > maxv)
+          maxv = value;
       }
+      if(DIST(key) == -1)
+        left = value;
+      else if(DIST(key) == 1)
+        right = value;
     }
 
 		sort(collocators.begin(), collocators.end(), sortByLfmd);
@@ -574,7 +604,6 @@
                 << "\t pmi:" << c.pmi
                 << "\t npmi:" << c.npmi
                 << "\t llr:" << c.llr
-                << "\t md:" << c.md
                 << "\t lfmd:" << c.lfmd
                 << "\t fpmi:" << c.fpmi
                 << "\t total:" << total
@@ -607,10 +636,15 @@
       "\"npmi\":" << c.npmi  << "," <<
       "\"llr\":" << c.llr   << "," <<
       "\"lfmd\":" << c.lfmd  << "," <<
-      "\"fpmi\":" << c.fpmi  <<
+      "\"fpmi\":" << c.fpmi  << "," <<
+      "\"llfmd\":" << c.left_lfmd  << "," <<
+      "\"rlfmd\":" << c.right_lfmd  << "," <<
+      "\"lnpmi\":" << c.left_npmi  << "," <<
+      "\"rnpmi\":" << c.right_npmi  <<
       "}";
   }
   s << "]\n";
+  //  cout << s.str();
   return s.str();
 }