collocatordb: use max resp. instead of avg. window for ca
diff --git a/collocatordb.cc b/collocatordb.cc
index 9ecbf63..7aca8a5 100644
--- a/collocatordb.cc
+++ b/collocatordb.cc
@@ -11,7 +11,7 @@
 #include <string>
 #include <sstream> // for ostringstream
 #include <math.h>
-#include "rocksdb/cache.h"
+#include <rocksdb/cache.h>
 #include "rocksdb/comparator.h"
 #include "rocksdb/db.h"
 #include "rocksdb/env.h"
@@ -179,6 +179,7 @@
 
   bool rocksdb::CollocatorIterator::isValid() {
     return base_iterator_->Valid() && key().starts_with(std::string(prefixc,3));
+    // Unguarded variant, kept for reference (skips the base_iterator_->Valid() check): return key().starts_with(std::string(prefixc,3));
   }
 
   uint64_t rocksdb::CollocatorIterator::intKey() {
@@ -300,6 +301,7 @@
     virtual void inc(const uint32_t w1, const uint32_t w2, const uint8_t dist);
     void dump(uint32_t w1, uint32_t w2, int8_t dist);
     vector<Collocator> get_collocators(uint32_t w1);
+    vector<Collocator> get_collocators_avg(uint32_t w1); // previous get_collocators implementation, kept under a new name
     string collocators2json(vector<Collocator> collocators);
 
     // mapped to a rocksdb Merge operation
@@ -346,7 +348,7 @@
     }
     uint64_t i = 0;
     while(!feof(fin)) {
-      fscanf(fin, "%s %" PRIu64, strbuf, &freq);
+      fscanf(fin, "%s %lu", strbuf, &freq);
       _vocab.push_back({strbuf, freq});
       total += freq;
       i++;
@@ -482,8 +484,9 @@
 	
 	bool sortByNpmi(const Collocator &lhs, const Collocator &rhs) { return lhs.npmi > rhs.npmi; }
 	bool sortByLfmd(const Collocator &lhs, const Collocator &rhs) { return lhs.lfmd > rhs.lfmd; }
+	bool sortByLlr(const Collocator &a, const Collocator &b) { return b.llr < a.llr; } // orders by descending llr
 
-	std::vector<Collocator> rocksdb::CollocatorDB::get_collocators(uint32_t w1) {
+	std::vector<Collocator> rocksdb::CollocatorDB::get_collocators_avg(uint32_t w1) {
 		std::vector<Collocator> collocators;
     uint64_t w2, last_w2 = 0xffffffffffffffff;
     uint64_t sum = 0, total_w1 = 0;
@@ -530,6 +533,56 @@
 		return collocators;
   }
 
+	// Collects every collocate of w1 and scores it with pmi, npmi, llr, md, lfmd and fpmi.
+	// A (w1, w2) pair is represented by the highest value found among its entries.
+	// The result is sorted by descending lfmd; the leading entries are echoed to stdout.
+	std::vector<Collocator> rocksdb::CollocatorDB::get_collocators(uint32_t w1) {
+		std::vector<Collocator> collocators;
+    const uint64_t kNoWord = 0xffffffffffffffff;  // sentinel: no w2 group opened yet
+    const double window_size = 1;
+    uint64_t last_w2 = kNoWord, peak = 0, total_w1 = 0;
+
+    // Scores one finished w2 group (cofreq = its highest value) and appends it to the result.
+    auto emit = [&](uint64_t w2, uint64_t cofreq) {
+      double pmi = log2( total * ((double) cofreq) /
+                         (window_size * ((double)_vocab[w1].freq) * ((double)_vocab[w2].freq) ));
+      //  Thanopoulos, A., Fakotakis, N., Kokkinakis, G.: Comparative evaluation of collocation extraction metrics. In: International Conference on Language Resources and Evaluation (LREC-2002). (2002) 620–625
+      double md = log2((double)cofreq * cofreq /  ((double) total * window_size * window_size * _vocab[w1].freq * _vocab[w2].freq));
+      collocators.push_back ( {w2, cofreq, pmi, pmi / (-log2(((double) cofreq / window_size / total))), /* normalize to [-1,1] */
+            calculateLLR(_vocab[w1].freq, total, cofreq, _vocab[w2].freq), md, md + log2((double)cofreq / window_size / total), pmi*cofreq/total/window_size} );
+    };
+
+    for ( auto it = std::unique_ptr<CollocatorIterator>(SeekIterator(w1, 0, 0)); it->isValid(); it->Next()) {
+      const uint64_t value = it->intValue(), w2 = W2(it->intKey());
+      total_w1 += value;
+      if (w2 != last_w2) {
+        if (last_w2 != kNoWord) emit(last_w2, peak);  // a new w2 closes the previous group
+        last_w2 = w2;
+        peak = value;
+      } else if (value > peak) {
+        peak = value;
+      }
+    }
+    // The final group has no successor to close it, so it is emitted here instead of being dropped.
+    if (last_w2 != kNoWord) emit(last_w2, peak);
+
+		sort(collocators.begin(), collocators.end(), sortByLfmd);
+
+    int i=0;
+    for (const Collocator &c : collocators) {
+      if(i++>10) break;  // debug output stops after the first 11 entries
+      std::cout << "w1:" << _vocab[w1].word << ", w2:" << _vocab[c.w2].word
+                << "\t f(w1):" << _vocab[w1].freq << "\t f(w2):" << _vocab[c.w2].freq
+                << "\t f(w1, x):" << total_w1 << "\t f(w1, w2):" << c.sum
+                << "\t pmi:" << c.pmi << "\t npmi:" << c.npmi
+                << "\t llr:" << c.llr << "\t md:" << c.md
+                << "\t lfmd:" << c.lfmd << "\t fpmi:" << c.fpmi
+                << "\t total:" << total
+                << std::endl;
+    }
+		return collocators;
+  }
+
   rocksdb::Slice rocksdb::CollocatorIterator::key() const { return base_iterator_->key(); }
   rocksdb::Slice rocksdb::CollocatorIterator::value() const { return base_iterator_->value(); }
   rocksdb::Status rocksdb::CollocatorIterator::status() const { return base_iterator_->status(); }