Add `md_nws` MI² score based on nominal window size (=10)
Change-Id: I5e431a8ba0f863191f378d447621f3a10039d122
diff --git a/src/collocatordb.cc b/src/collocatordb.cc
index 41d0ad0..87d9dbd 100644
--- a/src/collocatordb.cc
+++ b/src/collocatordb.cc
@@ -7,6 +7,7 @@
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/table.h"
+#include "rocksdb/slice.h"
#include <algorithm>
#include <cassert>
#include <cmath>
@@ -52,6 +53,7 @@
double llr;
double lfmd;
double md;
+ double md_nws;
uint64_t left_raw;
uint64_t right_raw;
double left_pmi;
@@ -603,8 +605,9 @@
uint64_t f1 = _vocab[w1].freq, f2 = _vocab[w2].freq;
double o = sum, r1 = f1 * true_window_size, c1 = f2, e = r1 * c1 / total,
pmi = log2(o / e), md = log2(o * o / e), lfmd = log2(o * o * o / e),
- llr = ca_ll(f1, f2, sum, total, true_window_size);
- double ld = ca_logdice(f1, f2, sum, total, true_window_size);
+ llr = ca_ll(f1, f2, sum, total, true_window_size),
+ md_nws = ca_md(f1, f2, sum, total, 2 * WINDOW_SIZE),
+ ld = ca_logdice(f1, f2, sum, total, true_window_size);
int bestWindow = usedPositions;
double bestAF = ld;
@@ -638,6 +641,7 @@
llr,
lfmd,
md,
+ md_nws,
sumWindow[WINDOW_SIZE],
sumWindow[WINDOW_SIZE - 1],
ca_pmi(f1, f2, sumWindow[WINDOW_SIZE], total, 1),
@@ -828,7 +832,7 @@
<< "\"," << "\"f2\":" << c.f2 << "," << "\"f\":" << c.raw << ","
<< "\"npmi\":" << c.npmi << "," << "\"pmi\":" << c.pmi << ","
<< "\"llr\":" << c.llr << "," << "\"lfmd\":" << c.lfmd << ","
- << "\"md\":" << c.md << "," << "\"dice\":" << c.dice << ","
+ << "\"md\":" << c.md << "," << "\"md_nws\":" << c.md_nws << "," << "\"dice\":" << c.dice << ","
<< "\"ld\":" << c.logdice << "," << "\"ln_count\":" << c.left_raw << ","
<< "\"rn_count\":" << c.right_raw << "," << "\"ln_pmi\":" << c.left_pmi
<< "," << "\"rn_pmi\":" << c.right_pmi << "," << "\"ldaf\":" << c.ldaf
diff --git a/src/collocatordb.h b/src/collocatordb.h
index d601902..7803cd1 100644
--- a/src/collocatordb.h
+++ b/src/collocatordb.h
@@ -106,6 +106,7 @@
double llr;
double lfmd;
double md;
+ double md_nws;
uint64_t left_raw;
uint64_t right_raw;
double left_pmi;