derekovecs/collocatordb: add MI, MI², dice, and log-dice scores
diff --git a/templates/index.html.ep b/templates/index.html.ep
index 2882ea9..c05d7a7 100644
--- a/templates/index.html.ep
+++ b/templates/index.html.ep
@@ -41,9 +41,12 @@
MathJax.Hub.Queue(
["Typeset",MathJax.Hub,"ccd"],
function () {
+ $("#mi_tt").attr("title",$("#pmi_ttt").html());
$("#lfmd_tt").attr("title",$("#lfmd_ttt").html());
+ $("#md_tt").attr("title",$("#md_ttt").html());
$("#npmi_tt").attr("title",$("#npmi_ttt").html());
$("#ll_tt").attr("title",$("#ll_ttt").html());
+ $("#logdice_tt").attr("title",$("#logdice_ttt").html());
}
);
});
@@ -285,9 +288,13 @@
"columns": [
// { "data": "pos", width: "7%", sClass: "dt-center mono compact", render: function ( data, type, row ) {return bitvec2window(data, row.heat, row.word) }},
{ "data": "llr", render: function ( data, type, row ) {return data.toFixed(0) }},
- { "data": "lfmd", render: function ( data, type, row ) {return data.toFixed(1) }},
+ { "data": "pmi", render: function ( data, type, row ) {return data.toFixed(2) }},
+ { "data": "md", render: function ( data, type, row ) {return data.toFixed(2) }},
+ { "data": "lfmd", render: function ( data, type, row ) {return data.toFixed(2) }},
// { "data": "fpmi", type: "scientific", render: function ( data, type, row ) {return data.toExponential(2) } },
- { "data": "npmi", render: function ( data, type, row ) {return data.toFixed(2) }},
+ { "data": "npmi", render: function ( data, type, row ) {return data.toFixed(3) }},
+ { "data": "dice", render: function ( data, type, row ) {return data.toExponential(2) }},
+ { "data": "ld", render: function ( data, type, row ) {return data.toFixed(2) }},
{ "data": "llfmd", render: function ( data, type, row ) {return data.toFixed(1) }},
{ "data": "rlfmd", render: function ( data, type, row ) {return data.toFixed(1) }},
{ "data": "lnpmi", render: function ( data, type, row ) {return data.toFixed(2) }},
@@ -296,14 +303,14 @@
{ "data": "word", sClass: "collocator" }
],
"columnDefs": [
- { className: "dt-right", "targets": [0,1,2,3,4,5,6,7] },
- { className: "dt-right detail", "targets": [3,4,5,6] },
+ { className: "dt-right", "targets": [0,1,2,3,4,5,6,7,8,9,10,11] },
+ { className: "dt-right detail", "targets": [5,7,8,9,10] },
{ "searchable": false,
"orderable": false,
"targets": []
},
- { "orderSequence": [ "desc" ], "targets": [ 0, 1, 2,3,4,5,6,7 ] },
- { "orderSequence": [ "asc", "desc" ], "targets": [ 8 ] },
+ { "orderSequence": [ "desc" ], "targets": [0,1,2,3,4,5,6,7,8,9,10,11] },
+ { "orderSequence": [ "asc", "desc" ], "targets": [12] },
],
"oLanguage": {
"sSearch": "Filter: "
@@ -710,9 +717,12 @@
</script>
</head>
<body onload="onload()">
- <div style="display:none;" id="lfmd_ttt">PMI cubed [1], also called log-frequency biased mutual dependency [2]: $$\text{PMI}^3=\text{LFMD}=log_2\frac{p^3(w_1,w_2)}{p(w_1) p(w_2)}$$<p class="citation">[1] Daille, B. (1994): <a href="http://www.bdaille.com/index.php?option=com_docman&task=doc_download&gid=8&Itemid=">Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques</a>. PhD thesis, Université Paris 7.</p><p class="citation">[2] Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): <a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.11.8101&rep=rep1&type=pdf">Comparative evaluation of collocation extraction metrics</a>. In: Proc. of LREC 2002: 620–625.</p></div>
+ <div style="display:none;" id="pmi_ttt">Pointwise mutual information: $$\text{PMI}=\text{MI}=log_2\frac{p(w_1,w_2)}{p(w_1) p(w_2)}$$<p class="citation">Church, K. W. and Hanks, P. (1990): Word association norms, mutual information, and lexicography. Comput. Linguist. 16, 1 (March 1990), 22-29.</p></div>
+ <div style="display:none;" id="md_ttt">Pointwise mutual information squared [1], also called mutual dependency [2]: $$\text{MI}^2=\text{MD}=log_2\frac{p^2(w_1,w_2)}{p(w_1) p(w_2)}$$<p class="citation">[1] Daille, B. (1994): <a href="http://www.bdaille.com/index.php?option=com_docman&task=doc_download&gid=8&Itemid=">Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques</a>. PhD thesis, Université Paris 7.</p><p class="citation">[2] Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): <a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.11.8101&rep=rep1&type=pdf">Comparative evaluation of collocation extraction metrics</a>. In: Proc. of LREC 2002: 620–625.</p></div>
+ <div style="display:none;" id="lfmd_ttt">Pointwise mutual information cubed [1], also called log-frequency biased mutual dependency [2]: $$\text{MI}^3=\text{LFMD}=log_2\frac{p^3(w_1,w_2)}{p(w_1) p(w_2)}$$<p class="citation">[1] Daille, B. (1994): <a href="http://www.bdaille.com/index.php?option=com_docman&task=doc_download&gid=8&Itemid=">Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques</a>. PhD thesis, Université Paris 7.</p><p class="citation">[2] Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): <a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.11.8101&rep=rep1&type=pdf">Comparative evaluation of collocation extraction metrics</a>. In: Proc. of LREC 2002: 620–625.</p></div>
<div style="display:none;" id="npmi_ttt">Normalized pointwise mutual information: $$\frac{log_2\frac{p(w_1,w_2)}{p(w_1)p(w_2)}}{-log_2(p(w_1,w_2))}$$<p class="citation">Bouma, Gerlof (2009): <a href="https://svn.spraakdata.gu.se/repos/gerlof/pub/www/Docs/npmi-pfd.pdf">Normalized (pointwise) mutual information in collocation extraction</a>. In Proceedings of GSCL.</p></div>
<div style="display:none;" id="ll_ttt">Log-likelihood: $$2*\sum_{ij}O_{ij}*log\frac{O_{ij}}{E_{ij}}$$<p class="citation">Evert, Stefan (2004): <a href="http://purl.org/stefan.evert/PUB/Evert2004phd.pdf">The Statistics of Word Cooccurrences: Word Pairs and Collocations.</a> PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.</p></div>
+ <div style="display:none;" id="logdice_ttt">Log-Dice: $$14 + log_2 \frac{2*(f_{12}+ε)}{(f_1+ε) + (f_2+ε)}$$<p class="citation">Rychlý, Pavel (2008): <a href="http://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf">A lexicographer-friendly association score.</a> In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9, 2008</p></div>
<div id="header">
<div id="pagetitle">
<h1>DeReKoVecs</h1>