derekovecs: add tab for words with largest distances in reference space
diff --git a/templates/index.html.ep b/templates/index.html.ep
index b9dae8a..418fdc7 100644
--- a/templates/index.html.ep
+++ b/templates/index.html.ep
@@ -81,7 +81,7 @@
var collocatorTable_activated = false;
$( "#tabs" ).on( "tabsactivate", function( event, ui ) {
if (localStorage) localStorage['tab'] = ui.newTab.index();
- if(ui.newTab.index() == 2 && !collocatorTable_activated) {
+ if(ui.newTab.index() == 3 && !collocatorTable_activated) {
classicCollocatorTable.columns.adjust();
collocatorTable.columns.adjust();
collocatorTable_activated = true;
@@ -215,6 +215,51 @@
var collocatorData = <%= b(Mojo::JSON::to_json($collocators)) %>;
var maxHeat; // = Math.max.apply(Math,collocatorData.map(function(o){return o.cprob;}))
+ vocabDistanceTable = $('#vocabdistt').DataTable({
+ ajax: {
+ method: "GET",
+ url: baseURL + '/getBiggestVocabDistances',
+ dataType: 'json',
+ dataSrc: function (result) {
+ return result;
+ },
+ timeout: 30000,
+ },
+ "initComplete":function(settings, json){
+ vocabDistanceTable.columns.adjust().draw();
+ },
+ "createdRow": function (row, data, rowIndex) {
+ $.each($('td.collocator', row), function (colIndex) {
+ $(this).attr('title', "f("+data.word+")="+data.f2.toLocaleString("en-GB") + " f1: "+ccResult.f1+ " total: "+ccResult.N);
+ });
+ },
+ "sScrollY": "780px",
+ "bScrollCollapse": true,
+ "bPaginate": false,
+ "bJQueryUI": true,
+ "dom": '<"top">rt<"bottom"flp><"clear">',
+ "columns": [
+ { "data": "rank", type: "allnumeric" },
+ { "data": "dist", render: function ( data, type, row ) {return data.toFixed(3) }},
+ { "data": "word", class: "paradigmator", render: function ( data, type, row ) {
+ urlprefix.set("word", data); return '<a class="' + getMergedClass(row.rank) + '" href="?' + urlprefix + '">' + data + '</a>'
+ }}
+ ],
+ "columnDefs": [
+ { className: "dt-right", "targets": [0,1] },
+ { "searchable": false,
+ "orderable": false,
+ "targets": 0
+ },
+ { "orderSequence": [ "desc" ], "targets": [ 1 ] },
+ { "orderSequence": [ "asc", "desc" ], "targets": [ 2 ] },
+ ],
+ "oLanguage": {
+ "sSearch": "Filter: "
+ },
+ "order": [[ 1, 'desc' ]]
+ });
+
if (collocatorData != null) {
maxHeat = Math.max.apply(Math,collocatorData.map(function(o){return Math.max.apply(Math,o.heat);}))
collocatorTable = $('#secondtable').DataTable({
@@ -299,6 +344,7 @@
"order": [[ 0, 'desc' ]],
});
}
+
// var filterQuot = /(^quot?=[A-Z])|(quot$)/g;
var filterQuot = /^quot/;
var ccResult;
@@ -563,7 +609,7 @@
var text;
function getMergedClass(i) {
- if(data.mergedEnd && i > data.mergedEnd) {
+ if(typeof data !== 'undefined' && i > data.mergedEnd) {
return " merged"
} else {
return "";
@@ -828,10 +874,36 @@
<div id="topwrapper">
<div style="visibility: hidden;" id="tabs">
<ul>
+ % if($distantWords) {
+ <li><a href="#tabs-0" title="Cos offsets of the words furthest away from their position in the reference corpus.">Offsets</a></li>
+ % }
<li><a href="#tabs-1">Semantics (TSNE-map)</a></li>
<li><a href="#tabs-2">Semantics (SOM)</a></li>
<li><a href="#tabs-3">Syntagmatic (collocates)</a></li>
</ul>
+ %# Emit the Offsets panel only together with its tab: a panel that no tab links to is not shown/hidden by the tabs widget.
+ % if($distantWords) {
+ <div id="tabs-0" style="display: flex; padding: 5px; flex-flow: row wrap;">
+   <div id="vocabdist" style="width: 230px; margin-bottom: 15px;">
+     <table class="display compact nowrap" id="vocabdistt">
+       <thead>
+         <tr>
+           <th align="right">#</th><th align="right">cos</th><th align="left">word</th>
+         </tr>
+       </thead>
+       <tbody>
+         <tr>
+           <td align="right">
+           </td>
+           <td align="right">
+           </td>
+           <td></td>
+         </tr>
+       </tbody>
+     </table>
+   </div>
+ </div>
+ % }
<div id="tabs-1" style="display: flex; padding: 5px; flex-flow: row wrap;">
% if($lists && (@$lists) > 0 && (@$lists)[0]) {
<div id="wrapper">
diff --git a/w2v-server.pl b/w2v-server.pl
index 2f402ec..38dc8e8 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -186,6 +186,16 @@
$self->render(data => getClassicCollocatorsCached($self, $self->param("w") ? $self->param("w") : $self->req->json), format=>'json');
};
+# Serve the JSON produced by getBiggestMergedDifferences(). The same handler
+# is registered for the bare path and for the '*/' wildcard variant
+# (presumably the path below a prefix -- verify against the deployment).
+for my $path ('/getBiggestVocabDistances', '*/getBiggestVocabDistances') {
+  any $path => sub {
+    my $c = shift;
+    $c->render(data => getBiggestMergedDifferences(), format => 'json');
+  };
+}
+
any '*/getSimilarProfiles' => sub {
my $self = shift;
$self->render(data => getSimilarProfilesCached($self, $self->param("w") ? $self->param("w") : $self->req->json), format=>'json');
@@ -268,7 +278,11 @@
$csv_data .= "\n";
return $c->render(text=>$csv_data);
} else {
- $c->render(template=>"index", title=>$title, word=>$word, cutoff=>$cutoff, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, searchBaseVocabFirst=>$searchBaseVocabFirst, sort=>$sort, training_args=>$training_args, mergedEnd=> $mergedEnd, haveSProfiles=> $have_sprofiles, dedupe=> $dedupe, marked=>\%marked, lists=> \@lists, collocators=> $res->{syntagmatic});
+ my $distantWords="";
+ if(!defined($word) || $word !~ /^\s*$/) {
+ $distantWords = getBiggestMergedDifferences();
+ }
+ $c->render(template=>"index", title=>$title, word=>$word, distantWords=>$distantWords, cutoff=>$cutoff, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, searchBaseVocabFirst=>$searchBaseVocabFirst, sort=>$sort, training_args=>$training_args, mergedEnd=> $mergedEnd, haveSProfiles=> $have_sprofiles, dedupe=> $dedupe, marked=>\%marked, lists=> \@lists, collocators=> $res->{syntagmatic});
}
};
@@ -585,7 +599,7 @@
words += merge_words;
fclose(f);
printf("merged_end: %lld, words: %lld\n", merged_end, words);
- printBiggestMergedDifferences();
+ //printBiggestMergedDifferences();
return((long) merged_end);
}
@@ -823,12 +837,16 @@
return(wl);
}
-void printBiggestMergedDifferences() {
+char *getBiggestMergedDifferences() {
+ static char *result = NULL;
float dist, len, vec[max_size];
long long a, b, c, d, cn, *bi;
char ch;
knn *nbs = NULL;
- int N = 100;
+ int N = 1000;
+
+ if(result != NULL)
+ return result;
printf("Looking for biggest distances between main and merged vectors ...\n");
collocator *best;
@@ -857,11 +875,44 @@
}
}
- printf("Most distant vectors for:\n ");
+  /* Serialise the N entries of best[] as a JSON array of
+   * {"rank":..,"word":"..","dist":..} objects. The buffer is sized for the
+   * worst case per entry: every word byte expanding to a 6-byte unicode escape,
+   * plus 128 bytes for keys, punctuation, rank and distance. */
+  size_t cap = (size_t) N * (6 * (size_t) max_w + 128) + 3;
+  char *json = malloc(cap);
+  if (json == NULL) {
+    fprintf(stderr, "getBiggestMergedDifferences: out of memory\n");
+    return "[]";  /* not cached in result, so a later call can retry */
+  }
+  char *p = json;
+  *p++ = '[';
for (a = 0; a < N; a++) {
- printf("%s ", &vocab[best[a].wordi * max_w]);
+    const unsigned char *w = (const unsigned char *) &vocab[best[a].wordi * max_w];
+    const unsigned char *end = w + max_w;  /* never read past this word's slot */
+    int n;
+    if (a > 0)
+      *p++ = ',';
+    /* a is a long long, so it must be printed with %lld, not %d */
+    p += sprintf(p, "{\"rank\":%lld,\"word\":\"", a);
+    for (; w < end && *w; w++) {
+      if (*w == '"' || *w == '\\') {  /* would otherwise break the JSON string */
+        *p++ = '\\';
+        *p++ = (char) *w;
+      } else if (*w < 0x20) {
+        p += sprintf(p, "\\u%04x", (unsigned) *w);
+      } else {
+        *p++ = (char) *w;
+      }
+    }
+    /* Cap the formatted number so a degenerate value cannot exceed the per-entry budget. */
+    n = snprintf(p, 64, "\",\"dist\":%.3f}", 1 - best[a].activation);
+    p += (n < 0) ? 0 : (n < 64 ? n : 63);
}
- printf("\n");
+  *p++ = ']';  /* also yields a valid "[]" when there are no entries */
+  *p = 0;
+  result = json;
+  return(result);
}
void *_get_neighbours(void *arg) {