w2v-server: show window/column normalized collocators
diff --git a/templates/index.html.ep b/templates/index.html.ep
index 427fb56..78ec921 100644
--- a/templates/index.html.ep
+++ b/templates/index.html.ep
@@ -48,18 +48,33 @@
]
} );
- $('#secondtable').DataTable({
+ var t = $('#secondtable').DataTable({
"sScrollY": "800px",
"bScrollCollapse": true,
"bPaginate": false,
"bJQueryUI": true,
"dom": '<"top">rt<"bottom"flp><"clear">',
- "aoColumnDefs": [
- { "sWidth": "10%", "aTargets": [ -1 ] }
- ]
+ "columnDefs": [
+ { "searchable": false,
+ "orderable": false,
+ "targets": 0
+ },
+ { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4 ] },
+ { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 5 ] }
+ ],
+ // start out sorted by column 3, highest value first
+ "order": [[ 3, 'desc' ]]
} );
+ t.on( 'order.dt search.dt', function () {
+ t.column(0, {order:'applied'}).nodes().each( function (cell, i) {
+ cell.innerHTML = i+1;
+ } );
+ } ).draw();
+
});
+
+
$(function(){
$("#dropdownoptions").dialog({
title: "Options",
@@ -720,15 +735,15 @@
% if($collocators) {
<th>#</th>
<th align="right" title="The window around the target word that is considered for summation.">w'</th>
- <th align="right" title="Raw (max.) activation of the collocator in the output layers.">a</th>
- <th title="Σp(c<sub><small>@</small></sub>) – Sum of the probability approximations that the combination of the target word and the collocator at the relative position @ come from the training corpus. Single approximations can be distorted because of sub-sampling frequent words and the sum cannot itself be interpreted as probability." align="right">Σp</th>
- <th align="right">Σp/|w|</th>
+ <th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
+ <th title="(c<sub><small>@</small></sub>) – Sum of the probability approximations that the combination of the target word and the collocator at the relative position @ come from the training corpus. Single approximations can be distorted because of sub-sampling frequent words and the sum cannot itself be interpreted as probability." align="right">⊥Σa</th>
+ <th align="right">Σa/Σw</th>
<th title="c" align="left">collocator</th>
% }
</tr>
</thead>
<tbody>
- % for(my $i=0; $i < 100; $i++) {
+ % for(my $i=0; $i < ($collocators ? scalar(@$collocators) : 0); $i++) {
% my $c = ($collocators? (@$collocators)[$i] : 0);
<tr>
<td align="right">
@@ -739,13 +754,13 @@
<span class="mono"><%= bitvec2window( $c->{pos} ) %></span>
</td>
<td align="right">
- <%= sprintf("%.3f", $c->{dist}) %>
+ <%= sprintf("%.3f", $c->{max}) %>
</td>
<td align="right">
- <%= sprintf("%.3e", $c->{norm}) %>
+ <%= sprintf("%.3e", $c->{conorm}) %>
</td>
<td align="right">
- <%= sprintf("%.3e", $c->{sum}) %>
+ <%= sprintf("%.3e", $c->{prob}) %>
</td>
<td align="left">
<a onclick="<%= sprintf("queryKorAPCII('%s /w5 %s')", $c->{word}, $word) =%>"
diff --git a/w2v-server.pl b/w2v-server.pl
index 0487d71..38b8cb8 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -189,9 +189,10 @@
long long wordi;
long position;
float activation;
- float activation_sum;
- float probability_sum;
float probability;
+ float activation_sum; // where set, copied from target_sums[] for the collocator's word index
+ float conorm; // largest co-normalized activation sum found over all windows
+ float max_activation; // largest single activation seen for this collocator
} collocator;
typedef struct {
@@ -794,6 +795,9 @@
memcpy(best + b, &syn_nbs[0]->best[b], sizeof(collocator));
best[b].position = -1; // syn_nbs[0]->pos[b];
best[b].activation_sum = target_sums[syn_nbs[0]->best[b].wordi];
+ best[b].max_activation = 0.0; // running maxima start at zero; they are raised in the window loop
+ best[b].conorm = 0.0;
+ best[b].probability = 0.0;
}
float best_window_sum[MAX_NEIGHBOURS];
@@ -804,6 +808,9 @@
if(best[i].wordi == syn_nbs[a]->best[b].wordi)
break;
if(i >= found_index) {
+ best[found_index].max_activation = 0.0; // newly found word: zero its running maxima before wordi is stored
+ best[found_index].conorm = 0.0;
+ best[found_index].probability = 0.0;
best[found_index++].wordi = syn_nbs[a]->best[b].wordi;
// printf("found: %s\n", &vocab[syn_nbs[a]->index[b] * max_w]);
}
@@ -813,9 +820,9 @@
printf("window: %d - syn_threads: %d, %d\n", window, syn_threads, (1 << syn_threads) -1);
int wpos;
for(i=0; i < found_index; i++) {
- best[i].activation = 0; best[i].probability = 0;
+ best[i].activation = best[i].probability = best[i].conorm = 0; // reset per-word results; max_activation is not reset here
for(w=1; w < (1 << syn_threads); w++) { // loop through all possible windows
- float word_window_sum = 0, word_activation_sum = 0, total_window_sum = 0;
+ float word_window_sum = 0, word_window_conorm=0, word_activation_sum = 0, total_window_sum = 0; // accumulators, cleared for every window w
int bits_set = 0;
for(a=0; a < syn_threads; a++) {
if((1 << a) & w) {
@@ -831,10 +838,13 @@
for(b=0; b < syn_nbs[a]->length; b++)
if(best[i].wordi == syn_nbs[a]->best[b].wordi) {
// word_window_sum += syn_nbs[a]->dist[b] * syn_nbs[a]->norm[b]; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
-// word_window_sum += syn_nbs[a]->dist[b]; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
// word_window_sum += syn_nbs[a]->norm[b]; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
// word_window_sum = (word_window_sum + syn_nbs[a]->norm[b]) - (word_window_sum * syn_nbs[a]->norm[b]); // syn_nbs[a]->norm[b];
- word_window_sum += syn_nbs[a]->best[b].activation - word_window_sum * syn_nbs[a]->best[b].activation; // conormalied activation sum
+ word_window_sum += syn_nbs[a]->best[b].activation; // plain sum of the word's activations over the positions in window w
+ word_window_conorm += syn_nbs[a]->best[b].activation - word_window_conorm * syn_nbs[a]->best[b].activation; // co-normalized sum: s <- s + a - s*a
+ if(syn_nbs[a]->best[b].activation > best[i].max_activation) { // track the largest single activation for this word
+ best[i].max_activation = syn_nbs[a]->best[b].activation;
+ }
word_activation_sum += syn_nbs[a]->best[b].activation;
}
}
@@ -843,13 +853,17 @@
// word_activation_sum /= bits_set;
// word_window_sum /= bits_set;
// }
-// word_window_sum /= total_window_sum;
+ word_window_sum /= total_window_sum; // normalize by the window total; NOTE(review): total_window_sum is computed outside this view - confirm it cannot be 0
if(word_window_sum > best[i].probability) {
best[i].probability = word_window_sum;
- best[i].activation = word_activation_sum;
best[i].position = w;
}
+
+ if(word_window_conorm > best[i].conorm) { // keep the maximum conorm over all windows, tracked independently of probability/position
+ best[i].conorm = word_window_conorm;
+ best[i].activation = word_activation_sum; // activation sum of the window with the best conorm
+ }
}
}
qsort(best, found_index, sizeof(collocator), cmp_probability);
@@ -927,9 +941,9 @@
if(latin_enc == 0) SvUTF8_on(word);
hv_store(hash, "word", strlen("word"), word , 0);
hv_store(hash, "rank", strlen("rank"), newSVuv(best[a].wordi), 0);
- hv_store(hash, "dist", strlen("dist"), newSVnv(best[a].activation), 0);
- hv_store(hash, "norm", strlen("norm"), newSVnv(best[a].probability), 0);
- hv_store(hash, "sum", strlen("sum"), newSVnv(target_sums[best[a].wordi]), 0);
+ hv_store(hash, "conorm", strlen("conorm"), newSVnv(best[a].conorm), 0); // read by the template via the 'conorm' hash key
+ hv_store(hash, "prob", strlen("prob"), newSVnv(best[a].probability), 0); // read by the template via the 'prob' hash key
+ hv_store(hash, "max", strlen("max"), newSVnv(best[a].max_activation), 0); // read by the template via the 'max' hash key
hv_store(hash, "pos", strlen("pos"), newSVnv(best[a].position), 0);
av_push(array, newRV_noinc((SV*)hash));
}