w2v-server: auto-focus based on maximized ⊥_w'(a/c)
and show average raw activations instead of co-norms
diff --git a/templates/index.html.ep b/templates/index.html.ep
index 5ede4fe..f50078c 100644
--- a/templates/index.html.ep
+++ b/templates/index.html.ep
@@ -134,7 +134,7 @@
{ "data": "rank", type: "allnumeric" },
{ "data": "pos", width: "7%", sClass: "dt-center mono compact", render: function ( data, type, row ) {return bitvec2window(data, row.heat, row.word) }},
{ "data": "max", render: function ( data, type, row ) {return data.toFixed(3) }},
- { "data": "conorm", render: function ( data, type, row ) {return data.toFixed(3) }},
+ { "data": "average", render: function ( data, type, row ) {return data.toFixed(3) }},
{ "data": "prob", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
{ "data": "cprob", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
{ "data": "overall", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
@@ -323,7 +323,7 @@
}
.ui-tooltip-content {
- font-size: 9pt;
+ font-size: 10pt;
color: #222222;
}
@@ -820,11 +820,11 @@
<tr>
% if($collocators) {
<th>#</th>
- <th align="center" title="The columns (c) around the target are considered for summation are marked with *.">w'</th>
- <th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
- <th title="Co-normalized raw activation sum of the collocator in the selected columns." align="right">⊥Σa</th>
- <th title="Sum of activations over the selected colunns normalized by the total activation sum of the selected columns." align="right">Σa/Σc</th>
- <th title="Sum of the column normalized activations over the selected colunns." align="right">Σ(a/c)</th>
+ <th align="center" title="Activation of the respective collocator in the columns around the target normalized by its maximum (red). Columns selected by the auto-focus function (which window of all possible column-combinations maximizes ⊥(a/c)?) are marked with +. Click on the column positions to launch a KorAP query with target word and collocator in the respective position.">w'</th>
+ <th align="right" title="Maximum activation of the collocator anywhere in the output layer.">max(a)</th>
+ <th title="Average raw activation of the collocator in the columns selected by auto-focus." align="right">⟨a⟩</th>
+ <th title="Sum of activations over the selected columns normalized by the total activation sum of the selected columns." align="right">Σa/Σw'</th>
+ <th title="Co-norm of the column-normalized activations over the columns selected by the auto-focus." align="right">⊥(a/c)</th>
<th title="Sum of the activations over the whole window normalized by the total window sum (no auto-focus)." align="right">Σa/Σw</th>
<th align="left">collocator</th>
% }
diff --git a/w2v-server.pl b/w2v-server.pl
index 4276b60..2768296 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -189,12 +189,13 @@
long long wordi;
long position;
float activation;
+ float average;
float cprobability; // column wise probability
+ float cprobability_sum;
float probability;
float activation_sum;
- float conorm;
float max_activation;
- float heat[16];
+ float heat[16];
} collocator;
typedef struct {
@@ -792,7 +793,7 @@
best[b].position = -1; // syn_nbs[0]->pos[b];
best[b].activation_sum = target_sums[syn_nbs[0]->best[b].wordi];
best[b].max_activation = 0.0;
- best[b].conorm = 0.0;
+ best[b].average = 0.0;
best[b].probability = 0.0;
best[b].cprobability = syn_nbs[0]->best[b].cprobability;
}
@@ -806,7 +807,7 @@
break;
if(i >= found_index) {
best[found_index].max_activation = 0.0;
- best[found_index].conorm = 0.0;
+ best[found_index].average = 0.0;
best[found_index].probability = 0.0;
best[found_index].cprobability = syn_nbs[a]->best[b].cprobability;
best[found_index].activation_sum = target_sums[syn_nbs[a]->best[b].wordi]; // syn_nbs[a]->best[b].activation_sum;
@@ -819,11 +820,12 @@
if(sort_by != 1 && sort_by != 2) { // sort by auto focus mean
printf("window: %d - syn_threads: %d, %d\n", window, syn_threads, (1 << syn_threads) -1);
int wpos;
+ int bits_set = 0;
for(i=0; i < found_index; i++) {
- best[i].activation = best[i].probability = best[i].conorm = 0;
+ best[i].activation = best[i].probability = best[i].average = best[i].cprobability_sum = 0;
for(w=1; w < (1 << syn_threads); w++) { // loop through all possible windows
- float word_window_sum = 0, word_window_conorm=0, word_activation_sum = 0, total_window_sum = 0;
- int bits_set = 0;
+ float word_window_sum = 0, word_window_average=0, word_cprobability_sum=0, word_activation_sum = 0, total_window_sum = 0;
+ bits_set = 0;
for(a=0; a < syn_threads; a++) {
if((1 << a) & w) {
wpos = (a >= window? a+1 : a);
@@ -845,7 +847,8 @@
word_window_sum += syn_nbs[a]->best[b].activation; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
// word_window_sum += acti - (word_window_sum * acti); syn_nbs[a]->best[b].activation; // / window_sums[wpos]; // syn_nbs[a]->norm[b];
- word_window_conorm += syn_nbs[a]->best[b].activation - word_window_conorm * syn_nbs[a]->best[b].activation; // conormalied activation sum
+ word_window_average += syn_nbs[a]->best[b].activation; // - word_window_average * syn_nbs[a]->best[b].activation; // raw activation sum (co-normalization disabled); divided by bits_set below
+ word_cprobability_sum += syn_nbs[a]->best[b].cprobability - word_cprobability_sum * syn_nbs[a]->best[b].cprobability; // co-normalized column probability sum
word_activation_sum += syn_nbs[a]->best[b].activation;
if(syn_nbs[a]->best[b].activation > best[i].max_activation)
best[i].max_activation = syn_nbs[a]->best[b].activation;
@@ -854,23 +857,27 @@
}
}
}
-// if(bits_set) {
+ if(bits_set) {
+ word_window_average /= bits_set;
// word_activation_sum /= bits_set;
// word_window_sum /= bits_set;
-// }
+ }
word_window_sum /= total_window_sum;
if(word_window_sum > best[i].probability) {
+// best[i].position = w;
best[i].probability = word_window_sum;
- best[i].position = w;
}
- if(word_window_conorm > best[i].conorm) {
- best[i].conorm = word_window_conorm;
-// best[i].activation = word_activation_sum;
+ if(word_cprobability_sum > best[i].cprobability_sum) {
+ best[i].position = w;
+ best[i].cprobability_sum = word_cprobability_sum;
}
- }
+
+ best[i].average = word_window_average;
+// best[i].activation = word_activation_sum;
+ }
}
qsort(best, found_index, sizeof(collocator), cmp_probability);
// for(i=0; i < found_index; i++) {
@@ -950,9 +957,9 @@
if(latin_enc == 0) SvUTF8_on(word);
hv_store(hash, "word", strlen("word"), word , 0);
hv_store(hash, "rank", strlen("rank"), newSVuv(best[a].wordi), 0);
- hv_store(hash, "conorm", strlen("conorm"), newSVnv(best[a].conorm), 0);
+ hv_store(hash, "average", strlen("average"), newSVnv(best[a].average), 0);
hv_store(hash, "prob", strlen("prob"), newSVnv(best[a].probability), 0);
- hv_store(hash, "cprob", strlen("cprob"), newSVnv(best[a].cprobability), 0);
+ hv_store(hash, "cprob", strlen("cprob"), newSVnv(best[a].cprobability_sum), 0);
hv_store(hash, "max", strlen("max"), newSVnv(best[a].max_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
hv_store(hash, "overall", strlen("overall"), newSVnv(best[a].activation_sum/total_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
hv_store(hash, "pos", strlen("pos"), newSVnv(best[a].position), 0);