w2v-server: auto-focus based on maximized ⊥_w'(a/c) and show average raw activations instead of co-norms

commit: 4116b43f0f1786e67b838fe72fb69520150fb29a [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Wed Dec 06 14:15:32 2017 +0100
committer: Marc Kupietz <kupietz@ids-mannheim.de> Wed Dec 06 14:15:32 2017 +0100
tree: 18737c4cfcf67c733153fd1209e298d676265e64
parent: 3eeb10278fc9ccbf51476465f7de8455ceb9b489 [diff]
diff --git a/templates/index.html.ep b/templates/index.html.ep
index 5ede4fe..f50078c 100644
--- a/templates/index.html.ep
+++ b/templates/index.html.ep

@@ -134,7 +134,7 @@
                      { "data": "rank", type: "allnumeric" },
                      { "data": "pos", width: "7%", sClass: "dt-center mono compact", render: function ( data, type, row ) {return bitvec2window(data, row.heat, row.word) }},
                      { "data": "max",  render: function ( data, type, row ) {return data.toFixed(3) }},
-                     { "data": "conorm", render: function ( data, type, row ) {return data.toFixed(3) }},
+                     { "data": "average", render: function ( data, type, row ) {return data.toFixed(3) }},
                      { "data": "prob", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) }  },
                      { "data": "cprob", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) }  },
                      { "data": "overall", type: "scientific", render: function ( data, type, row ) {return data.toExponential(3) } },
@@ -323,7 +323,7 @@
      }
 
      .ui-tooltip-content {
-       font-size: 9pt;
+       font-size: 10pt;
        color: #222222;
      }
 
@@ -820,11 +820,11 @@
                 <tr>
                   % if($collocators) {
                     <th>#</th>
-                    <th align="center" title="The columns (c) around the target are considered for summation are marked with *.">w'</th>
-                    <th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
-                    <th title="Co-normalized raw activation sum of the collocator in the selected columns." align="right">⊥Σa</th>
-                    <th title="Sum of activations over the selected colunns normalized by the total activation sum of the selected columns." align="right">Σa/Σc</th>
-                    <th title="Sum of the column normalized activations over the selected colunns." align="right">Σ(a/c)</th>
+                    <th align="center" title="Activation of the respective collocator in the columns around the target normalized by its maximum (red). Columns selected by the auto-focus function (which window of all possible column-combinations maximizes ⊥(a/c)?) are marked with +. Click on the column positions to launch a KorAP query with target word and collocator in the respective position.">w'</th>
+                    <th align="right" title="Maximum activation of the collocator anywhere in the output layer.">max(a)</th>
+                    <th title="Average raw activation of the collocator in the columns selected by auto-focus." align="right">⟨a⟩</th>
+                    <th title="Sum of activations over the selected columns normalized by the total activation sum of the selected columns." align="right">Σa/Σw'</th>
+                    <th title="Co-norm of the column-normalized activations over the columns selected by the auto-focus." align="right">⊥(a/c)</th>
                     <th title="Sum of the activations over the whole window normalized by the total window sum (no auto-focus)." align="right">Σa/Σw</th>
                     <th align="left">collocator</th>
                   % }

diff --git a/w2v-server.pl b/w2v-server.pl
index 4276b60..2768296 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl

@@ -189,12 +189,13 @@
 	long long wordi;
 	long position;
 	float activation;
+	float average;
 	float cprobability; // column wise probability
+	float cprobability_sum;
 	float probability;
 	float activation_sum;
-	float conorm;
 	float max_activation;
-  float heat[16];
+	float heat[16];
 } collocator;
 
 typedef struct {
@@ -792,7 +793,7 @@
       best[b].position = -1; //  syn_nbs[0]->pos[b];
       best[b].activation_sum = target_sums[syn_nbs[0]->best[b].wordi];
 			best[b].max_activation = 0.0;
-			best[b].conorm = 0.0;
+			best[b].average = 0.0;
 			best[b].probability = 0.0;
 			best[b].cprobability = syn_nbs[0]->best[b].cprobability;
     }
@@ -806,7 +807,7 @@
 						break;
 				if(i >= found_index) {
 					best[found_index].max_activation = 0.0;
-					best[found_index].conorm = 0.0;
+					best[found_index].average = 0.0;
 					best[found_index].probability = 0.0;
 					best[found_index].cprobability = syn_nbs[a]->best[b].cprobability;
 					best[found_index].activation_sum = target_sums[syn_nbs[a]->best[b].wordi]; // syn_nbs[a]->best[b].activation_sum;
@@ -819,11 +820,12 @@
     if(sort_by != 1 && sort_by != 2) { // sort by auto focus mean
 			printf("window: %d  -  syn_threads: %d, %d\n", window, syn_threads, (1 << syn_threads) -1);
 			int wpos;
+      int bits_set = 0;
 			for(i=0; i < found_index; i++) {
-				best[i].activation = best[i].probability = best[i].conorm = 0;
+				best[i].activation = best[i].probability = best[i].average = best[i].cprobability_sum = 0;
 				for(w=1; w <  (1 << syn_threads); w++) { // loop through all possible windows
-					float word_window_sum = 0, word_window_conorm=0, word_activation_sum = 0, total_window_sum = 0;
-					int bits_set = 0;
+					float word_window_sum = 0, word_window_average=0, word_cprobability_sum=0, word_activation_sum = 0, total_window_sum = 0;
+          bits_set = 0;
 					for(a=0; a < syn_threads; a++) {
 						if((1 << a) & w) {
 							wpos = (a >= window? a+1 : a);
@@ -845,7 +847,8 @@
                   word_window_sum += syn_nbs[a]->best[b].activation; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
 //                  word_window_sum += acti - (word_window_sum * acti); syn_nbs[a]->best[b].activation; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
 
-                  word_window_conorm += syn_nbs[a]->best[b].activation - word_window_conorm * syn_nbs[a]->best[b].activation;  // conormalied activation sum
+                  word_window_average += syn_nbs[a]->best[b].activation; // - word_window_average * syn_nbs[a]->best[b].activation;  // co-normalized activation sum
+                  word_cprobability_sum += syn_nbs[a]->best[b].cprobability - word_cprobability_sum * syn_nbs[a]->best[b].cprobability;  // co-normalized column probability sum
 									word_activation_sum += syn_nbs[a]->best[b].activation;
                   if(syn_nbs[a]->best[b].activation > best[i].max_activation)
 										best[i].max_activation = syn_nbs[a]->best[b].activation;
@@ -854,23 +857,27 @@
 								}
 						}
 					}
-//          if(bits_set) {
+          if(bits_set) {
+            word_window_average /= bits_set;
 //						word_activation_sum /= bits_set;
 //						word_window_sum /= bits_set;
-//          }
+          }
 
 					word_window_sum /= total_window_sum;
 
           if(word_window_sum > best[i].probability) {
+//						best[i].position = w;
 						best[i].probability = word_window_sum;
-						best[i].position = w;
 					}
 
-          if(word_window_conorm > best[i].conorm) {
-						best[i].conorm = word_window_conorm;
-//						best[i].activation = word_activation_sum;
+          if(word_cprobability_sum > best[i].cprobability_sum) {
+						best[i].position = w;
+						best[i].cprobability_sum = word_cprobability_sum;
 					}
-			  }
+
+						best[i].average = word_window_average;
+//						best[i].activation = word_activation_sum;
+        }
 			}
 			qsort(best, found_index, sizeof(collocator), cmp_probability);
 //      for(i=0; i < found_index; i++) {
@@ -950,9 +957,9 @@
       if(latin_enc == 0) SvUTF8_on(word);
       hv_store(hash, "word", strlen("word"), word , 0);
       hv_store(hash, "rank", strlen("rank"), newSVuv(best[a].wordi), 0);
-      hv_store(hash, "conorm", strlen("conorm"), newSVnv(best[a].conorm), 0);
+      hv_store(hash, "average", strlen("average"), newSVnv(best[a].average), 0);
       hv_store(hash, "prob", strlen("prob"), newSVnv(best[a].probability), 0);
-      hv_store(hash, "cprob", strlen("cprob"), newSVnv(best[a].cprobability), 0);
+      hv_store(hash, "cprob", strlen("cprob"), newSVnv(best[a].cprobability_sum), 0);
       hv_store(hash, "max", strlen("max"), newSVnv(best[a].max_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
       hv_store(hash, "overall", strlen("overall"), newSVnv(best[a].activation_sum/total_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
       hv_store(hash, "pos", strlen("pos"), newSVnv(best[a].position), 0);
commit	4116b43f0f1786e67b838fe72fb69520150fb29a	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Wed Dec 06 14:15:32 2017 +0100
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Wed Dec 06 14:15:32 2017 +0100
tree	18737c4cfcf67c733153fd1209e298d676265e64
parent	3eeb10278fc9ccbf51476465f7de8455ceb9b489 [diff]