w2v-server: add column and overall normalized sums
diff --git a/templates/index.html.ep b/templates/index.html.ep
index 78ec921..9e78ea6 100644
--- a/templates/index.html.ep
+++ b/templates/index.html.ep
@@ -59,11 +59,10 @@
 									 "orderable": false,
 									 "targets": 0
 								 }, 
-								 { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4 ] },
-								 { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 5 ] },
-								 { "orderSequence": [ "desc" ], "targets": [ 3 ] }
+								 { "orderSequence": [ "desc" ], "targets": [ 2, 3, 4, 5, 6 ] },
+								 { "orderSequence": [ "asc", "desc" ], "targets": [ 1, 7 ] },
 						 ],
-						 "order": [[ 3, 'desc' ]],
+						 "order": [[ 4, 'desc' ]],
 				 } );
 				 t.on( 'order.dt search.dt', function () {
 						 t.column(0, {order:'applied'}).nodes().each( function (cell, i) {
@@ -626,8 +625,8 @@
 								<label for="sortby">window/sort</label>
 								<select id="sortby" name="sort">
 									<option value="0"  <%= ($sort!=1 && $sort!=2? "selected":"") %>>auto focus</option>
-									<option value="1"  <%= ($sort==1? "selected":"") %>>any single position</option>
-									<option value="2" <%= ($sort==2? "selected":"") %>>whole window</option>
+									<!-- <option value="1"  <%= ($sort==1? "selected":"") %>>any single position</option>
+											 <option value="2" <%= ($sort==2? "selected":"") %>>whole window</option> -->
 								</select>
 							% }
 							<input type="button" value="→ KorAP" onclick="queryKorAP();" title="query word with KorAP"/>
@@ -734,11 +733,13 @@
 								<tr>
 									% if($collocators) {
 										<th>#</th>
-										<th align="right" title="The window around the target word that is considered for summation.">w'</th>
+										<th align="right" title="The columns (c) around the target that are considered for summation are marked with *.">w'</th>
 										<th align="right" title="Raw (max.) activation of the collocator in the output layers.">max(a)</th>
-										<th title="(c<sub><small>@</small></sub>) – Sum of the probability approximations that the combination of the target word and the collocator at the relative position @ come from the training corpus. Single approximations can be distorted because of sub-sampling frequent words and the sum cannot itself be interpreted as probability." align="right">⊥Σa</th>
-										<th align="right">Σa/Σw</th>
-										<th title="c" align="left">collocator</th>
+										<th title="Co-normalized raw activation sum of the collocator in the selected columns." align="right">⊥Σa</th>
+										<th title="Sum of activations over the selected columns normalized by the total activation sum of the selected columns." align="right">Σa/Σc</th>
+										<th title="Sum of the column-normalized activations over the selected columns." align="right">Σ(a/c)</th>
+										<th title="Sum of the activations over the whole window normalized by the total window sum (no auto-focus)." align="right">Σa/Σw</th>
+										<th align="left">collocator</th>
 									% }
 								</tr>
 							</thead>
@@ -762,6 +763,12 @@
 											<td align="right">
 												<%= sprintf("%.3e", $c->{prob}) %>
 											</td>
+											<td align="right">
+												<%= sprintf("%.3e", $c->{cprob}) %>
+											</td>
+											<td align="right">
+												<%= sprintf("%.3e", $c->{overall}) %>
+											</td>
 											<td align="left">
 												<a onclick="<%= sprintf("queryKorAPCII('%s /w5 %s')", $c->{word}, $word) =%>"
 													 title="freq. rank: <%= $c->{rank} =%>">
diff --git a/w2v-server.pl b/w2v-server.pl
index 38b8cb8..9c02dc4 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -189,6 +189,7 @@
 	long long wordi;
 	long position;
 	float activation;
+	float cprobability; // column wise probability
 	float probability;
 	float activation_sum;
 	float conorm;
@@ -500,14 +501,15 @@
       }
       for (b = 0; b < N; b++)
         if(best[b].position == window-a)
-          best[b].probability = best[b].activation / wpos_sum;
+          best[b].cprobability = best[b].activation / wpos_sum;
     } else {
       printf("\x1b[1m%s\x1b[0m ", &vocab[d*max_w]);
     }
     pars->window_sums[a] = wpos_sum;
   }
   for (b = 0; b < pars->cutoff; b++)
-      pars->target_sums[b] += (target_sums[b] / wpos_sum ) / (window * 2);
+      pars->target_sums[b] += target_sums[b]; //(target_sums[b] / wpos_sum ) / (window * 2);
+	printf("Target-Summe von 0: %f\n", pars->target_sums[150298]);
   free(target_sums);
   for(b=0; b<N && best[b].wordi >= 0; b++);; // THIS LOOP IS NEEDED (b...)
 //		printf("%d: best syn: %s %.2f %.5f\n", b, &vocab[best[b].wordi*max_w], best[b].activation, best[b].probability);
@@ -783,11 +785,16 @@
     best[b].activation_sum = 0;
   }
 
+	float total_activation = 0;
+
   if (M2) {
     printf("Waiting for syn threads to join\n");
     fflush(stdout);
     for (a = 0; a < syn_threads; a++) pthread_join(pt[a+para_threads], (void *) &syn_nbs[a]);
- for (a = 0; a <= syn_threads; a++)			printf("window pos: %d, sum: %f\n", a, window_sums[a]);
+		for (a = 0; a <= syn_threads; a++) {
+			total_activation += window_sums[a];
+			printf("window pos: %d, sum: %f\n", a, window_sums[a]);
+		}
     printf("syn threads joint\n");
     fflush(stdout);
 
@@ -798,6 +805,7 @@
 			best[b].max_activation = 0.0;
 			best[b].conorm = 0.0;
 			best[b].probability = 0.0;
+			best[b].cprobability = syn_nbs[0]->best[b].cprobability;
     }
 		
 		float best_window_sum[MAX_NEIGHBOURS];
@@ -811,11 +819,14 @@
 					best[found_index].max_activation = 0.0;
 					best[found_index].conorm = 0.0;
 					best[found_index].probability = 0.0;
+					best[found_index].cprobability = syn_nbs[a]->best[b].cprobability;
+					best[found_index].activation_sum = target_sums[syn_nbs[a]->best[b].wordi]; // syn_nbs[a]->best[b].activation_sum;
 					best[found_index++].wordi = syn_nbs[a]->best[b].wordi;
 					//						printf("found: %s\n", &vocab[syn_nbs[a]->index[b] * max_w]);
 				}
 			}
 		}
+		sort_by =0; // ALWAYS AUTO-FOCUS
     if(sort_by != 1 && sort_by != 2) { // sort by auto focus mean
 			printf("window: %d  -  syn_threads: %d, %d\n", window, syn_threads, (1 << syn_threads) -1);
 			int wpos;
@@ -837,15 +848,18 @@
  						  bits_set++;
 						  for(b=0; b < syn_nbs[a]->length; b++)
 							  if(best[i].wordi == syn_nbs[a]->best[b].wordi) {
+//									float acti = syn_nbs[a]->best[b].activation / total_window_sum;
 //                  word_window_sum += syn_nbs[a]->dist[b] *  syn_nbs[a]->norm[b]; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
 //                    word_window_sum += syn_nbs[a]->norm[b]; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
 //                  word_window_sum = (word_window_sum + syn_nbs[a]->norm[b]) - (word_window_sum * syn_nbs[a]->norm[b]);  // syn_nbs[a]->norm[b];
+
                   word_window_sum += syn_nbs[a]->best[b].activation; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
-                  word_window_conorm += syn_nbs[a]->best[b].activation - word_window_sum * syn_nbs[a]->best[b].activation;  // conormalied activation sum
+//                  word_window_sum += acti - (word_window_sum * acti); syn_nbs[a]->best[b].activation; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
+
+                  word_window_conorm += syn_nbs[a]->best[b].activation - word_window_conorm * syn_nbs[a]->best[b].activation;  // co-normalized activation sum
 									word_activation_sum += syn_nbs[a]->best[b].activation;
                   if(syn_nbs[a]->best[b].activation > best[i].max_activation)
 										best[i].max_activation = syn_nbs[a]->best[b].activation;
-									word_activation_sum += syn_nbs[a]->best[b].activation;
 								}
 						}
 					}
@@ -853,7 +867,8 @@
 //						word_activation_sum /= bits_set;
 //						word_window_sum /= bits_set;
 //          }
-          word_window_sum /= total_window_sum;
+
+					word_window_sum /= total_window_sum;
 
           if(word_window_sum > best[i].probability) {
 						best[i].probability = word_window_sum;
@@ -924,17 +939,19 @@
     array = newAV();
     for (a = 0, i=0; a < MAX_NEIGHBOURS && best[a].wordi >= 0; a++) {
       long long c = best[a].wordi;
+/*
       if (dedupe) {
 	  		int filtered=0;
         for (j=0; j<i; j++)
           if (strcasestr(&vocab[c * max_w], chosen[j]) ||
               strcasestr(chosen[j], &vocab[c * max_w])) {
-                printf("filtering %s %s\n", chosen[j], &vocab[c * max_w]);
-								filtered = 1;
-							}
+						printf("filtering %s %s\n", chosen[j], &vocab[c * max_w]);
+						filtered = 1;
+					}
 				if(filtered)
 					continue;
 			}
+*/
 			chosen[i++]=&vocab[c * max_w];
       HV* hash = newHV();
       SV* word = newSVpvf(&vocab[best[a].wordi * max_w], 0);
@@ -943,7 +960,9 @@
       hv_store(hash, "rank", strlen("rank"), newSVuv(best[a].wordi), 0);
       hv_store(hash, "conorm", strlen("conorm"), newSVnv(best[a].conorm), 0);
       hv_store(hash, "prob", strlen("prob"), newSVnv(best[a].probability), 0);
+      hv_store(hash, "cprob", strlen("cprob"), newSVnv(best[a].cprobability), 0);
       hv_store(hash, "max", strlen("max"), newSVnv(best[a].max_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
+      hv_store(hash, "overall", strlen("overall"), newSVnv(best[a].activation_sum/total_activation), 0); // newSVnv(target_sums[best[a].wordi]), 0);
       hv_store(hash, "pos", strlen("pos"), newSVnv(best[a].position), 0);
       av_push(array, newRV_noinc((SV*)hash));
     }