w2v-server: use conormalized activation sum for auto-focus

Best results after trying all alternatives:

* sum of globally normalized probabilities
* sum of column-set normalized probabilities (also co-normalized)

"best" is meant as a combination of complementarity to max(activation)
and usefulness, particularly for low-frequency words
diff --git a/w2v-server.pl b/w2v-server.pl
index a649820..d7ec900 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -199,9 +199,11 @@
 	long from;
 	unsigned long upto;
   float *target_sums;
+  float *window_sums;
 } knnpars;
 
 float *M, *M2=0L, *syn1neg_window, *expTable;
+float *window_sums;
 char *vocab;
 char *garbage = NULL;
 
@@ -321,6 +323,7 @@
 		expTable[i] = exp((i / (float) EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table
 		expTable[i] = expTable[i] / (expTable[i] + 1); // Precompute f(x) = x / (x + 1)
 	}
+	window_sums = malloc(sizeof(float) * (window+1) * 2);
 	return 0;
 }
 
@@ -491,14 +494,14 @@
     } else {
       printf("\x1b[1m%s\x1b[0m ", &vocab[d*max_w]);
     }
-    
+    pars->window_sums[a] = wpos_sum;
   }
   for (b = 0; b < pars->cutoff; b++)
       pars->target_sums[b] += (target_sums[b] / wpos_sum ) / (window * 2);
   free(target_sums);
-  for(b=0; b<N && besti[b] >= 0; b++); // THIS LOOP IS NEEDED (b...)
-// printf("%s %.2f %d * ", &vocab[besti[b]*max_w], bestf[b], bestp[b]);
-// printf("\n");
+  for(b=0; b<N && besti[b] >= 0; b++);; // THIS LOOP IS NEEDED (b...)
+//		printf("%d: best syn: %s %.2f %.5f\n", b, &vocab[besti[b]*max_w], bestf[b], bestn[b]);
+//	printf("\n");
   nbs = malloc(sizeof(knn));
 	nbs->index = besti;
 	nbs->dist = bestf;
@@ -666,6 +669,7 @@
     for(a=0; a < syn_threads; a++) {
 			pars[a + para_threads].cutoff = cutoff;
       pars[a + para_threads].target_sums = target_sums;
+      pars[a + para_threads].window_sums = window_sums;
       pars[a + para_threads].wl = wl;
       pars[a + para_threads].N = N;
       pars[a + para_threads].from = a;
@@ -766,6 +770,7 @@
     printf("Waiting for syn threads to join\n");
     fflush(stdout);
     for (a = 0; a < syn_threads; a++) pthread_join(pt[a+para_threads], &syn_nbs[a]);
+ for (a = 0; a <= syn_threads; a++)			printf("window pos: %d, sum: %f\n", a, window_sums[a]);
     printf("syn threads joint\n");
     fflush(stdout);
 
@@ -780,7 +785,7 @@
 		float best_window_sum[MAX_NEIGHBOURS];
     int found_index=0, i=0, j, w;
     if(sort_by != 1 && sort_by != 2) { // sort by auto focus mean
-      for(a=1; a < syn_threads; a++) {
+      for(a=0; a < syn_threads; a++) {
         for(b=0; b < syn_nbs[a]->length; b++) {
 					for(i=0; i < found_index; i++)
 						if(besti[i] == syn_nbs[a]->index[b])
@@ -792,30 +797,54 @@
 				}
 			}
 			printf("window: %d  -  syn_threads: %d, %d\n", window, syn_threads, (1 << syn_threads) -1);
+			int wpos;
 			for(i=0; i < found_index; i++) {
+				bestd[i] = 0; bestn[i] = 0;
 				for(w=1; w <  (1 << syn_threads); w++) { // loop through all possible windows
-					float word_window_sum = 0;
+					float word_window_sum = 0, word_activation_sum = 0, total_window_sum = 0;
 					int bits_set = 0;
-					for(a=1; a < syn_threads; a++) {
+					for(a=0; a < syn_threads; a++) {
 						if((1 << a) & w) {
- 						  bits_set++;
-						  for(b=0; b < syn_nbs[a]->length; b++)
-							  if(besti[i] == syn_nbs[a]->index[b])
-								  word_window_sum += syn_nbs[a]->dist[b];
+							wpos = (a >= window? a+1 : a);
+							total_window_sum += window_sums[wpos];
 						}
 					}
-					if(bits_set)
-						word_window_sum /= bits_set;
-					if(word_window_sum > bestd[i]) {
-						bestd[i] = word_window_sum;
+//					printf("%d window-sum %f\n", w, total_window_sum);
+					for(a=0; a < syn_threads; a++) {
+						if((1 << a) & w) {
+							wpos = (a >= window? a+1 : a);
+ 						  bits_set++;
+						  for(b=0; b < syn_nbs[a]->length; b++)
+							  if(besti[i] == syn_nbs[a]->index[b]) {
+//                  word_window_sum += syn_nbs[a]->dist[b] *  syn_nbs[a]->norm[b]; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
+//                    word_window_sum += syn_nbs[a]->dist[b]; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
+//                    word_window_sum += syn_nbs[a]->norm[b]; // / window_sums[wpos];  // syn_nbs[a]->norm[b];
+//                  word_window_sum = (word_window_sum + syn_nbs[a]->norm[b]) - (word_window_sum * syn_nbs[a]->norm[b]);  // syn_nbs[a]->norm[b];
+                  word_window_sum += syn_nbs[a]->dist[b] - word_window_sum * syn_nbs[a]->dist[b];  // conormalied activation sum
+									word_activation_sum += syn_nbs[a]->dist[b];
+								}
+						}
+					}
+//          if(bits_set) {
+//						word_activation_sum /= bits_set;
+//						word_window_sum /= bits_set;
+//          }
+//          word_window_sum /= total_window_sum;
+
+          if(word_window_sum > bestn[i]) {
+						bestn[i] = word_window_sum;
+						bestd[i] = word_activation_sum;
 						bestp[i] = w;
 					}
 			  }
 			}
       for(i=0; i<found_index;i++) {
         for(j=0;j<found_index-1;j++) {
-          if(bestd[j]<bestd[j+1]) {
-            float tempd=bestd[j];
+          if(bestn[j]<bestn[j+1]) {
+            float tempd=bestn[j];
+            bestn[j]=bestn[j+1];
+            bestn[j+1]=tempd;
+            tempd=bestd[j];
             bestd[j]=bestd[j+1];
             bestd[j+1]=tempd;
             int tempi=besti[j];