w2vserver: print words with largest difference between main and merged vectors
diff --git a/w2v-server.pl b/w2v-server.pl
index d0084fb..01b49a7 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -527,6 +527,7 @@
   words += merge_words;
   fclose(f);
 	printf("merged_end: %lld, words: %lld\n", merged_end, words);
+	printBiggestMergedDifferences();
   return((long) merged_end);
 }
 
@@ -773,6 +774,47 @@
   return(wl);
 }
 
+/* Print the N words (among the first 10000 main-vocabulary entries) whose main
+ * vector has the smallest dot product with its merged counterpart, which is
+ * read from row (c + merged_end) of M.  Rows flagged in garbage are skipped. */
+void printBiggestMergedDifferences() {
+  const int N = 100;
+  long long a, c, limit, found = 0;
+  float dist, worstbest = 1000000; /* sentinel: entries at or above it are never kept */
+
+  printf("Looking for biggest distances between main and merged vectors ...\n");
+  collocator *best = calloc(N, sizeof *best);
+  if (best == NULL) {
+    fprintf(stderr, "printBiggestMergedDifferences: out of memory\n");
+    return;
+  }
+  for (a = 0; a < N; a++) best[a].activation = worstbest;
+  /* Merged counterparts start at row merged_end, so main rows must stay below it. */
+  limit = merged_end < 10000 ? merged_end : 10000;
+  for (c = 0; c < limit; c++) {
+    if(garbage && garbage[c]) continue;
+    dist = 0;
+    for (a = 0; a < size; a++) dist += M[a + c * size] * M[a + (c+merged_end) * size];
+    if(dist >= worstbest) continue;
+    /* Insert into the ascending top-N list, dropping the current last entry. */
+    for (a = 0; a < N; a++) {
+      if (dist < best[a].activation) {
+        memmove(best + a + 1, best + a, (N - a -1) * sizeof(collocator));
+        best[a].activation = dist;
+        best[a].wordi = c;
+        break;
+      }
+    }
+    if (found < N) found++; /* count filled slots so empty ones are not printed */
+    worstbest = best[N-1].activation;
+  }
+
+  printf("Most distant vectors for:\n ");
+  for (a = 0; a < found; a++) printf("%s ", &vocab[best[a].wordi * max_w]);
+  printf("\n");
+  free(best);
+}
+
 void *_get_neighbours(void *arg) {
   knnpars *pars = arg;
 	char *st1 = pars->token;