w2v-server: change merge semantics to cover mono-lingual corpora only
diff --git a/w2v-server.pl b/w2v-server.pl
index 6f8d7f5..8701f90 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -178,10 +178,20 @@
 	if(defined($word) && $word !~ /^\s*$/) {
 		$c->inactivity_timeout(300);
 		$word =~ s/\s+/ /g;
+    if($opt_m && $word !~ /\|/) {
+      $word .= "|$word";
+    }
     for my $w (split(' *\| *', $word)) {
-      if ($cache{$w.$cutoff.$no_nbs.$sort.$dedupe}) {
+      if($opt_m) {
+        if($searchBaseVocabFirst) {
+          $searchBaseVocabFirst=0;
+        } else {
+          $searchBaseVocabFirst=1;
+        }
+      }
+      if ($cache{$w.$cutoff.$no_nbs.$sort.$dedupe,$searchBaseVocabFirst}) {
         $c->app->log->info("Getting $w results from cache");
-        $res = $cache{$w.$cutoff.$no_nbs.$sort.$dedupe}
+        $res = $cache{$w.$cutoff.$no_nbs.$sort.$dedupe.$searchBaseVocabFirst}
       } else {
         $c->app->log->info('Looking for neighbours of '.$w);
         if($opt_i) {
@@ -732,38 +742,30 @@
 
   while (1) {
     st[cn][b] = st1[c];
-    if (merge_words > 0)
-      st[cn+1][b] = st1[c];
     b++;
     c++;
     st[cn][b] = 0;
     if (st1[c] == 0) break;
     if (st1[c] == ' ' || st1[c] == '-') {
       sep[cn++] = st1[c];
-      if (merge_words > 0)
-        sep[cn++] = st1[c];
       b = 0;
       c++;
     }
   }
   cn++;
-  if (merge_words > 0)
-    cn++;
   for (a = 0; a < cn; a++) {
     if (search_backw) {
-      for (b = words - 1; b >= 0; b--) if (!strcmp(&vocab[b * max_w], st[a])) break;
+      for (b = words - 1; b >= (merge_words? merge_words : 0) && strcmp(&vocab[b * max_w], st[a]) !=0; b--);
     } else {
-      if (merge_words > 0 && a % 2 == 1)
-        for (b = merge_words; b < words && strcmp(&vocab[b * max_w], st[a]) != 0; b++);
-      else
-        for (b = 0; b < words && strcmp(&vocab[b * max_w], st[a]) != 0; b++);
+      for (b = 0;     b < (merge_words? merge_words : words) && strcmp(&vocab[b * max_w], st[a]) != 0; b++);
     }
     if (b == words) b = -1;
     wl->wordi[a] = b;
-    fprintf(stderr, "Word: \"%s\"  Position in vocabulary: %lld\n", &vocab[wl->wordi[a]*max_w], wl->wordi[a]);
     if (b == -1) {
       fprintf(stderr, "Out of dictionary word!\n");
       cn--;
+    } else {
+      fprintf(stderr, "Word: \"%s\"  Position in vocabulary: %lld\n", &vocab[wl->wordi[a]*max_w], wl->wordi[a]);
     }
   }
   wl->length=cn;
@@ -917,7 +919,7 @@
     goto end;
 
 	old_words = cutoff;
-	slice = (para_threads? cutoff / para_threads * (merge_words > 0? 2 : 1)  : 0);
+	slice = cutoff / para_threads;
 
 	a = posix_memalign((void **) &target_sums, 128, cutoff * sizeof(float));
   for(a = 0; a < cutoff; a++)
@@ -931,12 +933,12 @@
 		pars[a].wl = wl;
 		pars[a].N = N;
     pars[a].best = &best[N*a];
-    if(merge_words == 0 || a < para_threads / 2) {
+    if(merge_words == 0 || search_backw == 0) {
       pars[a].from = a*slice;
       pars[a].upto = ((a+1)*slice > cutoff? cutoff : (a+1) * slice);
     } else {
-      pars[a].from = merge_words + (a - para_threads / 2) * slice;
-      pars[a].upto = merge_words + ((a - para_threads / 2 + 1)*slice > cutoff? cutoff : (a - para_threads / 2 + 1) *slice);
+      pars[a].from = merge_words + a * slice;
+      pars[a].upto = merge_words + ((a+1)*slice > cutoff? cutoff : (a+1) * slice);
     }
     printf("From: %ld, Upto: %ld\n",  pars[a].from, pars[a].upto);
 		pthread_create(&pt[a], NULL, _get_neighbours, (void *) &pars[a]);
@@ -985,8 +987,9 @@
       if(filtered)
         continue;
     }
-/*
-    if(merge_words > 0) {
+
+
+    if(0 && merge_words > 0) {
         if(c >= merge_words) {
             if(l1_words > N / 2)
                 continue;
@@ -999,9 +1002,9 @@
                 l2_words++;
         }
     }
-*/
-    printf("%s l1:%d l2:%d i:%d a:%ld\n", &vocab[c * max_w], l1_words, l2_words, i, a);
-    fflush(stdout);
+
+//    printf("%s l1:%d l2:%d i:%d a:%ld\n", &vocab[c * max_w], l1_words, l2_words, i, a);
+//    fflush(stdout);
     HV* hash = newHV();
     SV* word = newSVpvf(&vocab[c * max_w], 0);
 		chosen[i] = c;