w2v-server: share one aligned result buffer across threads, use qsort
diff --git a/w2v-server.pl b/w2v-server.pl
index 9e508bd..605db6c 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -214,6 +214,7 @@
 	int N;
 	long from;
 	unsigned long upto;
+  collocator *best;
   float *target_sums;
   float *window_sums;
 } knnpars;
@@ -573,7 +574,7 @@
 	knn *nbs = NULL;
   wordlist *wl = pars->wl;
   
-	collocator *best = malloc(N * sizeof(collocator));
+	collocator *best = pars->best;
 
 	float worstbest=-1;
 
@@ -622,9 +623,6 @@
 		}
 	}
 
-	nbs = malloc(sizeof(knn));
-	nbs->best = best;
-	nbs->length = N;
 end:
 	pthread_exit(nbs);
 }
@@ -655,7 +653,8 @@
   int syn_threads = (M2? window * 2 : 0);
   int para_threads = num_threads - syn_threads;
 
-  collocator *best = malloc(10*MAX_NEIGHBOURS * sizeof(collocator));
+  collocator *best;
+  posix_memalign((void **) &best, 128, 10 * N * sizeof(collocator));
 
   if(N>MAX_NEIGHBOURS) N=MAX_NEIGHBOURS;
 	
@@ -682,6 +681,7 @@
 		pars[a].token = st1;
 		pars[a].wl = wl;
 		pars[a].N = N;
+    pars[a].best = &best[N*a];
 		pars[a].from = a*slice;
 		pars[a].upto = ((a+1)*slice > cutoff? cutoff:(a+1)*slice);
 		pthread_create(&pt[a], NULL, _get_neighbours, (void *) &pars[a]);
@@ -707,23 +707,8 @@
 	/* if(!syn_nbs[0]) */
 	/* 	goto end; */
 
-	for(b=0; b < N; b++) {
-		best[b].wordi = para_nbs[0]->best[b].wordi;
-		best[b].activation = para_nbs[0]->best[b].activation;
-	}
+  qsort(best, N*para_threads, sizeof(collocator), cmp_activation);
 
-  long long size=N;
-	for(a=1; a < para_threads; a++) {
-		for(b=0; b < para_nbs[a]->length && para_nbs[a]->best[b].wordi >= 0; b++) {
-			for(c=0; c < N * para_threads; c++) {
-				if(para_nbs[a]->best[b].activation > best[c].activation) {
-          memmove(best + c + 1, best + c, (size++-c-1) * sizeof(collocator));
-				  memcpy(best + c, &para_nbs[a]->best[b], sizeof(collocator));
-					break;
-				}
-			}
-		}
-	}
 
   long long chosen[MAX_NEIGHBOURS];
   printf("N: %ld\n", N);