w2v-server: simplify neighbour aggregation
diff --git a/w2v-server.pl b/w2v-server.pl
index 8085874..9e508bd 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -712,13 +712,12 @@
best[b].activation = para_nbs[0]->best[b].activation;
}
+ long long size=N;
for(a=1; a < para_threads; a++) {
for(b=0; b < para_nbs[a]->length && para_nbs[a]->best[b].wordi >= 0; b++) {
for(c=0; c < N * para_threads; c++) {
if(para_nbs[a]->best[b].activation > best[c].activation) {
- for(d=N-1; d>c; d--) {
- memmove(best + d, best + d - 1, sizeof(collocator));
- }
+ memmove(best + c + 1, best + c, (size++-c-1) * sizeof(collocator));
memcpy(best + c, &para_nbs[a]->best[b], sizeof(collocator));
break;
}
@@ -726,18 +725,21 @@
}
}
- char *chosen[600];
+ long long chosen[MAX_NEIGHBOURS];
+ printf("N: %ld\n", N);
+
AV* array = newAV();
int i, j;
int l1_words=0, l2_words=0;
- for (a = 0, i = 0; i < N && a < 600; a++) {
+
+ for (a = 0, i = 0; i < N && a < N*para_threads; a++) {
int filtered=0;
long long c = best[a].wordi;
if (dedupe && i > 0) {
- for (j=0; j<i; j++)
- if (strcasestr(&vocab[c * max_w], chosen[j]) ||
- strcasestr(chosen[j], &vocab[c * max_w])) {
- printf("filtering %s %s\n", chosen[j], &vocab[c * max_w]);
+ for (j=0; j<i && !filtered; j++)
+ if (strcasestr(&vocab[c * max_w], &vocab[chosen[j] * max_w]) ||
+ strcasestr(&vocab[chosen[j] * max_w], &vocab[c * max_w])) {
+ printf("filtering %s %s\n", &vocab[chosen[j] * max_w], &vocab[c * max_w]);
filtered = 1;
}
if(filtered)
@@ -756,12 +758,12 @@
l2_words++;
}
}
- fflush(stdout);
printf("%s l1:%d l2:%d i:%d a:%ld\n", &vocab[c * max_w], l1_words, l2_words, i, a);
+ fflush(stdout);
HV* hash = newHV();
SV* word = newSVpvf(&vocab[c * max_w], 0);
- chosen[i] = &vocab[c * max_w];
+ chosen[i] = c;
if(latin_enc == 0) SvUTF8_on(word);
fflush(stdout);
hv_store(hash, "word", strlen("word"), word , 0);