w2v-server: parallel getting of collocators (hacky)
diff --git a/w2v-server.pl b/w2v-server.pl
index 5664290..ec33052 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -77,6 +77,7 @@
#define MAX_CC 50
#define EXP_TABLE_SIZE 1000
#define MAX_EXP 6
+#define MIN_RESP 0.50
//the thread function
void *connection_handler(void *);
@@ -88,8 +89,7 @@
long long *pos;
unsigned int length;
} knn;
-
-
+
typedef struct {
long long wordi[MAX_NEIGHBOURS];
char sep[MAX_NEIGHBOURS];
@@ -204,7 +204,9 @@
return 0;
}
-knn *getCollocators(int cc, int N) {
+void *getCollocators(knnpars *pars) {
+ int N = pars->N;
+ int cc = pars->wl->wordi[0];
knn *nbs = NULL;
long window_layer_size = size * window * 2;
long a, b, c, d, e, window_offset, target, max_target=0, maxmax_target;
@@ -221,21 +223,23 @@
bestf = malloc(N * sizeof(float));
bestn = malloc(N * sizeof(float));
+ worstbest = MIN_RESP;
+
for (b = 0; b < words; b++)
target_sums[b]=0;
for (b = 0; b < N; b++) {
+ besti[b] = -1;
bestn[b] = 1;
- bestf[b] = -1;
+ bestf[b] = worstbest;
}
- worstbest = -1;
+
d = cc;
maxmax_f = -1;
maxmax_target = 0;
- besti[0]=d;
- bestf[0]=1.0;
- bestp[0]=0;
- for (a = window * 2 + 1; a >=0; a--) {
+ for (a = pars->from; a < pars->upto; a++) {
+ if(a >= window)
+ a++;
wpos_sum = 0;
printf("window pos: %ld\n", a);
if (a != window) {
@@ -243,7 +247,7 @@
window_offset = a * size;
if (a > window)
window_offset -= size;
- for(target = 0; target < words / 2; target ++) {
+ for(target = 0; target < words; target ++) {
if(target == d)
continue;
f = 0;
@@ -256,13 +260,10 @@
else
f = expTable[(int) ((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
wpos_sum += f;
- if(f > max_f) {
- max_f = f;
- max_target = target;
- }
+
target_sums[target] += (1-target_sums[target]) * f;
if(f > worstbest) {
- for (b = 0; b < N; b++) {
+ for (b = 0; b < N/2; b++) {
if (f > bestf[b]) {
memmove(bestf + b + 1, bestf + b, (N - b -1) * sizeof(float));
memmove(besti + b + 1, besti + b, (N - b -1) * sizeof(long long));
@@ -273,7 +274,8 @@
break;
}
}
- worstbest = bestf[N-1];
+ if(b == N/2 - 1)
+ worstbest = bestf[N/2-1];
}
}
printf("%d %.2f\n", max_target, max_f);
@@ -297,10 +299,7 @@
max_target = b;
}
}
- printf(" -- max sum: %s (%.2f), max resp.: \x1b[1m%s\x1b[0m (%.2f)\n",
- &vocab[max_target * max_w], max_f,
- &vocab[maxmax_target * max_w], maxmax_f);
- for(b=0; b<N; b++)
+ for(b=0; b<N && besti[b] >= 0; b++) // THIS LOOP IS NEEDED (b...)
printf("%-32s %.2f %d\n", &vocab[besti[b]*max_w], bestf[b], bestp[b]);
printf("\n");
free(target_sums);
@@ -309,8 +308,8 @@
nbs->dist = bestf;
nbs->norm = bestn;
nbs->pos = bestp;
- nbs->length = N;
- return(nbs);
+ nbs->length = b-1;
+ pthread_exit(nbs);
}
wordlist *getTargetWords(char *st1) {
@@ -371,10 +370,6 @@
b = bi[0];
c = 0;
- if(from < 0) {
- nbs = getCollocators(b, pars->N);
- pthread_exit(nbs);
- }
if (b == -1) {
N = 0;
goto end;
@@ -427,25 +422,24 @@
float bestd[MAX_NEIGHBOURS], bestn[MAX_NEIGHBOURS], vec[max_size];
long long besti[MAX_NEIGHBOURS], bestp[MAX_NEIGHBOURS], a, b, c, d, slice;
char *bestw[MAX_NEIGHBOURS];
- knn *nbs[MAX_THREADS];
+ knn *para_nbs[MAX_THREADS];
+ knn *syn_nbs[MAX_THREADS];
knnpars pars[MAX_THREADS];
pthread_t *pt = (pthread_t *)malloc((num_threads+1) * sizeof(pthread_t));
wordlist *wl;
+ int para_threads = num_threads - window * 2;
+ int syn_threads = window * 2;
+ num_threads = para_threads+syn_threads;
if(N>MAX_NEIGHBOURS) N=MAX_NEIGHBOURS;
- slice = words / num_threads;
+ slice = words / syn_threads;
wl = getTargetWords(st1);
+ if(wl->length < 1)
+ goto end;
- a = num_threads;
- pars[a].token = st1;
- pars[a].wl = wl;
- pars[a].N = N;
- pars[a].from = -1;
- pthread_create(&pt[a], NULL, _get_neighbours, (void *) &pars[a]);
-
- for(a=0; a < num_threads; a++) {
+ for(a=0; a < para_threads; a++) {
pars[a].token = st1;
pars[a].wl = wl;
pars[a].N = N;
@@ -453,77 +447,110 @@
pars[a].upto = ((a+1)*slice > words? words:(a+1)*slice);
pthread_create(&pt[a], NULL, _get_neighbours, (void *) &pars[a]);
}
- for (a = 0; a < num_threads; a++) pthread_join(pt[a], &nbs[a]);
+ for(a=0; a < syn_threads; a++) {
+ pars[a + para_threads].wl = wl;
+ pars[a + para_threads].N = N;
+ pars[a + para_threads].from = a;
+ pars[a + para_threads].upto = a+1;
+ pthread_create(&pt[a + para_threads], NULL, getCollocators, (void *) &pars[a + para_threads]);
+ }
+ printf("Waiting for para threads to join\n");
+ fflush(stdout);
+ for (a = 0; a < para_threads; a++) pthread_join(pt[a], &para_nbs[a]);
+ printf("Para threads joint\n");
+ fflush(stdout);
- pthread_join(pt[a], &nbs[a]);
-
- if(!nbs[0])
+ if(!syn_nbs[0])
goto end;
for(b=0; b < N; b++) {
- besti[b] = nbs[0]->index[b];
- bestd[b] = nbs[0]->dist[b];
+ besti[b] = para_nbs[0]->index[b];
+ bestd[b] = para_nbs[0]->dist[b];
}
- for(a=1; a < num_threads; a++) {
- for(b=0; b < N; b++) {
+ for(a=1; a < para_threads; a++) {
+ for(b=0; b < para_nbs[a]->length && para_nbs[a]->index[b] >= 0; b++) {
for(c=0; c < N; c++) {
- if(nbs[a]->dist[b] > bestd[c]) {
+ if(para_nbs[a]->dist[b] > bestd[c]) {
for(d=N-1; d>c; d--) {
bestd[d] = bestd[d-1];
besti[d] = besti[d-1];
}
- besti[c] = nbs[a]->index[b];
- bestd[c] = nbs[a]->dist[b];
+ besti[c] = para_nbs[a]->index[b];
+ bestd[c] = para_nbs[a]->dist[b];
break;
}
}
}
}
-
- if(nbs) {
- AV* array = newAV();
- for (a = 0; a < N; a++) {
- bestw[a] = (char *)malloc(max_size * sizeof(char));
- }
- for (a = 0; a < N; a++) {
- strcpy(bestw[a], &vocab[besti[a] * max_w]);
- HV* hash = newHV();
- SV* word = newSVpvf(bestw[a], 0);
- if(latin_enc == 0) SvUTF8_on(word);
- hv_store(hash, "word", strlen("word"), word , 0);
- hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
- hv_store(hash, "rank", strlen("rank"), newSVuv(besti[a]), 0);
- AV *vector = newAV();
- for (b = 0; b < size; b++) {
- av_push(vector, newSVnv(M[b + besti[a] * size]));
- }
- hv_store(hash, "vector", strlen("vector"), newRV_noinc((SV*)vector), 0);
- av_push(array, newRV_noinc((SV*)hash));
- }
- hv_store(result, "paradigmatic", strlen("paradigmatic"), newRV_noinc((SV*)array), 0);
-
- for(b=0; b < nbs[num_threads]->length; b++) {
- besti[b] = nbs[num_threads]->index[b];
- bestd[b] = nbs[num_threads]->dist[b];
- bestn[b] = nbs[num_threads]->norm[b];
- bestp[b] = nbs[num_threads]->pos[b];
+ AV* array = newAV();
+ for (a = 0; a < N; a++) {
+ bestw[a] = (char *)malloc(max_size * sizeof(char));
+ }
+ for (a = 0; a < N; a++) {
+ strcpy(bestw[a], &vocab[besti[a] * max_w]);
+ HV* hash = newHV();
+ SV* word = newSVpvf(bestw[a], 0);
+ if(latin_enc == 0) SvUTF8_on(word);
+ hv_store(hash, "word", strlen("word"), word , 0);
+ hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
+ hv_store(hash, "rank", strlen("rank"), newSVuv(besti[a]), 0);
+ AV *vector = newAV();
+ for (b = 0; b < size; b++) {
+ av_push(vector, newSVnv(M[b + besti[a] * size]));
}
- array = newAV();
- for (a = 0; a < nbs[num_threads]->length; a++) {
- strcpy(bestw[a], &vocab[besti[a] * max_w]);
- HV* hash = newHV();
- SV* word = newSVpvf(bestw[a], 0);
- if(latin_enc == 0) SvUTF8_on(word);
- hv_store(hash, "word", strlen("word"), word , 0);
- hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
- hv_store(hash, "norm", strlen("norm"), newSVnv(bestn[a]), 0);
- hv_store(hash, "pos", strlen("pos"), newSVnv(bestp[a]), 0);
- av_push(array, newRV_noinc((SV*)hash));
- }
- hv_store(result, "syntagmatic", strlen("syntagmatic"), newRV_noinc((SV*)array), 0);
- }
+ hv_store(hash, "vector", strlen("vector"), newRV_noinc((SV*)vector), 0);
+ av_push(array, newRV_noinc((SV*)hash));
+ }
+ hv_store(result, "paradigmatic", strlen("paradigmatic"), newRV_noinc((SV*)array), 0);
+
+ printf("Waiting for syn threads to join\n");
+ fflush(stdout);
+ for (a = 0; a < syn_threads; a++) pthread_join(pt[a+para_threads], &syn_nbs[a]);
+ printf("syn threads joint\n");
+ fflush(stdout);
+
+ for(b=0; b < N; b++) {
+ besti[b] = syn_nbs[0]->index[b];
+ bestd[b] = syn_nbs[0]->dist[b];
+ bestn[b] = syn_nbs[0]->norm[b];
+ bestp[b] = syn_nbs[0]->pos[b];
+ }
+
+
+ for(a=1; a < syn_threads; a++) {
+ for(b=0; b < N; b++) {
+ for(c=0; c < N; c++) {
+ if(syn_nbs[a]->dist[b] > bestd[c]) {
+ for(d=N-1; d>c; d--) {
+ bestd[d] = bestd[d-1];
+ besti[d] = besti[d-1];
+ bestn[d] = bestn[d-1];
+ bestp[d] = bestp[d-1];
+ }
+ besti[c] = syn_nbs[a]->index[b];
+ bestd[c] = syn_nbs[a]->dist[b];
+ bestn[c] = syn_nbs[a]->norm[b];
+ bestp[c] = syn_nbs[a]->pos[b];
+ break;
+ }
+ }
+ }
+ }
+ array = newAV();
+ for (a = 0; a < N && besti[a] >= 0; a++) {
+ strcpy(bestw[a], &vocab[besti[a] * max_w]);
+ HV* hash = newHV();
+ SV* word = newSVpvf(bestw[a], 0);
+ if(latin_enc == 0) SvUTF8_on(word);
+ hv_store(hash, "word", strlen("word"), word , 0);
+ hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
+ hv_store(hash, "norm", strlen("norm"), newSVnv(bestn[a]), 0);
+ hv_store(hash, "pos", strlen("pos"), newSVnv(bestp[a]), 0);
+ av_push(array, newRV_noinc((SV*)hash));
+ }
+ hv_store(result, "syntagmatic", strlen("syntagmatic"), newRV_noinc((SV*)array), 0);
end:
return newRV_noinc((SV*)result);
}
@@ -833,6 +860,7 @@
<%= $item->{word} %>
</a>
</td>
+ % if($c) {
<td align="right">
<%= $c->{pos} %>:
</td>
@@ -846,6 +874,9 @@
<a href="/?word=<%= $c->{word} %>">
<%= $c->{word} %>
</td>
+ % } else {
+ <td colspan="4"/>
+ % }
</tr>
% }
% }