w2v-server: show mean probability of collocators
diff --git a/w2v-server.pl b/w2v-server.pl
index dc115a3..e177932 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -102,6 +102,7 @@
int N;
long from;
unsigned long upto;
+ float *target_sums;
} knnpars;
float *M, *M2, *syn1neg_window, *expTable;
@@ -261,7 +262,7 @@
f = expTable[(int) ((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
wpos_sum += f;
- target_sums[target] += (1-target_sums[target]) * f;
+ target_sums[target] += f;
if(f > worstbest) {
for (b = 0; b < N/2; b++) {
if (f > bestf[b]) {
@@ -292,17 +293,12 @@
}
}
- max_f = -1;
- for (b = 0; b < words; b++) {
- if(target_sums[b] > max_f) {
- max_f = target_sums[b];
- max_target = b;
- }
- }
+ for (b = 0; b < words; b++)
+ pars->target_sums[b] += (target_sums[b] / wpos_sum ) / (window * 2);
+ free(target_sums);
for(b=0; b<N && besti[b] >= 0; b++) // THIS LOOP IS NEEDED (b...)
- printf("%-32s %.2f %d\n", &vocab[besti[b]*max_w], bestf[b], bestp[b]);
+ printf("%s %.2f %d * ", &vocab[besti[b]*max_w], bestf[b], bestp[b]);
printf("\n");
- free(target_sums);
nbs = malloc(sizeof(knn));
nbs->index = besti;
nbs->dist = bestf;
@@ -419,7 +415,7 @@
SV *get_neighbours(char *st1, int N) {
HV *result = newHV();
- float bestd[MAX_NEIGHBOURS], bestn[MAX_NEIGHBOURS], vec[max_size];
+ float *target_sums, bestd[MAX_NEIGHBOURS], bestn[MAX_NEIGHBOURS], bests[MAX_NEIGHBOURS], vec[max_size];
long besti[MAX_NEIGHBOURS], bestp[MAX_NEIGHBOURS], a, b, c, d, slice;
knn *para_nbs[MAX_THREADS];
knn *syn_nbs[MAX_THREADS];
@@ -438,6 +434,10 @@
if(wl->length < 1)
goto end;
+ a = posix_memalign((void **) &target_sums, 128, words * sizeof(float));
+ for(a = 0; a < words; a++)
+ target_sums[a] = 0;
+
for(a=0; a < para_threads; a++) {
pars[a].token = st1;
pars[a].wl = wl;
@@ -447,6 +447,7 @@
pthread_create(&pt[a], NULL, _get_neighbours, (void *) &pars[a]);
}
for(a=0; a < syn_threads; a++) {
+ pars[a + para_threads].target_sums = target_sums;
pars[a + para_threads].wl = wl;
pars[a + para_threads].N = N;
pars[a + para_threads].from = a;
@@ -549,6 +550,7 @@
hv_store(hash, "word", strlen("word"), word , 0);
hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
hv_store(hash, "norm", strlen("norm"), newSVnv(bestn[a]), 0);
+ hv_store(hash, "sum", strlen("sum"), newSVnv(target_sums[besti[a]]), 0);
hv_store(hash, "pos", strlen("pos"), newSVnv(bestp[a]), 0);
av_push(array, newRV_noinc((SV*)hash));
}
@@ -840,7 +842,7 @@
<div id="wrapper">
<table id="first">
<tr>
- <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th><th title="Position in winodw around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title=""Responsivenes" of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ come from the corpus.">resp.</th><th title="Probability of the collocator at window location @."align="right">p(c<sub><small>@</small></sub>)</th><th align="left">syntagmatic</th>
+ <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th><th title="Position in window around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title="&quot;Responsiveness&quot; of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ comes from the corpus.">resp.</th><th title="Probability of the collocator at window location @." align="right">p(c<sub><small>@</small></sub>)</th><th align="right">Σp(c<sub><small>@</small></sub>)/|w|</th><th align="left">syntagmatic</th>
</tr>
% my $j=0; my @words; my @vecs; my @ranks; for my $list (@$lists) {
% my $i=0; while(1) {
@@ -878,12 +880,15 @@
<td align="right">
<%= sprintf("%.3e", $c->{norm}) %>
</td>
+ <td align="right">
+ <%= sprintf("%.3e", $c->{sum}) %>
+ </td>
<td align="left">
<a href="/?word=<%= $c->{word} %>">
<%= $c->{word} %>
</td>
% } else {
- <td colspan="4"/>
+ <td colspan="5"/>
% }
</tr>
% }