w2v-server: display probability of collocator at window pos. @
diff --git a/w2v-server.pl b/w2v-server.pl
index c35095f..a83f37e 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -84,6 +84,7 @@
typedef struct {
long long *index;
float *dist;
+ float *norm; /* dist[i] divided by the sum of expTable activations accumulated for entry i's window position; stays 1 if that position was never normalised */
long long *pos;
unsigned int length;
} knn;
@@ -208,7 +209,7 @@
long window_layer_size = size * window * 2;
long a, b, c, d, e, window_offset, target, max_target=0, maxmax_target;
float f, max_f, maxmax_f;
- float *target_sums, *bestf, worstbest;
+ float *target_sums, *bestf, *bestn, worstbest, wpos_sum;
long long *besti, *bestp;
if(cc == -1)
@@ -218,10 +219,14 @@
besti = malloc(N * sizeof(long long));
bestp = malloc(N * sizeof(long long));
bestf = malloc(N * sizeof(float));
+ bestn = malloc(N * sizeof(float));
+
for (b = 0; b < words; b++)
target_sums[b]=0;
- for (b = 0; b < N; b++)
- bestf[b]=-1;
+ for (b = 0; b < N; b++) {
+ bestn[b] = 1;
+ bestf[b] = -1;
+ }
worstbest = -1;
d = cc;
maxmax_f = -1;
@@ -230,8 +235,8 @@
besti[0]=d;
bestf[0]=1.0;
bestp[0]=0;
-
for (a = window * 2 + 1; a >=0; a--) {
+ wpos_sum = 0;
printf("window pos: %ld\n", a);
if (a != window) {
max_f = -1;
@@ -250,6 +255,7 @@
continue;
else
f = expTable[(int) ((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
+ wpos_sum += f;
if(f > max_f) {
max_f = f;
max_target = target;
@@ -278,9 +284,13 @@
maxmax_f = max_f;
maxmax_target = max_target;
}
+ for (b = 0; b < N; b++)
+ if(bestp[b] == window-a)
+ bestn[b] = bestf[b] / wpos_sum;
} else {
printf("\x1b[1m%s\x1b[0m ", &vocab[d*max_w]);
}
+
}
max_f = -1;
for (b = 0; b < words; b++) {
@@ -299,6 +309,7 @@
nbs = malloc(sizeof(knn));
nbs->index = besti;
nbs->dist = bestf;
+ nbs->norm = bestn;
nbs->pos = bestp;
nbs->length = N;
return(nbs);
@@ -417,7 +428,7 @@
SV *get_neighbours(char *st1, int N) {
HV *result = newHV();
- float bestd[MAX_NEIGHBOURS], vec[max_size];
+ float bestd[MAX_NEIGHBOURS], bestn[MAX_NEIGHBOURS], vec[max_size];
long long besti[MAX_NEIGHBOURS], bestp[MAX_NEIGHBOURS], a, b, c, d, slice;
char *bestw[MAX_NEIGHBOURS];
knn *nbs[MAX_THREADS];
@@ -500,6 +511,7 @@
for(b=0; b < nbs[num_threads]->length; b++) {
besti[b] = nbs[num_threads]->index[b];
bestd[b] = nbs[num_threads]->dist[b];
+ bestn[b] = nbs[num_threads]->norm[b];
bestp[b] = nbs[num_threads]->pos[b];
}
array = newAV();
@@ -510,6 +522,7 @@
if(latin_enc == 0) SvUTF8_on(word);
hv_store(hash, "word", strlen("word"), word , 0);
hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
+ hv_store(hash, "norm", strlen("norm"), newSVnv(bestn[a]), 0);
hv_store(hash, "pos", strlen("pos"), newSVnv(bestp[a]), 0);
av_push(array, newRV_noinc((SV*)hash));
}
@@ -802,7 +815,7 @@
<div id="wrapper">
<table id="first">
<tr>
- <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th><th title="Position in winodw around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title=""Responsivenes" of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ come from the corpus.">resp.</th><th align="left">syntagmatic</th>
+ <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th><th title="Position in window around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title="&quot;Responsiveness&quot; of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ comes from the corpus.">resp.</th><th title="Probability of the collocator at window location @." align="right">p(c<sub><small>@</small></sub>)</th><th align="left">syntagmatic</th>
</tr>
% my $j=0; my @words; my @vecs; my @ranks; for my $list (@$lists) {
% my $i=0; for my $item (@$list) {
@@ -830,6 +843,9 @@
<td align="right">
<%= sprintf("%.3f", $c->{dist}) %>
</td>
+ <td align="right">
+ <%= sprintf("%.3e", $c->{norm}) %>
+ </td>
<td align="left">
<a href="/?word=<%= $c->{word} %>">
<%= $c->{word} %>