w2v-server: improve collocator display
diff --git a/w2v-server.pl b/w2v-server.pl
index fa8e652..c35095f 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -203,14 +203,17 @@
return 0;
}
-knn *getCollocators(int cc) {
+knn *getCollocators(int cc, int N) {
knn *nbs = NULL;
long window_layer_size = size * window * 2;
long a, b, c, d, e, window_offset, target, max_target=0, maxmax_target;
float f, max_f, maxmax_f;
float *target_sums, *bestf, worstbest;
long long *besti, *bestp;
- int N = 10;
+
+ if(cc == -1)
+ return NULL;
+
a = posix_memalign((void **) &target_sums, 128, words * sizeof(float));
besti = malloc(N * sizeof(long long));
bestp = malloc(N * sizeof(long long));
@@ -224,8 +227,10 @@
maxmax_f = -1;
maxmax_target = 0;
- if(cc == -1)
- return NULL;
+ besti[0]=d;
+ bestf[0]=1.0;
+ bestp[0]=0;
+
for (a = window * 2 + 1; a >=0; a--) {
printf("window pos: %ld\n", a);
if (a != window) {
@@ -287,7 +292,7 @@
printf(" -- max sum: %s (%.2f), max resp.: \x1b[1m%s\x1b[0m (%.2f)\n",
&vocab[max_target * max_w], max_f,
&vocab[maxmax_target * max_w], maxmax_f);
- for(b=0; b<N && bestf[b]>0.2; b++)
+ for(b=0; b<N; b++)
printf("%-32s %.2f %d\n", &vocab[besti[b]*max_w], bestf[b], bestp[b]);
printf("\n");
free(target_sums);
@@ -295,7 +300,7 @@
nbs->index = besti;
nbs->dist = bestf;
nbs->pos = bestp;
- nbs->length = b;
+ nbs->length = N;
return(nbs);
}
@@ -358,7 +363,7 @@
c = 0;
if(from < 0) {
- nbs = getCollocators(b);
+ nbs = getCollocators(b, pars->N);
pthread_exit(nbs);
}
if (b == -1) {
@@ -492,7 +497,7 @@
}
hv_store(result, "paradigmatic", strlen("paradigmatic"), newRV_noinc((SV*)array), 0);
- for(b=0; b < 10; b++) {
+ for(b=0; b < nbs[num_threads]->length; b++) {
besti[b] = nbs[num_threads]->index[b];
bestd[b] = nbs[num_threads]->dist[b];
bestp[b] = nbs[num_threads]->pos[b];
@@ -793,21 +798,15 @@
<span> </span><input type="submit" value="Show">
</form>
<br>
- % if($collocators) {
- <div id="collocators">
- % for my $item (@$collocators) {
- <i><%= $item->{word} %></i> (<%= $item->{pos} %>: <%= sprintf("%.2f", $item->{dist}) %>)
- % }
- </div>
- % }
% if($lists) {
<div id="wrapper">
<table id="first">
<tr>
- <th align="right">Pos.</th><th align="left">Word</th><th align="right">Cosine dist.</th><th>Freq. rank</th>
+ <th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th><th title="Position in window around target word. Absolute value can be too low because of sub-sampling of frequent words.">@</th><th align="right" title="&quot;Responsiveness&quot; of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ comes from the corpus.">resp.</th><th align="left">syntagmatic</th>
</tr>
% my $j=0; my @words; my @vecs; my @ranks; for my $list (@$lists) {
- % my $i=1; for my $item (@$list) {
+ % my $i=0; for my $item (@$list) {
+ % my $c = (@$collocators)[$i];
% if(!grep{$_ eq $item->{word}} @words) {
% push @vecs, $item->{vector};
% push @words, $item->{word};
@@ -815,18 +814,25 @@
% }
<tr>
<td align="right">
- <%= $i++ %>.
- </td>
- <td>
- <a href="/?word=<%= $item->{word} %>">
- <%= $item->{word} %>
- </a>
+ <%= ++$i %>.
</td>
<td align="right">
<%= sprintf("%.3f", $item->{dist}) %>
</td>
+ <td>
+ <a title="freq. rank: <%= $item->{rank} %>" href="/?word=<%= $item->{word} %>">
+ <%= $item->{word} %>
+ </a>
+ </td>
<td align="right">
- <%= $item->{rank} %>
+ <%= $c->{pos} %>:
+ </td>
+ <td align="right">
+ <%= sprintf("%.3f", $c->{dist}) %>
+ </td>
+ <td align="left">
+ <a href="/?word=<%= $c->{word} %>">
+ <%= $c->{word} %>
</td>
</tr>
% }