w2v-server: show all collected collocators
diff --git a/w2v-server.pl b/w2v-server.pl
index ec33052..5a24fdb 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -420,8 +420,7 @@
SV *get_neighbours(char *st1, int N) {
HV *result = newHV();
float bestd[MAX_NEIGHBOURS], bestn[MAX_NEIGHBOURS], vec[max_size];
- long long besti[MAX_NEIGHBOURS], bestp[MAX_NEIGHBOURS], a, b, c, d, slice;
- char *bestw[MAX_NEIGHBOURS];
+ long besti[MAX_NEIGHBOURS], bestp[MAX_NEIGHBOURS], a, b, c, d, slice;
knn *para_nbs[MAX_THREADS];
knn *syn_nbs[MAX_THREADS];
knnpars pars[MAX_THREADS];
@@ -486,12 +485,8 @@
AV* array = newAV();
for (a = 0; a < N; a++) {
- bestw[a] = (char *)malloc(max_size * sizeof(char));
- }
- for (a = 0; a < N; a++) {
- strcpy(bestw[a], &vocab[besti[a] * max_w]);
HV* hash = newHV();
- SV* word = newSVpvf(bestw[a], 0);
+ SV* word = newSVpv(&vocab[besti[a] * max_w], 0); /* copy verbatim (len 0 => strlen); the word is data, not a format pattern */
if(latin_enc == 0) SvUTF8_on(word);
hv_store(hash, "word", strlen("word"), word , 0);
hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
@@ -505,13 +500,21 @@
}
hv_store(result, "paradigmatic", strlen("paradigmatic"), newRV_noinc((SV*)array), 0);
+ for(b=0; b < MAX_NEIGHBOURS; b++) {
+ besti[b] = -1L;
+ bestd[b] = 0;
+ bestn[b] = 0;
+ bestp[b] = 0;
+ }
+
printf("Waiting for syn threads to join\n");
fflush(stdout);
for (a = 0; a < syn_threads; a++) pthread_join(pt[a+para_threads], &syn_nbs[a]);
printf("syn threads joint\n");
fflush(stdout);
- for(b=0; b < N; b++) {
+
+ for(b=0; b < syn_nbs[0]->length && b < MAX_NEIGHBOURS; b++) { /* best* arrays hold MAX_NEIGHBOURS entries */
besti[b] = syn_nbs[0]->index[b];
bestd[b] = syn_nbs[0]->dist[b];
bestn[b] = syn_nbs[0]->norm[b];
@@ -520,8 +523,8 @@
for(a=1; a < syn_threads; a++) {
- for(b=0; b < N; b++) {
- for(c=0; c < N; c++) {
+ for(b=0; b < syn_nbs[a]->length; b++) {
+ for(c=0; c < MAX_NEIGHBOURS; c++) {
if(syn_nbs[a]->dist[b] > bestd[c]) {
for(d=N-1; d>c; d--) {
bestd[d] = bestd[d-1];
@@ -539,10 +542,9 @@
}
}
array = newAV();
- for (a = 0; a < N && besti[a] >= 0; a++) {
- strcpy(bestw[a], &vocab[besti[a] * max_w]);
+ for (a = 0; a < MAX_NEIGHBOURS && besti[a] >= 0; a++) {
HV* hash = newHV();
- SV* word = newSVpvf(bestw[a], 0);
+ SV* word = newSVpv(&vocab[besti[a] * max_w], 0); /* copy verbatim (len 0 => strlen); the word is data, not a format pattern */
if(latin_enc == 0) SvUTF8_on(word);
hv_store(hash, "word", strlen("word"), word , 0);
hv_store(hash, "dist", strlen("dist"), newSVnv(bestd[a]), 0);
@@ -841,17 +843,20 @@
<th align="right">#</th><th align="right">cos</th><th align="left">paradigmatic</th><th title="Position in winodw around target word. Absolute value can be too low because of sub-sampling frequent words.">@</th><th align="right" title=""Responsivenes" of the collocator at the relative position @. Approximation of the probability that the combination of the target word and the collocator at the relative position @ come from the corpus.">resp.</th><th title="Probability of the collocator at window location @."align="right">p(c<sub><small>@</small></sub>)</th><th align="left">syntagmatic</th>
</tr>
% my $j=0; my @words; my @vecs; my @ranks; for my $list (@$lists) {
- % my $i=0; for my $item (@$list) {
- % my $c = (@$collocators)[$i];
- % if(!grep{$_ eq $item->{word}} @words) {
- % push @vecs, $item->{vector};
- % push @words, $item->{word};
- % push @ranks, $item->{rank};
- % }
+ % my $i=0; while(1) {
+ % my $item = (@$list)[$i];
+ % my $c = (@$collocators)[$i];
+ % last if(!$c && !$item);
<tr>
<td align="right">
<%= ++$i %>.
</td>
+ % if($item) {
+ % if(!grep{$_ eq $item->{word}} @words) {
+ % push @vecs, $item->{vector};
+ % push @words, $item->{word};
+ % push @ranks, $item->{rank};
+ % }
<td align="right">
<%= sprintf("%.3f", $item->{dist}) %>
</td>
@@ -860,6 +865,9 @@
<%= $item->{word} %>
</a>
</td>
+ % } else {
+ <td colspan="2"/>
+ % }
% if($c) {
<td align="right">
<%= $c->{pos} %>: