w2v-server.pl: add option to sort collocators by mean probability
diff --git a/w2v-server.pl b/w2v-server.pl
index e177932..e7ad7f5 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -34,6 +34,7 @@
my $perplexity=$c->param('perplexity') || 20;
my $epsilon=$c->param('epsilon') || 5;
my $som=$c->param('som') || 0;
+ my $sort=$c->param('sort') || 0;
my $res;
my @lists;
my @collocations;
@@ -43,15 +44,15 @@
for my $w (split(' *\| *', $word)) {
$c->app->log->debug('Looking for neighbours of '.$w);
if($opt_i) {
- $res = get_neighbours(encode("iso-8859-1", $w), $no_nbs);
+ $res = get_neighbours(encode("iso-8859-1", $w), $no_nbs, $sort);
} else {
- $res = get_neighbours($w, $no_nbs);
+ $res = get_neighbours($w, $no_nbs, $sort);
}
push(@lists, $res->{paradigmatic});
}
}
$word =~ s/ *\| */ | /g;
- $c->render(template=>"index", word=>$word, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, lists=> \@lists, collocators=> $res->{syntagmatic});
+ $c->render(template=>"index", word=>$word, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, sort=>$sort, lists=> \@lists, collocators=> $res->{syntagmatic});
};
$daemon->run; # app->start;
@@ -264,7 +265,7 @@
target_sums[target] += f;
if(f > worstbest) {
- for (b = 0; b < N/2; b++) {
+ for (b = 0; b < N; b++) {
if (f > bestf[b]) {
memmove(bestf + b + 1, bestf + b, (N - b -1) * sizeof(float));
memmove(besti + b + 1, besti + b, (N - b -1) * sizeof(long long));
@@ -275,8 +276,8 @@
break;
}
}
- if(b == N/2 - 1)
- worstbest = bestf[N/2-1];
+ if(b == N - 1)
+ worstbest = bestf[N-1];
}
}
printf("%d %.2f\n", max_target, max_f);
@@ -413,7 +414,7 @@
}
-SV *get_neighbours(char *st1, int N) {
+SV *get_neighbours(char *st1, int N, int sort_by) {
HV *result = newHV();
float *target_sums, bestd[MAX_NEIGHBOURS], bestn[MAX_NEIGHBOURS], bests[MAX_NEIGHBOURS], vec[max_size];
long besti[MAX_NEIGHBOURS], bestp[MAX_NEIGHBOURS], a, b, c, d, slice;
@@ -506,6 +507,7 @@
bestd[b] = 0;
bestn[b] = 0;
bestp[b] = 0;
+ bests[b] = 0;
}
printf("Waiting for syn threads to join\n");
@@ -520,24 +522,48 @@
bestd[b] = syn_nbs[0]->dist[b];
bestn[b] = syn_nbs[0]->norm[b];
bestp[b] = syn_nbs[0]->pos[b];
+ bests[b] = target_sums[syn_nbs[0]->index[b]];
}
-
- for(a=1; a < syn_threads; a++) {
- for(b=0; b < syn_nbs[a]->length; b++) {
- for(c=0; c < MAX_NEIGHBOURS; c++) {
- if(syn_nbs[a]->dist[b] > bestd[c]) {
- for(d=N-1; d>c; d--) {
- bestd[d] = bestd[d-1];
- besti[d] = besti[d-1];
- bestn[d] = bestn[d-1];
- bestp[d] = bestp[d-1];
+ if(sort_by != 1) { // default (any value but 1): rank collocators by responsiveness, keyed on dist
+ for(a=1; a < syn_threads; a++) {
+ for(b=0; b < syn_nbs[a]->length; b++) {
+ for(c=0; c < MAX_NEIGHBOURS; c++) {
+ if(syn_nbs[a]->dist[b] > bestd[c]) {
+ for(d=MAX_NEIGHBOURS-1; d>c; d--) {
+ bestd[d] = bestd[d-1];
+ besti[d] = besti[d-1];
+ bestn[d] = bestn[d-1];
+ bestp[d] = bestp[d-1];
+ }
+ besti[c] = syn_nbs[a]->index[b];
+ bestd[c] = syn_nbs[a]->dist[b];
+ bestn[c] = syn_nbs[a]->norm[b];
+ bestp[c] = syn_nbs[a]->pos[b];
+ break;
}
- besti[c] = syn_nbs[a]->index[b];
- bestd[c] = syn_nbs[a]->dist[b];
- bestn[c] = syn_nbs[a]->norm[b];
- bestp[c] = syn_nbs[a]->pos[b];
- break;
+ }
+ }
+ }
+ } else { // sort_by == 1: rank collocators by mean probability, keyed on target_sums
+ for(a=1; a < syn_threads; a++) {
+ for(b=0; b < syn_nbs[a]->length; b++) {
+ for(c=0; c < MAX_NEIGHBOURS; c++) {
+ if(target_sums[syn_nbs[a]->index[b]] > bests[c]) {
+ for(d=MAX_NEIGHBOURS-1; d>c; d--) {
+ bestd[d] = bestd[d-1];
+ besti[d] = besti[d-1];
+ bestn[d] = bestn[d-1];
+ bestp[d] = bestp[d-1];
+ bests[d] = bests[d-1];
+ }
+ besti[c] = syn_nbs[a]->index[b];
+ bestd[c] = syn_nbs[a]->dist[b];
+ bestn[c] = syn_nbs[a]->norm[b];
+ bestp[c] = syn_nbs[a]->pos[b];
+ bests[c] = target_sums[syn_nbs[a]->index[b]];
+ break;
+ }
}
}
}
@@ -835,6 +861,11 @@
max. neighbours: <input type="text" size="8" name="n" value="<%= $no_nbs %>">
max. iterations: <input type="text" name="N" size="8" value="<%= $no_iterations %>">
SOM <input type="checkbox" name="som" value="1" <%= ($show_som ? "checked" : "") %>>
+ <span> </span>sort collocators by
+ <select name="sort">
+ <option value="0" <%= ($sort!=1? "selected":"") %>>responsiveness</option>
+ <option value="1" <%= ($sort==1? "selected":"") %>>mean p</option>
+ </select>
<span> </span><input type="submit" value="Show">
</form>
<br>