derekovecs: add tab for words with largest distances in reference space
diff --git a/w2v-server.pl b/w2v-server.pl
index 2f402ec..38dc8e8 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -186,6 +186,16 @@
$self->render(data => getClassicCollocatorsCached($self, $self->param("w") ? $self->param("w") : $self->req->json), format=>'json');
};
+any '/getBiggestVocabDistances' => sub {  # responds with the string built by getBiggestMergedDifferences()
+  my ($c) = @_;
+  return $c->render(format => 'json', data => getBiggestMergedDifferences());
+};
+
+any '*/getBiggestVocabDistances' => sub {  # '*/'-prefixed variant of the route; renders the same data
+  my ($c) = @_;
+  return $c->render(format => 'json', data => getBiggestMergedDifferences());
+};
+
any '*/getSimilarProfiles' => sub {
my $self = shift;
$self->render(data => getSimilarProfilesCached($self, $self->param("w") ? $self->param("w") : $self->req->json), format=>'json');
@@ -268,7 +278,11 @@
$csv_data .= "\n";
return $c->render(text=>$csv_data);
} else {
- $c->render(template=>"index", title=>$title, word=>$word, cutoff=>$cutoff, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, searchBaseVocabFirst=>$searchBaseVocabFirst, sort=>$sort, training_args=>$training_args, mergedEnd=> $mergedEnd, haveSProfiles=> $have_sprofiles, dedupe=> $dedupe, marked=>\%marked, lists=> \@lists, collocators=> $res->{syntagmatic});
+ my $distantWords=""; # handed to the index template as distantWords; stays empty unless filled below
+ if(!defined($word) || $word !~ /^\s*$/) { # NOTE(review): true for an undefined or a non-blank $word, false only for a defined blank one -- confirm that "!~" (not "=~") is intended
+ $distantWords = getBiggestMergedDifferences();
+ }
+ $c->render(template=>"index", title=>$title, word=>$word, distantWords=>$distantWords, cutoff=>$cutoff, no_nbs=>$no_nbs, no_iterations => $no_iterations, epsilon=> $epsilon, perplexity=> $perplexity, show_som=>$som, searchBaseVocabFirst=>$searchBaseVocabFirst, sort=>$sort, training_args=>$training_args, mergedEnd=> $mergedEnd, haveSProfiles=> $have_sprofiles, dedupe=> $dedupe, marked=>\%marked, lists=> \@lists, collocators=> $res->{syntagmatic});
}
};
@@ -585,7 +599,7 @@
words += merge_words;
fclose(f);
printf("merged_end: %lld, words: %lld\n", merged_end, words);
- printBiggestMergedDifferences();
+ // The biggest merged differences are no longer printed here; getBiggestMergedDifferences() builds them on its first call.
return((long) merged_end);
}
@@ -823,12 +837,16 @@
return(wl);
}
-void printBiggestMergedDifferences() {
+char *getBiggestMergedDifferences() { /* returns the list as text; computed on the first call, then served from the static cache */
+ static char *result = NULL; /* output of the first call, kept for all later calls */
float dist, len, vec[max_size];
long long a, b, c, d, cn, *bi;
char ch;
knn *nbs = NULL;
- int N = 100;
+ int N = 1000; /* number of entries written to the result */
+
+ if(result != NULL) /* already computed: skip the search below */
+ return result;
printf("Looking for biggest distances between main and merged vectors ...\n");
collocator *best;
@@ -857,11 +875,32 @@
}
}
- printf("Most distant vectors for:\n ");
+  /* Serialise the N best entries as a JSON array of {rank, word, dist} objects.
+     Per entry the buffer needs 28 bytes of JSON punctuation, the word (shorter
+     than max_w bytes, each of which may gain a one-byte escape prefix) and the
+     two numbers; 128 bytes is generous for a long long rank plus a "%.3f"
+     distance -- NOTE(review): assumes best[].activation is a float. */
+  size_t cap = (size_t) N * (2 * max_w + 128) + 3;
+  const char *w;
+  result = malloc(cap);
+  if (result == NULL)
+    return "[]"; /* out of memory: report an empty list; result stays NULL, so the next call retries */
+  char *p = result;
+  *p++ = '[';
for (a = 0; a < N; a++) {
- printf("%s ", &vocab[best[a].wordi * max_w]);
+    p += sprintf(p, "{\"rank\":%lld,\"word\":\"", a); /* a is a long long, hence %lld rather than %d */
+    for (w = &vocab[best[a].wordi * max_w]; *w; w++) {
+      if (*w == '"' || *w == '\\') /* keep the JSON string literal well-formed */
+        *p++ = '\\';
+      *p++ = *w;
+    }
+    p += sprintf(p, "\",\"dist\":%.3f},", 1-best[a].activation);
}
- printf("\n");
+  if (N > 0)
+    p--; /* overwrite the comma that follows the last entry */
+  *p++ = ']';
+  *p = 0;
+  return(result);
}
void *_get_neighbours(void *arg) {