w2v-server: add API-function: getSimilarity
Returns the cosine similarity of two words.
e.g.:
> GET 'http://compute:4801/getSimilarity?w1=good&w2=bad'
0.86
diff --git a/w2v-server.pl b/w2v-server.pl
index 45a45c7..9ee47d6 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -206,6 +206,20 @@
$self->render(data => getSimilarProfilesCached($self, $self->param("w") ? $self->param("w") : $self->req->json), format=>'json');
};
+# Route '/getSimilarity' (registered with `any`): reads the request
+# parameters "w1" and "w2" and renders whatever string
+# cos_similarity_as_json() returns for them, using format 'json'.
+any '/getSimilarity' => sub {
+  my $c = $_[0];
+  # Scalar assignments on purpose: each parameter is read in scalar context.
+  my $first_word  = $c->param("w1");
+  my $second_word = $c->param("w2");
+  my $payload = cos_similarity_as_json($first_word, $second_word);
+  $c->render(data => $payload, format => 'json');
+};
+
+# Route '*/getSimilarity' (registered with `any`): same handling as the
+# un-prefixed getSimilarity route, but the pattern starts with a '*'
+# placeholder -- presumably so the endpoint is also reachable behind an
+# arbitrary path prefix (TODO confirm). Reads "w1" and "w2" and renders the
+# string returned by cos_similarity_as_json() with format 'json'.
+any '*/getSimilarity' => sub {
+  my $c = $_[0];
+  # Scalar assignments on purpose: each parameter is read in scalar context.
+  my $first_word  = $c->param("w1");
+  my $second_word = $c->param("w2");
+  my $payload = cos_similarity_as_json($first_word, $second_word);
+  $c->render(data => $payload, format => 'json');
+};
+
get '*/img/*' => sub {
my $c = shift;
my $url = $c->req->url;
@@ -837,6 +851,13 @@
return(wl);
}
+/*
+ * Returns the sum over i in [0, size) of M[i + c * size] * M[i + b * size],
+ * i.e. the dot product of rows b and c when M is read as a row-major matrix
+ * with `size` columns. M and size are defined elsewhere in this file.
+ * NOTE(review): despite the name this is a dot product, not a metric
+ * distance -- confirm how callers interpret the value.
+ */
+float get_distance(long b, long c) {
+  float sum = 0;
+  long i = 0;
+  while (i < size) {
+    sum += M[i + c * size] * M[i + b * size];
+    i++;
+  }
+  return sum;
+}
+
char *getBiggestMergedDifferences() {
static char *result = NULL;
float dist, len, vec[max_size];
@@ -888,6 +909,29 @@
return(result);
}
+
+/*
+ * Similarity score of the vectors stored in rows b and c of M: their dot
+ * product over `size` components.
+ *
+ * Delegates to get_distance(), which computes exactly the same sum in the
+ * same order, instead of keeping a second copy of the loop.
+ * NOTE(review): a dot product is only a true cosine similarity if the rows
+ * of M are unit-normalized -- confirm against the code that loads M.
+ */
+float cos_similarity(long b, long c) {
+  return get_distance(b, c);
+}
+
+/*
+ * Looks up w1 and w2 with getTargetWords() and formats their
+ * cos_similarity() as a decimal string with five fractional digits.
+ *
+ * Returns a malloc()ed, NUL-terminated string, or NULL if the allocation
+ * fails. The value is -1 when either lookup returns NULL or does not yield
+ * exactly one entry.
+ * NOTE(review): -1 is an in-band sentinel that a real similarity could also
+ * take; kept for backward compatibility with existing clients.
+ * NOTE(review): ownership of the returned buffer passes to the caller; if
+ * the glue layer copies the string without freeing it, every call leaks the
+ * buffer -- confirm how the char* return value is consumed.
+ */
+char *cos_similarity_as_json(char *w1, char *w2) {
+  /* Large enough for any finite float printed with "%.5f". */
+  enum { JSON_BUF_SIZE = 64 };
+  wordlist *a, *b;
+  long long index_a = -1, index_b = -1;
+  float res = -1;
+  char *json;
+
+  a = getTargetWords(w1, 0);
+  b = getTargetWords(w2, 0);
+  /* Only read wordi[0] once both lookups are known to be usable; the log
+   * line below prints -1 for the indices otherwise instead of dereferencing
+   * a NULL or empty word list. */
+  if (a != NULL && b != NULL && a->length == 1 && b->length == 1) {
+    index_a = a->wordi[0];
+    index_b = b->wordi[0];
+    res = cos_similarity(a->wordi[0], b->wordi[0]);
+  }
+  fprintf(stderr, "a: %lld b: %lld res:%f\n", index_a, index_b, res);
+
+  json = malloc(JSON_BUF_SIZE);
+  if (json == NULL)
+    return NULL;
+  /* Bounded write: cannot overrun the buffer even for unexpected values. */
+  snprintf(json, JSON_BUF_SIZE, "%.5f", res);
+  return json;
+}
+
void *_get_neighbours(void *arg) {
knnpars *pars = arg;
char *st1 = pars->token;