derekovecs: compare collocates with reference corpus
diff --git a/w2v-server.pl b/w2v-server.pl
index 22d94dc..0873562 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -114,16 +114,67 @@
sub getClassicCollocatorsCached {
my ($c, $word) = @_;
+  # Return the classic collocates of $word as a JSON string in which every
+  # entry of "collocates" carries an extra "delta" field: the entry's "ld"
+  # value minus the "ld" value the reference server reports for the same
+  # collocate (-5 if the reference does not list it), or 0 when no usable
+  # reference data was obtained.
+  #   $c    - object used for logging via $c->app->log
+  #   $word - word id; presumably numeric, as it is compared with and
+  #           reduced by $mergedEnd (TODO confirm against callers)
if($word > $mergedEnd) {
$word-=$mergedEnd;
}
+
if(!$cccache{$word}) {
$c->app->log->info("Getting classic collocates of $word.");
$cccache{$word} = getClassicCollocators($word);
} else {
$c->app->log->info("Getting classic collocates for $word from cache.");
}
- return $cccache{$word};
+
+  # "ld" values reported by the reference server, keyed by collocate word.
+  my %reference_ld;
+  my $have_reference = 0;
+  if($opt_p >= 5000 && $opt_p < 5600) { # German non-reference
+    # Percent-encode the id so that no caller-supplied character can alter
+    # the query string.
+    my $query_word = "$word";
+    utf8::encode($query_word);
+    $query_word =~ s/([^A-Za-z0-9_.~-])/sprintf("%%%02X", ord($1))/ge;
+    my $reference_json = "";
+    # List-form pipe open executes GET directly instead of through a shell,
+    # so shell metacharacters in $word cannot inject commands.
+    if(open(my $fh, '-|', 'GET', "http://compute:5673/getClassicCollocators?w=$query_word")) {
+      local $/; # slurp the whole response
+      my $response = <$fh>;
+      $reference_json = $response if defined $response;
+      close($fh);
+    } else {
+      $c->app->log->info("Could not run GET for reference collocates of $word: $!");
+    }
+    # Shorter responses are treated as "no usable reference data".
+    if(length($reference_json) > 2000) {
+      # A long but malformed response must not abort the request.
+      my $reference = eval { decode_json($reference_json) };
+      if(ref($reference) eq 'HASH' && ref($reference->{collocates}) eq 'ARRAY') {
+        foreach my $i (@{$reference->{collocates}}) {
+          $reference_ld{$i->{word}} = $i->{ld};
+        }
+        $have_reference = 1;
+      } else {
+        $c->app->log->info("Ignoring unparsable reference collocates for $word.");
+      }
+    }
+  }
+
+  my $local = decode_json($cccache{$word});
+  foreach my $i (@{$local->{collocates}}) {
+    my $w = $i->{word};
+    my $reference_value = defined $reference_ld{$w} ? $reference_ld{$w} : -5;
+    $i->{delta} = $have_reference ? $i->{ld} - $reference_value : 0;
+  }
+  return(encode_json($local));
}
sub getSimilarProfilesCached {