Add w2v->compare_to parameter for count-based delta calculations

Change-Id: Ic53cbc5b3984659b3ebfa6c8ad30fd42850a9fc9
diff --git a/Changelog.md b/Changelog.md
index 506fcf4..750e924 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -1,5 +1,8 @@
 # Changelog
 
+- added `w2v.compare_to` configuration option that sets the derekovecs instance
+  to compare results against (currently only the count-based LogDice delta is supported)
+
 ## [0.93.2] - 2024-11-15
 
 - fixed calculation of total token count by using collocatordb 1.3.2
diff --git a/example.conf b/example.conf
index 4991015..ddc1d2f 100644
--- a/example.conf
+++ b/example.conf
@@ -14,6 +14,7 @@
 
   w2v => {
     vecs => "example-models/wpd19_10000/wpd19_10000.vecs",
+    # compare_to => "https://corpora.ids-mannheim.de/openlab/derekovecs", # compare results to this derekovecs instance
     # korap_url => "https://korap.ids-mannheim.de"
   },
 
diff --git a/lib/IDS/DeReKoVecs/Read.pm b/lib/IDS/DeReKoVecs/Read.pm
index fb11fe9..6e2acfc 100644
--- a/lib/IDS/DeReKoVecs/Read.pm
+++ b/lib/IDS/DeReKoVecs/Read.pm
@@ -43,15 +43,18 @@
 }
 
 sub getClassicCollocatorsCached {
-  my ($c, $word) = @_;
+  my ($c, $word, $compare_to) = @_;
   my $s2 = "";
   if($word > $mergedEnd) {
     $word-=$mergedEnd;
   }
 
-  if($opt_p >= 5000 && $opt_p < 5600) { # German non-reference
-      open PIPE, "GET http://corpora.ids-mannheim.de/openlab/derekovecs/getClassicCollocators?w=$word  |";
+  my $pipe;
+  if($compare_to ne "") {
+    $c->app->log->info("comparing syn neighbours to: $compare_to/getClassicCollocators?w=$word");
+    open $pipe, "lwp-request $compare_to/getClassicCollocators?w=$word |";
   }
+
   if($opt_C || !$cccache{$word}) {
     $c->app->log->info("Getting classic collocates of $word.");
     $cccache{$word} = getClassicCollocators($word);
@@ -60,11 +63,12 @@
   } else {
     $c->app->log->info("Getting classic collocates for $word from cache.");
   }
-  if($opt_p >= 5000 && $opt_p < 5600) { # German non-reference
-    while(<PIPE>) {
+
+  if(defined($pipe)) {
+    while(<$pipe>) {
       $s2 .= $_;
     }
-    close(PIPE);
+    close($pipe);
   }
 
   if(length($s2) > 2000) {
diff --git a/script/derekovecs-server b/script/derekovecs-server
index 372e472..3811858 100755
--- a/script/derekovecs-server
+++ b/script/derekovecs-server
@@ -25,6 +25,8 @@
 }
 my $DEFAULT_NET           = app->config->{w2v}->{net}            // $DEFAULT_NET_NAME;
 my $DOWNTIME_CALENDAR_URL = app->config->{downtime_calendar_url} // '';
+my $COMPARE_TO            = app->config->{w2v}->{compare_to}     // '';
+
 app->static->paths->[0] = getcwd;
 
 plugin 'Piwik';
@@ -184,12 +186,14 @@
 
 any '*/getClassicCollocators' => sub {
   my $self = shift;
-  $self->render(data => getClassicCollocatorsCached($self, getWord($self->param("w") ? $self->param("w") : $self->req->json)), format=>'json');
+  $self->render(data => getClassicCollocatorsCached($self, getWord($self->param("w") ? $self->param("w",) : $self->req->json),
+      $COMPARE_TO), format=>'json');
 } => 'getClassicCollocators1';
 
 any '/getClassicCollocators' => sub {
   my $self = shift;
-  $self->render(data => getClassicCollocatorsCached($self, getWord($self->param("w") ? $self->param("w") : $self->req->json)), format=>'json');
+  $self->render(data => getClassicCollocatorsCached($self, getWord($self->param("w") ? $self->param("w") : $self->req->json),
+      $COMPARE_TO), format=>'json');
 } => 'getClassicCollocators';
 
 any '/getBiggestVocabDistances' => sub {