w2v-server: simplify configuration via MOJO_CONFIG
diff --git a/w2v-server.c b/w2v-server.c
index df66371..3791ade 100644
--- a/w2v-server.c
+++ b/w2v-server.c
@@ -3,7 +3,7 @@
 #include <math.h>
 #include <pthread.h>
 #include <stdio.h>
-#include <stdlib.h>  //strlen
+#include <stdlib.h>
 #include <string.h>
 #include <sys/mman.h>
 
@@ -114,6 +114,18 @@
   return 1;
 }
 
+/* Returns a malloc'd copy of myStr (caller frees) without the extension of its last path component; NULL if myStr is NULL or malloc fails. */
+char *removeExtension(char* myStr) {
+    char *retStr, *lastExt, *lastSep;
+    if (myStr == NULL) return NULL;
+    if ((retStr = malloc (strlen (myStr) + 1)) == NULL) return NULL;
+    strcpy (retStr, myStr);
+    lastExt = strrchr (retStr, '.');
+    lastSep = strrchr (retStr, '/');
+    if (lastExt != NULL && (lastSep == NULL || lastExt > lastSep)) *lastExt = '\0';  /* a dot inside a directory name is not an extension */
+    return retStr;
+}
+
 int init_net(char *file_name, char *net_name, int latin, int do_open_cdb) {
   FILE *f, *binvecs, *binwords;
   int binwords_fd, binvecs_fd, net_fd, i;
@@ -121,8 +133,13 @@
   float len;
   double val;
 
-  char binvecs_fname[256], binwords_fname[256];
-  strcpy(binwords_fname, file_name);
+  char binvecs_fname[1024], binwords_fname[1024];
+  /* For ".txt" inputs the .words name is built from the extension-less stem (heap copy, may be NULL). */
+  char *txt_stem = strstr(file_name, ".txt") ? removeExtension(file_name) : NULL;
+
+  /* Bounded copy that leaves room for the ".words" suffix appended next; falls back to file_name. */
+  snprintf(binwords_fname, sizeof(binwords_fname) - strlen(".words"), "%s", txt_stem ? txt_stem : file_name);
+  free(txt_stem);  /* free(NULL) is a no-op; the original never released this buffer */
   strcat(binwords_fname, ".words");
   strcpy(binvecs_fname, file_name);
   strcat(binvecs_fname, ".vecs");
@@ -145,6 +162,7 @@
       return -1;
     }
     if (strstr(file_name, ".txt")) {
+      printf("%lld words in ascii vector file with vector size %lld\n", words, size);
       for (b = 0; b < words; b++) {
         a = 0;
         while (1) {
@@ -228,6 +246,8 @@
           *ext = 0;
           fprintf(stderr, "Opening collocator DB	%s\n", collocatordb_name);
           cdb = open_collocatordb(collocatordb_name);
+        } else {
+           fprintf(stderr, "Cannot open collocator DB	%s\n", collocatordb_name);
         }
       }
     }
@@ -254,6 +274,8 @@
   long long merge_size;
 
   char binvecs_fname[256], binwords_fname[256];
+
+
   strcpy(binwords_fname, file_name);
   strcat(binwords_fname, ".words");
   strcpy(binvecs_fname, file_name);
@@ -535,7 +557,7 @@
     c++;
     st[cn][b] = 0;
     if (st1[c] == 0) break;
-    if (st1[c] == ' ' || st1[c] == '-') {
+    if (st1[c] == ' ' /*|| st1[c] == '-'*/) {
       sep[cn++] = st1[c];
       b = 0;
       c++;
@@ -816,7 +838,7 @@
   a = posix_memalign((void **)&target_sums, 128, cutoff * sizeof(float));
   memset(target_sums, 0, cutoff * sizeof(float));
 
-  printf("Starting %d threads\n", para_threads);
+  printf("Starting %d threads for paradigmatic search\n", para_threads);
   fflush(stdout);
   for (a = 0; a < para_threads; a++) {
     pars[a].cutoff = cutoff;
@@ -1150,7 +1172,7 @@
 int dump_for_numpy(char *fname) {
   long i, j;
   FILE *f;
-  int max = 300000;
+  int max = words; // 300000;
 
   if ((f = fopen(fname, "w")) == NULL) {
     fprintf(stderr, "cannot open %s for writing\n", fname);
diff --git a/w2v-server.conf b/w2v-server.conf
new file mode 100644
index 0000000..71e7003
--- /dev/null
+++ b/w2v-server.conf
@@ -0,0 +1,24 @@
+{
+  hypnotoad => {  # pid_file embeds the short host name printed by `hostname -s`
+    listen  => ['http://*:5673'],
+    pid_file => 'w2v-dereko-2020-ii.'. `hostname -s | tr -d "\n"` .'.pid',
+    heartbeat_timeout => 180,
+    workers => 4
+  },
+  # Settings under the morbo key: same listen address, a single worker.
+  morbo => {
+    listen  => ['http://*:5673'],
+    workers => 1
+  },
+  # w2v.vecs is the default vectors file read by w2v-server.pl (w2v.net and w2v.merge are optional).
+  w2v => {
+    vecs => "/vol/work/kupietz/Work2/kl/trunk/Analysemethoden/word2vec/models/dereko-2020-ii.vecs"
+  },
+  # FIXME(security): token_auth is a credential. Set PIWIK_TOKEN_AUTH in the environment, then rotate and delete the literal fallback.
+  Piwik => {
+    url => 'https://stats.ids-mannheim.de',
+    token_auth => $ENV{PIWIK_TOKEN_AUTH} // 'ad7609a669179c4ebca7c995342f7e12',
+    site_id => 16,
+    embed => 1
+  },
+}
\ No newline at end of file
diff --git a/w2v-server.pl b/w2v-server.pl
index 7d2bbdb..1b9e5ea 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -11,13 +11,19 @@
 #use Mojo::Server::Daemon;
 use Cwd;
 
-my $DEFAULT_VECS="/vol/work/kupietz/Work2/kl/trunk/Analysemethoden/word2vec/models/dereko-2020-ii-ALPHA.vecs";
-my $DEFAULT_NET = $DEFAULT_VECS;
-$DEFAULT_NET =~ s/\.vecs/.net/;
+# Configuration file: $MOJO_CONFIG if set, otherwise w2v-server.conf.
+my $mojo_config = $ENV{MOJO_CONFIG} // 'w2v-server.conf';
+plugin Config => {file => $mojo_config};
+
+# Default vectors file: w2v.vecs from the configuration, else the built-in path.
+my $DEFAULT_VECS = app->config->{w2v}->{vecs} // "/vol/work/kupietz/Work2/kl/trunk/Analysemethoden/word2vec/models/dereko-2020-ii.vecs";
+# Default net file: w2v.net from the configuration, else the vectors path with a trailing ".vecs" replaced by ".net" (empty without that suffix).
+my $DEFAULT_NET_NAME = "";
+($DEFAULT_NET_NAME = $DEFAULT_VECS) =~ s/\.vecs\z/.net/ if $DEFAULT_VECS =~ /\.vecs\z/;  # anchored: directory names containing ".vecs" stay untouched
+my $DEFAULT_NET = app->config->{w2v}->{net} // $DEFAULT_NET_NAME;
 
 app->static->paths->[0] = getcwd;
 
-plugin Config => {file => 'w2v-server.conf'};
 plugin 'Piwik';
 plugin "RemoteAddr";
 plugin 'Util::RandomString' => {
@@ -78,6 +84,10 @@
 
 my $have_sprofiles = load_sprofiles($vecs_name);
 
+if (app->config->{w2v}->{merge}) {  # a truthy w2v.merge entry in the configuration selects the vectors to merge
+  $opt_m = app->config->{w2v}->{merge};  # NOTE(review): replaces any value $opt_m already held (presumably from the command line) — confirm this precedence is intended
+}
+
 if($opt_m) {
   $mergedEnd = mergeVectors($opt_m);
   $title = "<span class=\"merged\">" . $title . "</span> vs. " . fname2corpusname($opt_m);
@@ -102,6 +112,7 @@
 if($opt_G) {
   print "Filtering garbage\n";
   filter_garbage();
+  print "Finished filtering garbage\n";
 }
 
 get '*/js/*' => sub {