w2v-server: simplify configuration via MOJO_CONFIG
diff --git a/w2v-server.c b/w2v-server.c
index df66371..3791ade 100644
--- a/w2v-server.c
+++ b/w2v-server.c
@@ -3,7 +3,7 @@
#include <math.h>
#include <pthread.h>
#include <stdio.h>
-#include <stdlib.h> //strlen
+#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
@@ -114,6 +114,18 @@
return 1;
}
+/*
+ * Return a newly allocated copy of myStr with its last extension removed.
+ *
+ * Only a '.' inside the final path component counts as the start of an
+ * extension; a dot in a directory name ("models.v1/dereko") is left alone,
+ * otherwise the whole file name would be cut away.
+ *
+ * Returns NULL if myStr is NULL or if the allocation fails.
+ * The caller owns the returned buffer and must free() it.
+ */
+char *removeExtension(char* myStr) {
+  char *retStr;
+  char *lastExt;
+  char *lastSep;
+  if (myStr == NULL) return NULL;
+  if ((retStr = malloc(strlen(myStr) + 1)) == NULL) return NULL;
+  strcpy(retStr, myStr);
+  lastExt = strrchr(retStr, '.');
+  lastSep = strrchr(retStr, '/');
+  /* Cut only when the last dot comes after the last directory separator. */
+  if (lastExt != NULL && (lastSep == NULL || lastExt > lastSep))
+    *lastExt = '\0';
+  return retStr;
+}
+
int init_net(char *file_name, char *net_name, int latin, int do_open_cdb) {
FILE *f, *binvecs, *binwords;
int binwords_fd, binvecs_fd, net_fd, i;
@@ -121,8 +133,13 @@
float len;
double val;
- char binvecs_fname[256], binwords_fname[256];
- strcpy(binwords_fname, file_name);
+ char binvecs_fname[1024], binwords_fname[1024];
+
+  /* Build the base name of the .words file: for ASCII (.txt) vector files the
+   * extension is stripped first, otherwise file_name is used unchanged. */
+  if (strstr(file_name, ".txt")) {
+    /* removeExtension() hands back a malloc'd copy (or NULL on failure).
+     * Hold on to the pointer so it is neither dereferenced when NULL nor
+     * leaked once its contents have been copied. */
+    char *base_name = removeExtension(file_name);
+    if (base_name == NULL) {
+      fprintf(stderr, "Cannot allocate memory for the .words file name of %s\n", file_name);
+      return -1;
+    }
+    strcpy(binwords_fname, base_name);
+    free(base_name);
+  } else {
+    strcpy(binwords_fname, file_name);
+  }
strcat(binwords_fname, ".words");
strcpy(binvecs_fname, file_name);
strcat(binvecs_fname, ".vecs");
@@ -145,6 +162,7 @@
return -1;
}
if (strstr(file_name, ".txt")) {
+ printf("%lld words in ascii vector file with vector size %lld\n", words, size);
for (b = 0; b < words; b++) {
a = 0;
while (1) {
@@ -228,6 +246,8 @@
*ext = 0;
fprintf(stderr, "Opening collocator DB %s\n", collocatordb_name);
cdb = open_collocatordb(collocatordb_name);
+ } else {
+ fprintf(stderr, "Cannot open collocator DB %s\n", collocatordb_name);
}
}
}
@@ -254,6 +274,8 @@
long long merge_size;
char binvecs_fname[256], binwords_fname[256];
+
+
strcpy(binwords_fname, file_name);
strcat(binwords_fname, ".words");
strcpy(binvecs_fname, file_name);
@@ -535,7 +557,7 @@
c++;
st[cn][b] = 0;
if (st1[c] == 0) break;
- if (st1[c] == ' ' || st1[c] == '-') {
+ if (st1[c] == ' ' /*|| st1[c] == '-'*/) {
sep[cn++] = st1[c];
b = 0;
c++;
@@ -816,7 +838,7 @@
a = posix_memalign((void **)&target_sums, 128, cutoff * sizeof(float));
memset(target_sums, 0, cutoff * sizeof(float));
- printf("Starting %d threads\n", para_threads);
+ printf("Starting %d threads for paradigmatic search\n", para_threads);
fflush(stdout);
for (a = 0; a < para_threads; a++) {
pars[a].cutoff = cutoff;
@@ -1150,7 +1172,7 @@
int dump_for_numpy(char *fname) {
long i, j;
FILE *f;
- int max = 300000;
+ int max = words; // 300000;
if ((f = fopen(fname, "w")) == NULL) {
fprintf(stderr, "cannot open %s for writing\n", fname);
diff --git a/w2v-server.conf b/w2v-server.conf
new file mode 100644
index 0000000..71e7003
--- /dev/null
+++ b/w2v-server.conf
@@ -0,0 +1,24 @@
+# Configuration for w2v-server.pl. This file is a Perl hash that is evaluated
+# when the configuration is loaded, so expressions (backticks, %ENV) are allowed.
+{
+  # Settings used when the server runs under hypnotoad.
+  hypnotoad => {
+    listen => ['http://*:5673'],
+    # The short host name is embedded in the pid file name.
+    pid_file => 'w2v-dereko-2020-ii.'. `hostname -s | tr -d "\n"` .'.pid',
+    heartbeat_timeout => 180,
+    workers => 4
+  },
+
+  # Settings used when the server runs under morbo.
+  morbo => {
+    listen => ['http://*:5673'],
+    workers => 1
+  },
+
+  # Vector model served by default.
+  w2v => {
+    vecs => "/vol/work/kupietz/Work2/kl/trunk/Analysemethoden/word2vec/models/dereko-2020-ii.vecs"
+  },
+
+  Piwik => {
+    url => 'https://stats.ids-mannheim.de',
+    # SECURITY NOTE(review): this API token is committed in clear text.
+    # Set W2V_PIWIK_TOKEN_AUTH in the environment to override it; the literal
+    # fallback should be rotated and removed from version control.
+    token_auth => $ENV{W2V_PIWIK_TOKEN_AUTH} // 'ad7609a669179c4ebca7c995342f7e12',
+    site_id => 16,
+    embed => 1
+  },
+}
\ No newline at end of file
diff --git a/w2v-server.pl b/w2v-server.pl
index 7d2bbdb..1b9e5ea 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -11,13 +11,19 @@
#use Mojo::Server::Daemon;
use Cwd;
-my $DEFAULT_VECS="/vol/work/kupietz/Work2/kl/trunk/Analysemethoden/word2vec/models/dereko-2020-ii-ALPHA.vecs";
-my $DEFAULT_NET = $DEFAULT_VECS;
-$DEFAULT_NET =~ s/\.vecs/.net/;
+# Configuration file: taken from MOJO_CONFIG if set, otherwise 'w2v-server.conf'.
+my $mojo_config = $ENV{MOJO_CONFIG} // 'w2v-server.conf';
+plugin Config => {file => $mojo_config};
+
+# Default vector file: w2v.vecs from the configuration, else the built-in path.
+my $DEFAULT_VECS = app->config->{w2v}->{vecs} // "/vol/work/kupietz/Work2/kl/trunk/Analysemethoden/word2vec/models/dereko-2020-ii.vecs";
+
+# Default net file: w2v.net from the configuration, else the vector file name
+# with ".vecs" replaced by ".net" (empty if the file name contains no ".vecs").
+# The match is limited to the last path component so that a ".vecs" inside a
+# directory name is never rewritten.
+my $DEFAULT_NET_NAME = "";
+if ($DEFAULT_VECS =~ m{\.vecs[^/]*\z}) {
+  $DEFAULT_NET_NAME = $DEFAULT_VECS;
+  $DEFAULT_NET_NAME =~ s{\.vecs(?=[^/]*\z)}{.net};
+}
+my $DEFAULT_NET = app->config->{w2v}->{net} // $DEFAULT_NET_NAME;
app->static->paths->[0] = getcwd;
-plugin Config => {file => 'w2v-server.conf'};
plugin 'Piwik';
plugin "RemoteAddr";
plugin 'Util::RandomString' => {
@@ -78,6 +84,10 @@
my $have_sprofiles = load_sprofiles($vecs_name);
+# w2v.merge from the configuration is only a default: a value already present
+# in $opt_m (presumably from the -m command-line option) takes precedence
+# instead of being silently overwritten.
+$opt_m ||= app->config->{w2v}->{merge};
+
if($opt_m) {
$mergedEnd = mergeVectors($opt_m);
$title = "<span class=\"merged\">" . $title . "</span> vs. " . fname2corpusname($opt_m);
@@ -102,6 +112,7 @@
if($opt_G) {
print "Filtering garbage\n";
filter_garbage();
+ print "Finished filtering garbage\n";
}
get '*/js/*' => sub {