w2v-server: simplify configuration via MOJO_CONFIG
diff --git a/w2v-server.c b/w2v-server.c
index df66371..3791ade 100644
--- a/w2v-server.c
+++ b/w2v-server.c
@@ -3,7 +3,7 @@
#include <math.h>
#include <pthread.h>
#include <stdio.h>
-#include <stdlib.h> //strlen
+#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
@@ -114,6 +114,18 @@
return 1;
}
+char *removeExtension(char* myStr) { // Return a heap copy of myStr cut at its last '.'; NULL if myStr is NULL or malloc fails.
+ char *retStr; // the malloc'd copy handed back to the caller; it is not freed in this function
+ char *lastExt; // points at the last '.' inside retStr, or NULL when there is none
+ if (myStr == NULL) return NULL;
+ if ((retStr = malloc (strlen (myStr) + 1)) == NULL) return NULL; // +1 for the terminating NUL
+ strcpy (retStr, myStr);
+ lastExt = strrchr (retStr, '.'); // searches the whole string, so a '.' in a directory part would match too
+ if (lastExt != NULL)
+ *lastExt = '\0'; // truncate the copy in place; myStr itself is never modified
+ return retStr; // unchanged copy when no '.' was found
+}
+
int init_net(char *file_name, char *net_name, int latin, int do_open_cdb) {
FILE *f, *binvecs, *binwords;
int binwords_fd, binvecs_fd, net_fd, i;
@@ -121,8 +133,13 @@
float len;
double val;
- char binvecs_fname[256], binwords_fname[256];
- strcpy(binwords_fname, file_name);
+ char binvecs_fname[1024], binwords_fname[1024];
+
+ if (strstr(file_name, ".txt")) {
+ strcpy(binwords_fname, removeExtension(file_name));
+ } else {
+ strcpy(binwords_fname, file_name);
+ }
strcat(binwords_fname, ".words");
strcpy(binvecs_fname, file_name);
strcat(binvecs_fname, ".vecs");
@@ -145,6 +162,7 @@
return -1;
}
if (strstr(file_name, ".txt")) {
+ printf("%lld words in ascii vector file with vector size %lld\n", words, size);
for (b = 0; b < words; b++) {
a = 0;
while (1) {
@@ -228,6 +246,8 @@
*ext = 0;
fprintf(stderr, "Opening collocator DB %s\n", collocatordb_name);
cdb = open_collocatordb(collocatordb_name);
+ } else {
+ fprintf(stderr, "Cannot open collocator DB %s\n", collocatordb_name);
}
}
}
@@ -254,6 +274,8 @@
long long merge_size;
char binvecs_fname[256], binwords_fname[256];
+
+
strcpy(binwords_fname, file_name);
strcat(binwords_fname, ".words");
strcpy(binvecs_fname, file_name);
@@ -535,7 +557,7 @@
c++;
st[cn][b] = 0;
if (st1[c] == 0) break;
- if (st1[c] == ' ' || st1[c] == '-') {
+ if (st1[c] == ' ' /*|| st1[c] == '-'*/) {
sep[cn++] = st1[c];
b = 0;
c++;
@@ -816,7 +838,7 @@
a = posix_memalign((void **)&target_sums, 128, cutoff * sizeof(float));
memset(target_sums, 0, cutoff * sizeof(float));
- printf("Starting %d threads\n", para_threads);
+ printf("Starting %d threads for paradigmatic search\n", para_threads);
fflush(stdout);
for (a = 0; a < para_threads; a++) {
pars[a].cutoff = cutoff;
@@ -1150,7 +1172,7 @@
int dump_for_numpy(char *fname) {
long i, j;
FILE *f;
- int max = 300000;
+ int max = words; // 300000;
if ((f = fopen(fname, "w")) == NULL) {
fprintf(stderr, "cannot open %s for writing\n", fname);