w2v-server.pl: reduce start-up time by caching normalized vectors and vocabulary in .vecs/.words files
diff --git a/w2v-server.pl b/w2v-server.pl
index 181c185..f4f83f6 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -57,7 +57,12 @@
char *stringBuffer;
int init_net(char *file_name) {
- FILE *f;
+ FILE *f, *binvecs, *binwords;
+ char binvecs_fname[256], binwords_fname[256];
+ strcpy(binwords_fname, file_name);
+ strcat(binwords_fname, ".words");
+ strcpy(binvecs_fname, file_name);
+ strcat(binvecs_fname, ".vecs");
stringBuffer = malloc(64000);
f = fopen(file_name, "rb");
@@ -75,20 +80,33 @@
printf("Cannot allocate memory: %lld MB %lld %lld\n", (long long)words * size * sizeof(float) / 1048576, words, size);
return -1;
}
- for (b = 0; b < words; b++) {
- a = 0;
- while (1) {
- vocab[b * max_w + a] = fgetc(f);
- if (feof(f) || (vocab[b * max_w + a] == ' ')) break;
- if ((a < max_w) && (vocab[b * max_w + a] != '\n')) a++;
- }
- vocab[b * max_w + a] = 0;
- fread(&M[b * size], sizeof(float), size, f);
- len = 0;
- for (a = 0; a < size; a++) len += M[a + b * size] * M[a + b * size];
- len = sqrt(len);
- for (a = 0; a < size; a++) M[a + b * size] /= len;
- }
+ if( (binvecs = fopen(binvecs_fname, "rb")) != NULL && (binwords = fopen(binwords_fname, "rb")) != NULL) {
+ fread(M, sizeof(float), (long long)words * (long long)size, binvecs);
+ fclose(binvecs);
+ fread(vocab, sizeof(char), (long long)words * max_w, binwords);
+ fclose(binwords);
+ } else {
+ for (b = 0; b < words; b++) {
+ a = 0;
+ while (1) {
+ vocab[b * max_w + a] = fgetc(f);
+ if (feof(f) || (vocab[b * max_w + a] == ' ')) break;
+ if ((a < max_w) && (vocab[b * max_w + a] != '\n')) a++;
+ }
+ vocab[b * max_w + a] = 0;
+ fread(&M[b * size], sizeof(float), size, f);
+ len = 0;
+ for (a = 0; a < size; a++) len += M[a + b * size] * M[a + b * size];
+ len = sqrt(len);
+ for (a = 0; a < size; a++) M[a + b * size] /= len;
+ }
+ if( (binvecs = fopen(binvecs_fname, "wb")) != NULL && (binwords = fopen(binwords_fname, "wb")) != NULL) {
+ fwrite(M, sizeof(float), (long long)words * (long long)size, binvecs);
+ fclose(binvecs);
+ fwrite(vocab, sizeof(char), (long long)words * max_w, binwords);
+ fclose(binwords);
+ }
+ }
fclose(f);
return 0;
}