w2v-server.pl: index the vocabulary with a Perl hash (HV) so word lookup no longer scans the vocab array linearly
diff --git a/w2v-server.pl b/w2v-server.pl
index c92400b..0b8f178 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -67,6 +67,7 @@
float *M;
char *vocab;
+HV* wordhash;
long long words, size;
int num_threads=20;
@@ -128,6 +129,13 @@
}
}
fclose(f);
+
+ wordhash = newHV();
+ for (a = 0; a < words; a++) {
+ hv_store(wordhash, &vocab[a * max_w], strlen(&vocab[a * max_w]), newSVuv(a), 0);
+ // fprintf(stderr, "%lld: %s\n", a, &vocab[a * max_w]);
+ }
+
return 0;
}
@@ -142,7 +150,8 @@
long long a, b, c, d, cn, bi[100], *besti;
char ch;
knn *nbs = NULL;
-
+ SV **svp;
+
besti = malloc(N * sizeof(long long));
bestd = malloc(N * sizeof(float));
@@ -167,8 +176,14 @@
}
cn++;
for (a = 0; a < cn; a++) {
- for (b = 0; b < words; b++) if (!strcmp(&vocab[b * max_w], st[a])) break;
- if (b == words) b = -1;
+ svp = hv_fetch(wordhash,st[a],strlen(st[a]),0);
+ if (svp) {
+ b = SvUV(*svp);
+ } else {
+ b = -1;
+ }
+ // for (b = 0; b < words; b++) if (!strcmp(&vocab[b * max_w], st[a])) break;
+ // if (b == words) b = -1;
bi[a] = b;
fprintf(stderr, "Word: \"%s\" Position in vocabulary: %lld\n", st[a], bi[a]);
if (b == -1) {