w2v-server.pl: get target words only once
diff --git a/w2v-server.pl b/w2v-server.pl
index 4b43c49..1cddc86 100644
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -90,6 +90,13 @@
typedef struct {
+ long long wordi[MAX_NEIGHBOURS]; /* vocabulary position of each target word; getTargetWords stores -1 for a word it cannot find in vocab */
+ char sep[MAX_NEIGHBOURS]; /* read by _get_neighbours via wl->sep; presumably the separator following each word - NOTE(review): getTargetWords also declares a local sep[100], confirm this member actually gets filled */
+ int length; /* number of target words, assigned from the word count at the end of getTargetWords */
+} wordlist; /* target words of one query, resolved once and shared by every _get_neighbours thread through knnpars.wl */
+
+typedef struct {
+ wordlist *wl;
char *token;
int N;
long from;
@@ -196,7 +203,6 @@
return 0;
}
-
knn *getCollocators(int cc) {
knn *nbs = NULL;
long window_layer_size = size * window * 2;
@@ -226,7 +232,7 @@
window_offset = a * size;
if (a > window)
window_offset -= size;
- for(target = 0; target < words; target ++) {
+ for(target = 0; target < words / 2; target ++) {
if(target == d)
continue;
f = 0;
@@ -292,27 +298,11 @@
return(nbs);
}
-void *_get_neighbours(knnpars *pars) {
- char *st1 = pars->token;
- int N = pars->N;
- long from = pars -> from;
- unsigned long upto = pars -> upto;
- char file_name[max_size], st[100][max_size], sep[100];
- float dist, len, *bestd, vec[max_size];
- long long a, b, c, d, cn, bi[100], *besti;
- char ch;
- knn *nbs = NULL;
-
- besti = malloc(N * sizeof(long long));
- bestd = malloc(N * sizeof(float));
-
- float worstbest=-1;
-
- for (a = 0; a < N; a++) bestd[a] = 0;
- a = 0;
- cn = 0;
- b = 0;
- c = 0;
+wordlist *getTargetWords(char *st1) {
+ wordlist *wl = malloc(sizeof(wordlist));
+ char st[100][max_size], sep[100];
+ long a, b=0, c=0, cn=0;
+
while (1) {
st[cn][b] = st1[c];
b++;
@@ -329,18 +319,47 @@
for (a = 0; a < cn; a++) {
for (b = 0; b < words; b++) if (!strcmp(&vocab[b * max_w], st[a])) break;
if (b == words) b = -1;
- bi[a] = b;
- fprintf(stderr, "Word: \"%s\" Position in vocabulary: %lld\n", st[a], bi[a]);
- if(from < 0) {
- nbs = getCollocators(b);
- pthread_exit(nbs);
- }
+ wl->wordi[a] = b;
+ fprintf(stderr, "Word: \"%s\" Position in vocabulary: %lld\n", st[a], wl->wordi[a]);
if (b == -1) {
fprintf(stderr, "Out of dictionary word!\n");
cn--;
break;
}
}
+ wl->length=cn;
+ return(wl);
+}
+
+void *_get_neighbours(knnpars *pars) {
+ char *st1 = pars->token;
+ int N = pars->N;
+ long from = pars -> from;
+ unsigned long upto = pars -> upto;
+ char file_name[max_size], st[100][max_size], *sep;
+ float dist, len, *bestd, vec[max_size];
+ long long a, b, c, d, cn, *bi, *besti;
+ char ch;
+ knn *nbs = NULL;
+ wordlist *wl = pars->wl;
+
+ besti = malloc(N * sizeof(long long));
+ bestd = malloc(N * sizeof(float));
+
+ float worstbest=-1;
+
+ for (a = 0; a < N; a++) bestd[a] = 0;
+ a = 0;
+ bi = wl->wordi;
+ cn = wl->length;
+ sep = wl->sep;
+ b = bi[0];
+ c = 0;
+
+ if(from < 0) {
+ nbs = getCollocators(b);
+ pthread_exit(nbs);
+ }
if (b == -1) {
N = 0;
goto end;
@@ -398,19 +417,24 @@
knn *nbs[MAX_THREADS];
knnpars pars[MAX_THREADS];
pthread_t *pt = (pthread_t *)malloc((num_threads+1) * sizeof(pthread_t));
-
+ wordlist *wl;
+
if(N>MAX_NEIGHBOURS) N=MAX_NEIGHBOURS;
slice = words / num_threads;
+ wl = getTargetWords(st1);
+
a = num_threads;
pars[a].token = st1;
+ pars[a].wl = wl;
pars[a].N = N;
pars[a].from = -1;
pthread_create(&pt[a], NULL, _get_neighbours, (void *) &pars[a]);
for(a=0; a < num_threads; a++) {
pars[a].token = st1;
+ pars[a].wl = wl;
pars[a].N = N;
pars[a].from = a*slice;
pars[a].upto = ((a+1)*slice > words? words:(a+1)*slice);