w2vserver: HACK: find target words also in merged vocabulary
diff --git a/w2v-server.pl b/w2v-server.pl
index d72aff8..6cb8a7e 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -723,56 +723,56 @@
return res;
}
-wordlist *getTargetWords(char *st1, int search_backw) {
+wordlist *getTargetWords(char *st1, int sbw) {
wordlist *wl = malloc(sizeof(wordlist));
char st[100][max_size], sep[100];
- long a, b=0, c=0, cn=0;
+ long a, b=0, c=0, cn=0;
int unmerged;
+ int search_backw = 0;
- while (1) {
- st[cn][b] = st1[c];
- if(merge_words > 0)
+ while (1) {
+ st[cn][b] = st1[c];
+ if (merge_words > 0)
st[cn+1][b] = st1[c];
- b++;
- c++;
- st[cn][b] = 0;
- if (st1[c] == 0) break;
- if (st1[c] == ' ' || st1[c] == '-') {
+ b++;
+ c++;
+ st[cn][b] = 0;
+ if (st1[c] == 0) break;
+ if (st1[c] == ' ' || st1[c] == '-') {
sep[cn++] = st1[c];
- if(merge_words > 0)
+ if (merge_words > 0)
sep[cn++] = st1[c];
- b = 0;
- c++;
- }
- }
- cn++;
- if(merge_words > 0)
+ b = 0;
+ c++;
+ }
+ }
+ cn++;
+ if (merge_words > 0)
cn++;
- for (a = 0; a < cn; a++) {
- if(search_backw) {
- for (b = words - 1; b >= 0; b--) if (!strcmp(&vocab[b * max_w], st[a])) break;
- } else {
- for (b = 0; b < words; b++)
- if (!strcmp(&vocab[b * max_w], st[a])) {
- if(merge_words > 0)
- search_backw^=1;
- break;
- }
- }
- if (b == words) b = -1;
- wl->wordi[a] = b;
- fprintf(stderr, "Word: \"%s\" Position in vocabulary: %lld\n", st[a], wl->wordi[a]);
- if (b == -1) {
- fprintf(stderr, "Out of dictionary word!\n");
- cn--;
- free(wl);
- return NULL;
- }
- }
+ for (a = 0; a < cn; a++) {
+ if (search_backw) {
+ for (b = words - 1; b >= 0; b--) if (!strcmp(&vocab[b * max_w], st[a])) break;
+ } else {
+ if (merge_words > 0 && a % 2 == 1) {
+ for (b = merge_words; b < words; b++)
+ if (!strcmp(&vocab[b * max_w], st[a])) break;
+ } else {
+ for (b = 0; b < words; b++)
+ if (!strcmp(&vocab[b * max_w], st[a])) break;
+ }
+ }
+ if (b == words) b = -1;
+ wl->wordi[a] = b;
+ fprintf(stderr, "Word: \"%s\" Position in vocabulary: %lld\n", st[a], wl->wordi[a]);
+ if (b == -1) {
+ fprintf(stderr, "Out of dictionary word!\n");
+ cn--;
+ }
+ }
wl->length=cn;
return(wl);
}
-
+
void *_get_neighbours(void *arg) {
knnpars *pars = arg;
char *st1 = pars->token;