w2v-server: add nosp parameter to ignore similar profiles
diff --git a/w2v-server.pl b/w2v-server.pl
index 92c223f..0e14b20 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -171,6 +171,7 @@
my $json=$c->param('json') || 0;
my $cutoff=$c->param('cutoff') || 500000;
my $dedupe=$c->param('dedupe') || 0;
+ my $nosp=$c->param('nosp') || 0;
my $res;
my @lists;
my @collocations;
@@ -184,9 +185,9 @@
} else {
$c->app->log->info('Looking for neighbours of '.$w);
if($opt_i) {
- $res = get_neighbours(encode("iso-8859-1", $w), $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe);
+ $res = get_neighbours(encode("iso-8859-1", $w), $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe, $nosp);
} else {
- $res = get_neighbours($w, $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe);
+ $res = get_neighbours($w, $no_nbs, $sort, $searchBaseVocabFirst, $cutoff, $dedupe, $nosp);
}
$cache{$w.$cutoff.$no_nbs.$sort.$dedupe} = $res;
}
@@ -558,6 +559,27 @@
return;
}
+
+knn *simpleGetCollocators(int word, int number, long cutoff, int *result) { /* Stub: sets up and releases the knnpars for a collocator query; word and result are not used yet. */
+  knn *syn_nbs = NULL; /* stays NULL until the call is enabled: (knn *) getCollocators(pars) */
+  float *target_sums = NULL;
+  float *window_sums = malloc(sizeof(float) * (window + 1) * 2);
+  knnpars *pars = calloc(1, sizeof(knnpars));
+  if (pars != NULL && window_sums != NULL) { /* skip the setup entirely when an allocation failed */
+    pars->cutoff = (cutoff ? cutoff : 300000); /* cutoff 0 selects the default of 300000 */
+    if (posix_memalign((void **) &target_sums, 128, pars->cutoff * sizeof(float)) == 0) {
+      for (long a = 0; a < pars->cutoff; a++) target_sums[a] = 0; /* clear the whole buffer, not just `cutoff` entries */
+      pars->target_sums = target_sums;
+      pars->window_sums = window_sums;
+      pars->N = (number ? number : 20); /* number 0 selects the default of 20 */
+      pars->from = 0; pars->upto = window * 2 - 1;
+    } else target_sums = NULL; /* do not free a pointer left unset by a failed posix_memalign */
+  }
+  free(pars); free(window_sums);
+  free(target_sums);
+  return syn_nbs; /* the original returned this pointer uninitialised */
+}
+
void *getCollocators(void *args) {
knnpars *pars = args;
int N = pars->N;
@@ -819,7 +841,7 @@
}
-SV *get_neighbours(char *st1, int N, int sort_by, int search_backw, long cutoff, int dedupe) {
+SV *get_neighbours(char *st1, int N, int sort_by, int search_backw, long cutoff, int dedupe, int no_similar_profiles) {
HV *result = newHV();
float *target_sums, vec[max_size];
long long old_words;
@@ -830,7 +852,7 @@
pthread_t *pt = (pthread_t *)malloc((num_threads+1) * sizeof(pthread_t));
wordlist *wl;
int syn_threads = (M2? window * 2 : 0);
- int para_threads = num_threads - syn_threads;
+ int para_threads = (no_similar_profiles? 0 : num_threads - syn_threads);
collocator *best;
posix_memalign((void **) &best, 128, 10 * N * sizeof(collocator));
@@ -848,7 +870,7 @@
old_words = cutoff;
if(merge_words > 0)
cutoff = merge_words * 1.25; /* HACK */
- slice = cutoff / para_threads;
+ slice = (para_threads? cutoff / para_threads : 0);
a = posix_memalign((void **) &target_sums, 128, cutoff * sizeof(float));
for(a = 0; a < cutoff; a++)