w2v-server: add API function getPosWiseW2VCollocators
e.g.:
GET
'http://compute:4801/getPosWiseW2VCollocators?w=idea&cutoff=100000&max=20&threshold=0.8'
diff --git a/w2v-server.pl b/w2v-server.pl
index 7295d4b..99c957d 100755
--- a/w2v-server.pl
+++ b/w2v-server.pl
@@ -196,6 +196,24 @@
$self->render(data => getBiggestMergedDifferences(), format=>'json');
};
+# Route matching any path prefix followed by /getPosWiseW2VCollocators.
+# Passes the query parameter "w" to getPosWiseW2VCollocatorsAsTsv() and renders
+# the returned data with format 'tsv'. Optional parameters "max", "cutoff" and
+# "threshold" fall back to 200, 750000 and 0.2 when absent or false.
+any '*/getPosWiseW2VCollocators' => sub {
+  my $c = shift;
+  my $max_per_pos = $c->param("max") || 200;
+  my $cutoff = $c->param("cutoff") || 750000;
+  my $threshold = $c->param("threshold") || 0.2;
+  my $tsv = getPosWiseW2VCollocatorsAsTsv($c->param("w"), $max_per_pos, $cutoff, $threshold);
+  $c->render(data => $tsv, format => 'tsv');
+};
+
+# Route for the exact path /getPosWiseW2VCollocators.
+# Collects the optional query parameters (each replaced by its default when
+# absent or false), calls getPosWiseW2VCollocatorsAsTsv() for the word given
+# in "w", and renders the returned data with format 'tsv'.
+any '/getPosWiseW2VCollocators' => sub {
+  my $c = shift;
+  my %opt = (
+    max       => $c->param("max")       || 200,
+    cutoff    => $c->param("cutoff")    || 750000,
+    threshold => $c->param("threshold") || 0.2,
+  );
+  my $tsv = getPosWiseW2VCollocatorsAsTsv($c->param("w"), $opt{max}, $opt{cutoff}, $opt{threshold});
+  $c->render(data => $tsv, format => 'tsv');
+};
+
any '*/getSimilarProfiles' => sub {
my $self = shift;
$self->render(data => getSimilarProfilesCached($self, $self->param("w") ? $self->param("w") : $self->req->json), format=>'json');
@@ -1012,6 +1030,61 @@
return (fa > fb) - (fa < fb);
}
+/*
+ * Formats the collocators held in nbs[count-1] .. nbs[0] (in that order) as
+ * "position<TAB>word<TAB>activation\n" rows.
+ *
+ * With dst == NULL nothing is written and only the number of bytes needed
+ * (excluding the terminating NUL) is returned; otherwise at most `room` bytes
+ * including the NUL are written to dst. NULL entries in nbs are skipped.
+ */
+static size_t renderCollocatorRows(char *dst, size_t room, knn **nbs, int count) {
+  size_t used = 0;
+  long a, b;
+
+  for (a = count - 1; a >= 0; a--) {
+    if (nbs[a] == NULL)
+      continue;
+    for (b = 0; b < nbs[a]->length; b++) {
+      char *at = NULL;
+      size_t left = 0;
+      int n;
+      if (dst != NULL && used < room) {
+        at = dst + used;
+        left = room - used;
+      }
+      n = snprintf(at, left, "%ld\t%s\t%f\n", nbs[a]->best[b].position, &vocab[nbs[a]->best[b].wordi * max_w], nbs[a]->best[b].activation);
+      if (n > 0)
+        used += (size_t) n;
+    }
+  }
+  return used;
+}
+
+/*
+ * Returns the collocators of `word` as TSV text, one row per collocator:
+ * position, vocabulary entry, activation.
+ *
+ * One getCollocators() thread is started per window position (window * 2
+ * threads when M2 is set, none otherwise); thread a is given the range
+ * [a, a+1). Rows are emitted from the highest thread index down to 0.
+ *
+ *   word       passed to getTargetWords() to obtain the target word list
+ *   maxPerPos  handed to every thread as its N
+ *   cutoff     values < 1 or > words are replaced by words
+ *   threshold  handed to every thread unchanged
+ *
+ * Returns a malloc()ed, NUL-terminated string on success. Returns the string
+ * literal "" (which must not be freed) when no target word is found, when
+ * window * 2 exceeds MAX_THREADS, or when an allocation fails.
+ *
+ * NOTE(review): the knn results handed back by the threads and the wordlist
+ * from getTargetWords() are not released here because their ownership is not
+ * visible from this function - confirm against getCollocators()/getTargetWords().
+ */
+char* getPosWiseW2VCollocatorsAsTsv(char *word, long maxPerPos, long cutoff, float threshold) {
+  float *target_sums = NULL;
+  knn *syn_nbs[MAX_THREADS];
+  knnpars pars[MAX_THREADS];
+  pthread_t pt[MAX_THREADS];
+  wordlist *wl;
+  char *result = NULL;
+  size_t needed;
+  long a;
+  int launched = 0;
+  int syn_threads = (M2? window * 2 : 0);
+
+  /* pars, pt and syn_nbs are indexed by thread number; never run past them. */
+  if(syn_threads > MAX_THREADS) {
+    fprintf(stderr, "getPosWiseW2VCollocatorsAsTsv: %d threads requested, more than MAX_THREADS\n", syn_threads);
+    return "";
+  }
+
+  if(cutoff < 1 || cutoff > words)
+    cutoff=words;
+
+  wl = getTargetWords(word, 0);
+  if(wl == NULL || wl->length < 1)
+    return "";
+
+  if(posix_memalign((void **) &target_sums, 128, cutoff * sizeof(float)) != 0)
+    return "";
+  memset(target_sums, 0, cutoff * sizeof(float));
+
+  for(a=0; a < syn_threads; a++)
+    syn_nbs[a] = NULL;
+
+  printf("Starting %d threads\n", syn_threads);
+  fflush(stdout);
+  for(a=0; a < syn_threads; a++) {
+    /* Clear fields that are not set explicitly below. */
+    memset(&pars[a], 0, sizeof pars[a]);
+    pars[a].cutoff = cutoff;
+    pars[a].target_sums = target_sums;
+    pars[a].window_sums = window_sums;
+    pars[a].wl = wl;
+    pars[a].N = maxPerPos;
+    pars[a].threshold = threshold;
+    pars[a].from = a;
+    pars[a].upto = a+1;
+    if(pthread_create(&pt[a], NULL, getCollocators, (void *) &pars[a]) != 0) {
+      fprintf(stderr, "getPosWiseW2VCollocatorsAsTsv: could not start thread %ld\n", a);
+      break;
+    }
+    launched++;
+  }
+  printf("Waiting for syn threads to join\n");
+  fflush(stdout);
+  /* Only join threads that were actually started; the rest stay NULL. */
+  for (a = 0; a < launched; a++) {
+    void *ret = NULL;
+    if(pthread_join(pt[a], &ret) == 0)
+      syn_nbs[a] = (knn *) ret;
+  }
+  printf("Syn threads joint\n");
+  fflush(stdout);
+
+  /* All workers have been joined, so the shared accumulator is no longer in use. */
+  free(target_sums);
+
+  /* First pass measures the exact output size, second pass writes it. */
+  needed = renderCollocatorRows(NULL, 0, syn_nbs, syn_threads) + 1;
+  result = malloc(needed);
+  if(result == NULL)
+    return "";
+  result[0] = 0;
+  renderCollocatorRows(result, needed, syn_nbs, syn_threads);
+  return(result);
+}
SV *get_neighbours(char *st1, int N, int sort_by, int search_backw, long cutoff, int dedupe, int no_similar_profiles) {
HV *result = newHV();