Add getPosWiseW2VCollocators -> json to API
Resolves #8
Change-Id: Id1eca2896fadc59bebb2786e45fd8a6b3efdca6c
diff --git a/README.md b/README.md
index a01e453..c3bdfa5 100644
--- a/README.md
+++ b/README.md
@@ -118,6 +118,13 @@
}
```
+### Get top predictive collocates position-wise
+
+| Command | Parameters | Description |
+|----------------------------|-------------------------|------------------------------------------------------------|
+| /getPosWiseW2VCollocators  | w(,max=200,format=json) | get top `max` predictive collocates position-wise of word w |
+
+
### Examples
```bash
GET 'http://localhost:3000/?word=Grund&n=10&dedupe=0&sort=0&cutoff=1000000&json=1' | json_pp |less
@@ -131,6 +138,10 @@
GET 'http://localhost:3000/getCollocationAssociation?w=Grund&c=diesem'
```
+```bash
+GET 'http://localhost:3000/getPosWiseW2VCollocators?w=Test'
+```
+
## (Build and) run using docker / podman
### Optional: Build docker image from source
diff --git a/lib/IDS/DeReKoVecs/Read.pm b/lib/IDS/DeReKoVecs/Read.pm
index 378dba8..7bff3af 100644
--- a/lib/IDS/DeReKoVecs/Read.pm
+++ b/lib/IDS/DeReKoVecs/Read.pm
@@ -26,7 +26,7 @@
use Mojo::JSON qw(decode_json encode_json to_json);
use Exporter qw(import);
-our @EXPORT = qw(init_net load_sprofiles getVocabSize getDowntimeCalendar getCollocationAssociation getClassicCollocatorsCached getSimilarProfiles getSimilarProfilesCached getBiggestMergedDifferences filter_garbage get_neighbours getWordNumber dump_vecs dump_for_numpy cos_similarity_as_json get_version getPosWiseW2VCollocatorsAsTsv);
+our @EXPORT = qw(init_net load_sprofiles getVocabSize getDowntimeCalendar getCollocationAssociation getClassicCollocatorsCached getSimilarProfiles getSimilarProfilesCached getBiggestMergedDifferences filter_garbage get_neighbours getWordNumber dump_vecs dump_for_numpy cos_similarity_as_json get_version getPosWiseW2VCollocators);
sub getDowntimeCalendar {
my ($url) = @_;
diff --git a/lib/IDS/DeReKoVecs/derekovecs-server.c b/lib/IDS/DeReKoVecs/derekovecs-server.c
index 6592bfa..5ef2014 100644
--- a/lib/IDS/DeReKoVecs/derekovecs-server.c
+++ b/lib/IDS/DeReKoVecs/derekovecs-server.c
@@ -747,7 +747,7 @@
return (fa > fb) - (fa < fb);
}
-char *getPosWiseW2VCollocatorsAsTsv(char *word, long maxPerPos, long cutoff, float threshold) {
+char *getPosWiseW2VCollocators(char *word, long maxPerPos, long cutoff, float threshold, const char *format) {
HV *result = newHV();
float *target_sums = NULL;
long a, b;
@@ -793,14 +793,34 @@
result = malloc(maxPerPos * 80 * syn_threads);
char *p = (char *) result;
*p = 0;
- for (a = syn_threads - 1; a >= 0; a--) {
- for (b = 0; b < syn_nbs[a]->length; b++) {
- p += sprintf(p, "%ld\t%s\t%f\n", syn_nbs[a]->best[b].position, &vocab[syn_nbs[a]->best[b].wordi * max_w], syn_nbs[a]->best[b].activation);
+ if (strcmp(format, "tsv") == 0) {
+ for (a = syn_threads - 1; a >= 0; a--) {
+ for (b = 0; b < syn_nbs[a]->length; b++) {
+ p += sprintf(p, "%ld\t%s\t%f\n", syn_nbs[a]->best[b].position, &vocab[syn_nbs[a]->best[b].wordi * max_w], syn_nbs[a]->best[b].activation);
+ }
}
+ } else {
+ p += sprintf(p, "[");
+ for (a = syn_threads - 1; a >= 0; a--) {
+ for (b = 0; b < syn_nbs[a]->length; b++) {
+ p += sprintf(p, "{\"pos\": %ld, \"word\":\"%s\",\"activation\": %f},\n", syn_nbs[a]->best[b].position, &vocab[syn_nbs[a]->best[b].wordi * max_w], syn_nbs[a]->best[b].activation);
+ }
+ }
+ p-=2;
+ p += sprintf(p, "\n]");
}
+
return ((char *)result);
}
+char *getPosWiseW2VCollocatorsAsTsv(char *word, long maxPerPos, long cutoff, float threshold) {
+ return getPosWiseW2VCollocators(word, maxPerPos, cutoff, threshold, "tsv");
+}
+
+char *getPosWiseW2VCollocatorsAsJson(char *word, long maxPerPos, long cutoff, float threshold) {
+ return getPosWiseW2VCollocators(word, maxPerPos, cutoff, threshold, "json");
+}
+
SV *get_neighbours(char *st1, int N, int sort_by, int search_backw, long cutoff, int dedupe, int no_similar_profiles) {
HV *result = newHV();
float *target_sums = NULL;
diff --git a/script/derekovecs-server b/script/derekovecs-server
index 3811858..1930381 100755
--- a/script/derekovecs-server
+++ b/script/derekovecs-server
@@ -208,20 +208,24 @@
any '*/getPosWiseW2VCollocators' => sub {
my $self = shift;
- $self->render(data => getPosWiseW2VCollocatorsAsTsv($self->param("w"),
+ my $format = $self->param("format") || 'json';
+ $self->render(data => getPosWiseW2VCollocators($self->param("w"),
($self->param("max")? $self->param("max") : 200),
($self->param("cutoff")? $self->param("cutoff") :750000),
- ($self->param("threshold")? $self->param("threshold") : 0.2)),
- format=>'tsv');
+ ($self->param("threshold")? $self->param("threshold") : 0.2),
+ $format),
+ format=>$format);
};
any '/getPosWiseW2VCollocators' => sub {
my $self = shift;
- $self->render(data => getPosWiseW2VCollocatorsAsTsv($self->param("w"),
+ my $format = $self->param("format") || 'json';
+ $self->render(data => getPosWiseW2VCollocators($self->param("w"),
($self->param("max")? $self->param("max") : 200),
($self->param("cutoff")? $self->param("cutoff") : 750000),
- ($self->param("threshold")? $self->param("threshold") : 0.2)),
- format=>'tsv');
+ ($self->param("threshold")? $self->param("threshold") : 0.2),
+ $format),
+ format=>$format);
};
any '*/getSimilarProfiles' => sub {