Add getVocabSize
Change-Id: I642b39b1186752387da2b555b6e6cb20a0952be5
diff --git a/README.md b/README.md
index fc61424..48620f5 100644
--- a/README.md
+++ b/README.md
@@ -51,13 +51,14 @@
## Web Service API
In addition to the web user interface, derekovecs also provides a web api which is however still very unsystematic and **not stable**. To figure out the meaning of still undocumented result components, have a look at the table head mouse-overs in the GUI or at the source code around [here](https://korap.ids-mannheim.de/gerrit/plugins/gitiles/ids-kl/derekovecs/+/refs/heads/master/templates/index.html.ep#684).
-| Command | Parameters | Description |
-| ----------- | ----------- | ----------- |
-| / | word, n, dedupe, cutoff, json=1 | get paradigmatic and syntagmatic neighbours, from word embeddings |
-| getCollocationAssociation | w, c | get association scores for specific node collocate pairs |
-| getSimilarity | w1, w2 | get cosine similarity of w1 and w2 |
-| getVersion | | get version of derekovecs |
-| getModelName | | get name of model (inferred from the file name) |
+| Command | Parameters | Description |
+|---------------------------|---------------------------------|-------------------------------------------------------------------|
+| / | word, n, dedupe, cutoff, json=1 | get paradigmatic and syntagmatic neighbours, from word embeddings |
+| getCollocationAssociation | w, c | get association scores for specific node collocate pairs |
+| getSimilarity | w1, w2 | get cosine similarity of w1 and w2 |
+| getVersion | | get version of derekovecs |
+| getModelName | | get name of model (inferred from the file name) |
+| getVocabSize | | get vocabulary size of model |
### Get classical (count-based) collocates
diff --git a/lib/IDS/DeReKoVecs/Read.pm b/lib/IDS/DeReKoVecs/Read.pm
index 4887210..cf323cc 100644
--- a/lib/IDS/DeReKoVecs/Read.pm
+++ b/lib/IDS/DeReKoVecs/Read.pm
@@ -26,7 +26,7 @@
use Mojo::JSON qw(decode_json encode_json to_json);
use Exporter qw(import);
-our @EXPORT = qw(init_net load_sprofiles getDowntimeCalendar getCollocationAssociation getClassicCollocatorsCached getSimilarProfiles getSimilarProfilesCached getBiggestMergedDifferences filter_garbage get_neighbours getWordNumber dump_vecs dump_for_numpy cos_similarity_as_json);
+our @EXPORT = qw(init_net load_sprofiles getVocabSize getDowntimeCalendar getCollocationAssociation getClassicCollocatorsCached getSimilarProfiles getSimilarProfilesCached getBiggestMergedDifferences filter_garbage get_neighbours getWordNumber dump_vecs dump_for_numpy cos_similarity_as_json);
sub getDowntimeCalendar {
my ($url) = @_;
diff --git a/lib/IDS/DeReKoVecs/derekovecs-server.c b/lib/IDS/DeReKoVecs/derekovecs-server.c
index b2cdb20..2234c66 100644
--- a/lib/IDS/DeReKoVecs/derekovecs-server.c
+++ b/lib/IDS/DeReKoVecs/derekovecs-server.c
@@ -1185,3 +1185,7 @@
fclose(f);
return (0);
}
+
+unsigned long getVocabSize() { /* Report the model's vocabulary size to callers. */
+ return (unsigned long) words; /* `words` is declared outside this hunk; presumably the count of vocabulary entries loaded from the model -- TODO confirm. The cast adapts it to the declared return type. */
+}
diff --git a/t/server-test.t b/t/server-test.t
index 6ed4c57..5a216c6 100644
--- a/t/server-test.t
+++ b/t/server-test.t
@@ -1,6 +1,6 @@
use strict;
use warnings;
-use Test::More tests=>7;
+use Test::More tests=>8;
use Mojo::JSON qw(decode_json encode_json to_json);
use REST::Client;
use Data::Dump qw(dump);
@@ -43,6 +43,9 @@
$res = $client->responseContent();
is($res, '"wpd19_10000"', "model name ok");
+$client->GET('http://localhost:3000/getVocabSize'); # query the vocabulary-size endpoint of the local test server
+$res = $client->responseContent();
+cmp_ok($res, '>', 1000, "vocab size ok"); # cmp_ok (unlike ok) prints the received value when the comparison fails
for (my $i=0; $i<4; $i++) {
$pid++;