Fixed payloads, sorted tokens, major speed improvements
diff --git a/t/real_goethe.t b/t/real_goethe.t
index e2d6984..15fa677 100644
--- a/t/real_goethe.t
+++ b/t/real_goethe.t
@@ -7,6 +7,10 @@
use Data::Dumper;
use JSON::XS;
+use Benchmark qw/:hireswallclock/;
+
+my $t = Benchmark->new;
+
use utf8;
use lib 'lib', '../lib';
@@ -67,7 +71,7 @@
is($output->{foundries}, '', 'Foundries');
is($output->{layerInfos}, '', 'layerInfos');
-is($output->{data}->[0]->[0], 's:Autobiographische', 'data');
+is($output->{data}->[0]->[3], 's:Autobiographische', 'data');
is($output->{textSigle}, 'GOE_AGA.03828', 'Correct text sigle');
is($output->{docSigle}, 'GOE_AGA', 'Correct document sigle');
@@ -101,8 +105,8 @@
my $first_token = join('||', @{$output->{data}->[0]});
like($first_token, qr/s:Autobiographische/, 'data');
like($first_token, qr/_0#0-17/, 'data');
-like($first_token, qr!<>:base/s:s#0-30\$<i>2!, 'data');
-like($first_token, qr!<>:base\/s:t#0-35199\$<i>5226!, 'data');
+like($first_token, qr!<>:base/s:s#0-30\$<i>2<b>2!, 'data');
+like($first_token, qr!<>:base\/s:t#0-35199\$<i>5226<b>0!, 'data');
## OpenNLP
$tokens->add('OpenNLP', 'Sentences');
@@ -132,8 +136,7 @@
'Foundries');
is($output->{layerInfos}, 'base/s=spans opennlp/p=tokens opennlp/s=spans tt/s=spans', 'layerInfos');
$first_token = join('||', @{$output->{data}->[0]});
-like($first_token, qr!<>:tt/s:s#0-179\$<i>21!, 'data');
-
+like($first_token, qr!<>:tt/s:s#0-179\$<i>21<b>2!, 'data');
$tokens->add('TreeTagger', 'Morpho');
$output = decode_json( $tokens->to_json );
@@ -147,7 +150,6 @@
like($first_token, qr!tt/l:Autobiographische\$<b>89!, 'data');
like($first_token, qr!tt/p:NN\$<b>89!, 'data');
-
## CoreNLP
$tokens->add('CoreNLP', 'NamedEntities');
$output = decode_json( $tokens->to_json );
@@ -171,7 +173,6 @@
$first_token = join('||', @{$output->{data}->[0]});
like($first_token, qr!<>:corenlp/s:s#0-254\$<i>32!, 'data');
-
$tokens->add('CoreNLP', 'Morpho');
$output = decode_json( $tokens->to_json );
like($output->{foundries}, qr!corenlp/morpho!, 'Foundries');
@@ -179,7 +180,33 @@
$first_token = join('||', @{$output->{data}->[0]});
like($first_token, qr!corenlp/p:ADJA!, 'data');
-fail('Check for Constiuency!');
+$tokens->add('CoreNLP', 'Constituency');
+$output = decode_json( $tokens->to_json );
+like($output->{foundries}, qr!corenlp/constituency!, 'Foundries');
+like($output->{layerInfos}, qr!corenlp/c=spans!, 'layerInfos');
+$first_token = join('||', @{$output->{data}->[0]});
+
+# '<>:corenlp/c:ADJA#0-17$<i>1<b>0',
+# '<>:corenlp/c:NP#0-17$<i>1<b>0',
+# '<>:corenlp/c:CNP#0-17$<i>1<b>1',
+# '<>:corenlp/c:NP#0-17$<i>1<b>2',
+# '<>:corenlp/c:AP#0-17$<i>1<b>3',
+# '<>:corenlp/c:PP#0-58$<i>5<b>2',
+# '<>:corenlp/c:S#0-58$<i>5<b>3',
+# '<>:corenlp/c:ROOT#0-254$<i>32<b>0',
+# '<>:corenlp/c:S#0-254$<i>32<b>1',
+
+#like($first_token, qr!<>:corenlp/c:ADJA#0-17$<i>1<b>0!, 'data');
+#like($first_token, qr!<>:corenlp/c:NP#0-17$<i>1<b>0!, 'data');
+
+
+
+diag Dumper $output->{data}->[0];
+
+
+done_testing;
+__END__
+
## Glemm
$tokens->add('Glemm', 'Morpho');
@@ -193,15 +220,13 @@
like($first_token, qr!glemm/l:\+\+Biograph!, 'data');
like($first_token, qr!glemm/l:\+\+-isch!, 'data');
-
## Connexor
$tokens->add('Connexor', 'Sentences');
$output = decode_json( $tokens->to_json );
like($output->{foundries}, qr!connexor/sentences!, 'Foundries');
like($output->{layerInfos}, qr!cnx/s=spans!, 'layerInfos');
$first_token = join('||', @{$output->{data}->[0]});
-like($first_token, qr!<>:cnx/s:s#0-179\$<i>21!, 'data');
-
+like($first_token, qr!<>:cnx/s:s#0-179\$<i>21<b>2!, 'data');
$tokens->add('Connexor', 'Morpho');
$output = decode_json( $tokens->to_json );
@@ -220,7 +245,6 @@
$first_token = join('||', @{$output->{data}->[0]});
like($first_token, qr!<>:cnx/c:np#0-30\$<i>2!, 'data');
-
$tokens->add('Connexor', 'Syntax');
$output = decode_json( $tokens->to_json );
like($output->{foundries}, qr!connexor/syntax!, 'Foundries');
@@ -236,13 +260,55 @@
like($output->{layerInfos}, qr!mate/l=tokens!, 'layerInfos');
like($output->{layerInfos}, qr!mate/m=tokens!, 'layerInfos');
$first_token = join('||', @{$output->{data}->[0]});
-like($first_token, qr!---!, 'data');
+like($first_token, qr!mate/l:autobiographisch!, 'data');
+like($first_token, qr!mate/p:NN!, 'data');
+like($first_token, qr!mate/m:case:nom!, 'data');
+like($first_token, qr!mate/m:number:pl!, 'data');
+like($first_token, qr!mate/m:gender:\*!, 'data');
+
+
+fail("No test for mate dependency");
+
+## XIP
+$tokens->add('XIP', 'Sentences');
+$output = decode_json( $tokens->to_json );
+like($output->{foundries}, qr!xip/sentences!, 'Foundries');
+like($output->{layerInfos}, qr!xip/s=spans!, 'layerInfos');
+$first_token = join('||', @{$output->{data}->[0]});
+like($first_token, qr!<>:xip/s:s#0-179\$<i>21!, 'data');
+
+$tokens->add('XIP', 'Morpho');
+$output = decode_json( $tokens->to_json );
+like($output->{foundries}, qr!xip/morpho!, 'Foundries');
+like($output->{layerInfos}, qr!xip/l=tokens!, 'layerInfos');
+like($output->{layerInfos}, qr!xip/p=tokens!, 'layerInfos');
+$first_token = join('||', @{$output->{data}->[0]});
+like($first_token, qr!<>:xip/s:s#0-179\$<i>21!, 'data');
+
+
+# print timestr(timediff(Benchmark->new, $t));
+# 57.6802 wallclock secs (57.15 usr + 0.12 sys = 57.27 CPU)# $VAR1 = [
+# 55.026 wallclock secs (54.44 usr + 0.10 sys = 54.54 CPU)# $VAR1 = [
+# 55.3887 wallclock secs (54.62 usr + 0.17 sys = 54.79 CPU)# $VAR1 = [
+# 54.9578 wallclock secs (54.51 usr + 0.13 sys = 54.64 CPU)# $VAR1 = [
+# 53.7051 wallclock secs (53.42 usr + 0.11 sys = 53.53 CPU)# $VAR1 = [
+# 47.6566 wallclock secs (46.88 usr + 0.15 sys = 47.03 CPU)# $VAR1 = [
+# 47.2379 wallclock secs (46.60 usr + 0.11 sys = 46.71 CPU)# $VAR1 = [
+# 29.563 wallclock secs (29.37 usr + 0.10 sys = 29.47 CPU)# $VAR1 = [
+# 30.9321 wallclock secs (30.69 usr + 0.14 sys = 30.83 CPU)# $VAR1 = [
+
+$tokens->add('XIP', 'Constituency');
+$output = decode_json( $tokens->to_json );
+like($output->{foundries}, qr!xip/constituency!, 'Foundries');
+like($output->{layerInfos}, qr!xip/c=spans!, 'layerInfos');
+$first_token = join('||', @{$output->{data}->[0]});
+like($first_token, qr!<>:xip/c:NP#0-17\$<i>1<b>1!, 'data');
+like($first_token, qr!<>:xip/c:AP#0-17\$<i>1<b>2!, 'data');
+like($first_token, qr!<>:xip/c:ADJ#0-17\$<i>1<b>3!, 'data');
+like($first_token, qr!<>:xip/c:TOP#0-179\$<i>21<b>0!, 'data');
diag Dumper $output->{data}->[0];
-diag "Use token-ids in tokens!";
-diag "Sort tokens based on positions!";
-
done_testing;
__END__