Fixed payloads, sorted tokens, major speed improvements

commit: 6d565078709daab2b979c5fefe2f8dfc5c6ab912 [log] [tgz]
author: Nils Diewald <nils@diewald-online.de> Thu Oct 30 23:20:58 2014 +0000
committer: Nils Diewald <nils@diewald-online.de> Thu Oct 30 23:20:58 2014 +0000
tree: f0c425d7a385e108e05b980a2a5db6e82dc46b0c
parent: 32e30f07a3ddd06b4af06d74190de0699bfab5b8 [diff] [blame]
diff --git a/t/artificial.t b/t/artificial.t
index 5b5210f..10d4429 100644
--- a/t/artificial.t
+++ b/t/artificial.t

@@ -59,8 +59,9 @@
 is($tokens->name, 'tokens', 'Name');
 is($tokens->layer, 'Tokens', 'Layer');
 
-is($tokens->stream->pos(0)->to_string, '[(0-3)s:Zum|i:zum|_0#0-3|-:tokens$<i>18]', 'Token is correct');
-is($tokens->stream->pos(1)->to_string, '[(4-11)s:letzten|i:letzten|_1#4-11]', 'Token is correct');
+is($tokens->stream->pos(0)->to_string, '[(0-3)-:tokens$<i>18|_0#0-3|i:zum|s:Zum]', 'Token is correct');
+
+is($tokens->stream->pos(1)->to_string, '[(4-11)_1#4-11|i:letzten|s:letzten]', 'Token is correct');
 
 my $i = 2;
 foreach ([12,23, 'kulturellen'],
@@ -82,8 +83,8 @@
        ) {
   is($tokens->stream->pos($i++)->to_string,
      '[('.$_->[0].'-'.$_->[1].')'.
-       's:'.$_->[2].'|i:'.lc($_->[2]).'|'.
-       '_'.($i-1).'#'.$_->[0].'-'.$_->[1].']',
+       '_'.($i-1).'#'.$_->[0].'-'.$_->[1] . '|' .
+	 'i:'.lc($_->[2]).'|s:'.$_->[2].']',
      'Token is correct');
 };
 
@@ -92,6 +93,7 @@
 # Add OpenNLP/morpho
 ok($tokens->add('OpenNLP', 'Morpho'), 'Add OpenNLP/Morpho');
 
+
 $i = 0;
 foreach (qw/APPRART ADJA ADJA NN VVFIN ART NN ART NN NE PTKVZ KOUS ART NN NN NN VVPP VAFIN/) {
   like($tokens->stream->pos($i++)->to_string,
@@ -103,8 +105,7 @@
 # Add OpenNLP/sentences
 ok($tokens->add('OpenNLP', 'Sentences'), 'Add OpenNLP/Sentences');
 
-is($tokens->stream->pos(0)->to_string, '[(0-3)s:Zum|i:zum|_0#0-3|-:tokens$<i>18|opennlp/p:APPRART|<>:opennlp/s:s#0-129$<i>17|-:opennlp/sentences$<i>1]', 'Correct sentence');
-
+is($tokens->stream->pos(0)->to_string, '[(0-3)-:opennlp/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|opennlp/p:APPRART|<>:opennlp/s:s#0-129$<i>17]', 'Correct sentence');
 
 # New instantiation
 ok($tokens = KorAP::Tokenizer->new(
@@ -124,7 +125,7 @@
 ok($tokens->add('Base', 'Paragraphs'), 'Add Base/Paragraphs');
 
 is($tokens->stream->pos(0)->to_string,
-   '[(0-3)s:Zum|i:zum|_0#0-3|-:tokens$<i>18|<>:base/s:s#0-129$<i>17|<>:base/s:t#0-129$<i>17|-:base/sentences$<i>1|-:base/paragraphs$<i>0]',
+   '[(0-3)-:base/paragraphs$<i>0|-:base/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|<>:base/s:t#0-129$<i>17<b>0|<>:base/s:s#0-129$<i>17<b>0]',
    'Correct base annotation');
 
 
@@ -135,11 +136,11 @@
 ok($tokens->add('CoreNLP', 'NamedEntities', 'ne_dewac_175m_600'), 'Add CoreNLP/NamedEntities');
 ok($tokens->add('CoreNLP', 'NamedEntities', 'ne_hgc_175m_600'), 'Add CoreNLP/NamedEntities');
 
+# [(64-73)s:Hofbergli|i:hofbergli|_9#64-73|corenlp/ne_dewac_175m_600:I-LOC|corenlp/ne_hgc_175m_600:I-LOC]
 is($tokens->stream->pos(9)->to_string,
-   '[(64-73)s:Hofbergli|i:hofbergli|_9#64-73|corenlp/ne_dewac_175m_600:I-LOC|corenlp/ne_hgc_175m_600:I-LOC]',
+   '[(64-73)_9#64-73|corenlp/ne:I-LOC|i:hofbergli|s:Hofbergli]',
    'Correct NamedEntities annotation');
 
-
 # New instantiation
 ok($tokens = new_tokenizer->parse, 'Parse');
 
@@ -147,7 +148,7 @@
 ok($tokens->add('CoreNLP', 'Morpho'), 'Add CoreNLP/Morpho');
 
 is($tokens->stream->pos(0)->to_string,
-   '[(0-3)s:Zum|i:zum|_0#0-3|-:tokens$<i>18|corenlp/p:APPRART]',
+   '[(0-3)-:tokens$<i>18|_0#0-3|i:zum|s:Zum|corenlp/p:APPRART]',
    'Correct corenlp annotation');
 
 $i = 0;
@@ -161,10 +162,9 @@
 ok($tokens->add('CoreNLP', 'Sentences'), 'Add CoreNLP/Sentences');
 
 is($tokens->stream->pos(0)->to_string,
-   '[(0-3)s:Zum|i:zum|_0#0-3|-:tokens$<i>18|corenlp/p:APPRART|<>:corenlp/s:s#0-129$<i>17|-:corenlp/sentences$<i>1]',
+   '[(0-3)-:corenlp/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|corenlp/p:APPRART|<>:corenlp/s:s#0-129$<i>17]',
    'Correct corenlp annotation');
 
-
 # New instantiation
 ok($tokens = new_tokenizer->parse, 'New Tokenizer');
 
@@ -172,7 +172,7 @@
 ok($tokens->add('Connexor', 'Sentences'), 'Add Connexor/Sentences');
 
 is($tokens->stream->pos(0)->to_string,
-   '[(0-3)s:Zum|i:zum|_0#0-3|-:tokens$<i>18|<>:cnx/s:s#0-129$<i>17|-:cnx/sentences$<i>1]',
+   '[(0-3)-:cnx/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|<>:cnx/s:s#0-129$<i>17<b>0]',
    'Correct cnx annotation');
 
 # New instantiation
@@ -192,6 +192,7 @@
        'Annotation (Connexor/p) is correct: ' . $_);
 };
 
+
 $i = 0;
 foreach (qw/! ! ! ! IND:PRES ! ! ! ! Prop ! ! ! ! ! ! PCP:PERF IND:PRES/) {
   if ($_ eq '!') {
@@ -212,10 +213,10 @@
 # Add Connexor/Phrase
 ok($tokens->add('Connexor', 'Phrase'), 'Add Connexor/Phrase');
 my $stream = $tokens->stream;
-like($stream->pos(1)->to_string, qr!\|<>:cnx/c:np#4-30\$<i>4!, 'Annotation (Connexor/c) is correct');
-like($stream->pos(6)->to_string, qr!\|<>:cnx/c:np#40-47\$<i>7!, 'Annotation (Connexor/c) is correct');
-like($stream->pos(8)->to_string, qr!\|<>:cnx/c:np#52-73\$<i>10!, 'Annotation (Connexor/c) is correct');
-like($stream->pos(13)->to_string, qr!\|<>:cnx/c:np#89-111\$<i>16!, 'Annotation (Connexor/c) is correct');
+like($stream->pos(1)->to_string, qr!\|<>:cnx/c:np#4-30\$<i>4<b>0!, 'Annotation (Connexor/c) is correct');
+like($stream->pos(6)->to_string, qr!\|<>:cnx/c:np#40-47\$<i>7<b>0!, 'Annotation (Connexor/c) is correct');
+like($stream->pos(8)->to_string, qr!\|<>:cnx/c:np#52-73\$<i>10<b>0!, 'Annotation (Connexor/c) is correct');
+like($stream->pos(13)->to_string, qr!\|<>:cnx/c:np#89-111\$<i>16<b>0!, 'Annotation (Connexor/c) is correct');
 
 # New instantiation
 ok($tokens = new_tokenizer->parse, 'New Tokenizer');
@@ -242,7 +243,7 @@
 # Add XIP/Sentences
 ok($tokens->add('XIP', 'Sentences'), 'Add XIP/Sentences');
 
-is($tokens->stream->pos(0)->to_string, '[(0-3)s:Zum|i:zum|_0#0-3|-:tokens$<i>18|<>:xip/s:s#0-129$<i>17|-:xip/sentences$<i>1]', 'First sentence');
+is($tokens->stream->pos(0)->to_string, '[(0-3)-:tokens$<i>18|_0#0-3|i:zum|s:Zum|-:xip/sentences$<i>1|<>:xip/s:s#0-129$<i>17<b>0]', 'First sentence');
 
 # Add XIP/Morpho
 ok($tokens->add('XIP', 'Morpho'), 'Add XIP/Morpho');
commit	6d565078709daab2b979c5fefe2f8dfc5c6ab912	[log] [tgz]
author	Nils Diewald <nils@diewald-online.de>	Thu Oct 30 23:20:58 2014 +0000
committer	Nils Diewald <nils@diewald-online.de>	Thu Oct 30 23:20:58 2014 +0000
tree	f0c425d7a385e108e05b980a2a5db6e82dc46b0c
parent	32e30f07a3ddd06b4af06d74190de0699bfab5b8 [diff] [blame]