Minor speedup in tokenization by merging array pushes
Change-Id: I138226acb2648cf606563c57b3783f011bab7795
diff --git a/t/tokenization.t b/t/tokenization.t
index 3a89b18..a8f8935 100644
--- a/t/tokenization.t
+++ b/t/tokenization.t
@@ -2,6 +2,7 @@
use warnings;
use Test::More;
use File::Basename 'dirname';
+use Data::Dumper;
use File::Spec::Functions qw/catfile/;
use File::Temp 'tempfile';
@@ -14,12 +15,39 @@
# Test aggressive
my $aggr = KorAP::XML::TEI::Tokenization::aggressive("Der alte Mann");
-is_deeply($aggr, [0,3,4,8,9, 13]);
+is_deeply($aggr, [0,3,4,8,9,13]);
+
+$aggr = KorAP::XML::TEI::Tokenization::aggressive("Der alte bzw. der grau-melierte Mann");
+is_deeply($aggr, [0,3,4,8,9,12,12,13,14,17,18,22,22,23,23,31,32,36]);
# Test conservative
my $cons = KorAP::XML::TEI::Tokenization::conservative("Der alte Mann");
is_deeply($cons, [0,3,4,8,9,13]);
+$cons = KorAP::XML::TEI::Tokenization::conservative("Der alte bzw. der grau-melierte Mann");
+is_deeply($cons, [0,3,4,8,9,12,12,13,14,17,18,31,32,36]);
+
+$cons = KorAP::XML::TEI::Tokenization::conservative(". Der");
+is_deeply($cons, [0,1,2,5]);
+
+$cons = KorAP::XML::TEI::Tokenization::conservative(" . Der");
+is_deeply($cons, [1,2,3,6]);
+
+$cons = KorAP::XML::TEI::Tokenization::conservative("   . Der"); # three leading spaces, unlike the one-space case above
+is_deeply($cons, [3,4,5,8]);
+
+$cons = KorAP::XML::TEI::Tokenization::conservative("... Der");
+is_deeply($cons, [0,1,1,2,2,3,4,7]);
+
+$cons = KorAP::XML::TEI::Tokenization::conservative(".Der");
+is_deeply($cons, [1,4]);
+
+$cons = KorAP::XML::TEI::Tokenization::conservative(".Der.... ");
+is_deeply($cons, [1,4,4,5,5,6,6,7,7,8]);
+
+$cons = KorAP::XML::TEI::Tokenization::conservative("..Der.... ");
+is_deeply($cons, [0,1,1,2,2,5,5,6,6,7,7,8,8,9]);
+
# Test data
my $dataf = catfile(dirname(__FILE__), 'data', 'wikipedia.txt');
my $data = '';