Establish collection object for token annotations

Change-Id: I03f9ff1f28301135b24dc111b7ef85c3af86a8e6
diff --git a/t/script.t b/t/script.t
index 6af3095..6ceadd9 100644
--- a/t/script.t
+++ b/t/script.t
@@ -459,5 +459,41 @@
     ;
 };
 
+subtest 'Check Inline annotations with untagged file' => sub {
+  # Runs the script under test ($script) with KORAPXMLTEI_INLINE=1 and checks which layers end up in the zip.
+  # Load example file
+  my $file = catfile($f, 'data', 'goe_sample.i5.xml');
+
+  my ($fh, $outzip) = korap_tempfile('script_untagged');  # helper defined elsewhere; presumably a temp file handle and its path (confirm)
+
+  # Generate zip file (unportable: needs a shell providing cat, pipes and VAR=value command prefixes)
+  stderr_like(
+    sub { `cat '$file' | KORAPXMLTEI_INLINE=1 perl '$script' > '$outzip'` },
+    qr!tei2korapxml: .*? text_id=GOE_AGA\.00000!,
+    'Processing 1'
+  );
+
+  # TODO: there should be a better way to test this (the same pipeline is run again only to scan STDERR)
+  stderr_unlike(
+    sub { `cat '$file' | KORAPXMLTEI_INLINE=1 perl '$script' > '$outzip'` },
+    qr!.*undefined value.*!,
+    'Processing 2'
+  );
+  # Both runs redirect STDOUT to $outzip; the checks below inspect that file.
+
+  ok(-e $outzip, "File $outzip exists");
+
+  my $zip = IO::Uncompress::Unzip->new(
+    $outzip,
+    Name => 'GOE/AGA/00000/tokens/morpho.xml'
+  );
+  ok((not $zip), 'missing morpho.xml');  # opening the morpho member is expected to fail
+
+  $zip = IO::Uncompress::Unzip->new(
+    $outzip,
+    Name => 'GOE/AGA/00000/struct/structure.xml'
+  );
+  ok($zip, 'found structure.xml');  # the structure member must be present
+};
 
 done_testing;
diff --git a/t/token.t b/t/token.t
new file mode 100644
index 0000000..85bab65
--- /dev/null
+++ b/t/token.t
@@ -0,0 +1,83 @@
+use strict;
+use warnings;
+use Test::More;
+use Test::XML::Loy;
+
+use FindBin;
+BEGIN {
+  unshift @INC, "$FindBin::Bin/../lib";
+};
+
+use_ok('KorAP::XML::TEI::Tokenizer::Token');
+
+subtest 'Initialization' => sub {
+  my $t = KorAP::XML::TEI::Tokenizer::Token->new;
+  # Without constructor arguments, both offsets and the level start out undefined.
+  ok(!defined($t->from), 'Undefined from');
+  ok(!defined($t->to), 'Undefined to');
+  ok(!defined($t->level), 'Undefined level');
+  # Attach two attributes, then set the offsets (from is set twice).
+  $t->add_attribute('foo' => 'bar');
+  $t->add_attribute('x' => 'y');
+  $t->set_from(7);
+  $t->set_to(5);
+  $t->set_from(4);  # replaces the earlier from value of 7 (asserted below)
+  # Serialize with 3 as argument; the resulting span is expected to carry the id s3.
+  my $loy = Test::XML::Loy->new($t->to_string(3));
+
+  $loy->attr_is('span', 'id', 's3')
+    ->attr_is('span', 'from', 4)
+    ->attr_is('span', 'to', 5)
+    ->attr_is('span fs f', 'name', 'lex')
+    ->attr_is('span fs f fs f:nth-of-type(1)', 'name', 'foo')  # attributes appear in the order they were added
+    ->text_is('span fs f fs f:nth-of-type(1)', 'bar')
+    ->attr_is('span fs f fs f:nth-of-type(2)', 'name', 'x')
+    ->text_is('span fs f fs f:nth-of-type(2)', 'y')
+    ;
+  # The accessors report the values set above; the level stays undefined until set_level is called.
+  is($t->from,4);
+  is($t->to,5);
+  is($t->level,undef);
+  $t->set_level(19);
+  is($t->level,19);
+  # Once a level is set, it is expected as the l attribute of the serialized span.
+  $loy = Test::XML::Loy->new($t->to_string(3));
+
+  $loy->attr_is('span', 'id', 's3')
+    ->attr_is('span', 'from', 4)
+    ->attr_is('span', 'to', 5)
+    ->attr_is('span', 'l', 19)
+    ;
+};
+
+
+subtest 'Test inline annotations' => sub {
+  my $t = KorAP::XML::TEI::Tokenizer::Token->new('x1', 0, 6);  # presumably (name, from, to); only to=6 is asserted below (confirm)
+  $t->add_attribute('ana' => 'DET @PREMOD');
+  $t->add_attribute('lemma' => 'C & A');
+  # Plain serialization: ana and lemma are expected as features with the values they were added with.
+  my $loy = Test::XML::Loy->new($t->to_string(1));
+
+  $loy->attr_is('span', 'id', 's1')
+    ->attr_is('span', 'to', 6)
+    ->attr_is('span > fs > f > fs f:nth-of-type(1)', 'name', 'ana')
+    ->text_is('span > fs > f > fs f:nth-of-type(1)', 'DET @PREMOD')
+    ->attr_is('span > fs > f > fs f:nth-of-type(2)', 'name', 'lemma')
+    ->text_is('span > fs > f > fs f:nth-of-type(2)', 'C & A')
+    ;
+  # Inline-annotation serialization: the ana value is expected as pos (DET) plus msd (@PREMOD); lemma is kept.
+  $loy = Test::XML::Loy->new($t->to_string_with_inline_annotations(1));
+
+  $loy->attr_is('span', 'id', 's1')
+    ->attr_is('span', 'to', 6)
+    ->attr_is('span > fs > f > fs f:nth-of-type(1)', 'name', 'pos')
+    ->text_is('span > fs > f > fs f:nth-of-type(1)', 'DET')
+    ->attr_is('span > fs > f > fs f:nth-of-type(2)', 'name', 'msd')
+    ->text_is('span > fs > f > fs f:nth-of-type(2)', '@PREMOD')
+    ->attr_is('span > fs > f > fs f:nth-of-type(3)', 'name', 'lemma')
+    ->text_is('span > fs > f > fs f:nth-of-type(3)', 'C & A')
+};
+
+
+done_testing;
+
diff --git a/t/tokenization-collect.t b/t/tokenization-collect.t
new file mode 100644
index 0000000..952c4b7
--- /dev/null
+++ b/t/tokenization-collect.t
@@ -0,0 +1,46 @@
+use strict;
+use warnings;
+use Test::More;
+use Test::XML::Loy;
+
+use FindBin;
+BEGIN {
+  unshift @INC, "$FindBin::Bin/../lib";
+};
+
+use_ok('KorAP::XML::TEI::Tokenizer::Collector');
+
+my $t = KorAP::XML::TEI::Tokenizer::Collector->new;
+# Collect three tokens; the return value of add_token is kept for the second one and used as a token below.
+$t->add_token('x1',0,8);
+my $token = $t->add_token('x2',9,14,2);  # the fourth argument (2) is expected as the l attribute below
+$t->add_token('x3',15,20);
+# The captured token serializes with its own offsets and level.
+my $loy = Test::XML::Loy->new($token->to_string(2));
+
+$loy->attr_is('span', 'id', 's2')
+  ->attr_is('span', 'from', 9)
+  ->attr_is('span', 'to', 14)
+  ->attr_is('span', 'l', 2)
+  ->attr_is('span fs f', 'name', 'lex')
+  ;
+# last_token is expected to yield the token added last (offsets 15 to 20).
+$loy = Test::XML::Loy->new($t->last_token->to_string(3));
+
+$loy->attr_is('span', 'id', 's3')
+  ->attr_is('span', 'from', 15)
+  ->attr_is('span', 'to', 20)
+  ->attr_is('span fs f', 'name', 'lex')
+;
+# Serializing the whole collector: docid is text, and the spans s0 to s2 follow the insertion order.
+$loy = Test::XML::Loy->new($t->to_string('text', 0))  # NOTE(review): meaning of the second argument (0) is not visible here; confirm
+  ->attr_is('layer', 'docid', 'text')
+  ->attr_is('span#s0', 'to', '8')
+  ->attr_is('span#s1', 'to', '14')
+  ->attr_is('span#s1', 'l', '2')
+  ->attr_is('span#s2', 'to', '20')
+;
+
+
+done_testing;
+
diff --git a/t/tokenization.t b/t/tokenization.t
index 92b7cc3..9d986a0 100644
--- a/t/tokenization.t
+++ b/t/tokenization.t
@@ -18,10 +18,15 @@
 
 # Test aggressive
 my $aggr = KorAP::XML::TEI::Tokenizer::Aggressive->new;
+ok($aggr->empty, 'Empty');  # a freshly constructed tokenizer is expected to report empty
 $aggr->tokenize("Der alte Mann");
+ok(!$aggr->empty, 'Not empty');  # after tokenize() the object holds offsets (compared by the following is_deeply)
 is_deeply($aggr, [0,3,4,8,9,13]);
 
-$aggr->reset->tokenize("Der alte bzw. der grau-melierte Mann");
+$aggr->reset;  # no longer chained into tokenize(), so the emptied state can be asserted first
+ok($aggr->empty, 'Empty');
+
+$aggr->tokenize("Der alte bzw. der grau-melierte Mann");
 is_deeply($aggr, [0,3,4,8,9,12,12,13,14,17,18,22,22,23,23,31,32,36]);
 
 like(