Test and benchmark conversion of inline annotations

Change-Id: I2eaabb35373b2a4c87c329a4a5254a5f347e989c
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 66f36b3..d1bb176 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -109,7 +109,7 @@
 
 ## TODO: optional
 # handling inline annotations (inside $_TOKENS_TAG)
-my $_INLINE_ANNOT    = 0;                            # on/off: set to 1 if inline annotations are present and should be processed (default: 0)
+my $_INLINE_ANNOT    = $ENV{KORAPXMLTEI_INLINE}?1:0; # on/off: 1 if the environment variable KORAPXMLTEI_INLINE holds a true value, i.e. inline annotations are present and should be processed (default: 0)
 my $_INLINE_LEM_RD   = "lemma";                      # from which attribute to read LEMMA information
 my $_INLINE_ATT_RD   = "ana";                        # from which attribute to read POS information (and evtl. additional MSD - Morphosyntactic Descriptions)
                                                      # TODO: The format for the POS and MSD information has to suffice the regular expression ([^ ]+)( (.+))?
diff --git a/t/script.t b/t/script.t
index 31ff24b..6af3095 100644
--- a/t/script.t
+++ b/t/script.t
@@ -390,4 +390,74 @@
   );
 };
 
+
+subtest 'Check Inline annotations' => sub {
+  # Convert a tagged sample with KORAPXMLTEI_INLINE=1 and verify the generated tokens/morpho.xml
+  # Locate the tagged example file that is piped into the converter below
+  my $file = catfile($f, 'data', 'goe_sample_tagged.i5.xml');
+
+  my ($fh, $outzip) = korap_tempfile('script_tagged');
+
+  # Generate zip file with inline annotation processing switched on (unportable: needs cat, a pipe and shell-style env assignment)
+  stderr_like(
+    sub { `cat '$file' | KORAPXMLTEI_INLINE=1 perl '$script' > '$outzip'` },
+    qr!tei2korapxml: .*? text_id=GOE_AGA\.00000!, # message expected on STDERR
+    'Processing'
+  );
+
+  ok(-e $outzip, "File $outzip exists");
+
+  my $zip = IO::Uncompress::Unzip->new(
+    $outzip,
+    Name => 'GOE/AGA/00000/tokens/morpho.xml' # archive member to open
+  );
+  ok($zip, 'Inline annotations');
+  # Read the opened member line by line into one string
+  my $tokens;
+  $tokens .= $zip->getline while !$zip->eof;
+  ok($zip->close, 'Closed');
+
+  my $t = Test::XML::Loy->new($tokens);
+
+  $t->attr_is('layer', 'docid', 'GOE_AGA.00000')
+    ->attr_is('spanList span:nth-child(1)', 'id', 's0')
+    ->attr_is('spanList span:nth-child(1)', 'from', '75')
+    ->attr_is('spanList span:nth-child(1)', 'to', '81')
+    ->attr_is('spanList span:nth-child(1)', 'l', '7')
+    # Feature structure of the first span (s0): namespace plus pos and msd values
+    ->attr_is('span#s0 > fs', 'type', 'lex')
+    ->attr_is('span#s0 > fs', 'xmlns', 'http://www.tei-c.org/ns/1.0')
+    ->attr_is('span#s0 > fs > f > fs > f:nth-child(1)', 'name', 'pos')
+    ->text_is('span#s0 > fs > f > fs > f:nth-child(1)', 'A')
+    ->attr_is('span#s0 > fs > f > fs > f:nth-child(2)', 'name', 'msd')
+    ->text_is('span#s0 > fs > f > fs > f:nth-child(2)', '@NH')
+    # Spot check of span s25: offsets plus pos and msd values
+    ->attr_is('span#s25', 'from', '259')
+    ->attr_is('span#s25', 'to', '263')
+    ->attr_is('span#s25', 'l', '7')
+    ->attr_is('span#s25 > fs > f > fs > f:nth-child(1)', 'name', 'pos')
+    ->text_is('span#s25 > fs > f > fs > f:nth-child(1)', 'PRON')
+    ->attr_is('span#s25 > fs > f > fs > f:nth-child(2)', 'name', 'msd')
+    ->text_is('span#s25 > fs > f > fs > f:nth-child(2)', '@NH')
+    # Spot check of span s58: offsets plus pos and msd values
+    ->attr_is('span#s58', 'from', '495')
+    ->attr_is('span#s58', 'to', '500')
+    ->attr_is('span#s58', 'l', '7')
+    ->attr_is('span#s58 > fs > f > fs > f:nth-child(1)', 'name', 'pos')
+    ->text_is('span#s58 > fs > f > fs > f:nth-child(1)', 'N')
+    ->attr_is('span#s58 > fs > f > fs > f:nth-child(2)', 'name', 'msd')
+    ->text_is('span#s58 > fs > f > fs > f:nth-child(2)', '@NH')
+    # Spot check of span s119, followed by a check that no span s120 exists
+    ->attr_is('span#s119', 'from', '914')
+    ->attr_is('span#s119', 'to', '925')
+    ->attr_is('span#s119', 'l', '7')
+    ->attr_is('span#s119 > fs > f > fs > f:nth-child(1)', 'name', 'pos')
+    ->text_is('span#s119 > fs > f > fs > f:nth-child(1)', 'A')
+    ->attr_is('span#s119 > fs > f > fs > f:nth-child(2)', 'name', 'msd')
+    ->text_is('span#s119 > fs > f > fs > f:nth-child(2)', '@NH')
+    ->element_exists_not('span#s120') # s119 is expected to be the last span
+    ;
+};
+
+
 done_testing;
diff --git a/xt/benchmark.pl b/xt/benchmark.pl
index 163b85b..71dbe54 100644
--- a/xt/benchmark.pl
+++ b/xt/benchmark.pl
@@ -36,8 +36,9 @@
 my $f = dirname(__FILE__);
 my $script = rel2abs(catfile($f, '..', 'script', $SCRIPT_NAME));
 
-# Load example file
+# Resolve absolute paths of the example files: the plain sample and the tagged sample for the inline annotation benchmark
 my $file = rel2abs(catfile($f, '..', 't', 'data', 'goe_sample.i5.xml'));
+my $goe_tagged = rel2abs(catfile($f, '..', 't', 'data', 'goe_sample_tagged.i5.xml'));
 
 # Create a new benchmark object
 my $bench = Dumbbench->new(
@@ -86,6 +87,12 @@
     }
   ),
   Dumbbench::Instance::PerlSub->new(
+    name => 'Conversion-with-inline-annotations',
+    code => sub { # convert the tagged sample with KORAPXMLTEI_INLINE=1; STDOUT and STDERR are discarded
+      `cat '$goe_tagged' | KORAPXMLTEI_INLINE=1 perl '$script' > /dev/null 2>&1`
+    }
+  ),
+  Dumbbench::Instance::PerlSub->new(
     name => 'delHTMLcom',
     code => sub {
       for (1..100_000) {