Introduce exclusivity for inline token handling

Change-Id: Ia34ec87b2c55aabd94c65ec1e0d63d0cadb27d75
diff --git a/Changes b/Changes
index 992c8f8..eaa432b 100644
--- a/Changes
+++ b/Changes
@@ -12,6 +12,7 @@
         - Improve script handling of broken data
         - Improve handling of unknown header types
         - Check for valid sigles to avoid broken directories
+        - Introduce exclusivity for inline token handling
 
 1.00 2021-02-18 Release
         - -s option added that uses sentence boundaries
diff --git a/Readme.pod b/Readme.pod
index 5c976c4..79180c4 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -141,6 +141,17 @@
 this will contain annotations as well.
 Defaults to C<tokens> and C<morpho>.
 
+The inline token data will also be stored in the
+inline structures file (see I<--inline-structures>),
+unless the inline token foundry is prefixed with
+an exclamation mark (B<!>), indicating that inline
+tokens are stored exclusively in the inline tokens
+file.
+
+Example:
+
+  tei2korapxml --inline-tokens '!gingko#morpho' < data.i5.xml > korapxml.zip
+
 =item B<--inline-structures> <foundry>#[<file>]
 
 Define the foundry and file (without extension)
diff --git a/lib/KorAP/XML/TEI/Inline.pm b/lib/KorAP/XML/TEI/Inline.pm
index 5446deb..fd83062 100644
--- a/lib/KorAP/XML/TEI/Inline.pm
+++ b/lib/KorAP/XML/TEI/Inline.pm
@@ -37,13 +37,14 @@
   TOKENS             => 5,
   STRUCTURES         => 6,
   SKIP_INLINE_TAGS   => 7,
-  SKIP_INLINE_TOKENS => 8
+  SKIP_INLINE_TOKENS => 8,
+  INLINE_TOKENS_EXCLUSIVE => 9
 };
 
 
 # Constructor
 sub new {
-  my ($class, $skip_inline_tokens, $skip_inline_tags) = @_;
+  my ($class, $skip_inline_tokens, $skip_inline_tags, $inline_tokens_exclusive) = @_;
 
   my @self = ();
 
@@ -67,9 +68,10 @@
   $self[TOKENS] = KorAP::XML::TEI::Annotations::Collector->new;
 
   # Initialize structure collector
-  $self[STRUCTURES]         = KorAP::XML::TEI::Annotations::Collector->new;
-  $self[SKIP_INLINE_TOKENS] = $skip_inline_tokens // undef;
-  $self[SKIP_INLINE_TAGS]   = $skip_inline_tags   // {};
+  $self[STRUCTURES]              = KorAP::XML::TEI::Annotations::Collector->new;
+  $self[SKIP_INLINE_TOKENS]      = $skip_inline_tokens // undef;
+  $self[INLINE_TOKENS_EXCLUSIVE] = $inline_tokens_exclusive // 0;
+  $self[SKIP_INLINE_TAGS]        = $skip_inline_tags   // {};
 
   bless \@self, $class;
 };
@@ -137,12 +139,26 @@
         next;
       };
 
-      my $anno = $self->[STRUCTURES]->add_new_annotation($node_info);
+      my $anno = KorAP::XML::TEI::Annotations::Annotation->new($node_info);
 
-      # Add element also to token list
-      if (!$self->[SKIP_INLINE_TOKENS] && $node_info eq $_TOKENS_TAG) {
-        $self->[TOKENS]->add_annotation($anno);
-      };
+      # Is token tag
+      if ($node_info eq $_TOKENS_TAG) {
+
+        # Add tokens to the structure file, unless they are exclusive to the token file
+        unless ($self->[INLINE_TOKENS_EXCLUSIVE]) {
+          $self->[STRUCTURES]->add_annotation($anno);
+        }
+
+        # Add tokens to the token list
+        if (!$self->[SKIP_INLINE_TOKENS]) {
+          $self->[TOKENS]->add_annotation($anno);
+        };
+      }
+
+      # Not token tag
+      else {
+        $self->[STRUCTURES]->add_annotation($anno);
+      }
 
       # Handle attributes (if attributes exist)
       if (defined $e->[3]) {
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 10a2787..0d196a5 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -42,6 +42,9 @@
   warn 'KORAPXMLTEI_INLINE is deprecated in favor of --skip-inline-token-annotations';
 };
 
+# Flag: if true, inline tokens won't be stored in the structure file
+my $inline_tokens_exclusive = 0;
+
 # Parse options from the command line
 GetOptions(
   'root|r=s'              => \(my $root_dir    = '.'),
@@ -136,6 +139,11 @@
 # i.e. tokens of the $_TOKENS_TAG, if $_TOKENS_PROC is set
 my ($_tokens_dir, $_tokens_file) = split '#', $inline_tokens . '#morpho';
 
+if (index($_tokens_dir, '!') == 0) {
+  $_tokens_dir = substr($_tokens_dir, 1);
+  $inline_tokens_exclusive = 1;
+};
+
 # Initialize zipper
 my $zipper = KorAP::XML::TEI::Zipper->new($root_dir);
 
@@ -168,7 +176,8 @@
 # Create inline parser object
 my $inline = KorAP::XML::TEI::Inline->new(
   $skip_inline_tokens,
-  \%skip_inline_tags
+  \%skip_inline_tags,
+  $inline_tokens_exclusive
 );
 
 
@@ -532,6 +541,17 @@
 this will contain annotations as well.
 Defaults to C<tokens> and C<morpho>.
 
+The inline token data will also be stored in the
+inline structures file (see I<--inline-structures>),
+unless the inline token foundry is prefixed with
+an exclamation mark (B<!>), indicating that inline
+tokens are stored exclusively in the inline tokens
+file.
+
+Example:
+
+  tei2korapxml --inline-tokens '!gingko#morpho' < data.i5.xml > korapxml.zip
+
 =item B<--inline-structures> <foundry>#[<file>]
 
 Define the foundry and file (without extension)
diff --git a/t/inline.t b/t/inline.t
index d5a1db2..76ff74d 100644
--- a/t/inline.t
+++ b/t/inline.t
@@ -250,4 +250,27 @@
       ;
 };
 
+
+subtest 'Treatment of tokens' => sub {
+  my $inline = KorAP::XML::TEI::Inline->new(0, {b => 1}, 1);
+
+  ok($inline->parse('aaa', \'<a>Der</a> <b>alte</b> <w pos="NN">Baum</w>'), 'Parsed');
+  is($inline->data->data, 'Der alte Baum');
+
+  # Only contains '<a>'
+  Test::XML::Loy->new($inline->structures->to_string('aaa', 1))
+      ->attr_is('#s1', 'to', 3)
+      ->element_exists_not('#s2')
+      ;
+
+  # Only contains 'w'
+  Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
+      ->attr_is('#s0', 'from', 9)
+      ->attr_is('#s0', 'to', 13)
+      ->attr_is('#s0 > fs > f > fs > f', 'name', 'pos')
+      ->text_is('#s0 > fs > f > fs > f[name=pos]', 'NN')
+      ->element_exists_not('#s1')
+      ;
+};
+
 done_testing;