Fix leading ptis for XIP
Change-Id: Ic0b8e2f31eceffc46d7c713439415e112e902d40
diff --git a/lib/KorAP/Index/CoreNLP/NamedEntities.pm b/lib/KorAP/Index/CoreNLP/NamedEntities.pm
index 942b5b6..5308f83 100644
--- a/lib/KorAP/Index/CoreNLP/NamedEntities.pm
+++ b/lib/KorAP/Index/CoreNLP/NamedEntities.pm
@@ -1,6 +1,10 @@
package KorAP::Index::CoreNLP::NamedEntities;
use KorAP::Index::Base;
+# Import named entities, optionally with a specified
+# model. Currently, all models are mapped to the 'ne' prefix
+# and are therefore indistinguishable in annotations. However, if
+# only one model is used, that model is listed in the foundries.
sub parse {
my $self = shift;
my $model = shift;
diff --git a/lib/KorAP/Index/XIP/Constituency.pm b/lib/KorAP/Index/XIP/Constituency.pm
index d181afd..7e2853e 100644
--- a/lib/KorAP/Index/XIP/Constituency.pm
+++ b/lib/KorAP/Index/XIP/Constituency.pm
@@ -88,7 +88,8 @@
term => '<>:xip/c:' . $type,
o_start => $span->o_start,
o_end => $span->o_end,
- p_end => $span->p_end
+ p_end => $span->p_end,
+ pti => 64
);
# Only add level payload if node != root
diff --git a/lib/KorAP/Index/XIP/Morpho.pm b/lib/KorAP/Index/XIP/Morpho.pm
index 474bef0..2c82ba7 100644
--- a/lib/KorAP/Index/XIP/Morpho.pm
+++ b/lib/KorAP/Index/XIP/Morpho.pm
@@ -43,9 +43,8 @@
# Composites
my (@token) = split('#', $found);
- if (@token == 1) {
- next;
- };
+ next if @token == 1;
+
my $full = '';
foreach (@token) {
$full .= $_;
diff --git a/lib/KorAP/Index/XIP/Sentences.pm b/lib/KorAP/Index/XIP/Sentences.pm
index 9d61825..0273b39 100644
--- a/lib/KorAP/Index/XIP/Sentences.pm
+++ b/lib/KorAP/Index/XIP/Sentences.pm
@@ -19,6 +19,7 @@
o_start => $span->o_start,
o_end => $span->o_end,
p_end => $span->p_end,
+ pti => 64,
payload => '<b>0' # Could be 2 as well for t/p/s
);
$i++;
diff --git a/t/index/corenlp_ent.t b/t/index/corenlp_ent.t
new file mode 100644
index 0000000..87bf741
--- /dev/null
+++ b/t/index/corenlp_ent.t
@@ -0,0 +1,59 @@
+#!/usr/bin/env perl
+# Index test for the CoreNLP named entity layer: adds the annotation
+# (model 'ne_dewac_175m_600') to the OpenNLP tokenization of the sample
+# document and checks foundries, layerInfos and the token stream.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# Directory of the sample document, relative to this test file
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+ path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E makes the directory name match literally instead of as a pattern
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Base tokenization the annotation layer is added to
+ok(my $tokens = KorAP::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'OpenNLP',
+ layer => 'Tokens',
+ name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('CoreNLP', 'NamedEntities', 'ne_dewac_175m_600'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+
+# The model name is expected in the foundries; the annotation itself uses the 'ne' prefix
+like($data->{foundries}, qr!corenlp/namedentities!, 'data');
+like($data->{foundries}, qr!corenlp/namedentities/ne_dewac_175m_600!, 'data');
+like($data->{layerInfos}, qr!corenlp/ne=tokens!, 'layerInfos');
+is($data->{stream}->[0]->[0], '-:tokens$<i>18', 'Number of tokens');
+is($data->{stream}->[9]->[0], '_9$<i>64<i>73', 'Position of NE');
+is($data->{stream}->[9]->[1], 'corenlp/ne:I-LOC', 'Position of NE');
+is($data->{stream}->[9]->[2], 'i:hofbergli', 'Position of NE');
+
+done_testing;
+
+__END__
diff --git a/t/index/corenlp_sentences.t b/t/index/corenlp_sentences.t
index fc0965e..18c9e06 100644
--- a/t/index/corenlp_sentences.t
+++ b/t/index/corenlp_sentences.t
@@ -39,12 +39,13 @@
my $data = $tokens->to_data->{data};
+like($data->{foundries}, qr!corenlp/sentences!, 'data');
+is($data->{stream}->[0]->[0], '-:corenlp/sentences$<i>1', 'Number of paragraphs');
+is($data->{stream}->[0]->[1], '-:tokens$<i>18', 'Number of tokens');
+is($data->{stream}->[0]->[2], '<>:corenlp/s:s$<b>64<i>0<i>129<i>17<b>0', 'Text');
+is($data->{stream}->[0]->[3], '_0$<i>0<i>3', 'Position');
+is($data->{stream}->[-1]->[0], '_17$<i>124<i>128', 'Position');
+
done_testing;
__END__
-
-like($data->{foundries}, qr!corenlp/morpho!, 'data');
-like($data->{layerInfos}, qr!corenlp/p=tokens!, 'data');
-is($data->{stream}->[0]->[2], 'corenlp/p:APPRART', 'POS');
-is($data->{stream}->[1]->[1], 'corenlp/p:ADJ', 'POS');
-is($data->{stream}->[2]->[1], 'corenlp/p:ADJA', 'POS');
diff --git a/t/index/corpus/doc/text/xip/constituency.xml b/t/index/corpus/doc/text/xip/constituency.xml
new file mode 100644
index 0000000..a410fe6
--- /dev/null
+++ b/t/index/corpus/doc/text/xip/constituency.xml
@@ -0,0 +1,251 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="Corpus_Doc.00001" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <spanList>
+ <span from="0" id="s2_n40" to="130">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">TOP</f>
+ </fs>
+ <rel label="dominates" target="s2_n60" />
+ </span>
+ <span from="0" id="s2_n60" to="130">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">MC</f>
+ </fs>
+ <rel label="dominates" target="s2_n57" />
+ <rel label="dominates" uri="morpho.xml#s2_n8" />
+ <rel label="dominates" target="s2_n50" />
+ <rel label="dominates" target="s2_n51" />
+ <rel label="dominates" target="s2_n54" />
+ <rel label="dominates" uri="morpho.xml#s2_n20" />
+ <rel label="dominates" target="s2_n59" />
+ </span>
+ <span from="0" id="s2_n57" to="30">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">PP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n0" />
+ <rel label="dominates" target="s2_n53" />
+ </span>
+ <span from="0" id="s2_n0" to="3">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">PREP</f>
+ </fs>
+ </span>
+ <span from="4" id="s2_n53" to="30">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NP</f>
+ </fs>
+ <rel label="dominates" target="s2_n43" />
+ </span>
+ <span from="4" id="s2_n43" to="30">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NPA</f>
+ </fs>
+ <rel label="dominates" target="s2_n41" />
+ <rel label="dominates" target="s2_n42" />
+ <rel label="dominates" uri="morpho.xml#s2_n6" />
+ </span>
+ <span from="4" id="s2_n41" to="11">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">AP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n2" />
+ </span>
+ <span from="4" id="s2_n2" to="11">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">ADJ</f>
+ </fs>
+ </span>
+ <span from="12" id="s2_n42" to="23">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">AP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n4" />
+ </span>
+ <span from="12" id="s2_n4" to="23">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">ADJ</f>
+ </fs>
+ </span>
+ <span from="24" id="s2_n6" to="30">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NOUN</f>
+ </fs>
+ </span>
+ <span from="31" id="s2_n8" to="36">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">VERB</f>
+ </fs>
+ </span>
+ <span from="37" id="s2_n50" to="48">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n10" />
+ <rel label="dominates" target="s2_n44" />
+ </span>
+ <span from="37" id="s2_n10" to="40">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">DET</f>
+ </fs>
+ </span>
+ <span from="41" id="s2_n44" to="48">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NPA</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n12" />
+ </span>
+ <span from="41" id="s2_n12" to="48">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NOUN</f>
+ </fs>
+ </span>
+ <span from="49" id="s2_n51" to="64">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n14" />
+ <rel label="dominates" target="s2_n45" />
+ </span>
+ <span from="49" id="s2_n14" to="52">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">DET</f>
+ </fs>
+ </span>
+ <span from="53" id="s2_n45" to="64">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NPA</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n16" />
+ </span>
+ <span from="53" id="s2_n16" to="64">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NOUN</f>
+ </fs>
+ </span>
+ <span from="65" id="s2_n54" to="74">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NP</f>
+ </fs>
+ <rel label="dominates" target="s2_n46" />
+ </span>
+ <span from="65" id="s2_n46" to="74">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NPA</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n18" />
+ </span>
+ <span from="65" id="s2_n18" to="74">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NOUN</f>
+ </fs>
+ </span>
+ <span from="75" id="s2_n20" to="78">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">PTCL</f>
+ </fs>
+ </span>
+ <span from="78" id="s2_n59" to="130">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">INS</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n22" />
+ <rel label="dominates" target="s2_n58" />
+ <rel label="dominates" uri="morpho.xml#s2_n38" />
+ </span>
+ <span from="78" id="s2_n22" to="79">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">PUNCT</f>
+ </fs>
+ </span>
+ <span from="80" id="s2_n58" to="129">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">SC</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n24" />
+ <rel label="dominates" target="s2_n52" />
+ <rel label="dominates" target="s2_n55" />
+ <rel label="dominates" target="s2_n56" />
+ <rel label="dominates" uri="morpho.xml#s2_n34" />
+ <rel label="dominates" uri="morpho.xml#s2_n36" />
+ </span>
+ <span from="80" id="s2_n24" to="85">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">CONJ</f>
+ </fs>
+ </span>
+ <span from="86" id="s2_n52" to="97">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n26" />
+ <rel label="dominates" target="s2_n47" />
+ </span>
+ <span from="86" id="s2_n26" to="89">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">DET</f>
+ </fs>
+ </span>
+ <span from="90" id="s2_n47" to="97">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NPA</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n28" />
+ </span>
+ <span from="90" id="s2_n28" to="97">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NOUN</f>
+ </fs>
+ </span>
+ <span from="98" id="s2_n55" to="102">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NP</f>
+ </fs>
+ <rel label="dominates" target="s2_n48" />
+ </span>
+ <span from="98" id="s2_n48" to="102">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NPA</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n30" />
+ </span>
+ <span from="98" id="s2_n30" to="102">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NOUN</f>
+ </fs>
+ </span>
+ <span from="103" id="s2_n56" to="112">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NP</f>
+ </fs>
+ <rel label="dominates" target="s2_n49" />
+ </span>
+ <span from="103" id="s2_n49" to="112">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NPA</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n32" />
+ </span>
+ <span from="103" id="s2_n32" to="112">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">NOUN</f>
+ </fs>
+ </span>
+ <span from="113" id="s2_n34" to="124">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">VERB</f>
+ </fs>
+ </span>
+ <span from="125" id="s2_n36" to="129">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">VERB</f>
+ </fs>
+ </span>
+ <span from="129" id="s2_n38" to="130">
+ <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="const">PUNCT</f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/xip/dependency.xml b/t/index/corpus/doc/text/xip/dependency.xml
new file mode 100644
index 0000000..3ba0237
--- /dev/null
+++ b/t/index/corpus/doc/text/xip/dependency.xml
@@ -0,0 +1,85 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="Corpus_Doc.00001" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <spanList>
+ <span from="0" id="s2_n57" to="30">
+ <rel label="VMOD">
+ <span from="31" to="36" />
+ </rel>
+ </span>
+ <span from="4" id="s2_n2" to="11">
+ <rel label="NMOD">
+ <span from="24" to="30" />
+ </rel>
+ </span>
+ <span from="12" id="s2_n4" to="23">
+ <rel label="NMOD">
+ <span from="24" to="30" />
+ </rel>
+ </span>
+ <span from="31" id="s2_n8" to="36">
+ <rel label="VMAIN" type="unary" />
+ </span>
+ <span from="37" id="s2_n10" to="40">
+ <rel label="DETERM">
+ <span from="41" to="48" />
+ </rel>
+ </span>
+ <span from="41" id="s2_n12" to="48">
+ <rel label="SUBJ">
+ <span from="31" to="36" />
+ </rel>
+ </span>
+ <span from="49" id="s2_n14" to="52">
+ <rel label="DETERM">
+ <span from="53" to="64" />
+ </rel>
+ </span>
+ <span from="53" id="s2_n16" to="64">
+ <rel label="NMOD">
+ <span from="41" to="48" />
+ </rel>
+ </span>
+ <span from="65" id="s2_n18" to="74">
+ <rel label="NMOD">
+ <span from="53" to="64" />
+ </rel>
+ </span>
+ <span from="75" id="s2_n20" to="78">
+ <rel label="VPREF">
+ <span from="31" to="36" />
+ </rel>
+ </span>
+ <span from="80" id="s2_n24" to="85">
+ <rel label="CONNECT">
+ <span from="113" to="124" />
+ </rel>
+ </span>
+ <span from="86" id="s2_n26" to="89">
+ <rel label="DETERM">
+ <span from="90" to="97" />
+ </rel>
+ </span>
+ <span from="90" id="s2_n28" to="97">
+ <rel label="SUBJ">
+ <span from="113" to="124" />
+ </rel>
+ </span>
+ <span from="98" id="s2_n30" to="102">
+ <rel label="OBJ">
+ <span from="113" to="124" />
+ </rel>
+ </span>
+ <span from="103" id="s2_n32" to="112">
+ <rel label="OBJ">
+ <span from="113" to="124" />
+ </rel>
+ </span>
+ <span from="113" id="s2_n34" to="124">
+ <rel label="AUXIL">
+ <span from="125" to="129" />
+ </rel>
+ <rel label="VMAIN" type="unary" />
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/xip/morpho.xml b/t/index/corpus/doc/text/xip/morpho.xml
new file mode 100644
index 0000000..7c10928
--- /dev/null
+++ b/t/index/corpus/doc/text/xip/morpho.xml
@@ -0,0 +1,206 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="Corpus_Doc.00001" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <spanList>
+ <span from="0" id="s2_n0" to="3">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">zu</f>
+ <f name="pos">PREP</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="4" id="s2_n2" to="11">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">letzt</f>
+ <f name="pos">ADJ</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="12" id="s2_n4" to="23">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">kulturell</f>
+ <f name="pos">ADJ</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="24" id="s2_n6" to="30">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Anlass</f>
+ <f name="pos">NOUN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="31" id="s2_n8" to="36">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">=laden</f>
+ <f name="pos">VERB</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="37" id="s2_n10" to="40">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="pos">DET</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="41" id="s2_n12" to="48">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Leitung</f>
+ <f name="pos">NOUN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="49" id="s2_n14" to="52">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">der</f>
+ <f name="pos">DET</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="53" id="s2_n16" to="64">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">schulen#Heim</f>
+ <f name="pos">NOUN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="65" id="s2_n18" to="74">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Hofbergli</f>
+ <f name="pos">NOUN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="75" id="s2_n20" to="78">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ein</f>
+ <f name="pos">PTCL</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="78" id="s2_n22" to="79">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="pos">PUNCT</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="80" id="s2_n24" to="85">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">bevor</f>
+ <f name="pos">CONJ</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="86" id="s2_n26" to="89">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">der</f>
+ <f name="pos">DET</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="90" id="s2_n28" to="97">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Betrieb</f>
+ <f name="pos">NOUN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="98" id="s2_n30" to="102">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Ende</f>
+ <f name="pos">NOUN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="103" id="s2_n32" to="112">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">schulen#Jahr</f>
+ <f name="pos">NOUN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="113" id="s2_n34" to="124">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ein=stellen</f>
+ <f name="pos">VERB</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="125" id="s2_n36" to="129">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">werden</f>
+ <f name="pos">VERB</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span from="129" id="s2_n38" to="130">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">.</f>
+ <f name="pos">PUNCT</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/xip/sentences.xml b/t/index/corpus/doc/text/xip/sentences.xml
new file mode 100644
index 0000000..0965bc6
--- /dev/null
+++ b/t/index/corpus/doc/text/xip/sentences.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="Corpus_Doc.00001" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <spanList>
+ <span from="0" to="130" />
+ </spanList>
+</layer>
diff --git a/t/index/xip_constituency.t b/t/index/xip_constituency.t
new file mode 100644
index 0000000..7197bea
--- /dev/null
+++ b/t/index/xip_constituency.t
@@ -0,0 +1,65 @@
+#!/usr/bin/env perl
+# Index test for the XIP constituency layer: adds the annotation to the
+# OpenNLP tokenization of the sample document and checks foundries,
+# layerInfos and the constituency spans in the token stream.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# Directory of the sample document, relative to this test file
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+ path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E makes the directory name match literally instead of as a pattern
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Base tokenization the annotation layer is added to
+ok(my $tokens = KorAP::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'OpenNLP',
+ layer => 'Tokens',
+ name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('XIP', 'Constituency'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+like($data->{foundries}, qr!xip/constituency!, 'data');
+like($data->{layerInfos}, qr!xip/c=spans!, 'data');
+
+# The length includes the punctuation - but that doesn't matter
+# Every expected span payload starts with <b>64, the pti set for XIP spans
+is($data->{stream}->[0]->[1], '<>:xip/c:PREP$<b>64<i>0<i>3<i>1<b>3', 'Prep phrase');
+is($data->{stream}->[0]->[2], '<>:xip/c:PP$<b>64<i>0<i>30<i>4<b>2', 'pp phrase');
+is($data->{stream}->[0]->[3], '<>:xip/c:TOP$<b>64<i>0<i>129<i>17<b>0', 'top phrase');
+is($data->{stream}->[0]->[4], '<>:xip/c:MC$<b>64<i>0<i>129<i>17<b>1', 'mc phrase');
+
+is($data->{stream}->[-1]->[0], '<>:xip/c:VERB$<b>64<i>124<i>128<i>18<b>4', 'Verb');
+
+done_testing;
+
+__END__
+
+
+
+
diff --git a/t/index/xip_morpho.t b/t/index/xip_morpho.t
new file mode 100644
index 0000000..4cf37b7
--- /dev/null
+++ b/t/index/xip_morpho.t
@@ -0,0 +1,69 @@
+#!/usr/bin/env perl
+# Index test for the XIP morpho layer: adds the annotation to the
+# OpenNLP tokenization of the sample document and checks foundries,
+# layerInfos and the lemma / part-of-speech terms in the token stream.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# Directory of the sample document, relative to this test file
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+ path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E makes the directory name match literally instead of as a pattern
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Base tokenization the annotation layer is added to
+ok(my $tokens = KorAP::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'OpenNLP',
+ layer => 'Tokens',
+ name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('XIP', 'Morpho'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+
+like($data->{foundries}, qr!xip/morpho!, 'data');
+like($data->{layerInfos}, qr!xip/l=tokens!, 'data');
+like($data->{layerInfos}, qr!xip/p=tokens!, 'data');
+is($data->{stream}->[0]->[4], 'xip/l:zu', 'Lemma');
+is($data->{stream}->[0]->[5], 'xip/p:PREP', 'POS');
+
+is($data->{stream}->[1]->[3], 'xip/l:letzt', 'Lemma');
+is($data->{stream}->[1]->[4], 'xip/p:ADJ', 'POS');
+
+# The fixture lemma 'schulen#Heim' is a composite: each '#'-separated part
+# and the full lemma are expected as terms, with '#' written as '\#'
+is($data->{stream}->[8]->[3], 'xip/l:\#Heim', 'Lemma (part)');
+is($data->{stream}->[8]->[4], 'xip/l:\#schulen', 'Lemma (part)');
+is($data->{stream}->[8]->[5], 'xip/l:schulen\#Heim', 'Lemma (full)');
+
+is($data->{stream}->[-1]->[3], 'xip/l:werden', 'Lemma');
+is($data->{stream}->[-1]->[4], 'xip/p:VERB', 'POS');
+
+done_testing;
+
+__END__
+
diff --git a/t/index/xip_sentences.t b/t/index/xip_sentences.t
new file mode 100644
index 0000000..1fa9ee6
--- /dev/null
+++ b/t/index/xip_sentences.t
@@ -0,0 +1,58 @@
+#!/usr/bin/env perl
+# Index test for the XIP sentences layer: adds the annotation to the
+# OpenNLP tokenization of the sample document and checks the foundries,
+# the sentence count and the sentence span in the token stream.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# Directory of the sample document, relative to this test file
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+ path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E makes the directory name match literally instead of as a pattern
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Base tokenization the annotation layer is added to
+ok(my $tokens = KorAP::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'OpenNLP',
+ layer => 'Tokens',
+ name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('XIP', 'Sentences'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+
+like($data->{foundries}, qr!xip/sentences!, 'data');
+
+# The sentence span payload is expected to start with <b>64, the pti set for XIP spans
+is($data->{stream}->[0]->[1], '-:xip/sentences$<i>1', 'Number of sentences');
+is($data->{stream}->[0]->[0], '-:tokens$<i>18', 'Number of tokens');
+is($data->{stream}->[0]->[2], '<>:xip/s:s$<b>64<i>0<i>129<i>17<b>0', 'Text');
+is($data->{stream}->[0]->[3], '_0$<i>0<i>3', 'Position');
+is($data->{stream}->[-1]->[0], '_17$<i>124<i>128', 'Position');
+
+done_testing;
+__END__