Added tests for pti
Change-Id: I731161cfd2b52fa867eb8a59df3d5fc794f201db
diff --git a/lib/KorAP/Index/Connexor/Phrase.pm b/lib/KorAP/Index/Connexor/Phrase.pm
index 00a1b0d..309a52b 100644
--- a/lib/KorAP/Index/Connexor/Phrase.pm
+++ b/lib/KorAP/Index/Connexor/Phrase.pm
@@ -23,6 +23,7 @@
o_start => $span->o_start,
o_end => $span->o_end,
p_end => $span->p_end,
+ pti => 64,
payload => '<b>0' # Pseudo-depth
);
};
@@ -33,7 +34,7 @@
};
sub layer_info {
- ['cnx/c=spans'];
+ ['cnx/c=spans'];
};
diff --git a/lib/KorAP/Index/Connexor/Sentences.pm b/lib/KorAP/Index/Connexor/Sentences.pm
index 2d7d7ca..09246db 100644
--- a/lib/KorAP/Index/Connexor/Sentences.pm
+++ b/lib/KorAP/Index/Connexor/Sentences.pm
@@ -16,7 +16,8 @@
o_start => $span->o_start,
o_end => $span->o_end,
p_end => $span->p_end,
- payload => '<b>0' # Could be 2 as well for t/p/s
+ pti => 64,
+ payload => '<b>0'
);
$i++;
}
@@ -29,7 +30,7 @@
sub layer_info {
- ['cnx/s=spans'];
+ ['cnx/s=spans'];
};
1;
diff --git a/lib/KorAP/Index/Connexor/Syntax.pm b/lib/KorAP/Index/Connexor/Syntax.pm
index c65c4f5..d27801f 100644
--- a/lib/KorAP/Index/Connexor/Syntax.pm
+++ b/lib/KorAP/Index/Connexor/Syntax.pm
@@ -13,7 +13,6 @@
my $found;
my $spans = $token->hash->{fs}->{f}->{fs}->{f};
-
# syntax
foreach (@$spans) {
if (($_->{-name} eq 'pos') && ($found = $_->{'#text'})) {
@@ -28,7 +27,7 @@
};
sub layer_info {
- ['cnx/syn=tokens'];
+ ['cnx/syn=tokens'];
};
1;
diff --git a/lib/KorAP/Index/CoreNLP/Constituency.pm b/lib/KorAP/Index/CoreNLP/Constituency.pm
index af93c86..2e21565 100644
--- a/lib/KorAP/Index/CoreNLP/Constituency.pm
+++ b/lib/KorAP/Index/CoreNLP/Constituency.pm
@@ -30,12 +30,10 @@
foreach (@$rel) {
if ($_->{-label} eq 'dominates') {
if ($_->{-target}) {
-# warn $_->{-target} . ' is no root';
$corenlp_const_noroot->insert($_->{-target});
}
elsif (my $uri = $_->{-uri}) {
$uri =~ s/^morpho\.xml#//;
-# warn $uri . ' is no root';
$corenlp_const_noroot->insert($uri);
};
};
@@ -63,7 +61,8 @@
term => '<>:corenlp/c:' . $type,
o_start => $span->o_start,
o_end => $span->o_end,
- p_end => $span->p_end
+ p_end => $span->p_end,
+ pti => 64
);
$term{payload} = '<b>' . ($level // 0);
@@ -98,8 +97,10 @@
return 1;
};
+
sub layer_info {
- ['corenlp/c=spans']
-}
+ ['corenlp/c=spans']
+};
+
1;
diff --git a/lib/KorAP/Index/CoreNLP/Sentences.pm b/lib/KorAP/Index/CoreNLP/Sentences.pm
index 5dad896..abe97d6 100644
--- a/lib/KorAP/Index/CoreNLP/Sentences.pm
+++ b/lib/KorAP/Index/CoreNLP/Sentences.pm
@@ -16,6 +16,7 @@
o_start => $span->o_start,
o_end => $span->o_end,
p_end => $span->p_end,
+ pti => 64,
payload => '<b>0' # Could also be 2 for t/p/s
);
$i++;
diff --git a/lib/KorAP/Index/OpenNLP/Sentences.pm b/lib/KorAP/Index/OpenNLP/Sentences.pm
index a471dfb..7f1d8d5 100644
--- a/lib/KorAP/Index/OpenNLP/Sentences.pm
+++ b/lib/KorAP/Index/OpenNLP/Sentences.pm
@@ -7,7 +7,7 @@
$$self->add_spandata(
foundry => 'opennlp',
- layer => 'struct',
+ layer => 'sentences',
cb => sub {
my ($stream, $span) = @_;
my $mtt = $stream->pos($span->p_start);
@@ -16,7 +16,8 @@
o_start => $span->o_start,
o_end => $span->o_end,
p_end => $span->p_end,
- payload => '<b>0' # t/p/s -> could be 2 as well
+ pti => 64,
+ payload => '<b>0'
);
$i++;
}
diff --git a/t/index/connexor_morpho.t b/t/index/connexor_morpho.t
new file mode 100644
index 0000000..e0e0d76
--- /dev/null
+++ b/t/index/connexor_morpho.t
@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+# Checks the lemma, part-of-speech and morphology terms that the Connexor
+# morpho layer adds to the token stream of the sample document.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# The sample document lives next to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+  path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E keeps regex metacharacters in the checkout path literal.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Tokenize with the OpenNLP token layer before adding the annotation.
+ok(my $tokens = KorAP::Tokenizer->new(
+  path => $doc->path,
+  doc => $doc,
+  foundry => 'OpenNLP',
+  layer => 'Tokens',
+  name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('Connexor', 'Morpho'), 'Add Structure');
+
+# Inspect the serialized token stream.
+my $data = $tokens->to_data->{data};
+like($data->{foundries}, qr!connexor/morpho!, 'data');
+is($data->{stream}->[0]->[1], '_0$<i>0<i>3', 'Position');
+is($data->{stream}->[1]->[1], 'cnx/l:letzt', 'Lemma');
+is($data->{stream}->[1]->[2], 'cnx/p:A', 'POS');
+is($data->{stream}->[2]->[1], 'cnx/l:kulturell', 'Lemma');
+is($data->{stream}->[2]->[2], 'cnx/p:A', 'POS');
+is($data->{stream}->[4]->[2], 'cnx/m:IND', 'Morpho');
+is($data->{stream}->[4]->[3], 'cnx/m:PRES', 'Morpho');
+
+is($data->{stream}->[-1]->[2], 'cnx/m:IND', 'Morpho');
+is($data->{stream}->[-1]->[3], 'cnx/m:PRES', 'Morpho');
+
+done_testing;
+
+__END__
diff --git a/t/index/connexor_phrase.t b/t/index/connexor_phrase.t
new file mode 100644
index 0000000..08eb127
--- /dev/null
+++ b/t/index/connexor_phrase.t
@@ -0,0 +1,53 @@
+#!/usr/bin/env perl
+# Checks the noun-phrase span that the Connexor phrase layer adds to the stream;
+# the leading <b>64 is presumably the serialized pti of 64 (TODO confirm).
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# The sample document lives next to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+  path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E keeps regex metacharacters in the checkout path literal.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Tokenize with the OpenNLP token layer before adding the annotation.
+ok(my $tokens = KorAP::Tokenizer->new(
+  path => $doc->path,
+  doc => $doc,
+  foundry => 'OpenNLP',
+  layer => 'Tokens',
+  name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('Connexor', 'Phrase'), 'Add Structure');
+
+# Inspect the serialized token stream.
+my $data = $tokens->to_data->{data};
+
+like($data->{foundries}, qr!connexor/phrase!, 'data');
+is($data->{stream}->[1]->[0], '<>:cnx/c:np$<b>64<i>4<i>30<i>4<b>0', 'Noun phrase');
+
+done_testing;
+
+__END__
diff --git a/t/index/connexor_sentences.t b/t/index/connexor_sentences.t
new file mode 100644
index 0000000..4ce5591
--- /dev/null
+++ b/t/index/connexor_sentences.t
@@ -0,0 +1,56 @@
+#!/usr/bin/env perl
+# Checks the sentence counter and the sentence span that the Connexor
+# sentences layer adds at the first stream position of the sample document.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# The sample document lives next to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+  path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E keeps regex metacharacters in the checkout path literal.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Tokenize with the OpenNLP token layer before adding the annotation.
+ok(my $tokens = KorAP::Tokenizer->new(
+  path => $doc->path,
+  doc => $doc,
+  foundry => 'OpenNLP',
+  layer => 'Tokens',
+  name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('Connexor', 'Sentences'), 'Add Structure');
+
+# Inspect the serialized token stream.
+my $data = $tokens->to_data->{data};
+
+like($data->{foundries}, qr!connexor/sentences!, 'data');
+is($data->{stream}->[0]->[0], '-:cnx/sentences$<i>1', 'Number of sentences');
+is($data->{stream}->[0]->[1], '-:tokens$<i>18', 'Number of tokens');
+is($data->{stream}->[0]->[2], '<>:cnx/s:s$<b>64<i>0<i>129<i>17<b>0', 'Sentence');
+is($data->{stream}->[0]->[3], '_0$<i>0<i>3', 'Position');
+
+done_testing;
+
+__END__
diff --git a/t/index/connexor_syntax.t b/t/index/connexor_syntax.t
new file mode 100644
index 0000000..ee0d22d
--- /dev/null
+++ b/t/index/connexor_syntax.t
@@ -0,0 +1,54 @@
+#!/usr/bin/env perl
+# Checks the layer info and the syntax terms that the Connexor syntax layer
+# adds to the token stream of the sample document.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# The sample document lives next to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+  path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E keeps regex metacharacters in the checkout path literal.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Tokenize with the OpenNLP token layer before adding the annotation.
+ok(my $tokens = KorAP::Tokenizer->new(
+  path => $doc->path,
+  doc => $doc,
+  foundry => 'OpenNLP',
+  layer => 'Tokens',
+  name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('Connexor', 'Syntax'), 'Add Structure');
+
+# Inspect the serialized token stream.
+my $data = $tokens->to_data->{data};
+like($data->{foundries}, qr!connexor/syntax!, 'data');
+like($data->{layerInfos}, qr!cnx/syn=tokens!, 'data');
+is($data->{stream}->[1]->[1], 'cnx/syn:@PREMOD', 'Syntax');
+is($data->{stream}->[2]->[1], 'cnx/syn:@PREMOD', 'Syntax');
+
+done_testing;
+
+__END__
diff --git a/t/index/corenlp_constituency.t b/t/index/corenlp_constituency.t
new file mode 100644
index 0000000..bc55b95
--- /dev/null
+++ b/t/index/corenlp_constituency.t
@@ -0,0 +1,58 @@
+#!/usr/bin/env perl
+# Checks the layer info and the constituency spans that the CoreNLP
+# constituency layer adds at the first stream position of the sample document.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# The sample document lives next to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+  path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E keeps regex metacharacters in the checkout path literal.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Tokenize with the OpenNLP token layer before adding the annotation.
+ok(my $tokens = KorAP::Tokenizer->new(
+  path => $doc->path,
+  doc => $doc,
+  foundry => 'OpenNLP',
+  layer => 'Tokens',
+  name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('CoreNLP', 'Constituency'), 'Add Structure');
+
+# Inspect the serialized token stream.
+my $data = $tokens->to_data->{data};
+
+like($data->{foundries}, qr!corenlp/constituency!, 'data');
+like($data->{layerInfos}, qr!corenlp/c=spans!, 'data');
+
+is($data->{stream}->[0]->[1], '<>:corenlp/c:CNP$<b>64<i>0<i>16<i>2<b>2', 'Noun phrase');
+is($data->{stream}->[0]->[2], '<>:corenlp/c:ROOT$<b>64<i>0<i>42<i>6<b>0', 'Root node');
+is($data->{stream}->[0]->[3], '<>:corenlp/c:NP$<b>64<i>0<i>42<i>6<b>1', 'Noun phrase');
+
+done_testing;
+
+__END__
+
diff --git a/t/index/corenlp_morpho.t b/t/index/corenlp_morpho.t
new file mode 100644
index 0000000..07bdb0f
--- /dev/null
+++ b/t/index/corenlp_morpho.t
@@ -0,0 +1,55 @@
+#!/usr/bin/env perl
+# Checks the layer info and the part-of-speech terms that the CoreNLP morpho
+# layer adds to the token stream of the sample document.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# The sample document lives next to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+  path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E keeps regex metacharacters in the checkout path literal.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Tokenize with the OpenNLP token layer before adding the annotation.
+ok(my $tokens = KorAP::Tokenizer->new(
+  path => $doc->path,
+  doc => $doc,
+  foundry => 'OpenNLP',
+  layer => 'Tokens',
+  name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('CoreNLP', 'Morpho'), 'Add Structure');
+
+# Inspect the serialized token stream.
+my $data = $tokens->to_data->{data};
+like($data->{foundries}, qr!corenlp/morpho!, 'data');
+like($data->{layerInfos}, qr!corenlp/p=tokens!, 'data');
+is($data->{stream}->[0]->[2], 'corenlp/p:APPRART', 'POS');
+is($data->{stream}->[1]->[1], 'corenlp/p:ADJ', 'POS');
+is($data->{stream}->[2]->[1], 'corenlp/p:ADJA', 'POS');
+
+done_testing;
+
+__END__
diff --git a/t/index/corenlp_sentences.t b/t/index/corenlp_sentences.t
new file mode 100644
index 0000000..fc0965e
--- /dev/null
+++ b/t/index/corenlp_sentences.t
@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+# Checks that the CoreNLP sentences layer can be added on top of the OpenNLP
+# tokenization and is then reported among the foundries.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# The sample document lives next to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+  path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E keeps regex metacharacters in the checkout path literal.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Tokenize with the OpenNLP token layer before adding the annotation.
+ok(my $tokens = KorAP::Tokenizer->new(
+  path => $doc->path,
+  doc => $doc,
+  foundry => 'OpenNLP',
+  layer => 'Tokens',
+  name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('CoreNLP', 'Sentences'), 'Add Structure');
+
+# Inspect the serialized token stream.
+my $data = $tokens->to_data->{data};
+
+# As in the sibling layer tests, the added layer should be listed in foundries.
+like($data->{foundries}, qr!corenlp/sentences!, 'data');
+
+done_testing;
+
+__END__
+
+# NOTE(review): the lines below are never run and duplicate the assertions of
+# corenlp_morpho.t; replace them with sentence-span expectations before enabling.
+like($data->{foundries}, qr!corenlp/morpho!, 'data');
+like($data->{layerInfos}, qr!corenlp/p=tokens!, 'data');
+is($data->{stream}->[0]->[2], 'corenlp/p:APPRART', 'POS');
+is($data->{stream}->[1]->[1], 'corenlp/p:ADJ', 'POS');
+is($data->{stream}->[2]->[1], 'corenlp/p:ADJA', 'POS');
diff --git a/t/index/corpus/doc/text/base/paragraph.xml b/t/index/corpus/doc/text/base/paragraph.xml
index be19d62..93d479f 100644
--- a/t/index/corpus/doc/text/base/paragraph.xml
+++ b/t/index/corpus/doc/text/base/paragraph.xml
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
-<layer docid="ART_00001" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+<layer docid="Corpus_Doc.0001" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
<spanList>
<span from="0" to="129" />
</spanList>
diff --git a/t/index/corpus/doc/text/base/sentences.xml b/t/index/corpus/doc/text/base/sentences.xml
index be19d62..93d479f 100644
--- a/t/index/corpus/doc/text/base/sentences.xml
+++ b/t/index/corpus/doc/text/base/sentences.xml
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
-<layer docid="ART_00001" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+<layer docid="Corpus_Doc.0001" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
<spanList>
<span from="0" to="129" />
</spanList>
diff --git a/t/index/corpus/doc/text/connexor/morpho.xml b/t/index/corpus/doc/text/connexor/morpho.xml
new file mode 100644
index 0000000..3d36c46
--- /dev/null
+++ b/t/index/corpus/doc/text/connexor/morpho.xml
@@ -0,0 +1,221 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="Corpus_Doc.0001">
+ <spanList>
+ <span id="s8" from="0" to="2">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">zu</f>
+ <f name="pos">PREP</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="2" to="3">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">der</f>
+ <f name="pos">DET</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="4" to="11">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">letzt</f>
+ <f name="pos">A</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="12" to="23">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">kulturell</f>
+ <f name="pos">A</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="24" to="30">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">anlass</f>
+ <f name="pos">N</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="31" to="35">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">laden</f>
+ <f name="pos">V</f>
+ <f name="msd">IND:PRES</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="36" to="39">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="pos">DET</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="40" to="47">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">leitung</f>
+ <f name="pos">N</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="48" to="51">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">das</f>
+ <f name="pos">DET</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="52" to="63">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">schul heim</f>
+ <f name="pos">N</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s18" from="64" to="73">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Hofbergli</f>
+ <f name="pos">N</f>
+ <f name="msd">Prop</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s19" from="74" to="77">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ein</f>
+ <f name="pos">NUM</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s20" from="77" to="78">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="pos"></f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s21" from="79" to="84">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">bevor</f>
+ <f name="pos">CS</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s22" from="85" to="88">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">der</f>
+ <f name="pos">DET</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s23" from="89" to="96">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">betrieb</f>
+ <f name="pos">N</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s24" from="97" to="101">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ende</f>
+ <f name="pos">N</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s25" from="102" to="111">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">schule jahr</f>
+ <f name="pos">N</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s26" from="112" to="123">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ein stellen</f>
+ <f name="pos">V</f>
+ <f name="msd">PCP:PERF</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s27" from="124" to="128">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">werden</f>
+ <f name="pos">V</f>
+ <f name="msd">IND:PRES</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s28" from="128" to="129">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">.</f>
+ <f name="pos"></f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/connexor/phrase.xml b/t/index/corpus/doc/text/connexor/phrase.xml
new file mode 100644
index 0000000..20c92a2
--- /dev/null
+++ b/t/index/corpus/doc/text/connexor/phrase.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="Corpus_Doc.0001">
+ <spanList>
+ <span from="4" to="30">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="pos">np</f>
+ </fs>
+ </span>
+ <span from="40" to="47">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="pos">np</f>
+ </fs>
+ </span>
+ <span from="52" to="73">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="pos">np</f>
+ </fs>
+ </span>
+ <span from="89" to="111">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="pos">np</f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/connexor/sentences.xml b/t/index/corpus/doc/text/connexor/sentences.xml
new file mode 100644
index 0000000..505a404
--- /dev/null
+++ b/t/index/corpus/doc/text/connexor/sentences.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="Corpus_Doc.0001">
+ <spanList>
+ <span from="0" to="129" />
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/connexor/syntax.xml b/t/index/corpus/doc/text/connexor/syntax.xml
new file mode 100644
index 0000000..2e05306
--- /dev/null
+++ b/t/index/corpus/doc/text/connexor/syntax.xml
@@ -0,0 +1,207 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="Corpus_Doc.0001">
+ <spanList>
+ <span id="s8" from="0" to="2">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">zu</f>
+ <f name="pos">@PREMARK</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="2" to="3">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">der</f>
+ <f name="pos">@PREMOD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="4" to="11">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">letzt</f>
+ <f name="pos">@PREMOD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="12" to="23">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">kulturell</f>
+ <f name="pos">@PREMOD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="24" to="30">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">anlass</f>
+ <f name="pos">@NH</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="31" to="35">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">laden</f>
+ <f name="pos">@MAIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="36" to="39">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">die</f>
+ <f name="pos">@PREMOD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="40" to="47">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">leitung</f>
+ <f name="pos">@NH</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="48" to="51">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">das</f>
+ <f name="pos">@PREMOD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="52" to="63">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">schul heim</f>
+ <f name="pos">@PREMOD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s18" from="64" to="73">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Hofbergli</f>
+ <f name="pos">@NH</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s19" from="74" to="77">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ein</f>
+ <f name="pos">@NH</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s20" from="77" to="78">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">,</f>
+ <f name="pos"></f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s21" from="79" to="84">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">bevor</f>
+ <f name="pos">@PREMARK</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s22" from="85" to="88">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">der</f>
+ <f name="pos">@PREMOD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s23" from="89" to="96">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">betrieb</f>
+ <f name="pos">@PREMOD</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s24" from="97" to="101">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ende</f>
+ <f name="pos">@NH</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s25" from="102" to="111">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">schule jahr</f>
+ <f name="pos">@NH</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s26" from="112" to="123">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ein stellen</f>
+ <f name="pos">@MAIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s27" from="124" to="128">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="lemma">werden</f>
+ <f name="pos">@AUX</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/corenlp/constituency.xml b/t/index/corpus/doc/text/corenlp/constituency.xml
new file mode 100644
index 0000000..895c732
--- /dev/null
+++ b/t/index/corpus/doc/text/corenlp/constituency.xml
@@ -0,0 +1,182 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="Corpus_Doc.0001" version="KorAP-0.4">
+ <spanList>
+ <span id="s1_n1" from="0" to="42">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">ROOT</f>
+ </fs>
+ <rel label="dominates" target="s1_n2"/>
+ </span>
+ <span id="s1_n2" from="0" to="42">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NP</f>
+ </fs>
+ <rel label="dominates" target="s1_n3"/>
+ <rel label="dominates" target="s1_n10"/>
+ <rel label="dominates" uri="morpho.xml#s1_n17"/>
+ </span>
+ <span id="s1_n3" from="0" to="16">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">CNP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s1_n4"/>
+ <rel label="dominates" uri="morpho.xml#s1_n6"/>
+ <rel label="dominates" uri="morpho.xml#s1_n8"/>
+ </span>
+ <span id="s1_n4" from="0" to="7">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NE</f>
+ </fs>
+ </span>
+ <span id="s1_n6" from="8" to="11">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">KON</f>
+ </fs>
+ </span>
+ <span id="s1_n8" from="12" to="16">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NN</f>
+ </fs>
+ </span>
+ <span id="s1_n10" from="17" to="41">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">PP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s1_n11"/>
+ <rel label="dominates" uri="morpho.xml#s1_n13"/>
+ <rel label="dominates" uri="morpho.xml#s1_n15"/>
+ </span>
+ <span id="s1_n11" from="17" to="19">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">APPRART</f>
+ </fs>
+ </span>
+ <span id="s1_n13" from="20" to="31">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NN</f>
+ </fs>
+ </span>
+ <span id="s1_n15" from="32" to="41">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NE</f>
+ </fs>
+ </span>
+ <span id="s1_n17" from="41" to="42">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">$.</f>
+ </fs>
+ </span>
+<!--
+ <span id="s2_n1" from="43" to="172">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">ROOT</f>
+ </fs>
+ <rel label="dominates" target="s2_n2"/>
+ </span>
+ <span id="s2_n2" from="43" to="172">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">S</f>
+ </fs>
+ <rel label="dominates" target="s2_n3"/>
+ <rel label="dominates" uri="morpho.xml#s2_n12"/>
+ <rel label="dominates" target="s2_n14"/>
+ <rel label="dominates" uri="morpho.xml#s2_n26"/>
+ <rel label="dominates" uri="morpho.xml#s2_n28"/>
+ <rel label="dominates" target="s2_n30"/>
+ <rel label="dominates" uri="morpho.xml#s2_n48"/>
+ </span>
+-->
+ <span id="s2_n3" from="43" to="73">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">PP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n4"/>
+ <rel label="dominates" uri="morpho.xml#s2_n6"/>
+ <rel label="dominates" uri="morpho.xml#s2_n8"/>
+ <rel label="dominates" uri="morpho.xml#s2_n10"/>
+ </span>
+ <span id="s2_n4" from="43" to="46">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">APPRART</f>
+ </fs>
+ </span>
+ <span id="s2_n6" from="47" to="54">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">ADJA</f>
+ </fs>
+ </span>
+ <span id="s2_n8" from="55" to="66">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">ADJA</f>
+ </fs>
+ </span>
+ <span id="s2_n10" from="67" to="73">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NN</f>
+ </fs>
+ </span>
+ <span id="s2_n12" from="74" to="78">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">VVFIN</f>
+ </fs>
+ </span>
+ <span id="s2_n14" from="79" to="116">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NP-SB</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n15"/>
+ <rel label="dominates" uri="morpho.xml#s2_n17"/>
+ <rel label="dominates" target="s2_n19"/>
+ </span>
+ <span id="s2_n15" from="79" to="82">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">ART</f>
+ </fs>
+ </span>
+ <span id="s2_n17" from="83" to="90">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NN</f>
+ </fs>
+ </span>
+ <span id="s2_n19" from="91" to="116">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NP</f>
+ </fs>
+ <rel label="dominates" uri="morpho.xml#s2_n20"/>
+ <rel label="dominates" uri="morpho.xml#s2_n22"/>
+ <rel label="dominates" uri="morpho.xml#s2_n24"/>
+ </span>
+ <span id="s2_n20" from="91" to="94">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">ART</f>
+ </fs>
+ </span>
+ <span id="s2_n22" from="95" to="106">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NN</f>
+ </fs>
+ </span>
+ <span id="s2_n24" from="107" to="116">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">NE</f>
+ </fs>
+ </span>
+ <span id="s2_n26" from="117" to="120">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">PTKVZ</f>
+ </fs>
+ </span>
+ <span id="s2_n28" from="120" to="121">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">$,</f>
+ </fs>
+ </span>
+ <span id="s2_n31" from="122" to="127">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
+ <f name="const">KOUS</f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/corenlp/morpho.xml b/t/index/corpus/doc/text/corenlp/morpho.xml
new file mode 100644
index 0000000..559d25b
--- /dev/null
+++ b/t/index/corpus/doc/text/corenlp/morpho.xml
@@ -0,0 +1,187 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="Corpus_Doc.0001" version="KorAP-0.4">
+ <spanList>
+ <span id="s2_n4" from="0" to="3">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">APPRART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n6" from="4" to="11">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ADJ</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n8" from="12" to="23">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ADJA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n10" from="24" to="30">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n12" from="31" to="35">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">VVFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n15" from="36" to="39">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n17" from="40" to="47">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n20" from="48" to="51">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n22" from="52" to="63">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n24" from="64" to="73">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n26" from="74" to="77">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">PTKVZ</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n28" from="77" to="78">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">$,</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n31" from="79" to="84">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">KOUS</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n34" from="85" to="88">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n36" from="89" to="96">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n40" from="97" to="101">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n42" from="102" to="111">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n44" from="112" to="123">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">VVPP</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n46" from="124" to="128">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">VAFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2_n48" from="128" to="129">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">$.</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/corenlp/ne_dewac_175m_600.xml b/t/index/corpus/doc/text/corenlp/ne_dewac_175m_600.xml
new file mode 100644
index 0000000..6359ac8
--- /dev/null
+++ b/t/index/corpus/doc/text/corenlp/ne_dewac_175m_600.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="ART_00001" version="KorAP-0.4">
+ <spanList>
+ <span id="s_18" from="64" to="73">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="ne">
+ <f name="ne">
+ <fs>
+ <f name="ent">I-LOC</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/corenlp/ne_hgc_175m_600.xml b/t/index/corpus/doc/text/corenlp/ne_hgc_175m_600.xml
new file mode 100644
index 0000000..6359ac8
--- /dev/null
+++ b/t/index/corpus/doc/text/corenlp/ne_hgc_175m_600.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="ART_00001" version="KorAP-0.4">
+ <spanList>
+ <span id="s_18" from="64" to="73">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="ne">
+ <f name="ne">
+ <fs>
+ <f name="ent">I-LOC</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/corenlp/sentences.xml b/t/index/corpus/doc/text/corenlp/sentences.xml
new file mode 100644
index 0000000..f0a9d0a
--- /dev/null
+++ b/t/index/corpus/doc/text/corenlp/sentences.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="Corpus_Doc.0001" version="KorAP-0.4">
+ <spanList>
+ <span from="0" to="129"/>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/corenlp/tokens.xml b/t/index/corpus/doc/text/corenlp/tokens.xml
new file mode 100644
index 0000000..78f562b
--- /dev/null
+++ b/t/index/corpus/doc/text/corenlp/tokens.xml
@@ -0,0 +1,226 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A01_APR.13047" version="KorAP-0.4">
+ <spanList>
+ <span id="s1_n4" from="0" to="7"/>
+ <span id="s1_n6" from="8" to="11"/>
+ <span id="s1_n8" from="12" to="16"/>
+ <span id="s1_n11" from="17" to="19"/>
+ <span id="s1_n13" from="20" to="31"/>
+ <span id="s1_n15" from="32" to="41"/>
+ <span id="s1_n17" from="41" to="42"/>
+ <span id="s2_n4" from="43" to="46"/>
+ <span id="s2_n6" from="47" to="54"/>
+ <span id="s2_n8" from="55" to="66"/>
+ <span id="s2_n10" from="67" to="73"/>
+ <span id="s2_n12" from="74" to="78"/>
+ <span id="s2_n15" from="79" to="82"/>
+ <span id="s2_n17" from="83" to="90"/>
+ <span id="s2_n20" from="91" to="94"/>
+ <span id="s2_n22" from="95" to="106"/>
+ <span id="s2_n24" from="107" to="116"/>
+ <span id="s2_n26" from="117" to="120"/>
+ <span id="s2_n28" from="120" to="121"/>
+ <span id="s2_n31" from="122" to="127"/>
+ <span id="s2_n34" from="128" to="131"/>
+ <span id="s2_n36" from="132" to="139"/>
+ <span id="s2_n40" from="140" to="144"/>
+ <span id="s2_n42" from="145" to="154"/>
+ <span id="s2_n44" from="155" to="166"/>
+ <span id="s2_n46" from="167" to="171"/>
+ <span id="s2_n48" from="171" to="172"/>
+ <span id="s3_n4" from="173" to="175"/>
+ <span id="s3_n6" from="176" to="185"/>
+ <span id="s3_n9" from="186" to="193"/>
+ <span id="s3_n11" from="193" to="194"/>
+ <span id="s3_n13" from="195" to="196"/>
+ <span id="s3_n15" from="196" to="197"/>
+ <span id="s4_n3" from="198" to="203"/>
+ <span id="s4_n5" from="203" to="204"/>
+ <span id="s4_n7" from="205" to="211"/>
+ <span id="s4_n13" from="212" to="214"/>
+ <span id="s4_n15" from="215" to="220"/>
+ <span id="s4_n17" from="221" to="224"/>
+ <span id="s4_n20" from="225" to="228"/>
+ <span id="s4_n22" from="229" to="239"/>
+ <span id="s4_n25" from="240" to="243"/>
+ <span id="s4_n27" from="244" to="255"/>
+ <span id="s4_n31" from="256" to="259"/>
+ <span id="s4_n34" from="260" to="266"/>
+ <span id="s4_n36" from="267" to="274"/>
+ <span id="s4_n41" from="275" to="276"/>
+ <span id="s4_n43" from="276" to="282"/>
+ <span id="s4_n47" from="282" to="283"/>
+ <span id="s4_n49" from="284" to="287"/>
+ <span id="s4_n52" from="288" to="293"/>
+ <span id="s4_n54" from="294" to="298"/>
+ <span id="s4_n57" from="299" to="300"/>
+ <span id="s4_n59" from="300" to="313"/>
+ <span id="s4_n61" from="314" to="317"/>
+ <span id="s4_n63" from="318" to="331"/>
+ <span id="s4_n65" from="331" to="332"/>
+ <span id="s4_n68" from="333" to="335"/>
+ <span id="s4_n70" from="336" to="345"/>
+ <span id="s4_n72" from="346" to="357"/>
+ <span id="s4_n74" from="358" to="363"/>
+ <span id="s4_n76" from="363" to="364"/>
+ <span id="s5_n5" from="365" to="368"/>
+ <span id="s5_n7" from="369" to="372"/>
+ <span id="s5_n9" from="373" to="385"/>
+ <span id="s5_n11" from="386" to="395"/>
+ <span id="s5_n13" from="396" to="402"/>
+ <span id="s5_n17" from="403" to="409"/>
+ <span id="s5_n19" from="410" to="416"/>
+ <span id="s5_n21" from="417" to="420"/>
+ <span id="s5_n24" from="421" to="425"/>
+ <span id="s5_n26" from="426" to="432"/>
+ <span id="s5_n28" from="432" to="433"/>
+ <span id="s5_n32" from="434" to="445"/>
+ <span id="s5_n34" from="446" to="451"/>
+ <span id="s5_n36" from="452" to="459"/>
+ <span id="s5_n39" from="460" to="465"/>
+ <span id="s5_n41" from="466" to="476"/>
+ <span id="s5_n43" from="476" to="477"/>
+ <span id="s6_n4" from="478" to="481"/>
+ <span id="s6_n6" from="482" to="493"/>
+ <span id="s6_n8" from="494" to="500"/>
+ <span id="s6_n12" from="501" to="504"/>
+ <span id="s6_n14" from="505" to="507"/>
+ <span id="s6_n16" from="508" to="510"/>
+ <span id="s6_n18" from="510" to="511"/>
+ <span id="s7_n5" from="512" to="517"/>
+ <span id="s7_n7" from="518" to="521"/>
+ <span id="s7_n10" from="522" to="525"/>
+ <span id="s7_n14" from="526" to="528"/>
+ <span id="s7_n16" from="529" to="536"/>
+ <span id="s7_n19" from="537" to="540"/>
+ <span id="s7_n22" from="541" to="548"/>
+ <span id="s7_n25" from="549" to="552"/>
+ <span id="s7_n28" from="553" to="558"/>
+ <span id="s7_n30" from="559" to="562"/>
+ <span id="s7_n32" from="563" to="568"/>
+ <span id="s7_n34" from="569" to="572"/>
+ <span id="s7_n36" from="573" to="578"/>
+ <span id="s7_n39" from="579" to="585"/>
+ <span id="s7_n42" from="586" to="589"/>
+ <span id="s7_n44" from="590" to="597"/>
+ <span id="s7_n47" from="598" to="601"/>
+ <span id="s7_n50" from="602" to="607"/>
+ <span id="s7_n52" from="608" to="611"/>
+ <span id="s7_n54" from="612" to="617"/>
+ <span id="s7_n56" from="617" to="620"/>
+ <span id="s7_n58" from="621" to="632"/>
+ <span id="s7_n60" from="632" to="633"/>
+ <span id="s8_n3" from="634" to="642"/>
+ <span id="s8_n6" from="643" to="658"/>
+ <span id="s8_n8" from="659" to="663"/>
+ <span id="s8_n10" from="663" to="664"/>
+ <span id="s9_n4" from="665" to="668"/>
+ <span id="s9_n6" from="669" to="689"/>
+ <span id="s9_n8" from="690" to="695"/>
+ <span id="s9_n10" from="696" to="699"/>
+ <span id="s9_n12" from="699" to="700"/>
+ <span id="s9_n15" from="701" to="705"/>
+ <span id="s9_n18" from="706" to="713"/>
+ <span id="s9_n20" from="714" to="724"/>
+ <span id="s9_n24" from="725" to="729"/>
+ <span id="s9_n26" from="730" to="747"/>
+ <span id="s9_n29" from="748" to="751"/>
+ <span id="s9_n31" from="752" to="755"/>
+ <span id="s9_n33" from="756" to="774"/>
+ <span id="s9_n35" from="775" to="784"/>
+ <span id="s9_n37" from="785" to="788"/>
+ <span id="s9_n39" from="788" to="789"/>
+ <span id="s10_n4" from="790" to="793"/>
+ <span id="s10_n6" from="794" to="797"/>
+ <span id="s10_n9" from="798" to="802"/>
+ <span id="s10_n11" from="803" to="806"/>
+ <span id="s10_n13" from="807" to="816"/>
+ <span id="s10_n15" from="817" to="826"/>
+ <span id="s10_n17" from="827" to="831"/>
+ <span id="s10_n19" from="832" to="835"/>
+ <span id="s10_n22" from="836" to="839"/>
+ <span id="s10_n24" from="840" to="845"/>
+ <span id="s10_n26" from="846" to="850"/>
+ <span id="s10_n29" from="851" to="864"/>
+ <span id="s10_n31" from="864" to="865"/>
+ <span id="s11_n4" from="866" to="869"/>
+ <span id="s11_n6" from="870" to="875"/>
+ <span id="s11_n8" from="876" to="892"/>
+ <span id="s11_n10" from="893" to="898"/>
+ <span id="s11_n13" from="899" to="902"/>
+ <span id="s11_n15" from="903" to="909"/>
+ <span id="s11_n18" from="910" to="913"/>
+ <span id="s11_n20" from="914" to="927"/>
+ <span id="s11_n24" from="928" to="935"/>
+ <span id="s11_n26" from="936" to="947"/>
+ <span id="s11_n28" from="947" to="948"/>
+ <span id="s11_n32" from="949" to="957"/>
+ <span id="s11_n34" from="958" to="962"/>
+ <span id="s11_n36" from="962" to="963"/>
+ <span id="s11_n38" from="964" to="986"/>
+ <span id="s11_n40" from="986" to="987"/>
+ <span id="s11_n42" from="988" to="995"/>
+ <span id="s11_n44" from="995" to="996"/>
+ <span id="s11_n47" from="997" to="1007"/>
+ <span id="s11_n49" from="1007" to="1008"/>
+ <span id="s12_n4" from="1009" to="1025"/>
+ <span id="s12_n7" from="1026" to="1029"/>
+ <span id="s12_n9" from="1030" to="1036"/>
+ <span id="s12_n11" from="1037" to="1049"/>
+ <span id="s12_n13" from="1049" to="1050"/>
+ <span id="s13_n4" from="1051" to="1054"/>
+ <span id="s13_n6" from="1055" to="1056"/>
+ <span id="s13_n9" from="1056" to="1064"/>
+ <span id="s13_n11" from="1065" to="1070"/>
+ <span id="s13_n13" from="1070" to="1071"/>
+ <span id="s13_n15" from="1072" to="1076"/>
+ <span id="s13_n18" from="1077" to="1079"/>
+ <span id="s13_n20" from="1080" to="1085"/>
+ <span id="s13_n22" from="1086" to="1090"/>
+ <span id="s13_n24" from="1091" to="1094"/>
+ <span id="s13_n26" from="1094" to="1095"/>
+ <span id="s13_n31" from="1096" to="1098"/>
+ <span id="s13_n33" from="1099" to="1102"/>
+ <span id="s13_n36" from="1103" to="1109"/>
+ <span id="s13_n38" from="1110" to="1122"/>
+ <span id="s13_n40" from="1123" to="1129"/>
+ <span id="s13_n42" from="1130" to="1134"/>
+ <span id="s13_n44" from="1134" to="1135"/>
+ <span id="s13_n47" from="1136" to="1139"/>
+ <span id="s13_n50" from="1140" to="1143"/>
+ <span id="s13_n54" from="1144" to="1152"/>
+ <span id="s13_n56" from="1153" to="1156"/>
+ <span id="s13_n58" from="1157" to="1168"/>
+ <span id="s13_n60" from="1169" to="1177"/>
+ <span id="s13_n65" from="1177" to="1178"/>
+ <span id="s13_n67" from="1179" to="1182"/>
+ <span id="s13_n69" from="1183" to="1195"/>
+ <span id="s13_n72" from="1196" to="1199"/>
+ <span id="s13_n74" from="1200" to="1207"/>
+ <span id="s13_n76" from="1208" to="1214"/>
+ <span id="s13_n78" from="1215" to="1219"/>
+ <span id="s13_n80" from="1219" to="1220"/>
+ <span id="s14_n4" from="1221" to="1224"/>
+ <span id="s14_n6" from="1225" to="1237"/>
+ <span id="s14_n8" from="1238" to="1243"/>
+ <span id="s14_n10" from="1244" to="1247"/>
+ <span id="s14_n12" from="1248" to="1252"/>
+ <span id="s14_n14" from="1253" to="1258"/>
+ <span id="s14_n17" from="1259" to="1262"/>
+ <span id="s14_n20" from="1263" to="1268"/>
+ <span id="s14_n22" from="1269" to="1274"/>
+ <span id="s14_n24" from="1274" to="1275"/>
+ <span id="s14_n27" from="1276" to="1280"/>
+ <span id="s14_n30" from="1281" to="1284"/>
+ <span id="s14_n32" from="1285" to="1287"/>
+ <span id="s14_n34" from="1288" to="1290"/>
+ <span id="s14_n36" from="1290" to="1291"/>
+ <span id="s15_n4" from="1292" to="1295"/>
+ <span id="s15_n6" from="1296" to="1310"/>
+ <span id="s15_n8" from="1311" to="1314"/>
+ <span id="s15_n10" from="1315" to="1326"/>
+ <span id="s15_n12" from="1326" to="1327"/>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/data.xml b/t/index/corpus/doc/text/data.xml
index 6b5af0f..3a12d04 100644
--- a/t/index/corpus/doc/text/data.xml
+++ b/t/index/corpus/doc/text/data.xml
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
-<raw_text docid="ART_ABC.00001" xmlns="http://ids-mannheim.de/ns/KorAP">
+<raw_text docid="Corpus_Doc.0001" xmlns="http://ids-mannheim.de/ns/KorAP">
<metadata file="metadata.xml" />
<text>Zum letzten kulturellen Anlass lädt die Leitung des Schulheimes Hofbergli ein, bevor der Betrieb Ende Schuljahr eingestellt wird.</text>
</raw_text>
diff --git a/t/index/corpus/doc/text/opennlp/morpho.xml b/t/index/corpus/doc/text/opennlp/morpho.xml
new file mode 100644
index 0000000..65171b4
--- /dev/null
+++ b/t/index/corpus/doc/text/opennlp/morpho.xml
@@ -0,0 +1,184 @@
+<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="Corpus_Doc.0001" VERSION="KorAP-0.4">
+ <spanList>
+ <span id="s_7" from="0" to="3">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">APPRART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_8" from="4" to="11">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ADJA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_9" from="12" to="23">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ADJA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_10" from="24" to="30">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_11" from="31" to="35">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">VVFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_12" from="36" to="39">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_13" from="40" to="47">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_14" from="48" to="51">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_15" from="52" to="63">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_16" from="64" to="73">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NE</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_17" from="74" to="77">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">PTKVZ</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_18" from="77" to="78">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">$,</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_19" from="79" to="84">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">KOUS</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_20" from="85" to="88">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">ART</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_21" from="89" to="96">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_22" from="97" to="101">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_23" from="102" to="111">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">NN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_24" from="112" to="123">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">VVPP</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_25" from="124" to="128">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">VAFIN</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s_26" from="128" to="129">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">$.</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/opennlp/sentences.xml b/t/index/corpus/doc/text/opennlp/sentences.xml
new file mode 100644
index 0000000..b35317b
--- /dev/null
+++ b/t/index/corpus/doc/text/opennlp/sentences.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="Corpus_Doc.0001" VERSION="KorAP-0.4">
+ <spanList>
+ <span from="0" to="129"/>
+ </spanList>
+</layer>
diff --git a/t/index/corpus/doc/text/opennlp/tokens.xml b/t/index/corpus/doc/text/opennlp/tokens.xml
index b181a49..cd56e1c 100644
--- a/t/index/corpus/doc/text/opennlp/tokens.xml
+++ b/t/index/corpus/doc/text/opennlp/tokens.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="ART_00001" VERSION="KorAP-0.4">
+<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="Corpus_Doc.0001" VERSION="KorAP-0.4">
<spanList>
<span id="s_7" from="0" to="3"/>
<span id="s_8" from="4" to="11"/>
diff --git a/t/index/opennlp_morpho.t b/t/index/opennlp_morpho.t
new file mode 100644
index 0000000..383f758
--- /dev/null
+++ b/t/index/opennlp_morpho.t
@@ -0,0 +1,52 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+# Checks the terms that adding the OpenNLP 'Morpho' layer puts into the token stream.
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+# Fixture document directory, resolved relative to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+ path => $path . '/'
+), 'Load Korap::Document');
+# \Q...\E matches the path literally, even if it contains regex metacharacters.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+# Build the tokenizer from the OpenNLP 'Tokens' layer of the same document.
+ok(my $tokens = KorAP::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'OpenNLP',
+ layer => 'Tokens',
+ name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+# Add the annotation layer under test.
+ok($tokens->add('OpenNLP', 'Morpho'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+# The foundry must be listed; then spot-check stream entries 0-2 and the last one.
+like($data->{foundries}, qr!opennlp/morpho!, 'data');
+is($data->{stream}->[0]->[1], '_0$<i>0<i>3', 'Position');
+is($data->{stream}->[0]->[3], 'opennlp/p:APPRART', 'POS');
+is($data->{stream}->[1]->[2], 'opennlp/p:ADJA', 'POS');
+is($data->{stream}->[2]->[2], 'opennlp/p:ADJA', 'POS');
+is($data->{stream}->[-1]->[2], 'opennlp/p:VAFIN', 'POS');
+
+done_testing;
+
+__END__
+
diff --git a/t/index/opennlp_sentences.t b/t/index/opennlp_sentences.t
new file mode 100644
index 0000000..740d98a
--- /dev/null
+++ b/t/index/opennlp_sentences.t
@@ -0,0 +1,50 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+# Checks the terms that adding the OpenNLP 'Sentences' layer puts into the token stream.
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+# Fixture document directory, resolved relative to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+ path => $path . '/'
+), 'Load Korap::Document');
+# \Q...\E matches the path literally, even if it contains regex metacharacters.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+# Build the tokenizer from the OpenNLP 'Tokens' layer of the same document.
+ok(my $tokens = KorAP::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'OpenNLP',
+ layer => 'Tokens',
+ name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+# Add the annotation layer under test.
+ok($tokens->add('OpenNLP', 'Sentences'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+# The first stream entry must hold the counters, the sentence span (with its <b>64 marker) and the position.
+like($data->{foundries}, qr!opennlp/sentences!, 'data');
+is($data->{stream}->[0]->[0], '-:opennlp/sentences$<i>1', 'Number of Sentences');
+is($data->{stream}->[0]->[1], '-:tokens$<i>18', 'Number of tokens');
+is($data->{stream}->[0]->[2], '<>:opennlp/s:s$<b>64<i>0<i>129<i>17<b>0', 'Sentence');
+is($data->{stream}->[0]->[3], '_0$<i>0<i>3', 'Position');
+
+done_testing;
+
+__END__