Add index test for Glemm morpho annotations
Change-Id: Idb769826598db575b914332f6a2d8430ee6da603
diff --git a/t/index/corpus/doc/text/glemm/glemm.xml b/t/index/corpus/doc/text/glemm/glemm.xml
new file mode 100644
index 0000000..02a95d1
--- /dev/null
+++ b/t/index/corpus/doc/text/glemm/glemm.xml
@@ -0,0 +1,258 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="ART_ABC.00001">
+ <spanList>
+ <span id="s1" from="0" to="3">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">zu</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s2" from="4" to="11">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">letzt-</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s3" from="12" to="23">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">kulturell</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="24" to="30">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Anlass</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="derivation">true</f>
+ <f name="lemma">an-</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="derivation">true</f>
+ <f name="lemma">lass</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="31" to="35">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">laden</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s6" from="36" to="39">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">d-</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s7" from="40" to="47">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Leitung</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="derivation">true</f>
+ <f name="lemma">leiten</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="derivation">true</f>
+ <f name="lemma">-ung</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s8" from="48" to="51">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">d-</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s9" from="52" to="63">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Schulheim</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="composition">true</f>
+ <f name="lemma">Schule</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="composition">true</f>
+ <f name="lemma">Heim</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="composition">true</f>
+ <f name="lemma">schulen</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s10" from="74" to="77">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">ein</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s11" from="79" to="84">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">bevor</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s12" from="85" to="88">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">d-</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s13" from="89" to="96">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Betrieb</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="lemma">betreiben</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="derivation">true</f>
+ <f name="lemma">be-</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="derivation">true</f>
+ <f name="lemma">treiben</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s14" from="97" to="101">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Ende</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="lemma">ende</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="lemma">enden</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s15" from="102" to="111">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">Schuljahr</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="composition">true</f>
+ <f name="lemma">Schule</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="composition">true</f>
+ <f name="lemma">Jahr</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="composition">true</f>
+ <f name="lemma">schulen</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s16" from="112" to="123">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">einstellen</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="derivation">true</f>
+ <f name="lemma">ein-</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="derivation">true</f>
+ <f name="lemma">stellen</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s17" from="124" to="128">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">werden</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/index/glemm_morpho.t b/t/index/glemm_morpho.t
new file mode 100644
index 0000000..49157d5
--- /dev/null
+++ b/t/index/glemm_morpho.t
@@ -0,0 +1,67 @@
+#!/usr/bin/env perl
+# Verifies that adding the Glemm morphology layer puts the expected lemma
+# terms into the token stream of the corpus/doc/text fixture.
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+# Fixture directory, resolved relative to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+ path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E keeps regex metacharacters in the checkout path (".", "+", "(")
+# from being interpreted as pattern syntax.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+# Base tokenization that the Glemm annotations are added to below.
+ok(my $tokens = KorAP::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'OpenNLP',
+ layer => 'Tokens',
+ name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+ok($tokens->add('Glemm', 'Morpho'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+
+like($data->{foundries}, qr!glemm/morpho!, 'data');
+like($data->{layerInfos}, qr!glemm/l=tokens!, 'data');
+
+# Expected term prefixes: "__" for a plain lemma entry in glemm.xml and "_+"
+# for entries flagged there with derivation="true".
+is($data->{stream}->[0]->[2], 'glemm/l:__zu', 'Lemma');
+is($data->{stream}->[1]->[1], 'glemm/l:__letzt-', 'Lemma');
+is($data->{stream}->[3]->[1], 'glemm/l:_+an-', 'Lemma');
+is($data->{stream}->[3]->[2], 'glemm/l:_+lass', 'Lemma');
+is($data->{stream}->[3]->[3], 'glemm/l:__Anlass', 'Lemma');
+
+is($data->{stream}->[6]->[1], 'glemm/l:_+-ung', 'Lemma');
+is($data->{stream}->[6]->[2], 'glemm/l:_+leiten', 'Lemma');
+is($data->{stream}->[6]->[3], 'glemm/l:__Leitung', 'Lemma');
+
+is($data->{stream}->[-1]->[1], 'glemm/l:__werden', 'Lemma');
+
+done_testing;
+
+__END__
diff --git a/t/index/xip_dependency.t b/t/index/xip_dependency.t
new file mode 100644
index 0000000..10423c1
--- /dev/null
+++ b/t/index/xip_dependency.t
@@ -0,0 +1,75 @@
+#!/usr/bin/env perl
+# Placeholder test for a dependency layer; the whole file is skipped with
+# the reason 'Not yet implemented'.
+use strict;
+use warnings;
+use utf8;
+# skip_all at import time makes Test::More exit before anything below runs.
+use Test::More skip_all => 'Not yet implemented';
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+
+use_ok('KorAP::Document');
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');
+
+ok(my $doc = KorAP::Document->new(
+ path => $path . '/'
+), 'Load Korap::Document');
+
+# \Q...\E: match the directory literally even if it contains regex
+# metacharacters.
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+ok($doc->parse, 'Parse document');
+
+ok($doc->primary->data, 'Primary data in existence');
+is($doc->primary->data_length, 129, 'Data length');
+
+use_ok('KorAP::Tokenizer');
+
+ok(my $tokens = KorAP::Tokenizer->new(
+ path => $doc->path,
+ doc => $doc,
+ foundry => 'OpenNLP',
+ layer => 'Tokens',
+ name => 'tokens'
+), 'New Tokenizer');
+
+ok($tokens->parse, 'Parse');
+
+# NOTE(review): this file is named xip_dependency.t and the draft assertions
+# after __END__ expect xip/* terms, yet the CoreNLP foundry is added here --
+# confirm which foundry is intended before enabling the test.
+ok($tokens->add('CoreNLP', 'Dependency'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+
+
+# diag Dumper $data;
+
+done_testing;
+
+# Text after __END__ is not compiled; the assertions below never run.
+__END__
+
+
+
+like($data->{foundries}, qr!xip/morpho!, 'data');
+like($data->{layerInfos}, qr!xip/l=tokens!, 'data');
+like($data->{layerInfos}, qr!xip/p=tokens!, 'data');
+is($data->{stream}->[0]->[4], 'xip/l:zu', 'Lemma');
+is($data->{stream}->[0]->[5], 'xip/p:PREP', 'POS');
+
+is($data->{stream}->[1]->[3], 'xip/l:letzt', 'Lemma');
+is($data->{stream}->[1]->[4], 'xip/p:ADJ', 'POS');
+
+is($data->{stream}->[8]->[3], 'xip/l:\#Heim', 'Lemma (part)');
+is($data->{stream}->[8]->[4], 'xip/l:\#schulen', 'Lemma (part)');
+is($data->{stream}->[8]->[5], 'xip/l:schulen\#Heim', 'Lemma (part)');
+
+is($data->{stream}->[-1]->[3], 'xip/l:werden', 'Lemma');
+is($data->{stream}->[-1]->[4], 'xip/p:VERB', 'POS');
+