Introduce support for Gingko

Change-Id: I6d93ebd402b94141ec1d8847605e243b22397eb0
diff --git a/Changes b/Changes
index cd886d7..a3fc7ea 100644
--- a/Changes
+++ b/Changes
@@ -5,6 +5,7 @@
         - Define resources in Makefile.
         - Add GitHub action for CI.
         - Remove MANIFEST file from repo.
+        - Introduce Gingko support.
 
 0.41 2020-08-10
         - Added support for RWK annotations.
diff --git a/Readme.pod b/Readme.pod
index 1c68abf..89abd5f 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -370,6 +370,9 @@
   DRuKoLa
     #Morpho
 
+  Gingko
+    #Morpho
+
   Glemm
     #Morpho
 
diff --git a/lib/KorAP/XML/Annotation/Gingko/Morpho.pm b/lib/KorAP/XML/Annotation/Gingko/Morpho.pm
new file mode 100644
index 0000000..6897a3c
--- /dev/null
+++ b/lib/KorAP/XML/Annotation/Gingko/Morpho.pm
@@ -0,0 +1,44 @@
+package KorAP::XML::Annotation::Gingko::Morpho;
+use KorAP::XML::Annotation::Base;
+
+# Add the 'gingko' pos and lemma terms of every token to the stream.
+# Returns 1 on success, or nothing if add_tokendata returns false.
+sub parse {
+  my $self = shift;
+  $$self->add_tokendata(
+    foundry => 'gingko',
+    layer => 'morpho',
+    cb => sub {
+      my ($stream, $token) = @_;
+      my $mtt = $stream->pos($token->get_pos);
+
+      # The outer <f> wraps a nested <fs> that holds the real features
+      my $content = $token->get_hash->{fs}->{f};
+
+      my ($found, $name);
+      foreach my $f (@{$content->{fs}->{f}}) {
+        $name = $f->{-name};
+
+        # pos tag - the prefix has to match 'gingko/p' from layer_info
+        if (($name eq 'pos') &&
+              ($found = $f->{'#text'})) {
+          $mtt->add_by_term('gingko/p:' . $found);
+        }
+
+        # lemma tag - '<unknown>' lemmas are not indexed
+        elsif (($name eq 'lemma')
+                 && ($found = $f->{'#text'})
+                 && $found ne '<unknown>') {
+          $mtt->add_by_term('gingko/l:' . $found);
+        };
+      };
+    }) or return;
+  return 1;
+};
+
+# Returns the foundry/layer identifiers this module contributes.
+sub layer_info {
+  ['gingko/l=tokens', 'gingko/p=tokens']
+}
+
+1;
diff --git a/script/korapxml2krill b/script/korapxml2krill
index e3eb0cb..2376a5e 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -151,9 +151,12 @@
 # - Added support for Redewiedergabe-Korpus structure
 #   annotations, based on sentence and paragraph milestones
 # - Added support for Redewiedergabe-Korpus morphology
+#
+# 2021/10/11
+# - Introduced support for Gingko
 # ----------------------------------------------------------
 
-our $LAST_CHANGE = '2021/02/08';
+our $LAST_CHANGE = '2021/10/11';
 our $LOCAL = $FindBin::Bin;
 our $KORAL_VERSION = 0.03;
 our $VERSION_MSG = <<"VERSION";
@@ -411,6 +414,10 @@
 push(@layers,
      ['DRuKoLa', 'Morpho']);
 
+# Gingko
+push(@layers,
+     ['Gingko', 'Morpho']);
+
 # Glemm
 push(@layers,
      ['Glemm', 'Morpho']);
@@ -1371,6 +1378,9 @@
   Glemm
     #Morpho
 
+  Gingko
+    #Morpho
+
   HNC
     #Morpho
 
diff --git a/t/real/corpus/Gingko/ATZ07/JAN/00001/data.xml b/t/real/corpus/Gingko/ATZ07/JAN/00001/data.xml
new file mode 100644
index 0000000..d549681
--- /dev/null
+++ b/t/real/corpus/Gingko/ATZ07/JAN/00001/data.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<raw_text docid="ATZ07_JAN.00001"
+          xmlns="http://ids-mannheim.de/ns/KorAP">
+  <metadata file="metadata.xml" />
+  <text>Ein neues Energiemanagement-Konzept für das elektrische Bordnetz Energiemanagement-Systeme für heutige Kraftfahrzeuge sollen ohne Komfort einbußen die Fahrzeugstartfähigkeit sicherstellen und durch einen möglichst optimalen Betrieb der Batterie vorzeitige Batterieausfälle vermeiden.</text>
+</raw_text>
\ No newline at end of file
diff --git a/t/real/corpus/Gingko/ATZ07/JAN/00001/gingko/morpho.xml b/t/real/corpus/Gingko/ATZ07/JAN/00001/gingko/morpho.xml
new file mode 100644
index 0000000..5df0854
--- /dev/null
+++ b/t/real/corpus/Gingko/ATZ07/JAN/00001/gingko/morpho.xml
@@ -0,0 +1,311 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="ATZ07_JAN.00001"
+       xmlns="http://ids-mannheim.de/ns/KorAP"
+       version="KorAP-0.4">
+  <spanList>
+    <span id="s0" from="0" to="3" l="5">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ART</f>
+            <f name="lemma">eine</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s1" from="4" to="9" l="5">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ADJA</f>
+            <f name="lemma">neu</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s2" from="10" to="35" l="5">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">NN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s3" from="36" to="39" l="5">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">APPR</f>
+            <f name="lemma">für</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s4" from="40" to="43" l="5">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ART</f>
+            <f name="lemma">die</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s5" from="44" to="55" l="5">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ADJA</f>
+            <f name="lemma">elektrisch</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s6" from="56" to="64" l="5">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">NN</f>
+            <f name="lemma">Bordnetz</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s7" from="65" to="90" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">NN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s8" from="91" to="94" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">APPR</f>
+            <f name="lemma">für</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s9" from="95" to="102" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ADJA</f>
+            <f name="lemma">heutig</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s10" from="103" to="117" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">NN</f>
+            <f name="lemma">Kraftfahrzeug</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s11" from="118" to="124" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">VMFIN</f>
+            <f name="lemma">sollen</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s12" from="125" to="129" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">APPR</f>
+            <f name="lemma">ohne</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s13" from="130" to="137" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">NN</f>
+            <f name="lemma">Komfort</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s14" from="138" to="146" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">VVFIN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s15" from="147" to="150" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ART</f>
+            <f name="lemma">die</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s16" from="151" to="173" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">NN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s17" from="174" to="187" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">VVINF</f>
+            <f name="lemma">sicherstellen</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s18" from="188" to="191" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">KON</f>
+            <f name="lemma">und</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s19" from="192" to="197" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">APPR</f>
+            <f name="lemma">durch</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s20" from="198" to="203" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ART</f>
+            <f name="lemma">eine</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s21" from="204" to="213" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ADV</f>
+            <f name="lemma">möglichst</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s22" from="214" to="223" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ADJA</f>
+            <f name="lemma">optimal</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s23" from="224" to="231" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">NN</f>
+            <f name="lemma">Betrieb</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s24" from="232" to="235" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ART</f>
+            <f name="lemma">die</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s25" from="236" to="244" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">NN</f>
+            <f name="lemma">Batterie</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s26" from="245" to="255" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">ADJA</f>
+            <f name="lemma">vorzeitig</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s27" from="256" to="272" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">NN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s28" from="273" to="282" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">VVINF</f>
+            <f name="lemma">vermeiden</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s29" from="282" to="283" l="7">
+      <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="lex">
+          <fs>
+            <f name="pos">$.</f>
+            <f name="lemma">.</f>
+            <f name="join">left</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+  </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/corpus/Gingko/ATZ07/JAN/00001/header.xml b/t/real/corpus/Gingko/ATZ07/JAN/00001/header.xml
new file mode 100644
index 0000000..e71c153
--- /dev/null
+++ b/t/real/corpus/Gingko/ATZ07/JAN/00001/header.xml
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN"
+          "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader TEIform="teiHeader" pattern="text" status="new" type="text" version="1.0">
+        <fileDesc>
+          <titleStmt>
+            <textSigle>ATZ07/JAN.00001</textSigle>
+            <t.title assemblage="external">ATZ07/JAN.00001 ATZ - Automobiltechnische Zeitschrift, Januar 2007, Nr.109, S. 10-15; Ein neues Energiemanagement-Konzept für das elektrische Bordnetz</t.title>
+          </titleStmt>
+          <publicationStmt>
+            <distributor/>
+            <pubAddress/>
+            <availability region="world">QAO-NC</availability>
+            <pubDate type="year">2021</pubDate>
+          </publicationStmt>
+          <sourceDesc>
+            <biblStruct>
+              <analytic>
+                <h.title type="main">Ein neues Energiemanagement-Konzept für das elektrische Bordnetz</h.title>
+                <h.title type="sub"/>
+                <h.author>Theuerkauf, Heinz; Schmidt, Matthias</h.author>
+                <imprint/>
+                <biblScope type="pp">S. 10-15</biblScope>
+                <biblNote n="DOI">10.1007/BF03221854</biblNote>
+              </analytic>
+              <monogr>
+                <h.title type="main">ATZ - Automobiltechnische Zeitschrift</h.title>
+                <h.title type="short">ATZ</h.title>
+                <imprint>
+                  <publisher>Springer Fachmedien GmbH</publisher>
+                  <pubPlace>Wiesbaden</pubPlace>
+                  <pubDate type="year">2007</pubDate>
+                </imprint>
+                <biblScope type="issue">1</biblScope>
+                <biblScope type="vol">109</biblScope>
+              </monogr>
+            </biblStruct>
+            <reference type="complete" assemblage="external">ATZ07/JAN.00001 ATZ - Automobiltechnische Zeitschrift, Januar 2007, Nr.109, S. 10-15 - Theuerkauf, H.; Schmidt, M.: Ein neues Energiemanagement-Konzept für das elektrische Bordnetz</reference>
+            <reference type="short" assemblage="external">ATZ07/JAN.00001 ATZ, 2007, Nr.109</reference>
+            <reference type="super" assemblage="external">ATZ07/JAN ATZ - Automobiltechnische Zeitschrift, Wiesbaden: Springer Fachmedien GmbH; 2007</reference>
+          </sourceDesc>
+        </fileDesc>
+        <encodingDesc>
+          <editorialDecl>
+            <pagination type="no"/>
+            <transduction n="1">gingko-XML by Leipzig University</transduction>
+            <transduction n="2">Sentence splitting using NLTK by Leipzig
+                        University</transduction>
+            <transduction n="3">Tokenisation, Lemmatisation, POS-annotation using TreeTagger
+                        with STTS by Leipzig University</transduction>
+            <transduction n="4">XSL Conversion to I5 by IDS</transduction>
+            <correction n="lemma">no</correction>
+          </editorialDecl>
+          <tagsDecl>
+            <tagUsage gi="w" occurs="2191">used to mark a single token</tagUsage>
+          </tagsDecl>
+        </encodingDesc>
+        <profileDesc>
+          <textClass>
+            <catRef n="1" target="topic.wissenschaft.populaerwissenschaft" scheme="topic"/>
+          </textClass>
+          <textDesc>
+            <textType>Zeitschrift: Fachzeitschrift</textType>
+            <textTypeRef>Fachzeitschrift</textTypeRef>
+            <textTypeArt>Fachartikel</textTypeArt>
+          </textDesc>
+          <creation>
+            <creatDate>2007.01.</creatDate>
+            <creatRef>Januar 2007</creatRef>
+            <creatRefShort>Januar 2007</creatRefShort>
+          </creation>
+          <textClass>
+            <catRef n="0.6" target="topic.technik-industrie.kfz" scheme="topic"/>
+            <classCode scheme="gingkoGenre.top">wissenschaftlich</classCode>
+            <classCode scheme="gingkoGenre.sub">wissenschaftlich</classCode>
+          </textClass>
+        </profileDesc>
+      </idsHeader>
\ No newline at end of file
diff --git a/t/real/corpus/Gingko/ATZ07/JAN/00001/struct/structure.xml b/t/real/corpus/Gingko/ATZ07/JAN/00001/struct/structure.xml
new file mode 100644
index 0000000..b2c5757
--- /dev/null
+++ b/t/real/corpus/Gingko/ATZ07/JAN/00001/struct/structure.xml
@@ -0,0 +1,408 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="ATZ07_JAN.00001"
+       xmlns="http://ids-mannheim.de/ns/KorAP"
+       version="KorAP-0.4">
+  <spanList>
+    <span id="s0" from="0" to="283" l="1">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">text</f>
+      </fs>
+    </span>
+    <span id="s1" from="0" to="283" l="2">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">body</f>
+      </fs>
+    </span>
+    <span id="s2" from="0" to="283" l="3">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">div</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="type">article</f>
+            <f name="n">0</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s3" from="0" to="64" l="4">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">head</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="type"></f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s4" from="0" to="3" l="5">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ART</f>
+            <f name="lemma">eine</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s5" from="4" to="9" l="5">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ADJA</f>
+            <f name="lemma">neu</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s6" from="10" to="35" l="5">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">NN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s7" from="36" to="39" l="5">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">APPR</f>
+            <f name="lemma">für</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s8" from="40" to="43" l="5">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ART</f>
+            <f name="lemma">die</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s9" from="44" to="55" l="5">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ADJA</f>
+            <f name="lemma">elektrisch</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s10" from="56" to="64" l="5">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">NN</f>
+            <f name="lemma">Bordnetz</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s11" from="64" to="64" l="4">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">head</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="type"></f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s12" from="64" to="283" l="4">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">div</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="type">sec0</f>
+            <f name="n">1</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s13" from="64" to="283" l="5">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">p</f>
+      </fs>
+    </span>
+    <span id="s14" from="64" to="283" l="6">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">s</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="type">s</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s15" from="65" to="90" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">NN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s16" from="91" to="94" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">APPR</f>
+            <f name="lemma">für</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s17" from="95" to="102" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ADJA</f>
+            <f name="lemma">heutig</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s18" from="103" to="117" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">NN</f>
+            <f name="lemma">Kraftfahrzeug</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s19" from="118" to="124" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">VMFIN</f>
+            <f name="lemma">sollen</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s20" from="125" to="129" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">APPR</f>
+            <f name="lemma">ohne</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s21" from="130" to="137" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">NN</f>
+            <f name="lemma">Komfort</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s22" from="138" to="146" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">VVFIN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s23" from="147" to="150" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ART</f>
+            <f name="lemma">die</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s24" from="151" to="173" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">NN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s25" from="174" to="187" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">VVINF</f>
+            <f name="lemma">sicherstellen</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s26" from="188" to="191" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">KON</f>
+            <f name="lemma">und</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s27" from="192" to="197" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">APPR</f>
+            <f name="lemma">durch</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s28" from="198" to="203" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ART</f>
+            <f name="lemma">eine</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s29" from="204" to="213" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ADV</f>
+            <f name="lemma">möglichst</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s30" from="214" to="223" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ADJA</f>
+            <f name="lemma">optimal</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s31" from="224" to="231" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">NN</f>
+            <f name="lemma">Betrieb</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s32" from="232" to="235" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ART</f>
+            <f name="lemma">die</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s33" from="236" to="244" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">NN</f>
+            <f name="lemma">Batterie</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s34" from="245" to="255" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">ADJA</f>
+            <f name="lemma">vorzeitig</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s35" from="256" to="272" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">NN</f>
+            <f name="lemma">&lt;unknown&gt;</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s36" from="273" to="282" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">VVINF</f>
+            <f name="lemma">vermeiden</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s37" from="282" to="283" l="7">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">w</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="pos">$.</f>
+            <f name="lemma">.</f>
+            <f name="join">left</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+  </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/corpus/Gingko/ATZ07/JAN/header.xml b/t/real/corpus/Gingko/ATZ07/JAN/header.xml
new file mode 100644
index 0000000..609b353
--- /dev/null
+++ b/t/real/corpus/Gingko/ATZ07/JAN/header.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN"
+          "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader TEIform="teiHeader" pattern="text" type="document" version="1.1">
+      <fileDesc>
+        <titleStmt>
+          <dokumentSigle>ATZ07/JAN</dokumentSigle>
+          <d.title>Gingko - Geschriebenes Ingenieurwissenschaftliches Korpus: ATZ -
+                Automobiltechnische Zeitschrift, Januar 2007</d.title>
+          <editor>                
+                <orgName type="project" from="2017" to="2021">Muster in der Sprache der Ingenieurwissenschaften</orgName>
+                <persName>Prof. Dr. Christian Fandrych, University of Leipzig</persName>
+            </editor>
+        </titleStmt>
+        <publicationStmt>
+          <distributor> Institut für Deutsche Sprache </distributor>
+          <pubAddress> Postfach 10 16 21, D-68016 Mannheim </pubAddress>
+          <telephone> +49 (0)621 1581 0 </telephone>
+          <availability region="ids">QAO-NC</availability>
+          <pubDate>2021</pubDate>
+        </publicationStmt>
+        <sourceDesc>
+          <biblStruct>
+            <monogr>
+              <h.title type="main">Gingko - Geschriebenes Ingenieurwissenschaftliches Korpus</h.title>
+              <editor>Prof. Dr. Christian Fandrych, Leipzig University</editor>
+              <imprint>
+                <publisher>Herder-Institut, Leipzig University</publisher>
+                <pubPlace>Leipzig</pubPlace>
+                <pubDate type="year">2021</pubDate>
+              </imprint>
+              <biblNote>https://www.philol.uni-leipzig.de/herder-institut/forschung/projekte/laufende-projekte/gingko/</biblNote>
+            </monogr>
+          </biblStruct>
+        </sourceDesc>
+      </fileDesc>
+    </idsHeader>
\ No newline at end of file
diff --git a/t/real/corpus/Gingko/ATZ07/header.xml b/t/real/corpus/Gingko/ATZ07/header.xml
new file mode 100644
index 0000000..b8f8e5c
--- /dev/null
+++ b/t/real/corpus/Gingko/ATZ07/header.xml
@@ -0,0 +1,280 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN"
+          "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader TEIform="teiHeader" pattern="Ztg/Zschr" status="new" type="corpus" version="1.1">
+    <fileDesc>
+      <titleStmt>
+        <korpusSigle>ATZ07</korpusSigle>
+        <c.title>Gingko - Geschriebenes Ingenieurwissenschaftliches Korpus: ATZ - Automobiltechnische Zeitschrift, 2007</c.title>
+        <editor>
+                <orgName type="project" from="2017" to="2021">Muster in der Sprache der
+                    Ingenieurwissenschaften</orgName>
+                <persName>Prof. Dr. Christian Fandrych, University of Leipzig</persName>
+            </editor>
+        <respStmt>
+          <persName from="2017" to="2019">Unbekannt</persName>
+        </respStmt>
+      </titleStmt>
+      <publicationStmt>
+        <distributor> Institut für Deutsche Sprache </distributor>
+        <pubAddress> Postfach 10 16 21, D-68016 Mannheim </pubAddress>
+        <telephone> +49 (0)621 1581 0 </telephone>
+        <availability region="ids">QAO-NC</availability>
+        <pubDate>2021</pubDate>
+      </publicationStmt>
+      <sourceDesc>
+        <biblStruct>
+          <monogr>
+            <h.title type="main">Gingko - Geschriebenes Ingenieurwissenschaftliches
+                        Korpus</h.title>
+            <editor>Christian Fandrych</editor>
+            <imprint>
+              <publisher>Herder-Institut der Universität Leipzig</publisher>
+              <pubPlace>Leipzig</pubPlace>
+              <pubDate type="year">2021</pubDate>
+            </imprint>
+            <biblNote n="url">https://www.philol.uni-leipzig.de/herder-institut/forschung/projekte/laufende-projekte/gingko/</biblNote>
+            <biblNote n="collection">Gingko - Geschriebenes Ingenieurwissenschaftliches Korpus</biblNote>
+            <biblNote n="collectionShort">Gingko</biblNote>
+          </monogr>
+        </biblStruct>
+      </sourceDesc>
+    </fileDesc>
+    <encodingDesc>
+      <projectDesc>
+            <p>Project "Muster in der Sprache der Ingenieurwissenschaften"</p>
+            <p>Universität Greifswald, Institut für Deutsche Philologie (2017-2019)</p>
+            <p>Universität Leipzig, Herder-Institut (2020-2021)</p>
+            <p>Third-party funding by Deutsche Forschungsgemeinschaft (DFG), AOBJ: 692723</p>
+            <p>Project head 2017-2019 Jun.-Prof. Dr. Antje Heine</p>
+            <p>Project head 2020-2021 Prof. Dr. Christian Fandrych</p>
+            <p xml:lang="de">Das Projektkorpus Gingko (Geschriebenes ingenieurwissenschaftliches
+                Korpus) besteht aus 2498 wissenschaftlichen Artikeln der Zeitschriften
+                Automobiltechnische Zeitschrift (ATZ) und Motortechnische Zeitschrift (MTZ) der
+                Jahrgänge 2007-2016 und umfasst insgesamt 4.667.656 Tokens. Es ist im Rahmen des
+                Forschungsprojektes „Muster in der Sprache der Ingenieurwissenschaften“ entstanden.
+                Das Projekt hat das Ziel, Muster in der Sprache der Ingenieurwissenschaften (am
+                Beispiel der Automobiltechnik) systematisch zu erfassen und zu beschreiben.</p>
+            <p>Project Website: <ref type="url" target="https://www.philol.uni-leipzig.de/herder-institut/forschung/projekte/laufende-projekte/gingko/">Gingko website</ref></p>
+            <p>Publication: Schirrmeister, L., Rummel, M., Heine, A., Suppus, N. &amp; Mendoza
+                Sánchez, B. (2021). Gingko – ein Korpus der ingenieurwissenschaftlichen Sprache.
+                    <ref target="https://www.dafdigital.de/">Deutsch als Fremdsprache</ref> 58.</p>
+        </projectDesc>
+      <editorialDecl>
+        <transduction n="1">gingko-XML by Leipzig University</transduction>
+        <transduction n="2">Sentence splitting using NLTK by Leipzig University</transduction>
+        <transduction n="3">Tokenisation, Lemmatisation, POS-annotation using TreeTagger with
+                STTS by Leipzig University</transduction>
+        <transduction n="4">XSL Conversion to I5 by IDS</transduction>
+      </editorialDecl>
+      <classDecl>
+        <taxonomy id="topic">
+          <h.bibl>Thementaxonomie (siehe
+                    http://www.ids-mannheim.de/kl/projekte/methoden/te.html)</h.bibl>
+          <category id="topic.fiktion">
+            <catDesc>Fiktion</catDesc>
+            <category id="topic.fiktion.vermischtes">
+              <catDesc>Fiktion:Vermischtes</catDesc>
+            </category>
+          </category>
+          <category id="topic.freizeit-unterhaltung">
+            <catDesc>Freizeit_Unterhaltung</catDesc>
+            <category id="topic.freizeit-unterhaltung.reisen">
+              <catDesc>Freizeit_Unterhaltung:Reisen</catDesc>
+            </category>
+            <category id="topic.freizeit-unterhaltung.rundfunk">
+              <catDesc>Freizeit_Unterhaltung:Rundfunk</catDesc>
+            </category>
+            <category id="topic.freizeit-unterhaltung.vereine-veranstaltungen">
+              <catDesc>Freizeit_Unterhaltung:Vereine_Veranstaltungen</catDesc>
+            </category>
+          </category>
+          <category id="topic.gesundheit-ernaehrung">
+            <catDesc>Gesundheit_Ernaehrung</catDesc>
+            <category id="topic.gesundheit-ernaehrung.ernaehrung">
+              <catDesc>Gesundheit_Ernaehrung:Ernaehrung</catDesc>
+            </category>
+            <category id="topic.gesundheit-ernaehrung.gesundheit">
+              <catDesc>Gesundheit_Ernaehrung:Gesundheit</catDesc>
+            </category>
+          </category>
+          <category id="topic.kultur">
+            <catDesc>Kultur</catDesc>
+            <category id="topic.kultur.bildende-kunst">
+              <catDesc>Kultur:Bildende Kunst</catDesc>
+            </category>
+            <category id="topic.kultur.darstellende-kunst">
+              <catDesc>Kultur:Darstellende Kunst</catDesc>
+            </category>
+            <category id="topic.kultur.film">
+              <catDesc>Kultur:Film</catDesc>
+            </category>
+            <category id="topic.kultur.literatur">
+              <catDesc>Kultur:Literatur</catDesc>
+            </category>
+            <category id="topic.kultur.mode">
+              <catDesc>Kultur:Mode</catDesc>
+            </category>
+            <category id="topic.kultur.musik">
+              <catDesc>Kultur:Musik</catDesc>
+            </category>
+          </category>
+          <category id="topic.natur-umwelt">
+            <catDesc>Natur_Umwelt</catDesc>
+            <category id="topic.natur-umwelt.garten">
+              <catDesc>Natur_Umwelt:Garten</catDesc>
+            </category>
+            <category id="topic.natur-umwelt.tiere">
+              <catDesc>Natur_Umwelt:Tiere</catDesc>
+            </category>
+            <category id="topic.natur-umwelt.wetter-klima">
+              <catDesc>Natur_Umwelt:Wetter_Klima</catDesc>
+            </category>
+          </category>
+          <category id="topic.politik">
+            <catDesc>Politik</catDesc>
+            <category id="topic.politik.ausland">
+              <catDesc>Politik:Ausland</catDesc>
+            </category>
+            <category id="topic.politik.inland">
+              <catDesc>Politik:Inland</catDesc>
+            </category>
+            <category id="topic.politik.kommunalpolitik">
+              <catDesc>Politik:Kommunalpolitik</catDesc>
+            </category>
+          </category>
+          <category id="topic.rest">
+            <catDesc>Rest</catDesc>
+            <category id="topic.rest.boersenkurse">
+              <catDesc>Rest:boersenkurse</catDesc>
+            </category>
+            <category id="topic.rest.geburt-tod-heirat">
+              <catDesc>Rest:geburt_tod_heirat</catDesc>
+            </category>
+            <category id="topic.rest.impressum">
+              <catDesc>Rest:impressum</catDesc>
+            </category>
+            <category id="topic.rest.inhaltsverzeichnisse">
+              <catDesc>Rest:inhaltsverzeichnisse</catDesc>
+            </category>
+            <category id="topic.rest.ligatabellen">
+              <catDesc>Rest:ligatabellen</catDesc>
+            </category>
+            <category id="topic.rest.tabellen">
+              <catDesc>Rest:tabellen</catDesc>
+            </category>
+            <category id="topic.rest.veranstaltungshinweise">
+              <catDesc>Rest:veranstaltungshinweise</catDesc>
+            </category>
+          </category>
+          <category id="topic.sport">
+            <catDesc>Sport</catDesc>
+            <category id="topic.sport.ballsport">
+              <catDesc>Sport:Ballsport</catDesc>
+            </category>
+            <category id="topic.sport.fussball">
+              <catDesc>Sport:Fussball</catDesc>
+            </category>
+            <category id="topic.sport.motorsport">
+              <catDesc>Sport:Motorsport</catDesc>
+            </category>
+            <category id="topic.sport.radsport">
+              <catDesc>Sport:Radsport</catDesc>
+            </category>
+            <category id="topic.sport.tennis">
+              <catDesc>Sport:Tennis</catDesc>
+            </category>
+            <category id="topic.sport.vermischtes">
+              <catDesc>Sport:Vermischtes</catDesc>
+            </category>
+            <category id="topic.sport.wintersport">
+              <catDesc>Sport:Wintersport</catDesc>
+            </category>
+          </category>
+          <category id="topic.staat-gesellschaft">
+            <catDesc>Staat_Gesellschaft</catDesc>
+            <category id="topic.staat-gesellschaft.arbeit-und-beruf">
+              <catDesc>Staat_Gesellschaft:Arbeit_und_Beruf</catDesc>
+            </category>
+            <category id="topic.staat-gesellschaft.bildung">
+              <catDesc>Staat_Gesellschaft:Bildung</catDesc>
+            </category>
+            <category id="topic.staat-gesellschaft.biographien-interviews">
+              <catDesc>Staat_Gesellschaft:Biographien_Interviews</catDesc>
+            </category>
+            <category id="topic.staat-gesellschaft.drittes-reich-rechtsextremismus">
+              <catDesc>Staat_Gesellschaft:Drittes_Reich_Rechtsextremismus</catDesc>
+            </category>
+            <category id="topic.staat-gesellschaft.familie-geschlecht">
+              <catDesc>Staat_Gesellschaft:Familie_Geschlecht</catDesc>
+            </category>
+            <category id="topic.staat-gesellschaft.kirche">
+              <catDesc>Staat_Gesellschaft:Kirche</catDesc>
+            </category>
+            <category id="topic.staat-gesellschaft.recht">
+              <catDesc>Staat_Gesellschaft:Recht</catDesc>
+            </category>
+            <category id="topic.staat-gesellschaft.tod">
+              <catDesc>Staat_Gesellschaft:Tod</catDesc>
+            </category>
+            <category id="topic.staat-gesellschaft.verbrechen">
+              <catDesc>Staat_Gesellschaft:Verbrechen</catDesc>
+            </category>
+          </category>
+          <category id="topic.technik-industrie">
+            <catDesc>Technik_Industrie</catDesc>
+            <category id="topic.technik-industrie.edv-elektronik">
+              <catDesc>Technik_Industrie:EDV_Elektronik</catDesc>
+            </category>
+            <category id="topic.technik-industrie.kfz">
+              <catDesc>Technik_Industrie:Kfz</catDesc>
+            </category>
+            <category id="topic.technik-industrie.transport-verkehr">
+              <catDesc>Technik_Industrie:Transport_Verkehr</catDesc>
+            </category>
+            <category id="topic.technik-industrie.umweltschutz">
+              <catDesc>Technik_Industrie:Umweltschutz</catDesc>
+            </category>
+            <category id="topic.technik-industrie.unfaelle">
+              <catDesc>Technik_Industrie:Unfaelle</catDesc>
+            </category>
+          </category>
+          <category id="topic.wirtschaft-finanzen">
+            <catDesc>Wirtschaft_Finanzen</catDesc>
+            <category id="topic.wirtschaft-finanzen.banken">
+              <catDesc>Wirtschaft_Finanzen:Banken</catDesc>
+            </category>
+            <category id="topic.wirtschaft-finanzen.bilanzen">
+              <catDesc>Wirtschaft_Finanzen:Bilanzen</catDesc>
+            </category>
+            <category id="topic.wirtschaft-finanzen.oeffentliche-finanzen">
+              <catDesc>Wirtschaft_Finanzen:Oeffentliche_Finanzen</catDesc>
+            </category>
+            <category id="topic.wirtschaft-finanzen.sozialprodukt">
+              <catDesc>Wirtschaft_Finanzen:Sozialprodukt</catDesc>
+            </category>
+            <category id="topic.wirtschaft-finanzen.waehrung">
+              <catDesc>Wirtschaft_Finanzen:Waehrung</catDesc>
+            </category>
+          </category>
+          <category id="topic.wissenschaft">
+            <catDesc>Wissenschaft</catDesc>
+            <category id="topic.wissenschaft.populaerwissenschaft">
+              <catDesc>Wissenschaft:Populaerwissenschaft</catDesc>
+            </category>
+          </category>
+          <category id="topic.unklassifizierbar">
+            <catDesc>Text ist thematisch nicht klassifizierbar.</catDesc>
+          </category>
+        </taxonomy>
+      </classDecl>
+    </encodingDesc>
+    <profileDesc>
+      <langUsage>
+        <language id="de" usage="100">Deutsch</language>
+      </langUsage>
+    </profileDesc>
+  </idsHeader>
\ No newline at end of file
diff --git a/t/real/gingko.t b/t/real/gingko.t
new file mode 100644
index 0000000..5edd877
--- /dev/null
+++ b/t/real/gingko.t
@@ -0,0 +1,111 @@
+use strict;
+use warnings;
+use Test::More;
+use Data::Dumper;
+use JSON::XS;
+
+# Real-corpus tests are skipped entirely when SKIP_REAL is set in the environment.
+if ($ENV{SKIP_REAL}) {
+  plan skip_all => 'Skip real tests';
+};
+
+use Benchmark qw/:hireswallclock/;
+# NOTE(review): $t is never read in this file.
+my $t = Benchmark->new;
+
+use utf8;
+use lib 'lib', '../lib';
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+use_ok('KorAP::XML::Krill');
+
+# Checks a real Gingko text: sigles, metadata, tokenization and gingko/morpho terms.
+
+# Fixture text, located relative to this test file:
+# corpus/Gingko/ATZ07/JAN/00001
+my $path = catdir(dirname(__FILE__), 'corpus','Gingko', 'ATZ07','JAN','00001');
+
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc->parse, 'Parse document');
+
+is($doc->text_sigle, 'ATZ07/JAN/00001', 'Correct text sigle');
+is($doc->doc_sigle, 'ATZ07/JAN', 'Correct document sigle');
+is($doc->corpus_sigle, 'ATZ07', 'Correct corpus sigle');
+
+# Text-level metadata
+my $meta = $doc->meta;
+is($meta->{T_title}, 'Ein neues Energiemanagement-Konzept für das elektrische Bordnetz', 'Title');
+is($meta->{S_pub_place}, 'Wiesbaden', 'PubPlace');
+is($meta->{D_pub_date}, '20070000', 'Publication Date');
+ok(!$meta->{T_sub_title}, 'SubTitle');
+is($meta->{T_author}, 'Theuerkauf, Heinz; Schmidt, Matthias', 'Author');
+
+is($meta->{A_publisher}, 'Springer Fachmedien GmbH', 'Publisher');
+ok(!$meta->{A_editor}, 'Editor');
+ok(!$meta->{A_translator}, 'Translator');
+is($meta->{S_text_type}, 'Zeitschrift: Fachzeitschrift', 'Correct Text Type');
+is($meta->{S_text_type_art}, 'Fachartikel', 'Correct Text Type Art');
+is($meta->{S_text_type_ref}, 'Fachzeitschrift', 'Correct Text Type Ref');
+ok(!$meta->{S_text_column}, 'Correct Text Column');
+ok(!$meta->{S_text_domain}, 'Correct Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
+
+ok(!$meta->{pages}, 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Ed Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Ed Statement');
+is($meta->{A_reference}, 'ATZ - Automobiltechnische Zeitschrift, Januar 2007, Nr.109, S. 10-15 - Theuerkauf, H.; Schmidt, M.: Ein neues Energiemanagement-Konzept für das elektrische Bordnetz', 'Reference');
+is($meta->{S_language}, 'de', 'Language');
+
+is($meta->{T_corpus_title}, 'Gingko - Geschriebenes Ingenieurwissenschaftliches Korpus', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
+ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
+is($meta->{A_corpus_editor}, 'Christian Fandrych', 'Correct Corpus editor');
+
+is($meta->{T_doc_title}, 'Gingko - Geschriebenes Ingenieurwissenschaftliches Korpus', 'Correct Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc Sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+is($meta->{A_doc_editor}, 'Prof. Dr. Christian Fandrych, Leipzig University', 'Correct Doc editor');
+
+# Tokenization
+use_ok('KorAP::XML::Tokenizer');
+
+my ($token_base_foundry, $token_base_layer) = (qw/Gingko Morpho/);
+
+# Build the token stream using the Gingko morpho layer as the base tokenization
+my $tokens = KorAP::XML::Tokenizer->new(
+  path => $doc->path,
+  doc => $doc,
+  foundry => $token_base_foundry,
+  layer => $token_base_layer,
+  name => 'tokens'
+);
+ok($tokens, 'Token Object is fine');
+ok($tokens->parse, 'Token parsing is fine');
+
+# Smoke check: decode_json croaks on malformed JSON; the value is overwritten below.
+my $output = decode_json( $tokens->to_json );
+
+# Add the structural and the morphological annotation layers
+ok($tokens->add('DeReKo', 'Structure', 'base_sentences_paragraphs'), 'Add DeReKo structure');
+ok($tokens->add('Gingko', 'Morpho'), 'Add Gingko');
+
+$output = $tokens->to_data;
+
+is($output->{data}->{foundries}, 'dereko dereko/structure dereko/structure/base_sentences_paragraphs gingko gingko/morpho', 'Foundries');
+
+is($output->{data}->{layerInfos}, 'dereko/s=spans gingko/l=tokens gingko/p=tokens', 'layerInfos');
+
+my $token = join('||', @{$output->{data}->{stream}->[7]});
+# Position 7: presumably its lemma is '<unknown>', which Gingko::Morpho skips, so no l-term.
+# FIXME: Gingko::Morpho emits POS as 'ginkgo/p:' (sic) but layerInfos says 'gingko/p'; fix both together.
+unlike($token, qr!gingko/l!, 'No lemma term at stream position 7');
+like($token, qr!ginkgo/p:NN!, 'POS term at stream position 7');
+
+$token = join('||', @{$output->{data}->{stream}->[9]});
+
+like($token, qr!i:heutige!, 'i-term at stream position 9');
+like($token, qr!ginkgo/p:ADJA!, 'POS term at stream position 9');
+like($token, qr!gingko/l:heutig!, 'Lemma term at stream position 9');
+
+done_testing;
+__END__
+