Support NDY metadata

Change-Id: Ifebed2de671ac516782a4daf49b2440e89ec4533
diff --git a/t/real/corpus/NDY/296/008718/base/tokens.xml b/t/real/corpus/NDY/296/008718/base/tokens.xml
new file mode 100644
index 0000000..723f3f2
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/base/tokens.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="NDY_296.008718"
+       xmlns="http://ids-mannheim.de/ns/KorAP"
+       version="KorAP-0.4">
+  <spanList>
+    <span id="t_0" from="0" to="1" />
+  </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/corenlp/constituency.xml b/t/real/corpus/NDY/296/008718/corenlp/constituency.xml
new file mode 100644
index 0000000..304bf8a
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/constituency.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" version="KorAP-0.4">
+   <spanList/>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/corenlp/metadata.xml b/t/real/corpus/NDY/296/008718/corenlp/metadata.xml
new file mode 100644
index 0000000..94b1bda
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/metadata.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="metadata.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<metadata docid="NDY_296.008718" type="foundry" xmlns="http://ids-mannheim.de/ns/KorAP">  <doc file="../data.xml" />
+<foundry name="corenlp"><layer segm="tok" file="tokens.xml" name="token" id="l1" /><layer segm="s" file="sentences.xml" name="sent" id="l2" /><layer info="pos lemma" file="morpho.xml" name="morph" id="l3" /></foundry></metadata>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/corenlp/morpho.xml b/t/real/corpus/NDY/296/008718/corenlp/morpho.xml
new file mode 100644
index 0000000..304bf8a
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/morpho.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" version="KorAP-0.4">
+   <spanList/>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/corenlp/sentences.xml b/t/real/corpus/NDY/296/008718/corenlp/sentences.xml
new file mode 100644
index 0000000..304bf8a
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/sentences.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" version="KorAP-0.4">
+   <spanList/>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/corenlp/tokens.xml b/t/real/corpus/NDY/296/008718/corenlp/tokens.xml
new file mode 100644
index 0000000..304bf8a
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/tokens.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" version="KorAP-0.4">
+   <spanList/>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/data.xml b/t/real/corpus/NDY/296/008718/data.xml
new file mode 100644
index 0000000..4bac9a0
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/data.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<raw_text docid="NDY_296.008718"
+          xmlns="http://ids-mannheim.de/ns/KorAP">
+  <metadata file="metadata.xml" />
+  <text>😍</text>
+</raw_text>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/header.xml b/t/real/corpus/NDY/296/008718/header.xml
new file mode 100644
index 0000000..a56a8b4
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/header.xml
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN"
+          "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="text" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+    <fileDesc>
+     <titleStmt>
+      <textSigle>NDY/296.008718</textSigle>
+      <t.title assemblage="external">NDY/296.008718 Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall), - YouTube, 04.12.2017</t.title>
+     </titleStmt>
+     <publicationStmt>
+      <distributor/>
+      <pubAddress/>
+      <availability region="ids" Default="n" status="unknown">QAO-NC-LOC:ids</availability>
+      <pubDate/>
+     </publicationStmt>
+     <sourceDesc Default="n">
+      <biblStruct Default="n" status="draft">
+       <analytic>
+        <h.title type="main">Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall)</h.title>
+        <h.author>Livia Banse</h.author>
+        <imprint/>
+        <biblScope type="pp"/>
+        <biblScope type="suppl"/>
+        <biblScope type="suppltitle"/>
+        <biblNote n="1"/>
+        <biblNote n="url">https://youtube.googleapis.com/youtube/v3/comments?part=snippet&amp;id=UgwyndbbrTpNztj0vwh4AaABAg&amp;key=[YOUR_API_KEY]</biblNote>
+       </analytic>
+       <monogr>
+        <h.title type="main">YouTube</h.title>
+        <h.title type="sub"/>
+        <h.title type="abbr">YouTube</h.title>
+        <editor/>
+        <imprint>
+         <publisher>YouTube</publisher>
+         <pubDate type="year">2017</pubDate>
+         <pubDate type="month">12</pubDate>
+         <pubDate type="day">04</pubDate>
+         <pubDate type="time">2017-12-04T12:56:36.000Z</pubDate>
+         <pubPlace/>
+        </imprint>
+        <biblScope type="issue"/>
+        <biblScope type="issueplace"/>
+        <biblScope type="vol"/>
+       </monogr>
+      </biblStruct>
+      <reference type="complete" assemblage="regular">NDY/296.008718, YouTube, 04.12.2017. Livia Banse: Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall), - YouTube
+        </reference>
+      <reference type="short" assemblage="regular">NDY/296.008718, YouTube, 04.12.2017</reference>
+     </sourceDesc>
+    </fileDesc>
+    <encodingDesc>
+     <samplingDecl Default="n"/>
+     <editorialDecl Default="n">
+      <pagination type="no"/>
+     </editorialDecl>
+     <tagsDecl>
+      <tagUsage gi="p" occurs="1"/>
+     </tagsDecl>
+    </encodingDesc>
+    <profileDesc>
+     <creation>
+      <creatDate>2017.12.04</creatDate>
+      <creatRef>4.12.2017</creatRef>
+      <creatRefShort>4.12.2017</creatRefShort>
+     </creation>
+     <textClass Default="n">
+      <catRef n="24" target="videoCategories.entertainment" scheme="YTvideoCategories"/>
+     </textClass>
+     <textDesc Default="n">
+      <textType>Kurzmeldungen: YouTube-Kommentare</textType>
+      <textTypeArt/>
+      <textDomain/>
+      <column/>
+     </textDesc>
+     <correspDesc Default="n">
+      <correspContext>
+       <ref type="channel" n="1" target="https://www.youtube.com/watch?v=UCk-ERvn9_tUjg7e8Bn921gA"/>
+       <ref type="video" n="1" target="https://www.youtube.com/watch?v=JnPVOl-sO64"/>
+       <ref type="parent"/>
+      </correspContext>
+     </correspDesc>
+    </profileDesc>
+   </idsHeader>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/malt/dependency.xml b/t/real/corpus/NDY/296/008718/malt/dependency.xml
new file mode 100644
index 0000000..f28bd17
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/malt/dependency.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="NDY_296.008718" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+<spanList>
+<span id="s2318566_n1" from="0" to="1">
+<rel label="ROOT">
+<span from="0" to="1"/>
+</rel>
+</span>
+</spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/marmot/morpho.xml b/t/real/corpus/NDY/296/008718/marmot/morpho.xml
new file mode 100644
index 0000000..9852add
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/marmot/morpho.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="NDY_296.008718" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+<spanList>
+  <span id="s4195562_n1" from="0" to="1">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="pos">XY</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+ </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/opennlp/metadata.xml b/t/real/corpus/NDY/296/008718/opennlp/metadata.xml
new file mode 100644
index 0000000..61411b4
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/opennlp/metadata.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="metadata.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<metadata docid="NDY_296.008718" type="foundry" xmlns="http://ids-mannheim.de/ns/KorAP">  <doc file="../data.xml" />
+<foundry name="opennlp"><layer segm="tok" file="tokens.xml" name="token" id="l1" /><layer segm="s" file="sentences.xml" name="sent" id="l2" /><layer info="pos lemma" file="morpho.xml" name="morph" id="l3" /></foundry></metadata>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/opennlp/morpho.xml b/t/real/corpus/NDY/296/008718/opennlp/morpho.xml
new file mode 100644
index 0000000..2aade7f
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/opennlp/morpho.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" VERSION="KorAP-0.4">
+   <spanList>
+      <span id="s_0" from="0" to="1">
+         <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+            <f name="lex">
+               <fs>
+                  <f name="pos">XY</f>
+               </fs>
+            </f>
+         </fs>
+      </span>
+   </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/opennlp/sentences.xml b/t/real/corpus/NDY/296/008718/opennlp/sentences.xml
new file mode 100644
index 0000000..de054d3
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/opennlp/sentences.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" VERSION="KorAP-0.4">
+   <spanList>
+      <span from="0" to="2"/>
+   </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/opennlp/tokens.xml b/t/real/corpus/NDY/296/008718/opennlp/tokens.xml
new file mode 100644
index 0000000..6a82ed6
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/opennlp/tokens.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" VERSION="KorAP-0.4">
+   <spanList>
+      <span id="s_0" from="0" to="1"/>
+   </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/struct/structure.xml b/t/real/corpus/NDY/296/008718/struct/structure.xml
new file mode 100644
index 0000000..63e5f50
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/struct/structure.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="NDY_296.008718"
+       xmlns="http://ids-mannheim.de/ns/KorAP"
+       version="KorAP-0.4">
+  <spanList>
+    <span id="s0" from="0" to="1" l="1">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">text</f>
+      </fs>
+    </span>
+    <span id="s1" from="0" to="1" l="2">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">body</f>
+      </fs>
+    </span>
+    <span id="s2" from="0" to="1" l="3">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">posting</f>
+      </fs>
+    </span>
+    <span id="s3" from="0" to="0" l="4">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">head</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="type">parent</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s4" from="0" to="1" l="4">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">p</f>
+        <f name="attr">
+          <fs type="attr">
+            <f name="part">N</f>
+          </fs>
+        </f>
+      </fs>
+    </span>
+    <span id="s5" from="0" to="1" l="-1">
+      <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+        <f name="name">s</f>
+      </fs>
+    </span>
+  </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/tree_tagger/morpho.xml b/t/real/corpus/NDY/296/008718/tree_tagger/morpho.xml
new file mode 100644
index 0000000..1080aa5
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/tree_tagger/morpho.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="NDY_296.008718">
+ <spanList>
+  <span id="t_0" from="0" to="1">
+   <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+    <f name="lex">
+     <fs>
+      <f name="lemma">UNKNOWN</f>
+      <f name="certainty">0.889475</f>
+      <f name="ctag">NN</f>
+     </fs>
+    </f>
+    <f name="lex">
+     <fs>
+      <f name="lemma">UNKNOWN</f>
+      <f name="certainty">0.110525</f>
+      <f name="ctag">ADJA</f>
+     </fs>
+    </f>
+   </fs>
+  </span>
+ </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/header.xml b/t/real/corpus/NDY/296/header.xml
new file mode 100644
index 0000000..5564e2a
--- /dev/null
+++ b/t/real/corpus/NDY/296/header.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN"
+          "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+   <fileDesc>
+    <titleStmt>
+     <dokumentSigle>NDY/296</dokumentSigle>
+     <d.title>LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall)</d.title>
+    </titleStmt>
+    <publicationStmt>
+     <distributor/>
+     <pubAddress/>
+     <availability region="ids" Default="n" status="unknown">QAO-NC-LOC:ids</availability>
+     <pubDate/>
+    </publicationStmt>
+    <sourceDesc Default="n">
+     <biblStruct Default="n" status="draft">
+      <monogr>
+       <h.title type="main">LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall)</h.title>
+       <h.author>Leon Machère</h.author>
+       <imprint/>
+       <biblScope unit="channel" n="1">https://www.youtube.com/watch?v=UCk-ERvn9_tUjg7e8Bn921gA</biblScope>
+       <biblScope unit="video" n="1">https://www.youtube.com/watch?v=JnPVOl-sO64</biblScope>
+      </monogr>
+     </biblStruct>
+    </sourceDesc>
+   </fileDesc>
+   <profileDesc/>
+  </idsHeader>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/header.xml b/t/real/corpus/NDY/header.xml
new file mode 100644
index 0000000..7893393
--- /dev/null
+++ b/t/real/corpus/NDY/header.xml
@@ -0,0 +1,393 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng"
+            type="application/xml"
+            schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN"
+          "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+  <fileDesc>
+   <titleStmt>
+    <korpusSigle>NDY</korpusSigle>
+    <c.title>Nottinghamer Korpus Deutscher YouTube-Sprache</c.title>
+    <editor>Louis Cotgrove</editor>
+   </titleStmt>
+   <publicationStmt>
+    <distributor>Institut für Deutsche Sprache</distributor>
+    <pubAddress>Postfach 10 16 21, D-68016 Mannheim</pubAddress>
+    <telephone>+49 (0)621 1581 0</telephone>
+    <availability region="ids" Default="n" status="unknown">QAO-NC-LOC:ids</availability>
+    <pubDate/>
+   </publicationStmt>
+   <sourceDesc Default="n">
+    <biblStruct Default="n" status="draft">
+     <monogr>
+      <h.title type="main">YouTube</h.title>
+      <h.title type="sub"/>
+      <h.title type="abbr">YouTube</h.title>
+      <editor/>
+      <imprint>
+       <publisher>YouTube</publisher>
+       <pubPlace>San Bruno, California</pubPlace>
+      </imprint>
+      <biblScope type="vol"/>
+     </monogr>
+    </biblStruct>
+    <reference type="super" assemblage="regular">NDY, YouTube, San Bruno, California: YouTube</reference>
+   </sourceDesc>
+  </fileDesc>
+  <encodingDesc>
+   <projectDesc Default="n"/>
+   <samplingDecl Default="n"/>
+   <editorialDecl Default="n">
+    <transduction n="1" Default="n">Data scraped using tuber R package</transduction>
+    <transduction n="2" Default="n">Data scraped using YouTube Data Tools</transduction>
+    <transduction n="3" Default="n">Tokenisation, Lemmatisation, POS-annotation using TreeTagger with STTS by Leipzig University</transduction>
+    <transduction n="4" Default="n">R data frame to I5 XML by Louis Cotgrove</transduction>
+    <pagination type="no"/>
+   </editorialDecl>
+   <classDecl>
+    <taxonomy id="topic">
+     <h.bibl>Thementaxonomie (siehe http://www.ids-mannheim.de/kl/projekte/methoden/te.html)</h.bibl>
+     <category id="topic.wissenschaft">
+      <catDesc>Wissenschaft</catDesc>
+      <category id="topic.wissenschaft.populaerwissenschaft">
+       <catDesc>Wissenschaft:Populaerwissenschaft</catDesc>
+      </category>
+     </category>
+     <category id="topic.wirtschaft-finanzen">
+      <catDesc>Wirtschaft_Finanzen</catDesc>
+      <category id="topic.wirtschaft-finanzen.banken">
+       <catDesc>Wirtschaft_Finanzen:Banken</catDesc>
+      </category>
+      <category id="topic.wirtschaft-finanzen.bilanzen">
+       <catDesc>Wirtschaft_Finanzen:Bilanzen</catDesc>
+      </category>
+      <category id="topic.wirtschaft-finanzen.oeffentliche-finanzen">
+       <catDesc>Wirtschaft_Finanzen:Oeffentliche_Finanzen</catDesc>
+      </category>
+      <category id="topic.wirtschaft-finanzen.sozialprodukt">
+       <catDesc>Wirtschaft_Finanzen:Sozialprodukt</catDesc>
+      </category>
+      <category id="topic.wirtschaft-finanzen.waehrung">
+       <catDesc>Wirtschaft_Finanzen:Waehrung</catDesc>
+      </category>
+     </category>
+     <category id="topic.technik-industrie">
+      <catDesc>Technik_Industrie</catDesc>
+      <category id="topic.technik-industrie.edv-elektronik">
+       <catDesc>Technik_Industrie:Edv_Elektronik</catDesc>
+      </category>
+      <category id="topic.technik-industrie.kfz">
+       <catDesc>Technik_Industrie:Kfz</catDesc>
+      </category>
+      <category id="topic.technik-industrie.transport-verkehr">
+       <catDesc>Technik_Industrie:Transport_Verkehr</catDesc>
+      </category>
+      <category id="topic.technik-industrie.umweltschutz">
+       <catDesc>Technik_Industrie:Umweltschutz</catDesc>
+      </category>
+      <category id="topic.technik-industrie.unfaelle">
+       <catDesc>Technik_Industrie:Unfaelle</catDesc>
+      </category>
+     </category>
+     <category id="topic.staat-gesellschaft">
+      <catDesc>Staat_Gesellschaft</catDesc>
+      <category id="topic.staat-gesellschaft.arbeit-und-beruf">
+       <catDesc>Staat_Gesellschaft:Arbeit_Und_Beruf</catDesc>
+      </category>
+      <category id="topic.staat-gesellschaft.bildung">
+       <catDesc>Staat_Gesellschaft:Bildung</catDesc>
+      </category>
+      <category id="topic.staat-gesellschaft.biographien-interviews">
+       <catDesc>Staat_Gesellschaft:Biographien_Interviews</catDesc>
+      </category>
+      <category id="topic.staat-gesellschaft.drittes-reich-rechtsextremismus">
+       <catDesc>Staat_Gesellschaft:Drittes_Reich_Rechtsextremismus</catDesc>
+      </category>
+      <category id="topic.staat-gesellschaft.familie-geschlecht">
+       <catDesc>Staat_Gesellschaft:Familie_Geschlecht</catDesc>
+      </category>
+      <category id="topic.staat-gesellschaft.kirche">
+       <catDesc>Staat_Gesellschaft:Kirche</catDesc>
+      </category>
+      <category id="topic.staat-gesellschaft.recht">
+       <catDesc>Staat_Gesellschaft:Recht</catDesc>
+      </category>
+      <category id="topic.staat-gesellschaft.tod">
+       <catDesc>Staat_Gesellschaft:Tod</catDesc>
+      </category>
+      <category id="topic.staat-gesellschaft.verbrechen">
+       <catDesc>Staat_Gesellschaft:Verbrechen</catDesc>
+      </category>
+     </category>
+     <category id="topic.sport">
+      <catDesc>Sport</catDesc>
+      <category id="topic.sport.ballsport">
+       <catDesc>Sport:Ballsport</catDesc>
+      </category>
+      <category id="topic.sport.fussball">
+       <catDesc>Sport:Fussball</catDesc>
+      </category>
+      <category id="topic.sport.motorsport">
+       <catDesc>Sport:Motorsport</catDesc>
+      </category>
+      <category id="topic.sport.radsport">
+       <catDesc>Sport:Radsport</catDesc>
+      </category>
+      <category id="topic.sport.tennis">
+       <catDesc>Sport:Tennis</catDesc>
+      </category>
+      <category id="topic.sport.vermischtes">
+       <catDesc>Sport:Vermischtes</catDesc>
+      </category>
+      <category id="topic.sport.wintersport">
+       <catDesc>Sport:Wintersport</catDesc>
+      </category>
+     </category>
+     <category id="topic.rest">
+      <catDesc>Rest</catDesc>
+      <category id="topic.rest.boersenkurse">
+       <catDesc>Rest:Boersenkurse</catDesc>
+      </category>
+      <category id="topic.rest.geburt-tod-heirat">
+       <catDesc>Rest:Geburt_Tod_Heirat</catDesc>
+      </category>
+      <category id="topic.rest.impressum">
+       <catDesc>Rest:Impressum</catDesc>
+      </category>
+      <category id="topic.rest.inhaltsverzeichnisse">
+       <catDesc>Rest:Inhaltsverzeichnisse</catDesc>
+      </category>
+      <category id="topic.rest.ligatabellen">
+       <catDesc>Rest:Ligatabellen</catDesc>
+      </category>
+      <category id="topic.rest.tabellen">
+       <catDesc>Rest:Tabellen</catDesc>
+      </category>
+      <category id="topic.rest.veranstaltungshinweise">
+       <catDesc>Rest:Veranstaltungshinweise</catDesc>
+      </category>
+     </category>
+     <category id="topic.politik">
+      <catDesc>Politik</catDesc>
+      <category id="topic.politik.ausland">
+       <catDesc>Politik:Ausland</catDesc>
+      </category>
+      <category id="topic.politik.inland">
+       <catDesc>Politik:Inland</catDesc>
+      </category>
+      <category id="topic.politik.kommunalpolitik">
+       <catDesc>Politik:Kommunalpolitik</catDesc>
+      </category>
+     </category>
+     <category id="topic.natur-umwelt">
+      <catDesc>Natur_Umwelt</catDesc>
+      <category id="topic.natur-umwelt.garten">
+       <catDesc>Natur_Umwelt:Garten</catDesc>
+      </category>
+      <category id="topic.natur-umwelt.tiere">
+       <catDesc>Natur_Umwelt:Tiere</catDesc>
+      </category>
+      <category id="topic.natur-umwelt.wetter-klima">
+       <catDesc>Natur_Umwelt:Wetter_Klima</catDesc>
+      </category>
+     </category>
+     <category id="topic.kultur">
+      <catDesc>Kultur</catDesc>
+      <category id="topic.kultur.bildende-kunst">
+       <catDesc>Kultur:Bildende_Kunst</catDesc>
+      </category>
+      <category id="topic.kultur.darstellende-kunst">
+       <catDesc>Kultur:Darstellende_Kunst</catDesc>
+      </category>
+      <category id="topic.kultur.film">
+       <catDesc>Kultur:Film</catDesc>
+      </category>
+      <category id="topic.kultur.literatur">
+       <catDesc>Kultur:Literatur</catDesc>
+      </category>
+      <category id="topic.kultur.mode">
+       <catDesc>Kultur:Mode</catDesc>
+      </category>
+      <category id="topic.kultur.musik">
+       <catDesc>Kultur:Musik</catDesc>
+      </category>
+     </category>
+     <category id="topic.gesundheit-ernaehrung">
+      <catDesc>Gesundheit_Ernaehrung</catDesc>
+      <category id="topic.gesundheit-ernaehrung.ernaehrung">
+       <catDesc>Gesundheit_Ernaehrung:Ernaehrung</catDesc>
+      </category>
+      <category id="topic.gesundheit-ernaehrung.gesundheit">
+       <catDesc>Gesundheit_Ernaehrung:Gesundheit</catDesc>
+      </category>
+     </category>
+     <category id="topic.freizeit-unterhaltung">
+      <catDesc>Freizeit_Unterhaltung</catDesc>
+      <category id="topic.freizeit-unterhaltung.reisen">
+       <catDesc>Freizeit_Unterhaltung:Reisen</catDesc>
+      </category>
+      <category id="topic.freizeit-unterhaltung.rundfunk">
+       <catDesc>Freizeit_Unterhaltung:Rundfunk</catDesc>
+      </category>
+      <category id="topic.freizeit-unterhaltung.vereine-veranstaltungen">
+       <catDesc>Freizeit_Unterhaltung:Vereine_Veranstaltungen</catDesc>
+      </category>
+     </category>
+     <category id="topic.fiktion">
+      <catDesc>Fiktion</catDesc>
+      <category id="topic.fiktion.vermischtes">
+       <catDesc>Fiktion:Vermischtes</catDesc>
+      </category>
+     </category>
+    </taxonomy>
+    <taxonomy id="YTvideoCategories">
+     <h.bibl>YouTube-Videokategorien (Englisch, siehe
+        https://youtube.googleapis.com/youtube/v3/videoCategories?part=snippet&amp;regionCode=US&amp;=[YOUR_API_KEY]; Deutsch, siehe https://youtube.googleapis.com/youtube/v3/videoCategories?part=snippet&amp;regionCode=DE&amp;=[YOUR_API_KEY])</h.bibl>
+     <category id="videoCategories.filmanimation">
+      <catDesc xml:lang="en">Film &amp; Animation</catDesc>
+      <catDesc xml:lang="de">Film &amp; Animation</catDesc>
+     </category>
+     <category id="videoCategories.autosvehicles">
+      <catDesc xml:lang="en">Autos &amp; Vehicles</catDesc>
+      <catDesc xml:lang="de">Autos &amp; Fahrzeuge</catDesc>
+     </category>
+     <category id="videoCategories.music">
+      <catDesc xml:lang="en">Music</catDesc>
+      <catDesc xml:lang="de">Musik</catDesc>
+     </category>
+     <category id="videoCategories.petsanimals">
+      <catDesc xml:lang="en">Pets &amp; Animals</catDesc>
+      <catDesc xml:lang="de">Tiere</catDesc>
+     </category>
+     <category id="videoCategories.sports">
+      <catDesc xml:lang="en">Sports</catDesc>
+      <catDesc xml:lang="de">Sport</catDesc>
+     </category>
+     <category id="videoCategories.shortmovies">
+      <catDesc xml:lang="en">Short Movies</catDesc>
+      <catDesc xml:lang="de">Kurzfilme</catDesc>
+     </category>
+     <category id="videoCategories.travelevents">
+      <catDesc xml:lang="en">Travel &amp; Events</catDesc>
+      <catDesc xml:lang="de">Reisen &amp; Events</catDesc>
+     </category>
+     <category id="videoCategories.gaming">
+      <catDesc xml:lang="en">Gaming</catDesc>
+      <catDesc xml:lang="de">Gaming</catDesc>
+     </category>
+     <category id="videoCategories.videoblogging">
+      <catDesc xml:lang="en">Videoblogging</catDesc>
+      <catDesc xml:lang="de">Videoblogging</catDesc>
+     </category>
+     <category id="videoCategories.peopleblogs">
+      <catDesc xml:lang="en">People &amp; Blogs</catDesc>
+      <catDesc xml:lang="de">Menschen &amp; Blogs</catDesc>
+     </category>
+     <category id="videoCategories.comedy">
+      <catDesc xml:lang="en">Comedy</catDesc>
+      <catDesc xml:lang="de">Komödie</catDesc>
+     </category>
+     <category id="videoCategories.entertainment">
+      <catDesc xml:lang="en">Entertainment</catDesc>
+      <catDesc xml:lang="de">Unterhaltung</catDesc>
+     </category>
+     <category id="videoCategories.newspolitics">
+      <catDesc xml:lang="en">News &amp; Politics</catDesc>
+      <catDesc xml:lang="de">Nachrichten &amp; Politik</catDesc>
+     </category>
+     <category id="videoCategories.howtostyle">
+      <catDesc xml:lang="en">Howto &amp; Style</catDesc>
+      <catDesc xml:lang="de">Praktische Tipps &amp; Styling</catDesc>
+     </category>
+     <category id="videoCategories.education">
+      <catDesc xml:lang="en">Education</catDesc>
+      <catDesc xml:lang="de">Bildung</catDesc>
+     </category>
+     <category id="videoCategories.sciencetechnology">
+      <catDesc xml:lang="en">Science &amp; Technology</catDesc>
+      <catDesc xml:lang="de">Wissenschaft &amp; Technik</catDesc>
+     </category>
+     <category id="videoCategories.nonprofits">
+      <catDesc xml:lang="en">Nonprofits &amp; Activism</catDesc>
+      <catDesc xml:lang="de">NA</catDesc>
+     </category>
+     <category id="videoCategories.movies">
+      <catDesc xml:lang="en">Movies</catDesc>
+      <catDesc xml:lang="de">Filme</catDesc>
+     </category>
+     <category id="videoCategories.anime">
+      <catDesc xml:lang="en">Anime/Animation</catDesc>
+      <catDesc xml:lang="de">Anime/Animation</catDesc>
+     </category>
+     <category id="videoCategories.action">
+      <catDesc xml:lang="en">Action/Adventure</catDesc>
+      <catDesc xml:lang="de">Action/Abenteuer</catDesc>
+     </category>
+     <category id="videoCategories.classics">
+      <catDesc xml:lang="en">Classics</catDesc>
+      <catDesc xml:lang="de">Klassiker</catDesc>
+     </category>
+     <category id="videoCategories.comedy_genre">
+      <catDesc xml:lang="en">Comedy</catDesc>
+      <catDesc xml:lang="de">Komödie</catDesc>
+     </category>
+     <category id="videoCategories.documentary">
+      <catDesc xml:lang="en">Documentary</catDesc>
+      <catDesc xml:lang="de">Dokumentationen</catDesc>
+     </category>
+     <category id="videoCategories.drama">
+      <catDesc xml:lang="en">Drama</catDesc>
+      <catDesc xml:lang="de">Drama</catDesc>
+     </category>
+     <category id="videoCategories.family">
+      <catDesc xml:lang="en">Family</catDesc>
+      <catDesc xml:lang="de">Familie</catDesc>
+     </category>
+     <category id="videoCategories.foreign">
+      <catDesc xml:lang="en">Foreign</catDesc>
+      <catDesc xml:lang="de">Ausländische Filme</catDesc>
+     </category>
+     <category id="videoCategories.horror">
+      <catDesc xml:lang="en">Horror</catDesc>
+      <catDesc xml:lang="de">Horror</catDesc>
+     </category>
+     <category id="videoCategories.scififantasy">
+      <catDesc xml:lang="en">Sci-Fi/Fantasy</catDesc>
+      <catDesc xml:lang="de">Science-Fiction/Fantasy</catDesc>
+     </category>
+     <category id="videoCategories.thriller">
+      <catDesc xml:lang="en">Thriller</catDesc>
+      <catDesc xml:lang="de">Thriller</catDesc>
+     </category>
+     <category id="videoCategories.shorts">
+      <catDesc xml:lang="en">Shorts</catDesc>
+      <catDesc xml:lang="de">Kurzfilme</catDesc>
+     </category>
+     <category id="videoCategories.shows">
+      <catDesc xml:lang="en">Shows</catDesc>
+      <catDesc xml:lang="de">Serien</catDesc>
+     </category>
+     <category id="videoCategories.trailers">
+      <catDesc xml:lang="en">Trailers</catDesc>
+      <catDesc xml:lang="de">Trailer</catDesc>
+     </category>
+    </taxonomy>
+   </classDecl>
+  </encodingDesc>
+  <profileDesc>
+   <langUsage Default="n">
+    <language id="de" usage="90.5">Deutsch</language>
+    <language id="zxx" usage="3.5">Kein linguistischer Inhalt</language>
+    <language id="en" usage="3">Englisch</language>
+    <language id="tr" usage="1">Türkisch</language>
+    <language id="ru" usage="1">Russisch</language>
+    <language id="hr" usage="0.5">Kroatisch</language>
+    <language id="mis" usage="0.5">Einzelne andere Sprachen</language>
+   </langUsage>
+   <textDesc Default="n">
+    <textType>Kurzmeldungen: YouTube-Kommentare</textType>
+    <textTypeRef/>
+   </textDesc>
+  </profileDesc>
+ </idsHeader>
\ No newline at end of file
diff --git a/t/real/ndy.t b/t/real/ndy.t
new file mode 100644
index 0000000..e6ce10a
--- /dev/null
+++ b/t/real/ndy.t
@@ -0,0 +1,75 @@
+# Metadata test for one text of the NDY corpus, read from the fixture
+# directory corpus/NDY/296/008718 located next to this file.
+use strict;
+use warnings;
+use Test::More;
+use Data::Dumper;
+use JSON::XS;
+
+# Setting SKIP_REAL in the environment skips this whole file.
+if ($ENV{SKIP_REAL}) {
+  plan skip_all => 'Skip real tests';
+};
+
+use utf8;
+use lib 'lib', '../lib';
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+use_ok('KorAP::XML::Krill');
+
+my $path = catdir(dirname(__FILE__), 'corpus','NDY','296','008718');
+
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc->parse, 'Parse document');
+
+is($doc->text_sigle, 'NDY/296/008718', 'Correct text sigle');
+is($doc->doc_sigle, 'NDY/296', 'Correct document sigle');
+is($doc->corpus_sigle, 'NDY', 'Correct corpus sigle');
+
+my $meta = $doc->meta;
+
+# Only the title prefix is checked: '!' is the regex delimiter here, and the
+# fixture title continues with "!", an emoji and " (AutoUnfall)".
+like($meta->{T_title}, qr!^Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT!, 'Title');
+ok(!$meta->{T_sub_title}, 'SubTitle');
+is($meta->{T_author}, 'Livia Banse', 'Author');
+ok(!$meta->{A_editor}, 'Editor');
+is($meta->{S_pub_place}, 'San Bruno, California', 'Publication place');
+is($meta->{A_publisher}, 'YouTube', 'Publisher');
+
+is($meta->{S_text_type},'Kurzmeldungen: YouTube-Kommentare', 'Text Type');
+ok(!$meta->{S_text_type_art}, 'No Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'No Text Type Ref');
+ok(!$meta->{S_text_domain}, 'No Text Domain');
+ok(!$meta->{S_text_column}, 'No Text Column');
+
+is($meta->{K_text_class}->[0], 'entertainment', 'Correct Text Class');
+ok(!$meta->{K_text_class}->[1], 'No second Text Class');
+
+is($meta->{D_pub_date}, '20171204', 'Publication date');
+is($meta->{D_creation_date}, '20171204', 'Creation date');
+is($meta->{S_availability}, 'QAO-NC-LOC:ids', 'License');
+ok(!$meta->{A_pages}, 'Pages');
+
+ok(!$meta->{A_file_edition_statement}, 'File Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Statement');
+
+like($meta->{A_reference}, qr!NDY\/296\.008718, YouTube, 04\.12\.2017\. Livia Banse: Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT.* \(AutoUnfall\), - YouTube!, 'Reference');
+
+is($meta->{S_language}, 'de', 'Language');
+
+is($meta->{T_corpus_title}, 'YouTube', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus sub title');
+ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
+ok(!$meta->{A_corpus_editor}, 'Correct Corpus editor');
+
+like($meta->{T_doc_title}, qr!LOCKE hat mein MERCEDES AMG ZERSTÖRT\!.* \(AutoUnfall\)!, 'Correct Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc sub title');
+is($meta->{T_doc_author},'Leon Machère', 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct doc editor');
+
+
+done_testing;
+__END__