Support NDY metadata
Change-Id: Ifebed2de671ac516782a4daf49b2440e89ec4533
diff --git a/lib/KorAP/XML/Meta/I5.pm b/lib/KorAP/XML/Meta/I5.pm
index 4dd4b9d..928b9be 100644
--- a/lib/KorAP/XML/Meta/I5.pm
+++ b/lib/KorAP/XML/Meta/I5.pm
@@ -304,6 +304,7 @@
$temp->find("catRef")->each(
sub {
+ return unless $_->attr('target');
my ($ign, @ttopic) = grep { $_ } map { _squish($_) } split('\.', $_->attr('target'));
push(@topic, @ttopic);
}
diff --git a/t/real/corpus/NDY/296/008718/base/tokens.xml b/t/real/corpus/NDY/296/008718/base/tokens.xml
new file mode 100644
index 0000000..723f3f2
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/base/tokens.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng"
+ type="application/xml"
+ schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="NDY_296.008718"
+ xmlns="http://ids-mannheim.de/ns/KorAP"
+ version="KorAP-0.4">
+ <spanList>
+ <span id="t_0" from="0" to="1" />
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/corenlp/constituency.xml b/t/real/corpus/NDY/296/008718/corenlp/constituency.xml
new file mode 100644
index 0000000..304bf8a
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/constituency.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" version="KorAP-0.4">
+ <spanList/>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/corenlp/metadata.xml b/t/real/corpus/NDY/296/008718/corenlp/metadata.xml
new file mode 100644
index 0000000..94b1bda
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/metadata.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="metadata.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<metadata docid="NDY_296.008718" type="foundry" xmlns="http://ids-mannheim.de/ns/KorAP"> <doc file="../data.xml" />
+<foundry name="corenlp"><layer segm="tok" file="tokens.xml" name="token" id="l1" /><layer segm="s" file="sentences.xml" name="sent" id="l2" /><layer info="pos lemma" file="morpho.xml" name="morph" id="l3" /></foundry></metadata>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/corenlp/morpho.xml b/t/real/corpus/NDY/296/008718/corenlp/morpho.xml
new file mode 100644
index 0000000..304bf8a
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/morpho.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" version="KorAP-0.4">
+ <spanList/>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/corenlp/sentences.xml b/t/real/corpus/NDY/296/008718/corenlp/sentences.xml
new file mode 100644
index 0000000..304bf8a
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/sentences.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" version="KorAP-0.4">
+ <spanList/>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/corenlp/tokens.xml b/t/real/corpus/NDY/296/008718/corenlp/tokens.xml
new file mode 100644
index 0000000..304bf8a
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/corenlp/tokens.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" version="KorAP-0.4">
+ <spanList/>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/data.xml b/t/real/corpus/NDY/296/008718/data.xml
new file mode 100644
index 0000000..4bac9a0
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/data.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng"
+ type="application/xml"
+ schematypens="http://relaxng.org/ns/structure/1.0"?>
+<raw_text docid="NDY_296.008718"
+ xmlns="http://ids-mannheim.de/ns/KorAP">
+ <metadata file="metadata.xml" />
+ <text>😍</text>
+</raw_text>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/header.xml b/t/real/corpus/NDY/296/008718/header.xml
new file mode 100644
index 0000000..a56a8b4
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/header.xml
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng"
+ type="application/xml"
+ schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN"
+ "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="text" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+ <fileDesc>
+ <titleStmt>
+ <textSigle>NDY/296.008718</textSigle>
+ <t.title assemblage="external">NDY/296.008718 Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall), - YouTube, 04.12.2017</t.title>
+ </titleStmt>
+ <publicationStmt>
+ <distributor/>
+ <pubAddress/>
+ <availability region="ids" Default="n" status="unknown">QAO-NC-LOC:ids</availability>
+ <pubDate/>
+ </publicationStmt>
+ <sourceDesc Default="n">
+ <biblStruct Default="n" status="draft">
+ <analytic>
+ <h.title type="main">Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall)</h.title>
+ <h.author>Livia Banse</h.author>
+ <imprint/>
+ <biblScope type="pp"/>
+ <biblScope type="suppl"/>
+ <biblScope type="suppltitle"/>
+ <biblNote n="1"/>
+ <biblNote n="url">https://youtube.googleapis.com/youtube/v3/comments?part=snippet&amp;id=UgwyndbbrTpNztj0vwh4AaABAg&amp;key=[YOUR_API_KEY]</biblNote>
+ </analytic>
+ <monogr>
+ <h.title type="main">YouTube</h.title>
+ <h.title type="sub"/>
+ <h.title type="abbr">YouTube</h.title>
+ <editor/>
+ <imprint>
+ <publisher>YouTube</publisher>
+ <pubDate type="year">2017</pubDate>
+ <pubDate type="month">12</pubDate>
+ <pubDate type="day">04</pubDate>
+ <pubDate type="time">2017-12-04T12:56:36.000Z</pubDate>
+ <pubPlace/>
+ </imprint>
+ <biblScope type="issue"/>
+ <biblScope type="issueplace"/>
+ <biblScope type="vol"/>
+ </monogr>
+ </biblStruct>
+ <reference type="complete" assemblage="regular">NDY/296.008718, YouTube, 04.12.2017. Livia Banse: Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall), - YouTube
+ </reference>
+ <reference type="short" assemblage="regular">NDY/296.008718, YouTube, 04.12.2017</reference>
+ </sourceDesc>
+ </fileDesc>
+ <encodingDesc>
+ <samplingDecl Default="n"/>
+ <editorialDecl Default="n">
+ <pagination type="no"/>
+ </editorialDecl>
+ <tagsDecl>
+ <tagUsage gi="p" occurs="1"/>
+ </tagsDecl>
+ </encodingDesc>
+ <profileDesc>
+ <creation>
+ <creatDate>2017.12.04</creatDate>
+ <creatRef>4.12.2017</creatRef>
+ <creatRefShort>4.12.2017</creatRefShort>
+ </creation>
+ <textClass Default="n">
+ <catRef n="24" target="videoCategories.entertainment" scheme="YTvideoCategories"/>
+ </textClass>
+ <textDesc Default="n">
+ <textType>Kurzmeldungen: YouTube-Kommentare</textType>
+ <textTypeArt/>
+ <textDomain/>
+ <column/>
+ </textDesc>
+ <correspDesc Default="n">
+ <correspContext>
+ <ref type="channel" n="1" target="https://www.youtube.com/watch?v=UCk-ERvn9_tUjg7e8Bn921gA"/>
+ <ref type="video" n="1" target="https://www.youtube.com/watch?v=JnPVOl-sO64"/>
+ <ref type="parent"/>
+ </correspContext>
+ </correspDesc>
+ </profileDesc>
+ </idsHeader>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/malt/dependency.xml b/t/real/corpus/NDY/296/008718/malt/dependency.xml
new file mode 100644
index 0000000..f28bd17
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/malt/dependency.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="NDY_296.008718" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+<spanList>
+<span id="s2318566_n1" from="0" to="1">
+<rel label="ROOT">
+<span from="0" to="1"/>
+</rel>
+</span>
+</spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/marmot/morpho.xml b/t/real/corpus/NDY/296/008718/marmot/morpho.xml
new file mode 100644
index 0000000..9852add
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/marmot/morpho.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="NDY_296.008718" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+<spanList>
+ <span id="s4195562_n1" from="0" to="1">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="pos">XY</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/opennlp/metadata.xml b/t/real/corpus/NDY/296/008718/opennlp/metadata.xml
new file mode 100644
index 0000000..61411b4
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/opennlp/metadata.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="metadata.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<metadata docid="NDY_296.008718" type="foundry" xmlns="http://ids-mannheim.de/ns/KorAP"> <doc file="../data.xml" />
+<foundry name="opennlp"><layer segm="tok" file="tokens.xml" name="token" id="l1" /><layer segm="s" file="sentences.xml" name="sent" id="l2" /><layer info="pos lemma" file="morpho.xml" name="morph" id="l3" /></foundry></metadata>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/opennlp/morpho.xml b/t/real/corpus/NDY/296/008718/opennlp/morpho.xml
new file mode 100644
index 0000000..2aade7f
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/opennlp/morpho.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" VERSION="KorAP-0.4">
+ <spanList>
+ <span id="s_0" from="0" to="1">
+ <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
+ <f name="lex">
+ <fs>
+ <f name="pos">XY</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/opennlp/sentences.xml b/t/real/corpus/NDY/296/008718/opennlp/sentences.xml
new file mode 100644
index 0000000..de054d3
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/opennlp/sentences.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" VERSION="KorAP-0.4">
+ <spanList>
+ <span from="0" to="2"/>
+ </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/opennlp/tokens.xml b/t/real/corpus/NDY/296/008718/opennlp/tokens.xml
new file mode 100644
index 0000000..6a82ed6
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/opennlp/tokens.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="NDY_296.008718" VERSION="KorAP-0.4">
+ <spanList>
+ <span id="s_0" from="0" to="1"/>
+ </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/008718/struct/structure.xml b/t/real/corpus/NDY/296/008718/struct/structure.xml
new file mode 100644
index 0000000..63e5f50
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/struct/structure.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng"
+ type="application/xml"
+ schematypens="http://relaxng.org/ns/structure/1.0"?>
+<layer docid="NDY_296.008718"
+ xmlns="http://ids-mannheim.de/ns/KorAP"
+ version="KorAP-0.4">
+ <spanList>
+ <span id="s0" from="0" to="1" l="1">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">text</f>
+ </fs>
+ </span>
+ <span id="s1" from="0" to="1" l="2">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">body</f>
+ </fs>
+ </span>
+ <span id="s2" from="0" to="1" l="3">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">posting</f>
+ </fs>
+ </span>
+ <span id="s3" from="0" to="0" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">head</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="type">parent</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s4" from="0" to="1" l="4">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">p</f>
+ <f name="attr">
+ <fs type="attr">
+ <f name="part">N</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ <span id="s5" from="0" to="1" l="-1">
+ <fs type="struct" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="name">s</f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/296/008718/tree_tagger/morpho.xml b/t/real/corpus/NDY/296/008718/tree_tagger/morpho.xml
new file mode 100644
index 0000000..1080aa5
--- /dev/null
+++ b/t/real/corpus/NDY/296/008718/tree_tagger/morpho.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="NDY_296.008718">
+ <spanList>
+ <span id="t_0" from="0" to="1">
+ <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
+ <f name="lex">
+ <fs>
+ <f name="lemma">UNKNOWN</f>
+ <f name="certainty">0.889475</f>
+ <f name="ctag">NN</f>
+ </fs>
+ </f>
+ <f name="lex">
+ <fs>
+ <f name="lemma">UNKNOWN</f>
+ <f name="certainty">0.110525</f>
+ <f name="ctag">ADJA</f>
+ </fs>
+ </f>
+ </fs>
+ </span>
+ </spanList>
+</layer>
diff --git a/t/real/corpus/NDY/296/header.xml b/t/real/corpus/NDY/296/header.xml
new file mode 100644
index 0000000..5564e2a
--- /dev/null
+++ b/t/real/corpus/NDY/296/header.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng"
+ type="application/xml"
+ schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN"
+ "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+ <fileDesc>
+ <titleStmt>
+ <dokumentSigle>NDY/296</dokumentSigle>
+ <d.title>LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall)</d.title>
+ </titleStmt>
+ <publicationStmt>
+ <distributor/>
+ <pubAddress/>
+ <availability region="ids" Default="n" status="unknown">QAO-NC-LOC:ids</availability>
+ <pubDate/>
+ </publicationStmt>
+ <sourceDesc Default="n">
+ <biblStruct Default="n" status="draft">
+ <monogr>
+ <h.title type="main">LOCKE hat mein MERCEDES AMG ZERSTÖRT!😡 (AutoUnfall)</h.title>
+ <h.author>Leon Machère</h.author>
+ <imprint/>
+ <biblScope unit="channel" n="1">https://www.youtube.com/watch?v=UCk-ERvn9_tUjg7e8Bn921gA</biblScope>
+ <biblScope unit="video" n="1">https://www.youtube.com/watch?v=JnPVOl-sO64</biblScope>
+ </monogr>
+ </biblStruct>
+ </sourceDesc>
+ </fileDesc>
+ <profileDesc/>
+ </idsHeader>
\ No newline at end of file
diff --git a/t/real/corpus/NDY/header.xml b/t/real/corpus/NDY/header.xml
new file mode 100644
index 0000000..7893393
--- /dev/null
+++ b/t/real/corpus/NDY/header.xml
@@ -0,0 +1,393 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng"
+ type="application/xml"
+ schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN"
+ "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+ <fileDesc>
+ <titleStmt>
+ <korpusSigle>NDY</korpusSigle>
+ <c.title>Nottinghamer Korpus Deutscher YouTube-Sprache</c.title>
+ <editor>Louis Cotgrove</editor>
+ </titleStmt>
+ <publicationStmt>
+ <distributor>Institut für Deutsche Sprache</distributor>
+ <pubAddress>Postfach 10 16 21, D-68016 Mannheim</pubAddress>
+ <telephone>+49 (0)621 1581 0</telephone>
+ <availability region="ids" Default="n" status="unknown">QAO-NC-LOC:ids</availability>
+ <pubDate/>
+ </publicationStmt>
+ <sourceDesc Default="n">
+ <biblStruct Default="n" status="draft">
+ <monogr>
+ <h.title type="main">YouTube</h.title>
+ <h.title type="sub"/>
+ <h.title type="abbr">YouTube</h.title>
+ <editor/>
+ <imprint>
+ <publisher>YouTube</publisher>
+ <pubPlace>San Bruno, California</pubPlace>
+ </imprint>
+ <biblScope type="vol"/>
+ </monogr>
+ </biblStruct>
+ <reference type="super" assemblage="regular">NDY, YouTube, San Bruno, California: YouTube</reference>
+ </sourceDesc>
+ </fileDesc>
+ <encodingDesc>
+ <projectDesc Default="n"/>
+ <samplingDecl Default="n"/>
+ <editorialDecl Default="n">
+ <transduction n="1" Default="n">Data scraped using tuber R package</transduction>
+ <transduction n="2" Default="n">Data scraped using YouTube Data Tools</transduction>
+ <transduction n="3" Default="n">Tokenisation, Lemmatisation, POS-annotation using TreeTagger with STTS by Leipzig University</transduction>
+ <transduction n="4" Default="n">R data frame to I5 XML by Louis Cotgrove</transduction>
+ <pagination type="no"/>
+ </editorialDecl>
+ <classDecl>
+ <taxonomy id="topic">
+ <h.bibl>Thementaxonomie (siehe http://www.ids-mannheim.de/kl/projekte/methoden/te.html)</h.bibl>
+ <category id="topic.wissenschaft">
+ <catDesc>Wissenschaft</catDesc>
+ <category id="topic.wissenschaft.populaerwissenschaft">
+ <catDesc>Wissenschaft:Populaerwissenschaft</catDesc>
+ </category>
+ </category>
+ <category id="topic.wirtschaft-finanzen">
+ <catDesc>Wirtschaft_Finanzen</catDesc>
+ <category id="topic.wirtschaft-finanzen.banken">
+ <catDesc>Wirtschaft_Finanzen:Banken</catDesc>
+ </category>
+ <category id="topic.wirtschaft-finanzen.bilanzen">
+ <catDesc>Wirtschaft_Finanzen:Bilanzen</catDesc>
+ </category>
+ <category id="topic.wirtschaft-finanzen.oeffentliche-finanzen">
+ <catDesc>Wirtschaft_Finanzen:Oeffentliche_Finanzen</catDesc>
+ </category>
+ <category id="topic.wirtschaft-finanzen.sozialprodukt">
+ <catDesc>Wirtschaft_Finanzen:Sozialprodukt</catDesc>
+ </category>
+ <category id="topic.wirtschaft-finanzen.waehrung">
+ <catDesc>Wirtschaft_Finanzen:Waehrung</catDesc>
+ </category>
+ </category>
+ <category id="topic.technik-industrie">
+ <catDesc>Technik_Industrie</catDesc>
+ <category id="topic.technik-industrie.edv-elektronik">
+ <catDesc>Technik_Industrie:Edv_Elektronik</catDesc>
+ </category>
+ <category id="topic.technik-industrie.kfz">
+ <catDesc>Technik_Industrie:Kfz</catDesc>
+ </category>
+ <category id="topic.technik-industrie.transport-verkehr">
+ <catDesc>Technik_Industrie:Transport_Verkehr</catDesc>
+ </category>
+ <category id="topic.technik-industrie.umweltschutz">
+ <catDesc>Technik_Industrie:Umweltschutz</catDesc>
+ </category>
+ <category id="topic.technik-industrie.unfaelle">
+ <catDesc>Technik_Industrie:Unfaelle</catDesc>
+ </category>
+ </category>
+ <category id="topic.staat-gesellschaft">
+ <catDesc>Staat_Gesellschaft</catDesc>
+ <category id="topic.staat-gesellschaft.arbeit-und-beruf">
+ <catDesc>Staat_Gesellschaft:Arbeit_Und_Beruf</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.bildung">
+ <catDesc>Staat_Gesellschaft:Bildung</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.biographien-interviews">
+ <catDesc>Staat_Gesellschaft:Biographien_Interviews</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.drittes-reich-rechtsextremismus">
+ <catDesc>Staat_Gesellschaft:Drittes_Reich_Rechtsextremismus</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.familie-geschlecht">
+ <catDesc>Staat_Gesellschaft:Familie_Geschlecht</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.kirche">
+ <catDesc>Staat_Gesellschaft:Kirche</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.recht">
+ <catDesc>Staat_Gesellschaft:Recht</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.tod">
+ <catDesc>Staat_Gesellschaft:Tod</catDesc>
+ </category>
+ <category id="topic.staat-gesellschaft.verbrechen">
+ <catDesc>Staat_Gesellschaft:Verbrechen</catDesc>
+ </category>
+ </category>
+ <category id="topic.sport">
+ <catDesc>Sport</catDesc>
+ <category id="topic.sport.ballsport">
+ <catDesc>Sport:Ballsport</catDesc>
+ </category>
+ <category id="topic.sport.fussball">
+ <catDesc>Sport:Fussball</catDesc>
+ </category>
+ <category id="topic.sport.motorsport">
+ <catDesc>Sport:Motorsport</catDesc>
+ </category>
+ <category id="topic.sport.radsport">
+ <catDesc>Sport:Radsport</catDesc>
+ </category>
+ <category id="topic.sport.tennis">
+ <catDesc>Sport:Tennis</catDesc>
+ </category>
+ <category id="topic.sport.vermischtes">
+ <catDesc>Sport:Vermischtes</catDesc>
+ </category>
+ <category id="topic.sport.wintersport">
+ <catDesc>Sport:Wintersport</catDesc>
+ </category>
+ </category>
+ <category id="topic.rest">
+ <catDesc>Rest</catDesc>
+ <category id="topic.rest.boersenkurse">
+ <catDesc>Rest:Boersenkurse</catDesc>
+ </category>
+ <category id="topic.rest.geburt-tod-heirat">
+ <catDesc>Rest:Geburt_Tod_Heirat</catDesc>
+ </category>
+ <category id="topic.rest.impressum">
+ <catDesc>Rest:Impressum</catDesc>
+ </category>
+ <category id="topic.rest.inhaltsverzeichnisse">
+ <catDesc>Rest:Inhaltsverzeichnisse</catDesc>
+ </category>
+ <category id="topic.rest.ligatabellen">
+ <catDesc>Rest:Ligatabellen</catDesc>
+ </category>
+ <category id="topic.rest.tabellen">
+ <catDesc>Rest:Tabellen</catDesc>
+ </category>
+ <category id="topic.rest.veranstaltungshinweise">
+ <catDesc>Rest:Veranstaltungshinweise</catDesc>
+ </category>
+ </category>
+ <category id="topic.politik">
+ <catDesc>Politik</catDesc>
+ <category id="topic.politik.ausland">
+ <catDesc>Politik:Ausland</catDesc>
+ </category>
+ <category id="topic.politik.inland">
+ <catDesc>Politik:Inland</catDesc>
+ </category>
+ <category id="topic.politik.kommunalpolitik">
+ <catDesc>Politik:Kommunalpolitik</catDesc>
+ </category>
+ </category>
+ <category id="topic.natur-umwelt">
+ <catDesc>Natur_Umwelt</catDesc>
+ <category id="topic.natur-umwelt.garten">
+ <catDesc>Natur_Umwelt:Garten</catDesc>
+ </category>
+ <category id="topic.natur-umwelt.tiere">
+ <catDesc>Natur_Umwelt:Tiere</catDesc>
+ </category>
+ <category id="topic.natur-umwelt.wetter-klima">
+ <catDesc>Natur_Umwelt:Wetter_Klima</catDesc>
+ </category>
+ </category>
+ <category id="topic.kultur">
+ <catDesc>Kultur</catDesc>
+ <category id="topic.kultur.bildende-kunst">
+ <catDesc>Kultur:Bildende_Kunst</catDesc>
+ </category>
+ <category id="topic.kultur.darstellende-kunst">
+ <catDesc>Kultur:Darstellende_Kunst</catDesc>
+ </category>
+ <category id="topic.kultur.film">
+ <catDesc>Kultur:Film</catDesc>
+ </category>
+ <category id="topic.kultur.literatur">
+ <catDesc>Kultur:Literatur</catDesc>
+ </category>
+ <category id="topic.kultur.mode">
+ <catDesc>Kultur:Mode</catDesc>
+ </category>
+ <category id="topic.kultur.musik">
+ <catDesc>Kultur:Musik</catDesc>
+ </category>
+ </category>
+ <category id="topic.gesundheit-ernaehrung">
+ <catDesc>Gesundheit_Ernaehrung</catDesc>
+ <category id="topic.gesundheit-ernaehrung.ernaehrung">
+ <catDesc>Gesundheit_Ernaehrung:Ernaehrung</catDesc>
+ </category>
+ <category id="topic.gesundheit-ernaehrung.gesundheit">
+ <catDesc>Gesundheit_Ernaehrung:Gesundheit</catDesc>
+ </category>
+ </category>
+ <category id="topic.freizeit-unterhaltung">
+ <catDesc>Freizeit_Unterhaltung</catDesc>
+ <category id="topic.freizeit-unterhaltung.reisen">
+ <catDesc>Freizeit_Unterhaltung:Reisen</catDesc>
+ </category>
+ <category id="topic.freizeit-unterhaltung.rundfunk">
+ <catDesc>Freizeit_Unterhaltung:Rundfunk</catDesc>
+ </category>
+ <category id="topic.freizeit-unterhaltung.vereine-veranstaltungen">
+ <catDesc>Freizeit_Unterhaltung:Vereine_Veranstaltungen</catDesc>
+ </category>
+ </category>
+ <category id="topic.fiktion">
+ <catDesc>Fiktion</catDesc>
+ <category id="topic.fiktion.vermischtes">
+ <catDesc>Fiktion:Vermischtes</catDesc>
+ </category>
+ </category>
+ </taxonomy>
+ <taxonomy id="YTvideoCategories">
+ <h.bibl>YouTube-Videokategorien (Englisch, siehe
+ https://youtube.googleapis.com/youtube/v3/videoCategories?part=snippet&amp;regionCode=US&amp;=[YOUR_API_KEY]; Deutsch, siehe https://youtube.googleapis.com/youtube/v3/videoCategories?part=snippet&amp;regionCode=DE&amp;=[YOUR_API_KEY])</h.bibl>
+ <category id="videoCategories.filmanimation">
+ <catDesc xml:lang="en">Film &amp; Animation</catDesc>
+ <catDesc xml:lang="de">Film &amp; Animation</catDesc>
+ </category>
+ <category id="videoCategories.autosvehicles">
+ <catDesc xml:lang="en">Autos &amp; Vehicles</catDesc>
+ <catDesc xml:lang="de">Autos &amp; Fahrzeuge</catDesc>
+ </category>
+ <category id="videoCategories.music">
+ <catDesc xml:lang="en">Music</catDesc>
+ <catDesc xml:lang="de">Musik</catDesc>
+ </category>
+ <category id="videoCategories.petsanimals">
+ <catDesc xml:lang="en">Pets &amp; Animals</catDesc>
+ <catDesc xml:lang="de">Tiere</catDesc>
+ </category>
+ <category id="videoCategories.sports">
+ <catDesc xml:lang="en">Sports</catDesc>
+ <catDesc xml:lang="de">Sport</catDesc>
+ </category>
+ <category id="videoCategories.shortmovies">
+ <catDesc xml:lang="en">Short Movies</catDesc>
+ <catDesc xml:lang="de">Kurzfilme</catDesc>
+ </category>
+ <category id="videoCategories.travelevents">
+ <catDesc xml:lang="en">Travel &amp; Events</catDesc>
+ <catDesc xml:lang="de">Reisen &amp; Events</catDesc>
+ </category>
+ <category id="videoCategories.gaming">
+ <catDesc xml:lang="en">Gaming</catDesc>
+ <catDesc xml:lang="de">Gaming</catDesc>
+ </category>
+ <category id="videoCategories.videoblogging">
+ <catDesc xml:lang="en">Videoblogging</catDesc>
+ <catDesc xml:lang="de">Videoblogging</catDesc>
+ </category>
+ <category id="videoCategories.peopleblogs">
+ <catDesc xml:lang="en">People &amp; Blogs</catDesc>
+ <catDesc xml:lang="de">Menschen &amp; Blogs</catDesc>
+ </category>
+ <category id="videoCategories.comedy">
+ <catDesc xml:lang="en">Comedy</catDesc>
+ <catDesc xml:lang="de">Komödie</catDesc>
+ </category>
+ <category id="videoCategories.entertainment">
+ <catDesc xml:lang="en">Entertainment</catDesc>
+ <catDesc xml:lang="de">Unterhaltung</catDesc>
+ </category>
+ <category id="videoCategories.newspolitics">
+ <catDesc xml:lang="en">News &amp; Politics</catDesc>
+ <catDesc xml:lang="de">Nachrichten &amp; Politik</catDesc>
+ </category>
+ <category id="videoCategories.howtostyle">
+ <catDesc xml:lang="en">Howto &amp; Style</catDesc>
+ <catDesc xml:lang="de">Praktische Tipps &amp; Styling</catDesc>
+ </category>
+ <category id="videoCategories.education">
+ <catDesc xml:lang="en">Education</catDesc>
+ <catDesc xml:lang="de">Bildung</catDesc>
+ </category>
+ <category id="videoCategories.sciencetechnology">
+ <catDesc xml:lang="en">Science &amp; Technology</catDesc>
+ <catDesc xml:lang="de">Wissenschaft &amp; Technik</catDesc>
+ </category>
+ <category id="videoCategories.nonprofits">
+ <catDesc xml:lang="en">Nonprofits &amp; Activism</catDesc>
+ <catDesc xml:lang="de">NA</catDesc>
+ </category>
+ <category id="videoCategories.movies">
+ <catDesc xml:lang="en">Movies</catDesc>
+ <catDesc xml:lang="de">Filme</catDesc>
+ </category>
+ <category id="videoCategories.anime">
+ <catDesc xml:lang="en">Anime/Animation</catDesc>
+ <catDesc xml:lang="de">Anime/Animation</catDesc>
+ </category>
+ <category id="videoCategories.action">
+ <catDesc xml:lang="en">Action/Adventure</catDesc>
+ <catDesc xml:lang="de">Action/Abenteuer</catDesc>
+ </category>
+ <category id="videoCategories.classics">
+ <catDesc xml:lang="en">Classics</catDesc>
+ <catDesc xml:lang="de">Klassiker</catDesc>
+ </category>
+ <category id="videoCategories.comedy_genre">
+ <catDesc xml:lang="en">Comedy</catDesc>
+ <catDesc xml:lang="de">Komödie</catDesc>
+ </category>
+ <category id="videoCategories.documentary">
+ <catDesc xml:lang="en">Documentary</catDesc>
+ <catDesc xml:lang="de">Dokumentationen</catDesc>
+ </category>
+ <category id="videoCategories.drama">
+ <catDesc xml:lang="en">Drama</catDesc>
+ <catDesc xml:lang="de">Drama</catDesc>
+ </category>
+ <category id="videoCategories.family">
+ <catDesc xml:lang="en">Family</catDesc>
+ <catDesc xml:lang="de">Familie</catDesc>
+ </category>
+ <category id="videoCategories.foreign">
+ <catDesc xml:lang="en">Foreign</catDesc>
+ <catDesc xml:lang="de">Ausländische Filme</catDesc>
+ </category>
+ <category id="videoCategories.horror">
+ <catDesc xml:lang="en">Horror</catDesc>
+ <catDesc xml:lang="de">Horror</catDesc>
+ </category>
+ <category id="videoCategories.scififantasy">
+ <catDesc xml:lang="en">Sci-Fi/Fantasy</catDesc>
+ <catDesc xml:lang="de">Science-Fiction/Fantasy</catDesc>
+ </category>
+ <category id="videoCategories.thriller">
+ <catDesc xml:lang="en">Thriller</catDesc>
+ <catDesc xml:lang="de">Thriller</catDesc>
+ </category>
+ <category id="videoCategories.shorts">
+ <catDesc xml:lang="en">Shorts</catDesc>
+ <catDesc xml:lang="de">Kurzfilme</catDesc>
+ </category>
+ <category id="videoCategories.shows">
+ <catDesc xml:lang="en">Shows</catDesc>
+ <catDesc xml:lang="de">Serien</catDesc>
+ </category>
+ <category id="videoCategories.trailers">
+ <catDesc xml:lang="en">Trailers</catDesc>
+ <catDesc xml:lang="de">Trailer</catDesc>
+ </category>
+ </taxonomy>
+ </classDecl>
+ </encodingDesc>
+ <profileDesc>
+ <langUsage Default="n">
+ <language id="de" usage="90.5">Deutsch</language>
+ <language id="zxx" usage="3.5">Kein linguistischer Inhalt</language>
+ <language id="en" usage="3">Englisch</language>
+ <language id="tr" usage="1">Türkisch</language>
+ <language id="ru" usage="1">Russisch</language>
+ <language id="hr" usage="0.5">Kroatisch</language>
+ <language id="mis" usage="0.5">Einzelne andere Sprachen</language>
+ </langUsage>
+ <textDesc Default="n">
+ <textType>Kurzmeldungen: YouTube-Kommentare</textType>
+ <textTypeRef/>
+ </textDesc>
+ </profileDesc>
+ </idsHeader>
\ No newline at end of file
diff --git a/t/real/ndy.t b/t/real/ndy.t
new file mode 100644
index 0000000..e6ce10a
--- /dev/null
+++ b/t/real/ndy.t
@@ -0,0 +1,70 @@
+use strict;
+use warnings;
+use Test::More;
+use Data::Dumper;
+use JSON::XS;
+# Tests against real corpus fixtures can be disabled by setting SKIP_REAL.
+if ($ENV{SKIP_REAL}) {
+ plan skip_all => 'Skip real tests';
+};
+# The assertions below contain UTF-8 literals (e.g. 'ZERSTÖRT', 'Machère').
+use utf8;
+use lib 'lib', '../lib';
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+use_ok('KorAP::XML::Krill');
+# Fixture directory of text NDY/296/008718, located next to this test file.
+my $path = catdir(dirname(__FILE__), 'corpus','NDY','296','008718');
+
+ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc->parse, 'Parse document');
+
+is($doc->text_sigle, 'NDY/296/008718', 'Correct text sigle');
+is($doc->doc_sigle, 'NDY/296', 'Correct document sigle');
+is($doc->corpus_sigle, 'NDY', 'Correct corpus sigle');
+
+my $meta = $doc->meta;
+# Text-level bibliographic fields.
+like($meta->{T_title}, qr!^Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT!, 'Title');
+ok(!$meta->{T_sub_title}, 'SubTitle');
+is($meta->{T_author}, 'Livia Banse', 'Author');
+ok(!$meta->{A_editor}, 'Editor');
+is($meta->{S_pub_place}, 'San Bruno, California', 'Publication place');
+is($meta->{A_publisher}, 'YouTube', 'Publisher');
+
+is($meta->{S_text_type},'Kurzmeldungen: YouTube-Kommentare', 'Text Type');
+ok(!$meta->{S_text_type_art}, 'No Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'No Text Type Ref');
+ok(!$meta->{S_text_domain}, 'No Text Domain');
+ok(!$meta->{S_text_column}, 'No Text Column');
+# The text header's only catRef targets 'videoCategories.entertainment'.
+is($meta->{K_text_class}->[0], 'entertainment', 'Correct Text Class');
+ok(!$meta->{K_text_class}->[1], 'No second Text Class');
+
+is($meta->{D_pub_date}, '20171204', 'Publication date');
+is($meta->{D_creation_date}, '20171204', 'Creation date');
+is($meta->{S_availability}, 'QAO-NC-LOC:ids', 'License');
+ok(!$meta->{A_pages}, 'Pages');
+
+ok(!$meta->{A_file_edition_statement}, 'File Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Statement');
+
+like($meta->{A_reference}, qr!NDY\/296\.008718, YouTube, 04\.12\.2017\. Livia Banse: Kommentar zu: LOCKE hat mein MERCEDES AMG ZERSTÖRT.* \(AutoUnfall\), - YouTube!, 'Reference');
+
+is($meta->{S_language}, 'de', 'Language');
+# Corpus-level fields.
+is($meta->{T_corpus_title}, 'YouTube', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus sub title');
+ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
+ok(!$meta->{A_corpus_editor}, 'Correct Corpus editor');
+# Document-level fields.
+like($meta->{T_doc_title}, qr!LOCKE hat mein MERCEDES AMG ZERSTÖRT\!.* \(AutoUnfall\)!, 'Correct Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc sub title');
+is($meta->{T_doc_author},'Leon Machère', 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct doc editor');
+
+
+done_testing;
+__END__