Improved DRuKoLa meta data handling
Change-Id: I50a79baa595429abeb8b56b11b1942f6e2ed8374
diff --git a/t/corpus/BBU2/Blog/83701_a_82376/data.xml b/t/corpus/BBU2/Blog/83701_a_82376/data.xml
new file mode 100644
index 0000000..630b2dc
--- /dev/null
+++ b/t/corpus/BBU2/Blog/83701_a_82376/data.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<raw_text docid="BBU_BLOG.83701_a_82376" xmlns="http://ids-mannheim.de/ns/KorAP">
+ <metadata file="metadata.xml" />
+ <text>.</text>
+</raw_text>
diff --git a/t/corpus/BBU2/Blog/83701_a_82376/header.xml b/t/corpus/BBU2/Blog/83701_a_82376/header.xml
new file mode 100644
index 0000000..94f5be2
--- /dev/null
+++ b/t/corpus/BBU2/Blog/83701_a_82376/header.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="text" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+ <fileDesc>
+ <titleStmt>
+ <textSigle>Corola-Bucurenci/Blog.83701_a_82376</textSigle>
+ <t.title>Confesiunile unui misogin</t.title>
+ </titleStmt>
+ <publicationStmt>
+ <distributor/>
+ <pubAddress/>
+ <availability region="world">QAO-NC</availability>
+ <pubDate/>
+ </publicationStmt>
+ <sourceDesc>
+ <biblStruct>
+ <monogr>
+ <h.author>Dragoș Bucurenci</h.author>
+ <editor role="translator">-</editor>
+ <imprint>
+ <pubDate type="year">2013</pubDate>
+ <pubDate type="month">11</pubDate>
+ <pubDate type="day">18</pubDate>
+ <pubPlace>URL:http://www.bucurenci.ro</pubPlace>
+ </imprint>
+ </monogr>
+ </biblStruct>
+ </sourceDesc>
+ </fileDesc>
+ <profileDesc>
+ <textDesc>
+ <textType>-.-</textType>
+ <textClass>
+ <catRef target="-.-" scheme="topic"/>
+ </textClass>
+ </profileDesc>
+ </idsHeader>
\ No newline at end of file
diff --git a/t/corpus/BBU2/Blog/header.xml b/t/corpus/BBU2/Blog/header.xml
new file mode 100644
index 0000000..a6afb53
--- /dev/null
+++ b/t/corpus/BBU2/Blog/header.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+ <fileDesc>
+ <titleStmt>
+ <dokumentSigle>Corola-Bucurenci/Blog</dokumentSigle>
+ <d.title>-</d.title>
+ </titleStmt>
+ <publicationStmt>
+ <distributor/>
+ <pubAddress/>
+ <availability region="world">[...]</availability>
+ <pubDate/>
+ </publicationStmt>
+ <sourceDesc>
+ <biblStruct>
+ <monogr>
+ <h.title type="main"/>
+ <edition>
+ <further/>
+ <kind/>
+ <appearance/>
+ </edition>
+ <imprint/>
+ </monogr>
+ </biblStruct>
+ </sourceDesc>
+ </fileDesc>
+ </idsHeader>
\ No newline at end of file
diff --git a/t/corpus/BBU2/header.xml b/t/corpus/BBU2/header.xml
new file mode 100644
index 0000000..7901a2e
--- /dev/null
+++ b/t/corpus/BBU2/header.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsHeader type="corpus" status="new" version="1.1" TEIform="teiHeader">
+ <fileDesc>
+ <titleStmt>
+ <korpusSigle>Corola-Bucurenci</korpusSigle>
+ <c.title>Corola-Bucurenci</c.title>
+ </titleStmt>
+ </fileDesc>
+ <profileDesc>
+ <langUsage>
+ <language id="ro" usage="100">Romanian</language>
+ </langUsage>
+ <!--
+ <textDesc>
+ This element is suppressed in p5!
+ <channel mode="w">written</channel>
+ </textDesc>
+ -->
+ </profileDesc>
+ </idsHeader>
\ No newline at end of file
diff --git a/t/real/drukola.t b/t/real/drukola.t
index 4656f18..1eb4587 100644
--- a/t/real/drukola.t
+++ b/t/real/drukola.t
@@ -106,5 +106,23 @@
like($token, qr!drukola/m:gender:feminine!, 'data');
like($token, qr!drukola/p:NOUN!, 'data');
+
+# New
+# BBU2/Blog/83701_a_82376
+$path = catdir(dirname(__FILE__), '../corpus/BBU2/Blog/83701_a_82376');
+
+# The fixture headers hold only placeholders ('-', '-.-') for document
+# title, translator and topic; none of them may surface as metadata.
+ok($doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc->parse, 'Parse document');
+
+$meta = $doc->meta;
+
+ok(!exists $meta->{doc_title}, 'No doc title');
+ok(!exists $meta->{translator}, 'No translator');
+
+ok(!exists $meta->{text_class}, 'No text class');
+
+
done_testing;
__END__