Somehow fixed relation indexation and meta data parsing (consistent to the GDoc)

commit: 192057192e65d09fdc7f3c3de1d66d31c3852cc5 [log] [tgz]
author: Nils Diewald <nils@diewald-online.de> Thu Jun 18 20:06:45 2015 +0000
committer: Nils Diewald <nils@diewald-online.de> Thu Jun 18 20:06:45 2015 +0000
tree: 97d53f6e4f9d003c0d3d18d87a660025a1b2c652
parent: 0d76734639be97dc264ac8c02118faba0b1dd3df [diff]
diff --git a/t/VDI/JAN/00001/data.xml b/t/VDI/JAN/00001/data.xml
new file mode 100644
index 0000000..21fd76f
--- /dev/null
+++ b/t/VDI/JAN/00001/data.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+
+<raw_text docid="VDI_JAN.00001" xmlns="http://ids-mannheim.de/ns/KorAP">
+  <metadata file="metadata.xml" />
+  <text>hui</text>
+</raw_text>

diff --git a/t/VDI/JAN/00001/header.xml b/t/VDI/JAN/00001/header.xml
new file mode 100644
index 0000000..a919407
--- /dev/null
+++ b/t/VDI/JAN/00001/header.xml

@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsheader type="text" version="1.1">
+  <fileDesc>
+    <titleStmt>
+      <textsigle>VDI14/JAN.00001</textsigle>
+      <t.title assemblage="external">VDI14/JAN.00001 VDI nachrichten, 17.01.2014, S. 10; 10- Zz mit Zahl</t.title>
+    </titleStmt>
+    <publicationStmt>
+      <distributor></distributor>
+      <pubAddress></pubAddress>
+      <availability></availability>
+      <pubDate></pubDate>
+    </publicationStmt>
+    <sourceDesc>
+      <biblStruct>
+	<analytic>
+	  <h.title type="main">10- Zz mit Zahl</h.title>
+	  <h.author>Windhövel, Kerstin</h.author>
+	  <imprint>
+	  </imprint>
+	  <biblScope type="pp">S. 10</biblScope>
+	  <biblScope type="suppl"></biblScope>
+	  <biblScope type="suppltitle"></biblScope>
+	  <biblNote n="1">Id: 578453</biblNote>
+	</analytic>
+	<monogr>
+	  <h.title></h.title>
+	  <imprint>
+	    <pubDate type="year">2014</pubDate>
+	    <pubDate type="month">01</pubDate>
+	    <pubDate type="day">17</pubDate>
+	  </imprint>
+	  <biblScope type="issue">03</biblScope>
+	  <biblScope type="issueplace"></biblScope>
+	</monogr>
+      </biblStruct>
+      <reference type="complete" assemblage="regular">VDI14/JAN.00001 VDI nachrichten, 17.01.2014, S. 10; 10- Zz mit Zahl [Ausführliche Zitierung nicht verfügbar]</reference>
+      <reference type="short" assemblage="regular">VDI14/JAN.00001 VDI nachr., 17.01.2014, S. 10</reference>
+    </sourceDesc>
+  </fileDesc>
+  <encodingDesc>
+    <samplingDecl>
+    </samplingDecl>
+    <tagsDecl>
+      <tagUsage gi="p" occurs="2"></tagUsage>
+      <tagUsage gi="q" occurs="1"></tagUsage>
+      <tagUsage gi="s" occurs="3"></tagUsage>
+    </tagsDecl>
+  </encodingDesc>
+  <profileDesc>
+    <creation>
+      <creatdate>2014.01.17</creatdate>
+    </creation>
+    <textClass>
+      <catRef target="topic.Freizeit-Unterhaltung.Reisen" n="0.38" scheme="topic" />
+      <catRef target="topic.Politik.Ausland" n="0.14" scheme="topic" />
+      <h.keywords>
+	<keyterm></keyterm>
+      </h.keywords>
+    </textClass>
+    <textdesc>
+      <texttypeart></texttypeart>
+      <textdomain></textdomain>
+      <column></column>
+    </textdesc>
+  </profileDesc>
+</idsheader>

diff --git a/t/VDI/JAN/00001/text.txt b/t/VDI/JAN/00001/text.txt
new file mode 100644
index 0000000..0cb4652
--- /dev/null
+++ b/t/VDI/JAN/00001/text.txt

@@ -0,0 +1 @@
+hui

diff --git a/t/VDI/JAN/header.xml b/t/VDI/JAN/header.xml
new file mode 100644
index 0000000..ec1d7cb
--- /dev/null
+++ b/t/VDI/JAN/header.xml

@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsheader type="document" version="1.1">
+  <fileDesc>
+    <titleStmt>
+      <dokumentsigle>VDI14/JAN</dokumentsigle>
+      <d.title>VDI nachrichten, Januar 2014</d.title>
+    </titleStmt>
+    <publicationStmt>
+      <distributor></distributor>
+      <pubAddress></pubAddress>
+      <availability></availability>
+      <pubDate></pubDate>
+    </publicationStmt>
+    <sourceDesc>
+      <biblStruct>
+	<monogr>
+	  <h.title></h.title>
+	  <imprint>
+	  </imprint>
+	</monogr>
+      </biblStruct>
+    </sourceDesc>
+  </fileDesc>
+</idsheader>

diff --git a/t/VDI/header.xml b/t/VDI/header.xml
new file mode 100644
index 0000000..28a29b4
--- /dev/null
+++ b/t/VDI/header.xml

@@ -0,0 +1,277 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<?xml-model href="header.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
+<idsheader type="corpus" pattern="Ztg/Zschr" version="1.1">
+  <fileDesc>
+    <titleStmt>
+      <korpussigle>VDI14</korpussigle>
+      <c.title>VDI nachrichten 2014</c.title>
+    </titleStmt>
+    <publicationStmt>
+      <distributor>		Institut für Deutsche Sprache		</distributor>
+      <pubAddress>		Postfach 10 16 21, D-68016 Mannheim	</pubAddress>
+      <telephone>		+49 (0)621 1581 0			</telephone>
+      <availability></availability>
+      <pubDate></pubDate>
+    </publicationStmt>
+    <sourceDesc>
+      <biblFull>
+	<titleStmt>
+	  <x.title></x.title>
+	</titleStmt>
+	<editionStmt>
+	</editionStmt>
+	<publicationStmt>
+	  <distributor></distributor>
+	  <pubAddress></pubAddress>
+	  <availability></availability>
+	  <pubDate></pubDate>
+	</publicationStmt>
+      </biblFull>
+      <biblStruct>
+	<monogr>
+	  <h.title type="main">VDI nachrichten</h.title>
+	  <h.title type="sub"></h.title>
+	  <h.title type="abbr" level="m">VDI nachr.</h.title>
+	  <editor>Verein Deutscher Ingenieure</editor>
+	  <imprint>
+	    <publisher>VDI Verlag GmbH</publisher>
+	    <pubPlace>Düsseldorf</pubPlace>
+	  </imprint>
+	  <biblScope type="vol"></biblScope>
+	</monogr>
+      </biblStruct>
+      <reference type="super" assemblage="regular">VDI14 VDI nachrichten, [Wochenzeitung]; Hrsg.: Verein Deutscher Ingenieure, Düsseldorf: VDI Verlag GmbH; 2014</reference>
+    </sourceDesc>
+  </fileDesc>
+  <encodingDesc>
+    <projectDesc>
+    </projectDesc>
+    <samplingDecl>
+    </samplingDecl>
+    <editorialDecl>
+      <transduction>
+	TraDuCES - Korpus-Transformationscompiler, Version 3.6.4,
+	Eric Seubert, IDS Mannheim, 7. April 2014
+	Optionen bei der Konvertierung:
+	- Dubletten-Modus:
+	Entfernung aller als Dubletten klassifizierten Texte.
+	- Indexierungsmodus für COSMAS II:
+	Erzeugung von Ersatzreferenzen für ausführliche Zitierung.
+	Entfernung aller Deklarationen für Dubletten.
+	Entfernung von Texten mit Sperrvermerken.
+	Entfernung von Texten mit minimalem Inhalt.
+      </transduction>
+      <pagination type="no"></pagination>
+    </editorialDecl>
+    <classDecl>
+      <taxonomy id="topic">
+	<h.bibl>Thementaxonomie (siehe http://www.ids-mannheim.de/kl/projekte/methoden/te.html)</h.bibl>
+	<category id="topic.Fiktion">
+	  <catDesc>Fiktion</catDesc>
+	  <category id="topic.Fiktion.Vermischtes">
+	    <catDesc>Fiktion:Vermischtes</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Freizeit-Unterhaltung">
+	  <catDesc>Freizeit_Unterhaltung</catDesc>
+	  <category id="topic.Freizeit-Unterhaltung.Reisen">
+	    <catDesc>Freizeit_Unterhaltung:Reisen</catDesc>
+	  </category>
+	  <category id="topic.Freizeit-Unterhaltung.Rundfunk">
+	    <catDesc>Freizeit_Unterhaltung:Rundfunk</catDesc>
+	  </category>
+	  <category id="topic.Freizeit-Unterhaltung.Vereine-Veranstaltungen">
+	    <catDesc>Freizeit_Unterhaltung:Vereine_Veranstaltungen</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Gesundheit-Ernaehrung">
+	  <catDesc>Gesundheit_Ernaehrung</catDesc>
+	  <category id="topic.Gesundheit-Ernaehrung.Ernaehrung">
+	    <catDesc>Gesundheit_Ernaehrung:Ernaehrung</catDesc>
+	  </category>
+	  <category id="topic.Gesundheit-Ernaehrung.Gesundheit">
+	    <catDesc>Gesundheit_Ernaehrung:Gesundheit</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Kultur">
+	  <catDesc>Kultur</catDesc>
+	  <category id="topic.Kultur.Bildende-Kunst">
+	    <catDesc>Kultur:Bildende Kunst</catDesc>
+	  </category>
+	  <category id="topic.Kultur.Darstellende-Kunst">
+	    <catDesc>Kultur:Darstellende Kunst</catDesc>
+	  </category>
+	  <category id="topic.Kultur.Film">
+	    <catDesc>Kultur:Film</catDesc>
+	  </category>
+	  <category id="topic.Kultur.Literatur">
+	    <catDesc>Kultur:Literatur</catDesc>
+	  </category>
+	  <category id="topic.Kultur.Mode">
+	    <catDesc>Kultur:Mode</catDesc>
+	  </category>
+	  <category id="topic.Kultur.Musik">
+	    <catDesc>Kultur:Musik</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Natur-Umwelt">
+	  <catDesc>Natur_Umwelt</catDesc>
+	  <category id="topic.Natur-Umwelt.Garten">
+	    <catDesc>Natur_Umwelt:Garten</catDesc>
+	  </category>
+	  <category id="topic.Natur-Umwelt.Tiere">
+	    <catDesc>Natur_Umwelt:Tiere</catDesc>
+	  </category>
+	  <category id="topic.Natur-Umwelt.Wetter-Klima">
+	    <catDesc>Natur_Umwelt:Wetter_Klima</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Politik">
+	  <catDesc>Politik</catDesc>
+	  <category id="topic.Politik.Ausland">
+	    <catDesc>Politik:Ausland</catDesc>
+	  </category>
+	  <category id="topic.Politik.Inland">
+	    <catDesc>Politik:Inland</catDesc>
+	  </category>
+	  <category id="topic.Politik.Kommunalpolitik">
+	    <catDesc>Politik:Kommunalpolitik</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Rest">
+	  <catDesc>Rest</catDesc>
+	  <category id="topic.Rest.boersenkurse">
+	    <catDesc>Rest:boersenkurse</catDesc>
+	  </category>
+	  <category id="topic.Rest.geburt-tod-heirat">
+	    <catDesc>Rest:geburt_tod_heirat</catDesc>
+	  </category>
+	  <category id="topic.Rest.impressum">
+	    <catDesc>Rest:impressum</catDesc>
+	  </category>
+	  <category id="topic.Rest.inhaltsverzeichnisse">
+	    <catDesc>Rest:inhaltsverzeichnisse</catDesc>
+	  </category>
+	  <category id="topic.Rest.ligatabellen">
+	    <catDesc>Rest:ligatabellen</catDesc>
+	  </category>
+	  <category id="topic.Rest.tabellen">
+	    <catDesc>Rest:tabellen</catDesc>
+	  </category>
+	  <category id="topic.Rest.veranstaltungshinweise">
+	    <catDesc>Rest:veranstaltungshinweise</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Sport">
+	  <catDesc>Sport</catDesc>
+	  <category id="topic.Sport.Ballsport">
+	    <catDesc>Sport:Ballsport</catDesc>
+	  </category>
+	  <category id="topic.Sport.Fussball">
+	    <catDesc>Sport:Fussball</catDesc>
+	  </category>
+	  <category id="topic.Sport.Motorsport">
+	    <catDesc>Sport:Motorsport</catDesc>
+	  </category>
+	  <category id="topic.Sport.Radsport">
+	    <catDesc>Sport:Radsport</catDesc>
+	  </category>
+	  <category id="topic.Sport.Tennis">
+	    <catDesc>Sport:Tennis</catDesc>
+	  </category>
+	  <category id="topic.Sport.Vermischtes">
+	    <catDesc>Sport:Vermischtes</catDesc>
+	  </category>
+	  <category id="topic.Sport.Wintersport">
+	    <catDesc>Sport:Wintersport</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Staat-Gesellschaft">
+	  <catDesc>Staat_Gesellschaft</catDesc>
+	  <category id="topic.Staat-Gesellschaft.Arbeit-und-Beruf">
+	    <catDesc>Staat_Gesellschaft:Arbeit_und_Beruf</catDesc>
+	  </category>
+	  <category id="topic.Staat-Gesellschaft.Bildung">
+	    <catDesc>Staat_Gesellschaft:Bildung</catDesc>
+	  </category>
+	  <category id="topic.Staat-Gesellschaft.Biographien-Interviews">
+	    <catDesc>Staat_Gesellschaft:Biographien_Interviews</catDesc>
+	  </category>
+	  <category id="topic.Staat-Gesellschaft.Drittes-Reich-Rechtsextremismus">
+	    <catDesc>Staat_Gesellschaft:Drittes_Reich_Rechtsextremismus</catDesc>
+	  </category>
+	  <category id="topic.Staat-Gesellschaft.Familie-Geschlecht">
+	    <catDesc>Staat_Gesellschaft:Familie_Geschlecht</catDesc>
+	  </category>
+	  <category id="topic.Staat-Gesellschaft.Kirche">
+	    <catDesc>Staat_Gesellschaft:Kirche</catDesc>
+	  </category>
+	  <category id="topic.Staat-Gesellschaft.Recht">
+	    <catDesc>Staat_Gesellschaft:Recht</catDesc>
+	  </category>
+	  <category id="topic.Staat-Gesellschaft.Tod">
+	    <catDesc>Staat_Gesellschaft:Tod</catDesc>
+	  </category>
+	  <category id="topic.Staat-Gesellschaft.Verbrechen">
+	    <catDesc>Staat_Gesellschaft:Verbrechen</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Technik-Industrie">
+	  <catDesc>Technik_Industrie</catDesc>
+	  <category id="topic.Technik-Industrie.EDV-Elektronik">
+	    <catDesc>Technik_Industrie:EDV_Elektronik</catDesc>
+	  </category>
+	  <category id="topic.Technik-Industrie.Kfz">
+	    <catDesc>Technik_Industrie:Kfz</catDesc>
+	  </category>
+	  <category id="topic.Technik-Industrie.Transport-Verkehr">
+	    <catDesc>Technik_Industrie:Transport_Verkehr</catDesc>
+	  </category>
+	  <category id="topic.Technik-Industrie.Umweltschutz">
+	    <catDesc>Technik_Industrie:Umweltschutz</catDesc>
+	  </category>
+	  <category id="topic.Technik-Industrie.Unfaelle">
+	    <catDesc>Technik_Industrie:Unfaelle</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Wirtschaft-Finanzen">
+	  <catDesc>Wirtschaft_Finanzen</catDesc>
+	  <category id="topic.Wirtschaft-Finanzen.Banken">
+	    <catDesc>Wirtschaft_Finanzen:Banken</catDesc>
+	  </category>
+	  <category id="topic.Wirtschaft-Finanzen.Bilanzen">
+	    <catDesc>Wirtschaft_Finanzen:Bilanzen</catDesc>
+	  </category>
+	  <category id="topic.Wirtschaft-Finanzen.Oeffentliche-Finanzen">
+	    <catDesc>Wirtschaft_Finanzen:Oeffentliche_Finanzen</catDesc>
+	  </category>
+	  <category id="topic.Wirtschaft-Finanzen.Sozialprodukt">
+	    <catDesc>Wirtschaft_Finanzen:Sozialprodukt</catDesc>
+	  </category>
+	  <category id="topic.Wirtschaft-Finanzen.Waehrung">
+	    <catDesc>Wirtschaft_Finanzen:Waehrung</catDesc>
+	  </category>
+	</category>
+	<category id="topic.Wissenschaft">
+	  <catDesc>Wissenschaft</catDesc>
+	  <category id="topic.Wissenschaft.Populaerwissenschaft">
+	    <catDesc>Wissenschaft:Populaerwissenschaft</catDesc>
+	  </category>
+	</category>
+	<category id="topic.unklassifizierbar">
+	  <catDesc>Text ist thematisch nicht klassifizierbar.</catDesc>
+	</category>
+      </taxonomy>
+    </classDecl>
+  </encodingDesc>
+  <profileDesc>
+    <langusage>
+      <language id="de" usage="100">Deutsch</language>
+    </langusage>
+    <textdesc>
+      <texttype>Zeitung: Wochenzeitung</texttype>
+      <texttyperef>Wochenzeitung</texttyperef>
+    </textdesc>
+  </profileDesc>
+</idsheader>

diff --git a/t/artificial-subtoken.t b/t/artificial-subtoken.t
index ebf3b33..7a30103 100644
--- a/t/artificial-subtoken.t
+++ b/t/artificial-subtoken.t

@@ -16,7 +16,7 @@
 
 my $path = catdir(dirname(__FILE__), 'artificial');
 ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
-is($doc->path, $path . '/', 'Path');
+like($doc->path, qr!$path/$!, 'Path');
 ok($doc->parse, 'Parse document');
 
 sub new_tokenizer {

diff --git a/t/meta.t b/t/meta.t
index 87180de..5159889 100644
--- a/t/meta.t
+++ b/t/meta.t

@@ -11,22 +11,26 @@
 use File::Basename 'dirname';
 use File::Spec::Functions 'catdir';
 
+
+# TODO: Make 'text' -> 'primaryText'
+
 use_ok('KorAP::Document');
 
 # WPD/00001
 my $path = catdir(dirname(__FILE__), 'WPD/00001');
 ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
-is($doc->path, $path . '/', 'Path');
+like($doc->path, qr!$path/!, 'Path');
 
 ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
-is($doc->path, $path . '/', 'Path');
+like($doc->path, qr!$path/$!, 'Path');
 
 ok($doc->parse, 'Parse document');
 
 # Metdata
+is($doc->text_sigle, 'WPD_AAA.00001', 'ID');
+
 is($doc->title, 'A', 'title');
 ok(!$doc->sub_title, 'subTitle');
-is($doc->text_sigle, 'WPD_AAA.00001', 'ID');
 is($doc->corpus_sigle, 'WPD', 'corpusID');
 is($doc->pub_date, '20050328', 'pubDate');
 is($doc->pub_place, 'URL:http://de.wikipedia.org', 'pubPlace');
@@ -35,21 +39,30 @@
 is($doc->text_class->[2], 'wissenschaft', 'TextClass');
 is($doc->text_class->[3], 'populaerwissenschaft', 'TextClass');
 ok(!$doc->text_class->[4], 'TextClass');
-is($doc->author->[0], 'Ruru', 'author');
-is($doc->author->[1], 'Jens.Ol', 'author');
-is($doc->author->[2], 'Aglarech', 'author');
-ok(!$doc->author->[3], 'author');
+is($doc->author, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
+
+#is($doc->author->[0], 'Ruru', 'author');
+#is($doc->author->[1], 'Jens.Ol', 'author');
+#is($doc->author->[2], 'Aglarech', 'author');
+#ok(!$doc->author->[3], 'author');
 
 # Additional information
-is($doc->editor,'wikipedia.org', 'Editor');
+ok(!$doc->editor, 'Editor');
 is($doc->publisher, 'Wikipedia', 'Publisher');
 is($doc->creation_date, '20050000', 'Creation date');
-is($doc->coll_title, 'Wikipedia', 'Collection title');
-is($doc->coll_sub_title, 'Die freie Enzyklopädie', 'Collection subtitle');
-is($doc->coll_editor, 'wikipedia.org', 'Collection editor');
-ok(!$doc->coll_author, 'Collection author');
 ok(!$doc->text_type, 'No text_type');
-ok(!$doc->text_type_art, 'text_type art');
+ok(!$doc->text_type_art, 'no text_type art');
+ok(!$doc->text_type_ref, 'no text_type ref');
+ok(!$doc->text_domain, 'no text_domain');
+ok(!$doc->text_column, 'no text_column');
+ok(!$doc->keywords_string, 'no keywords');
+is($doc->text_class_string, 'freizeit-unterhaltung reisen wissenschaft populaerwissenschaft', 'no text classes');
+ok(!$doc->language, 'no text_column');
+
+#is($doc->coll_title, 'Wikipedia', 'Collection title');
+#is($doc->coll_sub_title, 'Die freie Enzyklopädie', 'Collection subtitle');
+#is($doc->coll_editor, 'wikipedia.org', 'Collection editor');
+#ok(!$doc->coll_author, 'Collection author');
 
 # BRZ13/00001
 $path = catdir(dirname(__FILE__), 'BRZ13/00001');
@@ -60,21 +73,24 @@
 ok(!$doc->sub_title, 'subTitle');
 is($doc->text_sigle, 'BRZ13_APR.00001', 'ID');
 is($doc->corpus_sigle, 'BRZ13', 'corpusID');
+
+
 is($doc->pub_date, '20130402', 'pubDate');
 is($doc->pub_place, 'Braunschweig', 'pubPlace');
+
 is($doc->text_class->[0], 'staat-gesellschaft', 'TextClass');
 is($doc->text_class->[1], 'familie-geschlecht', 'TextClass');
 ok(!$doc->text_class->[2], 'TextClass');
-ok(!$doc->author->[0], 'author');
+ok(!$doc->author, 'author');
 
 # Additional information
 ok(!$doc->editor, 'Editor');
 is($doc->publisher, 'Braunschweiger Zeitungsverlag, Druckhaus Albert Limbach GmbH & Co. KG', 'Publisher');
 is($doc->creation_date, '20130402', 'Creation date');
-is($doc->coll_title, 'Braunschweiger Zeitung', 'Collection title');
-ok(!$doc->coll_sub_title, 'Collection subtitle');
-ok(!$doc->coll_editor, 'Collection editor');
-ok(!$doc->coll_author, 'Collection author');
+#is($doc->coll_title, 'Braunschweiger Zeitung', 'Collection title');
+#ok(!$doc->coll_sub_title, 'Collection subtitle');
+#ok(!$doc->coll_editor, 'Collection editor');
+#ok(!$doc->coll_author, 'Collection author');
 is($doc->text_type, 'Zeitung: Tageszeitung', 'text_type');
 ok(!$doc->text_type_art, 'text_type art');
 
@@ -92,17 +108,16 @@
 is($doc->text_class->[0], 'freizeit-unterhaltung', 'TextClass');
 is($doc->text_class->[1], 'vereine-veranstaltungen', 'TextClass');
 ok(!$doc->text_class->[2], 'TextClass');
-ok(!$doc->author->[0], 'author');
-
+ok(!$doc->author, 'author');
 
 # Additional information
 ok(!$doc->editor, 'Editor');
 ok(!$doc->publisher, 'Publisher');
 is($doc->creation_date, '20010402', 'Creation date');
-ok(!$doc->coll_title, 'Collection title');
-ok(!$doc->coll_sub_title, 'Collection subtitle');
-ok(!$doc->coll_editor, 'Collection editor');
-ok(!$doc->coll_author, 'Collection author');
+#ok(!$doc->coll_title, 'Collection title');
+#ok(!$doc->coll_sub_title, 'Collection subtitle');
+#ok(!$doc->coll_editor, 'Collection editor');
+#ok(!$doc->coll_author, 'Collection author');
 ok(!$doc->text_type, 'text_type');
 is($doc->text_type_art, 'Bericht', 'text_type art');
 
@@ -112,7 +127,8 @@
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
 
 ok($doc->parse, 'Parse document');
-is($doc->title, 'Amtsblatt des Landesbezirks Baden [diverse Erlasse]', 'title');
+is($doc->title, 'MK2/ERL.00001 Amtsblatt des Landesbezirks Baden [diverse Erlasse], Hrsg. und Schriftleitung: Präsidialstelle der Landesverwaltung Baden in Karlsruhe. - Karlsruhe, o.J.', 'title'); # Amtsblatt des Landesbezirks Baden [diverse Erlasse]
+
 ok(!$doc->sub_title, 'subTitle');
 is($doc->text_sigle, 'MK2_ERL.00001', 'ID');
 is($doc->corpus_sigle, 'MK2', 'corpusID');
@@ -121,27 +137,26 @@
 is($doc->text_class->[0], 'politik', 'TextClass');
 is($doc->text_class->[1], 'kommunalpolitik', 'TextClass');
 ok(!$doc->text_class->[2], 'TextClass');
-ok(!$doc->author->[0], 'author');
+ok(!$doc->author, 'author');
 
 # Additional information
 ok(!$doc->editor, 'Editor');
 is($doc->publisher, 'Badenia Verlag und Druckerei', 'Publisher');
 is($doc->creation_date, '19600000', 'Creation date');
-diag 'Non-acceptance of creation date ranges is temporary';
-ok(!$doc->coll_title, 'Collection title');
-ok(!$doc->coll_sub_title, 'Collection subtitle');
-ok(!$doc->coll_editor, 'Collection editor');
-ok(!$doc->coll_author, 'Collection author');
+diag 'Non-acceptance of creation date ranges may be temporary';
+#ok(!$doc->coll_title, 'Collection title');
+#ok(!$doc->coll_sub_title, 'Collection subtitle');
+#ok(!$doc->coll_editor, 'Collection editor');
+#ok(!$doc->coll_author, 'Collection author');
 is($doc->text_type, 'Erlass', 'text_type');
 ok(!$doc->text_type_art, 'text_type art');
 
-
 # A01/02035-substring
 $path = catdir(dirname(__FILE__), 'A01/02035-substring');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
 
 ok($doc->parse, 'Parse document');
-ok(!$doc->title, 'title');
+is($doc->title, 'A00/JAN.02035 St. Galler Tagblatt, 11.01.2000, Ressort: TB-RSP (Abk.)', 'title');
 ok(!$doc->sub_title, 'subTitle');
 is($doc->text_sigle, 'A00_JAN.02035', 'ID');
 is($doc->corpus_sigle, 'A00', 'corpusID');
@@ -150,20 +165,19 @@
 is($doc->text_class->[0], 'sport', 'TextClass');
 is($doc->text_class->[1], 'ballsport', 'TextClass');
 ok(!$doc->text_class->[2], 'TextClass');
-ok(!$doc->author->[0], 'author');
+ok(!$doc->author, 'author');
 
 # Additional information
 ok(!$doc->editor, 'Editor');
 ok(!$doc->publisher, 'Publisher');
 is($doc->creation_date, "20000111", 'Creation date');
-ok(!$doc->coll_title, 'Collection title');
-ok(!$doc->coll_sub_title, 'Collection subtitle');
-ok(!$doc->coll_editor, 'Collection editor');
-ok(!$doc->coll_author, 'Collection author');
+#ok(!$doc->coll_title, 'Collection title');
+#ok(!$doc->coll_sub_title, 'Collection subtitle');
+#ok(!$doc->coll_editor, 'Collection editor');
+#ok(!$doc->coll_author, 'Collection author');
 ok(!$doc->text_type, 'text_type');
 is($doc->text_type_art, 'Bericht', 'text_type art');
 
-
 # A01/02873-meta
 $path = catdir(dirname(__FILE__), 'A01/02873-meta');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
@@ -178,16 +192,16 @@
 is($doc->text_class->[0], 'kultur', 'TextClass');
 is($doc->text_class->[1], 'film', 'TextClass');
 ok(!$doc->text_class->[2], 'TextClass');
-ok(!$doc->author->[0], 'author');
+ok(!$doc->author, 'author');
 
 # Additional information
 ok(!$doc->editor, 'Editor');
 ok(!$doc->publisher, 'Publisher');
 is($doc->creation_date, "20000113", 'Creation date');
-ok(!$doc->coll_title, 'Collection title');
-ok(!$doc->coll_sub_title, 'Collection subtitle');
-ok(!$doc->coll_editor, 'Collection editor');
-ok(!$doc->coll_author, 'Collection author');
+#ok(!$doc->coll_title, 'Collection title');
+#ok(!$doc->coll_sub_title, 'Collection subtitle');
+#ok(!$doc->coll_editor, 'Collection editor');
+#ok(!$doc->coll_author, 'Collection author');
 ok(!$doc->text_type, 'text_type');
 is($doc->text_type_art, 'Bericht', 'text_type art');
 
@@ -206,21 +220,20 @@
 is($doc->text_class->[0], 'gesundheit-ernaehrung', 'TextClass');
 is($doc->text_class->[1], 'gesundheit', 'TextClass');
 ok(!$doc->text_class->[2], 'TextClass');
-ok(!$doc->author->[0], 'author');
+ok(!$doc->author, 'author');
 
 # Additional information
 ok(!$doc->editor, 'Editor');
 ok(!$doc->publisher, 'Publisher');
 is($doc->creation_date, "20000124", 'Creation date');
-ok(!$doc->coll_title, 'Collection title');
-ok(!$doc->coll_sub_title, 'Collection subtitle');
-ok(!$doc->coll_editor, 'Collection editor');
-ok(!$doc->coll_author, 'Collection author');
+#ok(!$doc->coll_title, 'Collection title');
+#ok(!$doc->coll_sub_title, 'Collection subtitle');
+#ok(!$doc->coll_editor, 'Collection editor');
+#ok(!$doc->coll_author, 'Collection author');
 ok(!$doc->text_type, 'text_type');
 is($doc->text_type_art, 'Bericht', 'text_type art');
 
 
-
 # A01/07452-deep
 $path = catdir(dirname(__FILE__), 'A01/07452-deep');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
@@ -235,26 +248,27 @@
 is($doc->text_class->[0], 'politik', 'TextClass');
 is($doc->text_class->[1], 'kommunalpolitik', 'TextClass');
 ok(!$doc->text_class->[2], 'TextClass');
-ok(!$doc->author->[0], 'author');
+ok(!$doc->author, 'author');
 
 # Additional information
 ok(!$doc->editor, 'Editor');
 ok(!$doc->publisher, 'Publisher');
 is($doc->creation_date, "20000129", 'Creation date');
-ok(!$doc->coll_title, 'Collection title');
-ok(!$doc->coll_sub_title, 'Collection subtitle');
-ok(!$doc->coll_editor, 'Collection editor');
-ok(!$doc->coll_author, 'Collection author');
+#ok(!$doc->coll_title, 'Collection title');
+#ok(!$doc->coll_sub_title, 'Collection subtitle');
+#ok(!$doc->coll_editor, 'Collection editor');
+#ok(!$doc->coll_author, 'Collection author');
 ok(!$doc->text_type, 'text_type');
 is($doc->text_type_art, 'Bericht', 'text_type art');
 
+
 # ART
 $path = catdir(dirname(__FILE__), 'artificial');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
-is($doc->path, $path . '/', 'Path');
+#is($doc->path, $path . '/', 'Path');
 
 ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
-is($doc->path, $path . '/', 'Path');
+#is($doc->path, $path . '/', 'Path');
 
 ok($doc->parse, 'Parse document');
 
@@ -268,21 +282,119 @@
 is($doc->text_class->[0], 'freizeit-unterhaltung', 'TextClass');
 is($doc->text_class->[1], 'vereine-veranstaltungen', 'TextClass');
 ok(!$doc->text_class->[2], 'TextClass');
-is($doc->author->[0], 'Ruru', 'author');
-is($doc->author->[1], 'Jens.Ol', 'author');
-is($doc->author->[2], 'Aglarech', 'author');
-ok(!$doc->author->[3], 'author');
+#is($doc->author->[0], 'Ruru', 'author');
+#is($doc->author->[1], 'Jens.Ol', 'author');
+#is($doc->author->[2], 'Aglarech', 'author');
+is($doc->author, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
 
 # Additional information
 is($doc->editor, 'Nils Diewald', 'Editor');
 is($doc->publisher, 'Artificial articles Inc.', 'Publisher');
 is($doc->creation_date, '19990601', 'Creation date');
-is($doc->coll_title, 'Artificial articles', 'Collection title');
-is($doc->coll_sub_title, 'Best of!', 'Collection subtitle');
-is($doc->coll_editor, 'Nils Diewald', 'Collection editor');
-is($doc->coll_author, 'Nils Diewald', 'Collection author');
+#is($doc->coll_title, 'Artificial articles', 'Collection title');
+#is($doc->coll_sub_title, 'Best of!', 'Collection subtitle');
+#is($doc->coll_editor, 'Nils Diewald', 'Collection editor');
+#is($doc->coll_author, 'Nils Diewald', 'Collection author');
 is($doc->text_type, 'Zeitung: Tageszeitung', 'No text_type');
 is($doc->text_type_art, 'Bericht', 'text_type art');
 
+# Multipath headers
+$path = catdir(dirname(__FILE__), 'VDI/JAN/00001');
+ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+like($doc->path, qr!$path/!, 'Path');
+
+ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
+like($doc->path, qr!$path/$!, 'Path');
+
+ok($doc->parse, 'Parse document');
+is($doc->text_sigle, 'VDI_JAN.00001', 'text sigle');
+is($doc->doc_sigle, 'VDI_JAN', 'doc sigle');
+is($doc->corpus_sigle, 'VDI', 'corpus sigle');
+is($doc->title, '10- Zz mit Zahl', 'title');
+ok(!$doc->sub_title, 'subtitle');
+is($doc->pub_date, '20140117', 'pubdate');
+is($doc->pub_place, 'Düsseldorf', 'pubplace');
+is($doc->author, 'Windhövel, Kerstin', 'author');
+is($doc->publisher, 'VDI Verlag GmbH', 'publisher');
+ok(!$doc->editor, 'editor');
+
+ok(!$doc->text_type, 'text type');
+ok(!$doc->text_type_art, 'text type art');
+ok(!$doc->text_type_ref, 'text type ref');
+ok(!$doc->text_column, 'text column');
+ok(!$doc->text_domain, 'text domain');
+ok(!$doc->creation_date, 'creation date');
+ok(!$doc->license, 'License');
+ok(!$doc->pages, 'Pages');
+ok(!$doc->file_edition_statement, 'file edition statement');
+ok(!$doc->bibl_edition_statement, 'bibl edition statement');
+is($doc->reference, 'VDI nachrichten, 17.01.2014, S. 10; 10- Zz mit Zahl [Ausführliche Zitierung nicht verfügbar]', 'Reference');
+
+ok(!$doc->language, 'Language');
+diag 'This may be "de" in the future';
+
+is($doc->doc_title, 'VDI nachrichten, Januar 2014', 'Doc title');
+ok(!$doc->doc_sub_title, 'Doc Sub title');
+ok(!$doc->doc_editor, 'Doc editor');
+ok(!$doc->doc_author, 'Doc author');
+
+is($doc->corpus_title, 'VDI nachrichten 2014', 'Corpus title');
+ok(!$doc->corpus_sub_title, 'Corpus Sub title');
+ok(!$doc->corpus_editor, 'Corpus editor');
+ok(!$doc->corpus_author, 'Corpus author');
+
+is($doc->keywords_string, '', 'Keywords');
+is($doc->text_class_string, 'Freizeit-Unterhaltung Reisen Politik Ausland', 'Text class');
+
+
+# WDD
+$path = catdir(dirname(__FILE__), 'WDD/G27/38989');
+ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+like($doc->path, qr!$path/!, 'Path');
+ok($doc->parse, 'Parse document');
+
+is($doc->text_sigle, 'WDD11_G27.38989', 'text sigle');
+is($doc->doc_sigle, 'WDD11_G27', 'doc sigle');
+is($doc->corpus_sigle, 'WDD11', 'corpus sigle');
+
+is($doc->title, 'Diskussion:Gunter A. Pilz', 'title');
+ok(!$doc->sub_title, 'subtitle');
+is($doc->pub_date, '20111029', 'pubdate');
+is($doc->pub_place, 'URL:http://de.wikipedia.org', 'pubplace');
+
+is($doc->author, '€pa, u.a.', 'author');
+is($doc->publisher, 'Wikipedia', 'publisher');
+ok(!$doc->editor, 'editor');
+
+is($doc->text_type, 'Diskussionen zu Enzyklopädie-Artikeln', 'text type');
+ok(!$doc->text_type_art, 'text type art');
+ok(!$doc->text_type_ref, 'text type ref');
+ok(!$doc->text_column, 'text column');
+ok(!$doc->text_domain, 'text domain');
+
+is($doc->creation_date, '20070707', 'creation date');
+is($doc->license, 'CC-BY-SA', 'License');
+ok(!$doc->pages, 'Pages');
+ok(!$doc->file_edition_statement, 'file edition statement');
+ok(!$doc->bibl_edition_statement, 'bibl edition statement');
+is($doc->reference, 'Diskussion:Gunter A. Pilz, In: Wikipedia - URL:http://de.wikipedia.org/wiki/Diskussion:Gunter_A._Pilz: Wikipedia, 2007', 'Reference');
+
+is($doc->language, 'de', 'Language');
+
+is($doc->doc_title, 'Wikipedia, Diskussionen zu Artikeln mit Anfangsbuchstabe G, Teil 27', 'Doc title');
+ok(!$doc->doc_sub_title, 'Doc Sub title');
+ok(!$doc->doc_editor, 'Doc editor');
+ok(!$doc->doc_author, 'Doc author');
+
+is($doc->corpus_title, 'Wikipedia.de 2011 Diskussionen', 'Corpus title');
+ok(!$doc->corpus_sub_title, 'Corpus Sub title');
+ok(!$doc->corpus_editor, 'Corpus editor');
+ok(!$doc->corpus_author, 'Corpus author');
+
+is($doc->keywords_string, '', 'Keywords');
+is($doc->text_class_string, '', 'Text class');
+
 done_testing;
 __END__
+
+

diff --git a/t/real_bzk.t b/t/real_bzk.t
index d033e77..a590d44 100644
--- a/t/real_bzk.t
+++ b/t/real_bzk.t

@@ -63,7 +63,7 @@
 ok(!$doc->corpus_author, 'Correct Corpus author');
 ok(!$doc->corpus_editor, 'Correct Corpus editor');
 
-is($doc->doc_title, 'Neues Deutschland', 'Correct Doc title');
+is($doc->doc_title, 'Neues Deutschland, Jahrgangsquerschnitt 1959', 'Correct Doc title');
 is($doc->doc_sub_title, 'Organ des Zentralkomitees der Sozialistischen Einheitspartei Deutschlands', 'Correct Doc sub title');
 ok(!$doc->doc_author, 'Correct Doc author');
 ok(!$doc->doc_editor, 'Correct doc editor');
@@ -127,7 +127,7 @@
 ok(!exists $output->{corpusAuthor}, 'Correct Corpus author');
 ok(!exists $output->{corpusEditor}, 'Correct Corpus editor');
 
-is($output->{docTitle}, 'Neues Deutschland', 'Correct Doc title');
+is($output->{docTitle}, 'Neues Deutschland, Jahrgangsquerschnitt 1959', 'Correct Doc title');
 is($output->{docSubTitle}, 'Organ des Zentralkomitees der Sozialistischen Einheitspartei Deutschlands', 'Correct Doc sub title');
 ok(!exists $output->{docAuthor}, 'Correct Doc author');
 ok(!exists $output->{docEditor}, 'Correct doc editor');

diff --git a/t/real_goethe.t b/t/real_goethe.t
index 878607b..9efe4c5 100644
--- a/t/real_goethe.t
+++ b/t/real_goethe.t

@@ -53,7 +53,7 @@
 REF
 is($doc->language, 'de', 'Language');
 
-is($doc->corpus_title, 'Goethes Werke', 'Correct Corpus title');
+is($doc->corpus_title, 'Goethe-Korpus', 'Correct Corpus title');
 ok(!$doc->corpus_sub_title, 'Correct Corpus Sub title');
 is($doc->corpus_author, 'Goethe, Johann Wolfgang von', 'Correct Corpus author');
 is($doc->corpus_editor, 'Trunz, Erich', 'Correct Corpus editor');
@@ -64,7 +64,6 @@
 ok(!$doc->doc_author, 'Correct Doc author');
 ok(!$doc->doc_editor, 'Correct Doc editor');
 
-
 # Tokenization
 use_ok('KorAP::Tokenizer');
 
@@ -120,7 +119,7 @@
 REF
 is($output->{language}, 'de', 'Language');
 
-is($output->{corpusTitle}, 'Goethes Werke', 'Correct Corpus title');
+is($output->{corpusTitle}, 'Goethe-Korpus', 'Correct Corpus title');
 ok(!exists $output->{corpusSubTitle}, 'Correct Text Type');
 is($output->{corpusAuthor}, 'Goethe, Johann Wolfgang von', 'Correct Corpus title');
 is($output->{corpusEditor}, 'Trunz, Erich', 'Editor');

diff --git a/t/transform.t b/t/transform.t
index 50cd2d7..783042d 100644
--- a/t/transform.t
+++ b/t/transform.t

@@ -14,6 +14,16 @@
 
 use_ok('KorAP::Document');
 
+sub _t2h {
+  my $string = shift;
+  $string =~ s/^\[\(\d+?-\d+?\)(.+?)\]$/$1/;
+  my %hash = ();
+  foreach (split(qr!\|!, $string)) {
+    $hash{$_} = 1;
+  };
+  return \%hash;
+};
+
 my @layers;
 # push(@layers, ['Base', 'Sentences']);
 push(@layers, ['Base', 'Paragraphs']);
@@ -50,10 +60,10 @@
 
 my $path = catdir(dirname(__FILE__), 'WPD/00001');
 ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
-is($doc->path, $path . '/', 'Path');
+like($doc->path, qr!$path/$!, 'Path');
 
 ok($doc = KorAP::Document->new( path => $path ), 'Load Korap::Document');
-is($doc->path, $path . '/', 'Path');
+like($doc->path, qr!$path/$!, 'Path');
 
 ok($doc->parse, 'Parse document');
 
@@ -70,10 +80,11 @@
 is($doc->text_class->[2], 'wissenschaft', 'TextClass');
 is($doc->text_class->[3], 'populaerwissenschaft', 'TextClass');
 ok(!$doc->text_class->[4], 'TextClass');
-is($doc->author->[0], 'Ruru', 'author');
-is($doc->author->[1], 'Jens.Ol', 'author');
-is($doc->author->[2], 'Aglarech', 'author');
-ok(!$doc->author->[3], 'author');
+is($doc->author, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
+#is($doc->author->[0], 'Ruru', 'author');
+#is($doc->author->[1], 'Jens.Ol', 'author');
+#is($doc->author->[2], 'Aglarech', 'author');
+#ok(!$doc->author->[3], 'author');
 
 # Get tokens
 use_ok('KorAP::Tokenizer');
@@ -87,7 +98,7 @@
 ), 'New Tokenizer');
 ok($tokens->parse, 'Parse');
 
-is($tokens->path, $path . '/', 'Path');
+like($tokens->path, qr!$path/$!, 'Path');
 is($tokens->foundry, 'OpenNLP', 'Foundry');
 is($tokens->doc->text_sigle, 'WPD_AAA.00001', 'Doc id');
 is($tokens->should, 1068, 'Should');
@@ -95,23 +106,38 @@
 is($tokens->name, 'tokens', 'Name');
 is($tokens->layer, 'Tokens', 'Layer');
 
-is($tokens->stream->pos(118)->to_string, '[(763-768)s:Linie|i:linie|_118#763-768]', 'Token is correct');
+is_deeply(_t2h($tokens->stream->pos(118)->to_string),
+   _t2h('[(763-768)s:Linie|i:linie|_118#763-768]'),
+   'Token is correct');
 
 # Add Mate
 ok($tokens->add('Mate', 'Morpho'), 'Add Mate');
 
-is($tokens->stream->pos(118)->to_string, '[(763-768)s:Linie|i:linie|_118#763-768|mate/l:linie|mate/p:NN|mate/m:case:acc|mate/m:number:sg|mate/m:gender:fem]', 'with Mate');
+is_deeply(
+  _t2h($tokens->stream->pos(118)->to_string),
+  _t2h('[(763-768)s:Linie|i:linie|_118#763-768|mate/l:linie|mate/p:NN|mate/m:case:acc|mate/m:number:sg|mate/m:gender:fem]'),
+  'with Mate');
 
 # Add sentences
 ok($tokens->add('Base', 'Sentences'), 'Add Sentences');
 
-is($tokens->stream->pos(0)->to_string, '[(0-1)s:A|i:a|_0#0-1|-:tokens$<i>923|mate/p:XY|<>:base/s:s#0-74$<i>13|<>:base/s:t#0-6083$<i>923|-:base/sentences$<i>96]', 'Startinfo');
+is_deeply(
+  _t2h($tokens->stream->pos(0)->to_string),
+  _t2h('[(0-1)s:A|i:a|_0#0-1|-:tokens$<i>923|mate/p:XY|<>:base/s:s#0-74$<i>13<b>2|<>:base/s:t#0-6083$<i>923<b>0|-:base/sentences$<i>96]'),
+  'Startinfo'
+);
 
 foreach (@layers) {
   ok($tokens->add(@$_), 'Add '. join(', ', @$_));
 };
 
-is($tokens->stream->pos(0)->to_string, '[(0-1)s:A|i:a|_0#0-1|-:tokens$<i>923|mate/p:XY|<>:base/s:s#0-74$<i>13|<>:base/s:t#0-6083$<i>923|-:base/sentences$<i>96|<>:base/s:p#0-224$<i>34|-:base/paragraphs$<i>76|opennlp/p:NE|<>:opennlp/s:s#0-74$<i>13|-:opennlp/sentences$<i>50|<>:corenlp/s:s#0-6$<i>2|-:corenlp/sentences$<i>65|cnx/l:A|cnx/p:N|cnx/syn:@NH|<>:cnx/c:np#0-1$<i>1|<>:cnx/s:s#0-74$<i>13|-:cnx/sentences$<i>62|tt/l:A|tt/p:NN|tt/l:A|tt/p:FM|<>:tt/s:s#0-6083$<i>923|-:tt/sentences$<i>1|>:mate/d:PNC$<i>2|xip/p:SYMBOL|xip/l:A|<>:xip/c:TOP#0-74$<i>13|<>:xip/c:MC#0-73$<i>13<b>1|<>:xip/c:NP#0-1$<i>1<b>2|<>:xip/c:NPA#0-1$<i>1<b>3|<>:xip/c:NOUN#0-1$<i>1<b>4|<>:xip/c:SYMBOL#0-1$<i>1<b>5|>:xip/d:SUBJ$<i>3|<:xip/d:COORD$<i>1|<>:xip/s:s#0-74$<i>13|-:xip/sentences$<i>64]', 'Startinfo');
+is(
+  _t2h($tokens->stream->pos(0)->to_string),
+  _t2h('[(0-1)s:A|i:a|_0#0-1|-:tokens$<i>923|mate/p:XY|<>:base/s:s#0-74$<i>13|<>:base/s:t#0-6083$<i>923|-:base/sentences$<i>96|<>:base/s:p#0-224$<i>34|-:base/paragraphs$<i>76|opennlp/p:NE|<>:opennlp/s:s#0-74$<i>13|-:opennlp/sentences$<i>50|<>:corenlp/s:s#0-6$<i>2|-:corenlp/sentences$<i>65|cnx/l:A|cnx/p:N|cnx/syn:@NH|<>:cnx/c:np#0-1$<i>1|<>:cnx/s:s#0-74$<i>13|-:cnx/sentences$<i>62|tt/l:A|tt/p:NN|tt/l:A|tt/p:FM|<>:tt/s:s#0-6083$<i>923|-:tt/sentences$<i>1|>:mate/d:PNC$<i>2|xip/p:SYMBOL|xip/l:A|<>:xip/c:TOP#0-74$<i>13|<>:xip/c:MC#0-73$<i>13<b>1|<>:xip/c:NP#0-1$<i>1<b>2|<>:xip/c:NPA#0-1$<i>1<b>3|<>:xip/c:NOUN#0-1$<i>1<b>4|<>:xip/c:SYMBOL#0-1$<i>1<b>5|>:xip/d:SUBJ$<i>3|<:xip/d:COORD$<i>1|<>:xip/s:s#0-74$<i>13|-:xip/sentences$<i>64]'),
+  'Startinfo');
+
+done_testing;
+__END__
 
 
 #is($tokens->stream->pos(118)->to_string,
commit	192057192e65d09fdc7f3c3de1d66d31c3852cc5	[log] [tgz]
author	Nils Diewald <nils@diewald-online.de>	Thu Jun 18 20:06:45 2015 +0000
committer	Nils Diewald <nils@diewald-online.de>	Thu Jun 18 20:06:45 2015 +0000
tree	97d53f6e4f9d003c0d3d18d87a660025a1b2c652
parent	0d76734639be97dc264ac8c02118faba0b1dd3df [diff]