Added meta data for Schreibgebrauch and fixed Metadata parsing for I5 Change-Id: Ib2c9c5cce11c67bb093b2c0aa61449adff69e16e

commit: a866578571f08dcdb3b7b7f6559aec39e49a7e6c [log] [tgz]
author: Akron <nils@diewald-online.de> Wed Jan 27 21:47:57 2016 +0100
committer: Akron <nils@diewald-online.de> Wed Jan 27 21:47:57 2016 +0100
tree: f53efe928aeb8fa246264141fbd4a920d655916f
parent: b2636cf2f8813f1ff62ade23d9afe1c098db1acc [diff]
diff --git a/lib/KorAP/Document.pm b/lib/KorAP/Document.pm
index 3e9f82e..74a3d8c 100644
--- a/lib/KorAP/Document.pm
+++ b/lib/KorAP/Document.pm

@@ -49,6 +49,9 @@
 			corpus_title
 			corpus_sub_title
 			corpus_editor
+
+			availability
+			pub_place_key
 			/;
 # Separate: text_class, keywords
 
@@ -67,6 +70,7 @@
   return $log;
 };
 
+
 sub new {
   my $class = shift;
   my $self = bless { @_ }, $class;
@@ -95,6 +99,7 @@
   }
 
   else {
+
     $file = b($data_xml)->slurp;
 
     try {
@@ -102,11 +107,10 @@
 	$error = 1;
       };
       $rt = xml2hash($file, text => '#text', attr => '-')->{raw_text};
-    }
-      catch  {
-	$self->log->warn($unable);
-	$error = 1;
-      };
+    } catch  {
+      $self->log->warn($unable);
+      $error = 1;
+    };
   };
 
   return if $error;
@@ -150,13 +154,37 @@
   foreach (@header) {
     # Get corpus, doc and text meta data
     my $type = shift(@type);
-    $self->_parse_meta_i5($_, $type) if -e $_;
+
+    next unless -e $_;
+
+    my $slurp = b($_)->slurp;
+    $slurp =~ /^[^>]+encoding\s*=\s*(["'])([^\1]+?)\1/;
+    my $file = $slurp->decode($2 // 'UTF-8');
+
+    # Get DOM
+    my $dom = Mojo::DOM->new($file);
+
+    if ($dom->at('idsHeader') || $dom->at('idsheader')) {
+      $self->_parse_meta_i5($dom, $type);
+    }
+    else {
+      $self->_parse_meta_tei($dom, $type);
+    };
   };
 
   return 1;
 };
 
 
+# Store arbitrary data
+sub store {
+  my $self = shift;
+  return $self->{store} unless @_;
+  return $self->{store}->{$_[0]} if @_ == 1;
+  $self->{store}->{$_[0]} = $_[1];
+};
+
+
 # Primary data
 sub primary {
   $_[0]->{pd};
@@ -199,7 +227,7 @@
 }
 
 sub _remove_prefix {
-  return $_[0];
+#   return $_[0];
 
   # This may render some titles wrong, e.g. 'VDI nachrichten 2014' ...
   my $title = shift;
@@ -214,14 +242,78 @@
 };
 
 
-sub _parse_meta_i5 {
+sub _parse_meta_tei {
   my $self = shift;
-  my $header_xml = shift;
+  my $dom = shift;
   my $type = shift;
 
-  my $file = b($header_xml)->slurp->decode('iso-8859-1');
+  my $stmt;
+  if ($type eq 'text' && ($stmt = $dom->at('titleStmt'))) {
 
-  my $dom = Mojo::DOM->new($file);
+    # Title
+    try {
+      $stmt->find('title')->each(
+	sub {
+	  my $type = $_->attr('type') || 'main';
+	  $self->title($_->all_text) if $type eq 'main';
+	  $self->sub_title($_->all_text) if $type eq 'sub';
+	}
+      );
+    };
+
+    # Author
+    try {
+      my $author = $stmt->at('author')->attr('ref');
+      $author = $self->{ref_author}->{$author};
+      if ($author) {
+	$self->author($author->{id});
+	$self->store('sgbrAuthorAgeClass' => $author->{age}) if $author->{age};
+	$self->store('sgbrAuthorSex' => $author->{sex}) if $author->{sex};
+      };
+    };
+
+    try {
+      my $kodex = $dom->at('item[rend]')->attr('rend');
+      $self->store('sgbrKodex' => $kodex);
+    };
+  }
+
+  elsif ($type eq 'doc') {
+    try {
+      $dom->find('particDesc person')->each(
+	sub {
+	  $self->{ref_author}->{'#' . $_->attr('xml:id')} = {
+	    age => $_->attr('age'),
+	    sex => $_->attr('sex'),
+	    id => $_->attr('xml:id')
+	  }
+	});
+    };
+
+    try {
+      my $lang = $dom->at('language[ident]')->attr('ident');
+      $self->language($lang);
+    };
+
+    try {
+      $stmt = $dom->find('titleStmt > title')->each(
+	sub {
+	  my $type = $_->attr('type') || 'main';
+	  $self->doc_title($_->all_text) if $type eq 'main';
+	  $self->doc_sub_title($_->all_text) if $type eq 'sub';
+	}
+      );
+    };
+  };
+  return;
+};
+
+
+
+sub _parse_meta_i5 {
+  my $self = shift;
+  my $dom = shift;
+  my $type = shift;
 
   my $analytic = $dom->at('analytic');
 
@@ -263,7 +355,8 @@
   if ($type eq 'corpus') {
     unless ($self->corpus_title) {
       if (my $title = $dom->at('fileDesc > titleStmt > c\.title')) {
-	$self->corpus_title(_remove_prefix($title->all_text, $self->corpus_sigle)) if $title->all_text;
+	$self->corpus_title(_remove_prefix($title->all_text, $self->corpus_sigle))
+	  if $title->all_text;
       };
     };
   }
@@ -272,7 +365,8 @@
   elsif ($type eq 'doc') {
     unless ($self->doc_title) {
       if (my $title = $dom->at('fileDesc > titleStmt > d\.title')) {
-	$self->doc_title(_remove_prefix($title->all_text, $self->doc_sigle)) if $title->all_text;
+	$self->doc_title(_remove_prefix($title->all_text, $self->doc_sigle))
+	  if $title->all_text;
       };
     };
   }
@@ -281,14 +375,16 @@
   elsif ($type eq 'text') {
     unless ($self->title) {
       if (my $title = $dom->at('fileDesc > titleStmt > t\.title')) {
-	$self->title(_remove_prefix($title->all_text, $self->text_sigle)) if $title->all_text;
-      };
+	$self->title(_remove_prefix($title->all_text, $self->text_sigle))
+	  if $title->all_text;
+      }
     };
   };
 
   # Get PubPlace
   if (my $place = $dom->at('pubPlace')) {
     $self->pub_place($place->all_text) if $place->all_text;
+    $self->pub_place_key($place->attr('key')) if $place->attr('key');
   };
 
   # Get Publisher
@@ -354,6 +450,13 @@
     };
   };
 
+  # Availability
+  try {
+    $self->availability(
+      $dom->at('availability')->all_text
+    );
+  };
+
   # Get pubDate
   my $pub_date = $dom->find('pubDate[type=year]');
   $pub_date->each(

diff --git a/t/A01/02035-substring/base/paragraph.xml b/t/A00/02035-substring/base/paragraph.xml
similarity index 100%
rename from t/A01/02035-substring/base/paragraph.xml
rename to t/A00/02035-substring/base/paragraph.xml


diff --git a/t/A01/02035-substring/base/sentences.xml b/t/A00/02035-substring/base/sentences.xml
similarity index 100%
rename from t/A01/02035-substring/base/sentences.xml
rename to t/A00/02035-substring/base/sentences.xml


diff --git a/t/A01/02035-substring/base/tokens_aggr.xml b/t/A00/02035-substring/base/tokens_aggr.xml
similarity index 100%
rename from t/A01/02035-substring/base/tokens_aggr.xml
rename to t/A00/02035-substring/base/tokens_aggr.xml


diff --git a/t/A01/02035-substring/base/tokens_conservative.xml b/t/A00/02035-substring/base/tokens_conservative.xml
similarity index 100%
rename from t/A01/02035-substring/base/tokens_conservative.xml
rename to t/A00/02035-substring/base/tokens_conservative.xml


diff --git a/t/A01/02035-substring/connexor/metadata.xml b/t/A00/02035-substring/connexor/metadata.xml
similarity index 100%
rename from t/A01/02035-substring/connexor/metadata.xml
rename to t/A00/02035-substring/connexor/metadata.xml


diff --git a/t/A01/02035-substring/connexor/morpho.xml b/t/A00/02035-substring/connexor/morpho.xml
similarity index 100%
rename from t/A01/02035-substring/connexor/morpho.xml
rename to t/A00/02035-substring/connexor/morpho.xml


diff --git a/t/A01/02035-substring/connexor/mpt.xml b/t/A00/02035-substring/connexor/mpt.xml
similarity index 100%
rename from t/A01/02035-substring/connexor/mpt.xml
rename to t/A00/02035-substring/connexor/mpt.xml


diff --git a/t/A01/02035-substring/connexor/phrase.xml b/t/A00/02035-substring/connexor/phrase.xml
similarity index 100%
rename from t/A01/02035-substring/connexor/phrase.xml
rename to t/A00/02035-substring/connexor/phrase.xml


diff --git a/t/A01/02035-substring/connexor/sentences.xml b/t/A00/02035-substring/connexor/sentences.xml
similarity index 100%
rename from t/A01/02035-substring/connexor/sentences.xml
rename to t/A00/02035-substring/connexor/sentences.xml


diff --git a/t/A01/02035-substring/connexor/syntax.xml b/t/A00/02035-substring/connexor/syntax.xml
similarity index 100%
rename from t/A01/02035-substring/connexor/syntax.xml
rename to t/A00/02035-substring/connexor/syntax.xml


diff --git a/t/A01/02035-substring/connexor/tokens.xml b/t/A00/02035-substring/connexor/tokens.xml
similarity index 100%
rename from t/A01/02035-substring/connexor/tokens.xml
rename to t/A00/02035-substring/connexor/tokens.xml


diff --git a/t/A01/02035-substring/corenlp/ne_dewac_175m_600.xml b/t/A00/02035-substring/corenlp/ne_dewac_175m_600.xml
similarity index 100%
rename from t/A01/02035-substring/corenlp/ne_dewac_175m_600.xml
rename to t/A00/02035-substring/corenlp/ne_dewac_175m_600.xml


diff --git a/t/A01/02035-substring/corenlp/ne_hgc_175m_600.xml b/t/A00/02035-substring/corenlp/ne_hgc_175m_600.xml
similarity index 100%
rename from t/A01/02035-substring/corenlp/ne_hgc_175m_600.xml
rename to t/A00/02035-substring/corenlp/ne_hgc_175m_600.xml


diff --git a/t/A01/02035-substring/corenlp/sentences.xml b/t/A00/02035-substring/corenlp/sentences.xml
similarity index 100%
rename from t/A01/02035-substring/corenlp/sentences.xml
rename to t/A00/02035-substring/corenlp/sentences.xml


diff --git a/t/A01/02035-substring/corenlp/tokens.xml b/t/A00/02035-substring/corenlp/tokens.xml
similarity index 100%
rename from t/A01/02035-substring/corenlp/tokens.xml
rename to t/A00/02035-substring/corenlp/tokens.xml


diff --git a/t/A01/02035-substring/data.xml b/t/A00/02035-substring/data.xml
similarity index 100%
rename from t/A01/02035-substring/data.xml
rename to t/A00/02035-substring/data.xml


diff --git a/t/A01/02035-substring/header.xml b/t/A00/02035-substring/header.xml
similarity index 100%
rename from t/A01/02035-substring/header.xml
rename to t/A00/02035-substring/header.xml


diff --git a/t/A01/02035-substring/mate/dependency.xml b/t/A00/02035-substring/mate/dependency.xml
similarity index 100%
rename from t/A01/02035-substring/mate/dependency.xml
rename to t/A00/02035-substring/mate/dependency.xml


diff --git a/t/A01/02035-substring/mate/morpho.xml b/t/A00/02035-substring/mate/morpho.xml
similarity index 100%
rename from t/A01/02035-substring/mate/morpho.xml
rename to t/A00/02035-substring/mate/morpho.xml


diff --git a/t/A01/02035-substring/mate/pipeline/one_token_per_line.txt b/t/A00/02035-substring/mate/pipeline/one_token_per_line.txt
similarity index 100%
rename from t/A01/02035-substring/mate/pipeline/one_token_per_line.txt
rename to t/A00/02035-substring/mate/pipeline/one_token_per_line.txt


diff --git a/t/A01/02035-substring/mate/pipeline/parsed.txt b/t/A00/02035-substring/mate/pipeline/parsed.txt
similarity index 100%
rename from t/A01/02035-substring/mate/pipeline/parsed.txt
rename to t/A00/02035-substring/mate/pipeline/parsed.txt


diff --git a/t/A01/02035-substring/mate/tokenSpans/number_tokenSpans.xml b/t/A00/02035-substring/mate/tokenSpans/number_tokenSpans.xml
similarity index 100%
rename from t/A01/02035-substring/mate/tokenSpans/number_tokenSpans.xml
rename to t/A00/02035-substring/mate/tokenSpans/number_tokenSpans.xml


diff --git a/t/A01/02035-substring/opennlp/morpho.xml b/t/A00/02035-substring/opennlp/morpho.xml
similarity index 100%
rename from t/A01/02035-substring/opennlp/morpho.xml
rename to t/A00/02035-substring/opennlp/morpho.xml


diff --git a/t/A01/02035-substring/opennlp/sentences.xml b/t/A00/02035-substring/opennlp/sentences.xml
similarity index 100%
rename from t/A01/02035-substring/opennlp/sentences.xml
rename to t/A00/02035-substring/opennlp/sentences.xml


diff --git a/t/A01/02035-substring/opennlp/tokens.xml b/t/A00/02035-substring/opennlp/tokens.xml
similarity index 100%
rename from t/A01/02035-substring/opennlp/tokens.xml
rename to t/A00/02035-substring/opennlp/tokens.xml


diff --git a/t/A01/02035-substring/struct/structure.xml b/t/A00/02035-substring/struct/structure.xml
similarity index 100%
rename from t/A01/02035-substring/struct/structure.xml
rename to t/A00/02035-substring/struct/structure.xml


diff --git a/t/A01/02035-substring/text.txt b/t/A00/02035-substring/text.txt
similarity index 100%
rename from t/A01/02035-substring/text.txt
rename to t/A00/02035-substring/text.txt


diff --git a/t/A01/02035-substring/tree_tagger/metadata.xml b/t/A00/02035-substring/tree_tagger/metadata.xml
similarity index 100%
rename from t/A01/02035-substring/tree_tagger/metadata.xml
rename to t/A00/02035-substring/tree_tagger/metadata.xml


diff --git a/t/A01/02035-substring/tree_tagger/morpho.xml b/t/A00/02035-substring/tree_tagger/morpho.xml
similarity index 100%
rename from t/A01/02035-substring/tree_tagger/morpho.xml
rename to t/A00/02035-substring/tree_tagger/morpho.xml


diff --git a/t/A01/02035-substring/tree_tagger/sentences.xml b/t/A00/02035-substring/tree_tagger/sentences.xml
similarity index 100%
rename from t/A01/02035-substring/tree_tagger/sentences.xml
rename to t/A00/02035-substring/tree_tagger/sentences.xml


diff --git a/t/A01/02035-substring/tree_tagger/tokens.xml b/t/A00/02035-substring/tree_tagger/tokens.xml
similarity index 100%
rename from t/A01/02035-substring/tree_tagger/tokens.xml
rename to t/A00/02035-substring/tree_tagger/tokens.xml


diff --git a/t/A01/02035-substring/xip/constituency.xml b/t/A00/02035-substring/xip/constituency.xml
similarity index 100%
rename from t/A01/02035-substring/xip/constituency.xml
rename to t/A00/02035-substring/xip/constituency.xml


diff --git a/t/A01/02035-substring/xip/dependency.xml b/t/A00/02035-substring/xip/dependency.xml
similarity index 100%
rename from t/A01/02035-substring/xip/dependency.xml
rename to t/A00/02035-substring/xip/dependency.xml


diff --git a/t/A01/02035-substring/xip/metadata.xml b/t/A00/02035-substring/xip/metadata.xml
similarity index 100%
rename from t/A01/02035-substring/xip/metadata.xml
rename to t/A00/02035-substring/xip/metadata.xml


diff --git a/t/A01/02035-substring/xip/morpho.xml b/t/A00/02035-substring/xip/morpho.xml
similarity index 100%
rename from t/A01/02035-substring/xip/morpho.xml
rename to t/A00/02035-substring/xip/morpho.xml


diff --git a/t/A01/02035-substring/xip/sentences.xml b/t/A00/02035-substring/xip/sentences.xml
similarity index 100%
rename from t/A01/02035-substring/xip/sentences.xml
rename to t/A00/02035-substring/xip/sentences.xml


diff --git a/t/A01/02035-substring/xip/tokens.xml b/t/A00/02035-substring/xip/tokens.xml
similarity index 100%
rename from t/A01/02035-substring/xip/tokens.xml
rename to t/A00/02035-substring/xip/tokens.xml


diff --git a/t/A01/02873-meta/base/paragraph.xml b/t/A00/02873-meta/base/paragraph.xml
similarity index 100%
rename from t/A01/02873-meta/base/paragraph.xml
rename to t/A00/02873-meta/base/paragraph.xml


diff --git a/t/A01/02873-meta/base/sentences.xml b/t/A00/02873-meta/base/sentences.xml
similarity index 100%
rename from t/A01/02873-meta/base/sentences.xml
rename to t/A00/02873-meta/base/sentences.xml


diff --git a/t/A01/02873-meta/base/tokens_aggr.xml b/t/A00/02873-meta/base/tokens_aggr.xml
similarity index 100%
rename from t/A01/02873-meta/base/tokens_aggr.xml
rename to t/A00/02873-meta/base/tokens_aggr.xml


diff --git a/t/A01/02873-meta/base/tokens_conservative.xml b/t/A00/02873-meta/base/tokens_conservative.xml
similarity index 100%
rename from t/A01/02873-meta/base/tokens_conservative.xml
rename to t/A00/02873-meta/base/tokens_conservative.xml


diff --git a/t/A01/02873-meta/connexor/metadata.xml b/t/A00/02873-meta/connexor/metadata.xml
similarity index 100%
rename from t/A01/02873-meta/connexor/metadata.xml
rename to t/A00/02873-meta/connexor/metadata.xml


diff --git a/t/A01/02873-meta/connexor/morpho.xml b/t/A00/02873-meta/connexor/morpho.xml
similarity index 100%
rename from t/A01/02873-meta/connexor/morpho.xml
rename to t/A00/02873-meta/connexor/morpho.xml


diff --git a/t/A01/02873-meta/connexor/mpt.xml b/t/A00/02873-meta/connexor/mpt.xml
similarity index 100%
rename from t/A01/02873-meta/connexor/mpt.xml
rename to t/A00/02873-meta/connexor/mpt.xml


diff --git a/t/A01/02873-meta/connexor/phrase.xml b/t/A00/02873-meta/connexor/phrase.xml
similarity index 100%
rename from t/A01/02873-meta/connexor/phrase.xml
rename to t/A00/02873-meta/connexor/phrase.xml


diff --git a/t/A01/02873-meta/connexor/sentences.xml b/t/A00/02873-meta/connexor/sentences.xml
similarity index 100%
rename from t/A01/02873-meta/connexor/sentences.xml
rename to t/A00/02873-meta/connexor/sentences.xml


diff --git a/t/A01/02873-meta/connexor/syntax.xml b/t/A00/02873-meta/connexor/syntax.xml
similarity index 100%
rename from t/A01/02873-meta/connexor/syntax.xml
rename to t/A00/02873-meta/connexor/syntax.xml


diff --git a/t/A01/02873-meta/connexor/tokens.xml b/t/A00/02873-meta/connexor/tokens.xml
similarity index 100%
rename from t/A01/02873-meta/connexor/tokens.xml
rename to t/A00/02873-meta/connexor/tokens.xml


diff --git a/t/A01/02873-meta/corenlp/ne_dewac_175m_600.xml b/t/A00/02873-meta/corenlp/ne_dewac_175m_600.xml
similarity index 100%
rename from t/A01/02873-meta/corenlp/ne_dewac_175m_600.xml
rename to t/A00/02873-meta/corenlp/ne_dewac_175m_600.xml


diff --git a/t/A01/02873-meta/corenlp/ne_hgc_175m_600.xml b/t/A00/02873-meta/corenlp/ne_hgc_175m_600.xml
similarity index 100%
rename from t/A01/02873-meta/corenlp/ne_hgc_175m_600.xml
rename to t/A00/02873-meta/corenlp/ne_hgc_175m_600.xml


diff --git a/t/A01/02873-meta/corenlp/sentences.xml b/t/A00/02873-meta/corenlp/sentences.xml
similarity index 100%
rename from t/A01/02873-meta/corenlp/sentences.xml
rename to t/A00/02873-meta/corenlp/sentences.xml


diff --git a/t/A01/02873-meta/corenlp/tokens.xml b/t/A00/02873-meta/corenlp/tokens.xml
similarity index 100%
rename from t/A01/02873-meta/corenlp/tokens.xml
rename to t/A00/02873-meta/corenlp/tokens.xml


diff --git a/t/A01/02873-meta/data.xml b/t/A00/02873-meta/data.xml
similarity index 100%
rename from t/A01/02873-meta/data.xml
rename to t/A00/02873-meta/data.xml


diff --git a/t/A01/02873-meta/header.xml b/t/A00/02873-meta/header.xml
similarity index 100%
rename from t/A01/02873-meta/header.xml
rename to t/A00/02873-meta/header.xml


diff --git a/t/A01/02873-meta/mate/dependency.xml b/t/A00/02873-meta/mate/dependency.xml
similarity index 100%
rename from t/A01/02873-meta/mate/dependency.xml
rename to t/A00/02873-meta/mate/dependency.xml


diff --git a/t/A01/02873-meta/mate/morpho.xml b/t/A00/02873-meta/mate/morpho.xml
similarity index 100%
rename from t/A01/02873-meta/mate/morpho.xml
rename to t/A00/02873-meta/mate/morpho.xml


diff --git a/t/A01/02873-meta/mate/pipeline/one_token_per_line.txt b/t/A00/02873-meta/mate/pipeline/one_token_per_line.txt
similarity index 100%
rename from t/A01/02873-meta/mate/pipeline/one_token_per_line.txt
rename to t/A00/02873-meta/mate/pipeline/one_token_per_line.txt


diff --git a/t/A01/02873-meta/mate/pipeline/parsed.txt b/t/A00/02873-meta/mate/pipeline/parsed.txt
similarity index 100%
rename from t/A01/02873-meta/mate/pipeline/parsed.txt
rename to t/A00/02873-meta/mate/pipeline/parsed.txt


diff --git a/t/A01/02873-meta/mate/tokenSpans/number_tokenSpans.xml b/t/A00/02873-meta/mate/tokenSpans/number_tokenSpans.xml
similarity index 100%
rename from t/A01/02873-meta/mate/tokenSpans/number_tokenSpans.xml
rename to t/A00/02873-meta/mate/tokenSpans/number_tokenSpans.xml


diff --git a/t/A01/02873-meta/opennlp/morpho.xml b/t/A00/02873-meta/opennlp/morpho.xml
similarity index 100%
rename from t/A01/02873-meta/opennlp/morpho.xml
rename to t/A00/02873-meta/opennlp/morpho.xml


diff --git a/t/A01/02873-meta/opennlp/sentences.xml b/t/A00/02873-meta/opennlp/sentences.xml
similarity index 100%
rename from t/A01/02873-meta/opennlp/sentences.xml
rename to t/A00/02873-meta/opennlp/sentences.xml


diff --git a/t/A01/02873-meta/opennlp/tokens.xml b/t/A00/02873-meta/opennlp/tokens.xml
similarity index 100%
rename from t/A01/02873-meta/opennlp/tokens.xml
rename to t/A00/02873-meta/opennlp/tokens.xml


diff --git a/t/A01/02873-meta/struct/structure.xml b/t/A00/02873-meta/struct/structure.xml
similarity index 100%
rename from t/A01/02873-meta/struct/structure.xml
rename to t/A00/02873-meta/struct/structure.xml


diff --git a/t/A01/02873-meta/text.txt b/t/A00/02873-meta/text.txt
similarity index 100%
rename from t/A01/02873-meta/text.txt
rename to t/A00/02873-meta/text.txt


diff --git a/t/A01/02873-meta/tree_tagger/metadata.xml b/t/A00/02873-meta/tree_tagger/metadata.xml
similarity index 100%
rename from t/A01/02873-meta/tree_tagger/metadata.xml
rename to t/A00/02873-meta/tree_tagger/metadata.xml


diff --git a/t/A01/02873-meta/tree_tagger/morpho.xml b/t/A00/02873-meta/tree_tagger/morpho.xml
similarity index 100%
rename from t/A01/02873-meta/tree_tagger/morpho.xml
rename to t/A00/02873-meta/tree_tagger/morpho.xml


diff --git a/t/A01/02873-meta/tree_tagger/sentences.xml b/t/A00/02873-meta/tree_tagger/sentences.xml
similarity index 100%
rename from t/A01/02873-meta/tree_tagger/sentences.xml
rename to t/A00/02873-meta/tree_tagger/sentences.xml


diff --git a/t/A01/02873-meta/tree_tagger/tokens.xml b/t/A00/02873-meta/tree_tagger/tokens.xml
similarity index 100%
rename from t/A01/02873-meta/tree_tagger/tokens.xml
rename to t/A00/02873-meta/tree_tagger/tokens.xml


diff --git a/t/A01/02873-meta/xip/constituency.xml b/t/A00/02873-meta/xip/constituency.xml
similarity index 100%
rename from t/A01/02873-meta/xip/constituency.xml
rename to t/A00/02873-meta/xip/constituency.xml


diff --git a/t/A01/02873-meta/xip/dependency.xml b/t/A00/02873-meta/xip/dependency.xml
similarity index 100%
rename from t/A01/02873-meta/xip/dependency.xml
rename to t/A00/02873-meta/xip/dependency.xml


diff --git a/t/A01/02873-meta/xip/metadata.xml b/t/A00/02873-meta/xip/metadata.xml
similarity index 100%
rename from t/A01/02873-meta/xip/metadata.xml
rename to t/A00/02873-meta/xip/metadata.xml


diff --git a/t/A01/02873-meta/xip/morpho.xml b/t/A00/02873-meta/xip/morpho.xml
similarity index 100%
rename from t/A01/02873-meta/xip/morpho.xml
rename to t/A00/02873-meta/xip/morpho.xml


diff --git a/t/A01/02873-meta/xip/sentences.xml b/t/A00/02873-meta/xip/sentences.xml
similarity index 100%
rename from t/A01/02873-meta/xip/sentences.xml
rename to t/A00/02873-meta/xip/sentences.xml


diff --git a/t/A01/02873-meta/xip/tokens.xml b/t/A00/02873-meta/xip/tokens.xml
similarity index 100%
rename from t/A01/02873-meta/xip/tokens.xml
rename to t/A00/02873-meta/xip/tokens.xml


diff --git a/t/A01/05663-unbalanced/base/paragraph.xml b/t/A00/05663-unbalanced/base/paragraph.xml
similarity index 100%
rename from t/A01/05663-unbalanced/base/paragraph.xml
rename to t/A00/05663-unbalanced/base/paragraph.xml


diff --git a/t/A01/05663-unbalanced/base/sentences.xml b/t/A00/05663-unbalanced/base/sentences.xml
similarity index 100%
rename from t/A01/05663-unbalanced/base/sentences.xml
rename to t/A00/05663-unbalanced/base/sentences.xml


diff --git a/t/A01/05663-unbalanced/base/tokens_aggr.xml b/t/A00/05663-unbalanced/base/tokens_aggr.xml
similarity index 100%
rename from t/A01/05663-unbalanced/base/tokens_aggr.xml
rename to t/A00/05663-unbalanced/base/tokens_aggr.xml


diff --git a/t/A01/05663-unbalanced/base/tokens_conservative.xml b/t/A00/05663-unbalanced/base/tokens_conservative.xml
similarity index 100%
rename from t/A01/05663-unbalanced/base/tokens_conservative.xml
rename to t/A00/05663-unbalanced/base/tokens_conservative.xml


diff --git a/t/A01/05663-unbalanced/connexor/metadata.xml b/t/A00/05663-unbalanced/connexor/metadata.xml
similarity index 100%
rename from t/A01/05663-unbalanced/connexor/metadata.xml
rename to t/A00/05663-unbalanced/connexor/metadata.xml


diff --git a/t/A01/05663-unbalanced/connexor/morpho.xml b/t/A00/05663-unbalanced/connexor/morpho.xml
similarity index 100%
rename from t/A01/05663-unbalanced/connexor/morpho.xml
rename to t/A00/05663-unbalanced/connexor/morpho.xml


diff --git a/t/A01/05663-unbalanced/connexor/mpt.xml b/t/A00/05663-unbalanced/connexor/mpt.xml
similarity index 100%
rename from t/A01/05663-unbalanced/connexor/mpt.xml
rename to t/A00/05663-unbalanced/connexor/mpt.xml


diff --git a/t/A01/05663-unbalanced/connexor/phrase.xml b/t/A00/05663-unbalanced/connexor/phrase.xml
similarity index 100%
rename from t/A01/05663-unbalanced/connexor/phrase.xml
rename to t/A00/05663-unbalanced/connexor/phrase.xml


diff --git a/t/A01/05663-unbalanced/connexor/sentences.xml b/t/A00/05663-unbalanced/connexor/sentences.xml
similarity index 100%
rename from t/A01/05663-unbalanced/connexor/sentences.xml
rename to t/A00/05663-unbalanced/connexor/sentences.xml


diff --git a/t/A01/05663-unbalanced/connexor/syntax.xml b/t/A00/05663-unbalanced/connexor/syntax.xml
similarity index 100%
rename from t/A01/05663-unbalanced/connexor/syntax.xml
rename to t/A00/05663-unbalanced/connexor/syntax.xml


diff --git a/t/A01/05663-unbalanced/connexor/tokens.xml b/t/A00/05663-unbalanced/connexor/tokens.xml
similarity index 100%
rename from t/A01/05663-unbalanced/connexor/tokens.xml
rename to t/A00/05663-unbalanced/connexor/tokens.xml


diff --git a/t/A01/05663-unbalanced/corenlp/ne_dewac_175m_600.xml b/t/A00/05663-unbalanced/corenlp/ne_dewac_175m_600.xml
similarity index 100%
rename from t/A01/05663-unbalanced/corenlp/ne_dewac_175m_600.xml
rename to t/A00/05663-unbalanced/corenlp/ne_dewac_175m_600.xml


diff --git a/t/A01/05663-unbalanced/corenlp/ne_hgc_175m_600.xml b/t/A00/05663-unbalanced/corenlp/ne_hgc_175m_600.xml
similarity index 100%
rename from t/A01/05663-unbalanced/corenlp/ne_hgc_175m_600.xml
rename to t/A00/05663-unbalanced/corenlp/ne_hgc_175m_600.xml


diff --git a/t/A01/05663-unbalanced/corenlp/sentences.xml b/t/A00/05663-unbalanced/corenlp/sentences.xml
similarity index 100%
rename from t/A01/05663-unbalanced/corenlp/sentences.xml
rename to t/A00/05663-unbalanced/corenlp/sentences.xml


diff --git a/t/A01/05663-unbalanced/corenlp/tokens.xml b/t/A00/05663-unbalanced/corenlp/tokens.xml
similarity index 100%
rename from t/A01/05663-unbalanced/corenlp/tokens.xml
rename to t/A00/05663-unbalanced/corenlp/tokens.xml


diff --git a/t/A01/05663-unbalanced/data.xml b/t/A00/05663-unbalanced/data.xml
similarity index 100%
rename from t/A01/05663-unbalanced/data.xml
rename to t/A00/05663-unbalanced/data.xml


diff --git a/t/A01/05663-unbalanced/header.xml b/t/A00/05663-unbalanced/header.xml
similarity index 100%
rename from t/A01/05663-unbalanced/header.xml
rename to t/A00/05663-unbalanced/header.xml


diff --git a/t/A01/05663-unbalanced/mate/dependency.xml b/t/A00/05663-unbalanced/mate/dependency.xml
similarity index 100%
rename from t/A01/05663-unbalanced/mate/dependency.xml
rename to t/A00/05663-unbalanced/mate/dependency.xml


diff --git a/t/A01/05663-unbalanced/mate/morpho.xml b/t/A00/05663-unbalanced/mate/morpho.xml
similarity index 100%
rename from t/A01/05663-unbalanced/mate/morpho.xml
rename to t/A00/05663-unbalanced/mate/morpho.xml


diff --git a/t/A01/05663-unbalanced/mate/pipeline/one_token_per_line.txt b/t/A00/05663-unbalanced/mate/pipeline/one_token_per_line.txt
similarity index 100%
rename from t/A01/05663-unbalanced/mate/pipeline/one_token_per_line.txt
rename to t/A00/05663-unbalanced/mate/pipeline/one_token_per_line.txt


diff --git a/t/A01/05663-unbalanced/mate/pipeline/parsed.txt b/t/A00/05663-unbalanced/mate/pipeline/parsed.txt
similarity index 100%
rename from t/A01/05663-unbalanced/mate/pipeline/parsed.txt
rename to t/A00/05663-unbalanced/mate/pipeline/parsed.txt


diff --git a/t/A01/05663-unbalanced/mate/tokenSpans/number_tokenSpans.xml b/t/A00/05663-unbalanced/mate/tokenSpans/number_tokenSpans.xml
similarity index 100%
rename from t/A01/05663-unbalanced/mate/tokenSpans/number_tokenSpans.xml
rename to t/A00/05663-unbalanced/mate/tokenSpans/number_tokenSpans.xml


diff --git a/t/A01/05663-unbalanced/opennlp/morpho.xml b/t/A00/05663-unbalanced/opennlp/morpho.xml
similarity index 100%
rename from t/A01/05663-unbalanced/opennlp/morpho.xml
rename to t/A00/05663-unbalanced/opennlp/morpho.xml


diff --git a/t/A01/05663-unbalanced/opennlp/sentences.xml b/t/A00/05663-unbalanced/opennlp/sentences.xml
similarity index 100%
rename from t/A01/05663-unbalanced/opennlp/sentences.xml
rename to t/A00/05663-unbalanced/opennlp/sentences.xml


diff --git a/t/A01/05663-unbalanced/opennlp/tokens.xml b/t/A00/05663-unbalanced/opennlp/tokens.xml
similarity index 100%
rename from t/A01/05663-unbalanced/opennlp/tokens.xml
rename to t/A00/05663-unbalanced/opennlp/tokens.xml


diff --git a/t/A01/05663-unbalanced/struct/structure.xml b/t/A00/05663-unbalanced/struct/structure.xml
similarity index 100%
rename from t/A01/05663-unbalanced/struct/structure.xml
rename to t/A00/05663-unbalanced/struct/structure.xml


diff --git a/t/A01/05663-unbalanced/text.txt b/t/A00/05663-unbalanced/text.txt
similarity index 100%
rename from t/A01/05663-unbalanced/text.txt
rename to t/A00/05663-unbalanced/text.txt


diff --git a/t/A01/05663-unbalanced/tree_tagger/metadata.xml b/t/A00/05663-unbalanced/tree_tagger/metadata.xml
similarity index 100%
rename from t/A01/05663-unbalanced/tree_tagger/metadata.xml
rename to t/A00/05663-unbalanced/tree_tagger/metadata.xml


diff --git a/t/A01/05663-unbalanced/tree_tagger/morpho.xml b/t/A00/05663-unbalanced/tree_tagger/morpho.xml
similarity index 100%
rename from t/A01/05663-unbalanced/tree_tagger/morpho.xml
rename to t/A00/05663-unbalanced/tree_tagger/morpho.xml


diff --git a/t/A01/05663-unbalanced/tree_tagger/sentences.xml b/t/A00/05663-unbalanced/tree_tagger/sentences.xml
similarity index 100%
rename from t/A01/05663-unbalanced/tree_tagger/sentences.xml
rename to t/A00/05663-unbalanced/tree_tagger/sentences.xml


diff --git a/t/A01/05663-unbalanced/tree_tagger/tokens.xml b/t/A00/05663-unbalanced/tree_tagger/tokens.xml
similarity index 100%
rename from t/A01/05663-unbalanced/tree_tagger/tokens.xml
rename to t/A00/05663-unbalanced/tree_tagger/tokens.xml


diff --git a/t/A01/05663-unbalanced/xip/constituency.xml b/t/A00/05663-unbalanced/xip/constituency.xml
similarity index 100%
rename from t/A01/05663-unbalanced/xip/constituency.xml
rename to t/A00/05663-unbalanced/xip/constituency.xml


diff --git a/t/A01/05663-unbalanced/xip/dependency.xml b/t/A00/05663-unbalanced/xip/dependency.xml
similarity index 100%
rename from t/A01/05663-unbalanced/xip/dependency.xml
rename to t/A00/05663-unbalanced/xip/dependency.xml


diff --git a/t/A01/05663-unbalanced/xip/metadata.xml b/t/A00/05663-unbalanced/xip/metadata.xml
similarity index 100%
rename from t/A01/05663-unbalanced/xip/metadata.xml
rename to t/A00/05663-unbalanced/xip/metadata.xml


diff --git a/t/A01/05663-unbalanced/xip/morpho.xml b/t/A00/05663-unbalanced/xip/morpho.xml
similarity index 100%
rename from t/A01/05663-unbalanced/xip/morpho.xml
rename to t/A00/05663-unbalanced/xip/morpho.xml


diff --git a/t/A01/05663-unbalanced/xip/sentences.xml b/t/A00/05663-unbalanced/xip/sentences.xml
similarity index 100%
rename from t/A01/05663-unbalanced/xip/sentences.xml
rename to t/A00/05663-unbalanced/xip/sentences.xml


diff --git a/t/A01/05663-unbalanced/xip/tokens.xml b/t/A00/05663-unbalanced/xip/tokens.xml
similarity index 100%
rename from t/A01/05663-unbalanced/xip/tokens.xml
rename to t/A00/05663-unbalanced/xip/tokens.xml


diff --git a/t/A01/07452-deep/base/paragraph.xml b/t/A00/07452-deep/base/paragraph.xml
similarity index 100%
rename from t/A01/07452-deep/base/paragraph.xml
rename to t/A00/07452-deep/base/paragraph.xml


diff --git a/t/A01/07452-deep/base/sentences.xml b/t/A00/07452-deep/base/sentences.xml
similarity index 100%
rename from t/A01/07452-deep/base/sentences.xml
rename to t/A00/07452-deep/base/sentences.xml


diff --git a/t/A01/07452-deep/base/tokens_aggr.xml b/t/A00/07452-deep/base/tokens_aggr.xml
similarity index 100%
rename from t/A01/07452-deep/base/tokens_aggr.xml
rename to t/A00/07452-deep/base/tokens_aggr.xml


diff --git a/t/A01/07452-deep/base/tokens_conservative.xml b/t/A00/07452-deep/base/tokens_conservative.xml
similarity index 100%
rename from t/A01/07452-deep/base/tokens_conservative.xml
rename to t/A00/07452-deep/base/tokens_conservative.xml


diff --git a/t/A01/07452-deep/connexor/metadata.xml b/t/A00/07452-deep/connexor/metadata.xml
similarity index 100%
rename from t/A01/07452-deep/connexor/metadata.xml
rename to t/A00/07452-deep/connexor/metadata.xml


diff --git a/t/A01/07452-deep/connexor/morpho.xml b/t/A00/07452-deep/connexor/morpho.xml
similarity index 100%
rename from t/A01/07452-deep/connexor/morpho.xml
rename to t/A00/07452-deep/connexor/morpho.xml


diff --git a/t/A01/07452-deep/connexor/mpt.xml b/t/A00/07452-deep/connexor/mpt.xml
similarity index 100%
rename from t/A01/07452-deep/connexor/mpt.xml
rename to t/A00/07452-deep/connexor/mpt.xml


diff --git a/t/A01/07452-deep/connexor/phrase.xml b/t/A00/07452-deep/connexor/phrase.xml
similarity index 100%
rename from t/A01/07452-deep/connexor/phrase.xml
rename to t/A00/07452-deep/connexor/phrase.xml


diff --git a/t/A01/07452-deep/connexor/sentences.xml b/t/A00/07452-deep/connexor/sentences.xml
similarity index 100%
rename from t/A01/07452-deep/connexor/sentences.xml
rename to t/A00/07452-deep/connexor/sentences.xml


diff --git a/t/A01/07452-deep/connexor/syntax.xml b/t/A00/07452-deep/connexor/syntax.xml
similarity index 100%
rename from t/A01/07452-deep/connexor/syntax.xml
rename to t/A00/07452-deep/connexor/syntax.xml


diff --git a/t/A01/07452-deep/connexor/tokens.xml b/t/A00/07452-deep/connexor/tokens.xml
similarity index 100%
rename from t/A01/07452-deep/connexor/tokens.xml
rename to t/A00/07452-deep/connexor/tokens.xml


diff --git a/t/A01/07452-deep/corenlp/ne_dewac_175m_600.xml b/t/A00/07452-deep/corenlp/ne_dewac_175m_600.xml
similarity index 100%
rename from t/A01/07452-deep/corenlp/ne_dewac_175m_600.xml
rename to t/A00/07452-deep/corenlp/ne_dewac_175m_600.xml


diff --git a/t/A01/07452-deep/corenlp/ne_hgc_175m_600.xml b/t/A00/07452-deep/corenlp/ne_hgc_175m_600.xml
similarity index 100%
rename from t/A01/07452-deep/corenlp/ne_hgc_175m_600.xml
rename to t/A00/07452-deep/corenlp/ne_hgc_175m_600.xml


diff --git a/t/A01/07452-deep/corenlp/sentences.xml b/t/A00/07452-deep/corenlp/sentences.xml
similarity index 100%
rename from t/A01/07452-deep/corenlp/sentences.xml
rename to t/A00/07452-deep/corenlp/sentences.xml


diff --git a/t/A01/07452-deep/corenlp/tokens.xml b/t/A00/07452-deep/corenlp/tokens.xml
similarity index 100%
rename from t/A01/07452-deep/corenlp/tokens.xml
rename to t/A00/07452-deep/corenlp/tokens.xml


diff --git a/t/A01/07452-deep/data.xml b/t/A00/07452-deep/data.xml
similarity index 100%
rename from t/A01/07452-deep/data.xml
rename to t/A00/07452-deep/data.xml


diff --git a/t/A01/07452-deep/header.xml b/t/A00/07452-deep/header.xml
similarity index 100%
rename from t/A01/07452-deep/header.xml
rename to t/A00/07452-deep/header.xml


diff --git a/t/A01/07452-deep/mate/dependency.xml b/t/A00/07452-deep/mate/dependency.xml
similarity index 100%
rename from t/A01/07452-deep/mate/dependency.xml
rename to t/A00/07452-deep/mate/dependency.xml


diff --git a/t/A01/07452-deep/mate/morpho.xml b/t/A00/07452-deep/mate/morpho.xml
similarity index 100%
rename from t/A01/07452-deep/mate/morpho.xml
rename to t/A00/07452-deep/mate/morpho.xml


diff --git a/t/A01/07452-deep/mate/pipeline/one_token_per_line.txt b/t/A00/07452-deep/mate/pipeline/one_token_per_line.txt
similarity index 100%
rename from t/A01/07452-deep/mate/pipeline/one_token_per_line.txt
rename to t/A00/07452-deep/mate/pipeline/one_token_per_line.txt


diff --git a/t/A01/07452-deep/mate/pipeline/parsed.txt b/t/A00/07452-deep/mate/pipeline/parsed.txt
similarity index 100%
rename from t/A01/07452-deep/mate/pipeline/parsed.txt
rename to t/A00/07452-deep/mate/pipeline/parsed.txt


diff --git a/t/A01/07452-deep/mate/tokenSpans/number_tokenSpans.xml b/t/A00/07452-deep/mate/tokenSpans/number_tokenSpans.xml
similarity index 100%
rename from t/A01/07452-deep/mate/tokenSpans/number_tokenSpans.xml
rename to t/A00/07452-deep/mate/tokenSpans/number_tokenSpans.xml


diff --git a/t/A01/07452-deep/opennlp/morpho.xml b/t/A00/07452-deep/opennlp/morpho.xml
similarity index 100%
rename from t/A01/07452-deep/opennlp/morpho.xml
rename to t/A00/07452-deep/opennlp/morpho.xml


diff --git a/t/A01/07452-deep/opennlp/sentences.xml b/t/A00/07452-deep/opennlp/sentences.xml
similarity index 100%
rename from t/A01/07452-deep/opennlp/sentences.xml
rename to t/A00/07452-deep/opennlp/sentences.xml


diff --git a/t/A01/07452-deep/opennlp/tokens.xml b/t/A00/07452-deep/opennlp/tokens.xml
similarity index 100%
rename from t/A01/07452-deep/opennlp/tokens.xml
rename to t/A00/07452-deep/opennlp/tokens.xml


diff --git a/t/A01/07452-deep/struct/structure.xml b/t/A00/07452-deep/struct/structure.xml
similarity index 100%
rename from t/A01/07452-deep/struct/structure.xml
rename to t/A00/07452-deep/struct/structure.xml


diff --git a/t/A01/07452-deep/text.txt b/t/A00/07452-deep/text.txt
similarity index 100%
rename from t/A01/07452-deep/text.txt
rename to t/A00/07452-deep/text.txt


diff --git a/t/A01/07452-deep/tree_tagger/metadata.xml b/t/A00/07452-deep/tree_tagger/metadata.xml
similarity index 100%
rename from t/A01/07452-deep/tree_tagger/metadata.xml
rename to t/A00/07452-deep/tree_tagger/metadata.xml


diff --git a/t/A01/07452-deep/tree_tagger/morpho.xml b/t/A00/07452-deep/tree_tagger/morpho.xml
similarity index 100%
rename from t/A01/07452-deep/tree_tagger/morpho.xml
rename to t/A00/07452-deep/tree_tagger/morpho.xml


diff --git a/t/A01/07452-deep/tree_tagger/sentences.xml b/t/A00/07452-deep/tree_tagger/sentences.xml
similarity index 100%
rename from t/A01/07452-deep/tree_tagger/sentences.xml
rename to t/A00/07452-deep/tree_tagger/sentences.xml


diff --git a/t/A01/07452-deep/tree_tagger/tokens.xml b/t/A00/07452-deep/tree_tagger/tokens.xml
similarity index 100%
rename from t/A01/07452-deep/tree_tagger/tokens.xml
rename to t/A00/07452-deep/tree_tagger/tokens.xml


diff --git a/t/A01/07452-deep/xip/constituency.xml b/t/A00/07452-deep/xip/constituency.xml
similarity index 100%
rename from t/A01/07452-deep/xip/constituency.xml
rename to t/A00/07452-deep/xip/constituency.xml


diff --git a/t/A01/07452-deep/xip/dependency.xml b/t/A00/07452-deep/xip/dependency.xml
similarity index 100%
rename from t/A01/07452-deep/xip/dependency.xml
rename to t/A00/07452-deep/xip/dependency.xml


diff --git a/t/A01/07452-deep/xip/metadata.xml b/t/A00/07452-deep/xip/metadata.xml
similarity index 100%
rename from t/A01/07452-deep/xip/metadata.xml
rename to t/A00/07452-deep/xip/metadata.xml


diff --git a/t/A01/07452-deep/xip/morpho.xml b/t/A00/07452-deep/xip/morpho.xml
similarity index 100%
rename from t/A01/07452-deep/xip/morpho.xml
rename to t/A00/07452-deep/xip/morpho.xml


diff --git a/t/A01/07452-deep/xip/sentences.xml b/t/A00/07452-deep/xip/sentences.xml
similarity index 100%
rename from t/A01/07452-deep/xip/sentences.xml
rename to t/A00/07452-deep/xip/sentences.xml


diff --git a/t/A01/07452-deep/xip/tokens.xml b/t/A00/07452-deep/xip/tokens.xml
similarity index 100%
rename from t/A01/07452-deep/xip/tokens.xml
rename to t/A00/07452-deep/xip/tokens.xml


diff --git a/t/VDI/JAN/00001/data.xml b/t/VDI/JAN/00001/data.xml
index 21fd76f..545f020 100644
--- a/t/VDI/JAN/00001/data.xml
+++ b/t/VDI/JAN/00001/data.xml

@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<raw_text docid="VDI_JAN.00001" xmlns="http://ids-mannheim.de/ns/KorAP">
+<raw_text docid="VDI14_JAN.00001" xmlns="http://ids-mannheim.de/ns/KorAP">
   <metadata file="metadata.xml" />
   <text>hui</text>
 </raw_text>

diff --git a/t/artificial-subtoken.t b/t/artificial-subtoken.t
deleted file mode 100644
index 7a30103..0000000
--- a/t/artificial-subtoken.t
+++ /dev/null

@@ -1,65 +0,0 @@
-#!/usr/bin/env perl
-# source ~/perl5/perlbrew/etc/bashrc
-# perlbrew switch perl-blead@korap
-use strict;
-use warnings;
-use utf8;
-use Test::More;
-use Benchmark ':hireswallclock';
-use lib 'lib', '../lib';
-use Scalar::Util qw/weaken/;
-
-use File::Basename 'dirname';
-use File::Spec::Functions 'catdir';
-
-use_ok('KorAP::Document');
-
-my $path = catdir(dirname(__FILE__), 'artificial');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
-like($doc->path, qr!$path/$!, 'Path');
-ok($doc->parse, 'Parse document');
-
-sub new_tokenizer {
-  my $x = $doc;
-  weaken $x;
-  return KorAP::Tokenizer->new(
-    path => $x->path,
-    doc => $x,
-    foundry => 'OpenNLP',
-    layer => 'Tokens',
-    name => 'tokens'
-  )
-};
-
-is($doc->primary->data,
-   'Zum letzten kulturellen Anlass lädt die Leitung des Schulheimes Hofbergli ein, '.
-     'bevor der Betrieb Ende Schuljahr eingestellt wird.', 'Primary data');
-
-is($doc->primary->data_length, 129, 'Primary data length');
-
-is($doc->primary->data(0,3), 'Zum', 'Get primary data');
-
-# Get tokens
-use_ok('KorAP::Tokenizer');
-# Get tokenization
-ok(my $tokens = KorAP::Tokenizer->new(
-  path => $doc->path,
-  doc => $doc,
-  foundry => 'OpenNLP',
-  layer => 'Tokens',
-  name => 'tokens'
-), 'New Tokenizer');
-ok($tokens->parse, 'Parse');
-
-ok($tokens->add_subtokens, 'Add subtokens');
-
-# diag $tokens->to_string;
-
-#foreach (@{$tokens->stream->multi_term_tokens}) {
-#  print $_;
-#};
-
-done_testing;
-
-
-__END__

diff --git a/t/artificial.t b/t/artificial.t
deleted file mode 100644
index 95ef890..0000000
--- a/t/artificial.t
+++ /dev/null

@@ -1,452 +0,0 @@
-#!/usr/bin/env perl
-# source ~/perl5/perlbrew/etc/bashrc
-# perlbrew switch perl-blead@korap
-use strict;
-use warnings;
-use utf8;
-use Test::More;
-use Benchmark ':hireswallclock';
-use lib 'lib', '../lib';
-use Scalar::Util qw/weaken/;
-
-use File::Basename 'dirname';
-use File::Spec::Functions 'catdir';
-
-use_ok('KorAP::Document');
-
-# Tests for material identicality of a token
-sub _t2h {
-  my $string = shift;
-  $string =~ s/^\[\(\d+?-\d+?\)(.+?)\]$/$1/;
-  my %hash = ();
-  foreach (split(qr!\|!, $string)) {
-    $hash{$_} = 1;
-  };
-  return \%hash;
-};
-
-
-my $path = catdir(dirname(__FILE__), 'artificial');
-ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
-like($doc->path, qr!$path/$!, 'Path');
-ok($doc->parse, 'Parse document');
-
-sub new_tokenizer {
-  my $x = $doc;
-  weaken $x;
-  return KorAP::Tokenizer->new(
-    path => $x->path,
-    doc => $x,
-    foundry => 'OpenNLP',
-    layer => 'Tokens',
-    name => 'tokens'
-  )
-};
-
-is($doc->primary->data,
-   'Zum letzten kulturellen Anlass lädt die Leitung des Schulheimes Hofbergli ein, '.
-     'bevor der Betrieb Ende Schuljahr eingestellt wird.', 'Primary data');
-
-is($doc->primary->data_length, 129, 'Primary data length');
-
-is($doc->primary->data(0,3), 'Zum', 'Get primary data');
-
-# Get tokens
-use_ok('KorAP::Tokenizer');
-# Get tokenization
-ok(my $tokens = KorAP::Tokenizer->new(
-  path => $doc->path,
-  doc => $doc,
-  foundry => 'OpenNLP',
-  layer => 'Tokens',
-  name => 'tokens'
-), 'New Tokenizer');
-ok($tokens->parse, 'Parse');
-
-is($tokens->foundry, 'OpenNLP', 'Foundry');
-
-is($tokens->doc->text_sigle, 'ART_ABC.00001', 'Doc id');
-is($tokens->should, 20, 'Should');
-is($tokens->have, 18, 'Have');
-is($tokens->name, 'tokens', 'Name');
-is($tokens->layer, 'Tokens', 'Layer');
-
-is($tokens->stream->pos(0)->to_string, '[(0-3)-:tokens$<i>18|_0#0-3|i:zum|s:Zum]', 'Token is correct');
-
-is($tokens->stream->pos(1)->to_string, '[(4-11)_1#4-11|i:letzten|s:letzten]', 'Token is correct');
-
-my $i = 2;
-foreach ([12,23, 'kulturellen'],
-	 [24,30, 'Anlass'],
-	 [31,35, 'lädt'],
-	 [36,39, 'die'],
-	 [40,47, 'Leitung'],
-	 [48,51, 'des'],
-	 [52,63, 'Schulheimes'],
-	 [64,73, 'Hofbergli'],
-	 [74,77, 'ein'],
-	 [79,84, 'bevor'],
-	 [85,88, 'der'],
-	 [89,96, 'Betrieb'],
-	 [97,101, 'Ende'],
-	 [102,111, 'Schuljahr'],
-	 [112,123, 'eingestellt'],
-	 [124,128, 'wird']
-       ) {
-  is($tokens->stream->pos($i++)->to_string,
-     '[('.$_->[0].'-'.$_->[1].')'.
-       '_'.($i-1).'#'.$_->[0].'-'.$_->[1] . '|' .
-	 'i:'.lc($_->[2]).'|s:'.$_->[2].']',
-     'Token is correct');
-};
-
-ok(!$tokens->stream->pos($i++), 'No more tokens');
-
-# Add OpenNLP/morpho
-ok($tokens->add('OpenNLP', 'Morpho'), 'Add OpenNLP/Morpho');
-
-$i = 0;
-foreach (qw/APPRART ADJA ADJA NN VVFIN ART NN ART NN NE PTKVZ KOUS ART NN NN NN VVPP VAFIN/) {
-  like($tokens->stream->pos($i++)->to_string,
-       qr!\|opennlp/p:$_!,
-       'Annotation (OpenNLP/p) is correct: ' . $_
-     );
-};
-
-# Add OpenNLP/sentences
-ok($tokens->add('OpenNLP', 'Sentences'), 'Add OpenNLP/Sentences');
-
-is($tokens->stream->pos(0)->to_string,
-   '[(0-3)-:opennlp/sentences$<i>1|-:tokens$<i>18|<>:opennlp/s:s#0-129$<i>17<b>0|_0#0-3|i:zum|opennlp/p:APPRART|s:Zum]',
-   #   '[(0-3)-:opennlp/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|opennlp/p:APPRART|<>:opennlp/s:s#0-129$<i>17]',
-   'Correct sentence'
- );
-
-# New instantiation
-ok($tokens = KorAP::Tokenizer->new(
-  path => $doc->path,
-  doc => $doc,
-  foundry => 'OpenNLP',
-  layer => 'Tokens',
-  name => 'tokens'
-), 'New Tokenizer');
-
-ok($tokens->parse, 'Parse');
-
-# Add OpenNLP/sentences
-ok($tokens->add('Base', 'Sentences'), 'Add Base/Sentences');
-
-# Add OpenNLP/sentences
-ok($tokens->add('Base', 'Paragraphs'), 'Add Base/Paragraphs');
-
-is_deeply(
-  _t2h($tokens->stream->pos(0)->to_string),
-  _t2h('[(0-3)-:base/paragraphs$<i>1|-:base/sentences$<i>1|-:tokens$<i>18|<>:base/s:t#0-129$<i>17<b>0|<>:base/s:p#0-129$<i>17<b>1|<>:base/s:s#0-129$<i>17<b>2|_0#0-3|i:zum|s:Zum]'),
-   'Correct base annotation');
-
-# New instantiation
-ok($tokens = new_tokenizer->parse, 'Parse');
-
-# Add CoreNLP/NamedEntities
-ok($tokens->add('CoreNLP', 'NamedEntities', 'ne_dewac_175m_600'), 'Add CoreNLP/NamedEntities');
-ok($tokens->add('CoreNLP', 'NamedEntities', 'ne_hgc_175m_600'), 'Add CoreNLP/NamedEntities');
-
-# [(64-73)s:Hofbergli|i:hofbergli|_9#64-73|corenlp/ne_dewac_175m_600:I-LOC|corenlp/ne_hgc_175m_600:I-LOC]
-is_deeply(
-  _t2h($tokens->stream->pos(9)->to_string),
-  _t2h('[(64-73)_9#64-73|corenlp/ne:I-LOC|i:hofbergli|s:Hofbergli]'),
-  'Correct NamedEntities annotation'
-);
-
-# New instantiation
-ok($tokens = new_tokenizer->parse, 'Parse');
-
-# Add CoreNLP/Morpho
-ok($tokens->add('CoreNLP', 'Morpho'), 'Add CoreNLP/Morpho');
-
-is_deeply(
-  _t2h($tokens->stream->pos(0)->to_string),
-  _t2h('[(0-3)-:tokens$<i>18|_0#0-3|corenlp/p:APPRART|i:zum|s:Zum]'),
-  'Correct corenlp annotation'
-);
-
-$i = 0;
-foreach (qw/APPRART ADJ ADJA NN VVFIN ART NN ART NN NE PTKVZ KOUS ART NN NN NN VVPP VAFIN/) {
-  like($tokens->stream->pos($i++)->to_string,
-       qr!\|corenlp/p:$_!,
-       'Annotation (CoreNLP/p) is correct: '. $_);
-};
-
-
-# Add CoreNLP/Sentences
-ok($tokens->add('CoreNLP', 'Sentences'), 'Add CoreNLP/Sentences');
-
-is_deeply(
-  _t2h($tokens->stream->pos(0)->to_string),
-  _t2h('[(0-3)-:corenlp/sentences$<i>1|-:tokens$<i>18|<>:corenlp/s:s#0-129$<i>17<b>0|_0#0-3|corenlp/p:APPRART|i:zum|s:Zum]'),
-  #   '[(0-3)-:corenlp/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|corenlp/p:APPRART|<>:corenlp/s:s#0-129$<i>17]',
-  'Correct corenlp annotation'
-);
-
-# New instantiation
-ok($tokens = new_tokenizer->parse, 'New Tokenizer');
-
-# Add CoreNLP/Sentences
-ok($tokens->add('Connexor', 'Sentences'), 'Add Connexor/Sentences');
-
-is_deeply(
-  _t2h($tokens->stream->pos(0)->to_string),
-  _t2h('[(0-3)-:cnx/sentences$<i>1|-:tokens$<i>18|<>:cnx/s:s#0-129$<i>17<b>0|_0#0-3|i:zum|s:Zum]'),
-  #   '[(0-3)-:cnx/sentences$<i>1|-:tokens$<i>18|_0#0-3|i:zum|s:Zum|<>:cnx/s:s#0-129$<i>17<b>0]',
-  'Correct cnx annotation'
-);
-
-# New instantiation
-ok($tokens = new_tokenizer->parse, 'New Tokenizer');
-
-# Add Connexor/Morpho
-ok($tokens->add('Connexor', 'Morpho'), 'Add Connexor/Morpho');
-
-$i = 0;
-foreach (qw/! A A N V DET N DET N N NUM CS DET N N N V V/) {
-  if ($_ eq '!') {
-    $i++;
-    next;
-  };
-  like($tokens->stream->pos($i++)->to_string,
-       qr!\|cnx/p:$_!,
-       'Annotation (Connexor/p) is correct: ' . $_);
-};
-
-
-$i = 0;
-foreach (qw/! ! ! ! IND:PRES ! ! ! ! Prop ! ! ! ! ! ! PCP:PERF IND:PRES/) {
-  if ($_ eq '!') {
-    $i++;
-    next;
-  };
-  foreach my $f (split(':', $_)) {
-    like($tokens->stream->pos($i)->to_string,
-	 qr!\|cnx/m:$f!,
-	 'Annotation (Connexor/m) is correct: '. $f);
-  };
-  $i++;
-};
-
-# New instantiation
-ok($tokens = new_tokenizer->parse, 'New Tokenizer');
-
-# Add Connexor/Phrase
-ok($tokens->add('Connexor', 'Phrase'), 'Add Connexor/Phrase');
-my $stream = $tokens->stream;
-like($stream->pos(1)->to_string, qr!<>:cnx/c:np#4-30\$<i>4<b>0!, 'Annotation (Connexor/c) is correct');
-like($stream->pos(6)->to_string, qr!<>:cnx/c:np#40-47\$<i>7<b>0!, 'Annotation (Connexor/c) is correct');
-like($stream->pos(8)->to_string, qr!<>:cnx/c:np#52-73\$<i>10<b>0!, 'Annotation (Connexor/c) is correct');
-like($stream->pos(13)->to_string, qr!<>:cnx/c:np#89-111\$<i>16<b>0!, 'Annotation (Connexor/c) is correct');
-
-# New instantiation
-ok($tokens = new_tokenizer->parse, 'New Tokenizer');
-
-# Add Connexor/Syntax
-ok($tokens->add('Connexor', 'Syntax'), 'Add Connexor/Syntax');
-$stream = $tokens->stream;
-
-$i = 0;
-foreach (qw/! @PREMOD @PREMOD @NH @MAIN @PREMOD @NH @PREMOD
-	    @PREMOD @NH @NH @PREMARK @PREMOD @PREMOD @NH @NH @MAIN @AUX/) {
-  if ($_ eq '!') {
-    $i++;
-    next;
-  };
-  like($tokens->stream->pos($i++)->to_string,
-       qr!\|cnx/syn:$_!,
-       'Annotation (Connexor/syn) is correct: ' . $_);
-};
-
-# New instantiation
-ok($tokens = new_tokenizer->parse, 'New Tokenizer');
-
-# Add XIP/Sentences
-ok($tokens->add('XIP', 'Sentences'), 'Add XIP/Sentences');
-
-is_deeply(
-  _t2h($tokens->stream->pos(0)->to_string),
-  _t2h('[(0-3)-:tokens$<i>18|-:xip/sentences$<i>1|<>:xip/s:s#0-129$<i>17<b>0|_0#0-3|i:zum|s:Zum]'),
-  #   '[(0-3)-:tokens$<i>18|_0#0-3|i:zum|s:Zum|-:xip/sentences$<i>1|<>:xip/s:s#0-129$<i>17<b>0]',
-  'First sentence'
-);
-
-# Add XIP/Morpho
-ok($tokens->add('XIP', 'Morpho'), 'Add XIP/Morpho');
-$stream = $tokens->stream;
-
-$i = 0;
-foreach (qw/PREP ADJ ADJ NOUN VERB DET NOUN DET NOUN NOUN PTCL CONJ DET NOUN NOUN NOUN VERB VERB/) {
-  if ($_ eq '!') {
-    $i++;
-    next;
-  };
-  like($tokens->stream->pos($i++)->to_string,
-       qr!\|xip/p:$_!,
-       'Annotation (xip/p) is correct: ' . $_);
-};
-
-$i = 0;
-foreach ('zu', 'letzt', 'kulturell', 'Anlass', '=laden:laden', 'die', 'Leitung', 'der', '\#schulen:\#Heim:schulen\#Heim', 'Hofbergli', 'ein', 'bevor', 'der', 'Betrieb', 'Ende', '\#schulen:\#Jahr:schulen\#Jahr') {
-  if ($_ eq '!') {
-    $i++;
-    next;
-  };
-  foreach my $f (split(':', $_)) {
-    like($tokens->stream->pos($i)->to_string,
-	 qr!\|xip\/l:\Q$f\E!,
-	 'Annotation (xip/l) is correct: ' . $f);
-  };
-  $i++;
-};
-
-# New instantiation
-ok($tokens = new_tokenizer->parse, 'New Tokenizer');
-
-# Add XIP/Sentences
-ok($tokens->add('XIP', 'Dependency'), 'Add XIP/Dependency');
-
-$stream = $tokens->stream;
-diag $stream->pos(1)->to_string;
-
-like($stream->pos(1)->to_string, qr![^<]>:xip/d:NMOD\$<i>3!, 'Dependency fine');
-like($stream->pos(3)->to_string, qr![^<]<:xip/d:NMOD\$<i>1!, 'Dependency fine');
-
-done_testing;
-__END__
-
-
-like($stream->pos(3)->to_string, qr!\|<:xip/d:NMOD\$<i>2!, 'Dependency fine');
-like($stream->pos(4)->to_string, qr!\|>xip/d:VMAIN\$<i>4!, 'Dependency fine');
-like($stream->pos(4)->to_string, qr!\|<:xip/d:SUBJ\$<i>6!, 'Dependency fine');
-like($stream->pos(4)->to_string, qr!\|<:xip/d:VPREF\$<i>10!, 'Dependency fine');
-like($stream->pos(5)->to_string, qr!\|>:xip/d:DETERM\$<i>6!, 'Dependency fine');
-like($stream->pos(6)->to_string, qr!\|<:xip/d:DETERM\$<i>5!, 'Dependency fine');
-like($stream->pos(6)->to_string, qr!\|>:xip/d:SUBJ\$<i>4!, 'Dependency fine');
-like($stream->pos(6)->to_string, qr!\|<:xip/d:NMOD\$<i>8!, 'Dependency fine');
-like($stream->pos(7)->to_string, qr!\|>:xip/d:DETERM\$<i>8!, 'Dependency fine');
-like($stream->pos(8)->to_string, qr!\|<:xip/d:DETERM\$<i>7!, 'Dependency fine');
-like($stream->pos(8)->to_string, qr!\|>:xip/d:NMOD\$<i>6!, 'Dependency fine');
-like($stream->pos(8)->to_string, qr!\|<:xip/d:NMOD\$<i>9!, 'Dependency fine');
-like($stream->pos(9)->to_string, qr!\|>:xip/d:NMOD\$<i>8!, 'Dependency fine');
-like($stream->pos(10)->to_string, qr!\|>:xip/d:VPREF\$<i>4!, 'Dependency fine');
-like($stream->pos(11)->to_string, qr!\|>:xip/d:CONNECT\$<i>16!, 'Dependency fine');
-like($stream->pos(12)->to_string, qr!\|>:xip/d:DETERM\$<i>13!, 'Dependency fine');
-like($stream->pos(13)->to_string, qr!\|<:xip/d:DETERM\$<i>12!, 'Dependency fine');
-like($stream->pos(13)->to_string, qr!\|>:xip/d:SUBJ\$<i>16!, 'Dependency fine');
-like($stream->pos(14)->to_string, qr!\|>:xip/d:OBJ\$<i>16!, 'Dependency fine');
-like($stream->pos(15)->to_string, qr!\|>:xip/d:OBJ\$<i>16!, 'Dependency fine');
-like($stream->pos(16)->to_string, qr!\|<:xip/d:CONNECT\$<i>11!, 'Dependency fine');
-like($stream->pos(16)->to_string, qr!\|<:xip/d:SUBJ\$<i>13!, 'Dependency fine');
-like($stream->pos(16)->to_string, qr!\|<:xip/d:OBJ\$<i>14!, 'Dependency fine');
-like($stream->pos(16)->to_string, qr!\|<:xip/d:OBJ\$<i>15!, 'Dependency fine');
-like($stream->pos(16)->to_string, qr!\|>:xip/d:AUXIL\$<i>17!, 'Dependency fine');
-like($stream->pos(16)->to_string, qr!\|>xip/d:VMAIN\$<i>16!, 'Dependency fine');
-like($stream->pos(16)->to_string, qr!\|<xip/d:VMAIN\$<i>16!, 'Dependency fine');
-like($stream->pos(17)->to_string, qr!\|<:xip/d:AUXIL\$<i>16!, 'Dependency fine');
-
-# New instantiation
-ok($tokens = new_tokenizer->parse, 'New Tokenizer');
-
-# Add XIP/Sentences
-ok($tokens->add('XIP', 'Constituency'), 'Add XIP/Constituency');
-
-$stream = $tokens->stream;
-like($stream->pos(0)->to_string, qr!\|<>:xip/c:TOP#0-129\$<i>17!, 'Constituency fine');
-like($stream->pos(0)->to_string, qr!\|<>:xip/c:MC#0-129\$<i>17<b>1!, 'Constituency fine');
-like($stream->pos(0)->to_string, qr!\|<>:xip/c:PP#0-30\$<i>4<b>2!, 'Constituency fine');
-like($stream->pos(0)->to_string, qr!\|<>:xip/c:PREP#0-3\$<i>1!, 'Constituency fine');
-
-like($stream->pos(1)->to_string, qr!\|<>:xip/c:NP#4-30\$<i>4<b>3!, 'Constituency fine');
-like($stream->pos(1)->to_string, qr!\|<>:xip/c:NPA#4-30\$<i>4<b>4!, 'Constituency fine');
-like($stream->pos(1)->to_string, qr!\|<>:xip/c:AP#4-11\$<i>2<b>5!, 'Constituency fine');
-like($stream->pos(1)->to_string, qr!\|<>:xip/c:ADJ#4-11\$<i>2<b>6!, 'Constituency fine');
-
-like($stream->pos(2)->to_string, qr!\|<>:xip/c:AP#12-23\$<i>3<b>5!, 'Constituency fine');
-like($stream->pos(2)->to_string, qr!\|<>:xip/c:ADJ#12-23\$<i>3<b>6!, 'Constituency fine');
-
-like($stream->pos(3)->to_string, qr!\|<>:xip/c:NOUN#24-30\$<i>4<b>5!, 'Constituency fine');
-
-like($stream->pos(4)->to_string, qr!\|<>:xip/c:VERB#31-35\$<i>5<b>2!, 'Constituency fine');
-
-like($stream->pos(5)->to_string, qr!\|<>:xip/c:NP#36-47\$<i>7<b>2!, 'Constituency fine');
-like($stream->pos(5)->to_string, qr!\|<>:xip/c:DET#36-39\$<i>6<b>3!, 'Constituency fine');
-
-like($stream->pos(6)->to_string, qr!\|<>:xip/c:NPA#40-47\$<i>7<b>3!, 'Constituency fine');
-like($stream->pos(6)->to_string, qr!\|<>:xip/c:NOUN#40-47\$<i>7<b>4!, 'Constituency fine');
-
-like($stream->pos(7)->to_string, qr!\|<>:xip/c:NP#48-63\$<i>9<b>2!, 'Constituency fine');
-like($stream->pos(7)->to_string, qr!\|<>:xip/c:DET#48-51\$<i>8<b>3!, 'Constituency fine');
-
-like($stream->pos(8)->to_string, qr!\|<>:xip/c:NPA#52-63\$<i>9<b>3!, 'Constituency fine');
-like($stream->pos(8)->to_string, qr!\|<>:xip/c:NOUN#52-63\$<i>9<b>4!, 'Constituency fine');
-
-like($stream->pos(9)->to_string, qr!\|<>:xip/c:NP#64-73\$<i>10<b>2!, 'Constituency fine');
-like($stream->pos(9)->to_string, qr!\|<>:xip/c:NPA#64-73\$<i>10<b>3!, 'Constituency fine');
-like($stream->pos(9)->to_string, qr!\|<>:xip/c:NOUN#64-73\$<i>10<b>4!, 'Constituency fine');
-
-like($stream->pos(10)->to_string, qr!\|<>:xip/c:PTCL#74-77\$<i>11<b>2!, 'Constituency fine');
-
-like($stream->pos(11)->to_string, qr!\|<>:xip/c:SC#79-128\$<i>18!, 'Constituency fine');
-like($stream->pos(11)->to_string, qr!\|<>:xip/c:CONJ#79-84\$<i>12<b>1!, 'Constituency fine');
-
-like($stream->pos(12)->to_string, qr!\|<>:xip/c:NP#85-96\$<i>14<b>1!, 'Constituency fine');
-like($stream->pos(12)->to_string, qr!\|<>:xip/c:DET#85-88\$<i>13<b>2!, 'Constituency fine');
-
-
-like($stream->pos(13)->to_string, qr!\|<>:xip/c:NPA#89-96\$<i>14<b>2!, 'Constituency fine');
-like($stream->pos(13)->to_string, qr!\|<>:xip/c:NOUN#89-96\$<i>14<b>3!, 'Constituency fine');
-
-like($stream->pos(14)->to_string, qr!\|<>:xip/c:NP#97-101\$<i>15<b>1!, 'Constituency fine');
-like($stream->pos(14)->to_string, qr!\|<>:xip/c:NPA#97-101\$<i>15<b>2!, 'Constituency fine');
-like($stream->pos(14)->to_string, qr!\|<>:xip/c:NOUN#97-101\$<i>15<b>3!, 'Constituency fine');
-
-like($stream->pos(15)->to_string, qr!\|<>:xip/c:NP#102-111\$<i>16<b>1!, 'Constituency fine');
-like($stream->pos(15)->to_string, qr!\|<>:xip/c:NPA#102-111\$<i>16<b>2!, 'Constituency fine');
-like($stream->pos(15)->to_string, qr!\|<>:xip/c:NOUN#102-111\$<i>16<b>3!, 'Constituency fine');
-
-like($stream->pos(16)->to_string, qr!\|<>:xip/c:VERB#112-123\$<i>17<b>1!, 'Constituency fine');
-
-like($stream->pos(17)->to_string, qr!\|<>:xip/c:VERB#124-128\$<i>18<b>1!, 'Constituency fine');
-
-# diag $stream->to_string;
-
-
-# ADJA ADJA NN VVFIN ART NN ART NN NE PTKVZ KOUS ART NN NN NN VVPP VAFIN
-done_testing;
-__END__
-
-
-# Todo: CoreNLP/Constituency!
-
-
-
-
-
-# Connexor
-push(@layers, ['Connexor', 'Morpho']);
-push(@layers, ['Connexor', 'Syntax']);
-push(@layers, ['Connexor', 'Phrase']);
-push(@layers, ['Connexor', 'Sentences']);
-
-# TreeTagger
-push(@layers, ['TreeTagger', 'Morpho']);
-push(@layers, ['TreeTagger', 'Sentences']);
-
-# Mate
-# push(@layers, ['Mate', 'Morpho']);
-push(@layers, ['Mate', 'Dependency']);
-
-# XIP
-push(@layers, ['XIP', 'Morpho']);
-push(@layers, ['XIP', 'Constituency']);
-push(@layers, ['XIP', 'Dependency']);
-push(@layers, ['XIP', 'Sentences']);
-
-
-__END__

diff --git a/t/artificial/header.xml b/t/artificial/header.xml
index 950e202..589e75e 100644
--- a/t/artificial/header.xml
+++ b/t/artificial/header.xml

@@ -36,7 +36,7 @@
             <pubDate type="year">2001</pubDate>
             <pubDate type="month">04</pubDate>
             <pubDate type="day">02</pubDate>
-	    <pubPlace>Mannheim</pubPlace>
+	    <pubPlace key="DE">Mannheim</pubPlace>
           </imprint>
           <biblScope type="issue"/>
           <biblScope type="issueplace"/>

diff --git a/t/index/corpus/doc/0001/header.xml b/t/index/corpus/doc/0001/header.xml
index fb770f7..dd5c085 100644
--- a/t/index/corpus/doc/0001/header.xml
+++ b/t/index/corpus/doc/0001/header.xml

@@ -19,7 +19,7 @@
           <h.title type="main">Beispiel Text</h.title>
 	  <h.title type="sub">Beispiel Text Untertitel</h.title>
           <h.author>Mustermann, Max</h.author>
-	  <editor>Monkika Mustermann</editor>
+	  <editor>Monika Mustermann</editor>
           <imprint/>
           <biblScope type="pp"/>
           <biblScope type="suppl"/>

diff --git a/t/index/meta.t b/t/index/meta.t
new file mode 100644
index 0000000..dadcb4c
--- /dev/null
+++ b/t/index/meta.t

@@ -0,0 +1,58 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+use lib 't/index';
+use TestInit;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', '0001');
+
+ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc->parse, 'Parse document');
+like($doc->path, qr!$path/!, 'Path');
+
+# Metadata
+is($doc->text_sigle, 'Corpus_Doc.0001', 'ID-text');
+is($doc->doc_sigle, 'Corpus_Doc', 'ID-doc');
+is($doc->corpus_sigle, 'Corpus', 'ID-corpus');
+
+is($doc->title, 'Beispiel Text', 'title');
+is($doc->sub_title, 'Beispiel Text Untertitel', 'title');
+is($doc->pub_date, '20010402', 'Publication date');
+is($doc->pub_place, 'Mannheim', 'Publication place');
+is($doc->author, 'Mustermann, Max', 'Author');
+
+is($doc->publisher, 'Artificial articles Inc.', 'Publisher');
+is($doc->editor, 'Monika Mustermann', 'Editor');
+is($doc->text_type, 'Zeitung: Tageszeitung', 'Text Type');
+is($doc->text_type_art, 'Bericht', 'Text Type Art');
+is($doc->text_type_ref, 'Aphorismen', 'Text Type Ref');
+ok(!$doc->text_column, 'Text Column');
+ok(!$doc->text_domain, 'Text Domain');
+is($doc->creation_date, '19990601', 'Creation Date');
+ok(!$doc->license, 'License');
+ok(!$doc->pages, 'Pages');
+ok(!$doc->file_edition_statement, 'File Edition Statement');
+ok(!$doc->bibl_edition_statement, 'Bibl Edition Statement');
+ok(!$doc->reference, 'Reference');
+is($doc->language, 'de', 'Language');
+
+is($doc->doc_title, 'Beispiel Dokument', 'Doc: title');
+ok(!$doc->doc_sub_title, 'Doc: subtitle');
+ok(!$doc->doc_editor, 'Doc: editor');
+ok(!$doc->doc_author, 'Doc: author');
+
+is($doc->corpus_title, 'Beispiel-Corpus', 'Corpus: title');
+ok(!$doc->corpus_sub_title, 'Corpus: subtitle');
+ok(!$doc->corpus_editor, 'Corpus: editor');
+ok(!$doc->corpus_author, 'Corpus: author');
+
+done_testing;
+
+__END__

diff --git a/t/index/opennlp_morpho.t b/t/index/opennlp_morpho.t
index 82182a2..cf57006 100644
--- a/t/index/opennlp_morpho.t
+++ b/t/index/opennlp_morpho.t

@@ -10,6 +10,41 @@
 
 ok(my $tokens = TestInit::tokens('0001'), 'Parse tokens');
 
+is($tokens->stream->pos(0)->to_string, '[(0-3)-:tokens$<i>18|_0$<i>0<i>3|i:zum|s:Zum]', 'Token is correct');
+
+is($tokens->stream->pos(1)->to_string, '[(4-11)_1$<i>4<i>11|i:letzten|s:letzten]', 'Token is correct');
+
+my $i = 2;
+foreach ([12,23, 'kulturellen'],
+	 [24,30, 'Anlass'],
+	 [31,35, 'lädt'],
+	 [36,39, 'die'],
+	 [40,47, 'Leitung'],
+	 [48,51, 'des'],
+	 [52,63, 'Schulheimes'],
+	 [64,73, 'Hofbergli'],
+	 [74,77, 'ein'],
+	 [79,84, 'bevor'],
+	 [85,88, 'der'],
+	 [89,96, 'Betrieb'],
+	 [97,101, 'Ende'],
+	 [102,111, 'Schuljahr'],
+	 [112,123, 'eingestellt'],
+	 [124,128, 'wird']
+       ) {
+  is($tokens->stream->pos($i++)->to_string,
+     '[('.$_->[0].'-'.$_->[1].')'.
+       '_'.($i-1).
+	 '$<i>'.$_->[0].'<i>' . $_->[1] . '|' .
+	 'i:'.lc($_->[2]).'|s:'.$_->[2].']',
+     'Token is correct');
+};
+
+ok(!$tokens->stream->pos($i++), 'No more tokens');
+
+
+
+
 ok($tokens->add('OpenNLP', 'Morpho'), 'Add Structure');
 
 my $data = $tokens->to_data->{data};

diff --git a/t/index/primary.t b/t/index/primary.t
new file mode 100644
index 0000000..7abf629
--- /dev/null
+++ b/t/index/primary.t

@@ -0,0 +1,31 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+use lib 't/index';
+use TestInit;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+
+my $path = catdir(dirname(__FILE__), 'corpus', 'doc', '0001');
+
+ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
+ok($doc->parse, 'Parse document');
+like($doc->path, qr!$path/!, 'Path');
+
+is($doc->primary->data,
+   'Zum letzten kulturellen Anlass lädt die Leitung des Schulheimes Hofbergli ein, '.
+     'bevor der Betrieb Ende Schuljahr eingestellt wird.', 'Primary data');
+
+is($doc->primary->data_length, 129, 'Primary data length');
+
+is($doc->primary->data(0,3), 'Zum', 'Get primary data');
+
+
+done_testing;
+
+__END__

diff --git a/t/meta.t b/t/meta.t
index d3c851c..3aa0f34 100644
--- a/t/meta.t
+++ b/t/meta.t

@@ -11,10 +11,6 @@
 use File::Basename 'dirname';
 use File::Spec::Functions 'catdir';
 
-
-diag 'Support "availability"';
-diag 'Support "pubPlace-key"';
-
 # TODO: Make 'text' -> 'primaryText'
 
 use_ok('KorAP::Document');
@@ -60,7 +56,6 @@
 ok(!$doc->text_column, 'no text_column');
 ok(!$doc->keywords_string, 'no keywords');
 is($doc->text_class_string, 'freizeit-unterhaltung reisen wissenschaft populaerwissenschaft', 'no text classes');
-ok(!$doc->language, 'no text_column');
 
 #is($doc->coll_title, 'Wikipedia', 'Collection title');
 #is($doc->coll_sub_title, 'Die freie Enzyklopädie', 'Collection subtitle');
@@ -77,7 +72,6 @@
 is($doc->text_sigle, 'BRZ13_APR.00001', 'ID');
 is($doc->corpus_sigle, 'BRZ13', 'corpusID');
 
-
 is($doc->pub_date, '20130402', 'pubDate');
 is($doc->pub_place, 'Braunschweig', 'pubPlace');
 
@@ -124,13 +118,13 @@
 ok(!$doc->text_type, 'text_type');
 is($doc->text_type_art, 'Bericht', 'text_type art');
 
-
 # ERL/0001
 $path = catdir(dirname(__FILE__), 'ERL/00001');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
 
 ok($doc->parse, 'Parse document');
-is($doc->title, 'MK2/ERL.00001 Amtsblatt des Landesbezirks Baden [diverse Erlasse], Hrsg. und Schriftleitung: Präsidialstelle der Landesverwaltung Baden in Karlsruhe. - Karlsruhe, o.J.', 'title'); # Amtsblatt des Landesbezirks Baden [diverse Erlasse]
+is($doc->title, 'Amtsblatt des Landesbezirks Baden [diverse Erlasse], Hrsg. und Schriftleitung: Präsidialstelle der Landesverwaltung Baden in Karlsruhe. - Karlsruhe, o.J.', 'title'); # Amtsblatt des Landesbezirks Baden [diverse Erlasse]
+# MK2/ERL.00001
 
 ok(!$doc->sub_title, 'subTitle');
 is($doc->text_sigle, 'MK2_ERL.00001', 'ID');
@@ -146,7 +140,11 @@
 ok(!$doc->editor, 'Editor');
 is($doc->publisher, 'Badenia Verlag und Druckerei', 'Publisher');
 is($doc->creation_date, '19600000', 'Creation date');
-diag 'Non-acceptance of creation date ranges may be temporary';
+
+# XXX: diag disabled in this change:
+# diag 'Non-acceptance of creation date ranges may be temporary';
+
+
 #ok(!$doc->coll_title, 'Collection title');
 #ok(!$doc->coll_sub_title, 'Collection subtitle');
 #ok(!$doc->coll_editor, 'Collection editor');
@@ -155,11 +153,11 @@
 ok(!$doc->text_type_art, 'text_type art');
 
 # A01/02035-substring
-$path = catdir(dirname(__FILE__), 'A01/02035-substring');
+$path = catdir(dirname(__FILE__), 'A00/02035-substring');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
 
 ok($doc->parse, 'Parse document');
-is($doc->title, 'A00/JAN.02035 St. Galler Tagblatt, 11.01.2000, Ressort: TB-RSP (Abk.)', 'title');
+is($doc->title, 'St. Galler Tagblatt, 11.01.2000, Ressort: TB-RSP (Abk.)', 'title'); # A00/JAN.02035
 ok(!$doc->sub_title, 'subTitle');
 is($doc->text_sigle, 'A00_JAN.02035', 'ID');
 is($doc->corpus_sigle, 'A00', 'corpusID');
@@ -182,7 +180,7 @@
 is($doc->text_type_art, 'Bericht', 'text_type art');
 
 # A01/02873-meta
-$path = catdir(dirname(__FILE__), 'A01/02873-meta');
+$path = catdir(dirname(__FILE__), 'A00/02873-meta');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
 
 ok($doc->parse, 'Parse document');
@@ -197,6 +195,7 @@
 ok(!$doc->text_class->[2], 'TextClass');
 ok(!$doc->author, 'author');
 
+
 # Additional information
 ok(!$doc->editor, 'Editor');
 ok(!$doc->publisher, 'Publisher');
@@ -210,7 +209,7 @@
 
 
 # A01/05663-unbalanced
-$path = catdir(dirname(__FILE__), 'A01/05663-unbalanced');
+$path = catdir(dirname(__FILE__), 'A00/05663-unbalanced');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
 
 ok($doc->parse, 'Parse document');
@@ -225,6 +224,7 @@
 ok(!$doc->text_class->[2], 'TextClass');
 ok(!$doc->author, 'author');
 
+
 # Additional information
 ok(!$doc->editor, 'Editor');
 ok(!$doc->publisher, 'Publisher');
@@ -238,7 +238,7 @@
 
 
 # A01/07452-deep
-$path = catdir(dirname(__FILE__), 'A01/07452-deep');
+$path = catdir(dirname(__FILE__), 'A00/07452-deep');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
 
 ok($doc->parse, 'Parse document');
@@ -253,6 +253,7 @@
 ok(!$doc->text_class->[2], 'TextClass');
 ok(!$doc->author, 'author');
 
+
 # Additional information
 ok(!$doc->editor, 'Editor');
 ok(!$doc->publisher, 'Publisher');
@@ -264,7 +265,6 @@
 ok(!$doc->text_type, 'text_type');
 is($doc->text_type_art, 'Bericht', 'text_type art');
 
-
 # ART
 $path = catdir(dirname(__FILE__), 'artificial');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
@@ -282,6 +282,7 @@
 is($doc->corpus_sigle, 'ART', 'corpusID');
 is($doc->pub_date, '20010402', 'pubDate');
 is($doc->pub_place, 'Mannheim', 'pubPlace');
+is($doc->pub_place_key, 'DE', 'pubPlace key');
 is($doc->text_class->[0], 'freizeit-unterhaltung', 'TextClass');
 is($doc->text_class->[1], 'vereine-veranstaltungen', 'TextClass');
 ok(!$doc->text_class->[2], 'TextClass');
@@ -310,10 +311,14 @@
 like($doc->path, qr!$path/$!, 'Path');
 
 ok($doc->parse, 'Parse document');
-is($doc->text_sigle, 'VDI_JAN.00001', 'text sigle');
-is($doc->doc_sigle, 'VDI_JAN', 'doc sigle');
-is($doc->corpus_sigle, 'VDI', 'corpus sigle');
+
+
+is($doc->text_sigle, 'VDI14_JAN.00001', 'text sigle');
+is($doc->doc_sigle, 'VDI14_JAN', 'doc sigle');
+is($doc->corpus_sigle, 'VDI14', 'corpus sigle');
+
 is($doc->title, '10- Zz mit Zahl', 'title');
+
 ok(!$doc->sub_title, 'subtitle');
 is($doc->pub_date, '20140117', 'pubdate');
 is($doc->pub_place, 'Düsseldorf', 'pubplace');
@@ -334,7 +339,8 @@
 is($doc->reference, 'VDI nachrichten, 17.01.2014, S. 10; 10- Zz mit Zahl [Ausführliche Zitierung nicht verfügbar]', 'Reference');
 
 ok(!$doc->language, 'Language');
-diag 'This may be "de" in the future';
+# XXX: diag disabled in this change:
+# diag 'This may be "de" in the future';
 
 is($doc->doc_title, 'VDI nachrichten, Januar 2014', 'Doc title');
 ok(!$doc->doc_sub_title, 'Doc Sub title');
@@ -349,7 +355,6 @@
 is($doc->keywords_string, '', 'Keywords');
 is($doc->text_class_string, 'Freizeit-Unterhaltung Reisen Politik Ausland', 'Text class');
 
-
 # WDD
 $path = catdir(dirname(__FILE__), 'WDD/G27/38989');
 ok($doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
@@ -397,7 +402,9 @@
 is($doc->keywords_string, '', 'Keywords');
 is($doc->text_class_string, '', 'Text class');
 
+
+is($doc->availability, 'CC-BY-SA', 'Availability');
+
+
 done_testing;
 __END__
-
-

diff --git a/t/sgbr/sgbr_meta.t b/t/sgbr/sgbr_meta.t
index 8ab6414..50891f8 100644
--- a/t/sgbr/sgbr_meta.t
+++ b/t/sgbr/sgbr_meta.t

@@ -23,6 +23,43 @@
 is($doc->doc_sigle, 'TEST_BSP', 'ID-doc');
 is($doc->corpus_sigle, 'TEST', 'ID-corpus');
 
-diag 'TODO: Parse meta';
+is($doc->title, 'Sommerüberraschung', 'title');
+#is($doc->sub_title, 'Beispiel Text Untertitel', 'title');
+#is($doc->pub_date, '20010402', 'Publication date');
+#is($doc->pub_place, 'Mannheim', 'Publication place');
+is($doc->author, 'TEST.BSP.Autoren.1', 'Author');
+is($doc->store('sgbrAuthorAgeClass'), 'X', 'AgeClass');
+is($doc->store('sgbrAuthorSex'), 'M', 'Sex');
+is($doc->store('sgbrKodex'), 'M', 'Kodex');
+
+is($doc->doc_title, 'Beispielkorpus', 'Doc: title');
+is($doc->doc_sub_title, 'Subkorpus Beispieltext', 'Doc: subtitle');
+
+is($doc->language, 'de', 'Language');
+
+ok(!$doc->publisher, 'Publisher');
+ok(!$doc->editor, 'Editor');
+ok(!$doc->text_type, 'Text Type');
+ok(!$doc->text_type_art, 'Text Type Art');
+ok(!$doc->text_type_ref, 'Text Type Ref');
+ok(!$doc->text_column, 'Text Column');
+ok(!$doc->text_domain, 'Text Domain');
+ok(!$doc->creation_date, 'Creation Date');
+ok(!$doc->license, 'License');
+ok(!$doc->pages, 'Pages');
+ok(!$doc->file_edition_statement, 'File Edition Statement');
+ok(!$doc->bibl_edition_statement, 'Bibl Edition Statement');
+ok(!$doc->reference, 'Reference');
+
+ok(!$doc->doc_editor, 'Doc: editor');
+ok(!$doc->doc_author, 'Doc: author');
+
+ok(!$doc->corpus_title, 'Corpus: title');
+ok(!$doc->corpus_sub_title, 'Corpus: subtitle');
+ok(!$doc->corpus_editor, 'Corpus: editor');
+ok(!$doc->corpus_author, 'Corpus: author');
 
 done_testing;
+
+
+__END__
commit	a866578571f08dcdb3b7b7f6559aec39e49a7e6c	[log] [tgz]
author	Akron <nils@diewald-online.de>	Wed Jan 27 21:47:57 2016 +0100
committer	Akron <nils@diewald-online.de>	Wed Jan 27 21:47:57 2016 +0100
tree	f53efe928aeb8fa246264141fbd4a920d655916f
parent	b2636cf2f8813f1ff62ade23d9afe1c098db1acc [diff]