Started meta parsing for Schreibgebrauch

Change-Id: Ib0f58cfaceff691bc237dfee8a5f957fb3b3391c
diff --git a/lib/KorAP/Document.pm b/lib/KorAP/Document.pm
index 8aeec55..3e9f82e 100644
--- a/lib/KorAP/Document.pm
+++ b/lib/KorAP/Document.pm
@@ -145,11 +145,12 @@
     unshift @header, '/' . catfile(@path, 'header.xml');
     pop @path;
   };
+
   my @type = qw/corpus doc text/;
   foreach (@header) {
     # Get corpus, doc and text meta data
     my $type = shift(@type);
-    $self->_parse_meta($_, $type) if -e $_;
+    $self->_parse_meta_i5($_, $type) if -e $_;
   };
 
   return 1;
@@ -213,7 +214,7 @@
 };
 
 
-sub _parse_meta {
+sub _parse_meta_i5 {
   my $self = shift;
   my $header_xml = shift;
   my $type = shift;
@@ -521,88 +522,6 @@
 };
 
 
-# Don't work that well
-sub _parse_meta_fast {
-  my $self = shift;
-
-  #  my $file = b($self->path . 'header.xml')->slurp->decode('iso-8859-1');
-    my $file = b($self->path . 'header.xml')->slurp;
-
-  my ($meta, $error);
-  my $unable = 'Unable to parse document ' . $self->path;
-
-  try {
-      local $SIG{__WARN__} = sub {
-	  $error = 1;
-      };
-      $meta = xml2hash(
-	$file,
-	text => '#text',
-	attr => '-',
-	array => ['h.title', 'imprint', 'catRef', 'h.author']
-      )->{idsHeader};
-  }
-  catch  {
-      $self->log->warn($unable);
-      $error = 1;
-  };
-
-  return if $error;
-
-  my $bibl_struct = $meta->{fileDesc}->{sourceDesc}->{biblStruct};
-  my $analytic = $bibl_struct->{analytic};
-
-  my $titles = $analytic->{'h.title'};
-  foreach (@$titles) {
-    if ($_->{'-type'} eq 'main') {
-      $self->title($_->{'#text'});
-    }
-    elsif ($_->{'-type'} eq 'sub') {
-      $self->sub_title($_->{'#text'});
-    };
-  };
-
-  # Get Author
-  if (my $author = $analytic->{'h.author'}) {
-    $self->author($author->[0]);
-  };
-
-  # Get pubDate
-  my $date = $bibl_struct->{monogr}->{imprint};
-  my ($year, $month, $day) = (0,0,0);
-  foreach (@$date) {
-    if ($date->{-type} eq 'year') {
-      $year = $date->{'#text'};
-    }
-    elsif ($date->{-type} eq 'month') {
-      $month = $date->{'#text'};
-    }
-    elsif ($date->{-type} eq 'day') {
-      $day = $date->{'#text'};
-    };
-  };
-
-  $year  = 0 if $year  !~ /^\d+$/;
-  $month = 0 if $month !~ /^\d+$/;
-  $day   = 0 if $day   !~ /^\d+$/;
-
-  $date = $year ? ($year < 100 ? '20' . $year : $year) : '0000';
-  $date .= length($month) == 1 ? '0' . $month : $month;
-  $date .= length($day) == 1 ? '0' . $day : $day;
-
-  $self->pub_date($date);
-
-  # Get textClasses
-  my @topic;
-  my $textClass = $meta->{profileDesc}->{textClass}->{catRef};
-  foreach (@$textClass) {
-    my ($ign, @ttopic) = split('\.', $_->{'-target'});
-    push(@topic, @ttopic);
-  };
-  $self->text_class(@topic);
-};
-
-
 
 1;
 
diff --git a/lib/KorAP/Index/Schreibgebrauch/Lemma.pm b/lib/KorAP/Index/Schreibgebrauch/Lemma.pm
index d3807e2..5bd01d5 100644
--- a/lib/KorAP/Index/Schreibgebrauch/Lemma.pm
+++ b/lib/KorAP/Index/Schreibgebrauch/Lemma.pm
@@ -34,10 +34,14 @@
 	  # warn $found;
 
 	  unless ($first++) {
-	    $mtt->add(term => 'sgbr/l:' . $found);
+	    $mtt->add(
+	      term => 'sgbr/l:' . $found
+	    );
 	  }
 	  else {
-	    $mtt->add(term => 'sgbr/lv:' . $found);
+	    $mtt->add(
+	      term => 'sgbr/lv:' . $found
+	    );
 	  };
 	};
       };
diff --git a/t/index/mate_dependency.t b/t/index/mate_dependency.t
index 1622bad..2228a43 100644
--- a/t/index/mate_dependency.t
+++ b/t/index/mate_dependency.t
@@ -2,7 +2,7 @@
 use strict;
 use warnings;
 use utf8;
-use Test::More; # skip_all => 'Not yet implemented';
+use Test::More skip_all => 'Not yet implemented';
 use Scalar::Util qw/weaken/;
 use Data::Dumper;
 use lib 't/index';
diff --git a/t/sgbr/TEST/BSP/1/sgbr/ana.xml b/t/sgbr/TEST/BSP/1/sgbr/ana.xml
index 987b84d..9a2e798 100644
--- a/t/sgbr/TEST/BSP/1/sgbr/ana.xml
+++ b/t/sgbr/TEST/BSP/1/sgbr/ana.xml
@@ -684,4 +684,4 @@
       </fs>
     </span>
   </spanList>
-</layer>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/TEST/BSP/1/sgbr/lemma.xml b/t/sgbr/TEST/BSP/1/sgbr/lemma.xml
index 5085f21..fbb28d0 100644
--- a/t/sgbr/TEST/BSP/1/sgbr/lemma.xml
+++ b/t/sgbr/TEST/BSP/1/sgbr/lemma.xml
@@ -491,4 +491,4 @@
       </fs>
     </span>
   </spanList>
-</layer>
+</layer>
\ No newline at end of file
diff --git a/t/sgbr/sgbr_meta.t b/t/sgbr/sgbr_meta.t
new file mode 100644
index 0000000..8ab6414
--- /dev/null
+++ b/t/sgbr/sgbr_meta.t
@@ -0,0 +1,28 @@
+use strict;
+use warnings;
+use Test::More;
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+use Data::Dumper;
+use KorAP::Tokenizer;
+use KorAP::Document;
+use utf8;
+
+my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);
+
+ok(my $doc = KorAP::Document->new(
+  path => $path . '/'
+), 'Create Document');
+
+ok($doc->parse, 'Parse document');
+
+like($doc->path, qr!$path/!, 'Path');
+
+# Metadata
+is($doc->text_sigle, 'TEST_BSP.1', 'ID-text');
+is($doc->doc_sigle, 'TEST_BSP', 'ID-doc');
+is($doc->corpus_sigle, 'TEST', 'ID-corpus');
+
+diag 'TODO: Parse meta';
+
+done_testing;