Added Schreibgebrauch features

Change-Id: I2eb4f6b32007370985e2cf4168e00a95cbde4001

commit: 194be547ddcfe6753f336c3dea0f7ff995fbb482 [log] [tgz]
author: Akron <nils@diewald-online.de> Thu Jan 21 12:52:43 2016 +0100
committer: Akron <nils@diewald-online.de> Thu Jan 21 12:52:43 2016 +0100
tree: 9b8c132ef65f92a9603c4ded1980d1051fee89b9
parent: 11ef51d46e069fed40d4abe09a019307ab1ab1d8 [diff]
diff --git a/lib/KorAP/Document.pm b/lib/KorAP/Document.pm
index 2e3da2a..8aeec55 100644
--- a/lib/KorAP/Document.pm
+++ b/lib/KorAP/Document.pm

@@ -116,7 +116,7 @@
   # Get document id and corpus id
   if ($rt && $rt->{'-docid'}) {
     $self->text_sigle($rt->{'-docid'});
-    if ($self->text_sigle =~ /^(([^_]+)_[^\._]+?)\.\d+$/) {
+    if ($self->text_sigle =~ /^(([^_]+)_[^\._]+?)\..+?$/) {
       $self->corpus_sigle($2);
       $self->doc_sigle($1);
     }

diff --git a/lib/KorAP/Index/Malt/Dependency.pm b/lib/KorAP/Index/Malt/Dependency.pm
new file mode 100644
index 0000000..1f55824
--- /dev/null
+++ b/lib/KorAP/Index/Malt/Dependency.pm

@@ -0,0 +1,30 @@
+package KorAP::Index::Malt::Dependency;
+use KorAP::Index::Base;
+use Data::Dumper;
+
+sub parse {
+  my ($self) = @_;
+
+  # Register the (still empty) handler for malt dependency relations
+  return unless $$self->add_tokendata(
+    foundry => 'malt',
+    layer   => 'dependency',
+    cb      => sub {
+      my ($stream, $token, $tokens) = @_;
+      # MultiTermToken found at the position of the current token
+      my $multi_term_token = $stream->pos($token->pos);
+
+      # Serialized token information, not evaluated any further so far
+      my $serialized = $token->hash;
+    }
+  );
+
+  return 1;
+};
+
+sub layer_info {
+  return ['malt/d=rels'];  # explicit return of the one-element list
+};
+
+
+1;

diff --git a/lib/KorAP/Index/Schreibgebrauch/Morpho.pm b/lib/KorAP/Index/Schreibgebrauch/Morpho.pm
new file mode 100644
index 0000000..195b084
--- /dev/null
+++ b/lib/KorAP/Index/Schreibgebrauch/Morpho.pm

@@ -0,0 +1,44 @@
+package KorAP::Index::Schreibgebrauch::Morpho;
+use KorAP::Index::Base;
+
+# Add the "sgbr" (Schreibgebrauch) lemma layer to the token stream.
+# Returns 1, or an empty return when add_tokendata() yields a false value.
+sub parse {
+  my $self = shift;
+
+  $$self->add_tokendata(
+    foundry => 'sgbr',
+    layer => 'lemma',
+    cb => sub {
+      my ($stream, $token) = @_;
+      my $mtt = $stream->pos($token->pos);
+
+      my $content = $token->hash->{fs}->{f};
+
+      # Wrap a non-array value so that one feature and many are handled alike
+      my $features = $content->{fs}->{f};
+      my $lemmata = (ref $features eq 'ARRAY') ? $features : [$features];
+
+      my $first = 0;
+      foreach my $f (@$lemmata) {
+        my $name = $f->{-name};
+        next unless defined $name && $name eq 'lemma';
+
+        # Check definedness and length, not truth, to keep the lemma "0"
+        my $found = $f->{'#text'};
+        next unless defined $found && length $found;
+
+        # Only the first lemma gets the 'l' term, every lemma gets 'lv'
+        $mtt->add(term => 'sgbr/l:' . $found) unless $first++;
+        $mtt->add(term => 'sgbr/lv:' . $found);
+      };
+    }) or return;
+
+  return 1;
+};
+
+sub layer_info {
+    return [qw(sgbr/l=tokens sgbr/lv=tokens)];  # explicit return of both entries
+}
+
+1;

diff --git a/t/index/dereko_struct.t b/t/index/dereko_struct.t
index beaf248..1c9ae07 100644
--- a/t/index/dereko_struct.t
+++ b/t/index/dereko_struct.t

@@ -8,8 +8,6 @@
 use Scalar::Util qw/weaken/;
 use Data::Dumper;
 
-use_ok('KorAP::Document');
-
 ok(my $tokens = TestInit::tokens('0001'), 'Parse tokens');
 
 ok($tokens->add('DeReKo', 'Structure'), 'Add Structure');

diff --git a/t/index/malt_dependency.t b/t/index/malt_dependency.t
new file mode 100644
index 0000000..b1b7e20
--- /dev/null
+++ b/t/index/malt_dependency.t

@@ -0,0 +1,28 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use utf8;
+use Test::More skip_all => 'Not yet implemented';
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+use lib 't/index';
+use TestInit;
+
+ok(my $tokens = TestInit::tokens('0002'), 'Parse tokens');
+
+# Layer name capitalised as in KorAP::Index::Malt::Dependency (was 'dependency')
+ok($tokens->add('Malt', 'Dependency'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+done_testing;
+__END__
+
+
+like($data->{foundries}, qr!xip/sentences!, 'data');
+
+is($data->{stream}->[0]->[1], '-:xip/sentences$<i>1', 'Number of paragraphs');
+is($data->{stream}->[0]->[0], '-:tokens$<i>18', 'Number of tokens');
+is($data->{stream}->[0]->[2], '<>:xip/s:s$<b>64<i>0<i>129<i>17<b>0', 'Text');
+is($data->{stream}->[0]->[3], '_0$<i>0<i>3', 'Position');
+is($data->{stream}->[-1]->[0], '_17$<i>124<i>128', 'Position');
+

diff --git a/t/index/mate_dependency.t b/t/index/mate_dependency.t
index 807e11d..1622bad 100644
--- a/t/index/mate_dependency.t
+++ b/t/index/mate_dependency.t

@@ -2,7 +2,7 @@
 use strict;
 use warnings;
 use utf8;
-use Test::More skip_all => 'Not yet implemented';
+use Test::More; # skip_all => 'Not yet implemented';
 use Scalar::Util qw/weaken/;
 use Data::Dumper;
 use lib 't/index';
@@ -12,7 +12,9 @@
 
 ok($tokens->add('Mate', 'Dependency'), 'Add Structure');
 
-# my $data = $tokens->to_data->{data};
+my $data = $tokens->to_data->{data};
+
+diag Dumper $data;
 
 done_testing;
commit	194be547ddcfe6753f336c3dea0f7ff995fbb482	[log] [tgz]
author	Akron <nils@diewald-online.de>	Thu Jan 21 12:52:43 2016 +0100
committer	Akron <nils@diewald-online.de>	Thu Jan 21 12:52:43 2016 +0100
tree	9b8c132ef65f92a9603c4ded1980d1051fee89b9
parent	11ef51d46e069fed40d4abe09a019307ab1ab1d8 [diff]