Added Schreibgebrauch features
Change-Id: I2eb4f6b32007370985e2cf4168e00a95cbde4001
diff --git a/lib/KorAP/Document.pm b/lib/KorAP/Document.pm
index 2e3da2a..8aeec55 100644
--- a/lib/KorAP/Document.pm
+++ b/lib/KorAP/Document.pm
@@ -116,7 +116,7 @@
# Get document id and corpus id
if ($rt && $rt->{'-docid'}) {
$self->text_sigle($rt->{'-docid'});
- if ($self->text_sigle =~ /^(([^_]+)_[^\._]+?)\.\d+$/) {
+ if ($self->text_sigle =~ /^(([^_]+)_[^\._]+?)\..+?$/) {
$self->corpus_sigle($2);
$self->doc_sigle($1);
}
diff --git a/lib/KorAP/Index/Malt/Dependency.pm b/lib/KorAP/Index/Malt/Dependency.pm
new file mode 100644
index 0000000..1f55824
--- /dev/null
+++ b/lib/KorAP/Index/Malt/Dependency.pm
@@ -0,0 +1,30 @@
+package KorAP::Index::Malt::Dependency;
+use KorAP::Index::Base;
+use Data::Dumper;
+
+# Registers the 'malt' dependency callback; returns 1, or nothing if add_tokendata() returns false.
+sub parse {
+ my $self = shift;
+ # Relation data
+ $$self->add_tokendata(
+ foundry => 'malt',
+ layer => 'dependency',
+ cb => sub {
+ my ($stream, $token, $tokens) = @_;
+
+ # Get MultiTermToken from stream
+ my $mtt = $stream->pos($token->pos);
+
+ # Serialized information from token (NOTE(review): like $mtt, not used yet - no terms are added)
+ my $content = $token->hash;
+ }) or return;
+
+ return 1;
+};
+
+# Returns the list of layer descriptors of this module.
+sub layer_info {
+ ['malt/d=rels']
+};
+
+1;
diff --git a/lib/KorAP/Index/Schreibgebrauch/Morpho.pm b/lib/KorAP/Index/Schreibgebrauch/Morpho.pm
new file mode 100644
index 0000000..195b084
--- /dev/null
+++ b/lib/KorAP/Index/Schreibgebrauch/Morpho.pm
@@ -0,0 +1,44 @@
+package KorAP::Index::Schreibgebrauch::Morpho;
+use KorAP::Index::Base;
+
+# Index the 'sgbr' lemmata: the first lemma of a token is added as
+# 'sgbr/l:', every lemma (the first included) as 'sgbr/lv:'.
+# Returns 1 on success and nothing if add_tokendata() returns false.
+sub parse {
+  my $self = shift;
+
+  $$self->add_tokendata(
+    foundry => 'sgbr',
+    layer => 'lemma',
+    cb => sub {
+      my ($stream, $token) = @_;
+      my $mtt = $stream->pos($token->pos);
+
+      # The features are nested (fs/f/fs/f); a single one is not a list
+      my $content = $token->hash->{fs}->{f};
+      my $features = $content->{fs}->{f};
+      my @lemmata = ref $features eq 'ARRAY' ? @$features : ($features);
+
+      my $first = 0;
+      foreach my $f (@lemmata) {
+        # Skip missing entries and features without a name
+        next unless ref $f eq 'HASH';
+        next unless defined $f->{-name} && $f->{-name} eq 'lemma';
+        # Test the length rather than the truth value, so '0' is kept
+        my $found = $f->{'#text'};
+        next unless defined $found && length $found;
+
+        $mtt->add(term => 'sgbr/l:' . $found) unless $first++;
+        $mtt->add(term => 'sgbr/lv:' . $found);
+      };
+    }) or return;
+
+  return 1;
+};
+
+# Returns the list of layer descriptors of this module.
+sub layer_info {
+  ['sgbr/l=tokens', 'sgbr/lv=tokens']
+}
+
+1;
diff --git a/t/index/dereko_struct.t b/t/index/dereko_struct.t
index beaf248..1c9ae07 100644
--- a/t/index/dereko_struct.t
+++ b/t/index/dereko_struct.t
@@ -8,8 +8,6 @@
use Scalar::Util qw/weaken/;
use Data::Dumper;
-use_ok('KorAP::Document');
-
ok(my $tokens = TestInit::tokens('0001'), 'Parse tokens');
ok($tokens->add('DeReKo', 'Structure'), 'Add Structure');
diff --git a/t/index/malt_dependency.t b/t/index/malt_dependency.t
new file mode 100644
index 0000000..b1b7e20
--- /dev/null
+++ b/t/index/malt_dependency.t
@@ -0,0 +1,28 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use utf8;
+use Test::More skip_all => 'Not yet implemented';
+use Scalar::Util qw/weaken/;
+use Data::Dumper;
+use lib 't/index';
+use TestInit;
+
+ok(my $tokens = TestInit::tokens('0002'), 'Parse tokens');
+
+# Layer name spelled like the package KorAP::Index::Malt::Dependency
+ok($tokens->add('Malt', 'Dependency'), 'Add Structure');
+
+my $data = $tokens->to_data->{data};
+
+done_testing;
+__END__
+
+like($data->{foundries}, qr!xip/sentences!, 'data');
+
+is($data->{stream}->[0]->[1], '-:xip/sentences$<i>1', 'Number of paragraphs');
+is($data->{stream}->[0]->[0], '-:tokens$<i>18', 'Number of tokens');
+is($data->{stream}->[0]->[2], '<>:xip/s:s$<b>64<i>0<i>129<i>17<b>0', 'Text');
+is($data->{stream}->[0]->[3], '_0$<i>0<i>3', 'Position');
+is($data->{stream}->[-1]->[0], '_17$<i>124<i>128', 'Position');
+
diff --git a/t/index/mate_dependency.t b/t/index/mate_dependency.t
index 807e11d..1622bad 100644
--- a/t/index/mate_dependency.t
+++ b/t/index/mate_dependency.t
@@ -2,7 +2,7 @@
use strict;
use warnings;
use utf8;
-use Test::More skip_all => 'Not yet implemented';
+use Test::More; # skip_all => 'Not yet implemented';
use Scalar::Util qw/weaken/;
use Data::Dumper;
use lib 't/index';
@@ -12,7 +12,9 @@
ok($tokens->add('Mate', 'Dependency'), 'Add Structure');
-# my $data = $tokens->to_data->{data};
+my $data = $tokens->to_data->{data};
+
+diag Dumper $data;
done_testing;