Added base-sentences and base-paragraphs options
Change-Id: I695b65661d97785e75703207bfc83a316d0a4815
diff --git a/lib/KorAP/XML/Annotation/Base/Paragraphs.pm b/lib/KorAP/XML/Annotation/Base/Paragraphs.pm
index ced09c2..235e2ff 100644
--- a/lib/KorAP/XML/Annotation/Base/Paragraphs.pm
+++ b/lib/KorAP/XML/Annotation/Base/Paragraphs.pm
@@ -13,12 +13,12 @@
my $mtt = $stream->pos($span->p_start);
$mtt->add(
- term => '<>:base/s:p',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
- payload => '<b>1',
- pti => 64
+ term => '<>:base/s:p',
+ o_start => $span->o_start,
+ o_end => $span->o_end,
+ p_end => $span->p_end,
+ payload => '<b>1',
+ pti => 64
);
$i++;
}
diff --git a/lib/KorAP/XML/Annotation/Base/Sentences.pm b/lib/KorAP/XML/Annotation/Base/Sentences.pm
index 28c9434..1d66c8a 100644
--- a/lib/KorAP/XML/Annotation/Base/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/Base/Sentences.pm
@@ -16,12 +16,12 @@
$first = [$span->p_start, $span->o_start] unless defined $first;
$mtt->add(
- term => '<>:base/s:s',
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_end => $span->p_end,
- payload => '<b>2',
- pti => 64
+ term => '<>:base/s:s',
+ o_start => $span->o_start,
+ o_end => $span->o_end,
+ p_end => $span->p_end,
+ payload => '<b>2',
+ pti => 64
);
$last_p = $span->p_end;
$last_o = $span->o_end;
@@ -29,15 +29,15 @@
}
) or return;
-# my $mt = $$self->stream->pos($first->[0]);
-# $mt->add(
-# term => '<>:base/s:t',
-# o_start => $first->[1],
-# p_end => $last_p,
-# o_end => $last_o,
-# payload => '<b>0',
-# pti => 64
-# );
+ # my $mt = $$self->stream->pos($first->[0]);
+ # $mt->add(
+ # term => '<>:base/s:t',
+ # o_start => $first->[1],
+ # p_end => $last_p,
+ # o_end => $last_o,
+ # payload => '<b>0',
+ # pti => 64
+ # );
$$self->stream->add_meta('base/sentences', '<i>' . $i);
diff --git a/lib/KorAP/XML/Annotation/DeReKo/Structure.pm b/lib/KorAP/XML/Annotation/DeReKo/Structure.pm
index 80a284e..ced5476 100644
--- a/lib/KorAP/XML/Annotation/DeReKo/Structure.pm
+++ b/lib/KorAP/XML/Annotation/DeReKo/Structure.pm
@@ -1,9 +1,10 @@
package KorAP::XML::Annotation::DeReKo::Structure;
use KorAP::XML::Annotation::Base;
-use Data::Dumper;
sub parse {
my $self = shift;
+ my $as_base = shift // 0;
+ my ($sentences, $paragraphs) = (0,0);
$$self->add_spandata(
foundry => 'struct',
@@ -21,10 +22,10 @@
# Get attributes
if (ref $feature eq 'ARRAY') {
- $attrs = $feature->[1]->{fs}->{f};
- $attrs = ref $attrs eq 'ARRAY' ? $attrs : [$attrs];
- $feature = $feature->[0];
- $tui = $stream->tui($p_start);
+ $attrs = $feature->[1]->{fs}->{f};
+ $attrs = ref $attrs eq 'ARRAY' ? $attrs : [$attrs];
+ $feature = $feature->[0];
+ $tui = $stream->tui($p_start);
};
# Get term label
@@ -37,42 +38,71 @@
# Add structure
my $mt = $mtt->add(
- term => '<>:dereko/s:' . $name,
- o_start => $span->o_start,
- o_end => $span->o_end,
- p_start => $p_start,
- p_end => $p_end,
- pti => $span->milestone ? 65 : 64,
+ term => '<>:dereko/s:' . $name,
+ o_start => $span->o_start,
+ o_end => $span->o_end,
+ p_start => $p_start,
+ p_end => $p_end,
+ pti => $span->milestone ? 65 : 64,
);
my $level = $span->hash->{'-l'};
if ($level || $tui) {
- my $pl;
- $pl .= '<b>' . ($level ? $level - 1 : 0);
- $pl .= '<s>' . $tui if $tui;
- $mt->payload($pl);
+ my $pl;
+ $pl .= '<b>' . ($level ? $level - 1 : 0);
+ $pl .= '<s>' . $tui if $tui;
+ $mt->payload($pl);
+ };
+
+      # Use sentence and paragraph elements for base, but only for the
+      # kinds actually requested in $as_base ('sentences'/'paragraphs');
+      # otherwise an unrequested and uncounted base span would be added.
+      my $base_pl;
+      if ($as_base && $name eq 's' && index($as_base, 'sentences') >= 0) {
+        $base_pl = '<b>2';
+        $sentences++;
+      }
+      elsif ($as_base && $name eq 'p' && index($as_base, 'paragraphs') >= 0) {
+        $base_pl = '<b>1';
+        $paragraphs++;
+      };
+      if (defined $base_pl) {
+        # Clone the dereko multiterm, relabel it as a base span, add to stream
+        my $mt2 = $mt->clone;
+        $mt2->term('<>:base/s:' . $name);
+        $mt2->payload($base_pl);
+        $mtt->add($mt2);
+      };
# Add attributes
if ($attrs) {
- # Set a tui if attributes are set
- foreach (@$attrs) {
+ # Set a tui if attributes are set
+ foreach (@$attrs) {
- # Add attributes
- $mtt->add(
- term =>
- '@:dereko/s:' . $_->{'-name'} . ':' . $_->{'#text'},
- p_start => $p_start,
- pti => 17,
- payload => '<s>' . $tui .
- ($span->milestone ? '' : '<i>' . $p_end)
- );
- };
+ # Add attributes
+ $mtt->add(
+ term =>
+ '@:dereko/s:' . $_->{'-name'} . ':' . $_->{'#text'},
+ p_start => $p_start,
+ pti => 17,
+ payload => '<s>' . $tui .
+ ($span->milestone ? '' : '<i>' . $p_end)
+ );
+ };
};
}
) or return;
+ if ($as_base) {
+ if (index($as_base, 'sentences') >= 0) {
+ $$self->stream->add_meta('base/sentences', '<i>' . $sentences);
+ };
+ if (index($as_base, 'paragraphs') >= 0) {
+ $$self->stream->add_meta('base/paragraphs', '<i>' . $paragraphs);
+ };
+ };
+
return 1;
};
diff --git a/lib/KorAP/XML/Index/MultiTerm.pm b/lib/KorAP/XML/Index/MultiTerm.pm
index 87946e2..7e089de 100644
--- a/lib/KorAP/XML/Index/MultiTerm.pm
+++ b/lib/KorAP/XML/Index/MultiTerm.pm
@@ -128,8 +128,8 @@
'<i>' . $_[0]->[4];
};
-# my $pl = $_[0]->[1] ?
-# $_[0]->[1] - 1 : $_[0]->[0];
+ # my $pl = $_[0]->[1] ?
+ # $_[0]->[1] - 1 : $_[0]->[0];
if ($_[0]->[2] || $_[0]->[0]) {
@@ -139,10 +139,10 @@
};
if ($_[0]->[0]) {
if (index($_[0]->[0], '<') == 0) {
- $pre .= $_[0]->[0];
+ $pre .= $_[0]->[0];
}
else {
- $pre .= '<?>' . $_[0]->[0];
+ $pre .= '<?>' . $_[0]->[0];
};
};
};
@@ -151,6 +151,11 @@
};
+sub clone {  # Return a shallow copy of this multiterm (fields copied one level deep)
+  my $self = shift;
+  return bless [@$self], ref $self;  # bless into the invocant's class so subclass instances keep their class
+};
+
sub to_string_2 {
my $self = shift;
my $string = $self->term;
@@ -166,10 +171,10 @@
};
if ($self->payload) {
if (index($self->payload, '<') == 0) {
- $string .= $self->payload;
+ $string .= $self->payload;
}
else {
- $string .= '<?>' . $self->payload;
+ $string .= '<?>' . $self->payload;
};
};
};
diff --git a/lib/KorAP/XML/Index/MultiTermToken.pm b/lib/KorAP/XML/Index/MultiTermToken.pm
index 9e78df8..3556422 100644
--- a/lib/KorAP/XML/Index/MultiTermToken.pm
+++ b/lib/KorAP/XML/Index/MultiTermToken.pm
@@ -1,5 +1,6 @@
package KorAP::XML::Index::MultiTermToken;
use KorAP::XML::Index::MultiTerm;
+use Scalar::Util qw/blessed/;
use List::MoreUtils 'uniq';
use Carp qw/carp croak/;
use strict;
@@ -16,8 +17,9 @@
sub add {
my $self = shift;
+
my $mt;
- unless (ref $_[0] eq 'MultiTerm') {
+ unless (blessed $_[0]) {
if (@_ == 1) {
$mt = KorAP::XML::Index::MultiTerm->new(term => $_[0]);
}
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index 968d3d9..1f5fad8 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -15,7 +15,7 @@
use Data::Dumper;
use File::Spec::Functions qw/catdir catfile catpath splitdir splitpath rel2abs/;
-our $VERSION = '0.23';
+our $VERSION = '0.24';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];