Minor cleanups for annotation procedures
Change-Id: Ib09ac9a911e535070794f78ad023813ad192565c
diff --git a/lib/KorAP/XML/Annotation/DGD/Structure.pm b/lib/KorAP/XML/Annotation/DGD/Structure.pm
index ce4a25c..a863c43 100644
--- a/lib/KorAP/XML/Annotation/DGD/Structure.pm
+++ b/lib/KorAP/XML/Annotation/DGD/Structure.pm
@@ -71,7 +71,6 @@
my $mt = $mtt->add_by_term('<>:base/s:s');
$mt->set_o_start($o_start);
$mt->set_o_end($_->[1]);
- $mt->set_p_start($p_start);
$mt->set_p_end($_->[0]);
$mt->set_pti(64);
$mt->set_payload('<b>1');
diff --git a/lib/KorAP/XML/Annotation/DeReKo/Structure.pm b/lib/KorAP/XML/Annotation/DeReKo/Structure.pm
index 9742437..77ac223 100644
--- a/lib/KorAP/XML/Annotation/DeReKo/Structure.pm
+++ b/lib/KorAP/XML/Annotation/DeReKo/Structure.pm
@@ -46,14 +46,9 @@
return;
};
- my $p_end = $span->get_p_end;
-
# Add structure
- my $mt = $mtt->add_by_term('<>:dereko/s:' . $name);
- $mt->set_o_start($span->get_o_start);
- $mt->set_o_end($span->get_o_end);
- $mt->set_p_start($p_start);
- $mt->set_p_end($p_end);
+ my $mt = $mtt->add_span('<>:dereko/s:' . $name, $span);
+ # $mt->set_p_start($p_start);
$mt->set_pti($span->get_milestone ? 65 : 64);
my $level = $span->get_hash->{'-l'};
@@ -69,32 +64,32 @@
if ($name eq 's' && index($as_base, 'sentences') >= 0) {
# Clone Multiterm
- my $mt2 = $mt->clone;
- $mt2->set_term('<>:base/s:' . $name);
- $mt2->set_payload('<b>2');
+ $mt = $mt->clone;
+ $mt->set_term('<>:base/s:' . $name);
+ $mt->set_payload('<b>2');
$sentences++;
# Add to stream
- $mtt->add_blessed($mt2);
+ $mtt->add_blessed($mt);
}
elsif ($name eq 'p' && index($as_base, 'paragraphs') >= 0) {
# Clone Multiterm
- my $mt2 = $mt->clone;
- $mt2->set_term('<>:base/s:' . $name);
- $mt2->set_payload('<b>1');
+ $mt = $mt->clone;
+ $mt->set_term('<>:base/s:' . $name);
+ $mt->set_payload('<b>1');
$paragraphs++;
# Add to stream
- $mtt->add_blessed($mt2);
+ $mtt->add_blessed($mt);
}
# Add pagebreaks
elsif ($name eq 'pb' && index($as_base, 'pagebreaks') >= 0) {
if (my $nr = first { $_->{-name} eq 'n' } @$attrs) {
if (($nr = $nr->{'#text'}) && looks_like_number($nr)) {
- my $mt2 = $mtt->add_by_term('~:base/s:pb');
- $mt2->set_payload('<i>' . $nr . '<i>' . $span->get_o_start);
- $mt2->set_stored_offsets(0);
+ $mt = $mtt->add_by_term('~:base/s:pb');
+ $mt->set_payload('<i>' . $nr . '<i>' . $span->get_o_start);
+ $mt->set_stored_offsets(0);
};
};
};
@@ -103,14 +98,16 @@
# Add attributes
if ($attrs) {
+ my $pl = '<s>' . $tui .($span->get_milestone ? '' : '<i>' . $span->get_p_end);
+
# Set a tui if attributes are set
foreach (@$attrs) {
# Add attributes
- my $mt = $mtt->add_by_term('@:dereko/s:' . $_->{'-name'} . ($_->{'#text'} ? ':' . $_->{'#text'} : ''));
+ $mt = $mtt->add_by_term('@:dereko/s:' . $_->{'-name'} . ($_->{'#text'} ? ':' . $_->{'#text'} : ''));
$mt->set_p_start($p_start);
$mt->set_pti(17);
- $mt->set_payload('<s>' . $tui .($span->get_milestone ? '' : '<i>' . $p_end));
+ $mt->set_payload($pl);
};
};
}
diff --git a/lib/KorAP/XML/Annotation/LWC/Dependency.pm b/lib/KorAP/XML/Annotation/LWC/Dependency.pm
index e57ba1e..66d1068 100644
--- a/lib/KorAP/XML/Annotation/LWC/Dependency.pm
+++ b/lib/KorAP/XML/Annotation/LWC/Dependency.pm
@@ -50,7 +50,6 @@
$mt = $stream->pos($target->get_pos)
->add_by_term('<:lwc/d:' . $label);
-
$mt->set_pti(32); # term-to-term relation
$mt->set_payload(
'<i>' . $source->get_pos # . # left part token position
diff --git a/lib/KorAP/XML/Annotation/OpenNLP/Morpho.pm b/lib/KorAP/XML/Annotation/OpenNLP/Morpho.pm
index 6bb9554..dd1e481 100644
--- a/lib/KorAP/XML/Annotation/OpenNLP/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/OpenNLP/Morpho.pm
@@ -12,11 +12,10 @@
my $content = $token->get_hash->{fs}->{f} or return;
- $content = $content->{fs}->{f};
- my $found;
+ $content = $content->{fs}->{f} or return;
# syntax
- if (($content->{-name} eq 'pos') && ($content->{'#text'})) {
+ if (($content->{-name} eq 'pos') && $content->{'#text'}) {
$mtt->add_by_term('opennlp/p:' . $content->{'#text'});
};
}) or return;
diff --git a/lib/KorAP/XML/Annotation/RWK/Morpho.pm b/lib/KorAP/XML/Annotation/RWK/Morpho.pm
index 62ee40a..42f169b 100644
--- a/lib/KorAP/XML/Annotation/RWK/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/RWK/Morpho.pm
@@ -11,7 +11,7 @@
my ($stream, $token) = @_;
my $mtt = $stream->pos($token->get_pos);
- my $content = $token->get_hash->{fs}->{f};
+ my $content = $token->get_hash->{fs}->{f} or return;
my $found;
@@ -39,8 +39,7 @@
}
# ana tag
- elsif ($name =~ m/^(?:bc|(?:sub)?type|usage|person|pos|case|number|gender|tense|mood|degree)$/ &&
- ($found = $f->{'#text'})) {
+ elsif (($found = $f->{'#text'}) && $name =~ m/^(?:bc|(?:sub)?type|usage|person|pos|case|number|gender|tense|mood|degree)$/o) {
$mtt->add_by_term('rwk/m:' . $name . ':' . $found);
};
};
diff --git a/lib/KorAP/XML/Annotation/Sgbr/Lemma.pm b/lib/KorAP/XML/Annotation/Sgbr/Lemma.pm
index fc54bd4..aa0248a 100644
--- a/lib/KorAP/XML/Annotation/Sgbr/Lemma.pm
+++ b/lib/KorAP/XML/Annotation/Sgbr/Lemma.pm
@@ -16,8 +16,6 @@
my $found;
- my $capital = 0;
-
my $lemmata = (ref $content->{fs}->{f} eq 'ARRAY') ?
$content->{fs}->{f} : [$content->{fs}->{f}];
diff --git a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
index 110742c..2e65abc 100644
--- a/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/TreeTagger/Morpho.pm
@@ -47,7 +47,6 @@
# pos
if (($_->{-name} eq 'ctag') && ($found = $_->{'#text'})) {
-
$pos{$found} += $certainty // 1;
};
};
diff --git a/lib/KorAP/XML/Annotation/XIP/Constituency.pm b/lib/KorAP/XML/Annotation/XIP/Constituency.pm
index e146d93..5ee1259 100644
--- a/lib/KorAP/XML/Annotation/XIP/Constituency.pm
+++ b/lib/KorAP/XML/Annotation/XIP/Constituency.pm
@@ -1,6 +1,8 @@
package KorAP::XML::Annotation::XIP::Constituency;
use KorAP::XML::Annotation::Base;
use Set::Scalar;
+use feature 'current_sub';
+
use Scalar::Util qw/weaken/;
our $URI_RE = qr/^[^\#]+\#(.+?)$/;
@@ -27,7 +29,6 @@
# Collect the span
$xip_const{$span->get_id} = $span;
- # warn 'Remember ' . $span->get_id;
# It's probably a root
$xip_const_root->insert($span->get_id);
@@ -66,8 +67,7 @@
my $stream = $$self->stream;
# Recursive tree traversal method
- my $add_const;
- $add_const = sub {
+ my $add_const = sub {
my ($span, $level) = @_;
weaken $xip_const_root;
@@ -92,17 +92,9 @@
};
# $type is now NPA, NP, NUM ...
- my $mt = $mtt->add_by_term('<>:xip/c:' . $type);
- $mt->set_o_start($span->get_o_start);
- $mt->set_o_end($span->get_o_end);
- $mt->set_p_end($span->get_p_end);
- $mt->set_pti(64);
-
- # Only add level payload if node != root
- $mt->set_payload('<b>' . ($level // 0));
-
- # my $this = __SUB__
- my $this = $add_const;
+ $mtt->add_span('<>:xip/c:' . $type, $span)
+ # Always store the depth as a byte payload; an undefined level falls back to 0 (root level)
+ ->set_payload('<b>' . ($level // 0));
my $rel = $content->{rel};
@@ -133,9 +125,9 @@
# warn "A-Forgot about $target: " . ($subspan ? 'yes' : 'no');
next unless $subspan;
- # warn "Span " . $target . " not found";
- $this->($subspan, $level + 1);
+ # Recursive call
+ __SUB__->($subspan, $level + 1);
};
};
@@ -146,8 +138,6 @@
foreach ($roots->members) {
my $obj = delete $xip_const{$_};
- # warn "B-Forgot about $_: " . ($obj ? 'yes' : 'no');
-
next unless $obj;
$add_const->($obj, 0);