Lifted recall for span wraps 2
diff --git a/lib/KorAP/Index/CoreNLP/Constituency.pm b/lib/KorAP/Index/CoreNLP/Constituency.pm
index ee37abc..af93c86 100644
--- a/lib/KorAP/Index/CoreNLP/Constituency.pm
+++ b/lib/KorAP/Index/CoreNLP/Constituency.pm
@@ -18,18 +18,24 @@
my ($stream, $span) = @_;
$corenlp_const{$span->id} = $span;
+
+ # Register every span as a root candidate for now
$corenlp_const_root->insert($span->id);
my $rel = $span->hash->{rel} or return;
+
+ # Make rel an array in case it's not
$rel = [$rel] unless ref $rel eq 'ARRAY';
foreach (@$rel) {
if ($_->{-label} eq 'dominates') {
if ($_->{-target}) {
+# warn $_->{-target} . ' is no root';
$corenlp_const_noroot->insert($_->{-target});
}
elsif (my $uri = $_->{-uri}) {
$uri =~ s/^morpho\.xml#//;
+# warn $uri . ' is no root';
$corenlp_const_noroot->insert($uri);
};
};
@@ -40,6 +46,7 @@
my $stream = $$self->stream;
my $add_const;
+
$add_const = sub {
my $span = shift;
my $level = shift;
@@ -75,9 +82,16 @@
};
};
+ # Roots are the root candidates minus all spans marked as no root
my $diff = $corenlp_const_root->difference($corenlp_const_noroot);
+
+ # Iterate over all roots
foreach ($diff->members) {
+
+ # Get root span based on root id
my $obj = delete $corenlp_const{$_} or next;
+
+ # Start on level 0
$add_const->($obj, 0);
};
diff --git a/lib/KorAP/Index/CoreNLP/Morpho.pm b/lib/KorAP/Index/CoreNLP/Morpho.pm
index 2d3491d..76cee0b 100644
--- a/lib/KorAP/Index/CoreNLP/Morpho.pm
+++ b/lib/KorAP/Index/CoreNLP/Morpho.pm
@@ -12,9 +12,7 @@
my $mtt = $stream->pos($token->pos);
my $content = $token->hash->{fs}->{f} or return;
-
$content = $content->{fs}->{f};
- my $found;
# syntax
if (($content->{-name} eq 'pos') && ($content->{'#text'})) {
diff --git a/lib/KorAP/Tokenizer/Range.pm b/lib/KorAP/Tokenizer/Range.pm
index 5fa2a38..a514230 100644
--- a/lib/KorAP/Tokenizer/Range.pm
+++ b/lib/KorAP/Tokenizer/Range.pm
@@ -9,16 +9,19 @@
bless \$range, $class;
};
+
+# Set integer range from x to y with z
sub set {
- my $self = shift;
- $$self->set_range(@_);
+ ${shift()}->set_range(@_);
};
+# Mark range from x to y as a gap, stored as '!' . (z-1) . ':' . z
sub gap {
- my $self = shift;
- $$self->set_range($_[0], $_[1], '!' . ($_[2] - 1) . ':' . $_[2]);
+ ${shift()}->set_range($_[0], $_[1],
+ '!' . ($_[2] - 1) . ':' . $_[2]);
};
+# Look up the value at an offset - gap entries yield nothing
sub lookup {
my $x = ${$_[0]}->lookup( $_[1] ) or return;
return if index($x, '!') == 0;
@@ -28,14 +31,24 @@
sub before {
my $self = shift;
my $offset = shift;
+
my $found = $$self->lookup( $offset );
+
unless (defined $found) {
warn 'There is no value for ', $offset;
return;
};
+ # Hit a gap,
+ # return preceding match
if ($found =~ /!(\d+):(\d+)$/) {
return $1 >= 0 ? $1 : 0;
+ }
+ else {
+ # Didn't hit a gap:
+ # fall back to the preceding value, which
+ # may be inaccurate but lifts recall
+ return $found - 1;
};
};
@@ -51,6 +64,9 @@
if ($found =~ /^!(\d+):(\d+)$/) {
return $2;
+ }
+ else {
+ $found + 1;
};
};
diff --git a/lib/KorAP/Tokenizer/Units.pm b/lib/KorAP/Tokenizer/Units.pm
index a073fef..a10e374 100644
--- a/lib/KorAP/Tokenizer/Units.pm
+++ b/lib/KorAP/Tokenizer/Units.pm
@@ -24,6 +24,7 @@
$span->o_end($to);
my $start = $self->match->startswith($span->o_start);
+
unless (defined $start) {
$start = $self->range->after($span->o_start) or return;
};
@@ -33,7 +34,8 @@
my $end = $self->match->endswith($span->o_end);
unless (defined $end) {
- $end = $self->range->before($span->o_end) or return;
+ $end = $self->range->before($span->o_end);
+ return unless $end;
};
# $span->p_end($end);