Fixed legacy XIP parser
Change-Id: I53789d9f6af1b6873e23f5b11c59c812bc6abf45
diff --git a/Changes b/Changes
index 267bf0c..053e517 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.40 2020-03-01
+ - Fixed XIP parser.
+
0.39 2020-02-19
- Added Talismane support.
- Added "distributor" field to I5 metadata.
diff --git a/lib/KorAP/XML/Annotation/XIP/Constituency.pm b/lib/KorAP/XML/Annotation/XIP/Constituency.pm
index b823dea..7fd05f9 100644
--- a/lib/KorAP/XML/Annotation/XIP/Constituency.pm
+++ b/lib/KorAP/XML/Annotation/XIP/Constituency.pm
@@ -9,7 +9,7 @@
my $self = shift;
# Collect all spans
- my %xip_const;
+ my %xip_const = ();
# Collect all roots
my $xip_const_root = Set::Scalar->new;
@@ -27,29 +27,34 @@
# Collect the span
$xip_const{$span->id} = $span;
+ # warn 'Remember ' . $span->id;
# It's probably a root
$xip_const_root->insert($span->id);
my $rel = $span->hash->{rel} or return;
+
$rel = [$rel] unless ref $rel eq 'ARRAY';
# Iterate over all relations
foreach (@$rel) {
- if ($_->{-label} eq 'dominates') {
+ next if $_->{-label} ne 'dominates';
- my $target = $_->{-target};
- if (!$target && $_->{-uri} &&
- $_->{-uri} =~ $URI_RE) {
- $target = $1;
- };
+ my $target = $_->{-target};
+ if (!$target && $_->{-uri} &&
+ $_->{-uri} =~ $URI_RE) {
+ $target = $1;
+ };
- # The target may not be addressable
- next unless $target;
+ # The target may not be addressable
+ next unless $target;
- # It's definately not a root
- $xip_const_noroot->insert($target);
- };
+ # It's definitely not a root
+ $xip_const_noroot->insert($target);
+
+ # if ($target =~ /^s2_n(?:36|58|59|60|40)$/) {
+ # warn 'Probably not a root ' . $target . ' but ' . $span->id;
+ # };
};
}
) or return;
@@ -109,20 +114,29 @@
$rel = [$rel] unless ref $rel eq 'ARRAY';
+ # Iterate over all relations (again ...)
foreach (@$rel) {
next if $_->{-label} ne 'dominates';
- my $target;
- $target = $_->{-target};
- if (!$target && $_->{-uri} && $_->{-uri} =~ $URI_RE) {
- $target = $1;
+ my $target = $_->{-target};
+ if (!$target && $_->{-uri} &&
+ $_->{-uri} =~ $URI_RE) {
+ $target = $1;
};
+ # if ($span->id =~ /^s2_n(?:36|58|59|60|40)$/ && $target =~ /^s2_n(?:36|58|59|60|40)$/) {
+ # warn 'B: ' . $span->id . ' points to ' . $target;
+ # };
+
next unless $target;
my $subspan = delete $xip_const{$target};
- return unless $subspan;
- # warn "Span " . $target . " not found";
+ # warn "A-Forgot about $target: " . ($subspan ? 'yes' : 'no');
+
+ unless ($subspan) {
+ next;
+ };
+ # warn "Span " . $target . " not found";
$this->($subspan, $level + 1);
};
@@ -133,8 +147,13 @@
# Start tree traversal from the root
foreach ($roots->members) {
+ my $obj = delete $xip_const{$_};
- my $obj = delete $xip_const{$_} or next;
+ # warn "B-Forgot about $_: " . ($obj ? 'yes' : 'no');
+
+ unless ($obj) {
+ next;
+ };
$add_const->($obj, 0);
};
@@ -148,4 +167,5 @@
['xip/c=spans']
};
+
1;
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index ec30176..7ad65c5 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
use Data::Dumper;
use File::Spec::Functions qw/catdir catfile catpath splitdir splitpath rel2abs/;
-our $VERSION = '0.39';
+our $VERSION = '0.40';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];