blob: 5ee12596c5a6f82619b0e0cad1f14f46779950b9 [file] [log] [blame]
package KorAP::XML::Annotation::XIP::Constituency;
use KorAP::XML::Annotation::Base;
use Set::Scalar;
use feature 'current_sub';
use Scalar::Util qw/weaken/;
our $URI_RE = qr/^[^\#]+\#(.+?)$/;
sub parse {
my $self = shift;
# Collect all spans
my %xip_const = ();
# Collect all roots
my $xip_const_root = Set::Scalar->new;
# Collect all non-roots
my $xip_const_noroot = Set::Scalar->new;
# First run:
$$self->add_spandata(
foundry => 'xip',
layer => 'constituency',
encoding => 'xip',
cb => sub {
my ($stream, $span) = @_;
# Collect the span
$xip_const{$span->get_id} = $span;
# It's probably a root
$xip_const_root->insert($span->get_id);
my $rel = $span->get_hash->{rel} or return;
$rel = [$rel] unless ref $rel eq 'ARRAY';
my $target;
# Iterate over all relations
foreach (@$rel) {
next if $_->{-label} ne 'dominates';
$target = $_->{-target};
if (!$target && $_->{-uri} &&
$_->{-uri} =~ $URI_RE) {
$target = $1;
};
# The target may not be addressable
next unless $target;
# It's definately not a root
$xip_const_noroot->insert($target);
# if ($target =~ /^s2_n(?:36|58|59|60|40)$/) {
# warn 'Probably not a root ' . $target . ' but ' . $span->id;
# };
};
}
) or return;
# Get the stream
my $stream = $$self->stream;
# Recursive tree traversal method
my $add_const = sub {
my ($span, $level) = @_;
weaken $xip_const_root;
weaken $xip_const_noroot;
# Get the correct position for the span
my $mtt = $stream->pos($span->get_p_start);
my $content = $span->get_hash;
my $f = $content->{fs}->{f};
unless ($f->{-name} eq 'const') {
warn $f->{-id} . ' is no constant';
return;
};
my $type = $f->{'#text'};
unless ($type) {
warn $f->{-id} . ' has no content';
return;
};
# $type is now NPA, NP, NUM ...
$mtt->add_span('<>:xip/c:' . $type, $span)
# Only add level payload if node != root
->set_payload('<b>' . ($level // 0));
my $rel = $content->{rel};
unless ($rel) {
warn $f->{-id} . ' has no relation' if $f->{-id};
return;
};
$rel = [$rel] unless ref $rel eq 'ARRAY';
# Iterate over all relations (again ...)
foreach (@$rel) {
next if $_->{-label} ne 'dominates';
my $target = $_->{-target};
if (!$target && $_->{-uri} &&
$_->{-uri} =~ $URI_RE) {
$target = $1;
};
# if ($span->id =~ /^s2_n(?:36|58|59|60|40)$/ && $target =~ /^s2_n(?:36|58|59|60|40)$/) {
# warn 'B: ' . $span->id . ' points to ' . $target;
# };
next unless $target;
my $subspan = delete $xip_const{$target};
# warn "A-Forgot about $target: " . ($subspan ? 'yes' : 'no');
next unless $subspan;
# Recursive call
__SUB__->($subspan, $level + 1);
};
};
# Calculate all roots
my $roots = $xip_const_root->difference($xip_const_noroot);
# Start tree traversal from the root
foreach ($roots->members) {
my $obj = delete $xip_const{$_};
next unless $obj;
$add_const->($obj, 0);
};
return 1;
};
# Layer info
sub layer_info {
['xip/c=spans']
};
1;