blob: b823dea3c4328ef7b1b7d5fafee5476303f610ed [file] [log] [blame]
Akron151676d2016-03-14 20:12:14 +01001package KorAP::XML::Annotation::XIP::Constituency;
2use KorAP::XML::Annotation::Base;
Nils Diewald7364d1f2013-11-05 19:26:35 +00003use Set::Scalar;
Nils Diewald47c3ef32014-04-30 19:13:17 +00004use Scalar::Util qw/weaken/;
Nils Diewald7364d1f2013-11-05 19:26:35 +00005
Nils Diewald47c3ef32014-04-30 19:13:17 +00006our $URI_RE = qr/^[^\#]+\#(.+?)$/;
7
Nils Diewald7364d1f2013-11-05 19:26:35 +00008sub parse {
9 my $self = shift;
10
Nils Diewald47c3ef32014-04-30 19:13:17 +000011 # Collect all spans
Nils Diewald7364d1f2013-11-05 19:26:35 +000012 my %xip_const;
Nils Diewald47c3ef32014-04-30 19:13:17 +000013
14 # Collect all roots
Nils Diewald7364d1f2013-11-05 19:26:35 +000015 my $xip_const_root = Set::Scalar->new;
Nils Diewald47c3ef32014-04-30 19:13:17 +000016
17 # Collect all non-roots
Nils Diewald7364d1f2013-11-05 19:26:35 +000018 my $xip_const_noroot = Set::Scalar->new;
19
20 # First run:
21 $$self->add_spandata(
22 foundry => 'xip',
23 layer => 'constituency',
24 encoding => 'xip',
25 cb => sub {
26 my ($stream, $span) = @_;
27
Nils Diewald47c3ef32014-04-30 19:13:17 +000028 # Collect the span
Nils Diewald7364d1f2013-11-05 19:26:35 +000029 $xip_const{$span->id} = $span;
Nils Diewald47c3ef32014-04-30 19:13:17 +000030
31 # It's probably a root
Nils Diewald7364d1f2013-11-05 19:26:35 +000032 $xip_const_root->insert($span->id);
33
34 my $rel = $span->hash->{rel} or return;
35 $rel = [$rel] unless ref $rel eq 'ARRAY';
36
Nils Diewald6d565072014-10-30 23:20:58 +000037 # Iterate over all relations
Nils Diewald7364d1f2013-11-05 19:26:35 +000038 foreach (@$rel) {
Nils Diewald47c3ef32014-04-30 19:13:17 +000039 if ($_->{-label} eq 'dominates') {
40
41 my $target = $_->{-target};
42 if (!$target && $_->{-uri} &&
43 $_->{-uri} =~ $URI_RE) {
44 $target = $1;
45 };
46
Nils Diewald6d565072014-10-30 23:20:58 +000047 # The target may not be addressable
Nils Diewald47c3ef32014-04-30 19:13:17 +000048 next unless $target;
49
50 # It's definately not a root
51 $xip_const_noroot->insert($target);
Nils Diewald7364d1f2013-11-05 19:26:35 +000052 };
53 };
54 }
55 ) or return;
56
Nils Diewald47c3ef32014-04-30 19:13:17 +000057 # Get the stream
Nils Diewald7364d1f2013-11-05 19:26:35 +000058 my $stream = $$self->stream;
59
Nils Diewald47c3ef32014-04-30 19:13:17 +000060 # Recursive tree traversal method
Nils Diewald840c9242014-10-28 19:51:26 +000061 my $add_const;
Nils Diewald6d565072014-10-30 23:20:58 +000062 $add_const = sub {
Nils Diewald47c3ef32014-04-30 19:13:17 +000063 my ($span, $level) = @_;
64
65 weaken $xip_const_root;
66 weaken $xip_const_noroot;
67
68 # Get the correct position for the span
Nils Diewald7364d1f2013-11-05 19:26:35 +000069 my $mtt = $stream->pos($span->p_start);
70
71 my $content = $span->hash;
72 my $f = $content->{fs}->{f};
Nils Diewald6d565072014-10-30 23:20:58 +000073
Nils Diewaldff6d0782014-06-10 18:26:36 +000074 unless ($f->{-name} eq 'const') {
75 warn $f->{-id} . ' is no constant';
76 return;
77 };
Nils Diewald7364d1f2013-11-05 19:26:35 +000078
Nils Diewaldff6d0782014-06-10 18:26:36 +000079 my $type = $f->{'#text'};
80
81 unless ($type) {
82 warn $f->{-id} . ' has no content';
83 return;
84 };
Nils Diewald7364d1f2013-11-05 19:26:35 +000085
86 # $type is now NPA, NP, NUM ...
87 my %term = (
Nils Diewald3cf08c72013-12-16 20:31:10 +000088 term => '<>:xip/c:' . $type,
Nils Diewald7364d1f2013-11-05 19:26:35 +000089 o_start => $span->o_start,
90 o_end => $span->o_end,
Akron5f511d22016-01-05 20:54:34 +010091 p_end => $span->p_end,
92 pti => 64
Nils Diewald7364d1f2013-11-05 19:26:35 +000093 );
94
Nils Diewald47c3ef32014-04-30 19:13:17 +000095 # Only add level payload if node != root
Nils Diewald6d565072014-10-30 23:20:58 +000096 $term{payload} ='<b>' . ($level // 0);
Nils Diewald7364d1f2013-11-05 19:26:35 +000097
98 $mtt->add(%term);
99
Nils Diewald840c9242014-10-28 19:51:26 +0000100 # my $this = __SUB__
101 my $this = $add_const;
Nils Diewald7364d1f2013-11-05 19:26:35 +0000102
Nils Diewaldff6d0782014-06-10 18:26:36 +0000103 my $rel = $content->{rel};
104
105 unless ($rel) {
Nils Diewaldf03c6802014-07-21 16:39:44 +0000106 warn $f->{-id} . ' has no relation' if $f->{-id};
Nils Diewaldff6d0782014-06-10 18:26:36 +0000107 return;
108 };
109
Nils Diewald7364d1f2013-11-05 19:26:35 +0000110 $rel = [$rel] unless ref $rel eq 'ARRAY';
111
112 foreach (@$rel) {
Nils Diewald47c3ef32014-04-30 19:13:17 +0000113 next if $_->{-label} ne 'dominates';
114 my $target;
115
116 $target = $_->{-target};
117 if (!$target && $_->{-uri} && $_->{-uri} =~ $URI_RE) {
118 $target = $1;
119 };
120
121 next unless $target;
122
123 my $subspan = delete $xip_const{$target};
Nils Diewaldf03c6802014-07-21 16:39:44 +0000124 return unless $subspan;
125 # warn "Span " . $target . " not found";
126
Nils Diewald7364d1f2013-11-05 19:26:35 +0000127 $this->($subspan, $level + 1);
128 };
129 };
130
Nils Diewald47c3ef32014-04-30 19:13:17 +0000131 # Calculate all roots
132 my $roots = $xip_const_root->difference($xip_const_noroot);
133
134 # Start tree traversal from the root
135 foreach ($roots->members) {
Nils Diewaldff6d0782014-06-10 18:26:36 +0000136
Nils Diewald7364d1f2013-11-05 19:26:35 +0000137 my $obj = delete $xip_const{$_} or next;
Nils Diewaldff6d0782014-06-10 18:26:36 +0000138
Nils Diewald7364d1f2013-11-05 19:26:35 +0000139 $add_const->($obj, 0);
140 };
141
142 return 1;
143};
144
Nils Diewald47c3ef32014-04-30 19:13:17 +0000145
146# Layer info
Nils Diewald3cf08c72013-12-16 20:31:10 +0000147sub layer_info {
Nils Diewald6d565072014-10-30 23:20:58 +0000148 ['xip/c=spans']
149};
Nils Diewald7364d1f2013-11-05 19:26:35 +0000150
1511;