# blob: b34cab58d3e2885112189bf97f45e3924b3ac096 [file] [log] [blame]
package KorAP::XML::Annotation::RWK::Structure;
use KorAP::XML::Annotation::Base;
# Derive base sentence and paragraph spans from the milestone anchors of
# the 'struct/structure' span layer.
#
# Every 's-milestone' / 'p-milestone' span contributes a boundary
# (token position, character offset). A '<>:base/s:s' or '<>:base/s:p'
# term is added for each stretch between two consecutive boundaries,
# beginning at position 0 / offset 0. The number of terms created per
# type is stored as stream meta data under 'base/sentences' and
# 'base/paragraphs'.
#
# Returns 1 on success. Returns nothing (undef in scalar context) when
# add_spandata() yields a false value.
sub parse {
  my $self = shift;

  # Boundaries as [token position, character offset] pairs
  my (@sentences, @paragraphs);

  # Taken from the most recently seen non-milestone span; used as the
  # closing boundary of the final sentence and paragraph
  my ($last_p, $last_o) = (0, 0);

  $$self->add_spandata(
    foundry => 'struct',
    layer   => 'structure',
    cb      => sub {
      my ($stream, $span) = @_;

      # Read feature
      my $feature = $span->get_hash->{fs}->{f};

      # A feature list carries the term label in its first entry; the
      # attributes in the second entry are not needed here
      $feature = $feature->[0] if ref $feature eq 'ARRAY';

      # Get term label - treat a missing label like any other
      # non-milestone span instead of comparing undef
      my $name = $feature->{'#text'};
      $name = '' unless defined $name;

      # Check only for anchors
      if ($name eq 's-milestone') {
        push @sentences, [ $span->get_p_start, $span->get_o_start ];
      }
      elsif ($name eq 'p-milestone') {
        push @paragraphs, [ $span->get_p_start, $span->get_o_start ];
      }
      else {
        # NOTE(review): this pairs the span's start position with its
        # end offset - confirm that get_p_start is intended here
        $last_p = $span->get_p_start;
        $last_o = $span->get_o_end;
      }
    }
  ) or return;

  # Add final position
  push @sentences,  [ $last_p, $last_o ];
  push @paragraphs, [ $last_p, $last_o ];

  my $stream = $$self->stream;

  my %boundaries_of = (s => \@sentences,      p => \@paragraphs);
  my %meta_key_of   = (s => 'base/sentences', p => 'base/paragraphs');

  # Fixed processing order keeps the generated output deterministic
  for my $type (qw(s p)) {
    my ($p_start, $o_start) = (0, 0);
    my $count = 0;

    # Order the boundaries by token position
    my @boundaries = sort { $a->[0] <=> $b->[0] } @{ $boundaries_of{$type} };

    for my $boundary (@boundaries) {
      my ($p_end, $o_end) = @$boundary;

      # A boundary at the current start would yield an empty span
      next if $p_end == $p_start || $o_end == $o_start;

      my $mtt = $stream->pos($p_start);
      if (!$mtt) {

        # Nothing at the start position - fall back to the preceding one.
        # Only the position needs to be rechecked, the offsets are
        # unchanged since the test above.
        $p_start--;
        next if $p_end == $p_start;

        $mtt = $stream->pos($p_start);

        # Still nothing to attach the span to - skip this boundary
        # rather than calling a method on an undefined value
        next unless $mtt;
      };

      # Add the base sentence or paragraph
      my $mt = $mtt->add_by_term('<>:base/s:' . $type);
      $mt->set_o_start($o_start);
      $mt->set_o_end($o_end);
      $mt->set_p_start($p_start);
      $mt->set_p_end($p_end);
      $mt->set_pti(64);
      $mt->set_payload('<b>1');
      $count++;

      # The next span starts where this one ended
      ($p_start, $o_start) = ($p_end, $o_end);
    };

    # Set meta information about sentence or paragraph count
    $stream->add_meta($meta_key_of{$type}, '<i>' . $count);
  };

  return 1;
};
# This annotation contributes no layer information: always returns a
# reference to a new, empty array.
sub layer_info {
  return [];
};
1;
__END__