blob: 1760d2519021247ec15825729dcf4bd7c9c6cab6 [file] [log] [blame]
package KorAP::XML::Annotation::XIP::Morpho;
use KorAP::XML::Annotation::Base;
# Add XIP morphological terms (part-of-speech and lemma) to the token stream.
#
# Registers a callback through add_tokendata() for foundry 'xip',
# layer 'morpho'. For every token the callback reads the feature list
# found at fs/f/fs/f of the token hash and adds:
#   xip/p:<pos>     for each 'pos' feature
#   xip/l:<lemma>   for each 'lemma' feature (plus a variant with every
#                   '=' removed, if the lemma contains one)
#   xip/l:#<part>   for each '#'-separated part of a composite lemma
#
# Returns 1 on success; returns empty if add_tokendata() yields a false value.
sub parse {
  my $self = shift;

  $$self->add_tokendata(
    foundry => 'xip',
    layer => 'morpho',
    encoding => 'xip',
    cb => sub {
      my ($stream, $token) = @_;
      my $mtt = $stream->pos($token->pos);

      my $content = $token->hash->{fs}->{f}->{fs}->{f};

      # Normalise the feature list. A lone feature presumably arrives as a
      # single hash instead of an array reference (depends on the XML
      # parser in use - TODO confirm); anything else is treated as empty.
      my @features = ref($content) eq 'ARRAY' ? @$content
                   : ref($content) eq 'HASH'  ? ($content)
                   :                            ();

      # First pass: part-of-speech terms
      foreach my $feature (@features) {
        next unless $feature->{-name} eq 'pos';

        # Test for defined-and-non-empty rather than truth,
        # so that a value of "0" is not discarded
        my $pos = $feature->{'#text'};
        next unless defined $pos && length $pos;

        $mtt->add(term => 'xip/p:' . $pos);
      };

      # Second pass: lemma terms
      foreach my $feature (@features) {
        next unless $feature->{-name} eq 'lemma';

        # Same check as above: the lemma of the token "0" is "0"
        my $lemma = $feature->{'#text'};
        next unless defined $lemma && length $lemma;

        # Verb delimiter (aus=druecken): index the lemma as given and,
        # if tr/// deleted at least one '=', again without the delimiter
        $mtt->add(term => 'xip/l:' . $lemma);
        if ($lemma =~ tr/=//d) {
          $mtt->add(term => 'xip/l:' . $lemma);
        };

        # Composites: only lemmas that split into something other than
        # exactly one part are indexed part by part
        my @parts = split('#', $lemma);
        next if @parts == 1;

        foreach my $part (@parts) {
          # Strip a trailing "/<word characters>" suffix from the part
          $part =~ s{/\w+$}{};
          $mtt->add(term => 'xip/l:#' . $part);
        };
      };
    }) or return;

  return 1;
};
# Return a fresh array reference holding the two layer descriptor
# strings of this module: 'xip/l=tokens' and 'xip/p=tokens'.
# Any arguments (such as an invocant) are ignored.
sub layer_info {
  my @layers = (
    'xip/l=tokens',
    'xip/p=tokens',
  );
  return \@layers;
};
1;