blob: 6007356dee48ed13fa26250a7e34b8e5ce948d7e [file] [log] [blame]
package KorAP::XML::Annotation::Connexor::Morpho;
use KorAP::XML::Annotation::Base;
# Lookup table from Connexor morphological tag identifiers to the name of
# the feature category each tag belongs to. The keys carry a prefix
# (v_, n_, a_, num_) followed by the tag name.
# NOTE(review): no code visible in this file reads %MAP - presumably it is
# meant for a finer grained MSD mapping; confirm before relying on it.
our %MAP = (
  # v_* keys
  (map { $_ => 'mood'   } qw(v_ind v_imp v_sub)),
  (map { $_ => 'inf'    } qw(v_fin v_pcp)),
  (map { $_ => 'tense'  } qw(v_pres v_past v_prog v_perf)),

  # n_* keys
  (map { $_ => 'type'   } qw(n_abbr n_prop n_pl)),

  # a_* keys
  (map { $_ => 'degree' } qw(a_cmp a_sub)),

  # num_* keys
  num_ord => 'type',
);
# Add Connexor morphological annotations to the token stream.
#
# Registers a callback through add_tokendata() for foundry 'connexor',
# layer 'morpho'. For each token the callback reads the feature list found
# at {fs}{f}{fs}{f} of the token hash and adds these terms at the token's
# stream position:
#
#   cnx/l:<lemma>  - one term per lemma; several lemmas in one value are
#                    separated by a no-break space (U+00A0)
#   cnx/p:<pos>    - the part of speech
#   cnx/m:<msd>    - one term per colon separated MSD value
#
# Returns 1 on success and nothing (undef in scalar context) if
# add_tokendata() returns a false value.
sub parse {
  my $self = shift;

  $$self->add_tokendata(
    foundry => 'connexor',
    layer   => 'morpho',
    cb      => sub {
      my ($stream, $token) = @_;
      my $mtt = $stream->pos($token->get_pos);

      my $content  = $token->get_hash->{fs}->{f};
      my $features = $content->{fs}->{f};

      # The feature container is an array reference when there are several
      # features. A single feature may arrive as a plain hash reference
      # (presumably the XML-to-hash conversion collapses single children -
      # verify against the parser in use), so normalise both to a list.
      my @features =
          ref $features eq 'ARRAY' ? @$features
        : defined $features        ? ($features)
        :                            ();

      for my $f (@features) {
        next unless ref $f eq 'HASH';

        my $name  = $f->{-name};
        my $value = $f->{'#text'};

        # Test for definedness and length instead of truth, so that a
        # value of "0" (e.g. the lemma of the token "0") is not dropped.
        next unless defined $name && defined $value && length $value;

        # Lemma
        if ($name eq 'lemma') {
          # split() returns the whole value when no separator is present
          $mtt->add_by_term('cnx/l:' . $_) foreach split(/\x{00A0}/, $value);
        }

        # POS
        elsif ($name eq 'pos') {
          $mtt->add_by_term('cnx/p:' . $value);
        }

        # MSD
        # This could follow
        # http://www.ids-mannheim.de/cosmas2/projekt/referenz/connexor/morph.html
        elsif ($name eq 'msd') {
          $mtt->add_by_term('cnx/m:' . $_) foreach split(':', $value);
        }
      }
    }
  ) or return;

  return 1;
}
# Return the layer descriptors this module provides, as a reference to a
# new array: lemma (cnx/l), part of speech (cnx/p) and morphosyntactic
# description (cnx/m), each given in the form '<layer>=tokens'.
sub layer_info {
  return [qw(cnx/l=tokens cnx/p=tokens cnx/m=tokens)];
}
1;