blob: fd0b9c41c8fc8880b7e7f3b9edc60b0d7976f072 [file] [log] [blame]
Akron151676d2016-03-14 20:12:14 +01001package KorAP::XML::Annotation::Mate::MorphoAttr;
2use KorAP::XML::Annotation::Base;
Nils Diewald6d565072014-10-30 23:20:58 +00003
# Attaches Mate morphological features (MSD) as attributes to the
# part-of-speech (pos) term of each token.
5
# Attach Mate morphological annotations to the token stream.
#
# Registers a callback with add_tokendata() for foundry 'mate', layer
# 'morpho'. For every token the callback walks the feature list found at
# {fs}{f}{fs}{f} of the token hash and adds:
#   - 'mate/l:<lemma>'    for each lemma feature whose text is not '--',
#   - 'mate/p:<pos>'      for the pos feature (the last one seen wins),
#   - '@:<key>[=<value>]' for each '|'-separated entry of the msd feature,
#                         unless the msd text is '_'.
# When an msd feature was seen, the pos term gets PTI 128 and every
# attribute term PTI 16; all of them carry the payload '<s>' followed by
# the value id_counter() returned for that token.
#
# Returns 1 on success; returns nothing if add_tokendata() returns false.
sub parse {
  my $self = shift;

  $$self->add_tokendata(
    foundry => 'mate',
    layer   => 'morpho',
    cb      => sub {
      my ($stream, $token) = @_;
      my $mtt = $stream->pos($token->get_pos);

      my $content = $token->get_hash->{fs}->{f};

      my ($pos, $msd, $tui);

      foreach my $f (@{$content->{fs}->{f}}) {
        my $name = $f->{-name};
        my $text = $f->{'#text'};

        # Compare against the empty string instead of testing for truth,
        # so a literal "0" (e.g. the lemma of the numeral 0) is kept.
        next unless defined $name && defined $text && $text ne '';

        # pos
        if ($name eq 'pos') {
          $pos = $text;
        }

        # lemma ('--' marks a missing lemma)
        elsif ($name eq 'lemma') {
          $mtt->add_by_term('mate/l:' . $text) if $text ne '--';
        }

        # MSD ('_' marks missing morphological information)
        elsif ($name eq 'msd' && $text ne '_') {
          $msd = $text;

          # The attributes refer back to the pos term through this id
          $tui = $mtt->id_counter;
        }
      }

      # Without a part of speech there is no term the attributes could
      # be attached to, and no meaningful 'mate/p:' term to add
      return unless defined $pos;

      my $pos_term = $mtt->add_by_term('mate/p:' . $pos);

      # There are attributes needed
      if ($tui) {
        $pos_term->set_pti(128);
        $pos_term->set_payload('<s>' . $tui);
      }

      return unless defined $msd;

      # MSD: case, tense, number, mood, person, degree, gender
      foreach my $attr (split '\|', $msd) {
        my ($key, $value) = split "=", $attr;

        # Skip empty segments, e.g. from 'a||b'
        next unless defined $key && $key ne '';

        my $term = '@:' . $key;

        # Keep a value of "0" - only a missing or empty value is omitted
        $term .= '=' . $value if defined $value && $value ne '';

        my $attr_term = $mtt->add_by_term($term);
        $attr_term->set_pti(16);
        $attr_term->set_payload('<s>' . $tui);
      }

      return;
    }) or return;

  return 1;
}
69
# Layer descriptors this annotation contributes: the Mate lemma and
# part-of-speech layers, both given as 'tokens'.
# Returns a fresh array reference on every call.
sub layer_info {
  my @layers = (
    'mate/l=tokens',
    'mate/p=tokens',
  );
  return \@layers;
}
73
741;