| Akron | 151676d | 2016-03-14 20:12:14 +0100 | [diff] [blame] | 1 | package KorAP::XML::Annotation::Mate::MorphoAttr; |
| 2 | use KorAP::XML::Annotation::Base; |
| Nils Diewald | 6d56507 | 2014-10-30 23:20:58 +0000 | [diff] [blame] | 3 | |
| 4 | # This attaches morphological information as attributes to the pos |
| 5 | |
# Index the Mate morphological annotations of every token.
#
# Registers a callback with add_tokendata() for the 'mate' foundry and the
# 'morpho' layer. For each token the callback
#   - adds the lemma as 'mate/l:<lemma>' (the value '--' is not indexed),
#   - adds the part of speech as 'mate/p:<pos>', and
#   - adds every '|'-separated MSD feature as an attribute term
#     '@:<key>[=<value>]' carrying the same '<s>' payload id as the
#     part-of-speech term (the MSD value '_' is treated as "no features").
#
# Returns 1 on success; returns nothing when add_tokendata() yields a
# false value.
sub parse {
  my $self = shift;

  $$self->add_tokendata(
    foundry => 'mate',
    layer   => 'morpho',
    cb      => sub {
      my ($stream, $token) = @_;
      my $mtt = $stream->pos($token->get_pos);

      my $content = $token->get_hash->{fs}->{f};

      my ($pos, $msd, $tui);

      foreach my $f (@{$content->{fs}->{f}}) {
        my $name = $f->{-name};
        my $text = $f->{'#text'};

        # Check definedness and length rather than truthiness, so that a
        # literal "0" (e.g. the lemma of the token "0") is not dropped.
        next unless defined($name) && defined($text) && length($text);

        # Part of speech - indexed after the loop, once it is known
        # whether attributes have to be attached to it
        if ($name eq 'pos') {
          $pos = $text;
        }

        # Lemma
        elsif ($name eq 'lemma') {
          $mtt->add_by_term('mate/l:' . $text) if $text ne '--';
        }

        # MSD - remember the features and reserve the id that links the
        # attribute terms to the part-of-speech term
        elsif ($name eq 'msd') {
          if ($text ne '_') {
            $msd = $text;
            $tui = $mtt->id_counter;
          }
        }
      }

      # Without a part of speech there is no term the attributes could be
      # attached to, and concatenating undef would index an empty 'mate/p:'.
      return unless defined $pos;

      my $mt = $mtt->add_by_term('mate/p:' . $pos);

      # There are attributes needed, so mark the part-of-speech term as
      # their target (128 and 16 are the pti values used for the target
      # and for the attributes respectively)
      if ($tui) {
        $mt->set_pti(128);
        $mt->set_payload('<s>' . $tui);
      }

      # MSD
      if (defined $msd) {
        foreach my $feature (split '\|', $msd) {
          my ($x, $y) = split '=', $feature;

          # Skip empty segments such as those produced by 'a||b'
          next unless defined($x) && length($x);

          # case, tense, number, mood, person, degree, gender
          my $value = (defined($y) && length($y)) ? '=' . $y : '';
          my $attr = $mtt->add_by_term('@:' . $x . $value);
          $attr->set_pti(16);
          $attr->set_payload('<s>' . $tui);
        }
      }

      return;
    }) or return;

  return 1;
}
| 69 | |
# Report the layers this annotation contributes, as an array reference of
# '<foundry>/<layer>=<type>' strings: the lemma layer and the
# part-of-speech layer, both on token level.
sub layer_info {
  my @layers = (
    'mate/l=tokens',
    'mate/p=tokens',
  );
  return \@layers;
}
| 73 | |
| 74 | 1; |