| Akron | 151676d | 2016-03-14 20:12:14 +0100 | [diff] [blame^] | 1 | package KorAP::XML::Annotation::XIP::Morpho; |
| 2 | use KorAP::XML::Annotation::Base; |
| Nils Diewald | 7364d1f | 2013-11-05 19:26:35 +0000 | [diff] [blame] | 3 | |
# Add the XIP morphology annotations (part of speech and lemma) to the
# token stream.
#
# Registers a callback through add_tokendata for foundry 'xip', layer
# 'morpho'. The callback receives the stream and one annotated token and
# adds the following terms at the token's position:
#
#   xip/p:<pos>     for every non-empty 'pos' feature
#   xip/l:<lemma>   for every non-empty 'lemma' feature, as annotated
#   xip/l:<lemma>   once more with the '=' verb delimiters removed,
#                   if the lemma contained any (aus=druecken)
#   xip/l:#<part>   for every part of a '#'-separated composite lemma,
#                   with a trailing '/<word chars>' suffix stripped
#
# All pos terms are added before any lemma term.
#
# Returns 1 on success; returns nothing if add_tokendata yields a false
# value.
sub parse {
  my $self = shift;

  $$self->add_tokendata(
    foundry  => 'xip',
    layer    => 'morpho',
    encoding => 'xip',
    cb       => sub {
      my ($stream, $token) = @_;
      my $mtt = $stream->pos($token->pos);

      my $content = $token->hash->{fs}->{f}->{fs}->{f};

      # Part of speech
      foreach my $feature (@$content) {
        next unless $feature->{-name} eq 'pos';

        # Test for defined and non-empty rather than truthiness, so the
        # literal text "0" is not discarded.
        my $pos = $feature->{'#text'};
        next unless defined $pos && length $pos;

        $mtt->add(term => 'xip/p:' . $pos);
      }

      # Lemma
      foreach my $feature (@$content) {
        next unless $feature->{-name} eq 'lemma';

        # Same defined-and-non-empty test: a token such as "0" has the
        # lemma "0", which is false in boolean context.
        my $lemma = $feature->{'#text'};
        next unless defined $lemma && length $lemma;

        # Lemma exactly as annotated
        $mtt->add(term => 'xip/l:' . $lemma);

        # Verb delimiter (aus=druecken): tr///d removes every '=' in place
        # and returns how many were removed, so the second term is only
        # added when the lemma actually changed.
        if ($lemma =~ tr/=//d) {
          $mtt->add(term => 'xip/l:' . $lemma);
        }

        # Composites: only lemmas that split into something other than a
        # single part get per-part terms.
        my @parts = split('#', $lemma);
        next if @parts == 1;

        foreach my $part (@parts) {
          $part =~ s{/\w+$}{};
          $mtt->add(term => 'xip/l:#' . $part);
        }
      }
    }
  ) or return;

  return 1;
}
| 60 | |
# Return an array reference with the two layer descriptor strings of this
# annotation module: 'xip/l=tokens' and 'xip/p=tokens'.
# NOTE(review): presumably 'l' is the lemma layer and 'p' the part-of-speech
# layer, matching the 'xip/l:' and 'xip/p:' term prefixes - confirm.
sub layer_info {
  my @layers = ('xip/l=tokens', 'xip/p=tokens');
  return \@layers;
}
| 64 | |
| Nils Diewald | 7364d1f | 2013-11-05 19:26:35 +0000 | [diff] [blame] | 65 | |
| 66 | 1; |