blob: 1760d2519021247ec15825729dcf4bd7c9c6cab6 [file] [log] [blame]
Akron151676d2016-03-14 20:12:14 +01001package KorAP::XML::Annotation::XIP::Morpho;
2use KorAP::XML::Annotation::Base;
Nils Diewald7364d1f2013-11-05 19:26:35 +00003
# Register the XIP morphology callback on the token data.
#
# Calls add_tokendata() on the object referenced by $$self with the
# foundry 'xip', the layer 'morpho' and the encoding 'xip'. For every
# token the callback adds the following terms to the token's stream
# position:
#
#   xip/p:<pos>     for each feature named 'pos'
#   xip/l:<lemma>   for each feature named 'lemma'
#   xip/l:<lemma>   a second time with all '=' removed, if the lemma
#                   contained a '=' delimiter (e.g. aus=druecken)
#   xip/l:#<part>   for each '#'-separated part of a composite lemma,
#                   with a trailing '/<word>' suffix stripped
#
# Returns 1 on success, or nothing if add_tokendata() returns a false
# value.
sub parse {
  my $self = shift;

  $$self->add_tokendata(
    foundry  => 'xip',
    layer    => 'morpho',
    encoding => 'xip',
    cb       => sub {
      my ($stream, $token) = @_;
      my $mtt = $stream->pos($token->pos);

      my $content = $token->hash->{fs}->{f}->{fs}->{f};

      # First pass: part-of-speech. Kept as a separate pass so that
      # all pos terms are added before any lemma term.
      foreach my $feature (@$content) {
        next unless $feature->{-name} eq 'pos';

        # Test for defined/non-empty instead of truthiness,
        # so that a value of "0" is not discarded.
        my $pos = $feature->{'#text'};
        next unless defined $pos && length $pos;

        $mtt->add(term => 'xip/p:' . $pos);
      }

      # Second pass: lemma
      foreach my $feature (@$content) {
        next unless $feature->{-name} eq 'lemma';

        # A lemma of "0" (e.g. for the token "0") is valid and
        # must be indexed, so do not rely on truthiness here.
        my $lemma = $feature->{'#text'};
        next unless defined $lemma && length $lemma;

        # Verb delimiter (aus=druecken): index the lemma as given
        # and, if it contained any '=', once more without them.
        # tr///d returns the number of deleted characters.
        $mtt->add(term => 'xip/l:' . $lemma);
        if ($lemma =~ tr/=//d) {
          $mtt->add(term => 'xip/l:' . $lemma);
        }

        # Composites: parts are separated by '#'
        my @parts = split('#', $lemma);

        # Not a composite - continue with the next feature
        next if @parts == 1;

        foreach my $part (@parts) {
          # Strip a trailing '/<word>' suffix from the part
          $part =~ s{/\w+$}{};
          $mtt->add(term => 'xip/l:#' . $part);
        }
      }
    }
  ) or return;

  return 1;
}
60
# Return an array reference listing the layer descriptors
# of this annotation: 'xip/l=tokens' and 'xip/p=tokens'.
sub layer_info {
  my @layers = (
    'xip/l=tokens',
    'xip/p=tokens',
  );
  return \@layers;
}
64
Nils Diewald7364d1f2013-11-05 19:26:35 +000065
661;