| Akron | 2532f1b | 2023-05-15 13:41:24 +0200 | [diff] [blame] | 1 | package KorAP::XML::Meta::ICC; |
| 2 | use KorAP::XML::Meta::Base; |
| 3 | use KorAP::XML::Meta::I5; |
| 4 | |
| 5 | my $squish = \&KorAP::XML::Meta::I5::_squish; |
| 6 | |
# Normalize a date string into an eight-digit "YYYYMMDD" value.
#
# Accepted input shapes are "YYYY", "YYYY-MM" and "YYYY-MM-DD".
# A missing month or day is filled with "00", so "2023-05" becomes
# "20230500" and "2023" becomes "20230000".
#
# Returns the normalized string on success. Returns nothing (undef in
# scalar context) when the input is undefined or has any other shape.
sub parse_date {
  my $temp = shift;

  # Nothing to parse
  return unless defined $temp;

  if ($temp =~ m/^(\d\d\d\d)(?:-(\d\d)(?:-(\d\d))?)?$/) {

    # The pattern only admits a four-digit year and two-digit month/day
    # parts, so the captures can be concatenated as they are. No century
    # guessing or zero padding is required - doing so would corrupt
    # years below 0100 by producing more than eight digits.
    return $1 . ($2 // '00') . ($3 // '00');
  };

  # Unsupported date format
  return;
};
| 20 | |
# Parse header metadata for the ICC flavour.
#
# The header is first handed to KorAP::XML::Meta::I5::parse. If that
# succeeds and the level is 'text', further fields are read from the
# bibliographic source description, the distributor note, the ICC
# genre classification and the participant description, and stored
# as keys of $self.
#
# Arguments:
#   $self - the metadata object (used as a hash reference)
#   $dom  - the header DOM; must provide at(), whose results provide
#           at() and all_text()
#   $type - the level being parsed; only 'text' triggers the
#           additional fields
#
# Returns 0 if the parent parse fails, and 1 otherwise.
sub parse {
  my ($self, $dom, $type) = @_;

  # Parse using the parent I5 class
  unless (KorAP::XML::Meta::I5::parse($self, $dom, $type)) {
    return 0;
  };

  # Add metadata on the text level only - the parent parse succeeded,
  # so other levels are still reported as a success
  return 1 if $type ne 'text';

  # Squished text of the first element below $node matching $selector,
  # or nothing if there is no such element
  my $text_of = sub {
    my ($node, $selector) = @_;
    my $elem = $node->at($selector) or return;
    return $squish->($elem->all_text);
  };

  # Store the text found at each selector under the given key,
  # skipping elements that are missing or have no (true) text.
  # $fields is a list of [selector => key] pairs.
  my $store_text = sub {
    my ($node, $fields) = @_;
    foreach my $field (@$fields) {
      my ($selector, $key) = @$field;
      my $value = $text_of->($node, $selector);
      $self->{$key} = $value if $value;
    };
  };

  # Like $store_text, but for a single date field that is normalized
  # using parse_date() and dropped if it can't be normalized
  my $store_date = sub {
    my ($node, $selector, $key) = @_;
    my $value = $text_of->($node, $selector);
    return unless defined $value;
    my $date = parse_date($value);
    $self->{$key} = $date if $date;
  };

  # Bibliographic description of the source
  if (my $bibl = $dom->at('fileDesc > sourceDesc > bibl')) {
    $store_text->($bibl, [
      ['> author'                 => 'T_author'],
      ['> title'                  => 'T_title'],
      ['> pubPlace'               => 'S_pub_place'],
      ['> publisher'              => 'A_publisher'],
      ['> availability > licence' => 'S_license']
    ]);

    $store_date->($bibl, '> date', 'D_pub_date');
  };

  # Fields addressed from the header root
  $store_text->($dom, [
    [
      'fileDesc > publicationStmt > distributor > note:nth-child(2)'
        => 'A_source'
    ],
    [
      'profileDesc > textClass > classCode[scheme=ICC], fileDesc > titleStmt > domain'
        => 'S_iccGenre'
    ]
  ]);

  # Information on the author as a participant
  if (my $person = $dom->at('profileDesc > particDesc > person')) {
    $store_date->($person, '> birth > date', 'D_author_birth_date');

    $store_text->($person, [
      ['> occupation' => 'S_author_occupation'],
      ['> sex'        => 'S_author_sex']
    ]);
  };

  return 1;
};
| 99 | |
| 100 | 1; |