| Akron | 35db6e3 | 2016-03-17 22:42:22 +0100 | [diff] [blame] | 1 | package KorAP::XML::Meta::Base; |
| Akron | fbf6638 | 2016-07-12 19:44:01 +0200 | [diff] [blame] | 2 | use Mojo::Log; |
| Akron | 35db6e3 | 2016-03-17 22:42:22 +0100 | [diff] [blame] | 3 | use strict; |
| 4 | use warnings; |
| 5 | |
| 6 | # Importing method |
| 7 | sub import { |
| 8 | my $class = shift; |
| 9 | my $caller = caller; |
| 10 | |
| 11 | no strict 'refs'; |
| 12 | |
| 13 | push @{"${caller}::ISA"}, $class; |
| 14 | |
| 15 | strict->import; |
| 16 | warnings->import; |
| 17 | utf8->import; |
| 18 | feature->import(':5.10'); |
| 19 | }; |
| 20 | |
| 21 | sub log { |
| Akron | fbf6638 | 2016-07-12 19:44:01 +0200 | [diff] [blame] | 22 | return $_[0]->{_log} if $_[0]->{_log}; |
| 23 | $_[0]->{_log} = Mojo::Log->new; |
| Akron | 35db6e3 | 2016-03-17 22:42:22 +0100 | [diff] [blame] | 24 | }; |
| 25 | |
| 26 | sub corpus_sigle { |
| 27 | $_[0]->{_corpus_sigle}; |
| 28 | }; |
| 29 | |
| 30 | sub doc_sigle { |
| 31 | $_[0]->{_doc_sigle}; |
| 32 | }; |
| 33 | |
| 34 | sub text_sigle { |
| 35 | $_[0]->{_text_sigle}; |
| 36 | }; |
| 37 | |
| Akron | 11c8030 | 2016-03-18 19:44:43 +0100 | [diff] [blame] | 38 | sub cache { |
| 39 | $_[0]->{_cache}; |
| 40 | } |
| 41 | |
| Akron | 35db6e3 | 2016-03-17 22:42:22 +0100 | [diff] [blame] | 42 | sub new { |
| 43 | my $class = shift; |
| 44 | my %hash = @_; |
| 45 | my $copy = {}; |
| Akron | 11c8030 | 2016-03-18 19:44:43 +0100 | [diff] [blame] | 46 | foreach (qw/log cache corpus_sigle doc_sigle text_sigle/) { |
| Akron | 35db6e3 | 2016-03-17 22:42:22 +0100 | [diff] [blame] | 47 | $copy->{'_' . $_} = $hash{$_}; |
| 48 | }; |
| 49 | |
| 50 | bless $copy, $class; |
| 51 | }; |
| 52 | |
| 53 | sub keywords { |
| 54 | my $self = shift; |
| 55 | return join(' ', @{$self->{$_[0]} // []}); |
| 56 | }; |
| 57 | |
| Akron | 11c8030 | 2016-03-18 19:44:43 +0100 | [diff] [blame] | 58 | # Check if cached |
| Akron | fbf6638 | 2016-07-12 19:44:01 +0200 | [diff] [blame] | 59 | # Cache differently! |
| Akron | 11c8030 | 2016-03-18 19:44:43 +0100 | [diff] [blame] | 60 | sub is_cached { |
| 61 | my ($self, $type) = @_; |
| 62 | |
| 63 | return if $type eq 'text'; |
| 64 | return unless $self->cache; |
| 65 | |
| 66 | my $value; |
| 67 | my $cache = $self->cache; |
| 68 | if ($type eq 'corpus') { |
| 69 | $value = $cache->get($self->corpus_sigle); |
| 70 | } |
| 71 | elsif ($type eq 'doc') { |
| 72 | $value = $cache->get($self->doc_sigle); |
| 73 | }; |
| 74 | |
| 75 | if ($value) { |
| 76 | foreach (grep {index($_, '_') != 0 } keys %$value) { |
| 77 | $self->{$_} = $value->{$_}; |
| 78 | }; |
| 79 | return 1; |
| 80 | }; |
| 81 | |
| 82 | return; |
| 83 | }; |
| 84 | |
| Akron | fbf6638 | 2016-07-12 19:44:01 +0200 | [diff] [blame] | 85 | sub to_hash { |
| 86 | my $self = shift; |
| 87 | my %new; |
| 88 | foreach ($self->keys) { |
| 89 | $new{$_} = $self->{$_}; |
| 90 | }; |
| 91 | if ($self->corpus_sigle) { |
| 92 | $new{corpus_sigle} = $self->corpus_sigle; |
| 93 | if ($self->doc_sigle) { |
| 94 | $new{doc_sigle} = $self->doc_sigle; |
| 95 | if ($self->text_sigle) { |
| 96 | $new{text_sigle} = $self->text_sigle; |
| 97 | } |
| 98 | } |
| 99 | }; |
| 100 | return \%new; |
| 101 | }; |
| 102 | |
| Akron | 11c8030 | 2016-03-18 19:44:43 +0100 | [diff] [blame] | 103 | sub keys { |
| 104 | my $self = shift; |
| 105 | return grep {index($_, '_') != 0 } keys %$self; |
| 106 | }; |
| 107 | |
| 108 | sub do_cache { |
| 109 | my ($self, $type) = @_; |
| 110 | |
| 111 | return if $type eq 'text'; |
| 112 | return unless $self->cache; |
| 113 | |
| 114 | my %value; |
| 115 | foreach ($self->keys) { |
| 116 | $value{$_} = $self->{$_}; |
| 117 | }; |
| 118 | |
| 119 | my $cache = $self->cache; |
| 120 | |
| 121 | if ($type eq 'corpus') { |
| 122 | $cache->set($self->corpus_sigle, \%value); |
| 123 | return 1; |
| 124 | } |
| 125 | elsif ($type eq 'doc') { |
| 126 | $cache->set($self->doc_sigle, \%value); |
| 127 | return 1; |
| 128 | }; |
| 129 | |
| 130 | return 0; |
| 131 | }; |
| Akron | 35db6e3 | 2016-03-17 22:42:22 +0100 | [diff] [blame] | 132 | |
| Akron | 5eb3aa0 | 2019-01-25 18:30:47 +0100 | [diff] [blame] | 133 | |
| 134 | # Generate koral_fields |
| 135 | sub to_koral_fields { |
| 136 | my $self = shift; |
| 137 | my @fields = (); |
| 138 | |
| 139 | if ($self->corpus_sigle) { |
| 140 | push @fields, _string_field('corpusSigle', $self->corpus_sigle); |
| 141 | if ($self->doc_sigle) { |
| 142 | push @fields, _string_field('docSigle', $self->doc_sigle); |
| 143 | if ($self->text_sigle) { |
| 144 | push @fields, _string_field('textSigle', $self->text_sigle); |
| 145 | } |
| 146 | } |
| 147 | }; |
| 148 | |
| 149 | # Iterate over all keys |
| 150 | foreach (sort {$a cmp $b } $self->keys) { |
| 151 | if (index($_, 'D_') == 0) { |
| 152 | push @fields, _date_field(_k($_), $self->{$_}); |
| 153 | } |
| 154 | elsif (index($_, 'S_') == 0) { |
| 155 | push @fields, _string_field(_k($_), $self->{$_}); |
| 156 | } |
| 157 | elsif (index($_, 'T_') == 0) { |
| 158 | push @fields, _text_field(_k($_), $self->{$_}); |
| 159 | } |
| 160 | # elsif (index($_, 'I_') == 0) { |
| 161 | # _int_field(_k($_), $self->{$_}); |
| 162 | # } |
| 163 | elsif (index($_, 'A_') == 0) { |
| 164 | push @fields, _attachement_field(_k($_), $self->{$_}); |
| 165 | } |
| 166 | elsif (index($_, 'K_') == 0) { |
| 167 | push @fields, _keywords_field(_k($_), $self->{$_}); |
| 168 | } |
| 169 | else { |
| 170 | warn 'Unknown field type: ' . $_; |
| 171 | } |
| 172 | }; |
| 173 | |
| 174 | return \@fields; |
| 175 | }; |
| 176 | |
| 177 | sub _k { |
| 178 | my $x = substr($_[0], 2); |
| 179 | $x =~ s/_(\w)/\U$1\E/g; |
| 180 | $x =~ s/id$/ID/gi; |
| 181 | return $x; |
| 182 | }; |
| 183 | |
| 184 | |
| 185 | sub _string_field { |
| 186 | return { |
| 187 | '@type' => 'koral:field', |
| 188 | type => 'type:string', |
| 189 | key => $_[0], |
| 190 | value => $_[1] |
| 191 | }; |
| 192 | }; |
| 193 | |
| 194 | sub _text_field { |
| 195 | return { |
| 196 | '@type' => 'koral:field', |
| 197 | type => 'type:text', |
| 198 | key => $_[0], |
| 199 | value => $_[1] |
| 200 | }; |
| 201 | }; |
| 202 | |
| 203 | sub _date_field { |
| 204 | my ($key, $value) = @_; |
| 205 | my $new_value; |
| 206 | if ($value =~ /^(\d\d\d\d)(\d\d)(\d\d)$/) { |
| 207 | $new_value = "$1"; |
| 208 | if ($2 ne '00') { |
| 209 | $new_value .= "-$2"; |
| 210 | if ($3 ne '00') { |
| 211 | $new_value .= "-$3"; |
| 212 | }; |
| 213 | }; |
| 214 | }; |
| 215 | return { |
| 216 | '@type' => 'koral:field', |
| 217 | type => 'type:date', |
| 218 | key => $key, |
| 219 | value => $new_value |
| 220 | }; |
| 221 | }; |
| 222 | |
| 223 | sub _keywords_field { |
| 224 | return { |
| 225 | '@type' => 'koral:field', |
| 226 | type => 'type:keywords', |
| 227 | key => $_[0], |
| 228 | value => $_[1] |
| 229 | }; |
| 230 | }; |
| 231 | |
| 232 | sub _attachement_field { |
| Akron | 6bf3cc9 | 2019-02-07 12:11:20 +0100 | [diff] [blame^] | 233 | my $value = $_[1]; |
| 234 | if (index($value, 'data:') != 0) { |
| 235 | $value = 'data:,' . $value; |
| 236 | }; |
| Akron | 5eb3aa0 | 2019-01-25 18:30:47 +0100 | [diff] [blame] | 237 | return { |
| 238 | '@type' => 'koral:field', |
| 239 | type => 'type:attachement', |
| 240 | key => $_[0], |
| Akron | 6bf3cc9 | 2019-02-07 12:11:20 +0100 | [diff] [blame^] | 241 | value => $value |
| Akron | 5eb3aa0 | 2019-01-25 18:30:47 +0100 | [diff] [blame] | 242 | }; |
| 243 | }; |
| 244 | |
| Akron | 35db6e3 | 2016-03-17 22:42:22 +0100 | [diff] [blame] | 245 | 1; |