blob: de9ad447254655be6025d595ec70d1e3b42dca15 [file] [log] [blame]
Akron35db6e32016-03-17 22:42:22 +01001package KorAP::XML::Meta::Base;
Akronfbf66382016-07-12 19:44:01 +02002use Mojo::Log;
Akron35db6e32016-03-17 22:42:22 +01003use strict;
4use warnings;
5
6# Importing method
7sub import {
8 my $class = shift;
9 my $caller = caller;
10
11 no strict 'refs';
12
13 push @{"${caller}::ISA"}, $class;
14
15 strict->import;
16 warnings->import;
17 utf8->import;
18 feature->import(':5.10');
19};
20
21sub log {
Akronfbf66382016-07-12 19:44:01 +020022 return $_[0]->{_log} if $_[0]->{_log};
23 $_[0]->{_log} = Mojo::Log->new;
Akron35db6e32016-03-17 22:42:22 +010024};
25
26sub corpus_sigle {
27 $_[0]->{_corpus_sigle};
28};
29
30sub doc_sigle {
31 $_[0]->{_doc_sigle};
32};
33
34sub text_sigle {
35 $_[0]->{_text_sigle};
36};
37
Akron11c80302016-03-18 19:44:43 +010038sub cache {
39 $_[0]->{_cache};
40}
41
Akron35db6e32016-03-17 22:42:22 +010042sub new {
43 my $class = shift;
44 my %hash = @_;
45 my $copy = {};
Akron11c80302016-03-18 19:44:43 +010046 foreach (qw/log cache corpus_sigle doc_sigle text_sigle/) {
Akron35db6e32016-03-17 22:42:22 +010047 $copy->{'_' . $_} = $hash{$_};
48 };
49
50 bless $copy, $class;
51};
52
53sub keywords {
54 my $self = shift;
55 return join(' ', @{$self->{$_[0]} // []});
56};
57
Akron11c80302016-03-18 19:44:43 +010058# Check if cached
Akronfbf66382016-07-12 19:44:01 +020059# Cache differently!
Akron11c80302016-03-18 19:44:43 +010060sub is_cached {
61 my ($self, $type) = @_;
62
63 return if $type eq 'text';
64 return unless $self->cache;
65
66 my $value;
67 my $cache = $self->cache;
68 if ($type eq 'corpus') {
69 $value = $cache->get($self->corpus_sigle);
70 }
71 elsif ($type eq 'doc') {
72 $value = $cache->get($self->doc_sigle);
73 };
74
75 if ($value) {
76 foreach (grep {index($_, '_') != 0 } keys %$value) {
77 $self->{$_} = $value->{$_};
78 };
79 return 1;
80 };
81
82 return;
83};
84
Akronfbf66382016-07-12 19:44:01 +020085sub to_hash {
86 my $self = shift;
87 my %new;
88 foreach ($self->keys) {
89 $new{$_} = $self->{$_};
90 };
91 if ($self->corpus_sigle) {
92 $new{corpus_sigle} = $self->corpus_sigle;
93 if ($self->doc_sigle) {
94 $new{doc_sigle} = $self->doc_sigle;
95 if ($self->text_sigle) {
96 $new{text_sigle} = $self->text_sigle;
97 }
98 }
99 };
100 return \%new;
101};
102
Akron11c80302016-03-18 19:44:43 +0100103sub keys {
104 my $self = shift;
105 return grep {index($_, '_') != 0 } keys %$self;
106};
107
108sub do_cache {
109 my ($self, $type) = @_;
110
111 return if $type eq 'text';
112 return unless $self->cache;
113
114 my %value;
115 foreach ($self->keys) {
116 $value{$_} = $self->{$_};
117 };
118
119 my $cache = $self->cache;
120
121 if ($type eq 'corpus') {
122 $cache->set($self->corpus_sigle, \%value);
123 return 1;
124 }
125 elsif ($type eq 'doc') {
126 $cache->set($self->doc_sigle, \%value);
127 return 1;
128 };
129
130 return 0;
131};
Akron35db6e32016-03-17 22:42:22 +0100132
Akron5eb3aa02019-01-25 18:30:47 +0100133
134# Generate koral_fields
135sub to_koral_fields {
136 my $self = shift;
137 my @fields = ();
138
139 if ($self->corpus_sigle) {
140 push @fields, _string_field('corpusSigle', $self->corpus_sigle);
141 if ($self->doc_sigle) {
142 push @fields, _string_field('docSigle', $self->doc_sigle);
143 if ($self->text_sigle) {
144 push @fields, _string_field('textSigle', $self->text_sigle);
145 }
146 }
147 };
148
149 # Iterate over all keys
150 foreach (sort {$a cmp $b } $self->keys) {
151 if (index($_, 'D_') == 0) {
152 push @fields, _date_field(_k($_), $self->{$_});
153 }
154 elsif (index($_, 'S_') == 0) {
155 push @fields, _string_field(_k($_), $self->{$_});
156 }
157 elsif (index($_, 'T_') == 0) {
158 push @fields, _text_field(_k($_), $self->{$_});
159 }
160 # elsif (index($_, 'I_') == 0) {
161 # _int_field(_k($_), $self->{$_});
162 # }
163 elsif (index($_, 'A_') == 0) {
164 push @fields, _attachement_field(_k($_), $self->{$_});
165 }
166 elsif (index($_, 'K_') == 0) {
167 push @fields, _keywords_field(_k($_), $self->{$_});
168 }
169 else {
170 warn 'Unknown field type: ' . $_;
171 }
172 };
173
174 return \@fields;
175};
176
177sub _k {
178 my $x = substr($_[0], 2);
179 $x =~ s/_(\w)/\U$1\E/g;
180 $x =~ s/id$/ID/gi;
181 return $x;
182};
183
184
185sub _string_field {
186 return {
187 '@type' => 'koral:field',
188 type => 'type:string',
189 key => $_[0],
190 value => $_[1]
191 };
192};
193
194sub _text_field {
195 return {
196 '@type' => 'koral:field',
197 type => 'type:text',
198 key => $_[0],
199 value => $_[1]
200 };
201};
202
203sub _date_field {
204 my ($key, $value) = @_;
205 my $new_value;
206 if ($value =~ /^(\d\d\d\d)(\d\d)(\d\d)$/) {
207 $new_value = "$1";
208 if ($2 ne '00') {
209 $new_value .= "-$2";
210 if ($3 ne '00') {
211 $new_value .= "-$3";
212 };
213 };
214 };
215 return {
216 '@type' => 'koral:field',
217 type => 'type:date',
218 key => $key,
219 value => $new_value
220 };
221};
222
223sub _keywords_field {
224 return {
225 '@type' => 'koral:field',
226 type => 'type:keywords',
227 key => $_[0],
228 value => $_[1]
229 };
230};
231
232sub _attachement_field {
Akron6bf3cc92019-02-07 12:11:20 +0100233 my $value = $_[1];
234 if (index($value, 'data:') != 0) {
235 $value = 'data:,' . $value;
236 };
Akron5eb3aa02019-01-25 18:30:47 +0100237 return {
238 '@type' => 'koral:field',
239 type => 'type:attachement',
240 key => $_[0],
Akron6bf3cc92019-02-07 12:11:20 +0100241 value => $value
Akron5eb3aa02019-01-25 18:30:47 +0100242 };
243};
244
Akron35db6e32016-03-17 22:42:22 +01002451;