| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 1 | package Krawfish::Koral::Corpus::Field::Date; |
| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 2 | use strict; |
| 3 | use warnings; |
| Akron | cb0bac1 | 2018-04-12 20:28:21 +0200 | [diff] [blame^] | 4 | use Krawfish::Util::Constants qw/:PREFIX :RANGE/; |
| 5 | use Krawfish::Log; |
| 6 | use Krawfish::Koral::Corpus::DateRange; |
| 7 | use Role::Tiny::With; |
| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 8 | |
| Akron | 6274802 | 2018-03-15 19:40:45 +0100 | [diff] [blame] | 9 | with 'Krawfish::Koral::Corpus::Field::Relational'; |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 10 | with 'Krawfish::Koral::Corpus::Field'; |
| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 11 | with 'Krawfish::Koral::Corpus'; |
| 12 | |
| Akron | cb0bac1 | 2018-04-12 20:28:21 +0200 | [diff] [blame^] | 13 | # This supports range queries on special |
| 14 | # date and int fields in the dictionary. |
| 15 | |
| 16 | # The implementation for dates has to recognize |
| 17 | # that not only ranges are required to be queried, but also |
| 18 | # stored. |
| 19 | |
| 20 | # RESTRICTION: |
| 21 | # - Currently this is restricted to dates! |
| 22 | # - currently this finds all intersecting dates! |
| Akron | 320c67c | 2018-04-09 17:26:04 +0200 | [diff] [blame] | 23 | |
| 24 | # TODO: |
| 25 | # Convert the strings to RFC3339, as this is a sortable |
| 26 | # date format. |
| 27 | |
| Akron | 7403592 | 2018-03-27 20:33:13 +0200 | [diff] [blame] | 28 | use constant DEBUG => 0; |
| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 29 | |
| Akron | 99da869 | 2018-04-08 15:33:19 +0200 | [diff] [blame] | 30 | # TODO: |
| 31 | # A date should probably have a different prefix |
| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 32 | |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 33 | # TODO: |
| 34 | # Compare with de.ids_mannheim.korap.util.KrillDate |
| 35 | |
| Akron | 1bbe60e | 2018-03-25 09:28:37 +0200 | [diff] [blame] | 36 | # Construct new date field object |
| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 37 | sub new { |
| 38 | my $class = shift; |
| 39 | bless { |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 40 | key => shift, |
| 41 | parsed => undef, |
| 42 | year => undef, |
| 43 | month => undef, |
| 44 | day => undef |
| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 45 | }, $class; |
| 46 | }; |
| 47 | |
| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 48 | sub key_type { |
| 49 | 'date'; |
| 50 | }; |
| 51 | |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 52 | sub year { |
| Akron | 1bbe60e | 2018-03-25 09:28:37 +0200 | [diff] [blame] | 53 | $_[0]->{year}; |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 54 | }; |
| 55 | |
| 56 | sub month { |
| Akron | 1bbe60e | 2018-03-25 09:28:37 +0200 | [diff] [blame] | 57 | $_[0]->{month} // 0; |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 58 | }; |
| 59 | |
| 60 | sub day { |
| Akron | 1bbe60e | 2018-03-25 09:28:37 +0200 | [diff] [blame] | 61 | $_[0]->{day} // 0; |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 62 | }; |
| 63 | |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 64 | # Compare against another field value |
| 65 | sub value_eq { |
| 66 | my ($self, $other) = @_; |
| 67 | if ($self->year == $other->year && |
| 68 | $self->month == $other->month && |
| 69 | $self->day == $other->day) { |
| 70 | return 1; |
| 71 | }; |
| 72 | return 0; |
| 73 | }; |
| 74 | |
| 75 | |
| 76 | sub value_gt { |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 77 | my ($self, $other) = @_; |
| 78 | if ($self->year > $other->year) { |
| 79 | return 1; |
| 80 | } |
| 81 | elsif ($self->year < $other->year) { |
| 82 | return 0; |
| 83 | } |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 84 | elsif (!$self->month && !$other->month) { |
| 85 | return 0; # It's equal |
| 86 | } |
| 87 | elsif ($self->month && !$other->month) { |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 88 | return 1; |
| 89 | } |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 90 | elsif (!$self->month && $other->month) { |
| 91 | return 0; |
| 92 | } |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 93 | elsif ($self->month > $other->month) { |
| 94 | return 1; |
| 95 | } |
| 96 | elsif ($self->month < $other->month) { |
| 97 | return 0; |
| 98 | } |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 99 | elsif (!$self->day && !$other->day) { |
| 100 | return 0; # It's equal |
| 101 | } |
| 102 | elsif ($self->day && !$other->day) { |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 103 | return 1; |
| 104 | } |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 105 | elsif (!$self->day && $other->day) { |
| 106 | return 0; |
| 107 | } |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 108 | elsif ($self->day > $other->day) { |
| 109 | return 1; |
| 110 | }; |
| 111 | return 0; |
| 112 | }; |
| 113 | |
| 114 | |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 115 | sub value_lt { |
| Akron | 320c67c | 2018-04-09 17:26:04 +0200 | [diff] [blame] | 116 | my ($self, $other) = @_; |
| 117 | if ($self->year < $other->year) { |
| 118 | return 1; |
| 119 | } |
| 120 | elsif ($self->year > $other->year) { |
| 121 | return 0; |
| 122 | } |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 123 | elsif (!$self->month && !$other->month) { |
| 124 | return 0; # It's equal |
| 125 | } |
| 126 | elsif ($self->month && !$other->month) { |
| Akron | 320c67c | 2018-04-09 17:26:04 +0200 | [diff] [blame] | 127 | return 0; |
| 128 | } |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 129 | elsif (!$self->month && $other->month) { |
| 130 | return 1; |
| 131 | } |
| Akron | 320c67c | 2018-04-09 17:26:04 +0200 | [diff] [blame] | 132 | elsif ($self->month < $other->month) { |
| 133 | return 1; |
| 134 | } |
| 135 | elsif ($self->month > $other->month) { |
| 136 | return 0; |
| 137 | } |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 138 | elsif (!$self->day && !$other->day) { |
| 139 | return 0; # It's equal |
| 140 | } |
| 141 | elsif ($self->day && !$other->day) { |
| Akron | 320c67c | 2018-04-09 17:26:04 +0200 | [diff] [blame] | 142 | return 0; |
| 143 | } |
| Akron | bfa1cbd | 2018-04-10 12:43:25 +0200 | [diff] [blame] | 144 | elsif (!$self->day && $other->day) { |
| 145 | return 1; |
| 146 | } |
| Akron | 320c67c | 2018-04-09 17:26:04 +0200 | [diff] [blame] | 147 | elsif ($self->day < $other->day) { |
| 148 | return 1; |
| 149 | }; |
| 150 | return 0; |
| 151 | }; |
| 152 | |
| 153 | |
| 154 | |
| Akron | 3ee72ec | 2018-03-26 22:49:36 +0200 | [diff] [blame] | 155 | # Translate all terms to term ids |
| 156 | sub identify { |
| 157 | my ($self, $dict) = @_; |
| 158 | |
| 159 | if ($self->match_short ne '=') { |
| 160 | warn 'Relational matches not supported yet'; |
| 161 | return; |
| 162 | }; |
| 163 | |
| 164 | my $term = $self->to_term; |
| 165 | |
| 166 | print_log('kq_date', "Translate term $term to term_id") if DEBUG; |
| 167 | |
| 168 | my $term_id = $dict->term_id_by_term(DATE_FIELD_PREF . $term); |
| 169 | |
| 170 | return $self->builder->nowhere unless defined $term_id; |
| 171 | |
| 172 | return Krawfish::Koral::Corpus::FieldID->new($term_id); |
| 173 | }; |
| 174 | |
| 175 | |
| Akron | 1bbe60e | 2018-03-25 09:28:37 +0200 | [diff] [blame] | 176 | sub value { |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 177 | my $self = shift; |
| Akron | 1bbe60e | 2018-03-25 09:28:37 +0200 | [diff] [blame] | 178 | if (@_) { |
| 179 | $self->{value} = shift; |
| 180 | if ($self->{value} =~ /^(\d{4})(?:-?(\d{2})(?:-?(\d{2}))?)?$/) { |
| 181 | $self->{year} = ($1 + 0) if $1; |
| 182 | $self->{month} = ($2 + 0) if $2; |
| 183 | $self->{day} = ($3 + 0) if $3; |
| 184 | return $self; |
| 185 | }; |
| 186 | return; |
| 187 | }; |
| 188 | return $self->{value}; |
| Akron | e984d64 | 2018-03-14 15:05:43 +0100 | [diff] [blame] | 189 | }; |
| 190 | |
| Akron | 1bbe60e | 2018-03-25 09:28:37 +0200 | [diff] [blame] | 191 | |
| Akron | cb0bac1 | 2018-04-12 20:28:21 +0200 | [diff] [blame^] | 192 | # Serialize the value string |
| 193 | # Accepts an optional granularity value |
| 194 | # with: 0 = all |
| 195 | # 1 = till month |
| 196 | # 2 = till year |
| Akron | 3c89668 | 2018-03-24 12:00:55 +0100 | [diff] [blame] | 197 | sub value_string { |
| Akron | cb0bac1 | 2018-04-12 20:28:21 +0200 | [diff] [blame^] | 198 | my ($self, $granularity) = @_; |
| 199 | $granularity //= 0; |
| Akron | 1bbe60e | 2018-03-25 09:28:37 +0200 | [diff] [blame] | 200 | my $str = ''; |
| 201 | $str .= $self->year; |
| Akron | cb0bac1 | 2018-04-12 20:28:21 +0200 | [diff] [blame^] | 202 | if ($self->month && $granularity <= 1) { |
| Akron | 3c89668 | 2018-03-24 12:00:55 +0100 | [diff] [blame] | 203 | $str .= '-' . _zero($self->month); |
| Akron | cb0bac1 | 2018-04-12 20:28:21 +0200 | [diff] [blame^] | 204 | if ($self->day && $granularity <= 0) { |
| Akron | 3c89668 | 2018-03-24 12:00:55 +0100 | [diff] [blame] | 205 | $str .= '-' . _zero($self->day); |
| 206 | }; |
| 207 | } |
| 208 | return $str; |
| 209 | }; |
| 210 | |
| Akron | cb0bac1 | 2018-04-12 20:28:21 +0200 | [diff] [blame^] | 211 | |
| Akron | 3c89668 | 2018-03-24 12:00:55 +0100 | [diff] [blame] | 212 | sub _zero { |
| 213 | if ($_[0] < 10) { |
| 214 | return '0' . $_[0] |
| 215 | }; |
| 216 | return $_[0]; |
| 217 | }; |
| 218 | |
| 219 | |
| Akron | 1bbe60e | 2018-03-25 09:28:37 +0200 | [diff] [blame] | 220 | # Stringification for sorting |
| 221 | # TODO: |
| 222 | # This may fail in case key_type and/or |
| 223 | # value may contain ':' - so this should be |
| 224 | # ensured! |
| 225 | sub to_sort_string { |
| 226 | my $self = shift; |
| 227 | return 0 if $self->is_null; |
| 228 | |
| 229 | my $str = $self->key_type . ':'; |
| 230 | $str .= $self->key . ':'; |
| 231 | $str .= ($self->value_string // '') . ':'; |
| 232 | $str .= $self->match_short; |
| 233 | return $str; |
| 234 | }; |
| 235 | |
| 236 | |
| Akron | cb0bac1 | 2018-04-12 20:28:21 +0200 | [diff] [blame^] | 237 | # Convert date to query term |
| 238 | # This will represent an intersection |
| 239 | # with all dates or dateranges intersecting |
| 240 | # with the current date |
| 241 | sub to_intersecting_terms { |
| 242 | my $self = shift; |
| 243 | my @terms; |
| 244 | |
| 245 | # Match the whole granularity subtree |
| 246 | # Either the day, the month or the year |
| 247 | # e.g. 2015], 2015-11], 2015-11-14] |
| 248 | if ($self->day) { |
| 249 | push @terms, |
| 250 | $self->builder->string($self->key)->eq( |
| 251 | $self->value_string(0) . RANGE_ALL_POST |
| 252 | ); |
| 253 | }; |
| 254 | |
| 255 | if ($self->month) { |
| 256 | push @terms, |
| 257 | $self->builder->string($self->key)->eq( |
| 258 | $self->value_string(1) . RANGE_ALL_POST |
| 259 | ); |
| 260 | }; |
| 261 | |
| 262 | push @terms, |
| 263 | $self->builder->string($self->key)->eq( |
| 264 | $self->value_string(2) . RANGE_ALL_POST |
| 265 | ); |
| 266 | |
| 267 | return @terms; |
| 268 | }; |
| 269 | |
| 270 | |
| 271 | # Spawn an intersecting date range query |
| 272 | sub intersect { |
| 273 | my $self = shift; |
| 274 | my ($first, $second) = @_; |
| 275 | |
| 276 | # Make this a DateRange query |
| 277 | if ($second) { |
| 278 | my $cb = $self->builder; |
| 279 | |
| 280 | return Krawfish::Koral::Corpus::DateRange->new( |
| 281 | $cb->date($self->key)->geq($first), |
| 282 | $cb->date($self->key)->leq($second) |
| 283 | ); |
| 284 | }; |
| 285 | |
| 286 | $self->{match} = 'intersect'; |
| 287 | $self->value(shift) or return; |
| 288 | |
| 289 | return $self; |
| 290 | }; |
| 291 | |
| 292 | |
| 293 | |
| 294 | |
| Akron | 4335bdc | 2018-03-08 17:15:33 +0100 | [diff] [blame] | 295 | 1; |