| Akron | 0a0e924 | 2016-10-28 14:42:29 +0200 | [diff] [blame] | 1 | package Krawfish::Koral::Query; |
| Akron | 6621e11 | 2016-11-05 17:21:39 +0100 | [diff] [blame] | 2 | use parent 'Krawfish::Info'; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 3 | # TODO: Use the same parent as Koral::Corpus |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 4 | use Krawfish::Koral::Query::Builder; |
| Akron | 944091b | 2016-11-24 16:40:58 +0100 | [diff] [blame] | 5 | use Krawfish::Koral::Query::Importer; |
| Akron | 965f5d9 | 2017-01-20 18:38:08 +0100 | [diff] [blame] | 6 | use Mojo::Util qw/md5_sum/; |
| Akron | 0a0e924 | 2016-10-28 14:42:29 +0200 | [diff] [blame] | 7 | use warnings; |
| Akron | 944091b | 2016-11-24 16:40:58 +0100 | [diff] [blame] | 8 | use strict; |
| Akron | 0a0e924 | 2016-10-28 14:42:29 +0200 | [diff] [blame] | 9 | |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 10 | # TODO: |
| 11 | # - rename 'nothing' to 'nowhere' |
| 12 | # - rename 'any' to 'anywhere' |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 13 | # - extended_* may be queried |
| 14 | # automatically without parameter |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 15 | |
# Constructor.
# Creates a query object with all planning flags reset to 0.
# When a true first argument (a KoralQuery structure) is passed,
# the construction is delegated to from_koral() and whatever
# that method returns is returned instead of the fresh object.
sub new {
  my ($class, $koral) = @_;

  # All flags start out disabled
  my %flags = map { $_ => 0 } qw/
    any
    optional
    null
    negative
    extended
    extended_left
    extended_right
  /;

  my $self = bless \%flags, $class;

  return $koral ? $self->from_koral($koral) : $self;
};
| 34 | |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 35 | ######################################### |
| 36 | # Query Planning methods and attributes # |
| 37 | ######################################### |
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 38 | |
# Run the complete planning pipeline for an index:
#   normalize -> finalize -> refer -> inflate -> cache -> optimize
#
# Parameters:
#   $index - the index to plan for; its dict() is passed to inflate()
#            and the index itself to optimize().
#
# Returns the result of the last stage, or nothing if any stage
# yields an undefined value. finalize() returns nothing after
# recording an error, so the pipeline has to stop there instead
# of dying with "Can't call method ... on an undefined value".
sub plan_for_new {
  my ($self, $index) = @_;

  # Each stage is wrapped in a closure, so its arguments
  # (e.g. the dictionary) are only evaluated once the stage
  # is actually reached
  my @stages = (
    sub { $_[0]->normalize },
    sub { $_[0]->finalize },
    sub { $_[0]->refer },
    sub { $_[0]->inflate($index->dict) },
    sub { $_[0]->cache },
    sub { $_[0]->optimize($index) }
  );

  my $query = $self;
  foreach my $stage (@stages) {
    $query = $stage->($query);

    # A stage failed - stop planning
    return unless defined $query;
  };

  return $query;
};
| 49 | |
| 50 | # Normalize the query |
| 51 | sub normalize; |
| 52 | |
| 53 | |
# Refer to common subqueries.
# The base implementation changes nothing and
# hands back the invocant.
sub refer {
  my ($self) = @_;
  return $self;
};
| 58 | |
| 59 | |
| 60 | # Expand regular expressions ... |
| 61 | sub inflate; |
| 62 | |
| 63 | |
# Check for cached subqueries.
# The base implementation changes nothing and
# hands back the invocant.
sub cache {
  my ($self) = @_;
  return $self;
};
| 68 | |
| 69 | |
| 70 | # Optimize for an index |
| 71 | sub optimize; |
| 72 | |
| 73 | |
# Finalization hook called by finalize().
# This is the method meant to be overwritten by subclasses;
# the default returns the invocant unchanged.
sub _finalize {
  my ($self) = @_;
  return $self;
};
| 79 | |
# Finalize the query at the top level of the query tree.
#
# - A query flagged as 'any' or 'null' is rejected:
#   error 780 is recorded and nothing is returned.
# - A query flagged as 'nothing' is replaced by the
#   builder's nothing() query.
# - Negativity and optionality are dropped with a warning
#   (782 and 781), and the flags are reset to 0.
# - The result of _finalize() is wrapped by the builder's
#   in_text() if it is extended to the right.
#
# Returns the finalized query or nothing on error.
sub finalize {
  my $self = shift;

  # The query would match everywhere
  if ($self->is_any or $self->is_null) {
    $self->error(780, "This query matches everywhere");
    return;
  };

  # The query can't match at all
  return $self->builder->nothing if $self->is_nothing;

  # Flags that make no sense on the top level are
  # reported and reset - in this order
  my @ignored = (
    [ is_negative => 782, 'Exclusivity of query is ignored' ],
    [ is_optional => 781, "Optionality of query is ignored" ]
  );

  foreach my $entry (@ignored) {
    my ($flag, $code, $message) = @$entry;
    next unless $self->$flag;
    $self->warn($code, $message);
    $self->$flag(0);
  };

  # Let the subclass do its part
  my $final = $self->_finalize;

  # There is a possible 'any' extension,
  # that may exceed the text
  # TODO: Check for serialization errors
  return $final->is_extended_right
    ? $self->builder->in_text($final)
    : $final;
};
| 116 | |
| 117 | |
# Placeholder for the removal of classes that are not needed.
# Currently this only fetches the classes the query uses itself;
# nothing is removed yet.
sub remove_unused_classes {
  my $self    = shift;
  my $classes = shift;

  # TODO:
  #   Pass classes required for highlighting or grouping,
  #   and take classes from uses_classes() into account.
  #   This is not done recursively, as it first needs to
  #   gather all classes and then can remove them.
  my $used = $self->uses_classes;
};
| 126 | |
| 127 | |
| 128 | |
# Prepare a query for an index (deprecated).
# Emits a 'DEPRECATED' warning on every call.
# A query that is extended to the right is first wrapped in a
# position query on the text span 'base/s=t', as the extension
# may exceed the text. The (possibly wrapped) query is then
# planned with plan_for() and that result is returned.
# TODO: Rename to compile()
sub prepare_for {
  my ($self, $index) = @_;

  warn 'DEPRECATED';

  # Nothing to wrap - plan the query as is
  # TODO: Check for serialization errors
  unless ($self->is_extended_right) {
    return $self->plan_for($index);
  };

  # There is a possible 'any' extension,
  # that may exceed the text
  my $qb = $self->builder;

  # Wrap query in a text element
  my @frames = ('endsWith', 'isAround', 'startsWith', 'matches');
  my $in_text = $qb->position(
    \@frames,
    $qb->span('base/s=t'),
    $self
  );

  # Return the planned query
  return $in_text->plan_for($index);
};
| 155 | |
| 156 | # Plan a query for an index (to be overwritten) |
| Akron | 349747d | 2016-12-05 11:05:53 +0100 | [diff] [blame] | 157 | # TODO: Rename to_primitive(index) |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 158 | sub plan_for; |
| 159 | |
| Akron | 617871f | 2017-05-27 02:05:31 +0200 | [diff] [blame] | 160 | |
# Plan the query with classes removed in subqueries.
# The base implementation simply forwards all
# arguments to plan_for().
sub plan_without_classes_for {
  my ($self, @args) = @_;
  return $self->plan_for(@args);
};
| 166 | |
| 167 | |
# Filter a query based on a document query.
# Not implemented yet: the body is Perl's yada-yada
# statement, so calling this method dies with an
# "Unimplemented" error.
# NOTE(review): presumably to be implemented here or
# overwritten in subclasses - confirm.
sub filter_by {
  ...
};
| Akron | 22b6858 | 2017-01-19 12:05:21 +0100 | [diff] [blame] | 172 | |
| Akron | ce10cb4 | 2017-06-14 01:12:40 +0200 | [diff] [blame] | 173 | # sub is_any { $_[0]->{any} // 0 }; |
# Get or set the 'any' flag (the query matches everything).
# A defined argument is stored as the new value;
# an undefined argument leaves the flag untouched.
# Returns the current value, or 0 if it is not set.
sub is_any {
  my ($self, $value) = @_;
  $self->{any} = $value if defined $value;
  return $self->{any} // 0;
};
| 182 | |
# Get or set the 'optional' flag.
# A defined argument is stored as the new value;
# an undefined argument leaves the flag untouched.
# Returns the current value, or 0 if it is not set.
sub is_optional {
  my ($self, $value) = @_;
  $self->{optional} = $value if defined $value;
  return $self->{optional} // 0;
};
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 190 | |
# Read the 'null' flag.
# A null query is empty - e.g. in
#   Der >alte{0}< Mann
# Returns 0 if the flag is not set.
sub is_null {
  my ($self) = @_;
  return $self->{null} // 0;
};
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 194 | |
| 195 | # Nothing matches nowhere - e.g. in |
| 196 | # Der [alte & !alte] Mann |
| Akron | ce10cb4 | 2017-06-14 01:12:40 +0200 | [diff] [blame] | 197 | # sub is_nothing { $_[0]->{nothing} // 0 }; |
# Get or set the 'nothing' flag.
# A defined argument is stored as the new value;
# an undefined argument leaves the flag untouched.
# Returns the current value, or 0 if it is not set.
sub is_nothing {
  my ($self, $value) = @_;
  $self->{nothing} = $value if defined $value;
  return $self->{nothing} // 0;
};
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 205 | |
# The base query is not a leaf; always returns 0.
sub is_leaf {
  return 0;
};
# Read the 'extended_right' flag; 0 if it is not set.
sub is_extended_right {
  my ($self) = @_;
  return $self->{extended_right} // 0;
};
# Read the 'extended_left' flag; 0 if it is not set.
sub is_extended_left {
  my ($self) = @_;
  return $self->{extended_left} // 0;
};
# Check if the query is extended to either side.
# Returns the first true value of is_extended_right() and
# is_extended_left(), otherwise the value of is_extended_left()
# (or 0 if that is undefined).
sub is_extended {
  my ($self) = @_;

  # '||' and '//' share one precedence level and associate
  # to the left, so the fallback applies to the whole disjunction
  my $extended = $self->is_extended_right || $self->is_extended_left;
  return $extended // 0;
};
# Frequency of the query; the base implementation
# always returns -1.
sub freq {
  return -1;
};
# Type of the query; the base implementation
# returns the empty string.
sub type {
  return '';
};
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 212 | |
| Akron | ce10cb4 | 2017-06-14 01:12:40 +0200 | [diff] [blame] | 213 | # Returns a list of classes used by the query, |
| 214 | # e.g. in a focus() context. |
| 215 | sub uses_classes; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 216 | |
# Get or set the 'negative' flag.
# If exactly one argument is passed, it is stored as the
# new value - even if it is undefined.
# Returns the current value, or 0 if it is undefined.
sub is_negative {
  my ($self, @value) = @_;
  $self->{negative} = $value[0] if @value == 1;
  return $self->{negative} // 0;
};
| 224 | |
| 225 | |
# Flip the 'negative' flag between 0 and 1.
# Returns the invocant, so calls can be chained.
sub toggle_negative {
  my $self = shift;
  my $flipped = $self->is_negative ? 0 : 1;
  $self->is_negative($flipped);
  return $self;
};
| 231 | |
| 232 | |
# Check if the query may serve as an anchor.
# A query that is negative, optional or 'any' is no
# candidate and yields nothing; every other query yields 1.
# TODO: Probably better to be renamed "potential_anchor"
sub maybe_anchor {
  my $self = shift;

  return if $self->is_negative
    || $self->is_optional
    || $self->is_any;

  return 1;
};
| 241 | |
# Check if the wrapped query may need to be sorted
# when focussing on a specific class.
# Normally spans are always sorted, but in case of
# a wrapped relation query, classed operands may
# be in arbitrary order. When focussing on these
# classes, the span has to be reordered.
# Returns the 'maybe_unsorted' flag, or 0 if it is not set.
sub maybe_unsorted {
  my ($self) = @_;
  return $self->{maybe_unsorted} // 0;
};
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 249 | |
| Akron | 965f5d9 | 2017-01-20 18:38:08 +0100 | [diff] [blame] | 250 | |
| 251 | # Iterate over all subqueries and possibly replace them |
| 252 | sub subqueries; |
| 253 | |
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 254 | ############################# |
| 255 | # Query Application methods # |
| 256 | ############################# |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 257 | |
# Deserialization of KoralQuery.
#
# Parameters:
#   $kq - hash reference of a KoralQuery object; the '@type' key
#         (and 'operation' for groups) selects the importer method.
#
# Supported are 'koral:token' and the 'koral:group' operations
# 'operation:sequence' and 'operation:class'. Everything else
# triggers a warning and nothing is returned.
# A missing '@type' or 'operation' is treated as an empty string,
# so it is reported as unknown/unsupported instead of raising
# additional "uninitialized value" warnings.
#
# Returns the query created by the importer or nothing.
# TODO: export this method from Importer
sub from_koral {
  my ($class, $kq) = @_;
  my $importer = Krawfish::Koral::Query::Importer->new;

  my $type = $kq->{'@type'} // '';

  # Single token
  if ($type eq 'koral:token') {
    return $importer->token($kq);
  };

  # Group of operands
  if ($type eq 'koral:group') {
    my $op = $kq->{operation} // '';

    return $importer->seq($kq)   if $op eq 'operation:sequence';
    return $importer->class($kq) if $op eq 'operation:class';

    warn 'Operation ' . $op . ' not supported';
    return;
  };

  warn $type . ' unknown';
  return;
};
| 288 | |
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 289 | # Overwritten |
| 290 | sub to_koral_fragment; |
| 291 | |
| Akron | c3657bf | 2016-10-31 00:15:43 +0100 | [diff] [blame] | 292 | # Overwritten |
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 293 | sub to_string; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 294 | |
# Neutral serialization of the query.
# The base implementation returns whatever to_string() returns.
sub to_neutral {
  my ($self) = @_;
  return $self->to_string;
};
| 298 | |
| 299 | |
# Create a signature of the query: the MD5 checksum
# of its string serialization.
# TODO: This may be optimizable and
#   implemented in all query and corpus wrappers
sub to_signature {
  my ($self) = @_;
  return md5_sum($self->to_string);
};
| 305 | |
| 306 | # TODO: Returns a value of complexity of the query, |
| 307 | # that can be used to decide, if a query should be cached. |
| 308 | sub complexity; |
| 309 | |
| Akron | 573e7ec | 2016-11-05 19:03:01 +0100 | [diff] [blame] | 310 | # Clone the query |
| 311 | # sub clone; |
| Akron | c3657bf | 2016-10-31 00:15:43 +0100 | [diff] [blame] | 312 | |
# Create a KoralQuery builder.
# Every call returns a new Krawfish::Koral::Query::Builder object.
sub builder {
  my $builder_class = 'Krawfish::Koral::Query::Builder';
  return $builder_class->new;
};
| 317 | |
# Create a KoralQuery importer.
# Every call returns a new Krawfish::Koral::Query::Importer object.
sub importer {
  my $importer_class = 'Krawfish::Koral::Query::Importer';
  return $importer_class->new;
};
| 322 | |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 323 | |
# Serialization helper.
# Returns a hash reference of type 'koral:boundary' that
# carries the 'min' and 'max' values of the query, each of
# them only if it is defined.
sub boundary {
  my $self = shift;

  my $boundary = { '@type' => 'koral:boundary' };

  foreach my $limit (qw/min max/) {
    next unless defined $self->{$limit};
    $boundary->{$limit} = $self->{$limit};
  };

  return $boundary;
}
| 334 | |
| 335 | |
| Akron | 0a0e924 | 2016-10-28 14:42:29 +0200 | [diff] [blame] | 336 | 1; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 337 | |
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 338 | |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 339 | __END__ |
| 340 | |