blob: 9ea48d297a6c0be6bb0edffd85add8c18202cde5 [file] [log] [blame]
Akron0a0e9242016-10-28 14:42:29 +02001package Krawfish::Koral::Query;
Akron6621e112016-11-05 17:21:39 +01002use parent 'Krawfish::Info';
Akron2c6c7162017-05-15 18:15:33 +02003# TODO: Use the same parent as Koral::Corpus
Akron4763ea62016-11-02 19:36:18 +01004use Krawfish::Koral::Query::Builder;
Akron944091b2016-11-24 16:40:58 +01005use Krawfish::Koral::Query::Importer;
Akron965f5d92017-01-20 18:38:08 +01006use Mojo::Util qw/md5_sum/;
Akron0a0e9242016-10-28 14:42:29 +02007use warnings;
Akron944091b2016-11-24 16:40:58 +01008use strict;
Akron0a0e9242016-10-28 14:42:29 +02009
Akron6b195632017-06-09 23:47:49 +020010# TODO:
11# - rename 'nothing' to 'nowhere'
12# - rename 'any' to 'anywhere'
Akron8231ca72017-06-16 16:08:32 +020013# - extended_* may be queried
14# automatically without parameter
Akron55fb3082017-07-18 13:24:53 +020015# - rename all sorts of single ops to operand
16# - rename all sorts of multiple ops to operands
Akron6b195632017-06-09 23:47:49 +020017
# TODO:
#   This is now double with Krawfish::Koral!
#
# JSON-LD context URI. to_koral_query() stores this value
# under the '@context' key of the serialized query.
use constant {
  CONTEXT => 'http://korap.ids-mannheim.de/ns/koral/0.6/context.jsonld'
};
23
# Constructor.
#
# Creates a query object with all planning flags switched off.
# If a true first argument is passed, it is handed to from_koral()
# and the result of that call is returned instead of the
# freshly created object.
sub new {
  my ($class, $koral) = @_;

  # All flags start out disabled
  my %flags = map { $_ => 0 } qw/
    any
    optional
    null
    negative
    extended
    extended_left
    extended_right
  /;

  my $self = bless { %flags }, $class;

  return $koral ? $self->from_koral($koral) : $self;
};
42
Akron55fb3082017-07-18 13:24:53 +020043
# Forward declaration only - no body is defined in this file.
# TODO(review): presumably implemented by the concrete query
# classes; confirm.
sub type;
45
46
Akron4763ea62016-11-02 19:36:18 +010047#########################################
48# Query Planning methods and attributes #
49#########################################
Akrona211bf52016-10-29 18:03:29 +020050
# Run the complete planning pipeline for this query:
#   normalize -> finalize -> refer -> inflate -> cache -> optimize
#
# Parameters:
#   $index - index object; its dict() result is passed to inflate()
#            and the object itself is passed to optimize().
#
# Returns the result of the optimize() stage. If any stage yields
# no query (e.g. finalize() rejects a query that matches everywhere),
# the pipeline stops and nothing is returned, instead of dying
# on a method call on an undefined value.
sub plan_for_new {
  my ($self, $index) = @_;

  # Each stage receives the query produced by the previous stage
  my @stages = (
    sub { $_[0]->normalize },
    sub { $_[0]->finalize },
    sub { $_[0]->refer },
    sub { $_[0]->inflate($index->dict) },
    sub { $_[0]->cache },
    sub { $_[0]->optimize($index) },
  );

  my $query = $self;
  for my $stage (@stages) {
    $query = $stage->($query);

    # A stage gave up - there is nothing left to plan
    return unless defined $query;
  };

  return $query;
};
61
Akron55fb3082017-07-18 13:24:53 +020062
# Normalize the query.
# Forward declaration only - no body is defined in this file.
# plan_for_new() calls it as the first planning stage and
# chains further method calls on its return value.
sub normalize;
65
66
# Refer to common subqueries.
# This default implementation does nothing and
# returns the invocant unchanged.
sub refer {
  my ($self) = @_;
  return $self;
};
71
72
# Expand regular expressions.
#
# Parameters:
#   $dict - dictionary object, passed on unchanged to every operand.
#
# Replaces each operand in place by the result of its own
# inflate($dict) call and returns the invocant. If operands()
# returns a false value, the invocant is returned as is.
sub inflate {
  my ($self, $dict) = @_;

  my $operands = $self->operands
    or return $self;

  # The loop variable aliases the array slot,
  # so the assignment replaces the operand in place
  for my $operand (@$operands) {
    $operand = $operand->inflate($dict);
  };

  return $self;
};
Akron6b195632017-06-09 23:47:49 +020083
84
# Check for cached subqueries.
# This default implementation does nothing and
# returns the invocant unchanged.
sub cache {
  my ($self) = @_;
  return $self;
};
89
90
# Optimize for an index.
# Forward declaration only - no body is defined in this file.
# plan_for_new() calls it with the index object as the
# last planning stage.
sub optimize;
93
94
# Hook called by finalize().
# This is the method to be overwritten by subclasses;
# the default returns the invocant unchanged.
sub _finalize {
  my ($self) = @_;
  return $self;
};
100
Akron55fb3082017-07-18 13:24:53 +0200101
# Treat the operand like a root operand.
#
# Returns
#   - nothing, after reporting error 780, if the query
#     is 'any' or 'null',
#   - the builder's nothing() query, if the query matches nowhere,
#   - the _finalize() result wrapped by the builder's in_text(),
#     if that result is extended to the right,
#   - the _finalize() result otherwise.
# Negativity and optionality are reset on the invocant,
# each with a warning.
sub finalize {
  my $self = shift;

  # The query matches everywhere
  if ($self->is_any || $self->is_null) {
    $self->error(780, "This query matches everywhere");
    return;
  };

  # The query matches nowhere
  return $self->builder->nothing if $self->is_nothing;

  # Ignore negativity
  if ($self->is_negative) {
    $self->warning(782, 'Exclusivity of query is ignored');

    # TODO:
    #   Better not search at all, because in case the query was classed,
    #   this class information would be lost in the normalization process, so
    #   {1:[!der]} would become [der], which is somehow weird.
    $self->is_negative(0);
  };

  # Ignore optionality
  if ($self->is_optional) {
    $self->warning(781, "Optionality of query is ignored");
    $self->is_optional(0);
  };

  # Let the (possibly overwritten) hook do its work
  my $finalized = $self->_finalize;

  # TODO:
  #   This needs to be in the finalize stage
  #   on the segment level!

  # There is a possible 'any' extension,
  # that may exceed the text boundary.
  # Otherwise return the planned query.
  # TODO:
  #   Check for serialization errors
  return $finalized->is_extended_right
    ? $self->builder->in_text($finalized)
    : $finalized;
};
152
Akron5b6264f2017-07-19 01:14:01 +0200153
# Returns a list of classes used by the query,
# e.g. in a focus() context.
# Forward declaration only - no body is defined in this file.
sub uses_classes;
157
Akron6b195632017-06-09 23:47:49 +0200158
# Unfinished stub: fetches the classes reported by uses_classes(),
# but does not remove anything yet. $classes is currently unused.
# NOTE(review): as the assignment is the last statement, its value
# is also the (incidental) return value of this method.
sub remove_unused_classes {
  my ($self, $classes) = @_;
  my $used = $self->uses_classes;
  # Pass classes required for highlighting or grouping,
  # and take classes from uses_classes() into account.
  # This is not done recursively, as it first needs to
  # gather all classes and then can remove them.
};
167
168
# Remove classes from all operands.
#
# Parameters:
#   $keep - array reference that is forwarded unchanged to the
#           remove_classes() call of every operand; defaults to
#           an empty array reference.
#           NOTE(review): the name suggests these are the classes
#           to keep - confirm against the subclass implementations.
#
# Replaces each operand in place by the result of its own
# remove_classes() call and returns the invocant. If operands()
# returns a false value, the invocant is returned as is.
sub remove_classes {
  my ($self, $keep) = @_;
  $keep ||= [];

  my $operands = $self->operands
    or return $self;

  # The loop variable aliases the array slot
  for my $operand (@$operands) {
    $operand = $operand->remove_classes($keep);
  };

  return $self;
};
184
Akronce10cb42017-06-14 01:12:40 +0200185
# Get and set operands.
# With an argument, stores it (as is) as the operands value;
# always returns the currently stored value.
sub operands {
  my ($self, @new) = @_;
  $self->{operands} = $new[0] if @new;
  return $self->{operands};
};
194
195
# Get and set first and only operand.
#
# With exactly one argument, the operands are replaced by a
# one-element list holding that argument. Always returns the
# first operand, or undef if there are no operands.
#
# Reading does not modify the object: the operands list is not
# autovivified for queries without operands, so operands() keeps
# returning undef for them.
sub operand {
  my $self = shift;

  if (@_ == 1) {
    $self->{operands} = [shift];
  };

  my $operands = $self->{operands};
  return $operands ? $operands->[0] : undef;
};
203
204
205#sub replace_references {
206# my ($self, $refs) = @_;
207# my $sig = $self->signature;
208#
209# # Subquery is identical to given query
210# if ($refs->{$sig}) {
211# ...
212# }
213# else {
214# $refs->{$sig} = $self->operand;
215# };
216#};
217
218
# Matches everything.
# With a defined argument, sets the flag first;
# always returns the current flag value (0 if unset).
sub is_any {
  my ($self, $flag) = @_;
  $self->{any} = $flag if defined $flag;
  return $self->{any} // 0;
};
227
Akron5b6264f2017-07-19 01:14:01 +0200228
229
# Is optional.
# With a defined argument, sets the flag first;
# always returns the current flag value (0 if unset).
sub is_optional {
  my ($self, $flag) = @_;
  $self->{optional} = $flag if defined $flag;
  return $self->{optional} // 0;
};
Akron2c6c7162017-05-15 18:15:33 +0200238
Akron5b6264f2017-07-19 01:14:01 +0200239
# Null is empty - e.g. in
#   Der >alte{0}< Mann
# Read-only: returns the stored flag value (0 if unset).
sub is_null {
  my ($self) = @_;
  return $self->{null} // 0;
};
245
Akron2c6c7162017-05-15 18:15:33 +0200246
# Nothing matches nowhere - e.g. in
#   Der [alte & !alte] Mann
# With a defined argument, sets the flag first;
# always returns the current flag value (0 if unset).
sub is_nothing {
  my ($self, $flag) = @_;
  $self->{nothing} = $flag if defined $flag;
  return $self->{nothing} // 0;
};
Akron2c6c7162017-05-15 18:15:33 +0200256
Akron24ab2892017-07-18 14:05:33 +0200257
# Check if the query is a leaf node in the tree.
# This default implementation always returns 0.
sub is_leaf {
  return 0;
};
Akrona211bf52016-10-29 18:03:29 +0200262
Akron5b6264f2017-07-19 01:14:01 +0200263
# Check if the result of the query is extended to the right.
# Read-only: returns the stored flag value (0 if unset).
sub is_extended_right {
  my ($self) = @_;
  return $self->{extended_right} // 0;
};
268
269
# Check if the result of the query is extended to the left.
# Read-only: returns the stored flag value (0 if unset).
sub is_extended_left {
  my ($self) = @_;
  return $self->{extended_left} // 0;
};
274
275
# Check if the result of the query is extended
# to the right or to the left.
# Returns the first true value of is_extended_right() and
# is_extended_left(), otherwise the value of is_extended_left()
# (or 0 if that is undefined).
sub is_extended {
  my ($self) = @_;

  # '||' and '//' share one precedence level and associate to the
  # left, so the default applies to the combined result
  my $extended = $self->is_extended_right || $self->is_extended_left;
  return $extended // 0;
};
280
281
# Is negative.
# With exactly one argument, stores it as the flag (even if it is
# undefined) and returns the invocant, so calls can be chained.
# Otherwise returns the current flag value (0 if unset).
sub is_negative {
  my ($self, @args) = @_;

  # Getter
  return $self->{negative} // 0 unless @args == 1;

  # Setter
  $self->{negative} = $args[0];
  return $self;
};
291
292
# Toggle negativity.
# Sets the negative flag to 0 if it is currently true and
# to 1 otherwise; returns the invocant.
sub toggle_negative {
  my ($self) = @_;
  my $flipped = $self->is_negative ? 0 : 1;
  $self->is_negative($flipped);
  return $self;
};
299
300
# TODO: Probably better to be renamed "potential_anchor"
#
# Returns 1, unless the query is negative, optional or
# matches everything - in these cases nothing is returned.
sub maybe_anchor {
  my ($self) = @_;

  return if $self->is_negative
    || $self->is_optional
    || $self->is_any;

  return 1;
};
309
Akron5b6264f2017-07-19 01:14:01 +0200310
# Check if the wrapped query may need to be sorted
# on focussing on a specific class.
# Normally spans are always sorted, but in case of
# a wrapped relation query, classed operands may
# be in arbitrary order. When focussing on these
# classes, the span has to be reordered.
# Read-only: returns the stored flag value (0 if unset).
# TODO:
#   Rename to classes_maybe_unsorted
sub maybe_unsorted {
  my ($self) = @_;
  return $self->{maybe_unsorted} // 0;
};
Akrona211bf52016-10-29 18:03:29 +0200322
Akron965f5d92017-01-20 18:38:08 +0100323
# Get the minimum tokens the query spans.
# Not implemented here: the body is only the '...' placeholder
# statement, which throws an "Unimplemented" exception when called.
sub min_span {
  ...
};
328
329
# Get the maximum tokens the query spans
# -1 means arbitrary
# Not implemented here: the body is only the '...' placeholder
# statement, which throws an "Unimplemented" exception when called.
sub max_span {
  ...
};
335
Akron965f5d92017-01-20 18:38:08 +0100336
Akrona211bf52016-10-29 18:03:29 +0200337#############################
338# Query Application methods #
339#############################
Akron33f1dcb2016-10-29 17:27:23 +0200340
# Deserialization of KoralQuery
# TODO: export this method from Importer
#
# Parameters:
#   $kq - hash reference of a KoralQuery node. The '@type' key
#         (and for groups the 'operation' key) selects the
#         importer method the node is handed to.
#
# Returns the result of the importer for
#   - koral:token
#   - koral:group with operation:sequence or operation:class
# For everything else a warning is issued and nothing is returned.
# A missing '@type' or 'operation' is reported explicitly rather
# than triggering "uninitialized value" warnings.
sub from_koral {
  my ($class, $kq) = @_;
  my $importer = Krawfish::Koral::Query::Importer->new;

  my $type = $kq->{'@type'};
  unless (defined $type) {
    warn 'Query has no @type';
    return;
  };

  # Token
  if ($type eq 'koral:token') {
    return $importer->token($kq);
  }

  # Unknown type
  elsif ($type ne 'koral:group') {
    warn $type . ' unknown';
    return;
  };

  # Group - dispatch on the operation
  my $op = $kq->{operation};
  unless (defined $op) {
    warn 'Group has no operation';
    return;
  };

  if ($op eq 'operation:sequence') {
    return $importer->seq($kq);
  }
  elsif ($op eq 'operation:class') {
    return $importer->class($kq);
  };

  warn 'Operation ' . $op . ' not supported';
  return;
};
371
Akron5b6264f2017-07-19 01:14:01 +0200372
# Overwritten
# Forward declaration only - no body is defined in this file.
# to_koral_query() calls it and uses the result as a hash reference.
sub to_koral_fragment;
375
# Serialize the query to a complete KoralQuery structure.
# Takes the result of to_koral_fragment(), stores the CONTEXT
# constant under its '@context' key and returns it.
sub to_koral_query {
  my ($self) = @_;
  my $fragment = $self->to_koral_fragment;
  $fragment->{'@context'} = CONTEXT;
  return $fragment;
};
Akron5b6264f2017-07-19 01:14:01 +0200382
# Overwritten
# Forward declaration only - no body is defined in this file.
# to_neutral() and to_signature() rely on it.
sub to_string;
Akron33f1dcb2016-10-29 17:27:23 +0200385
Akron5b6264f2017-07-19 01:14:01 +0200386
# Neutral string representation.
# This default simply delegates to to_string().
sub to_neutral {
  my ($self) = @_;
  return $self->to_string;
};
390
391
# Signature of the query: the md5_sum() of its
# to_string() serialization.
# TODO: This may be optimizable and
#   implemented in all query and corpus wrappers
sub to_signature {
  my ($self) = @_;
  return md5_sum($self->to_string);
};
397
Akron5b6264f2017-07-19 01:14:01 +0200398
# TODO: Returns a value of complexity of the query,
#   that can be used to decide, if a query should be cached.
# Forward declaration only - no body is defined in this file.
sub complexity;
402
Akronc3657bf2016-10-31 00:15:43 +0100403
# Create KoralQuery builder.
# Returns a new Krawfish::Koral::Query::Builder object on every call.
sub builder {
  my $builder_class = 'Krawfish::Koral::Query::Builder';
  return $builder_class->new;
};
408
Akron5b6264f2017-07-19 01:14:01 +0200409
# Create KoralQuery importer.
# Returns a new Krawfish::Koral::Query::Importer object on every call.
sub importer {
  my $importer_class = 'Krawfish::Koral::Query::Importer';
  return $importer_class->new;
};
414
Akron169ede42017-02-05 12:52:22 +0100415
# Serialization helper.
# Returns a new hash reference of '@type' 'koral:boundary',
# carrying the object's min and max values if they are defined.
sub boundary {
  my ($self) = @_;

  my $boundary = { '@type' => 'koral:boundary' };

  # Only serialize the limits that are set
  for my $limit (qw/min max/) {
    next unless defined $self->{$limit};
    $boundary->{$limit} = $self->{$limit};
  };

  return $boundary;
};
Akron169ede42017-02-05 12:52:22 +0100426
427
Akron0a0e9242016-10-28 14:42:29 +02004281;
Akron33f1dcb2016-10-29 17:27:23 +0200429
Akrona211bf52016-10-29 18:03:29 +0200430
Akron33f1dcb2016-10-29 17:27:23 +0200431__END__
432