blob: 37bd848c723f44dcdf2aea259d14715702ce352b [file] [log] [blame]
Akron0a0e9242016-10-28 14:42:29 +02001package Krawfish::Koral::Query;
Akron290f59f2017-08-17 21:55:07 +02002use Role::Tiny::With;
3with 'Krawfish::Koral::Info';
Akron2c6c7162017-05-15 18:15:33 +02004# TODO: Use the same parent as Koral::Corpus
Akron4763ea62016-11-02 19:36:18 +01005use Krawfish::Koral::Query::Builder;
Akron944091b2016-11-24 16:40:58 +01006use Krawfish::Koral::Query::Importer;
Akron6ce51082017-07-26 17:31:41 +02007use Krawfish::Log;
Akron965f5d92017-01-20 18:38:08 +01008use Mojo::Util qw/md5_sum/;
Akron0a0e9242016-10-28 14:42:29 +02009use warnings;
Akron944091b2016-11-24 16:40:58 +010010use strict;
Akron0a0e9242016-10-28 14:42:29 +020011
# Base class for span queries
13
# TODO:
#   - extended_* may be queried
#     automatically without parameter
#   - rename all sorts of single ops to operand
#   - rename all sorts of multiple ops to operands
Akron6b195632017-06-09 23:47:49 +020019
# TODO:
#   This is now duplicated in Krawfish::Koral!
Akrona588d072017-10-13 14:45:34 +020022
# JSON-LD context URI that to_koral_query() stores under '@context'
use constant CONTEXT =>
  'http://korap.ids-mannheim.de/ns/koral/0.6/context.jsonld';

# Switch for the debug logging in finalize()
use constant DEBUG => 0;
27
Akrona588d072017-10-13 14:45:34 +020028
# Constructor.
# Creates a query object with all planning flags set to 0.
# If a true first argument is passed, it is handed to from_koral()
# and the result of that call is returned instead of the new object.
sub new {
  my ($class, @args) = @_;

  my %flags = map { $_ => 0 } qw/
    anywhere
    optional
    null
    negative
    extended
    extended_left
    extended_right
  /;
  my $self = bless \%flags, $class;

  # Deserialize from the given KoralQuery structure
  return $self->from_koral($args[0]) if $args[0];

  return $self;
};
48
Akron55fb3082017-07-18 13:24:53 +020049
# Type of the query.
# Placeholder that only emits an 'override' warning -
# meant to be overridden.
sub type {
  return warn 'override';
};
Akron5b6264f2017-07-19 01:14:01 +020054
55
#########################################
# Query Planning methods and attributes #
#########################################
Akrona211bf52016-10-29 18:03:29 +020059
Akron55fb3082017-07-18 13:24:53 +020060
# Normalize the query.
# Placeholder that only emits an 'override' warning -
# meant to be overridden.
sub normalize {
  return warn 'override';
};
Akron6b195632017-06-09 23:47:49 +020065
66
# Refer to common subqueries.
# This implementation returns the query unchanged.
sub refer {
  my $self = shift;
  return $self;
};
71
72
# Translate to ids.
# Calls identify() with the given dictionary on every operand and
# stores the returned query in place of the operand. A query without
# operands is returned unchanged.
# TODO:
#   If "nowhere" returns, optimize away
#   before ->optimize().
sub identify {
  my ($self, $dict) = @_;

  my $operands = $self->operands or return $self;

  # The loop variable aliases the array element,
  # so the assignment replaces the operand in place
  foreach my $operand (@$operands) {
    $operand = $operand->identify($dict);
  };

  return $self;
};
87
88
# Check for cached subqueries.
# This implementation returns the query unchanged.
sub cache {
  my $self = shift;
  return $self;
};
93
94
# Optimize for an index.
# Not implemented here: the yada yada statement below
# dies with "Unimplemented" when the method is called.
sub optimize {
  ...
};
Akron6b195632017-06-09 23:47:49 +020099
100
# This is the method to be overwritten
# by subclasses. It is called by finalize() and
# returns the query unchanged in this implementation.
sub _finalize {
  my $self = shift;
  return $self;
};
106
Akron55fb3082017-07-18 13:24:53 +0200107
# Treat the operand like a root operand.
#
# Returns nothing (after registering error 780) if the query is
# anywhere or null, and the builder's nowhere() query if the query
# is nowhere. Otherwise negativity and optionality are reset with
# a warning, and the result of _finalize() is returned - wrapped in
# the builder's in_text() if it is extended to the right.
sub finalize {
  my $self = shift;

  print_log('kq_query', 'Finalize query ' . $self->to_string) if DEBUG;

  # The query matches everywhere
  if ($self->is_anywhere || $self->is_null) {
    $self->error(780, "This query matches everywhere");
    return;
  };

  # The query matches nowhere
  return $self->builder->nowhere if $self->is_nowhere;

  # Ignore negativity
  # TODO:
  #   Better not search at all, because in case the query was classed,
  #   this class information would be lost in the normalization process, so
  #   {1:[!der]} would become [der], which is somehow weird.
  if ($self->is_negative) {
    $self->warning(782, 'Exclusivity of query is ignored');
    $self->is_negative(0);
  };

  # Ignore optionality
  if ($self->is_optional) {
    $self->warning(781, "Optionality of query is ignored");
    $self->is_optional(0);
  };

  # Use a finalize method
  my $finalized = $self->_finalize;

  # TODO:
  #   This needs to be in the finalize stage
  #   on the segment level!

  # There is a possible 'anywhere' extension,
  # that may exceed the text boundary
  return $self->builder->in_text($finalized)
    if $finalized->is_extended_right;

  # Return the planned query
  # TODO:
  #   Check for serialization errors
  return $finalized;
};
163
Akron5b6264f2017-07-19 01:14:01 +0200164
# Returns a list of classes used by the query,
# e.g. in a focus() context.
# Placeholder that only emits an 'override' warning -
# meant to be overridden.
sub uses_classes {
  return warn 'override';
};
Akron55fb3082017-07-18 13:24:53 +0200170
Akron6b195632017-06-09 23:47:49 +0200171
# Remove classes that are not used.
# Not implemented yet: the method only requests the used classes
# via uses_classes() and returns that result; the passed
# classes are ignored and nothing is removed.
sub remove_unused_classes {
  my ($self, $classes) = @_;

  # Pass classes required for highlighting or grouping,
  # and take classes from uses_classes() into account.
  # This is not done recursively, as it first needs to
  # gather all classes and then can remove them.
  my $in_use = $self->uses_classes;
  return $in_use;
};
180
181
# Remove classes from all operands.
# Calls remove_classes() on every operand with $keep (an empty
# array reference if no true value was passed) and replaces the
# operand by the result. A query without operands is returned
# unchanged.
# NOTE(review): $keep presumably lists the classes that must not
# be removed - confirm against the overriding implementations.
sub remove_classes {
  my ($self, $keep) = @_;
  $keep ||= [];

  my $operands = $self->operands;
  return $self unless $operands;

  for my $idx (0 .. $#{$operands}) {
    $operands->[$idx] = $operands->[$idx]->remove_classes($keep);
  };

  return $self;
};
197
Akronce10cb42017-06-14 01:12:40 +0200198
# Get and set operands.
# When called with an array reference, remove_info_from() is called
# for every element and a copy of the list is stored.
# Always returns the stored operands.
sub operands {
  my ($self, @args) = @_;

  if (@args) {
    my $given = shift @args;
    my @cleaned;
    for my $operand (@$given) {
      $self->remove_info_from($operand);
      push @cleaned, $operand;
    };
    $self->{operands} = \@cleaned;
  };

  return $self->{operands};
};
213
214
# Get and set first and only operand.
# With exactly one argument, the operand list is replaced by a
# list containing only this argument. Returns the first operand.
sub operand {
  my ($self, @value) = @_;
  $self->{operands} = [$value[0]] if @value == 1;
  return $self->{operands}->[0];
};
222
223
# Matches everything.
# Get the 'anywhere' flag (0 if unset);
# a defined argument sets the flag first.
sub is_anywhere {
  my ($self, $flag) = @_;
  $self->{anywhere} = $flag if defined $flag;
  return $self->{anywhere} // 0;
};
232
Akron5b6264f2017-07-19 01:14:01 +0200233
# Is optional.
# Get the 'optional' flag (0 if unset);
# a defined argument sets the flag first.
sub is_optional {
  my ($self, $flag) = @_;
  $self->{optional} = $flag if defined $flag;
  return $self->{optional} // 0;
};
Akron2c6c7162017-05-15 18:15:33 +0200242
Akron5b6264f2017-07-19 01:14:01 +0200243
# Null is empty - e.g. in
#   Der >alte{0}< Mann
# Returns the 'null' flag (0 if unset).
sub is_null {
  my $self = shift;
  return $self->{null} // 0;
};
249
Akron2c6c7162017-05-15 18:15:33 +0200250
# Nothing matches nowhere - e.g. in
#   Der [alte & !alte] Mann
# Get the 'nowhere' flag (0 if unset);
# a defined argument sets the flag first.
sub is_nowhere {
  my ($self, $flag) = @_;
  $self->{nowhere} = $flag if defined $flag;
  return $self->{nowhere} // 0;
};
Akron2c6c7162017-05-15 18:15:33 +0200260
Akron24ab2892017-07-18 14:05:33 +0200261
# Check if the query is a leaf node in the tree.
# Always 0 in this implementation.
sub is_leaf {
  return 0;
};
Akrona211bf52016-10-29 18:03:29 +0200266
Akron5b6264f2017-07-19 01:14:01 +0200267
# Check if the result of the query is extended to the right.
# Returns the 'extended_right' flag (0 if unset).
sub is_extended_right {
  my $self = shift;
  return $self->{extended_right} // 0;
};
272
273
# Check if the result of the query is extended to the left.
# Returns the 'extended_left' flag (0 if unset).
sub is_extended_left {
  my $self = shift;
  return $self->{extended_left} // 0;
};
278
279
# Check if the result of the query is extended,
# i.e. extended to the right or to the left.
sub is_extended {
  my $self = shift;

  # '||' and '//' share one precedence level and associate to
  # the left - the parentheses only make that grouping visible
  return ($self->is_extended_right || $self->is_extended_left) // 0;
};
284
285
# Is negative.
# Without an argument, returns the 'negative' flag (0 if unset).
# With exactly one argument, sets the flag to that value
# (even if undefined) and returns the query object.
sub is_negative {
  my ($self, @value) = @_;

  return $self->{negative} // 0 unless @value == 1;

  $self->{negative} = $value[0];
  return $self;
};
295
296
# Toggle negativity.
# Sets the negative flag to 0 if it is currently true and to 1
# otherwise. Returns the query object.
sub toggle_negative {
  my $self = shift;
  my $flipped = $self->is_negative ? 0 : 1;
  $self->is_negative($flipped);
  return $self;
};
303
304
# Returns 1 unless the query is negative, optional or anywhere -
# in these cases nothing is returned.
# TODO: Probably better to be renamed "potential_anchor"
sub maybe_anchor {
  my $self = shift;

  return if $self->is_negative
    || $self->is_optional
    || $self->is_anywhere;

  return 1;
};
313
Akron5b6264f2017-07-19 01:14:01 +0200314
# Check if the wrapped query may need to be sorted
# on focussing on a specific class.
# Normally spans are always sorted, but in case of
# a wrapped relation query, classed operands may
# be in arbitrary order. When focussing on these
# classes, the span has to be reordered.
# Returns the 'maybe_unsorted' flag (0 if unset).
# TODO:
#   Rename to classes_maybe_unsorted
sub maybe_unsorted {
  my $self = shift;
  return $self->{maybe_unsorted} // 0;
};
Akrona211bf52016-10-29 18:03:29 +0200326
Akron965f5d92017-01-20 18:38:08 +0100327
# Get the minimum tokens the query spans.
# Placeholder that only emits an 'override' warning -
# meant to be overridden.
sub min_span {
  return warn 'override';
};
332
333
# Get the maximum tokens the query spans.
# -1 means arbitrary.
# Placeholder that only emits an 'override' warning -
# meant to be overridden.
sub max_span {
  return warn 'override';
};
339
Akron965f5d92017-01-20 18:38:08 +0100340
#############################
# Query Application methods #
#############################
Akron33f1dcb2016-10-29 17:27:23 +0200344
# Deserialization of KoralQuery.
# Takes a KoralQuery hash reference and passes it to the importer
# method matching its '@type' (and 'operation' for groups):
#   koral:token                      -> token()
#   koral:group, operation:sequence  -> seq()
#   koral:group, operation:class     -> class()
# Anything else emits a warning and returns nothing.
# TODO: export this method from Importer
sub from_koral {
  my ($class, $kq) = @_;
  my $importer = Krawfish::Koral::Query::Importer->new;

  # A missing '@type' is treated as an empty string, so the
  # comparisons below do not trigger "uninitialized value" warnings.
  # The text of the emitted warning is not affected.
  my $type = $kq->{'@type'} // '';

  if ($type eq 'koral:group') {

    # Same for a missing operation
    my $op = $kq->{operation} // '';

    return $importer->seq($kq)   if $op eq 'operation:sequence';
    return $importer->class($kq) if $op eq 'operation:class';

    warn 'Operation ' . $op . ' no supported';
  }

  elsif ($type eq 'koral:token') {
    return $importer->token($kq);
  }

  else {
    warn $type . ' unknown';
  };

  return;
};
375
Akron5b6264f2017-07-19 01:14:01 +0200376
# Serialize to a KoralQuery fragment.
# Placeholder that only emits an 'override' warning -
# meant to be overridden.
sub to_koral_fragment {
  return warn 'override';
};
381
Akrona211bf52016-10-29 18:03:29 +0200382
# Serialize to a KoralQuery document.
# Takes the structure returned by to_koral_fragment(),
# stores the CONTEXT URI under '@context' and returns it.
sub to_koral_query {
  my $self = shift;

  my $fragment = $self->to_koral_fragment;
  $fragment->{'@context'} = CONTEXT;

  return $fragment;
};
Akron5b6264f2017-07-19 01:14:01 +0200390
Akrona588d072017-10-13 14:45:34 +0200391
# Stringification.
# Placeholder that only emits an 'override' warning -
# meant to be overridden.
sub to_string {
  return warn 'override';
};
Akron33f1dcb2016-10-29 17:27:23 +0200396
Akron5b6264f2017-07-19 01:14:01 +0200397
# Neutral stringification.
# Returns the result of to_string() in this implementation.
sub to_neutral {
  my $self = shift;
  return $self->to_string;
};
401
402
# Returns the MD5 checksum (hex) of the stringified query.
#
# The string is encoded as UTF-8 before hashing: MD5 is defined
# on bytes and the checksum function croaks on characters with
# ordinal numbers above 255. Pure ASCII strings keep the signature
# they had before; strings with other characters get a different one.
# NOTE(review): assumes to_string() returns a character string
# (not already encoded bytes) - confirm against the implementations.
#
# TODO: This may be optimizable and
#   implemented in all query and corpus wrappers
sub to_signature {
  my $self = shift;

  my $string = $self->to_string;

  # In-place conversion of the characters to UTF-8 bytes
  utf8::encode($string);

  return md5_sum($string);
};
408
Akron5b6264f2017-07-19 01:14:01 +0200409
# TODO: Returns a value of complexity of the query,
#   that can be used to decide, if a query should be cached.
# Placeholder that only emits an 'override' warning -
# meant to be overridden.
sub complexity {
  return warn 'override';
};
Akron965f5d92017-01-20 18:38:08 +0100415
Akronc3657bf2016-10-31 00:15:43 +0100416
# Create KoralQuery builder.
# Returns a new Krawfish::Koral::Query::Builder object on every call.
sub builder {
  return 'Krawfish::Koral::Query::Builder'->new;
};
421
Akron5b6264f2017-07-19 01:14:01 +0200422
# Create KoralQuery importer.
# Returns a new Krawfish::Koral::Query::Importer object on every call.
sub importer {
  return 'Krawfish::Koral::Query::Importer'->new;
};
427
Akron169ede42017-02-05 12:52:22 +0100428
# Serialization helper.
# Returns a hash reference of '@type' 'koral:boundary' that
# contains the 'min' and 'max' values of the object, as far
# as they are defined.
sub boundary {
  my $self = shift;

  my $boundary = { '@type' => 'koral:boundary' };

  for my $limit (qw/min max/) {
    $boundary->{$limit} = $self->{$limit} if defined $self->{$limit};
  };

  return $boundary;
};
Akron169ede42017-02-05 12:52:22 +0100439
440
# sub replace_references {
#   my ($self, $refs) = @_;
#   my $sig = $self->signature;
#
#   # Subquery is identical to given query
#   if ($refs->{$sig}) {
#     ...
#   }
#   else {
#     $refs->{$sig} = $self->operand;
#   };
# };
453
454
Akron0a0e9242016-10-28 14:42:29 +02004551;