blob: 4b71477ace01793bdb99739b8753ab367c5b399c [file] [log] [blame]
Akron0a0e9242016-10-28 14:42:29 +02001package Krawfish::Koral::Query;
Akron6621e112016-11-05 17:21:39 +01002use parent 'Krawfish::Info';
Akron2c6c7162017-05-15 18:15:33 +02003# TODO: Use the same parent as Koral::Corpus
Akron4763ea62016-11-02 19:36:18 +01004use Krawfish::Koral::Query::Builder;
Akron944091b2016-11-24 16:40:58 +01005use Krawfish::Koral::Query::Importer;
Akron965f5d92017-01-20 18:38:08 +01006use Mojo::Util qw/md5_sum/;
Akron0a0e9242016-10-28 14:42:29 +02007use warnings;
Akron944091b2016-11-24 16:40:58 +01008use strict;
Akron0a0e9242016-10-28 14:42:29 +02009
# TODO:
#   - rename 'nothing' to 'nowhere'
#   - rename 'any' to 'anywhere'
#   - extended_* may be queried
#     automatically without parameter
Akron6b195632017-06-09 23:47:49 +020015
# Construct a query object with all planning flags cleared.
#
# Parameters:
#   (optional) a true first argument is treated as a KoralQuery
#   structure and handed to from_koral(); the result of that
#   call is returned instead of the fresh object.
#
# Returns the blessed object, or whatever from_koral() returns.
sub new {
  my ($class, @args) = @_;

  # Every flag starts out as 0
  my %flags = map { $_ => 0 } qw/
    any
    optional
    null
    negative
    extended
    extended_left
    extended_right
  /;
  my $self = bless \%flags, $class;

  # Deserialize, if a KoralQuery structure was passed
  return $self->from_koral(shift @args) if $args[0];

  return $self;
};
34
#########################################
# Query Planning methods and attributes #
#########################################
Akrona211bf52016-10-29 18:03:29 +020038
# Run the complete planning pipeline:
#   normalize -> finalize -> refer -> inflate -> cache -> optimize
#
# Parameters:
#   $index - index object; its dict() is passed to inflate(),
#            the index itself is passed to optimize()
#
# Returns the result of optimize(), or nothing as soon as one of
# the stages does not yield a query. finalize() for example returns
# nothing for queries that match everywhere, so blindly chaining the
# calls would die with a method call on an undefined value.
sub plan_for_new {
  my ($self, $index) = @_;

  my $query = $self->normalize;
  return unless defined $query;

  $query = $query->finalize;
  return unless defined $query;

  $query = $query->refer;
  return unless defined $query;

  $query = $query->inflate($index->dict);
  return unless defined $query;

  $query = $query->cache;
  return unless defined $query;

  return $query->optimize($index);
};
49
50# Normalize the query
51sub normalize;
52
53
# Refer to common subqueries.
# The base implementation changes nothing and
# hands back the invocant.
sub refer {
  my $self = shift;
  return $self;
};
58
59
60# Expand regular expressions ...
61sub inflate;
62
63
# Check for cached subqueries.
# The base implementation changes nothing and
# hands back the invocant.
sub cache {
  my $self = shift;
  return $self;
};
68
69
70# Optimize for an index
71sub optimize;
72
73
# Hook called by finalize().
# This is the method to be overwritten by subclasses;
# the default returns the invocant unchanged.
sub _finalize {
  my $self = shift;
  return $self;
};
79
# Finalize the query on the top level.
#
# - Queries flagged as any or null are rejected: error 780 is
#   recorded and nothing is returned.
# - Queries flagged as nothing are replaced by the builder's
#   nothing() query.
# - Negativity and optionality are dropped with warnings
#   782 and 781 respectively.
# - The remaining work is delegated to _finalize().
#
# Returns the finalized query, wrapped with the builder's in_text()
# in case it is extended to the right.
sub finalize {
  my $self = shift;

  if ($self->is_any || $self->is_null) {
    $self->error(780, "This query matches everywhere");
    return;
  };

  return $self->builder->nothing if $self->is_nothing;

  # Flags that are meaningless on the top level
  # are reset (in this order) with a warning
  my @ignored = (
    [ is_negative => 782, 'Exclusivity of query is ignored' ],
    [ is_optional => 781, "Optionality of query is ignored" ],
  );
  foreach my $entry (@ignored) {
    my ($accessor, $code, $message) = @$entry;
    next unless $self->$accessor;
    $self->warn($code, $message);
    $self->$accessor(0);
  };

  my $finalized = $self->_finalize;

  # There is a possible 'any' extension,
  # that may exceed the text
  return $self->builder->in_text($finalized)
    if $finalized->is_extended_right;

  # Return the planned query
  # TODO: Check for serialization errors
  return $finalized;
};
116
117
# Stub for the removal of classes that are not needed.
#
# Parameters:
#   $classes - classes required for highlighting or grouping
#              (accepted, but not consulted yet)
#
# TODO: Take $classes and the result of uses_classes() into
# account. This is not done recursively, as it first needs to
# gather all classes and then can remove them.
#
# For now only uses_classes() is queried and its (scalar)
# result is returned.
sub remove_unused_classes {
  my $self    = shift;
  my $classes = shift;

  my $used = $self->uses_classes;
  return $used;
};
126
127
128
# Prepare a query for an index (deprecated, warns on every call).
# TODO: Rename to compile()
#
# Parameters:
#   $index - passed through to plan_for()
#
# Returns the result of plan_for() - called on the query itself
# or, if the query is extended to the right, on the query wrapped
# in a position query together with the 'base/s=t' span.
sub prepare_for {
  my ($self, $index) = @_;

  warn 'DEPRECATED';

  # Without a right extension the query is planned directly
  # TODO: Check for serialization errors
  return $self->plan_for($index) unless $self->is_extended_right;

  # There is a possible 'any' extension, that may exceed
  # the text - so wrap the query in a text element
  my $builder = $self->builder;
  my $in_text = $builder->position(
    ['endsWith', 'isAround', 'startsWith', 'matches'],
    $builder->span('base/s=t'),
    $self
  );

  return $in_text->plan_for($index);
};
155
156# Plan a query for an index (to be overwritten)
Akron349747d2016-12-05 11:05:53 +0100157# TODO: Rename to_primitive(index)
Akron4763ea62016-11-02 19:36:18 +0100158sub plan_for;
159
Akron617871f2017-05-27 02:05:31 +0200160
# Plan the query while removing classes in subqueries.
# The base implementation does not remove anything,
# it forwards all arguments to plan_for().
sub plan_without_classes_for {
  my ($self, @args) = @_;
  return $self->plan_for(@args);
};
166
167
# Filter a query based on a document query.
# Not implemented yet - the placeholder statement
# below dies with 'Unimplemented' when called.
sub filter_by {
  ...
};
Akron22b68582017-01-19 12:05:21 +0100172
# Matches everything.
# Get the 'any' flag, or set it first in case
# a defined value is passed. Defaults to 0.
sub is_any {
  my ($self, @value) = @_;
  $self->{any} = $value[0] if defined $value[0];
  return $self->{any} // 0;
};
182
# Get the 'optional' flag, or set it first in case
# a defined value is passed. Defaults to 0.
sub is_optional {
  my ($self, @value) = @_;
  $self->{optional} = $value[0] if defined $value[0];
  return $self->{optional} // 0;
};
Akron2c6c7162017-05-15 18:15:33 +0200190
# Null is empty - e.g. in
#   Der >alte{0}< Mann
# Read-only accessor for the 'null' flag, defaults to 0.
sub is_null {
  my $self = shift;
  return $self->{null} // 0;
};
Akron2c6c7162017-05-15 18:15:33 +0200194
# Nothing matches nowhere - e.g. in
#   Der [alte & !alte] Mann
# Get the 'nothing' flag, or set it first in case
# a defined value is passed. Defaults to 0.
sub is_nothing {
  my ($self, @value) = @_;
  $self->{nothing} = $value[0] if defined $value[0];
  return $self->{nothing} // 0;
};
Akron2c6c7162017-05-15 18:15:33 +0200205
# The base query is not a leaf
sub is_leaf {
  return 0;
};
# Read the 'extended_right' flag, defaults to 0
sub is_extended_right {
  my $self = shift;
  return $self->{extended_right} // 0;
};
# Read the 'extended_left' flag, defaults to 0
sub is_extended_left {
  my $self = shift;
  return $self->{extended_left} // 0;
};
# True, if the query is extended to the right or to the left.
# The left side is only checked in case the right side is false.
sub is_extended {
  my $self = shift;
  my $extended = $self->is_extended_right || $self->is_extended_left;
  return $extended // 0;
};
# The base query always reports -1 as its frequency
sub freq {
  return -1;
};
# The base query has an empty type string
sub type {
  return '';
};
Akrona211bf52016-10-29 18:03:29 +0200212
Akronce10cb42017-06-14 01:12:40 +0200213# Returns a list of classes used by the query,
214# e.g. in a focus() context.
215sub uses_classes;
Akron2c6c7162017-05-15 18:15:33 +0200216
# Get the 'negative' flag, or set it first in case exactly
# one argument is passed (the argument may even be undefined).
# Defaults to 0.
sub is_negative {
  my ($self, @args) = @_;
  $self->{negative} = $args[0] if @args == 1;
  return $self->{negative} // 0;
};
224
225
# Flip the 'negative' flag between 0 and 1.
# Returns the invocant to allow chaining.
sub toggle_negative {
  my $self = shift;
  my $flipped = $self->is_negative ? 0 : 1;
  $self->is_negative($flipped);
  return $self;
};
231
232
# Check, if the query may be used as an anchor - which is the
# case, if it is neither negative, nor optional, nor any.
# Returns 1 or nothing.
# TODO: Probably better to be renamed "potential_anchor"
sub maybe_anchor {
  my $self = shift;

  return if $self->is_negative
    || $self->is_optional
    || $self->is_any;

  return 1;
};
241
# Check if the wrapped query may need to be sorted
# on focussing on a specific class.
# Normally spans are always sorted, but in case of
# a wrapped relation query, classed operands may
# be in arbitrary order. When focussing on these
# classes, the span has to be reordered.
# Reads the 'maybe_unsorted' flag, defaults to 0.
sub maybe_unsorted {
  my $self = shift;
  return $self->{maybe_unsorted} // 0;
};
Akrona211bf52016-10-29 18:03:29 +0200249
Akron965f5d92017-01-20 18:38:08 +0100250
251# Iterate over all subqueries and possibly replace them
252sub subqueries;
253
#############################
# Query Application methods #
#############################
Akron33f1dcb2016-10-29 17:27:23 +0200257
# Deserialization of KoralQuery
# TODO: export this method from Importer
#
# Parameters:
#   $kq - hash reference of a KoralQuery node; '@type' selects
#         the importer method, groups are further dispatched
#         on 'operation'
#
# Supported are the group operations 'operation:sequence' and
# 'operation:class' as well as tokens.
#
# Returns the result of the matching importer method. Unknown or
# missing types and operations are reported with a warning
# and nothing is returned.
sub from_koral {
  my (undef, $kq) = @_;
  my $importer = Krawfish::Koral::Query::Importer->new;

  # A missing type would otherwise trigger
  # 'uninitialized' warnings in the comparisons below
  my $type = $kq->{'@type'};
  unless (defined $type) {
    warn 'KoralQuery type is missing';
    return;
  };

  if ($type eq 'koral:group') {
    my $op = $kq->{operation};

    unless (defined $op) {
      warn 'Operation is missing';
      return;
    };

    return $importer->seq($kq)   if $op eq 'operation:sequence';
    return $importer->class($kq) if $op eq 'operation:class';

    warn 'Operation ' . $op . ' not supported';
    return;
  };

  return $importer->token($kq) if $type eq 'koral:token';

  warn $type . ' unknown';
  return;
};
288
Akrona211bf52016-10-29 18:03:29 +0200289# Overwritten
290sub to_koral_fragment;
291
Akronc3657bf2016-10-31 00:15:43 +0100292# Overwritten
Akrona211bf52016-10-29 18:03:29 +0200293sub to_string;
Akron33f1dcb2016-10-29 17:27:23 +0200294
# Neutral string representation of the query.
# The base implementation returns to_string() unchanged.
sub to_neutral {
  my $self = shift;
  return $self->to_string;
};
298
299
# Signature of the query: the MD5 sum of its
# string representation.
# TODO: This may be optimizable and
# implemented in all query and corpus wrappers
sub to_signature {
  my $self = shift;
  return md5_sum($self->to_string);
};
305
306# TODO: Returns a value of complexity of the query,
307# that can be used to decide, if a query should be cached.
308sub complexity;
309
Akron573e7ec2016-11-05 19:03:01 +0100310# Clone the query
311# sub clone;
Akronc3657bf2016-10-31 00:15:43 +0100312
# Create a KoralQuery builder.
# Every call returns a newly constructed
# Krawfish::Koral::Query::Builder object.
sub builder {
  my $builder = Krawfish::Koral::Query::Builder->new;
  return $builder;
};
317
# Create a KoralQuery importer.
# Every call returns a newly constructed
# Krawfish::Koral::Query::Importer object.
sub importer {
  my $importer = Krawfish::Koral::Query::Importer->new;
  return $importer;
};
322
Akron169ede42017-02-05 12:52:22 +0100323
# Serialization helper
# Returns a hash reference typed as 'koral:boundary', that
# contains the object's min and max values - as long as
# they are defined.
sub boundary {
  my $self = shift;

  my $boundary = { '@type' => 'koral:boundary' };

  foreach my $limit ('min', 'max') {
    next unless defined $self->{$limit};
    $boundary->{$limit} = $self->{$limit};
  };

  return $boundary;
}
334
335
Akron0a0e9242016-10-28 14:42:29 +02003361;
Akron33f1dcb2016-10-29 17:27:23 +0200337
Akrona211bf52016-10-29 18:03:29 +0200338
Akron33f1dcb2016-10-29 17:27:23 +0200339__END__
340