blob: 18a3593e152aa47cc1c81fdef0b70e4d3da5d1df [file] [log] [blame]
Akron8a679332016-11-01 16:18:55 +01001package Krawfish::Koral::Query::Builder;
Akron7b4e4d92017-09-25 12:18:29 +02002use Krawfish::Util::Constants qw/:PREFIX/;
Akron1ec9b8e2017-12-12 15:09:15 +01003use Krawfish::Util::Bits;
Akron7b4e4d92017-09-25 12:18:29 +02004
Akron06eb4d32016-11-11 14:05:52 +01005use Krawfish::Koral::Query::Term;
Akron8a679332016-11-01 16:18:55 +01006use Krawfish::Koral::Query::Token;
7use Krawfish::Koral::Query::Span;
Akron2bc94da2017-10-27 15:20:36 +02008use Krawfish::Koral::Query::InCorpus;
Akron8a679332016-11-01 16:18:55 +01009use Krawfish::Koral::Query::Sequence;
Akronc697fc22016-11-11 02:34:56 +010010use Krawfish::Koral::Query::Repetition;
Akron06eb4d32016-11-11 14:05:52 +010011use Krawfish::Koral::Query::TermGroup;
Akron774c5db2016-11-09 16:11:38 +010012use Krawfish::Koral::Query::Extension;
Akrone62f4942016-11-13 01:11:21 +010013use Krawfish::Koral::Query::Exclusion;
Akron6d9341b2016-11-16 16:59:01 +010014use Krawfish::Koral::Query::Unique;
Akrone2744462016-11-15 00:21:43 +010015use Krawfish::Koral::Query::Class;
Akron95a37a42017-11-20 17:16:40 +010016use Krawfish::Koral::Query::Constraint;
Akron169ede42017-02-05 12:52:22 +010017use Krawfish::Koral::Query::Length;
Akron5a5595b2017-09-10 13:00:57 +020018use Krawfish::Koral::Query::Nowhere;
Akron6ccc3ad2017-07-17 13:21:22 +020019use Krawfish::Koral::Query::Or;
Akron15fc1972017-07-20 22:53:00 +020020use Krawfish::Koral::Query::Filter;
Akron5e4b2972017-08-05 20:59:48 +020021use Krawfish::Koral::Query::Match;
Akron934afe02016-11-18 03:35:20 +010022
Akron8b3d9ff2017-12-13 15:01:22 +010023use Krawfish::Koral::Query::Failure;
24
Akron934afe02016-11-18 03:35:20 +010025use Krawfish::Koral::Query::Constraint::Position;
Akron581e8932017-12-11 14:25:03 +010026use Krawfish::Koral::Query::Constraint::ClassBetween;
Akron61e8bce2017-05-24 15:55:27 +020027use Krawfish::Koral::Query::Constraint::NotBetween;
Akron6ccc3ad2017-07-17 13:21:22 +020028use Krawfish::Koral::Query::Constraint::InBetween;
Akron8a679332016-11-01 16:18:55 +010029
Akron5e4b2972017-08-05 20:59:48 +020030use Krawfish::Koral::Corpus::Builder;
31
Akronb945c572017-07-23 14:55:00 +020032use Scalar::Util qw/blessed/;
Akronee06a132017-12-08 16:59:27 +010033use strict;
34use warnings;
Akronb945c572017-07-23 14:55:00 +020035
# Field name compared against the document identifier in match()
use constant DOC_IDENTIFIER => 'id';

# Span used by the constructor when no text span is passed
use constant TEXT_SPAN => 'base/s=t';
Akron5e4b2972017-08-05 20:59:48 +020040
Akrone66b1db2017-12-13 12:00:46 +010041
# Constructor
# Accepts an optional text span name (defaults to TEXT_SPAN).
# The builder object is an array reference holding that span name
# as its only element.
sub new {
  my ($class, $text_span) = @_;
  $text_span //= TEXT_SPAN;
  return bless [$text_span], $class;
};
48
Akronee06a132017-12-08 16:59:27 +010049
Akron8a679332016-11-01 16:18:55 +010050#########################
51# KoralQuery constructs #
52#########################
53
Akronee06a132017-12-08 16:59:27 +010054
# Token construct
# All arguments following the invocant are forwarded to the
# constructor of Krawfish::Koral::Query::Token.
# Should probably be like:
#   ->token('Der') or
#   ->token(->term_or('Der', 'Die', 'Das'))
sub token {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::Token->new(@operands);
};
63
64
# Class construct
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Class and returns its result.
sub class {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::Class->new(@operands);
};
70
Akrone66b1db2017-12-13 12:00:46 +010071
# Sequence construct
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Sequence and returns its result.
sub seq {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::Sequence->new(@operands);
};
77
78
# Repetition construct
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Repetition and returns its result.
sub repeat {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::Repetition->new(@operands);
};
84
85
# Term construct
# Prepends TOKEN_PREF to the first argument and creates a
# Krawfish::Koral::Query::Term from the resulting string.
# Further arguments are ignored.
sub term {
  my (undef, $term) = @_;
  return Krawfish::Koral::Query::Term->new(TOKEN_PREF . $term);
};
91
Akrone66b1db2017-12-13 12:00:46 +010092
# Term with negativity
# Builds a term via term() and sets its match operator to '!='.
sub term_neg {
  my ($self, @args) = @_;
  my $term = $self->term(@args);
  return $term->match('!=');
};
97
Akrone66b1db2017-12-13 12:00:46 +010098
# Term with regular expression
# Builds a term via term() and sets its match operator to '~'.
sub term_re {
  my ($self, @args) = @_;
  my $term = $self->term(@args);
  return $term->match('~');
};
103
104
# Span construct
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Span and returns its result.
sub span {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::Span->new(@operands);
};
110
111
# Conjunction group construct
# Creates a Krawfish::Koral::Query::TermGroup with the operation 'and'
# over all arguments following the invocant.
sub bool_and {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::TermGroup->new('and', @operands);
};
117
Akrone66b1db2017-12-13 12:00:46 +0100118
# Exclusion query construct
# Takes a positive and a negative operand and creates a
# Krawfish::Koral::Query::Exclusion restricted to the 'matches' frame.
# Arguments beyond these two operands are ignored.
sub bool_and_not {
  my (undef, $pos, $neg) = @_;
  my @frames = ('matches');
  return Krawfish::Koral::Query::Exclusion->new(\@frames, $pos, $neg);
};
125
126
# Disjunction group construct
# Returns a Krawfish::Koral::Query::TermGroup with the operation 'or'
# if every operand is a term or a term group, and a
# Krawfish::Koral::Query::Or otherwise.
# Operands that are not blessed objects (e.g. plain strings) are
# treated as terms; blessed operands are asked for their type().
sub bool_or {
  my (undef, @operands) = @_;

  # Count the operands that can't be part of a term group.
  # All operands are inspected (not only the first two), so a
  # non-term operand at any position prevents a term group.
  my $non_terms = grep {
    my $type = blessed($_) ? $_->type : 'term';
    $type ne 'term' && $type ne 'termGroup';
  } @operands;

  if ($non_terms) {
    return Krawfish::Koral::Query::Or->new(@operands);
  };

  return Krawfish::Koral::Query::TermGroup->new('or' => @operands);
};
141
142
# Create an in-text construct
# Builds a position query between the span stored in the builder
# (the first element of the object) and the passed query, using
# the frames endsWith, isAround, startsWith and matches.
sub in_text {
  my ($self, $query) = @_;
  my @frames = ('endsWith', 'isAround', 'startsWith', 'matches');
  return $self->position(
    \@frames,
    $self->span($self->[0]),
    $query
  );
};
152
Akron2bc94da2017-10-27 15:20:36 +0200153
# Position construct
# Expects an array reference of frames followed by the operands.
# The frames are turned into a position constraint via c_position()
# and passed - wrapped in an array reference - together with the
# operands to constraint().
sub position {
  my ($self, $frames, @operands) = @_;
  my @constraints = $self->c_position(@{$frames});
  return $self->constraint(\@constraints, @operands);
};
163
Akron934afe02016-11-18 03:35:20 +0100164
# Exclusion construct
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Exclusion and returns its result.
sub exclusion {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::Exclusion->new(@operands);
};
170
Akron934afe02016-11-18 03:35:20 +0100171
# Search with reference to a specific subcorpus
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::InCorpus and returns its result.
sub in_corpus {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::InCorpus->new(@operands);
};
177
178
# Create reference query
# Passes the first argument following the invocant to the constructor
# of Krawfish::Koral::Query::Reference; further arguments are ignored.
sub reference {
  my (undef, $reference) = @_;

  # The class is not imported at the top of this file, so load it
  # on demand unless it was already made available elsewhere
  require Krawfish::Koral::Query::Reference
    unless Krawfish::Koral::Query::Reference->can('new');

  return Krawfish::Koral::Query::Reference->new($reference);
};
184
185
# Create constraint query
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Constraint and returns its result.
sub constraint {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::Constraint->new(@operands);
};
191
Akrone66b1db2017-12-13 12:00:46 +0100192
# Create position constraint
# Forwards all arguments following the invocant (position() passes
# the list of frames) to the constructor of
# Krawfish::Koral::Query::Constraint::Position.
sub c_position {
  my (undef, @frames) = @_;
  return Krawfish::Koral::Query::Constraint::Position->new(@frames);
};
198
Akrone66b1db2017-12-13 12:00:46 +0100199
# Create class between constraint
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Constraint::ClassBetween.
sub c_class_between {
  my (undef, @args) = @_;
  return Krawfish::Koral::Query::Constraint::ClassBetween->new(@args);
};
205
Akrone66b1db2017-12-13 12:00:46 +0100206
# Create not between constraint
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Constraint::NotBetween.
sub c_not_between {
  my (undef, @args) = @_;
  return Krawfish::Koral::Query::Constraint::NotBetween->new(@args);
};
212
Akrone66b1db2017-12-13 12:00:46 +0100213
# Create in between constraint
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Constraint::InBetween.
sub c_in_between {
  my (undef, @args) = @_;
  return Krawfish::Koral::Query::Constraint::InBetween->new(@args);
};
219
Akron0fe4c292017-07-25 16:30:46 +0200220
# Make all positions be in order
# Creates a position constraint with a fixed list of frames.
# Arguments passed to this method are ignored.
sub c_in_order {
  my @frames = (
    'precedesDirectly',
    'precedes',
    'endsWith',
    'isAround',
    'overlapsLeft',
    'alignsLeft',
    'matches'
  );
  return Krawfish::Koral::Query::Constraint::Position->new(@frames);
};
234
235
# Create length query
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Length and returns its result.
sub length {
  my (undef, @args) = @_;
  return Krawfish::Koral::Query::Length->new(@args);
};
241
Akrone2744462016-11-15 00:21:43 +0100242
# Extension to the left
# Creates a Krawfish::Koral::Query::Extension with 1 as the first
# constructor argument, followed by all arguments of this method.
sub ext_left {
  my (undef, @args) = @_;
  return Krawfish::Koral::Query::Extension->new(1, @args);
};
248
Akrone66b1db2017-12-13 12:00:46 +0100249
# Extension to the right
# Creates a Krawfish::Koral::Query::Extension with 0 as the first
# constructor argument, followed by all arguments of this method.
sub ext_right {
  my (undef, @args) = @_;
  return Krawfish::Koral::Query::Extension->new(0, @args);
};
255
256
# Matches anywhere
# Returns a Krawfish::Koral::Query::Token created without any
# arguments. Arguments passed to this method are ignored.
sub anywhere {
  return Krawfish::Koral::Query::Token->new();
};
261
262
# Null element - only for plan testing purposes
# Returns a Krawfish::Koral::Query::Term created without any
# arguments. Arguments passed to this method are ignored.
sub null {
  return Krawfish::Koral::Query::Term->new();
};
267
Akron4f9eef42017-07-24 11:41:09 +0200268
# Return Failure object
# Passes the first argument following the invocant to the constructor
# of Krawfish::Koral::Query::Failure; further arguments are ignored.
sub failure {
  my (undef, $kq) = @_;
  return Krawfish::Koral::Query::Failure->new($kq);
};
274
275
# No match
# Returns a Krawfish::Koral::Query::Nowhere created without any
# arguments. Arguments passed to this method are ignored.
sub nowhere {
  return Krawfish::Koral::Query::Nowhere->new();
};
280
Akron6d9341b2016-11-16 16:59:01 +0100281
# Unique results
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Unique and returns its result.
sub unique {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::Unique->new(@operands);
};
287
Akron15fc1972017-07-20 22:53:00 +0200288
# Filter a query with a corpus
# Forwards all arguments following the invocant to the constructor
# of Krawfish::Koral::Query::Filter and returns its result.
sub filter_by {
  my (undef, @operands) = @_;
  return Krawfish::Koral::Query::Filter->new(@operands);
};
294
Akron5e4b2972017-08-05 20:59:48 +0200295
# Find exactly one single match
# Expects a document identifier, a start and an end value, and
# optionally a payload list and flags:
#   - The document identifier is turned into a corpus query
#     comparing the DOC_IDENTIFIER field for equality.
#   - The payload list is an array reference of array references;
#     each inner list is passed to add() of a new payload object.
#   - Flags given as an array reference are converted with
#     classes_to_flags(); any other value is passed on unchanged.
# Returns the Krawfish::Koral::Query::Match created from these values.
sub match {
  my $self = shift;

  # TODO:
  #   Probably ask for a unique field!
  my ($doc_id, $start, $end, $pl, $flags) = @_;

  my $cb = Krawfish::Koral::Corpus::Builder->new;
  my $doc = $cb->string(DOC_IDENTIFIER)->eq($doc_id);

  # The payload stays undefined if no payload list was passed
  my $payload;
  if ($pl) {

    # The class is not imported at the top of this file, so load it
    # on demand unless it was already made available elsewhere
    require Krawfish::Posting::Payload
      unless Krawfish::Posting::Payload->can('new');

    $payload = Krawfish::Posting::Payload->new;
    foreach my $entry (@$pl) {
      $payload->add(@$entry);
    };
  };

  if ($flags && ref($flags) eq 'ARRAY') {
    $flags = classes_to_flags(@$flags);
  };

  return Krawfish::Koral::Query::Match->new(
    $doc,
    $start,
    $end,
    $payload,
    $flags
  );
};
327
Akron3feb4d82017-12-12 19:33:46 +0100328
329##############################
330# KoralQuery deserialization #
331##############################
332
# Deserialize KQ
# Expects a KoralQuery hash reference and dispatches on its '@type'
# (and, for 'koral:group', on its 'operation') to the from_koral()
# method of the responsible query class.
# Unknown or missing types and unsupported operations emit a warning
# and return nothing.
sub from_koral {
  my (undef, $kq) = @_;

  # A missing type is treated like an unknown type
  my $type = $kq->{'@type'} // '';

  # Deserialize groups
  if ($type eq 'koral:group') {
    my %class_for_operation = (
      'operation:sequence'    => 'Krawfish::Koral::Query::Sequence',
      'operation:class'       => 'Krawfish::Koral::Query::Class',
      'operation:length'      => 'Krawfish::Koral::Query::Length',
      'operation:repetition'  => 'Krawfish::Koral::Query::Repetition',
      'operation:exclusion'   => 'Krawfish::Koral::Query::Exclusion',
      'operation:position'    => 'Krawfish::Koral::Query::Constraint',
      'operation:constraint'  => 'Krawfish::Koral::Query::Constraint',
      'operation:disjunction' => 'Krawfish::Koral::Query::Or',
      'operation:or'          => 'Krawfish::Koral::Query::Or',
      'operation:unique'      => 'Krawfish::Koral::Query::Unique'
    );

    # A missing operation is treated like an unsupported operation
    my $op = $kq->{operation} // '';

    if (my $class = $class_for_operation{$op}) {
      return $class->from_koral($kq);
    };

    warn 'Operation ' . $op . ' not supported';
    return;
  };

  # Deserialize all types that are no groups
  my %class_for_type = (
    'koral:token' => 'Krawfish::Koral::Query::Token',
    'koral:match' => 'Krawfish::Koral::Query::Match',
    'koral:span'  => 'Krawfish::Koral::Query::Span'
  );

  if (my $class = $class_for_type{$type}) {
    return $class->from_koral($kq);
  };

  # Nowhere is deserialized without the KoralQuery object
  if ($type eq 'koral:nowhere') {
    return Krawfish::Koral::Query::Nowhere->from_koral;
  };

  warn "Type $type unknown";
  return;
};
403
404
# Deserialize constraint
# Expects a KoralQuery hash reference and hands it to the
# from_koral() method of the constraint class responsible for
# its '@type'. Unknown types emit a warning and return nothing.
sub from_koral_constraint {
  my (undef, $kq) = @_;

  # Supported constraint types and their classes, in checking order
  my @constraints = (
    ['constraint:position'
       => 'Krawfish::Koral::Query::Constraint::Position'],
    ['constraint:classBetween'
       => 'Krawfish::Koral::Query::Constraint::ClassBetween'],
    ['constraint:notBetween'
       => 'Krawfish::Koral::Query::Constraint::NotBetween'],
    ['constraint:inBetween'
       => 'Krawfish::Koral::Query::Constraint::InBetween']
  );

  foreach my $constraint (@constraints) {
    my ($type, $class) = @$constraint;
    return $class->from_koral($kq) if $kq->{'@type'} eq $type;
  };

  warn 'Type ' . $kq->{'@type'} . ' unknown';
  return;
};
428
429
# Deserialize from term or term group
# Expects a KoralQuery hash reference of the type 'koral:term',
# 'koral:termGroup' or 'koral:nowhere' and returns the deserialized
# object. For any other type the result of calling error() on a
# failure object wrapping the KoralQuery is returned.
sub from_koral_term_or_term_group {
  my ($self, $kq) = @_;
  my $type = $kq->{'@type'};

  # Defines a term
  return $self->from_koral_term($kq)
    if $type eq 'koral:term';

  # Defines a term group
  return Krawfish::Koral::Query::TermGroup->from_koral($kq)
    if $type eq 'koral:termGroup';

  # Matches nowhere
  return Krawfish::Koral::Query::Nowhere->from_koral()
    if $type eq 'koral:nowhere';

  # Return failure with error message
  my $failure = $self->failure($kq);
  return $failure->error(000 => 'Type no term or termGroup', $type);
};
453
454
# Get from koral:term
# Expects a KoralQuery hash reference. If it has a defined '@id',
# it is deserialized by Krawfish::Koral::Query::TermID, otherwise
# by Krawfish::Koral::Query::Term.
sub from_koral_term {
  my ($self, $kq) = @_;

  # Without an identifier this is a plain term
  return Krawfish::Koral::Query::Term->from_koral($kq)
    unless defined $kq->{'@id'};

  # The class is not imported at the top of this file, so load it
  # on demand unless it was already made available elsewhere
  require Krawfish::Koral::Query::TermID
    unless Krawfish::Koral::Query::TermID->can('from_koral');

  return Krawfish::Koral::Query::TermID->from_koral($kq);
};
465
466
Akron8a679332016-11-01 16:18:55 +01004671;