blob: ae50e410a4e5211df2b7927b45e377af92b4d0c8 [file] [log] [blame]
Akron2d304052017-07-19 17:39:10 +02001package Krawfish::Koral::Util::Boolean;
Akron2c6c7162017-05-15 18:15:33 +02002use Krawfish::Log;
3use List::MoreUtils qw!uniq!;
4use strict;
5use warnings;
6
Akronb945c572017-07-23 14:55:00 +02007# Base class for boolean group queries.
Akron39e25482017-10-24 12:01:37 +02008
9# Used by
10# - Koral::Corpus::FieldGroup,
11# - Koral::Query::TermGroup
12# - Koral::Query::Or
Akron2c6c7162017-05-15 18:15:33 +020013
Akron2858e9a2017-08-21 16:47:49 +020014use constant DEBUG => 0;
Akron49c32f42017-05-18 14:34:27 +020015
Akron61e8bce2017-05-24 15:55:27 +020016# TODO:
Akron448defc2017-08-30 14:59:29 +020017# Introduce a ->complex attribute to all queries,
18# to guarantee that simple operands are grouped together
19# to make filtering more efficient!
20
21# TODO:
Akron61e8bce2017-05-24 15:55:27 +020022# To simplify this, it may be useful to use Negation instead of is_negative().
23# This means, fields with "ne" won't be "ne"-fields, but become not(term).
24# It's also easier to detect double negation.
25
Akron49c32f42017-05-18 14:34:27 +020026# TODO:
Akronb945c572017-07-23 14:55:00 +020027# Let normalize return a cloned query instead of in-place creation
28
29# TODO:
Akron49c32f42017-05-18 14:34:27 +020030# - Deal with classes:
31# (A | !A) -> 1, aber ({1:A} | {2:!A}) -> ({1:A} | {2:!A})
32# (A & !A) -> 0, und ({1:A} & {2:!A}) -> 0
Akron2c6c7162017-05-15 18:15:33 +020033
Akron2c6c7162017-05-15 18:15:33 +020034# TODO:
Akronb945c572017-07-23 14:55:00 +020035# Check https://de.wikipedia.org/wiki/Boolesche_Algebra
36# for optimizations
Akron2c6c7162017-05-15 18:15:33 +020037# or(and(a,b),and(a,c)) -> and(a,or(b,c))
38# and(or(a,b),or(a,c)) -> or(a,and(b,c))
39# not(not(a)) -> a
Akron2ea61aa2017-06-03 16:30:23 +020040# and(a,or(a,b)) -> a !! (Relevant for VCs!)
Akron2c6c7162017-05-15 18:15:33 +020041# or(a,and(a,b)) -> a
42
43# DeMorgan:
44# or(not(a),not(b)) -> not(and(a,b))
45# and(not(a),not(b)) -> not(or(a,b))
46
47# TODO:
48# from managing gigabytes bool_optimiser.c
49#/* =========================================================================
50# * Function: OptimiseBoolTree
Akronb945c572017-07-23 14:55:00 +020051# * Description:
Akron2c6c7162017-05-15 18:15:33 +020052# * For case 2:
53# * Do three major steps:
54# * (i) put into standard form
55# * -> put into DNF (disjunctive normal form - or of ands)
56# * Use DeMorgan's, Double-negative, Distributive rules
57# * (ii) simplify
58# * apply idempotency rules
59# * (iii) ameliorate
60# * convert &! to diff nodes, order terms by frequency,...
61# * Could also do the matching idempotency laws i.e. ...
62# * (A | A), (A | !A), (A & !A), (A & A), (A & (A | B)), (A | (A & B))
Akronb945c572017-07-23 14:55:00 +020063# * Job for future.... ;-)
Akron2c6c7162017-05-15 18:15:33 +020064# * ========================================================================= */
65#/* =========================================================================
66# * Function: DoubleNeg
Akronb945c572017-07-23 14:55:00 +020067# * Description:
Akron2c6c7162017-05-15 18:15:33 +020068# * !(!(a) = a
69# * Assumes binary tree.
Akronb945c572017-07-23 14:55:00 +020070# * Input:
71# * Output:
Akron2c6c7162017-05-15 18:15:33 +020072# * ========================================================================= */
73#/* =========================================================================
74# * Function: AndDeMorgan
Akronb945c572017-07-23 14:55:00 +020075# * Description:
Akron2c6c7162017-05-15 18:15:33 +020076# * DeMorgan's rule for 'not' of an 'and' i.e. !(a & b) <=> (!a) | (!b)
77# * Assumes Binary Tree
Akronb945c572017-07-23 14:55:00 +020078# * Input:
Akron2c6c7162017-05-15 18:15:33 +020079# * not of and tree
Akronb945c572017-07-23 14:55:00 +020080# * Output:
Akron2c6c7162017-05-15 18:15:33 +020081# * or of not trees
82# * ========================================================================= */
83#/* =========================================================================
84# * Function: OrDeMorgan
Akronb945c572017-07-23 14:55:00 +020085# * Description:
Akron2c6c7162017-05-15 18:15:33 +020086# * DeMorgan's rule for 'not' of an 'or' i.e. !(a | b) <=> (!a) & (!b)
87# * Assumes Binary Tree
Akronb945c572017-07-23 14:55:00 +020088# * Input:
Akron2c6c7162017-05-15 18:15:33 +020089# * not of and tree
Akronb945c572017-07-23 14:55:00 +020090# * Output:
Akron2c6c7162017-05-15 18:15:33 +020091# * or of not trees
92# * ========================================================================= */
93#/* =========================================================================
94# * Function: PermeateNots
Akronb945c572017-07-23 14:55:00 +020095# * Description:
Akron2c6c7162017-05-15 18:15:33 +020096# * Use DeMorgan's and Double-negative
97# * Assumes tree in binary form (i.e. No ands/ors collapsed)
Akronb945c572017-07-23 14:55:00 +020098# * Input:
99# * Output:
Akron2c6c7162017-05-15 18:15:33 +0200100# * ========================================================================= */
101#/* =========================================================================
102# * Function: AndDistribute
Akronb945c572017-07-23 14:55:00 +0200103# * Description:
Akron2c6c7162017-05-15 18:15:33 +0200104# * (a | b) & A <=> (a & A) | (b & A)
Akronb945c572017-07-23 14:55:00 +0200105# * Input:
Akron2c6c7162017-05-15 18:15:33 +0200106# * binary tree of "AND" , "OR"s.
Akronb945c572017-07-23 14:55:00 +0200107# * Output:
Akron2c6c7162017-05-15 18:15:33 +0200108# * return 1 if changed the tree
109# * return 0 if there was NO change (no distributive rule to apply)
110# * ========================================================================= */
111#/* =========================================================================
112#/* =========================================================================
113# * Function: AndSort
Akronb945c572017-07-23 14:55:00 +0200114# * Description:
Akron2c6c7162017-05-15 18:15:33 +0200115# * Sort the list of nodes by increasing doc_count
116# * Using some Neil Sharman code - pretty straight forward.
117# * Note: not-terms are sent to the end of the list
Akronb945c572017-07-23 14:55:00 +0200118# * Input:
119# * Output:
Akron2c6c7162017-05-15 18:15:33 +0200120# * ========================================================================= */
121
122# From managing gigabytes bool_optimiser.c
123# - function: TF_Idempotent -> DONE
124
125
Akronb945c572017-07-23 14:55:00 +0200126# Normalize boolean query
Akron2ea61aa2017-06-03 16:30:23 +0200127sub normalize {
Akron2c6c7162017-05-15 18:15:33 +0200128 my $self = shift;
Akron61e8bce2017-05-24 15:55:27 +0200129
Akron93e925e2017-06-10 01:21:26 +0200130 # TODO:
131 # probably reiterate on all operands in that order.
132 # foreach (qw/_clean_and_flatten/) { ... }
133
Akron6b195632017-06-09 23:47:49 +0200134 $self = $self->_clean_and_flatten;
135
Akron2ea61aa2017-06-03 16:30:23 +0200136 # Recursive normalize
Akron49dd24f2017-06-20 17:02:15 +0200137 my @ops = ();
Akron2c6c7162017-05-15 18:15:33 +0200138 foreach my $op (@{$self->operands}) {
Akron2ea61aa2017-06-03 16:30:23 +0200139
140 # Operand is group!
Akron6b195632017-06-09 23:47:49 +0200141 if ($op) { # && $op->type eq $self->type) {
Akron49dd24f2017-06-20 17:02:15 +0200142 push @ops, $op->normalize
Akron6b195632017-06-09 23:47:49 +0200143 }
Akron2c6c7162017-05-15 18:15:33 +0200144 };
Akron2ea61aa2017-06-03 16:30:23 +0200145
Akron49dd24f2017-06-20 17:02:15 +0200146 $self->operands(\@ops);
147
Akron2ea61aa2017-06-03 16:30:23 +0200148 # Apply normalization
149 # The return value may not be a group,
150 # but an andNot or a leaf after the final step
Akron8231ca72017-06-16 16:08:32 +0200151 #
152 # The order is important!
Akron93e925e2017-06-10 01:21:26 +0200153 return $self
154 ->_clean_and_flatten
155 ->_resolve_idempotence
Akron2ea61aa2017-06-03 16:30:23 +0200156 ->_resolve_demorgan
Akron8231ca72017-06-16 16:08:32 +0200157 ->_remove_nested_idempotence
Akron2ea61aa2017-06-03 16:30:23 +0200158 ->_replace_negative;
Akron2c6c7162017-05-15 18:15:33 +0200159};
160
161
162# Resolve idempotence
163# a & a = a
164# a | a = a
Akron49dd24f2017-06-20 17:02:15 +0200165# TODO:
166# (a & b) | (a & b) = a & b
167# (a | b) & (a | b) = a & b
Akron2c6c7162017-05-15 18:15:33 +0200168sub _resolve_idempotence {
169 my $self = shift;
170
Akrond3355ba2017-05-17 21:16:35 +0200171 print_log('kq_bool', 'Resolve idempotence for ' . $self->to_string) if DEBUG;
172
Akrona84ef2d2017-08-07 14:45:46 +0200173 # TODO:
174 # copy warning messages everywhere, when operations are changed!
175
Akron655a10a2017-09-11 14:13:18 +0200176 return $self if $self->is_nowhere || $self->is_anywhere;
Akron2c6c7162017-05-15 18:15:33 +0200177
178 # Get operands in order to identify identical subcorpora
179 my $ops = $self->operands_in_order;
180
181 my ($i, @ops) = (0);
182
183 # Get length
184 my $length = scalar @$ops;
185
186 # Create new operand list
187 @ops = ($ops->[$i++]);
188
189 # Iterate over all operands
190 while ($i < $length) {
191
192 # (A | A)
193 # (A & A)
194 if ($ops->[$i-1]->to_string ne $ops->[$i]->to_string) {
195 push @ops, $ops->[$i]
196 }
197
198 elsif (DEBUG) {
199 print_log('kq_bool', 'Subcorpora are idempotent');
Akrona84ef2d2017-08-07 14:45:46 +0200200 $self->remove_info_from($ops->[$i]);
Akron2c6c7162017-05-15 18:15:33 +0200201 };
202
203 $i++;
204 };
205
206 # Set operands
207 $self->operands(\@ops);
208
209 $self;
210};
211
212
213# Remove matching idempotence
214# (A & (A | B)) -> A
215# (A | (A & B)) -> A
Akron655a10a2017-09-11 14:13:18 +0200216# (A | !A) -> (anywhere)
Akron5a5595b2017-09-10 13:00:57 +0200217# (A & !A) -> (nowhere)
Akron2c6c7162017-05-15 18:15:33 +0200218#
219# TODO:
220# (A | B) & !(A | B)
221sub _remove_nested_idempotence {
222 my $self = shift;
223
Akrond3355ba2017-05-17 21:16:35 +0200224 print_log('kq_bool', 'Remove nested idempotence for ' . $self->to_string) if DEBUG;
225
Akron655a10a2017-09-11 14:13:18 +0200226 return $self if $self->is_nowhere || $self->is_anywhere;
Akron2c6c7162017-05-15 18:15:33 +0200227
228 my $ops = $self->operands;
229
Akron8231ca72017-06-16 16:08:32 +0200230 my (@plains, @neg_group, @pos_group, @pos, @neg);
Akron2c6c7162017-05-15 18:15:33 +0200231
232 # TODO:
233 # Deal with classes
234 for (my $i = 0; $i < scalar(@$ops); $i++) {
235
236 # Operand is group
237 if ($ops->[$i]->type eq $self->type &&
238
239 # Operations are reversed
240 $ops->[$i]->operation ne $self->operation) {
241
Akron8231ca72017-06-16 16:08:32 +0200242 if ($ops->[$i]->is_negative) {
243 push @neg_group, $i;
244 }
245
246 else {
247 push @pos_group, $i;
248 };
Akron2c6c7162017-05-15 18:15:33 +0200249 }
250
251 # Operand is leaf
252 elsif ($ops->[$i]->is_leaf) {
253 push @plains, $i;
254
255 # Item is negative
256 if ($ops->[$i]->is_negative) {
257 push @neg, $i;
258 }
259
260 # Item is positive
261 else {
262 push @pos, $i;
263 }
264 };
265 };
266
Akrond3355ba2017-05-17 21:16:35 +0200267 if (DEBUG) {
268 print_log(
269 'kq_bool',
270 'Index lists created for ' . $self->to_string . ':',
Akron8231ca72017-06-16 16:08:32 +0200271 ' Pos-Group: ' . join(', ', @pos_group),
272 ' Neg-Group: ' . join(', ', @neg_group),
Akrond3355ba2017-05-17 21:16:35 +0200273 ' Plains: ' . join(', ', @plains),
274 ' Neg: ' . join(', ', @neg),
275 ' Pos: ' . join(', ', @pos),
276 );
277 };
278
Akron655a10a2017-09-11 14:13:18 +0200279 # Check for anywhere or nowhere
280 # (A | !A) -> (anywhere)
Akron5a5595b2017-09-10 13:00:57 +0200281 # (A & !A) -> (nowhere)
Akron8231ca72017-06-16 16:08:32 +0200282 foreach my $neg_i (@neg, @neg_group) {
283 foreach my $pos_i (@pos, @pos_group) {
Akron2c6c7162017-05-15 18:15:33 +0200284
Akronce10cb42017-06-14 01:12:40 +0200285 if (DEBUG) {
286 print_log(
287 'kq_tool',
Akron8231ca72017-06-16 16:08:32 +0200288 'Compare ' . $ops->[$neg_i]->to_neutral . ' and ' .
289 $ops->[$pos_i]->to_neutral
Akronce10cb42017-06-14 01:12:40 +0200290 );
291 };
292
Akron2c6c7162017-05-15 18:15:33 +0200293 # Compare terms
Akron8231ca72017-06-16 16:08:32 +0200294 if ($ops->[$neg_i]->to_neutral eq $ops->[$pos_i]->to_neutral) {
Akrond3355ba2017-05-17 21:16:35 +0200295
296 if (DEBUG) {
297 print_log(
298 'kq_bool',
299 'Found idempotent group for ' .
300 $self->operation .
301 '(' . $ops->[$neg_i]->to_string . ',' . $ops->[$pos_i]->to_string . ')'
302 );
303 };
Akron2c6c7162017-05-15 18:15:33 +0200304
305 if ($self->operation eq 'or') {
306
307 # Matches everything
Akron655a10a2017-09-11 14:13:18 +0200308 $self->is_anywhere(1);
Akron2c6c7162017-05-15 18:15:33 +0200309 }
Akrona84ef2d2017-08-07 14:45:46 +0200310
Akron2c6c7162017-05-15 18:15:33 +0200311 elsif ($self->operation eq 'and') {
312
Akron5a5595b2017-09-10 13:00:57 +0200313 # Matches nowhere
Akron8231ca72017-06-16 16:08:32 +0200314
Akron5a5595b2017-09-10 13:00:57 +0200315 $self->is_nowhere(1);
Akron2c6c7162017-05-15 18:15:33 +0200316 };
317
318 # Remove all operands
319 $self->operands([]);
320
321 # Stop further processing
322 return $self;
323 };
324 };
325 };
326
327 my @remove_groups = ();
328
329 # Iterate over all groups and plains
Akrond3355ba2017-05-17 21:16:35 +0200330 # (A & (A | B)) -> A
331 # (A | (A & B)) -> A
Akron2c6c7162017-05-15 18:15:33 +0200332 foreach my $plain_i (@plains) {
Akron8231ca72017-06-16 16:08:32 +0200333 foreach my $group_i (@pos_group) {
Akron2c6c7162017-05-15 18:15:33 +0200334
335 # Get group operands
336 my $group_ops = $ops->[$group_i]->operands;
337
338 # Get operand
339 foreach (@$group_ops) {
340
341 # Nested operand is identical
342 if ($_->to_string eq $ops->[$plain_i]->to_string) {
343
Akron8231ca72017-06-16 16:08:32 +0200344 unless (0) { # $_->has_classes) {
Akrond3355ba2017-05-17 21:16:35 +0200345 if (DEBUG) {
346 print_log('kq_bool', 'Remove nested idempotence in ' . $self->to_string);
347 };
348
Akron2c6c7162017-05-15 18:15:33 +0200349 push @remove_groups, $group_i;
350 }
351 else {
352 warn 'Behaviour for classes is undefined!';
353 };
354 };
355 };
356 };
357 };
358
359
360 # Get a list of all removable items in reverse order
361 # To remove irrelevant nested groups
362 foreach (uniq reverse sort @remove_groups) {
Akrona84ef2d2017-08-07 14:45:46 +0200363 $self->remove_info_from($ops->[$_]);
Akron2c6c7162017-05-15 18:15:33 +0200364 splice @$ops, $_, 1;
365 };
366
367 return $self;
368};
369
370
371# Remove empty
372# a & () = a
373# a | () = a
374#
375# Flatten groups
376# ((a & b) & c) -> (a & b & c)
377# ((a | b) | c) -> (a | b | c)
378#
Akron655a10a2017-09-11 14:13:18 +0200379# Respect anywhere and nowhere
Akron2c6c7162017-05-15 18:15:33 +0200380# a & b & [1] -> a & b
381# a & b & [0] -> [0]
382# a | b | [1] -> [1]
383# a | b | [0] -> a | b
384sub _clean_and_flatten {
385 my $self = shift;
386
Akron655a10a2017-09-11 14:13:18 +0200387 return $self if $self->is_nowhere || $self->is_anywhere;
Akron2c6c7162017-05-15 18:15:33 +0200388
389 # Get operands
390 my $ops = $self->operands;
391
Akron49dd24f2017-06-20 17:02:15 +0200392 print_log('kq_bool', 'Flatten groups of ' . $self->to_string) if DEBUG;
Akron2c6c7162017-05-15 18:15:33 +0200393
394 # Flatten groups in reverse order
395 for (my $i = scalar(@$ops) - 1; $i >= 0;) {
396
397 # Get operand under scrutiny
398 my $op = $ops->[$i];
399
400 # Remove empty elements
401 if (!defined($op) || $op->is_null) {
Akrona84ef2d2017-08-07 14:45:46 +0200402 $self->remove_info_from($ops->[$i]);
403
Akron2c6c7162017-05-15 18:15:33 +0200404 splice @$ops, $i, 1;
405 }
406
Akron5a5595b2017-09-10 13:00:57 +0200407 # If nowhere can be matched
408 elsif ($op->is_nowhere) {
Akron2c6c7162017-05-15 18:15:33 +0200409
410 # A & B & [0] -> [0]
411 if ($self->operation eq 'and') {
412
413 print_log('kq_bool', 'Group can be simplified to [0]') if DEBUG;
414
415 # Matches nowhere!
416 @$ops = ();
Akron5a5595b2017-09-10 13:00:57 +0200417 $self->is_nowhere(1);
Akron2c6c7162017-05-15 18:15:33 +0200418 last;
419 }
420
421 # A | B | [0] -> A | B
422 elsif ($self->operation eq 'or') {
Akrona84ef2d2017-08-07 14:45:46 +0200423 $self->remove_info_from($ops->[$i]);
Akron2c6c7162017-05-15 18:15:33 +0200424 splice @$ops, $i, 1;
425 }
426 }
427
428 # If everything can be matched
Akron655a10a2017-09-11 14:13:18 +0200429 elsif ($op->is_anywhere) {
Akron2c6c7162017-05-15 18:15:33 +0200430
431 # A & B & [1] -> A & B
432 if ($self->operation eq 'and') {
Akrona84ef2d2017-08-07 14:45:46 +0200433 $self->remove_info_from($ops->[$i]);
Akron2c6c7162017-05-15 18:15:33 +0200434 splice @$ops, $i, 1;
435 }
436
437 # A | B | [1] -> [1]
438 elsif ($self->operation eq 'or') {
439
440 print_log('kq_bool', 'Group can be simplified to [1]') if DEBUG;
441
442 # Matches everywhere
443 @$ops = ();
Akron655a10a2017-09-11 14:13:18 +0200444 $self->is_anywhere(1);
Akron2c6c7162017-05-15 18:15:33 +0200445 last;
446 }
447 }
448
449 # Is a nested group
450 elsif ($op->type eq $self->type) {
451
452 # Get nested operands
Akron2ea61aa2017-06-03 16:30:23 +0200453 # TODO:
454 # Rename to $ops
Akron2c6c7162017-05-15 18:15:33 +0200455 my $operands = $op->operands;
456 my $nr = @$operands;
457
458 # Simple ungroup
459 if (!$op->is_negative && $op->operation eq $self->operation) {
Akrond3355ba2017-05-17 21:16:35 +0200460
461 print_log('kq_bool', 'Group can be embedded') if DEBUG;
462
Akron2c6c7162017-05-15 18:15:33 +0200463 splice @$ops, $i, 1, @$operands;
464 $i+= $nr;
465 }
466
467 # Resolve grouped deMorgan-negativity
468 elsif ($op->is_negative && $op->operation ne $self->operation) {
Akrond3355ba2017-05-17 21:16:35 +0200469
470 print_log('kq_bool', 'Group can be resolved with demorgan') if DEBUG;
471
Akron2c6c7162017-05-15 18:15:33 +0200472 splice @$ops, $i, 1, map { $_->toggle_negative; $_ } @$operands;
473 $i+= $nr;
474 };
475 };
476
477 $i--;
478 };
479
480 $self->operands($ops);
481
Akrond3355ba2017-05-17 21:16:35 +0200482 print_log('kq_bool', 'Group is now ' . $self->to_string) if DEBUG;
483
Akron2c6c7162017-05-15 18:15:33 +0200484 $self;
485};
486
487
Akrond3355ba2017-05-17 21:16:35 +0200488# Resolve DeMorgan
Akron2c6c7162017-05-15 18:15:33 +0200489# !a & !b = !(a | b)
490# !a | !b = !(a & b)
Akron7a97d3f2017-06-07 18:28:50 +0200491# Afterwards the group will only contain a single negative element at the end
Akrond3355ba2017-05-17 21:16:35 +0200492sub _resolve_demorgan {
Akron2c6c7162017-05-15 18:15:33 +0200493 my $self = shift;
494
Akron7a97d3f2017-06-07 18:28:50 +0200495 print_log('kq_bool', 'Resolve DeMorgan in ' . $self->to_string) if DEBUG;
Akrond3355ba2017-05-17 21:16:35 +0200496
Akron655a10a2017-09-11 14:13:18 +0200497 return $self if $self->is_nowhere || $self->is_anywhere;
Akron2ea61aa2017-06-03 16:30:23 +0200498
Akron2c6c7162017-05-15 18:15:33 +0200499 # Split negative and operands
Akrond3355ba2017-05-17 21:16:35 +0200500 my (@neg, @pos) = ();
501 my $ops = $self->operands;
Akron2c6c7162017-05-15 18:15:33 +0200502
503 # Iterate over operands
Akrond3355ba2017-05-17 21:16:35 +0200504 for (my $i = 0; $i < @$ops; $i++) {
505
506 # Check if negative
507 if ($ops->[$i]->is_negative) {
508 push @neg, $i;
Akron2c6c7162017-05-15 18:15:33 +0200509 }
510 else {
Akrond3355ba2017-05-17 21:16:35 +0200511 push @pos, $i;
Akron2c6c7162017-05-15 18:15:33 +0200512 };
513 };
514
515 # Found no negative operands
516 return $self unless @neg;
517
Akrond3355ba2017-05-17 21:16:35 +0200518 if (DEBUG) {
519 print_log(
520 'kq_bool',
521 'Index lists created for ' . $self->to_string . ':',
522 ' Neg: ' . join(', ', @neg),
523 ' Pos: ' . join(', ', @pos),
524 );
525 };
526
527 # Everything is negative
528 unless (@pos) {
529
530 print_log('kq_bool', 'The whole group is negative') if DEBUG;
531
532 foreach (@neg) {
533 $ops->[$_]->toggle_negative;
534 };
535 $self->toggle_operation;
Akron49c32f42017-05-18 14:34:27 +0200536 $self->toggle_negative;
Akrond3355ba2017-05-17 21:16:35 +0200537 return $self;
538 };
539
Akron2ea61aa2017-06-03 16:30:23 +0200540 # There are no negative operands
541 return $self unless @neg;
Akrond3355ba2017-05-17 21:16:35 +0200542
Akron2ea61aa2017-06-03 16:30:23 +0200543 # There are negative operands
Akrond3355ba2017-05-17 21:16:35 +0200544
Akron2ea61aa2017-06-03 16:30:23 +0200545 # Group all negative operands
546 # and apply demorgan
547 my @new_group = ();
Akrond3355ba2017-05-17 21:16:35 +0200548
Akron2ea61aa2017-06-03 16:30:23 +0200549 # Get all negative items and create a new group
550 foreach (uniq reverse sort @neg) {
Akrond3355ba2017-05-17 21:16:35 +0200551
Akron7a97d3f2017-06-07 18:28:50 +0200552 if (DEBUG) {
553 print_log('kq_bool', 'Add operand to group: ' . $ops->[$_]->to_string);
554 };
555
Akron2ea61aa2017-06-03 16:30:23 +0200556 # Remove from old group
557 my $neg_op = splice(@$ops, $_, 1);
558
559 # Reset negativity
560 $neg_op->is_negative(0);
561
562 # Put in new group
563 push(@new_group, $neg_op);
564 };
565
566 # Only a single negative operand
567 if (scalar(@new_group) == 1) {
568
Akron7a97d3f2017-06-07 18:28:50 +0200569 print_log('kq_bool', 'Single negative operand') if DEBUG;
570
Akron2ea61aa2017-06-03 16:30:23 +0200571 # Reintroduce negativity
572 $new_group[0]->is_negative(1);
573
574 # Add negative operand at the end
575 push @$ops, $new_group[0];
576 }
577
578 # Create a group with negative operands
579 else {
Akrond3355ba2017-05-17 21:16:35 +0200580
Akron7a97d3f2017-06-07 18:28:50 +0200581 print_log('kq_bool', 'Create group with negation') if DEBUG;
582
Akrond3355ba2017-05-17 21:16:35 +0200583 my $new_group;
584
585 # Get reverted DeMorgan group
586 if ($self->operation eq 'and') {
Akron7a97d3f2017-06-07 18:28:50 +0200587
Akronb945c572017-07-23 14:55:00 +0200588 $new_group = $self->builder->bool_or(@new_group);
Akron2ea61aa2017-06-03 16:30:23 +0200589 # Create an andNot group in the next step
Akrond3355ba2017-05-17 21:16:35 +0200590 }
Akron7a97d3f2017-06-07 18:28:50 +0200591
592 # For 'or' operation
Akrond3355ba2017-05-17 21:16:35 +0200593 else {
Akronb945c572017-07-23 14:55:00 +0200594 $new_group = $self->builder->bool_and(@new_group);
Akrond3355ba2017-05-17 21:16:35 +0200595 };
596
597 # Set group to negative
598 $new_group->is_negative(1);
599
Akron8231ca72017-06-16 16:08:32 +0200600
601 # Be aware this could lead to heavy and unnecessary recursion
Akrona84ef2d2017-08-07 14:45:46 +0200602
603 my $norm = $new_group->normalize;
604 $self->remove_info_from($norm);
605 push @$ops, $norm;
Akrond3355ba2017-05-17 21:16:35 +0200606 };
607
Akron7a97d3f2017-06-07 18:28:50 +0200608 # $self->operands($ops);
609
610 print_log('kq_bool', 'Group is now ' . $self->to_string) if DEBUG;
611
Akrond3355ba2017-05-17 21:16:35 +0200612 return $self;
Akron2c6c7162017-05-15 18:15:33 +0200613};
614
615
Akron2ea61aa2017-06-03 16:30:23 +0200616# To make queries with negation more efficient,
617# replace (a & !b) with andNot(a,b)
618# and (a | !b) with (a | andNot([1],b))
Akron1811acc2017-06-07 02:13:16 +0200619sub _replace_negative {
Akron2ea61aa2017-06-03 16:30:23 +0200620 my $self = shift;
621
Akron6b195632017-06-09 23:47:49 +0200622 print_log('kq_bool', 'Replace Negations in ' . $self->to_string) if DEBUG;
Akron2ea61aa2017-06-03 16:30:23 +0200623
Akron5a5595b2017-09-10 13:00:57 +0200624 # Check for negativity in groups to toggle all or nowhere
Akron2ea61aa2017-06-03 16:30:23 +0200625 if ($self->is_negative) {
626
627 # ![1] -> [0]
Akron655a10a2017-09-11 14:13:18 +0200628 if ($self->is_anywhere) {
629 $self->is_anywhere(0);
Akron5a5595b2017-09-10 13:00:57 +0200630 $self->is_nowhere(1);
Akron2ea61aa2017-06-03 16:30:23 +0200631 $self->is_negative(0);
632 }
633
634 # ![0] -> [1]
Akron5a5595b2017-09-10 13:00:57 +0200635 elsif ($self->is_nowhere) {
Akron655a10a2017-09-11 14:13:18 +0200636 $self->is_anywhere(1);
Akron5a5595b2017-09-10 13:00:57 +0200637 $self->is_nowhere(0);
Akron2ea61aa2017-06-03 16:30:23 +0200638 $self->is_negative(0);
639 };
640 };
641
Akron655a10a2017-09-11 14:13:18 +0200642 # Return if anywhere or nowhere
643 return $self if $self->is_anywhere || $self->is_nowhere;
Akron2ea61aa2017-06-03 16:30:23 +0200644
645 my $ops = $self->operands;
646
647 # There is only a single operand
648 if (@$ops == 1) {
649
650 # Only operand is negative
Akron655a10a2017-09-11 14:13:18 +0200651 # return !a -> andNot(anywhere,a)
Akron2ea61aa2017-06-03 16:30:23 +0200652 if ($self->is_negative) {
653 $self->is_negative(0);
Akronb945c572017-07-23 14:55:00 +0200654 return $self->builder->bool_and_not(
Akron655a10a2017-09-11 14:13:18 +0200655 $self->builder->anywhere,
Akron2ea61aa2017-06-03 16:30:23 +0200656 $self
Akron8231ca72017-06-16 16:08:32 +0200657 )->normalize;
Akron2ea61aa2017-06-03 16:30:23 +0200658 };
659
660 # Only operand does not need a group
661 return $ops->[0];
662 };
663
664 print_log('kq_bool', 'Check final operand on negativity') if DEBUG;
665
666 # There is only one single negative operand possible!
667 # And it's at the end!
668
669 # All operands are positive
670 return $self unless $ops->[-1]->is_negative;
671
672 # Group all positive operands
673 print_log('kq_bool', 'Create group with negation') if DEBUG;
674
Akron2ea61aa2017-06-03 16:30:23 +0200675
676 # Remove the negative operand
677 my $neg = pop @$ops;
678
679 # Switch negativity
680 $neg->is_negative(0);
681
Akronb945c572017-07-23 14:55:00 +0200682 if (DEBUG) {
683 print_log('kq_bool', 'Negative operand is removed and reversed: ' . $neg->to_string);
684 };
Akron7a97d3f2017-06-07 18:28:50 +0200685
Akron6b195632017-06-09 23:47:49 +0200686 # Deal with operations differently
Akron1811acc2017-06-07 02:13:16 +0200687 if ($self->operation eq 'and') {
688
Akron6b195632017-06-09 23:47:49 +0200689 print_log('kq_bool', 'Operation is "and"') if DEBUG;
690
Akron1811acc2017-06-07 02:13:16 +0200691 # There is exactly one positive operand
692 if (@$ops == 1) {
Akron49dd24f2017-06-20 17:02:15 +0200693
694 print_log('kq_bool', 'Operation on a single operand') if DEBUG;
Akronb945c572017-07-23 14:55:00 +0200695 my $and_not = $self->builder->bool_and_not($ops->[0], $neg)->normalize;
Akron49dd24f2017-06-20 17:02:15 +0200696
697 print_log('kq_bool', 'Created ' . $and_not->to_string) if DEBUG;
698 return $and_not;
Akron1811acc2017-06-07 02:13:16 +0200699 };
700
Akron6b195632017-06-09 23:47:49 +0200701 print_log('kq_bool', 'Operation on multiple operands') if DEBUG;
702
Akron1811acc2017-06-07 02:13:16 +0200703 # There are multiple positive operands - create a new group
Akronb945c572017-07-23 14:55:00 +0200704 return $self->builder->bool_and_not($self, $neg)->normalize;
Akron1811acc2017-06-07 02:13:16 +0200705 }
706
707 elsif ($self->operation eq 'or') {
708
Akron6b195632017-06-09 23:47:49 +0200709 print_log('kq_bool', 'Operation is "or"') if DEBUG;
710
Akronb945c572017-07-23 14:55:00 +0200711 push @$ops, $self->builder->bool_and_not(
Akron655a10a2017-09-11 14:13:18 +0200712 $self->builder->anywhere,
Akron1811acc2017-06-07 02:13:16 +0200713 $neg
Akron8231ca72017-06-16 16:08:32 +0200714 )->normalize;
Akron1811acc2017-06-07 02:13:16 +0200715 return $self;
Akron2ea61aa2017-06-03 16:30:23 +0200716 };
717
Akron5b6264f2017-07-19 01:14:01 +0200718 warn 'Unknown operation';
Akron2ea61aa2017-06-03 16:30:23 +0200719};
720
Akron5b6264f2017-07-19 01:14:01 +0200721
Akronb945c572017-07-23 14:55:00 +0200722# Optimize boolean queries based on their frequencies
Akron88b855a2017-08-30 15:21:37 +0200723# and there complexity
Akronb945c572017-07-23 14:55:00 +0200724sub optimize {
Akron48fabe52017-08-07 16:48:12 +0200725 my ($self, $segment) = @_;
Akronb945c572017-07-23 14:55:00 +0200726
727 # Get operands
728 my $ops = $self->operands;
729
730 # Check the frequency of all operands
731 my (@freq, $query);
732
733 # Filter out all terms that do not occur
734 for (my $i = 0; $i < @$ops; $i++) {
735
736 # Get query operation for next operand
Akron48fabe52017-08-07 16:48:12 +0200737 my $next = $ops->[$i]->optimize($segment);
Akronb945c572017-07-23 14:55:00 +0200738
739 # Get maximum frequency
740 my $freq = $next->max_freq;
741
742 # Push to frequency list
743 push @freq, [$next, $freq];
744 };
745
746 # Sort operands based on ascending frequency
Akron88b855a2017-08-30 15:21:37 +0200747 # This is - however - rather irrelevant for or-queries,
748 # but it may help caching by introducing deterministic ordering
Akronb945c572017-07-23 14:55:00 +0200749 @freq = sort {
750 ($a->[1] < $b->[1]) ? -1 :
751 (($a->[1] > $b->[1]) ? 1 :
752 ($a->[0]->to_string cmp $b->[0]->to_string))
Akron88b855a2017-08-30 15:21:37 +0200753 } @freq;
Akronb945c572017-07-23 14:55:00 +0200754
755 # Operation is 'or'
756 if ($self->operation eq 'or') {
757 print_log('kq_bool', 'Prepare or-group') if DEBUG;
758
Akron88b855a2017-08-30 15:21:37 +0200759 # Move simple queries to the front, so when filtering, leaf groups
760 # will be filtered in groups
761 @freq = sort {
762 $a->[0]->complex == $b->[0]->complex ? 0 :
763 (
764 $a->[0]->complex && !$b->[0]->complex ? 1 : -1
765 )
766 } @freq;
767
Akronb945c572017-07-23 14:55:00 +0200768 # Ignore non-existing terms
769 while (@freq && $freq[0]->[1] == 0) {
770 shift @freq;
771 };
772
773 # No valid operands exist
774 if (@freq == 0) {
Akron5a5595b2017-09-10 13:00:57 +0200775 return Krawfish::Query::Nowhere->new;
Akronb945c572017-07-23 14:55:00 +0200776 };
777
778 # Get the first operand
779 $query = shift(@freq)->[0];
780
781 # For all further queries, create a query tree
782 while (@freq) {
783 my $next = shift(@freq)->[0];
784
785 # TODO: Distinguish here between classes and non-classes!
786 $query = $self->bool_or_query(
787 $query,
788 $next
789 );
790 };
791 }
792
793 # Operation is 'and'
794 elsif ($self->operation eq 'and') {
Akron88b855a2017-08-30 15:21:37 +0200795
Akronb945c572017-07-23 14:55:00 +0200796 print_log('kq_bool', 'Prepare and-group') if DEBUG;
797
Akron88b855a2017-08-30 15:21:37 +0200798
Akronb945c572017-07-23 14:55:00 +0200799 # If the least frequent operand does not exist,
800 # the whole group can't exist
801 if ($freq[0]->[1] == 0) {
802
803 # One operand is not existing
Akron5a5595b2017-09-10 13:00:57 +0200804 return Krawfish::Query::Nowhere->new;
Akronb945c572017-07-23 14:55:00 +0200805 };
806
807 # Get the first operand
808 $query = shift(@freq)->[0];
809
810 # Make the least frequent terms come first in constraint
811 while (@freq) {
812 my $next = shift(@freq)->[0];
813
814 # Create constraint with the least frequent as second (buffered) operand
815 $query = $self->bool_and_query($next, $query);
816 };
817 }
818
819 # Operation is unknown!
820 else {
821 warn 'Should never happen!';
822 };
823
Akron5a5595b2017-09-10 13:00:57 +0200824 # Return nowhere if nowhere matches!
Akronb945c572017-07-23 14:55:00 +0200825 if ($query->max_freq == 0) {
Akron5a5595b2017-09-10 13:00:57 +0200826 return Krawfish::Query::Nowhere->new;
Akronb945c572017-07-23 14:55:00 +0200827 };
828
829 return $query;
830};
831
832
Akron5b6264f2017-07-19 01:14:01 +0200833# Toggle the operation
834sub toggle_operation {
835 my $self = shift;
836 if ($self->operation eq 'or') {
837 $self->operation('and');
838 }
839 elsif ($self->operation eq 'and') {
840 $self->operation('or');
841 };
842};
843
844
Akronb945c572017-07-23 14:55:00 +0200845# Create operands in order
846sub operands_in_order {
847 my $self = shift;
848 my $ops = $self->{operands};
849 return [ sort { ($a && $b) ? ($a->to_string cmp $b->to_string) : 1 } @$ops ];
850};
851
852
Akron2c6c7162017-05-15 18:15:33 +02008531;
854
Akron5b6264f2017-07-19 01:14:01 +0200855
Akron2c6c7162017-05-15 18:15:33 +0200856__END__