| Akron | 2d30405 | 2017-07-19 17:39:10 +0200 | [diff] [blame] | 1 | package Krawfish::Koral::Util::Boolean; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 2 | use Krawfish::Log; |
| 3 | use List::MoreUtils qw!uniq!; |
| 4 | use strict; |
| 5 | use warnings; |
| 6 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 7 | # Base class for boolean group queries. |
| Akron | 39e2548 | 2017-10-24 12:01:37 +0200 | [diff] [blame] | 8 | |
| 9 | # Used by |
| 10 | # - Koral::Corpus::FieldGroup, |
| 11 | # - Koral::Query::TermGroup |
| 12 | # - Koral::Query::Or |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 13 | |
| Akron | 2858e9a | 2017-08-21 16:47:49 +0200 | [diff] [blame] | 14 | use constant DEBUG => 0; |
| Akron | 49c32f4 | 2017-05-18 14:34:27 +0200 | [diff] [blame] | 15 | |
| Akron | 61e8bce | 2017-05-24 15:55:27 +0200 | [diff] [blame] | 16 | # TODO: |
| Akron | 448defc | 2017-08-30 14:59:29 +0200 | [diff] [blame] | 17 | # Introduce a ->complex attribute to all queries, |
| 18 | # to guarantee that simple operands are grouped together |
| 19 | # to make filtering more efficient! |
| 20 | |
| 21 | # TODO: |
| Akron | 61e8bce | 2017-05-24 15:55:27 +0200 | [diff] [blame] | 22 | # To simplify this, it may be useful to use Negation instead of is_negative(). |
| 23 | # This means, fields with "ne" won't be "ne"-fields, but become not(term). |
| 24 | # It's also easier to detect double negation. |
| 25 | |
| Akron | 49c32f4 | 2017-05-18 14:34:27 +0200 | [diff] [blame] | 26 | # TODO: |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 27 | # Let normalize return a cloned query instead of in-place creation |
| 28 | |
| 29 | # TODO: |
| Akron | 49c32f4 | 2017-05-18 14:34:27 +0200 | [diff] [blame] | 30 | # - Deal with classes: |
| 31 | # (A | !A) -> 1, aber ({1:A} | {2:!A}) -> ({1:A} | {2:!A}) |
| 32 | # (A & !A) -> 0, und ({1:A} & {2:!A}) -> 0 |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 33 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 34 | # TODO: |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 35 | # Check https://de.wikipedia.org/wiki/Boolesche_Algebra |
| 36 | # for optimizations |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 37 | # or(and(a,b),and(a,c)) -> and(a,or(b,c)) |
| 38 | # and(or(a,b),or(a,c)) -> or(a,and(b,c)) |
| 39 | # not(not(a)) -> a |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 40 | # and(a,or(a,b)) -> a !! (Relevant for VCs!) |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 41 | # or(a,and(a,b)) -> a |
| 42 | |
| 43 | # DeMorgan: |
| 44 | # or(not(a),not(b)) -> not(and(a,b)) |
| 45 | # and(not(a),not(b)) -> not(or(a,b)) |
| 46 | |
| 47 | # TODO: |
| 48 | # from managing gigabytes bool_optimiser.c |
| 49 | #/* ========================================================================= |
| 50 | # * Function: OptimiseBoolTree |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 51 | # * Description: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 52 | # * For case 2: |
| 53 | # * Do three major steps: |
| 54 | # * (i) put into standard form |
| 55 | # * -> put into DNF (disjunctive normal form - or of ands) |
| 56 | # * Use DeMorgan's, Double-negative, Distributive rules |
| 57 | # * (ii) simplify |
| 58 | # * apply idempotency rules |
| 59 | # * (iii) ameliorate |
| 60 | # * convert &! to diff nodes, order terms by frequency,... |
| 61 | # * Could also do the matching idempotency laws i.e. ... |
| 62 | # * (A | A), (A | !A), (A & !A), (A & A), (A & (A | B)), (A | (A & B)) |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 63 | # * Job for future.... ;-) |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 64 | # * ========================================================================= */ |
| 65 | #/* ========================================================================= |
| 66 | # * Function: DoubleNeg |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 67 | # * Description: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 68 | # * !(!(a) = a |
| 69 | # * Assumes binary tree. |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 70 | # * Input: |
| 71 | # * Output: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 72 | # * ========================================================================= */ |
| 73 | #/* ========================================================================= |
| 74 | # * Function: AndDeMorgan |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 75 | # * Description: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 76 | # * DeMorgan's rule for 'not' of an 'and' i.e. !(a & b) <=> (!a) | (!b) |
| 77 | # * Assumes Binary Tree |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 78 | # * Input: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 79 | # * not of and tree |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 80 | # * Output: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 81 | # * or of not trees |
| 82 | # * ========================================================================= */ |
| 83 | #/* ========================================================================= |
| 84 | # * Function: OrDeMorgan |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 85 | # * Description: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 86 | # * DeMorgan's rule for 'not' of an 'or' i.e. !(a | b) <=> (!a) & (!b) |
| 87 | # * Assumes Binary Tree |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 88 | # * Input: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 89 | # * not of and tree |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 90 | # * Output: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 91 | # * or of not trees |
| 92 | # * ========================================================================= */ |
| 93 | #/* ========================================================================= |
| 94 | # * Function: PermeateNots |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 95 | # * Description: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 96 | # * Use DeMorgan's and Double-negative |
| 97 | # * Assumes tree in binary form (i.e. No ands/ors collapsed) |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 98 | # * Input: |
| 99 | # * Output: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 100 | # * ========================================================================= */ |
| 101 | #/* ========================================================================= |
| 102 | # * Function: AndDistribute |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 103 | # * Description: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 104 | # * (a | b) & A <=> (a & A) | (b & A) |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 105 | # * Input: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 106 | # * binary tree of "AND" , "OR"s. |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 107 | # * Output: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 108 | # * return 1 if changed the tree |
| 109 | # * return 0 if there was NO change (no distributive rule to apply) |
| 110 | # * ========================================================================= */ |
| 111 | #/* ========================================================================= |
| 112 | #/* ========================================================================= |
| 113 | # * Function: AndSort |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 114 | # * Description: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 115 | # * Sort the list of nodes by increasing doc_count |
| 116 | # * Using some Neil Sharman code - pretty straight forward. |
| 117 | # * Note: not-terms are sent to the end of the list |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 118 | # * Input: |
| 119 | # * Output: |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 120 | # * ========================================================================= */ |
| 121 | |
| 122 | # From managing gigabytes bool_optimiser.c |
| 123 | # - function: TF_Idempotent -> DONE |
| 124 | |
| 125 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 126 | # Normalize boolean query |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 127 | sub normalize { |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 128 | my $self = shift; |
| Akron | 61e8bce | 2017-05-24 15:55:27 +0200 | [diff] [blame] | 129 | |
| Akron | 93e925e | 2017-06-10 01:21:26 +0200 | [diff] [blame] | 130 | # TODO: |
| 131 | # probably reiterate on all operands in that order. |
| 132 | # foreach (qw/_clean_and_flatten/) { ... } |
| 133 | |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 134 | $self = $self->_clean_and_flatten; |
| 135 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 136 | # Recursive normalize |
| Akron | 49dd24f | 2017-06-20 17:02:15 +0200 | [diff] [blame] | 137 | my @ops = (); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 138 | foreach my $op (@{$self->operands}) { |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 139 | |
| 140 | # Operand is group! |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 141 | if ($op) { # && $op->type eq $self->type) { |
| Akron | 49dd24f | 2017-06-20 17:02:15 +0200 | [diff] [blame] | 142 | push @ops, $op->normalize |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 143 | } |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 144 | }; |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 145 | |
| Akron | 49dd24f | 2017-06-20 17:02:15 +0200 | [diff] [blame] | 146 | $self->operands(\@ops); |
| 147 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 148 | # Apply normalization |
| 149 | # The return value may not be a group, |
| 150 | # but an andNot or a leaf after the final step |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 151 | # |
| 152 | # The order is important! |
| Akron | 93e925e | 2017-06-10 01:21:26 +0200 | [diff] [blame] | 153 | return $self |
| 154 | ->_clean_and_flatten |
| 155 | ->_resolve_idempotence |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 156 | ->_resolve_demorgan |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 157 | ->_remove_nested_idempotence |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 158 | ->_replace_negative; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 159 | }; |
| 160 | |
| 161 | |
| 162 | # Resolve idempotence |
| 163 | # a & a = a |
| 164 | # a | a = a |
| Akron | 49dd24f | 2017-06-20 17:02:15 +0200 | [diff] [blame] | 165 | # TODO: |
| 166 | # (a & b) | (a & b) = a & b |
| 167 | # (a | b) & (a | b) = a & b |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 168 | sub _resolve_idempotence { |
| 169 | my $self = shift; |
| 170 | |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 171 | print_log('kq_bool', 'Resolve idempotence for ' . $self->to_string) if DEBUG; |
| 172 | |
| Akron | a84ef2d | 2017-08-07 14:45:46 +0200 | [diff] [blame] | 173 | # TODO: |
| 174 | # copy warning messages everywhere, when operations are changed! |
| 175 | |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 176 | return $self if $self->is_nowhere || $self->is_anywhere; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 177 | |
| 178 | # Get operands in order to identify identical subcorpora |
| 179 | my $ops = $self->operands_in_order; |
| 180 | |
| 181 | my ($i, @ops) = (0); |
| 182 | |
| 183 | # Get length |
| 184 | my $length = scalar @$ops; |
| 185 | |
| 186 | # Create new operand list |
| 187 | @ops = ($ops->[$i++]); |
| 188 | |
| 189 | # Iterate over all operands |
| 190 | while ($i < $length) { |
| 191 | |
| 192 | # (A | A) |
| 193 | # (A & A) |
| 194 | if ($ops->[$i-1]->to_string ne $ops->[$i]->to_string) { |
| 195 | push @ops, $ops->[$i] |
| 196 | } |
| 197 | |
| 198 | elsif (DEBUG) { |
| 199 | print_log('kq_bool', 'Subcorpora are idempotent'); |
| Akron | a84ef2d | 2017-08-07 14:45:46 +0200 | [diff] [blame] | 200 | $self->remove_info_from($ops->[$i]); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 201 | }; |
| 202 | |
| 203 | $i++; |
| 204 | }; |
| 205 | |
| 206 | # Set operands |
| 207 | $self->operands(\@ops); |
| 208 | |
| 209 | $self; |
| 210 | }; |
| 211 | |
| 212 | |
| 213 | # Remove matching idempotence |
| 214 | # (A & (A | B)) -> A |
| 215 | # (A | (A & B)) -> A |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 216 | # (A | !A) -> (anywhere) |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 217 | # (A & !A) -> (nowhere) |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 218 | # |
| 219 | # TODO: |
| 220 | # (A | B) & !(A | B) |
| 221 | sub _remove_nested_idempotence { |
| 222 | my $self = shift; |
| 223 | |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 224 | print_log('kq_bool', 'Remove nested idempotence for ' . $self->to_string) if DEBUG; |
| 225 | |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 226 | return $self if $self->is_nowhere || $self->is_anywhere; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 227 | |
| 228 | my $ops = $self->operands; |
| 229 | |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 230 | my (@plains, @neg_group, @pos_group, @pos, @neg); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 231 | |
| 232 | # TODO: |
| 233 | # Deal with classes |
| 234 | for (my $i = 0; $i < scalar(@$ops); $i++) { |
| 235 | |
| 236 | # Operand is group |
| 237 | if ($ops->[$i]->type eq $self->type && |
| 238 | |
| 239 | # Operations are reversed |
| 240 | $ops->[$i]->operation ne $self->operation) { |
| 241 | |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 242 | if ($ops->[$i]->is_negative) { |
| 243 | push @neg_group, $i; |
| 244 | } |
| 245 | |
| 246 | else { |
| 247 | push @pos_group, $i; |
| 248 | }; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 249 | } |
| 250 | |
| 251 | # Operand is leaf |
| 252 | elsif ($ops->[$i]->is_leaf) { |
| 253 | push @plains, $i; |
| 254 | |
| 255 | # Item is negative |
| 256 | if ($ops->[$i]->is_negative) { |
| 257 | push @neg, $i; |
| 258 | } |
| 259 | |
| 260 | # Item is positive |
| 261 | else { |
| 262 | push @pos, $i; |
| 263 | } |
| 264 | }; |
| 265 | }; |
| 266 | |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 267 | if (DEBUG) { |
| 268 | print_log( |
| 269 | 'kq_bool', |
| 270 | 'Index lists created for ' . $self->to_string . ':', |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 271 | ' Pos-Group: ' . join(', ', @pos_group), |
| 272 | ' Neg-Group: ' . join(', ', @neg_group), |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 273 | ' Plains: ' . join(', ', @plains), |
| 274 | ' Neg: ' . join(', ', @neg), |
| 275 | ' Pos: ' . join(', ', @pos), |
| 276 | ); |
| 277 | }; |
| 278 | |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 279 | # Check for anywhere or nowhere |
| 280 | # (A | !A) -> (anywhere) |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 281 | # (A & !A) -> (nowhere) |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 282 | foreach my $neg_i (@neg, @neg_group) { |
| 283 | foreach my $pos_i (@pos, @pos_group) { |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 284 | |
| Akron | ce10cb4 | 2017-06-14 01:12:40 +0200 | [diff] [blame] | 285 | if (DEBUG) { |
| 286 | print_log( |
| 287 | 'kq_tool', |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 288 | 'Compare ' . $ops->[$neg_i]->to_neutral . ' and ' . |
| 289 | $ops->[$pos_i]->to_neutral |
| Akron | ce10cb4 | 2017-06-14 01:12:40 +0200 | [diff] [blame] | 290 | ); |
| 291 | }; |
| 292 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 293 | # Compare terms |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 294 | if ($ops->[$neg_i]->to_neutral eq $ops->[$pos_i]->to_neutral) { |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 295 | |
| 296 | if (DEBUG) { |
| 297 | print_log( |
| 298 | 'kq_bool', |
| 299 | 'Found idempotent group for ' . |
| 300 | $self->operation . |
| 301 | '(' . $ops->[$neg_i]->to_string . ',' . $ops->[$pos_i]->to_string . ')' |
| 302 | ); |
| 303 | }; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 304 | |
| 305 | if ($self->operation eq 'or') { |
| 306 | |
| 307 | # Matches everything |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 308 | $self->is_anywhere(1); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 309 | } |
| Akron | a84ef2d | 2017-08-07 14:45:46 +0200 | [diff] [blame] | 310 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 311 | elsif ($self->operation eq 'and') { |
| 312 | |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 313 | # Matches nowhere |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 314 | |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 315 | $self->is_nowhere(1); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 316 | }; |
| 317 | |
| 318 | # Remove all operands |
| 319 | $self->operands([]); |
| 320 | |
| 321 | # Stop further processing |
| 322 | return $self; |
| 323 | }; |
| 324 | }; |
| 325 | }; |
| 326 | |
| 327 | my @remove_groups = (); |
| 328 | |
| 329 | # Iterate over all groups and plains |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 330 | # (A & (A | B)) -> A |
| 331 | # (A | (A & B)) -> A |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 332 | foreach my $plain_i (@plains) { |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 333 | foreach my $group_i (@pos_group) { |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 334 | |
| 335 | # Get group operands |
| 336 | my $group_ops = $ops->[$group_i]->operands; |
| 337 | |
| 338 | # Get operand |
| 339 | foreach (@$group_ops) { |
| 340 | |
| 341 | # Nested operand is identical |
| 342 | if ($_->to_string eq $ops->[$plain_i]->to_string) { |
| 343 | |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 344 | unless (0) { # $_->has_classes) { |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 345 | if (DEBUG) { |
| 346 | print_log('kq_bool', 'Remove nested idempotence in ' . $self->to_string); |
| 347 | }; |
| 348 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 349 | push @remove_groups, $group_i; |
| 350 | } |
| 351 | else { |
| 352 | warn 'Behaviour for classes is undefined!'; |
| 353 | }; |
| 354 | }; |
| 355 | }; |
| 356 | }; |
| 357 | }; |
| 358 | |
| 359 | |
| 360 | # Get a list of all removable items in reverse order |
| 361 | # To remove irrelevant nested groups |
| 362 | foreach (uniq reverse sort @remove_groups) { |
| Akron | a84ef2d | 2017-08-07 14:45:46 +0200 | [diff] [blame] | 363 | $self->remove_info_from($ops->[$_]); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 364 | splice @$ops, $_, 1; |
| 365 | }; |
| 366 | |
| 367 | return $self; |
| 368 | }; |
| 369 | |
| 370 | |
| 371 | # Remove empty |
| 372 | # a & () = a |
| 373 | # a | () = a |
| 374 | # |
| 375 | # Flatten groups |
| 376 | # ((a & b) & c) -> (a & b & c) |
| 377 | # ((a | b) | c) -> (a | b | c) |
| 378 | # |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 379 | # Respect anywhere and nowhere |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 380 | # a & b & [1] -> a & b |
| 381 | # a & b & [0] -> [0] |
| 382 | # a | b | [1] -> [1] |
| 383 | # a | b | [0] -> a | b |
| 384 | sub _clean_and_flatten { |
| 385 | my $self = shift; |
| 386 | |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 387 | return $self if $self->is_nowhere || $self->is_anywhere; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 388 | |
| 389 | # Get operands |
| 390 | my $ops = $self->operands; |
| 391 | |
| Akron | 49dd24f | 2017-06-20 17:02:15 +0200 | [diff] [blame] | 392 | print_log('kq_bool', 'Flatten groups of ' . $self->to_string) if DEBUG; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 393 | |
| 394 | # Flatten groups in reverse order |
| 395 | for (my $i = scalar(@$ops) - 1; $i >= 0;) { |
| 396 | |
| 397 | # Get operand under scrutiny |
| 398 | my $op = $ops->[$i]; |
| 399 | |
| 400 | # Remove empty elements |
| 401 | if (!defined($op) || $op->is_null) { |
| Akron | a84ef2d | 2017-08-07 14:45:46 +0200 | [diff] [blame] | 402 | $self->remove_info_from($ops->[$i]); |
| 403 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 404 | splice @$ops, $i, 1; |
| 405 | } |
| 406 | |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 407 | # If nowhere can be matched |
| 408 | elsif ($op->is_nowhere) { |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 409 | |
| 410 | # A & B & [0] -> [0] |
| 411 | if ($self->operation eq 'and') { |
| 412 | |
| 413 | print_log('kq_bool', 'Group can be simplified to [0]') if DEBUG; |
| 414 | |
| 415 | # Matches nowhere! |
| 416 | @$ops = (); |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 417 | $self->is_nowhere(1); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 418 | last; |
| 419 | } |
| 420 | |
| 421 | # A | B | [0] -> A | B |
| 422 | elsif ($self->operation eq 'or') { |
| Akron | a84ef2d | 2017-08-07 14:45:46 +0200 | [diff] [blame] | 423 | $self->remove_info_from($ops->[$i]); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 424 | splice @$ops, $i, 1; |
| 425 | } |
| 426 | } |
| 427 | |
| 428 | # If everything can be matched |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 429 | elsif ($op->is_anywhere) { |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 430 | |
| 431 | # A & B & [1] -> A & B |
| 432 | if ($self->operation eq 'and') { |
| Akron | a84ef2d | 2017-08-07 14:45:46 +0200 | [diff] [blame] | 433 | $self->remove_info_from($ops->[$i]); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 434 | splice @$ops, $i, 1; |
| 435 | } |
| 436 | |
| 437 | # A | B | [1] -> [1] |
| 438 | elsif ($self->operation eq 'or') { |
| 439 | |
| 440 | print_log('kq_bool', 'Group can be simplified to [1]') if DEBUG; |
| 441 | |
| 442 | # Matches everywhere |
| 443 | @$ops = (); |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 444 | $self->is_anywhere(1); |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 445 | last; |
| 446 | } |
| 447 | } |
| 448 | |
| 449 | # Is a nested group |
| 450 | elsif ($op->type eq $self->type) { |
| 451 | |
| 452 | # Get nested operands |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 453 | # TODO: |
| 454 | # Rename to $ops |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 455 | my $operands = $op->operands; |
| 456 | my $nr = @$operands; |
| 457 | |
| 458 | # Simple ungroup |
| 459 | if (!$op->is_negative && $op->operation eq $self->operation) { |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 460 | |
| 461 | print_log('kq_bool', 'Group can be embedded') if DEBUG; |
| 462 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 463 | splice @$ops, $i, 1, @$operands; |
| 464 | $i+= $nr; |
| 465 | } |
| 466 | |
| 467 | # Resolve grouped deMorgan-negativity |
| 468 | elsif ($op->is_negative && $op->operation ne $self->operation) { |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 469 | |
| 470 | print_log('kq_bool', 'Group can be resolved with demorgan') if DEBUG; |
| 471 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 472 | splice @$ops, $i, 1, map { $_->toggle_negative; $_ } @$operands; |
| 473 | $i+= $nr; |
| 474 | }; |
| 475 | }; |
| 476 | |
| 477 | $i--; |
| 478 | }; |
| 479 | |
| 480 | $self->operands($ops); |
| 481 | |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 482 | print_log('kq_bool', 'Group is now ' . $self->to_string) if DEBUG; |
| 483 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 484 | $self; |
| 485 | }; |
| 486 | |
| 487 | |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 488 | # Resolve DeMorgan |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 489 | # !a & !b = !(a | b) |
| 490 | # !a | !b = !(a & b) |
| Akron | 7a97d3f | 2017-06-07 18:28:50 +0200 | [diff] [blame] | 491 | # Afterwards the group will only contain a single negative element at the end |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 492 | sub _resolve_demorgan { |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 493 | my $self = shift; |
| 494 | |
| Akron | 7a97d3f | 2017-06-07 18:28:50 +0200 | [diff] [blame] | 495 | print_log('kq_bool', 'Resolve DeMorgan in ' . $self->to_string) if DEBUG; |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 496 | |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 497 | return $self if $self->is_nowhere || $self->is_anywhere; |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 498 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 499 | # Split negative and operands |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 500 | my (@neg, @pos) = (); |
| 501 | my $ops = $self->operands; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 502 | |
| 503 | # Iterate over operands |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 504 | for (my $i = 0; $i < @$ops; $i++) { |
| 505 | |
| 506 | # Check if negative |
| 507 | if ($ops->[$i]->is_negative) { |
| 508 | push @neg, $i; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 509 | } |
| 510 | else { |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 511 | push @pos, $i; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 512 | }; |
| 513 | }; |
| 514 | |
| 515 | # Found no negative operands |
| 516 | return $self unless @neg; |
| 517 | |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 518 | if (DEBUG) { |
| 519 | print_log( |
| 520 | 'kq_bool', |
| 521 | 'Index lists created for ' . $self->to_string . ':', |
| 522 | ' Neg: ' . join(', ', @neg), |
| 523 | ' Pos: ' . join(', ', @pos), |
| 524 | ); |
| 525 | }; |
| 526 | |
| 527 | # Everything is negative |
| 528 | unless (@pos) { |
| 529 | |
| 530 | print_log('kq_bool', 'The whole group is negative') if DEBUG; |
| 531 | |
| 532 | foreach (@neg) { |
| 533 | $ops->[$_]->toggle_negative; |
| 534 | }; |
| 535 | $self->toggle_operation; |
| Akron | 49c32f4 | 2017-05-18 14:34:27 +0200 | [diff] [blame] | 536 | $self->toggle_negative; |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 537 | return $self; |
| 538 | }; |
| 539 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 540 | # There are no negative operands |
| 541 | return $self unless @neg; |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 542 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 543 | # There are negative operands |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 544 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 545 | # Group all negative operands |
| 546 | # and apply demorgan |
| 547 | my @new_group = (); |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 548 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 549 | # Get all negative items and create a new group |
| 550 | foreach (uniq reverse sort @neg) { |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 551 | |
| Akron | 7a97d3f | 2017-06-07 18:28:50 +0200 | [diff] [blame] | 552 | if (DEBUG) { |
| 553 | print_log('kq_bool', 'Add operand to group: ' . $ops->[$_]->to_string); |
| 554 | }; |
| 555 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 556 | # Remove from old group |
| 557 | my $neg_op = splice(@$ops, $_, 1); |
| 558 | |
| 559 | # Reset negativity |
| 560 | $neg_op->is_negative(0); |
| 561 | |
| 562 | # Put in new group |
| 563 | push(@new_group, $neg_op); |
| 564 | }; |
| 565 | |
| 566 | # Only a single negative operand |
| 567 | if (scalar(@new_group) == 1) { |
| 568 | |
| Akron | 7a97d3f | 2017-06-07 18:28:50 +0200 | [diff] [blame] | 569 | print_log('kq_bool', 'Single negative operand') if DEBUG; |
| 570 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 571 | # Reintroduce negativity |
| 572 | $new_group[0]->is_negative(1); |
| 573 | |
| 574 | # Add negative operand at the end |
| 575 | push @$ops, $new_group[0]; |
| 576 | } |
| 577 | |
| 578 | # Create a group with negative operands |
| 579 | else { |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 580 | |
| Akron | 7a97d3f | 2017-06-07 18:28:50 +0200 | [diff] [blame] | 581 | print_log('kq_bool', 'Create group with negation') if DEBUG; |
| 582 | |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 583 | my $new_group; |
| 584 | |
| 585 | # Get reverted DeMorgan group |
| 586 | if ($self->operation eq 'and') { |
| Akron | 7a97d3f | 2017-06-07 18:28:50 +0200 | [diff] [blame] | 587 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 588 | $new_group = $self->builder->bool_or(@new_group); |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 589 | # Create an andNot group in the next step |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 590 | } |
| Akron | 7a97d3f | 2017-06-07 18:28:50 +0200 | [diff] [blame] | 591 | |
| 592 | # For 'or' operation |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 593 | else { |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 594 | $new_group = $self->builder->bool_and(@new_group); |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 595 | }; |
| 596 | |
| 597 | # Set group to negative |
| 598 | $new_group->is_negative(1); |
| 599 | |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 600 | |
| 601 | # Be aware this could lead to heavy and unnecessary recursion |
| Akron | a84ef2d | 2017-08-07 14:45:46 +0200 | [diff] [blame] | 602 | |
| 603 | my $norm = $new_group->normalize; |
| 604 | $self->remove_info_from($norm); |
| 605 | push @$ops, $norm; |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 606 | }; |
| 607 | |
| Akron | 7a97d3f | 2017-06-07 18:28:50 +0200 | [diff] [blame] | 608 | # $self->operands($ops); |
| 609 | |
| 610 | print_log('kq_bool', 'Group is now ' . $self->to_string) if DEBUG; |
| 611 | |
| Akron | d3355ba | 2017-05-17 21:16:35 +0200 | [diff] [blame] | 612 | return $self; |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 613 | }; |
| 614 | |
| 615 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 616 | # To make queries with negation more efficient, |
| 617 | # replace (a & !b) with andNot(a,b) |
| 618 | # and (a | !b) with (a | andNot([1],b)) |
| Akron | 1811acc | 2017-06-07 02:13:16 +0200 | [diff] [blame] | 619 | sub _replace_negative { |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 620 | my $self = shift; |
| 621 | |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 622 | print_log('kq_bool', 'Replace Negations in ' . $self->to_string) if DEBUG; |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 623 | |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 624 | # Check for negativity in groups to toggle all or nowhere |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 625 | if ($self->is_negative) { |
| 626 | |
| 627 | # ![1] -> [0] |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 628 | if ($self->is_anywhere) { |
| 629 | $self->is_anywhere(0); |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 630 | $self->is_nowhere(1); |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 631 | $self->is_negative(0); |
| 632 | } |
| 633 | |
| 634 | # ![0] -> [1] |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 635 | elsif ($self->is_nowhere) { |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 636 | $self->is_anywhere(1); |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 637 | $self->is_nowhere(0); |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 638 | $self->is_negative(0); |
| 639 | }; |
| 640 | }; |
| 641 | |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 642 | # Return if anywhere or nowhere |
| 643 | return $self if $self->is_anywhere || $self->is_nowhere; |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 644 | |
| 645 | my $ops = $self->operands; |
| 646 | |
| 647 | # There is only a single operand |
| 648 | if (@$ops == 1) { |
| 649 | |
| 650 | # Only operand is negative |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 651 | # return !a -> andNot(anywhere,a) |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 652 | if ($self->is_negative) { |
| 653 | $self->is_negative(0); |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 654 | return $self->builder->bool_and_not( |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 655 | $self->builder->anywhere, |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 656 | $self |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 657 | )->normalize; |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 658 | }; |
| 659 | |
| 660 | # Only operand does not need a group |
| 661 | return $ops->[0]; |
| 662 | }; |
| 663 | |
| 664 | print_log('kq_bool', 'Check final operand on negativity') if DEBUG; |
| 665 | |
| 666 | # There is only one single negative operand possible! |
| 667 | # And it's at the end! |
| 668 | |
| 669 | # All operands are positive |
| 670 | return $self unless $ops->[-1]->is_negative; |
| 671 | |
| 672 | # Group all positive operands |
| 673 | print_log('kq_bool', 'Create group with negation') if DEBUG; |
| 674 | |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 675 | |
| 676 | # Remove the negative operand |
| 677 | my $neg = pop @$ops; |
| 678 | |
| 679 | # Switch negativity |
| 680 | $neg->is_negative(0); |
| 681 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 682 | if (DEBUG) { |
| 683 | print_log('kq_bool', 'Negative operand is removed and reversed: ' . $neg->to_string); |
| 684 | }; |
| Akron | 7a97d3f | 2017-06-07 18:28:50 +0200 | [diff] [blame] | 685 | |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 686 | # Deal with operations differently |
| Akron | 1811acc | 2017-06-07 02:13:16 +0200 | [diff] [blame] | 687 | if ($self->operation eq 'and') { |
| 688 | |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 689 | print_log('kq_bool', 'Operation is "and"') if DEBUG; |
| 690 | |
| Akron | 1811acc | 2017-06-07 02:13:16 +0200 | [diff] [blame] | 691 | # There is exactly one positive operand |
| 692 | if (@$ops == 1) { |
| Akron | 49dd24f | 2017-06-20 17:02:15 +0200 | [diff] [blame] | 693 | |
| 694 | print_log('kq_bool', 'Operation on a single operand') if DEBUG; |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 695 | my $and_not = $self->builder->bool_and_not($ops->[0], $neg)->normalize; |
| Akron | 49dd24f | 2017-06-20 17:02:15 +0200 | [diff] [blame] | 696 | |
| 697 | print_log('kq_bool', 'Created ' . $and_not->to_string) if DEBUG; |
| 698 | return $and_not; |
| Akron | 1811acc | 2017-06-07 02:13:16 +0200 | [diff] [blame] | 699 | }; |
| 700 | |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 701 | print_log('kq_bool', 'Operation on multiple operands') if DEBUG; |
| 702 | |
| Akron | 1811acc | 2017-06-07 02:13:16 +0200 | [diff] [blame] | 703 | # There are multiple positive operands - create a new group |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 704 | return $self->builder->bool_and_not($self, $neg)->normalize; |
| Akron | 1811acc | 2017-06-07 02:13:16 +0200 | [diff] [blame] | 705 | } |
| 706 | |
| 707 | elsif ($self->operation eq 'or') { |
| 708 | |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 709 | print_log('kq_bool', 'Operation is "or"') if DEBUG; |
| 710 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 711 | push @$ops, $self->builder->bool_and_not( |
| Akron | 655a10a | 2017-09-11 14:13:18 +0200 | [diff] [blame] | 712 | $self->builder->anywhere, |
| Akron | 1811acc | 2017-06-07 02:13:16 +0200 | [diff] [blame] | 713 | $neg |
| Akron | 8231ca7 | 2017-06-16 16:08:32 +0200 | [diff] [blame] | 714 | )->normalize; |
| Akron | 1811acc | 2017-06-07 02:13:16 +0200 | [diff] [blame] | 715 | return $self; |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 716 | }; |
| 717 | |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 718 | warn 'Unknown operation'; |
| Akron | 2ea61aa | 2017-06-03 16:30:23 +0200 | [diff] [blame] | 719 | }; |
| 720 | |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 721 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 722 | # Optimize boolean queries based on their frequencies |
| Akron | 88b855a | 2017-08-30 15:21:37 +0200 | [diff] [blame] | 723 | # and there complexity |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 724 | sub optimize { |
| Akron | 48fabe5 | 2017-08-07 16:48:12 +0200 | [diff] [blame] | 725 | my ($self, $segment) = @_; |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 726 | |
| 727 | # Get operands |
| 728 | my $ops = $self->operands; |
| 729 | |
| 730 | # Check the frequency of all operands |
| 731 | my (@freq, $query); |
| 732 | |
| 733 | # Filter out all terms that do not occur |
| 734 | for (my $i = 0; $i < @$ops; $i++) { |
| 735 | |
| 736 | # Get query operation for next operand |
| Akron | 48fabe5 | 2017-08-07 16:48:12 +0200 | [diff] [blame] | 737 | my $next = $ops->[$i]->optimize($segment); |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 738 | |
| 739 | # Get maximum frequency |
| 740 | my $freq = $next->max_freq; |
| 741 | |
| 742 | # Push to frequency list |
| 743 | push @freq, [$next, $freq]; |
| 744 | }; |
| 745 | |
| 746 | # Sort operands based on ascending frequency |
| Akron | 88b855a | 2017-08-30 15:21:37 +0200 | [diff] [blame] | 747 | # This is - however - rather irrelevant for or-queries, |
| 748 | # but it may help caching by introducing deterministic ordering |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 749 | @freq = sort { |
| 750 | ($a->[1] < $b->[1]) ? -1 : |
| 751 | (($a->[1] > $b->[1]) ? 1 : |
| 752 | ($a->[0]->to_string cmp $b->[0]->to_string)) |
| Akron | 88b855a | 2017-08-30 15:21:37 +0200 | [diff] [blame] | 753 | } @freq; |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 754 | |
| 755 | # Operation is 'or' |
| 756 | if ($self->operation eq 'or') { |
| 757 | print_log('kq_bool', 'Prepare or-group') if DEBUG; |
| 758 | |
| Akron | 88b855a | 2017-08-30 15:21:37 +0200 | [diff] [blame] | 759 | # Move simple queries to the front, so when filtering, leaf groups |
| 760 | # will be filtered in groups |
| 761 | @freq = sort { |
| 762 | $a->[0]->complex == $b->[0]->complex ? 0 : |
| 763 | ( |
| 764 | $a->[0]->complex && !$b->[0]->complex ? 1 : -1 |
| 765 | ) |
| 766 | } @freq; |
| 767 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 768 | # Ignore non-existing terms |
| 769 | while (@freq && $freq[0]->[1] == 0) { |
| 770 | shift @freq; |
| 771 | }; |
| 772 | |
| 773 | # No valid operands exist |
| 774 | if (@freq == 0) { |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 775 | return Krawfish::Query::Nowhere->new; |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 776 | }; |
| 777 | |
| 778 | # Get the first operand |
| 779 | $query = shift(@freq)->[0]; |
| 780 | |
| 781 | # For all further queries, create a query tree |
| 782 | while (@freq) { |
| 783 | my $next = shift(@freq)->[0]; |
| 784 | |
| 785 | # TODO: Distinguish here between classes and non-classes! |
| 786 | $query = $self->bool_or_query( |
| 787 | $query, |
| 788 | $next |
| 789 | ); |
| 790 | }; |
| 791 | } |
| 792 | |
| 793 | # Operation is 'and' |
| 794 | elsif ($self->operation eq 'and') { |
| Akron | 88b855a | 2017-08-30 15:21:37 +0200 | [diff] [blame] | 795 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 796 | print_log('kq_bool', 'Prepare and-group') if DEBUG; |
| 797 | |
| Akron | 88b855a | 2017-08-30 15:21:37 +0200 | [diff] [blame] | 798 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 799 | # If the least frequent operand does not exist, |
| 800 | # the whole group can't exist |
| 801 | if ($freq[0]->[1] == 0) { |
| 802 | |
| 803 | # One operand is not existing |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 804 | return Krawfish::Query::Nowhere->new; |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 805 | }; |
| 806 | |
| 807 | # Get the first operand |
| 808 | $query = shift(@freq)->[0]; |
| 809 | |
| 810 | # Make the least frequent terms come first in constraint |
| 811 | while (@freq) { |
| 812 | my $next = shift(@freq)->[0]; |
| 813 | |
| 814 | # Create constraint with the least frequent as second (buffered) operand |
| 815 | $query = $self->bool_and_query($next, $query); |
| 816 | }; |
| 817 | } |
| 818 | |
| 819 | # Operation is unknown! |
| 820 | else { |
| 821 | warn 'Should never happen!'; |
| 822 | }; |
| 823 | |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 824 | # Return nowhere if nowhere matches! |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 825 | if ($query->max_freq == 0) { |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 826 | return Krawfish::Query::Nowhere->new; |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 827 | }; |
| 828 | |
| 829 | return $query; |
| 830 | }; |
| 831 | |
| 832 | |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 833 | # Toggle the operation |
| 834 | sub toggle_operation { |
| 835 | my $self = shift; |
| 836 | if ($self->operation eq 'or') { |
| 837 | $self->operation('and'); |
| 838 | } |
| 839 | elsif ($self->operation eq 'and') { |
| 840 | $self->operation('or'); |
| 841 | }; |
| 842 | }; |
| 843 | |
| 844 | |
| Akron | b945c57 | 2017-07-23 14:55:00 +0200 | [diff] [blame] | 845 | # Create operands in order |
| 846 | sub operands_in_order { |
| 847 | my $self = shift; |
| 848 | my $ops = $self->{operands}; |
| 849 | return [ sort { ($a && $b) ? ($a->to_string cmp $b->to_string) : 1 } @$ops ]; |
| 850 | }; |
| 851 | |
| 852 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 853 | 1; |
| 854 | |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 855 | |
| Akron | 2c6c716 | 2017-05-15 18:15:33 +0200 | [diff] [blame] | 856 | __END__ |