| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 1 | package Krawfish::Koral::Query::Length; |
| 2 | use parent 'Krawfish::Koral::Query'; |
| 3 | use Scalar::Util qw/looks_like_number/; |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 4 | use List::Util; |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 5 | use Krawfish::Query::Length; |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 6 | use strict; |
| 7 | use warnings; |
| Akron | 1fe979b | 2017-07-25 14:58:47 +0200 | [diff] [blame] | 8 | use Memoize; |
| 9 | memoize('min_span'); |
| 10 | memoize('max_span'); |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 11 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 12 | # TODO: |
| 13 | # Normalize chained length queries |
| 14 | # length(0-3,length(1-3,query)) |
| 15 | |
| 16 | # TODO: |
| 17 | # Check for query invalidity based on min_span and max_span |
| 18 | # length(2-4, [Baum]) - although, this only works with token support! |
| 19 | |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 20 | sub new { |
| 21 | my $class = shift; |
| 22 | my $span = shift; |
| 23 | |
| 24 | # Expect parameters min-length, max-length |
| 25 | # and tokenization that is the base for length |
| 26 | my ($min, $max, $token); |
| 27 | |
| 28 | # All parameters set |
| 29 | if (@_ == 3) { |
| 30 | ($min, $max, $token) = @_; |
| 31 | } |
| 32 | |
| 33 | # Two parameters |
| 34 | elsif (@_ == 2) { |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 35 | unless (looks_like_number($_[1])) { |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 36 | $min = $max = $_[0]; |
| 37 | $token = $_[1]; |
| 38 | } |
| 39 | |
| 40 | else { |
| 41 | ($min, $max) = @_; |
| 42 | }; |
| 43 | } |
| 44 | |
| 45 | # One parameter |
| 46 | elsif (@_ == 1) { |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 47 | ($min, $max) = ($_[0], $_[0]); |
| 48 | }; |
| 49 | |
| 50 | if ($token) { |
| 51 | warn 'Token definitions not yet supported!'; |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 52 | }; |
| 53 | |
| 54 | bless { |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 55 | operands => [$span], |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 56 | min => $min, |
| 57 | max => $max, |
| 58 | token => $token |
| 59 | }, $class; |
| 60 | }; |
| 61 | |
| Akron | 55fb308 | 2017-07-18 13:24:53 +0200 | [diff] [blame] | 62 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 63 | # Minimum length of either tokens or (default) subtokens |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 64 | sub min { |
| 65 | if (defined $_[1]) { |
| 66 | $_[0]->{min} = $_[1]; |
| 67 | return $_[0]; |
| 68 | }; |
| 69 | $_[0]->{min}; |
| 70 | }; |
| 71 | |
| 72 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 73 | # Minimum length of either tokens or (default) subtokens |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 74 | sub max { |
| 75 | if (defined $_[1]) { |
| 76 | $_[0]->{max} = $_[1]; |
| 77 | return $_[0]; |
| 78 | }; |
| 79 | $_[0]->{max}; |
| 80 | }; |
| 81 | |
| Akron | 55fb308 | 2017-07-18 13:24:53 +0200 | [diff] [blame] | 82 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 83 | # Minimum span of the query in tokens |
| 84 | sub min_span { |
| 85 | my $self = shift; |
| 86 | |
| 87 | # As per tokens are not supported, |
| 88 | # min( refers to the minimum number of subtokens |
| 89 | # As min_span refers to tokens and one token has, as minumum, |
| 90 | # one subtoken, both values can't be compared. That's why |
| 91 | # min_span of the operand is returned, as long as it is not 0. |
| 92 | |
| 93 | my $min_span = $self->operand->min_span; |
| 94 | $min_span = $min_span == 0 ? ($self->min >= 1 ? 1 : 0) : $min_span; |
| 95 | |
| 96 | my $max_span = $self->max_span; |
| 97 | |
| 98 | return ($max_span != -1 && $max_span < $min_span) ? $max_span : $min_span; |
| 99 | }; |
| 100 | |
| 101 | |
| 102 | |
| 103 | # Maximum span of the query |
| 104 | sub max_span { |
| 105 | my $self = shift; |
| 106 | |
| 107 | # As max_span refers to tokens and max refers |
| 108 | # (as long tokens are not supported) subtokens, |
| 109 | # those values are not interchangeable. |
| 110 | # But one token spans at least one subtoken, so |
| 111 | # if the subtoken boundary is smaller as max_span, |
| 112 | # this is the new max_span. |
| 113 | |
| 114 | my $max_span = $self->operand->max_span; |
| 115 | |
| 116 | if ($max_span == -1) { |
| 117 | return -1; |
| 118 | } |
| 119 | |
| 120 | elsif ($self->max < $max_span) { |
| 121 | return $self->max; |
| 122 | }; |
| 123 | |
| 124 | return $max_span; |
| 125 | }; |
| 126 | |
| 127 | |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 128 | sub token_base { |
| 129 | if (defined $_[1]) { |
| 130 | $_[0]->{token} = $_[1]; |
| 131 | return $_[0]; |
| 132 | }; |
| 133 | $_[0]->{token}; |
| 134 | }; |
| 135 | |
| 136 | |
| 137 | sub type { 'length' }; |
| 138 | |
| Akron | 55fb308 | 2017-07-18 13:24:53 +0200 | [diff] [blame] | 139 | |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 140 | sub to_koral_fragment { |
| 141 | ... |
| 142 | }; |
| 143 | |
| 144 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 145 | # Normalize query |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 146 | sub normalize { |
| 147 | my $self = shift; |
| 148 | |
| 149 | # Length is null |
| 150 | if ($self->{max} == 0) { |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 151 | return $self->builder->null; |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 152 | }; |
| 153 | |
| 154 | my $span; |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 155 | unless ($span = $self->operand->normalize) { |
| 156 | $self->copy_info_from($self->operand); |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 157 | return; |
| 158 | }; |
| 159 | |
| 160 | # Span is null or nothing |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 161 | if ($span->is_null) { |
| 162 | return $self->builder->null; |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 163 | }; |
| 164 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 165 | if ($span->is_nothing) { |
| 166 | return $self->builder->nothing; |
| Akron | c552937 | 2017-06-21 15:56:18 +0200 | [diff] [blame] | 167 | }; |
| 168 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 169 | # Matches anywhere |
| 170 | # if ($span->is_any) { |
| 171 | |
| 172 | # TODO: Check for repetition!!! |
| 173 | # if ($self->type) |
| 174 | # |
| 175 | #return $self->builder->repeat( |
| 176 | # $self->builder->any, |
| 177 | # $self->min, |
| 178 | # $self->max |
| 179 | #)->normalize; |
| 180 | |
| 181 | # }; |
| 182 | |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 183 | # No boundaries given |
| 184 | if (!defined $self->{min} && !defined $self->{max}) { |
| 185 | return $span; |
| 186 | }; |
| 187 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 188 | # Check the length for plausibility |
| 189 | my $min = $self->min_span; # Is tokens, may span more subtokens |
| 190 | my $max = $self->max; |
| 191 | |
| 192 | if ($min < $self->min) { |
| 193 | $min = $self->min; |
| 194 | }; |
| 195 | |
| 196 | # The length is not plausible |
| 197 | if (defined $min && defined $max && ($min > $max)) { |
| 198 | |
| 199 | # Cannot match |
| 200 | return $self->builder->nothing; |
| 201 | }; |
| 202 | |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 203 | $self->operands([$span]); |
| 204 | |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 205 | return $self; |
| 206 | }; |
| 207 | |
| Akron | 55fb308 | 2017-07-18 13:24:53 +0200 | [diff] [blame] | 208 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 209 | # Optimize query |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 210 | sub optimize { |
| Akron | 48fabe5 | 2017-08-07 16:48:12 +0200 | [diff] [blame] | 211 | my ($self, $segment) = @_; |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 212 | |
| 213 | # TODO: Add constraint instead of query, if implemented |
| 214 | |
| Akron | 48fabe5 | 2017-08-07 16:48:12 +0200 | [diff] [blame] | 215 | my $span = $self->operand->optimize($segment); |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 216 | |
| 217 | # Nothing set |
| Akron | faf7685 | 2017-07-19 17:37:07 +0200 | [diff] [blame] | 218 | if ($span->max_freq == 0) { |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 219 | return Krawfish::Query::Nothing->new; |
| 220 | }; |
| 221 | |
| 222 | return Krawfish::Query::Length->new( |
| 223 | $span, |
| 224 | $self->{min}, |
| 225 | $self->{max}, |
| 226 | $self->{token} |
| 227 | ); |
| 228 | }; |
| 229 | |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 230 | |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 231 | sub maybe_unsorted { |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 232 | $_[0]->operand->maybe_unsorted; |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 233 | }; |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 234 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 235 | |
| Akron | 290f59f | 2017-08-17 21:55:07 +0200 | [diff] [blame] | 236 | sub from_koral { |
| 237 | ... |
| 238 | }; |
| Akron | f365504 | 2017-02-06 13:08:44 +0100 | [diff] [blame] | 239 | |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 240 | |
| 241 | sub to_string { |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 242 | my $self = shift; |
| 243 | my $str = 'length('; |
| 244 | $str .= $self->{min} // '0'; |
| 245 | $str .= '-'; |
| 246 | $str .= $self->{max} // 'inf'; |
| 247 | $str .= ';' . $self->{token} if $self->{token}; |
| 248 | $str .= ':'; |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 249 | $str .= $self->operand->to_string; |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 250 | return $str . ')'; |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 251 | }; |
| 252 | |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 253 | sub is_any { $_[0]->operand->is_any }; |
| 254 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 255 | |
| Akron | c552937 | 2017-06-21 15:56:18 +0200 | [diff] [blame] | 256 | sub is_optional { |
| 257 | my $self = shift; |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 258 | if ($self->{min} == 0 && $self->operand->is_optional) { |
| Akron | c552937 | 2017-06-21 15:56:18 +0200 | [diff] [blame] | 259 | return 1; |
| 260 | }; |
| 261 | return; |
| 262 | }; |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 263 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 264 | |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 265 | sub is_null { |
| 266 | return 1 if $_[0]->{max} == 0; |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 267 | return $_[0]->operand->is_null |
| Akron | 5b07e1b | 2017-06-20 20:10:26 +0200 | [diff] [blame] | 268 | }; |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 269 | |
| 270 | sub is_negative { $_[0]->operand->is_negative }; |
| 271 | |
| 272 | sub is_extended_right { $_[0]->operand->is_extended_right }; |
| 273 | |
| 274 | sub is_extended_left { $_[0]->operand->is_extended_left }; |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 275 | |
| Akron | 169ede4 | 2017-02-05 12:52:22 +0100 | [diff] [blame] | 276 | 1; |