blob: 13ba664862931a1984e115e8bc19dd9d5a1e7a3e [file] [log] [blame]
Akron169ede42017-02-05 12:52:22 +01001package Krawfish::Koral::Query::Length;
2use parent 'Krawfish::Koral::Query';
3use Scalar::Util qw/looks_like_number/;
Akron704ec062017-07-24 15:46:21 +02004use List::Util;
Akronbcbe2682017-02-05 13:05:55 +01005use Krawfish::Query::Length;
Akron169ede42017-02-05 12:52:22 +01006use strict;
7use warnings;
# Cache the results of min_span() and max_span().
# NOTE(review): Memoize builds its default cache key from the stringified
# argument list, which here is only the invocant reference. A cached value
# is therefore reused even if min/max or the operand of that object are
# changed afterwards - TODO confirm this is intended.
Akron1fe979b2017-07-25 14:58:47 +02008use Memoize;
9memoize('min_span');
10memoize('max_span');
Akron169ede42017-02-05 12:52:22 +010011
Akron704ec062017-07-24 15:46:21 +020012# TODO:
13# Normalize chained length queries
14# length(0-3,length(1-3,query))
15
16# TODO:
17# Check for query invalidity based on min_span and max_span
18# length(2-4, [Baum]) - although, this only works with token support!
19
# Construct a new length query.
#
# Accepted call signatures (after the class):
#   new($span)                      - no boundaries
#   new($span, $length)             - min and max are both $length
#   new($span, $min, $max)          - second value looks like a number
#   new($span, $length, $token)     - second value is not a number
#   new($span, $min, $max, $token)  - everything given explicitly
#
# Any other number of arguments leaves min, max and token undefined.
# Emits a warning whenever a (true) token definition is passed.
sub new {
  my ($class, $span, @args) = @_;

  my ($min, $max, $token);
  my $count = scalar @args;

  # A single length is both lower and upper boundary
  if ($count == 1) {
    $min = $max = $args[0];
  }

  # Either (min, max) or (length, token)
  elsif ($count == 2) {
    if (looks_like_number($args[1])) {
      ($min, $max) = @args;
    }
    else {
      ($min, $token) = @args;
      $max = $min;
    };
  }

  # Fully specified
  elsif ($count == 3) {
    ($min, $max, $token) = @args;
  };

  warn 'Token definitions not yet supported!' if $token;

  return bless {
    operands => [$span],
    min      => $min,
    max      => $max,
    token    => $token
  }, $class;
};
61
Akron55fb3082017-07-18 13:24:53 +020062
Akron704ec062017-07-24 15:46:21 +020063# Minimum length of either tokens or (default) subtokens
# Combined getter/setter for the minimum length.
# Called with a defined value, stores it and returns the invocant
# (so calls can be chained); otherwise returns the stored minimum.
sub min {
  my ($self, $value) = @_;

  return $self->{min} unless defined $value;

  $self->{min} = $value;
  return $self;
};
71
72
# Maximum length of either tokens or (default) subtokens
# Combined getter/setter for the maximum length.
# Called with a defined value, stores it and returns the invocant
# (so calls can be chained); otherwise returns the stored maximum.
sub max {
  my ($self, $value) = @_;

  return $self->{max} unless defined $value;

  $self->{max} = $value;
  return $self;
};
81
Akron55fb3082017-07-18 13:24:53 +020082
Akron704ec062017-07-24 15:46:21 +020083# Minimum span of the query in tokens
# Minimum span of the query in tokens.
#
# Tokens are not supported yet, so min() refers to a number of subtokens,
# while min_span refers to tokens. Because one token covers at least one
# subtoken, the two values cannot be compared directly. Therefore the
# operand's min_span is returned unless it is 0; in that case the result
# is 1 if a minimum length of at least 1 is required and 0 otherwise.
# The result never exceeds a bounded max_span.
#
# An undefined min is treated as 0, and an undefined max_span is treated
# like "no upper bound", so neither triggers a numeric comparison
# against undef.
sub min_span {
  my $self = shift;

  my $min_span = $self->operand->min_span;

  # The operand may be empty - the length constraint decides
  if ($min_span == 0) {
    $min_span = (($self->min // 0) >= 1) ? 1 : 0;
  };

  my $max_span = $self->max_span;

  # A bounded maximum (-1 is compared against as the "unbounded" marker)
  # caps the minimum
  if (defined $max_span && $max_span != -1 && $max_span < $min_span) {
    return $max_span;
  };

  return $min_span;
};
100
101
102
103# Maximum span of the query
# Maximum span of the query.
#
# max_span refers to tokens while max() refers to subtokens (as long as
# tokens are not supported), so the values are not interchangeable.
# But one token spans at least one subtoken, so if the subtoken boundary
# is smaller than the operand's max_span, it becomes the new max_span.
#
# Returns -1 whenever the operand reports -1 (which min_span treats as
# "no upper bound"). An undefined max means no length limit was set;
# it is skipped instead of being compared numerically, because
# "undef < $n" would be true and leak undef to the caller.
sub max_span {
  my $self = shift;

  my $operand_max = $self->operand->max_span;

  # The operand has no upper bound
  return -1 if $operand_max == -1;

  # A tighter length boundary wins
  my $max = $self->max;
  return $max if defined $max && $max < $operand_max;

  return $operand_max;
};
126
127
# Combined getter/setter for the token definition the length is based on.
# Called with a defined value, stores it and returns the invocant;
# otherwise returns the stored token definition.
sub token_base {
  my ($self, $token) = @_;

  return $self->{token} unless defined $token;

  $self->{token} = $token;
  return $self;
};
135
136
# Query type identifier
sub type {
  return 'length';
};
138
Akron55fb3082017-07-18 13:24:53 +0200139
# Presumably meant to serialize the query as a KoralQuery fragment
# (TODO confirm). Not implemented yet: the ellipsis statement below
# dies with "Unimplemented" when the method is called.
Akron169ede42017-02-05 12:52:22 +0100140sub to_koral_fragment {
141 ...
142};
143
144
Akron704ec062017-07-24 15:46:21 +0200145# Normalize query
# Normalize query.
#
# Returns
#   - the builder's null query, if the maximum length is 0 or the
#     normalized operand is null,
#   - the builder's nothing query, if the normalized operand is nothing
#     or the boundaries can never be satisfied (minimum > maximum),
#   - the normalized operand itself, if no boundaries are given,
#   - nothing (after copying the operand's info), if the operand can't
#     be normalized,
#   - the invocant with its operand replaced by the normalized operand
#     otherwise.
#
# Undefined boundaries mean "not set" and are never compared
# numerically: "undef == 0" would be true and turn an unbounded
# length into a null query.
sub normalize {
  my $self = shift;

  # Length is null - only an explicitly set maximum of 0 counts
  if (defined $self->{max} && $self->{max} == 0) {
    return $self->builder->null;
  };

  my $span;
  unless ($span = $self->operand->normalize) {
    $self->copy_info_from($self->operand);
    return;
  };

  # Span is null or nothing
  if ($span->is_null) {
    return $self->builder->null;
  };

  if ($span->is_nothing) {
    return $self->builder->nothing;
  };

  # Matches anywhere
  # if ($span->is_any) {

  # TODO: Check for repetition!!!
  # if ($self->type)
  #
  #return $self->builder->repeat(
  #  $self->builder->any,
  #  $self->min,
  #  $self->max
  #)->normalize;

  # };

  # No boundaries given
  if (!defined $self->{min} && !defined $self->{max}) {
    return $span;
  };

  # Check the length for plausibility
  my $min = $self->min_span; # Is tokens, may span more subtokens
  my $max = $self->max;

  # The required length may be larger than the minimum span
  my $min_length = $self->min;
  if (defined $min_length && (!defined $min || $min < $min_length)) {
    $min = $min_length;
  };

  # The length is not plausible
  if (defined $min && defined $max && ($min > $max)) {

    # Cannot match
    return $self->builder->nothing;
  };

  $self->operands([$span]);

  return $self;
};
207
Akron55fb3082017-07-18 13:24:53 +0200208
Akron704ec062017-07-24 15:46:21 +0200209# Optimize query
# Optimize query for the given segment.
# Optimizes the operand first; if the optimized operand reports a
# maximum frequency of 0, a Nothing query is returned, otherwise
# the operand is wrapped in a Krawfish::Query::Length query carrying
# the min, max and token settings of this object.
sub optimize {
  my $self    = shift;
  my $segment = shift;

  # TODO: Add constraint instead of query, if implemented

  my $optimized = $self->operand->optimize($segment);

  # Nothing set
  return Krawfish::Query::Nothing->new if $optimized->max_freq == 0;

  return Krawfish::Query::Length->new(
    $optimized,
    @{$self}{qw/min max token/}
  );
};
229
Akron0a29cd22017-02-06 10:58:02 +0100230
# Delegates to the operand's maybe_unsorted
sub maybe_unsorted {
  my $self = shift;
  return $self->operand->maybe_unsorted;
};
Akron169ede42017-02-05 12:52:22 +0100234
Akron704ec062017-07-24 15:46:21 +0200235
# Presumably meant to construct the query from a KoralQuery
# structure (TODO confirm). Not implemented yet: the ellipsis
# statement below dies with "Unimplemented" when the method is called.
Akron290f59f2017-08-17 21:55:07 +0200236sub from_koral {
237 ...
238};
Akronf3655042017-02-06 13:08:44 +0100239
Akron169ede42017-02-05 12:52:22 +0100240
# Stringify the query as
#   length(MIN-MAX[;TOKEN]:OPERAND)
# An undefined minimum is rendered as "0", an undefined maximum
# as "inf". The token part is only added for a true token value.
sub to_string {
  my $self = shift;

  my @parts = (
    'length(',
    $self->{min} // '0',
    '-',
    $self->{max} // 'inf'
  );

  push @parts, ';' . $self->{token} if $self->{token};
  push @parts, ':' . $self->operand->to_string . ')';

  return join '', @parts;
};
252
# Delegates to the operand's is_any
sub is_any {
  my $self = shift;
  return $self->operand->is_any;
};
254
Akron704ec062017-07-24 15:46:21 +0200255
# The length query is optional, if a length of 0 is allowed
# and the operand is optional itself.
# Returns 1 in that case and nothing otherwise.
# An undefined minimum counts as 0 (as in to_string) and is not
# compared numerically, which would raise an "uninitialized" warning.
sub is_optional {
  my $self = shift;

  # A required length can't be optional
  return unless ($self->{min} // 0) == 0;

  return 1 if $self->operand->is_optional;
  return;
};
Akron5b6264f2017-07-19 01:14:01 +0200263
Akron704ec062017-07-24 15:46:21 +0200264
# The length query is null, if the maximum length is explicitly 0
# or the operand is null.
# An undefined maximum means no upper boundary was set - it must not
# be compared numerically, as "undef == 0" is true in Perl.
sub is_null {
  my $self = shift;

  my $max = $self->{max};
  return 1 if defined $max && $max == 0;

  return $self->operand->is_null;
};
Akron5b6264f2017-07-19 01:14:01 +0200269
# Delegates to the operand's is_negative
sub is_negative {
  my $self = shift;
  return $self->operand->is_negative;
};
271
# Delegates to the operand's is_extended_right
sub is_extended_right {
  my $self = shift;
  return $self->operand->is_extended_right;
};
273
# Delegates to the operand's is_extended_left
sub is_extended_left {
  my $self = shift;
  return $self->operand->is_extended_left;
};
Akron0a29cd22017-02-06 10:58:02 +0100275
Akron169ede42017-02-05 12:52:22 +01002761;