| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 1 | package Krawfish::Koral::Query::Token; |
| 2 | use parent 'Krawfish::Koral::Query'; |
| Akron | 3ab2e97 | 2017-08-02 19:10:10 +0200 | [diff] [blame] | 3 | # use Krawfish::Koral::Query::Token; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 4 | use Krawfish::Koral::Query::Term; |
| Akron | 3ab2e97 | 2017-08-02 19:10:10 +0200 | [diff] [blame] | 5 | # use Krawfish::Query::Term; |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 6 | use Krawfish::Log; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 7 | use strict; |
| 8 | use warnings; |
| Akron | 06eb4d3 | 2016-11-11 14:05:52 +0100 | [diff] [blame] | 9 | use Scalar::Util qw/blessed/; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 10 | |
| Akron | bc7dd43 | 2017-07-18 14:21:51 +0200 | [diff] [blame] | 11 | use constant DEBUG => 0; |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 12 | |
| Akron | 4f9eef4 | 2017-07-24 11:41:09 +0200 | [diff] [blame] | 13 | # Token based query containing boolean definition of terms. |
| 14 | |
| 15 | # TODO: |
| 16 | # Token should probably introduce a unique-query to filter out multiple matches. |
| 17 | # It should also remove classes that are not allowed. |
| 18 | |
# Construct a token query.
#
# Arguments (after the class name):
#   $token - optional, one of
#            * a false value (or nothing): the token has no operand
#              and matches any term,
#            * a plain (unblessed) string: wrapped in a
#              Krawfish::Koral::Query::Term,
#            * a blessed object (a term or a term group): stored as
#              the single operand as-is.
#
# Returns the new object, blessed into $class in every case, so that
# subclasses get instances of their own class.
sub new {
  my ($class, $token) = @_;

  # Any token
  return bless({ operands => [] }, $class) unless $token;

  # Token is already a group or a term
  # TODO:
  #   Check that everything else is invalid!
  return bless({ operands => [$token] }, $class) if blessed $token;

  # Token is a string
  return bless({
    operands => [Krawfish::Koral::Query::Term->new($token)]
  }, $class);
};
| 43 | |
| Akron | 4f9eef4 | 2017-07-24 11:41:09 +0200 | [diff] [blame] | 44 | |
# Type identifier of this query class
sub type {
  return 'token';
};
| Akron | 6621e11 | 2016-11-05 17:21:39 +0100 | [diff] [blame] | 47 | |
| Akron | 4f9eef4 | 2017-07-24 11:41:09 +0200 | [diff] [blame] | 48 | |
# Classes are not allowed in tokens, so there is nothing to remove:
# the invocant is handed back unchanged.
sub remove_classes {
  my ($self) = @_;
  return $self;
};
| Akron | 06eb4d3 | 2016-11-11 14:05:52 +0100 | [diff] [blame] | 53 | |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 54 | |
# Override of is_any: the token matches any term if it is not flagged
# as nothing and has no operand.
# Only the invocant is looked at; further arguments are ignored.
# Returns 1 if so, otherwise an empty return.
sub is_any {
  my $self = $_[0];

  return if $self->is_nothing;
  return if $self->operand;
  return 1;
};
| 61 | |
| Akron | ce10cb4 | 2017-06-14 01:12:40 +0200 | [diff] [blame] | 62 | |
# Minimum span length: a token spans exactly one token,
# unless it is null, in which case it spans zero.
sub min_span {
  return $_[0]->is_null ? 0 : 1;
};
| 68 | |
| 69 | |
# Maximum span length: a token spans exactly one token,
# unless it is null, in which case it spans zero.
sub max_span {
  return $_[0]->is_null ? 0 : 1;
};
| 75 | |
| 76 | |
# Normalize the token by normalizing its operand (if there is one).
#
# Returns
#   - the normalized operand itself, if it is neither nothing nor any
#     and this token is neither optional nor negative (the wrapping
#     token is dropped),
#   - this token in all other cases, updated in place.
sub normalize {
  my ($self) = @_;

  print_log('kq_token', 'Normalize wrapper') if DEBUG;

  # No operand defined - ANY query
  return $self unless $self->operand;

  my $normalized = $self->operand->normalize;

  # The operand normalized to nothing - drop it and flag the token
  if ($normalized->is_nothing) {
    $self->operands([]);
    $self->is_nothing(1);
    return $self;
  };

  # The operand normalized to any - drop it and flag the token
  if ($normalized->is_any) {
    $self->operands([]);
    $self->is_any(1);
    return $self;
  };

  # Neither optional nor negative - the operand replaces the token
  return $normalized unless $self->is_optional || $self->is_negative;

  # Keep the wrapping token around the normalized operand
  $self->operands([$normalized]);
  return $self;
};
| 106 | |
| Akron | ce10cb4 | 2017-06-14 01:12:40 +0200 | [diff] [blame] | 107 | |
# Inflate the operand against the dictionary and store the result
# as the new operand list.
#
# Arguments:
#   $dict - passed through unchanged to the operand's inflate method.
#
# Returns the token itself. A token without an operand (an any token)
# has nothing to inflate and is returned untouched, instead of dying on
# a method call on an undefined value.
sub inflate {
  my ($self, $dict) = @_;
  print_log('kq_token', 'Inflate wrapper') if DEBUG;

  # No operand defined - nothing to inflate
  my $operand = $self->operand or return $self;

  $self->operands([$operand->inflate($dict)]);
  return $self;
};
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 114 | |
# Check that the token can be searched for.
#
# Reports an error on the token and returns nothing if the token is
# null or has no operand; otherwise returns the token itself.
sub finalize {
  my ($self) = @_;

  # Null is checked first; the operand is only looked at otherwise
  my $problem =
      $self->is_null    ? 'Unable to search for null tokens'
    : !$self->operand   ? 'Unable to search for any tokens'
    :                     undef;

  if (defined $problem) {
    $self->error(000, $problem);
    return;
  };

  return $self;
};
| 132 | |
| Akron | 5ddc38f | 2017-07-18 00:16:22 +0200 | [diff] [blame] | 133 | |
# Create the query for this token by optimizing its operand.
#
# Arguments:
#   $index - passed through unchanged to the operand's optimize method.
#
# Returns whatever the operand's optimize method returns. Without an
# operand (any token) a warning is issued and nothing is returned.
sub optimize {
  my ($self, $index) = @_;

  # An any token has no operand to create a query from
  if (!$self->operand) {
    warn "It's not possible to optimize an any query";
    return;
  };

  # The wrapping token could be ignored for a single term operand.
  # However - this would ignore the unique constraint for cases,
  # where terms are identical, but have different payload information
  #if ($self->operand->type eq 'term') {
  #  return Krawfish::Query::Term->new(
  #    $index,
  #    $self->operand->to_string
  #  );
  #};

  print_log('kq_token', 'Optimize and return wrap token') if DEBUG;

  return $self->operand->optimize($index);
};
| 156 | |
| Akron | 6621e11 | 2016-11-05 17:21:39 +0100 | [diff] [blame] | 157 | |
| Akron | 6b19563 | 2017-06-09 23:47:49 +0200 | [diff] [blame] | 158 | |
# Stringify the token.
#
# The content is written in square brackets: '0' for a nothing token,
# empty for an any token, otherwise the operand's string (with a
# leading '!' if the token is negative). A null token is followed by
# '{0}', an optional (non-null) token by '?'.
sub to_string {
  my $self = shift;

  # Content between the brackets
  my $content = '';
  if ($self->is_nothing) {
    $content = '0';
  }
  elsif (!$self->is_any && $self->operand) {
    $content = ($self->is_negative ? '!' : '') . $self->operand->to_string;
  };

  # Marker behind the brackets - null takes precedence over optional
  my $marker =
      $self->is_null     ? '{0}'
    : $self->is_optional ? '?'
    :                      '';

  return '[' . $content . ']' . $marker;
};
| 191 | |
| Akron | 4de6620 | 2016-11-11 14:13:43 +0100 | [diff] [blame] | 192 | |
# Always returns 0 for tokens
sub maybe_unsorted {
  return 0;
};
| 194 | |
# Create a token query from a deserialized koral:token structure.
#
# Arguments:
#   $kq - hash reference; its 'wrap' entry (if any) is expected to be a
#         hash reference with an '@type' of 'koral:term' or
#         'koral:termGroup'.
#
# Returns
#   - an any token, if there is no wrap,
#   - a token wrapping the imported term or term group,
#   - nothing (after a warning), if the wrap type is missing or not
#     supported. The return is explicit, so the true return value of
#     warn can't leak to the caller as a bogus query.
sub from_koral {
  my ($class, $kq) = @_;
  my $importer = $class->importer;

  # No wrap - any token
  my $wrap = $kq->{wrap} or return $class->new;

  # A missing type is treated like an unsupported one
  my $type = defined $wrap->{'@type'} ? $wrap->{'@type'} : '';

  # Wrap is a term
  return $class->new($importer->term($wrap))
    if $type eq 'koral:term';

  # Wrap is a term group
  return $class->new($importer->term_group($wrap))
    if $type eq 'koral:termGroup';

  warn 'Wrap type not supported!';
  return;
};
| Akron | 1b09c5b | 2016-11-20 15:59:34 +0100 | [diff] [blame] | 219 | |
# Return the Koral fragment of the token: a hash reference of type
# 'koral:token', with the operand's fragment as 'wrap' if there is
# an operand.
sub to_koral_fragment {
  my ($self) = @_;

  my %fragment = ('@type' => 'koral:token');

  $fragment{wrap} = $self->operand->to_koral_fragment
    if $self->operand;

  return \%fragment;
};
| 234 | |
| 235 | |
| 236 | |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 237 | 1; |