blob: 1b98bee446a954467e809b0ddb96d4e4ca01211e [file] [log] [blame]
Akron95a37a42017-11-20 17:16:40 +01001package Krawfish::Query::Constraint;
Akron71fc0ec2017-11-02 17:34:21 +01002use strict;
3use warnings;
Akron7aed51c2017-10-31 16:23:49 +01004use Role::Tiny::With;
Akron903894a2017-02-20 22:19:59 +01005use Krawfish::Util::Buffer;
Akron993e0142017-07-07 23:24:19 +02006use List::Util qw/min/;
Akron934afe02016-11-18 03:35:20 +01007use Krawfish::Log;
Akron71fc0ec2017-11-02 17:34:21 +01008
9with 'Krawfish::Query::Base::Dual';
10with 'Krawfish::Query';
Akron934afe02016-11-18 03:35:20 +010011
# TODO:
#   Improve by skipping to the same document
#
# TODO:
#   The check probably needs more than just the span
#   information, e.g. to get the max_length() of
#   a span for skip_pos() stuff.
19
# Bit flags combined in the value returned by check().
# NOTE(review): NEXTA/NEXTB presumably tell the caller which operand
# to advance next - confirm against Krawfish::Query::Base::Dual.
use constant NEXTA => 1 << 0;
use constant NEXTB => 1 << 1;

# The pair of spans satisfies the constraint
use constant MATCH => 1 << 2;

# Short circuit match: no further constraints need to be checked
use constant DONE  => 1 << 3;

# Set to a true value to enable print_log() debugging output
use constant DEBUG => 0;
27
Akron934afe02016-11-18 03:35:20 +010028
# Constructor
#
# Parameters (positional):
#   $constraints - array reference of constraint objects
#                  (each is later asked to check(), clone() and to_string())
#   $first       - first operand query
#   $second      - second operand query
#
# Returns the new object, blessed into the invoking class.
sub new {
  my ($class, $constraints, $first, $second) = @_;

  my %self = (
    constraints => $constraints,
    first       => $first,
    second      => $second,

    # TODO:
    #   Second operand should be nested
    #   in buffer by Dual
    buffer      => Krawfish::Util::Buffer->new
  );

  return bless \%self, $class;
}
43
Akron934afe02016-11-18 03:35:20 +010044
# Clone query
#
# Builds a fresh object of this package from clones of all
# constraints and of both operands; new() supplies a new buffer.
sub clone {
  my ($self) = @_;

  # Clone in the order: constraints, first operand, second operand
  my @cloned = (
    [ map { $_->clone } @{ $self->{constraints} } ],
    $self->{first}->clone,
    $self->{second}->clone
  );

  return __PACKAGE__->new(@cloned);
}
54
55
# Check all constraints sequentially
#
# Parameters:
#   $first, $second - the two candidate spans. As used below they must
#                     provide doc_id, flags, start, end, payload
#                     and to_string.
#
# Returns a bit field built from the NEXTA, NEXTB and MATCH flags.
# When every checked constraint matches, the combined span is stored
# in the object (doc_id, flags, start, end, payload) and MATCH is set
# in the returned value.
sub check {
  my ($self, $first, $second) = @_;

  # Start with NEXTA, NEXTB and MATCH all set (0b0111);
  # every constraint can only clear bits through the AND below
  my $ret_val = 0b0111;

  for my $constraint (@{ $self->{constraints} }) {

    # TODO:
    #   Under certain circumstances it may be
    #   faster to ... (note left unfinished)

    # Ask the single constraint
    my $check = $constraint->check($first, $second);

    # Combine NEXTA and NEXTB rules
    $ret_val &= $check;

    # This constraint failed - stop and pass on the
    # NEXTA and NEXTB rules collected so far
    if (!($check & MATCH)) {
      print_log('constr', 'Constraint ' . $constraint->to_string . ' does not match')
        if DEBUG;
      return $ret_val;
    }

    print_log('constr', 'Constraint ' . $constraint->to_string . ' matches for ' .
                $first->to_string . ' and ' . $second->to_string)
      if DEBUG;

    # If done flag is set, do short circuit
    last if $check & DONE;
  }

  # Match! The document id is taken from the first operand
  $self->{doc_id} = $first->doc_id;

  # Flags need to be considered from both operands,
  # as not both operands are filtered
  $self->{flags} = $first->flags | $second->flags;

  # The resulting span covers both operands:
  # smallest start and largest end
  $self->{start} = $first->start < $second->start
    ? $first->start
    : $second->start;
  $self->{end} = $first->end > $second->end
    ? $first->end
    : $second->end;

  # Work on a clone of the first payload, so the operand's own
  # payload object is not the one receiving copy_from()
  $self->{payload} = $first->payload->clone->copy_from($second->payload);

  print_log('constr', 'Constraint matches: ' . $self->current->to_string) if DEBUG;

  return $ret_val | MATCH;
}
111
112
# Get maximum frequency of query
#
# Returns the smaller of the two operands' max_freq values.
sub max_freq {
  my ($self) = @_;

  # Ask the first operand, then the second
  my @freqs = map { $self->{$_}->max_freq } qw/first second/;

  return min(@freqs);
}
118
119
# Filter constraint by a corpus.
#
# If at least one operand reports requires_filter, exactly the
# operands that require it are filtered. Otherwise only one operand
# is filtered: the first if its max_freq is strictly smaller,
# else the second (also chosen when both are equal).
#
# Parameters:
#   $corpus - passed unchanged to the operands' filter_by()
#
# Returns the object itself, with the filtered operand(s) replaced.
sub filter_by {
  my ($self, $corpus) = @_;

  my ($first, $second) = @{$self}{qw/first second/};

  # No operand insists on a filter - filtering the
  # least frequent operand is sufficient
  unless ($first->requires_filter || $second->requires_filter) {

    if ($first->max_freq < $second->max_freq) {
      $self->{first} = $first->filter_by($corpus);
    }

    # The second operand is least frequent (default)
    else {
      $self->{second} = $second->filter_by($corpus);
    }

    return $self;
  }

  # There is a need for filtering - serve each requiring operand
  $self->{first}  = $first->filter_by($corpus)  if $first->requires_filter;
  $self->{second} = $second->filter_by($corpus) if $second->requires_filter;

  return $self;
}
156
157
# Requires filtering
#
# Returns 1 if the first or the second operand reports
# requires_filter, otherwise 0. The second operand is only
# asked when the first one does not require a filter.
sub requires_filter {
  my ($self) = @_;

  return (
    $self->{first}->requires_filter || $self->{second}->requires_filter
  ) ? 1 : 0;
}
169
170
# Stringification
#
# Returns 'constr(' followed by the comma separated constraints,
# a colon, both operands separated by a comma, and ')'.
sub to_string {
  my ($self) = @_;

  # Serialize in the order: constraints, first operand, second operand
  my $constraints = join ',', map { $_->to_string } @{ $self->{constraints} };
  my $first       = $self->{first}->to_string;
  my $second      = $self->{second}->to_string;

  return sprintf 'constr(%s:%s,%s)', $constraints, $first, $second;
}
180
Akronf00b57a2017-02-02 20:09:32 +0100181
Akron934afe02016-11-18 03:35:20 +01001821;