blob: 7a2dec551d317a4a2f40a974f4b28bb7f50a2b9e [file] [log] [blame]
Akronc3657bf2016-10-31 00:15:43 +01001package Krawfish::Koral::Meta;
Akron304fdd52017-04-05 01:47:46 +02002use parent 'Krawfish::Info';
3use Krawfish::Log;
Akron9b6ea8d2017-04-07 14:01:09 +02004use Krawfish::Result::Sort::Filter;
Akron304fdd52017-04-05 01:47:46 +02005use Krawfish::Result::Sort::PriorityCascade;
6use Krawfish::Result::Limit;
Akrone9d59c82017-05-12 19:33:43 +02007use Krawfish::Result::Segment::Aggregate;
8use Krawfish::Result::Segment::Aggregate::Facets;
9use Krawfish::Result::Segment::Aggregate::Count;
10use Krawfish::Result::Segment::Aggregate::Length;
11use Krawfish::Result::Segment::Aggregate::Values;
Akronc3657bf2016-10-31 00:15:43 +010012use strict;
13use warnings;
14
# Compile-time switches for this package.

# DEBUG guards the print_log() diagnostics emitted by this module.
use constant DEBUG => 1;

# Field name passed to the sort stage as its 'unique' parameter.
use constant UNIQUE_FIELD => 'id';
19
# Constructor: create an empty meta query object.
# No options are read from the argument list; all settings start
# with their defaults and are filled in through the setters.
sub new {
  my $class = shift;

  # Fresh scalar per object; only a reference to it is stored,
  # so the value can be shared and updated through that reference
  my $max_doc_rank = 0;

  my %self = (
    query            => undef,
    items_per_page   => undef,
    field_sort       => [],
    field_count      => undef,
    facets           => undef,
    count            => undef,
    start_index      => 0,
    max_doc_rank_ref => \$max_doc_rank
  );

  return bless \%self, $class;
};
33
# Store the query that the meta operations will wrap.
# Returns the meta object itself, so calls can be chained.
sub search_for {
  my $self = shift;
  $self->{query} = shift;
  return $self;
};
40
Akron6638e812016-12-11 23:21:18 +010041#sub fields;
Akronc3657bf2016-10-31 00:15:43 +010042
# Get or set the number of items per page.
# Without an argument the stored value is returned; with an
# argument the value is stored and the object is returned.
sub items_per_page {
  my ($self, @args) = @_;

  # Setter
  if (@args) {
    $self->{items_per_page} = $args[0];
    return $self;
  };

  # Getter
  return $self->{items_per_page};
};
49
Akron9b6ea8d2017-04-07 14:01:09 +020050
# Get or set the start index of the result window.
# Without an argument the stored value is returned; with an
# argument the value is stored and the object is returned.
sub start_index {
  my ($self, @value) = @_;

  return $self->{start_index} if @value == 0;

  # Only the first passed value is used
  ($self->{start_index}) = @value;
  return $self;
};
57
58
# Get or set the facets to aggregate on.
# Without arguments the stored array reference (or undef) is
# returned; with arguments all of them are stored in a new
# array reference and the object is returned.
sub facets {
  my ($self, @facets) = @_;

  return $self->{facets} unless @facets;

  $self->{facets} = \@facets;
  return $self;
};
65
66
# Get or set the flag that requests counting of
# frequency and document frequency.
# Without an argument the stored value is returned; with an
# argument the value is stored and the object is returned.
sub count {
  my $self = shift;

  if (scalar @_) {
    $self->{count} = $_[0];
    return $self;
  };

  return $self->{count};
};
74
75
# Get or set the flag that requests the lengths of results.
# Without an argument the stored value is returned; with an
# argument the value is stored and the object is returned.
# Note: as this shares its name with the builtin, it has to be
# called as a method.
sub length {
  my ($self, @args) = @_;

  return $self->{length} if !@args;

  $self->{length} = $args[0];
  return $self;
};
83
Akron6638e812016-12-11 23:21:18 +010084
# Alias for plan_for(): all arguments are forwarded unchanged
# and the result of plan_for() is returned.
sub prepare_for {
  my $self = shift;
  return $self->plan_for(@_);
};
88
Akronc3657bf2016-10-31 00:15:43 +010089
# Wrap the query in a rank filter, if the meta query qualifies for it.
#
# Parameters:
#   $query - the query to wrap
#   $index - the index object handed to the filter
#
# Returns the query wrapped in a Krawfish::Result::Sort::Filter for
# the first sort field, or the unchanged query if filtering is not
# applicable (no sort field, no page size, or an aggregation is
# requested).
sub sort_filter {
  my ($self, $query, $index) = @_;

  # No sort defined.
  # The constructor initializes field_sort with an empty array
  # reference, which is always true - so the list needs to be
  # checked for content as well, otherwise the dereference of the
  # first sort field below fails.
  my $field_sort = $self->{field_sort};
  return $query unless $field_sort && @$field_sort;

  # Sort is not restricted
  return $query unless $self->{items_per_page};

  # Filtering not applicable because
  # all matches need to be found
  if ($self->{facets} ||
        $self->{field_count} ||
        $self->{count} ||
        $self->{length}) {
    return $query;
  };

  # Get first run field
  my ($field, $desc) = @{$field_sort->[0]};

  # Create rank filter, sharing the max rank reference of this object
  $query = Krawfish::Result::Sort::Filter->new(
    query        => $query,
    max_rank_ref => $self->max_doc_rank_ref,
    field        => $field,
    desc         => $desc,
    index        => $index
  );

  print_log('kq_meta', 'Query is qualified for sort filtering') if DEBUG;

  return $query;
};
125
126
# Return the reference to the maximum document rank.
# If a value is passed, it is written to the referenced scalar
# first; the stored reference itself is never replaced.
sub max_doc_rank_ref {
  my ($self, @args) = @_;

  my $rank_ref = $self->{max_doc_rank_ref};

  # Set value to reference
  if (@args) {
    ${$rank_ref} = $args[0];
  };

  return $self->{max_doc_rank_ref};
};
136
137
# Build the query plan for the given index.
#
# The stored query is prepared for the index and then wrapped,
# from the inside out, in
#   1. the requested aggregations (facets, field values, count, length),
#   2. the field sort (always applied),
#   3. the limit (only if a page size or a start index is set).
#
# Returns the wrapped query, or nothing if no query was set.
sub plan_for {
  my $self  = shift;
  my $index = shift;

  # Get the query - without one there is nothing to plan
  my $query = $self->{query};
  return unless $query;

  # TODO:
  #   The dictionary should also have a max_rank!

  # Get the maximum rank for fields, aka the document number
  # and init the shared value for faster filtering
  my $max_doc_rank_ref = $self->max_doc_rank_ref($index->max_rank);

  # Prepare the nested query
  $query = $query->prepare_for($index);

  # Collect all requested aggregations
  my @aggregations;

  # Add facets to the result
  # This should have more parameters, like count
  if ($self->{facets}) {
    for my $facet (@{$self->{facets}}) {
      push @aggregations,
        Krawfish::Result::Segment::Aggregate::Facets->new($index, $facet);
    };
  };

  # Count field values
  # This should have more parameters, like count
  if ($self->{field_count}) {
    for my $field (@{$self->{field_count}}) {
      push @aggregations,
        Krawfish::Result::Segment::Aggregate::Values->new($index, $field);
    };
  };

  # Add frequency and document frequency count to result
  # TODO:
  #   This may be obsolete in some cases, because other aggregations
  #   already count frequencies.
  push @aggregations, Krawfish::Result::Segment::Aggregate::Count->new
    if $self->{count};

  # Add lengths to the result
  push @aggregations, Krawfish::Result::Segment::Aggregate::Length->new
    if $self->{length};

  # Augment the query with aggregations
  # TODO:
  #   It may be better to have one aggregation object, that can be filled!
  #   like ->query($query)->aggregate_on($aggr)->prepare_for($index);
  #   and after the query is through, the aggregation map contains data
  $query = Krawfish::Result::Segment::Aggregate->new($query, \@aggregations)
    if @aggregations;

  # Sort the result
  # This is mandatory!

  # Precalculate top_k value - it stays undefined
  # as long as no page size is set
  my $top_k = $self->items_per_page
    ? $self->items_per_page + ($self->start_index // 0)
    : undef;

  # TODO:
  #   Check for fields that are either not part
  #   of the index or are identified in
  #   the corpus query (it makes no sense to
  #   sort for author, if author=Fontane is
  #   required)
  $query = Krawfish::Result::Sort::PriorityCascade->new(
    query        => $query,
    index        => $index,
    fields       => $self->{field_sort},
    unique       => UNIQUE_FIELD,
    top_k        => $top_k,
    max_rank_ref => $max_doc_rank_ref
  );

  print_log('kq_meta', "Field sort with: " . $query->to_string) if DEBUG;

  # Limit the result
  if ($self->items_per_page || $self->start_index) {
    $query = Krawfish::Result::Limit->new(
      $query,
      $self->start_index,
      $self->items_per_page
    );
  };

  # The order needs to be:
  # snippet(
  #   fields(
  #     limit(      -
  #       sorted(   -
  #         faceted( -
  #           count(Q) -
  #         )
  #       )
  #     )
  #   )
  # )

  # Return the query
  return $query;
};
250
Akron304fdd52017-04-05 01:47:46 +0200251
Akronc3657bf2016-10-31 00:15:43 +01002521;
253
254__END__