blob: a543319b143f6e6fa8ca1b6d3788674e4c1aa356 [file] [log] [blame]
package Krawfish::Koral::Compile;
use Krawfish::Koral::Compile::Builder;
use Krawfish::Log;
use strict;
use warnings;
# WARNING! / TODO!
# An enrichment for fields or snippets (better any enrichments)
# can never wrap around a presort query, because the relevant
# data structures and algorithms require the results to be in doc_id order!
# WARNING!
# It's important to remember that sortFilter can't be shared in parallel
# processing - especially for fields, as segment rankings can differ!
# TODO:
# There are presort and postsort queries.
# Presortqueries don't respect current_query,
# while postsortqueries do!
# Postsortqueries only work on the clusterlevel.
# TODO:
# When a group filter is added,
# sorting does not work etc.
# This has to be thought through
our %COMPILE_ORDER = (
limit => 1,
sort => 2,
sample => 3,
enrich => 4,
aggregate => 5,
group => 6,
filter => 7
);
use constant {
DEBUG => 0,
UNIQUE_FIELD => 'id'
};
sub new {
my $class = shift;
bless [@_], $class;
};
sub to_string {
my $self = shift;
return join(',', map { $_->to_string } $self->operations);
};
sub builder {
return Krawfish::Koral::Compile::Builder->new;
};
# Get or set operations
sub operations {
my $self = shift;
if (@_) {
@$self = @_;
return $self;
};
return @$self;
};
# Normalize compile object
sub normalize {
my $self = shift;
my @compile = $self->operations;
my $mb = $self->builder;
# Check, if the query is a group query,
# which invalidates some compile operations
my $group_query = 0;
my $top_k = 0;
foreach (@compile) {
if ($_->type eq 'group') {
$group_query = 1;
}
elsif ($_->type eq 'limit') {
$top_k = $_->start_index + $_->items_per_page;
};
};
# Add unique sorting per default - unless it's a group query
#unless ($group_query) {
# push @compile,
# $mb->sort_by($mb->s_field(UNIQUE_FIELD));
#
# if (DEBUG) {
# print_log('kq_compile', 'Added unique field ' . UNIQUE_FIELD . ' to order');
# };
#};
# 1. Introduce required information
# e.g. sort(field) => fields(field)
my $sort_filtering = 1;
for (my $i = 0; $i < scalar @compile; $i++) {
# For all sort fields, it may be beneficial to
# retrieve the fields as well - as they need
# to be retrieved nonetheless for search criteria
#if ($compile[$i]->type eq 'sort') {
#
# my $mb = $self->builder;
# push @compile,
# $mb->enrich($mb->e_fields($compile[$i]->fields));
#
# if (DEBUG) {
# print_log('kq_compile', 'Added sorting ' .
# join(',', map {$_->to_string } $compile[$i]->fields) .
# ' to fields');
# };
#}
# There is at least one aggregation field
#els
if ($compile[$i]->type eq 'aggregate') {
$sort_filtering = 0;
}
# There is at least one group option
elsif ($compile[$i]->type eq 'group') {
$sort_filtering = 0;
};
};
# Sort objects based on a defined order
@compile = sort {
$COMPILE_ORDER{$a->type} <=> $COMPILE_ORDER{$b->type}
} @compile;
# 2. Find identical types and merge
# fields+fields => fields
# sort+sort => sort ...
# and take the first value for single values
# start_index=0 + start_index=2 => start_index=0
#
# 3. Remove duplicates
# aggr_freq + aggr_freq => - aggr_freq
for (my $i = 1; $i < @compile; $i++) {
# Consecutive types are identical, join
if ($compile[$i]->type eq $compile[$i-1]->type) {
# Join fields or aggregations
if ($compile[$i]->type eq 'enrich' ||
$compile[$i]->type eq 'aggregate' ||
$compile[$i]->type eq 'sort'
) {
# The first operations have higher precedence
$compile[$i-1]->operations(
$compile[$i-1]->operations,
$compile[$i]->operations
);
# Remove merged object
splice(@compile, $i, 1);
$i--;
}
# TODO:
# Make single field values work
# - start_index
# - count
# Unknown operation
else {
warn 'Unable to deal with unknown compile operation' . $compile[$i]->type;
};
# Don't normalize nonmerged data
CORE::next;
};
# Normalize when no longer consecutive operations
# can be expected
$compile[$i-1] = $compile[$i-1]->normalize;
};
# Normalize last operation
$compile[-1] = $compile[-1]->normalize;
# 4. Optimize
# No aggregation or group queries =>
# add a sort filter to sort
# If a limit is given, add top_k to sort
if ($sort_filtering && $top_k) {
foreach (@compile) {
if ($_->type eq 'sort') {
# Activate sort_filter option
$_->filter(1); # if $sort_filtering;
# Set top_k option!
$_->top_k($top_k) if $top_k;
last;
}
elsif ($_->type eq 'sample') {
# Set top_k option!
$_->top_k($top_k) if $top_k;
last;
};
};
};
# Set operations
$self->operations(@compile);
return $self;
};
# Translate all fields to term ids
sub identify {
my ($self, $dict) = @_;
for (my $i = 0; $i < @$self; $i++) {
$self->[$i] = $self->[$i]->identify($dict);
};
return $self;
};
# Wrap operations in a single query object
sub wrap {
my ($self, $query) = @_;
foreach (reverse $self->operations) {
$query = $_->wrap($query);
};
return $query;
};
sub to_segment {
...
};
sub optimize {
...
};
1;
__END__