# blob: 7a2dec551d317a4a2f40a974f4b28bb7f50a2b9e [file] [log] [blame]
package Krawfish::Koral::Meta;
use parent 'Krawfish::Info';
use Krawfish::Log;
use Krawfish::Result::Sort::Filter;
use Krawfish::Result::Sort::PriorityCascade;
use Krawfish::Result::Limit;
use Krawfish::Result::Segment::Aggregate;
use Krawfish::Result::Segment::Aggregate::Facets;
use Krawfish::Result::Segment::Aggregate::Count;
use Krawfish::Result::Segment::Aggregate::Length;
use Krawfish::Result::Segment::Aggregate::Values;
use strict;
use warnings;
# When true, the print_log() calls in this module are executed
use constant DEBUG => 1;

# Field name handed to the sorter as its `unique` parameter
use constant UNIQUE_FIELD => 'id';
# Construct an empty meta query object.
#
# Every slot that the accessors and the planner read is initialised
# here, so the full shape of the object is visible in one place:
#
#   query            - nested query, set by search_for()
#   items_per_page   - page size (undef means unrestricted)
#   field_sort       - array reference of sort criteria; each entry is
#                      read as [field, desc] by sort_filter()
#   field_count      - fields whose values should be aggregated
#   facets           - fields to create facets on
#   count            - flag: add a Count aggregation
#   length           - flag: add a Length aggregation
#   start_index      - offset of the first item to return
#   max_doc_rank_ref - reference to a scalar that is handed to the
#                      sorting objects as `max_rank_ref`
#
# Returns the new object blessed into $class.
sub new {
  my $class = shift;

  # A fresh scalar per object, so the rank value is never shared
  # between two meta objects
  my $max_doc_rank = 0;

  return bless {
    query            => undef,
    items_per_page   => undef,
    field_sort       => [],
    field_count      => undef,
    facets           => undef,
    count            => undef,
    length           => undef,
    start_index      => 0,
    max_doc_rank_ref => \$max_doc_rank
  }, $class;
};
# Set the nested query that plan_for() will wrap.
# Returns the invocant, so calls can be chained.
sub search_for {
  my $self = shift;
  my ($nested) = @_;

  $self->{query} = $nested;

  return $self;
};
#sub fields;
# Get or set the number of items per page.
#
# Without an argument the current value is returned; with an
# argument the value is stored and the invocant is returned,
# so calls can be chained.
sub items_per_page {
  my ($self, @args) = @_;

  # Setter
  if (@args) {
    $self->{items_per_page} = $args[0];
    return $self;
  }

  # Getter
  return $self->{items_per_page};
};
# Get or set the offset of the first item to return.
#
# Without an argument the current value is returned; with an
# argument the value is stored and the invocant is returned,
# so calls can be chained.
sub start_index {
  my ($self, @args) = @_;

  # Setter
  if (@args) {
    $self->{start_index} = $args[0];
    return $self;
  }

  # Getter
  return $self->{start_index};
};
# Get or set the list of facets.
#
# Without arguments the stored value (an array reference or undef)
# is returned; with arguments all of them are stored as a new array
# reference and the invocant is returned, so calls can be chained.
sub facets {
  my ($self, @names) = @_;

  # Getter
  return $self->{facets} if !@names;

  # Setter - @names is already a private copy of the arguments
  $self->{facets} = \@names;

  return $self;
};
# Get or set the count flag.
# When the flag is true, plan_for() adds a Count aggregation
# (frequency and document frequency) to the result.
#
# Without an argument the current value is returned; with an
# argument the value is stored and the invocant is returned,
# so calls can be chained.
sub count {
  my ($self, @args) = @_;

  # Setter
  if (@args) {
    $self->{count} = $args[0];
    return $self;
  }

  # Getter
  return $self->{count};
};
# Get or set the length flag.
# When the flag is true, plan_for() adds a Length aggregation
# to the result.
#
# Without an argument the current value is returned; with an
# argument the value is stored and the invocant is returned,
# so calls can be chained.
sub length {
  my ($self, @args) = @_;

  # Setter
  if (@args) {
    $self->{length} = $args[0];
    return $self;
  }

  # Getter
  return $self->{length};
};
# Alias for plan_for(): hands all arguments through unchanged
# and returns whatever plan_for() returns.
sub prepare_for {
  my ($self, @args) = @_;
  return $self->plan_for(@args);
};
# Wrap the query in a rank filter if the meta query qualifies
# for sort filtering.
#
# Parameters:
#   $query - the query to wrap
#   $index - the index, handed through to the filter
#
# Returns the query unchanged if
#   - no sort criterion is defined,
#   - the result is not restricted by items_per_page, or
#   - an aggregation (facets, field_count, count, length) is
#     requested, because then all matches need to be found.
# Otherwise returns a Krawfish::Result::Sort::Filter wrapping the
# query, filtering on the first sort criterion.
sub sort_filter {
  my ($self, $query, $index) = @_;

  # No sort defined.
  # The constructor initialises field_sort with an empty array
  # reference, which is always true - so the list has to be checked
  # for emptiness as well. Otherwise the dereference of the first
  # criterion below dies under strict refs.
  my $field_sort = $self->{field_sort};
  return $query unless $field_sort && @$field_sort;

  # Sort is not restricted
  return $query unless $self->{items_per_page};

  # Filtering not applicable because
  # all matches need to be found
  if ($self->{facets} ||
        $self->{field_count} ||
        $self->{count} ||
        $self->{length}) {
    return $query;
  };

  # Get first run field
  my ($field, $desc) = @{$field_sort->[0]};

  # Create rank filter, sharing the max rank reference of this object
  $query = Krawfish::Result::Sort::Filter->new(
    query        => $query,
    max_rank_ref => $self->max_doc_rank_ref,
    field        => $field,
    desc         => $desc,
    index        => $index
  );

  print_log('kq_meta', 'Query is qualified for sort filtering') if DEBUG;

  return $query;
};
# Get the reference to the max_doc_rank scalar.
#
# If an argument is passed, it is first written into the referenced
# scalar (the reference itself stays the same), so everyone holding
# the reference sees the new value.
sub max_doc_rank_ref {
  my ($self, @value) = @_;

  my $rank_ref = $self->{max_doc_rank_ref};

  # Update the referenced value in place
  if (@value) {
    $$rank_ref = $value[0];
  }

  return $rank_ref;
};
# Build the result pipeline for the nested query on a given index.
#
# The nested query is prepared for the index and then wrapped,
# from the inside out, by
#   1. aggregations (facets, field values, count, length) if requested,
#   2. the priority cascade sorter (always),
#   3. a limit if items_per_page or start_index is set.
#
# Returns the outermost query object, or nothing if no query
# was set with search_for().
sub plan_for {
  my ($self, $index) = @_;

  # Nothing to plan without a nested query
  my $plan = $self->{query};
  return unless $plan;

  # TODO:
  #   The dictionary should also have a max_rank!

  # Write the maximum rank of the index into the shared value,
  # used for faster filtering
  my $rank_ref = $self->max_doc_rank_ref($index->max_rank);

  # Prepare the nested query
  $plan = $plan->prepare_for($index);

  my @aggregations;

  # One facet aggregation per requested facet
  # (This should have more parameters, like count)
  if ($self->{facets}) {
    push @aggregations,
      map { Krawfish::Result::Segment::Aggregate::Facets->new($index, $_) }
      @{$self->{facets}};
  };

  # One value aggregation per requested field
  # (This should have more parameters, like count)
  if ($self->{field_count}) {
    push @aggregations,
      map { Krawfish::Result::Segment::Aggregate::Values->new($index, $_) }
      @{$self->{field_count}};
  };

  # Frequency and document frequency count
  # TODO:
  #   This may be obsolete in some cases, because other aggregations
  #   already count frequencies.
  push @aggregations, Krawfish::Result::Segment::Aggregate::Count->new()
    if $self->{count};

  # Lengths of the results
  push @aggregations, Krawfish::Result::Segment::Aggregate::Length->new()
    if $self->{length};

  # Augment the query with aggregations
  # TODO:
  #   It may be better to have one aggregation object, that can be filled!
  #   like ->query($query)->aggregate_on($aggr)->prepare_for($index);
  #   and after the query is through, the aggregation map contains data
  $plan = Krawfish::Result::Segment::Aggregate->new($plan, \@aggregations)
    if @aggregations;

  my $per_page = $self->items_per_page;
  my $offset   = $self->start_index;

  # Top k is only defined if the page size is restricted
  my $top_k = $per_page ? $per_page + ($offset // 0) : undef;

  # Sort the result - this is mandatory!
  # TODO:
  #   Check for fields that are either not part
  #   of the index or are identified in
  #   the corpus query (it makes no sense to
  #   sort for author, if author=Fontane is
  #   required)
  $plan = Krawfish::Result::Sort::PriorityCascade->new(
    query        => $plan,
    index        => $index,
    fields       => $self->{field_sort},
    unique       => UNIQUE_FIELD,
    top_k        => $top_k,
    max_rank_ref => $rank_ref
  );

  print_log('kq_meta', "Field sort with: " . $plan->to_string) if DEBUG;

  # Limit the result
  if ($per_page || $offset) {
    $plan = Krawfish::Result::Limit->new($plan, $offset, $per_page);
  };

  # The order needs to be:
  #   snippet(
  #     fields(
  #       limit(        -
  #         sorted(     -
  #           faceted(  -
  #             count(Q) -
  #           )
  #         )
  #       )
  #     )
  #   )

  return $plan;
};
1;
__END__