# blob: 8a80d72e848a0395be7d9530e5f57803fd166a1f [file] [log] [blame]
package Krawfish::Koral;
use strict;
use warnings;
use Role::Tiny::With;
use Krawfish::Log;
use Krawfish::Koral::Document;
use Krawfish::Koral::Query::Builder;
use Krawfish::Koral::Corpus::Builder;
use Krawfish::Koral::Compile::Builder;
use Krawfish::Koral::Compile;
use Krawfish::Koral::Compile::Node;
with 'Krawfish::Koral::Report';
with 'Krawfish::Koral::Result::Inflatable';
use constant DEBUG => 0;
# Parse a koral query object and transform to an
# actual index query.
# Processing order for query and corpus:
# a) parse (cluster)
# b) normalize and finalize (cluster)
# c) refer (no multiple leaf lifting) (cluster) (or not)
# d) inflate (some normalization) (node)
# e) memoize (segment)
# f) optimize (segment)
#
# Usage:
# my $koral = Krawfish::Koral->new;
# my $qb = $koral->query_builder;
# my $cb = $koral->corpus_builder;
# my $mb = $koral->compilation_builder;
# $koral->compilation(
# $mb->aggregate(
# $mb->a_frequencies,
# $mb->a_fields('license'),
# $mb->a_fields('corpus'),
# $mb->a_length
# )->start_index(0)
# ->items_per_page(20)
# ->sort_by(
# $mb->sort_field('author', 1)
# )->fields('author')
# ->snippet('')
# )->query(
# $qb->token('aa')
# )->corpus(
# $cb->string('xx')
# );
#
# $koral->to_cluster ... ->to_node($dict) ... ->to_segment($index)
# TODO:
# Filtering needs to be supported multiple times,
# so when one filter is applied (virtual corpus),
# another one can be filtered before (bin-sorting).
# TODO:
# When a user searches a term in a query,
# this should issue an update in the autosuggestion
# dictionary.
# Constructor.
# Without further arguments an empty Koral object is returned.
# With a hash reference as first argument, a true "document" entry
# is wrapped in a Krawfish::Koral::Document and stored for import;
# no other entry of the passed hash is read here.
sub new {
  my ($class, $koral) = @_;

  my $self = bless {}, $class;

  # All slots start out undefined
  $self->{$_} = undef for (
    'query',        # The query definition
    'corpus',       # The vc definition
    'compilation',  # The compile definitions
    'matches',      # List of match IDs
    'document',     # Document data to import
    'response'      # Response object
  );

  # Only the class name was passed
  return $self if @_ < 2;

  # Expect a hash - import the document, if there is any
  if ($koral->{document}) {
    $self->{document} = Krawfish::Koral::Document->new($koral->{document});
  }

  return $self;
};
# Accessor for the query part of the Koral object.
# Called with a true value, that value is stored and the Koral
# object itself is returned (so calls can be chained).
# Called without a (true) value, the stored query is returned.
sub query {
  my ($self, $query) = @_;

  # Getter
  return $self->{query} unless $query;

  # Setter
  $self->{query} = $query;
  return $self;
};
# Get a fresh query builder.
# The invocant is not used; every call constructs a new
# Krawfish::Koral::Query::Builder.
sub query_builder {
  return Krawfish::Koral::Query::Builder->new;
};
# Accessor for the corpus (virtual corpus) part of the Koral object.
# Called with a true value, that value is stored and the Koral
# object itself is returned (so calls can be chained).
# Called without a (true) value, the stored corpus is returned.
sub corpus {
  my ($self, $corpus) = @_;

  # Getter
  return $self->{corpus} unless $corpus;

  # Setter
  $self->{corpus} = $corpus;
  return $self;
};
# Get a fresh corpus builder.
# The invocant is not used; every call constructs a new
# Krawfish::Koral::Corpus::Builder.
sub corpus_builder {
  return Krawfish::Koral::Corpus::Builder->new;
};
# Accessor for the compile part of the Koral object.
# If the first argument is true, all arguments are handed to
# Krawfish::Koral::Compile->new, the resulting object is stored
# and the Koral object itself is returned (so calls can be chained).
# Otherwise the stored compilation is returned.
# TODO:
#   It may be better to have a separation of
#   - groupBy
#   - sortBy
#   - aggregateBy
#   - enrichBy
sub compilation {
  my $self = shift;

  # Getter
  return $self->{compilation} unless $_[0];

  # Setter - wrap the passed operations in a compile object
  $self->{compilation} = Krawfish::Koral::Compile->new(@_);
  return $self;
};
# Get a fresh compile builder.
# The invocant is not used; every call constructs a new
# Krawfish::Koral::Compile::Builder.
sub compilation_builder {
  return Krawfish::Koral::Compile::Builder->new;
};
# Clone the query object.
# Not implemented in this file: the body consists solely of the
# `...` placeholder statement, so there is no cloning logic yet.
# NOTE(review): if this is Perl's literal ellipsis statement, calling
# the method dies with "Unimplemented" - confirm before relying on it.
sub clone {
...
};
# Create a single query tree from the query, corpus and compile parts.
#
# Parameters:
#   $replicant_id - optional; if true, the query is additionally
#                   filtered by corpus_builder->replicant_node($replicant_id)
#
# Returns:
#   - nothing (bare return), if normalizing or finalizing the query or
#     normalizing the compilation fails; the report of the failing
#     object is copied to this object via copy_info_from() first
#   - the finalized query, if no compilation is set
#   - otherwise the result of compilation->wrap(<finalized query>)
sub to_query {
  my ($self, $replicant_id) = @_;

  my $query_part  = $self->query;
  my $corpus_part = $self->corpus;

  # Build a complete query object
  my $query;

  # A query is given - filter it by the virtual corpus or,
  # if there is none, by "anywhere" (filter for live documents)
  if ($query_part) {
    $query = $self->query_builder->filter_by(
      $query_part,
      $corpus_part || $self->corpus_builder->anywhere
    );
  }

  # Only a corpus query is given, or neither of both
  # (which may be a group query) - search the corpus only
  else {
    # TODO:
    #   There is only a corpus query involved,
    #   this may make some compile queries negligible!
    $query = $corpus_part || $self->corpus_builder->anywhere;
  }

  # TODO:
  #   Ignore classes in InCorpus(), that have no reference
  #   in corpus. Or spit out a warning!

  # If request is focused on replication, filter to replicates
  if ($replicant_id) {
    $query = $self->query_builder->filter_by(
      $query,
      $self->corpus_builder->replicant_node($replicant_id)
    );
  }

  # Focusing on primary data otherwise is currently disabled:
  #   $query = $self->query_builder->filter_by(
  #     $query,
  #     $self->corpus_builder->primary_node
  #   );

  # Normalize the query
  my $query_norm = $query->normalize;
  unless ($query_norm) {
    $self->copy_info_from($query);
    return;
  }

  # Finalize the query
  my $query_final = $query_norm->finalize;
  unless ($query_final) {
    # finalize() was invoked on the normalized query, so that is
    # the object to take the failure report from
    $self->copy_info_from($query_norm);
    return;
  }

  # This is just for testing
  my $compilation = $self->compilation;
  return $query_final unless $compilation;

  # Normalize the compile
  # NOTE(review): the return value of normalize is only checked for
  # truth; wrap() is called on the stored compilation object -
  # confirm that normalize works in place.
  unless ($compilation->normalize) {
    $self->copy_info_from($compilation);
    return;
  }

  # Serialize from compile
  return $compilation->wrap($query_final);
};
# Prepare the query to work on segments.
#
# If a compilation is set, its operations are scanned for operations
# of type 'limit'; the last one found defines
#   top_k = start_index + items_per_page
# A Krawfish::Koral::Compile::Node constructed with that value (undef,
# if there is no limit) is added to the compilation, and the result of
# to_query($replicant_id) is returned.
#
# If no compilation is set, there is nothing to add the node wrapper
# to, so to_query($replicant_id) is returned directly instead of
# calling a method on an undefined value.
#
# NOTE(review): add() is called on the stored compilation object on
# every call - presumably repeated calls stack several node wrappers;
# confirm against callers.
sub to_segments {
  my ($self, $replicant_id) = @_;

  # Get compilation object
  my $cmp = $self->compilation;

  # No compilation - nothing to wrap
  return $self->to_query($replicant_id) unless $cmp;

  # Check for a set limit
  my $top_k;
  foreach my $op ($cmp->operations) {
    next unless $op->type eq 'limit';
    $top_k = $op->start_index + $op->items_per_page;
  }

  print_log('koral', 'Add wrapping node query') if DEBUG;

  # Add wrapping node query
  $cmp->add(Krawfish::Koral::Compile::Node->new($top_k));

  return $self->to_query($replicant_id);
};
# Inflate the Koral object.
# Not implemented in this file: the body consists solely of the
# `...` placeholder statement.
# NOTE(review): the package composes the role
# Krawfish::Koral::Result::Inflatable - presumably this method is
# expected by that role; confirm.
sub inflate {
...
};
# Serialization of KoralQuery.
# Returns a hash reference holding the serialized "query" and "corpus"
# parts (each only if set, via their own to_koral_fragment) after
# passing it through merge_info(). The compilation is not serialized
# here.
sub to_koral_fragment {
  my $self = shift;

  my %koral;

  # Serialize every part that is set
  foreach my $part (qw/query corpus/) {
    next unless $self->$part;
    $koral{$part} = $self->$part->to_koral_fragment;
  }

  $self->merge_info(\%koral);

  return \%koral;
};
# Stringification.
# Joins the parts that are set, in the order compilation, corpus,
# query, as "<part>=[<stringified part>]" separated by commas.
#
# Parameters:
#   $id - optional value handed through unchanged to the to_string
#         method of every part
#
# Returns the joined string (empty, if no part is set).
sub to_string {
  my ($self, $id) = @_;

  my @list;

  foreach my $part (qw/compilation corpus query/) {
    my $obj = $self->$part or next;
    push @list, $part . '=[' . $obj->to_string($id) . ']';
  }

  return join(',', @list);
};
# Find identical subqueries and replace outer queries with
# - references or
# - cached queries
#
# Hands an empty reference store and a new Krawfish::Cache object to
# the replace_subqueries method of the passed query and returns the
# passed query object itself (not the return value of that call).
#
# NOTE(review): no "use Krawfish::Cache" line is visible in this
# file - confirm the class is loaded elsewhere before this is called.
sub replace_subqueries {
  my ($self, $query) = @_;

  # The reference store will collect signatures of subqueries
  # to replace identical subqueries with reference pointers
  my %refs;

  # TODO: Load real cache!
  # The cache is global and will replace subqueries that are
  # already cached
  my $cache = Krawfish::Cache->new;

  $query->replace_subqueries(\%refs, $cache);

  return $query;
};
1;
__END__
# TODO:
# This is the new entry point!
# Planned preparation step on cluster level (placeholder only).
# This definition follows the __END__ marker of the file, so perl
# does not compile it.
sub prepare_for_cluster {
# Intended chain: ->normalize->finalize->refer
...
};
# Planned preparation step on node level (placeholder only).
# This definition follows the __END__ marker of the file, so perl
# does not compile it.
sub prepare_for_node {
# Intended chain: ->identify($dict)
# WARN! This may require a new normalization, but it should be kept in mind
# that this may also lead to warnings being added twice!
...
};
# Planned preparation step on segment level (placeholder only).
# This definition follows the __END__ marker of the file, so perl
# does not compile it.
sub prepare_for_segment {
# Intended chain: ->cache->optimize($segment)
...
};
# This introduces the normalization phase.
# Builds one query from the query and corpus parts, normalizes and
# finalizes it and, if a compilation is set, hands the finalized
# query to compilation->to_nodes.
# Returns nothing (bare return) if a normalize/finalize step fails,
# after copying the report via copy_info_from().
# This definition follows the __END__ marker of the file, so perl
# does not compile it.
# TODO:
# It should probably return a Koral::* object, that can be sent!
sub to_nodes {
my $self = shift;
# Optionally pass a node id for replication retrieval
my $replicant_id = shift;
# Build a complete query object
my $query;
# A virtual corpus and a query are given
if ($self->corpus && $self->query) {
# Filter query by corpus
$query = $self->query_builder->filter_by($self->query, $self->corpus);
}
# Only a query is given
elsif ($self->query) {
print_log('koral', 'Added live document filter') if DEBUG;
# Add corpus filter for live documents
$query = $self->query_builder->filter_by(
$self->query,
$self->corpus_builder->anywhere
);
}
# Only a corpus query is given
# NOTE(review): this branch is also taken when neither query nor
# corpus is set; $query is then whatever ->corpus returns (undef
# for a fresh object) - confirm this case cannot occur.
else {
# TODO:
# This may have influence on the possible compile object!
$query = $self->corpus;
};
# If request is focused on replication, filter to replicates
if ($replicant_id) {
$query = $self->query_builder->filter_by(
$query,
$self->corpus_builder->replicant_node($replicant_id)
);
}
# Focus on primary data (currently disabled)
else {
# $query = $self->query_builder->filter_by(
# $query,
# $self->corpus_builder->primary_node
# );
}
# Normalize the query
my $query_norm;
unless ($query_norm = $query->normalize) {
$self->copy_info_from($query);
return;
};
# Finalize the query
# NOTE(review): finalize is invoked on $query_norm, but on failure
# the report is copied from $query - confirm this is intended.
my $query_final;
unless ($query_final = $query_norm->finalize) {
$self->copy_info_from($query);
return;
};
# This is just for testing
return $query_final unless $self->compilation;
# Normalize the compile
# ($compile is only used for the truth check below)
my $compile;
unless ($compile = $self->compilation->normalize) {
$self->copy_info_from($self->compilation);
return;
};
# Serialize from compile
return $self->compilation->to_nodes($query_final);
};
# TODO:
# This is just temporary, because results are still a mess!
# Empty stub: the arguments are unpacked, but nothing is done with them.
# This definition follows the __END__ marker of the file, so perl
# does not compile it and it does not clash with the to_segments
# method defined before __END__.
sub to_segments {
my ($self, $dict) = @_;
};