lib/Krawfish/Koral.pm - KorAP/Krawfish-prototype - Gitiles

 package Krawfish::Koral;
 use strict;
 use warnings;
 use Role::Tiny::With;
 use Krawfish::Log;
 use Krawfish::Koral::Document;
 use Krawfish::Koral::Query::Builder;
 use Krawfish::Koral::Corpus::Builder;
 use Krawfish::Koral::Compile::Builder;
 use Krawfish::Koral::Compile;

 use Krawfish::Koral::Compile::Node;

 with 'Krawfish::Koral::Report';
 with 'Krawfish::Koral::Result::Inflatable';

 use constant DEBUG => 0;

 # Parse a koral query object and transform to an
 # actual index query.

 # Processing order for query and corpus:
 #   a) parse                            (cluster)
 #   b) normalize and finalize           (cluster)
 #   c) refer (no multiple leaf lifting) (cluster) (or not)
 #   d) inflate (some normalization)     (node)
 #   e) memoize                          (segment)
 #   f) optimize                         (segment)
 #
 # Usage:
 #   $koral = Koral->new;
 #   my $qb = $koral->query_builder;
 #   my $cb = $koral->corpus_builder;
 #   my $mb = $koral->compilation_builder;
 #   $koral->compilation(
 #     $mb->aggregate(
 #       $mb->a_frequencies,
 #       $mb->a_fields('license'),
 #       $mb->a_fields('corpus'),
 #       $mb->a_length
 #     )->start_index(0)
 #     ->items_per_page(20)
 #     ->sort_by(
 #       $mb->sort_field('author', 1)
 #     )->fields('author')
 #     ->snippet('')
 #   )->query(
 #     $qb->token('aa')
 #   )->corpus(
 #     $cb->string('xx')
 #   );
 #
 #   $koral->to_cluster ... ->to_node($dict) ... ->to_segment($index)

 # TODO:
 #   Filtering needs to be supported multiple times,
 #   so when one filter is applied (virtual corpus),
 #   another one can be filtered before (bin-sorting).

 # TODO:
 #   When a user searches a term in a query,
 #   this should issue an update in the autosuggestion
 #   dictionary.

 # Constructor.
 #
 # Creates a Koral object with the following slots, all initially undef:
 #   query       - the query definition
 #   corpus      - the vc definition
 #   compilation - the compile definitions
 #   matches     - list of match IDs
 #   document    - document data to import
 #   response    - response object
 #
 # An optional first argument is expected to be a hash reference. If it
 # carries a true "document" entry, that entry is wrapped in a
 # Krawfish::Koral::Document and stored in the "document" slot.
 sub new {
   my ($class, @args) = @_;

   # Assigning the empty list to the slice sets every slot to undef
   my $self = bless {}, $class;
   @{$self}{qw(query corpus compilation matches document response)} = ();

   if (@args) {
     my $koral = $args[0];

     # Import document
     my $document = $koral->{document};
     $self->{document} = Krawfish::Koral::Document->new($document)
       if $document;
   }

   return $self;
 };


 # Accessor for the query part of the Koral object.
 #
 # With a true argument the value is stored and the Koral object itself is
 # returned, so calls can be chained. Without an argument (or with a false
 # one) the stored query is returned.
 sub query {
   my ($self, $query) = @_;

   # Getter
   return $self->{query} unless $query;

   # Setter
   $self->{query} = $query;
   return $self;
 };


 # Return a new Krawfish::Koral::Query::Builder instance
 # (a fresh one on every call).
 sub query_builder {
   return Krawfish::Koral::Query::Builder->new;
 };


 # Accessor for the corpus part of the Koral object.
 #
 # With a true argument the value is stored and the Koral object itself is
 # returned, so calls can be chained. Without an argument (or with a false
 # one) the stored corpus is returned.
 sub corpus {
   my ($self, $corpus) = @_;

   # Getter
   return $self->{corpus} unless $corpus;

   # Setter
   $self->{corpus} = $corpus;
   return $self;
 };


 # Return a new Krawfish::Koral::Corpus::Builder instance
 # (a fresh one on every call).
 sub corpus_builder {
   return Krawfish::Koral::Corpus::Builder->new;
 };


 # Accessor for the compile part of the Koral object.
 #
 # When the first argument is true, all arguments are handed to
 # Krawfish::Koral::Compile->new, the resulting object is stored and the
 # Koral object itself is returned. Otherwise the stored compilation
 # is returned.
 #
 # TODO:
 #   It may be better to have a separation of
 #   - groupBy
 #   - sortBy
 #   - aggregateBy
 #   - enrichBy
 sub compilation {
   my $self = shift;

   # Getter
   return $self->{compilation} unless $_[0];

   # Setter
   $self->{compilation} = Krawfish::Koral::Compile->new(@_);
   return $self;
 };


 # Return a new Krawfish::Koral::Compile::Builder instance
 # (a fresh one on every call).
 sub compilation_builder {
   return Krawfish::Koral::Compile::Builder->new;
 };


 # Clone the query object.
 # Not implemented yet: the body is Perl's "..." (ellipsis) placeholder
 # statement, so calling this method throws an "Unimplemented" exception.
 sub clone {
   ...
 };


 # Create a single query tree.
 #
 # The stored query is filtered by the stored corpus (or by the builder's
 # "anywhere" corpus if none is set). Without a query, the corpus alone
 # (or "anywhere") becomes the tree. The tree is optionally restricted to
 # a replicant node, then normalized and finalized. If a compilation is
 # set, it is normalized and wrapped around the finalized query.
 #
 # Params:
 #   $replicant_id - optional; when true, the tree is additionally
 #                   filtered by corpus_builder->replicant_node($replicant_id)
 #
 # Returns:
 #   - the finalized query, when no compilation is set
 #   - the result of compilation->wrap($query_final) otherwise
 #   - nothing, when a normalize or finalize call returns false; in that
 #     case copy_info_from() is called on this object first
 sub to_query {
   my ($self, $replicant_id) = @_;

   my $span_query = $self->query;
   my $vc         = $self->corpus;

   # Remember whether the tree consists of a corpus query only
   # (also true if neither is given - so it may be a group query)
   my $corpus_only = $span_query ? 0 : 1;

   # Build a complete query object
   my $query;
   if ($span_query) {

     # Filter the query by the virtual corpus - or, if none is given,
     # by "anywhere" (corpus filter for live documents)
     $query = $self->query_builder->filter_by(
       $span_query,
       $vc || $self->corpus_builder->anywhere
     );
   }
   else {
     $query = $vc || $self->corpus_builder->anywhere;
   }

   # TODO:
   #   Ignore classes in InCorpus(), that have no reference
   #   in corpus. Or spit out a warning!

   # If request is focused on replication, filter to replicates
   if ($replicant_id) {
     $query = $self->query_builder->filter_by(
       $query,
       $self->corpus_builder->replicant_node($replicant_id)
     );
   }

   # Otherwise the focus would be on primary data - currently disabled:
   #   $query = $self->query_builder->filter_by(
   #     $query,
   #     $self->corpus_builder->primary_node
   #   );

   # Normalize the query
   my $query_norm = $query->normalize;
   unless ($query_norm) {
     $self->copy_info_from($query);
     return;
   }

   # Finalize the query
   # NOTE(review): on failure the info is copied from $query, not from
   # $query_norm on which finalize was called - confirm this is intended.
   my $query_final = $query_norm->finalize;
   unless ($query_final) {
     $self->copy_info_from($query);
     return;
   }

   # This is just for testing
   my $compilation = $self->compilation;
   return $query_final unless $compilation;

   if ($corpus_only) {
     # TODO:
     #   There is only a corpus query involved,
     #   this may make some compile queries neglectable!
   }

   # Normalize the compile
   # NOTE(review): only the truth of the normalized result is used; wrap()
   # below is called on the stored compilation object - confirm.
   my $compile = $compilation->normalize;
   unless ($compile) {
     $self->copy_info_from($compilation);
     return;
   }

   # Serialize from compile
   return $compilation->wrap($query_final);
 };


 # Prepare the query to work on segments.
 #
 # Appends a Krawfish::Koral::Compile::Node operation to the stored
 # compilation and then delegates to to_query().
 #
 # Params:
 #   $replicant_id - optional, passed through to to_query()
 #
 # Returns whatever to_query($replicant_id) returns.
 #
 # NOTE(review): a compilation has to be set before calling this, as its
 # methods are invoked without a check - confirm callers guarantee that.
 sub to_segments {
   my ($self, $replicant_id) = @_;

   # Get compilation object
   my $compilation = $self->compilation;

   # Check for a set limit: top_k is start_index + items_per_page of the
   # last 'limit' operation and stays undef if there is none
   my $top_k;
   for my $operation ($compilation->operations) {
     next unless $operation->type eq 'limit';
     $top_k = $operation->start_index + $operation->items_per_page;
   }

   print_log('koral', 'Add wrapping node query') if DEBUG;

   # Add wrapping node query
   my $node = Krawfish::Koral::Compile::Node->new($top_k);
   $compilation->add($node);

   return $self->to_query($replicant_id);
 };


 # Inflation step.
 # Not implemented yet: the body is Perl's "..." (ellipsis) placeholder
 # statement, so calling this method throws an "Unimplemented" exception.
 sub inflate {
   ...
 };


 # Serialization of KoralQuery.
 #
 # Returns a hash reference holding the to_koral_fragment() results of the
 # query and the corpus under the keys "query" and "corpus" (each only if
 # the part is set). The hash is passed to merge_info() before it is
 # returned. The compilation is not serialized here.
 sub to_koral_fragment {
   my $self = shift;

   my %koral;

   # Set query and corpus object, if given
   for my $part (qw(query corpus)) {
     my $object = $self->$part or next;
     $koral{$part} = $object->to_koral_fragment;
   }

   $self->merge_info(\%koral);

   return \%koral;
 };


 # Stringification.
 #
 # Every part that is set is rendered as "name=[...]" using the part's own
 # to_string($id). The parts appear in the order compilation, corpus,
 # query and are joined by commas. With no part set, the empty string
 # is returned.
 #
 # Params:
 #   $id - optional, handed through unchanged to each part's to_string()
 sub to_string {
   my ($self, $id) = @_;

   my @list;

   # The label of each part equals the name of its accessor
   for my $part (qw(compilation corpus query)) {
     my $object = $self->$part or next;
     push @list, $part . '=[' . $object->to_string($id) . ']';
   }

   return join(',', @list);
 };


 # Find identical subqueries and replace outer queries with
 # - references or
 # - cached queries
 #
 # Params:
 #   $query - the query object; its replace_subqueries() method is called
 #            with a fresh reference store and a cache object
 #
 # Returns the passed $query object.
 sub replace_subqueries {
   my ($self, $query) = @_;

   # The reference store will collect signatures of subqueries
   # to replace identical subqueries with reference pointers
   my $refs = {};

   # This file does not load Krawfish::Cache itself, so load it on demand
   # unless the class has already been made available elsewhere
   require Krawfish::Cache unless Krawfish::Cache->can('new');

   # TODO: Load real cache!
   # The cache is global and will replace subqueries that are
   # already cached
   my $cache = Krawfish::Cache->new;
   $query->replace_subqueries($refs, $cache);

   return $query;
 };


 1;


 __END__


 # TODO:
 #   This is the new entry point!
 # Draft placed after __END__, so Perl does not compile it. The body is
 # the "..." (ellipsis) placeholder; the comment inside sketches the
 # intended steps.
 sub prepare_for_cluster {
   # ->normalize->finalize->refer
   ...
 };

 # Draft placed after __END__, so Perl does not compile it. The body is
 # the "..." (ellipsis) placeholder; the comment inside sketches the
 # intended steps.
 sub prepare_for_node {
   # ->identify($dict)
   # WARN! This may require a new normalization, but it should be kept in mind that this
   # may also cause warnings to be added twice!
   ...
 };

 # Draft placed after __END__, so Perl does not compile it. The body is
 # the "..." (ellipsis) placeholder; the comment inside sketches the
 # intended steps.
 sub prepare_for_segment {
   # ->cache->optimize($segment)
   ...
 };


 # This introduces the normalization phase.
 #
 # Draft placed after __END__, so Perl does not compile it. It follows the
 # same steps as to_query(), but finishes with compilation->to_nodes()
 # instead of compilation->wrap().
 #
 # Params (taken from @_ in order):
 #   $replicant_id - optional node id for replication retrieval
 #
 # Returns the finalized query if no compilation is set, otherwise the
 # result of compilation->to_nodes($query_final); returns nothing when a
 # normalize or finalize call returns false.
 #
 # TODO:
 #   It should probably return a Koral::* object that can be sent!
 sub to_nodes {
   my $self = shift;

   # Optionally pass a node id for replication retrieval
   my $replicant_id = shift;

   # Build a complete query object
   my $query;

   # A virtual corpus and a query is given
   if ($self->corpus && $self->query) {

     # Filter query by corpus
     $query = $self->query_builder->filter_by($self->query, $self->corpus);
   }

   # Only a query is given
   elsif ($self->query) {

     print_log('koral', 'Added live document filter') if DEBUG;

     # Add corpus filter for live documents
     $query = $self->query_builder->filter_by(
       $self->query,
       $self->corpus_builder->anywhere
     );
   }

   # Only a corpus query is given
   # NOTE(review): this branch is also taken when neither query nor corpus
   # is set, leaving $query undefined; to_query() falls back to
   # corpus_builder->anywhere in that case - confirm before reviving.
   else {

     # TODO:
     #   This may have influence on the possible compile object!
     $query = $self->corpus;
   };

   # If request is focused on replication, filter to replicates
   if ($replicant_id) {
     $query = $self->query_builder->filter_by(
       $query,
       $self->corpus_builder->replicant_node($replicant_id)
     );
   }

   # Focus on primary data (currently disabled)
   else {
     # $query = $self->query_builder->filter_by(
     #   $query,
     #   $self->corpus_builder->primary_node
     # );
   }

   # Normalize the query; on failure copy the info and return nothing
   my $query_norm;
   unless ($query_norm = $query->normalize) {
     $self->copy_info_from($query);
     return;
   };

   # Finalize the query; on failure copy the info and return nothing
   my $query_final;
   unless ($query_final = $query_norm->finalize) {
     $self->copy_info_from($query);
     return;
   };

   # This is just for testing
   return $query_final unless $self->compilation;

   # Normalize the compile
   my $compile;
   unless ($compile = $self->compilation->normalize) {
     $self->copy_info_from($self->compilation);
     return;
   };

   # Serialize from compile
   return $self->compilation->to_nodes($query_final);
 };


 # TODO:
 #   This is just temporary, because results are still a mess!
 # Stub placed after __END__, so Perl does not compile it. It only
 # unpacks its arguments ($self, $dict) and does nothing else.
 sub to_segments {
   my ($self, $dict) = @_;
 };
	package Krawfish::Koral;
	use strict;
	use warnings;
	use Role::Tiny::With;
	use Krawfish::Log;
	use Krawfish::Koral::Document;
	use Krawfish::Koral::Query::Builder;
	use Krawfish::Koral::Corpus::Builder;
	use Krawfish::Koral::Compile::Builder;
	use Krawfish::Koral::Compile;

	use Krawfish::Koral::Compile::Node;

	with 'Krawfish::Koral::Report';
	with 'Krawfish::Koral::Result::Inflatable';

	use constant DEBUG => 0;

	# Parse a koral query object and transform to an
	# actual index query.

	# Processing order for query and corpus:
	# a) parse (cluster)
	# b) normalize and finalize (cluster)
	# c) refer (no multiple leaf lifting) (cluster) (or not)
	# d) inflate (some normalization) (node)
	# e) memoize (segment)
	# f) optimize (segment)
	#
	# Usage:
	# $koral = Koral->new;
	# my $qb = $koral->query_builder;
	# my $cb = $koral->corpus_builder;
	# my $mb = $koral->compilation_builder;
	# $koral->compilation(
	# $mb->aggregate(
	# $mb->a_frequencies,
	# $mb->a_fields('license'),
	# $mb->a_fields('corpus'),
	# $mb->a_length
	# )->start_index(0)
	# ->items_per_page(20)
	# ->sort_by(
	# $mb->sort_field('author', 1)
	# )->fields('author')
	# ->snippet('')
	# )->query(
	# $qb->token('aa')
	# )->corpus(
	# $cb->string('xx')
	# );
	#
	# $koral->to_cluster ... ->to_node($dict) ... ->to_segment($index)

	# TODO:
	# Filtering needs to be supported multiple times,
	# so when one filter is applied (virtual corpus),
	# another one can be filtered before (bin-sorting).

	# TODO:
	# When a user searches a term in a query,
	# this should issue an update in the autosuggestion
	# dictionary.

	# Constructor.
	#
	# Creates a Koral object with the following slots, all initially undef:
	#   query       - the query definition
	#   corpus      - the vc definition
	#   compilation - the compile definitions
	#   matches     - list of match IDs
	#   document    - document data to import
	#   response    - response object
	#
	# An optional first argument is expected to be a hash reference. If it
	# carries a true "document" entry, that entry is wrapped in a
	# Krawfish::Koral::Document and stored in the "document" slot.
	sub new {
	  my ($class, @args) = @_;

	  # Assigning the empty list to the slice sets every slot to undef
	  my $self = bless {}, $class;
	  @{$self}{qw(query corpus compilation matches document response)} = ();

	  if (@args) {
	    my $koral = $args[0];

	    # Import document
	    my $document = $koral->{document};
	    $self->{document} = Krawfish::Koral::Document->new($document)
	      if $document;
	  }

	  return $self;
	};


	# Accessor for the query part of the Koral object.
	#
	# With a true argument the value is stored and the Koral object itself is
	# returned, so calls can be chained. Without an argument (or with a false
	# one) the stored query is returned.
	sub query {
	  my ($self, $query) = @_;

	  # Getter
	  return $self->{query} unless $query;

	  # Setter
	  $self->{query} = $query;
	  return $self;
	};


	# Return a new Krawfish::Koral::Query::Builder instance
	# (a fresh one on every call).
	sub query_builder {
	  return Krawfish::Koral::Query::Builder->new;
	};


	# Accessor for the corpus part of the Koral object.
	#
	# With a true argument the value is stored and the Koral object itself is
	# returned, so calls can be chained. Without an argument (or with a false
	# one) the stored corpus is returned.
	sub corpus {
	  my ($self, $corpus) = @_;

	  # Getter
	  return $self->{corpus} unless $corpus;

	  # Setter
	  $self->{corpus} = $corpus;
	  return $self;
	};


	# Return a new Krawfish::Koral::Corpus::Builder instance
	# (a fresh one on every call).
	sub corpus_builder {
	  return Krawfish::Koral::Corpus::Builder->new;
	};


	# Accessor for the compile part of the Koral object.
	#
	# When the first argument is true, all arguments are handed to
	# Krawfish::Koral::Compile->new, the resulting object is stored and the
	# Koral object itself is returned. Otherwise the stored compilation
	# is returned.
	#
	# TODO:
	#   It may be better to have a separation of
	#   - groupBy
	#   - sortBy
	#   - aggregateBy
	#   - enrichBy
	sub compilation {
	  my $self = shift;

	  # Getter
	  return $self->{compilation} unless $_[0];

	  # Setter
	  $self->{compilation} = Krawfish::Koral::Compile->new(@_);
	  return $self;
	};


	# Return a new Krawfish::Koral::Compile::Builder instance
	# (a fresh one on every call).
	sub compilation_builder {
	  return Krawfish::Koral::Compile::Builder->new;
	};


	# Clone the query object.
	# Not implemented yet: the body is Perl's "..." (ellipsis) placeholder
	# statement, so calling this method throws an "Unimplemented" exception.
	sub clone {
	...
	};


	# Create a single query tree.
	#
	# The stored query is filtered by the stored corpus (or by the builder's
	# "anywhere" corpus if none is set). Without a query, the corpus alone
	# (or "anywhere") becomes the tree. The tree is optionally restricted to
	# a replicant node, then normalized and finalized. If a compilation is
	# set, it is normalized and wrapped around the finalized query.
	#
	# Params:
	#   $replicant_id - optional; when true, the tree is additionally
	#                   filtered by corpus_builder->replicant_node($replicant_id)
	#
	# Returns:
	#   - the finalized query, when no compilation is set
	#   - the result of compilation->wrap($query_final) otherwise
	#   - nothing, when a normalize or finalize call returns false; in that
	#     case copy_info_from() is called on this object first
	sub to_query {
	  my ($self, $replicant_id) = @_;

	  my $span_query = $self->query;
	  my $vc         = $self->corpus;

	  # Remember whether the tree consists of a corpus query only
	  # (also true if neither is given - so it may be a group query)
	  my $corpus_only = $span_query ? 0 : 1;

	  # Build a complete query object
	  my $query;
	  if ($span_query) {

	    # Filter the query by the virtual corpus - or, if none is given,
	    # by "anywhere" (corpus filter for live documents)
	    $query = $self->query_builder->filter_by(
	      $span_query,
	      $vc || $self->corpus_builder->anywhere
	    );
	  }
	  else {
	    $query = $vc || $self->corpus_builder->anywhere;
	  }

	  # TODO:
	  #   Ignore classes in InCorpus(), that have no reference
	  #   in corpus. Or spit out a warning!

	  # If request is focused on replication, filter to replicates
	  if ($replicant_id) {
	    $query = $self->query_builder->filter_by(
	      $query,
	      $self->corpus_builder->replicant_node($replicant_id)
	    );
	  }

	  # Otherwise the focus would be on primary data - currently disabled:
	  #   $query = $self->query_builder->filter_by(
	  #     $query,
	  #     $self->corpus_builder->primary_node
	  #   );

	  # Normalize the query
	  my $query_norm = $query->normalize;
	  unless ($query_norm) {
	    $self->copy_info_from($query);
	    return;
	  }

	  # Finalize the query
	  # NOTE(review): on failure the info is copied from $query, not from
	  # $query_norm on which finalize was called - confirm this is intended.
	  my $query_final = $query_norm->finalize;
	  unless ($query_final) {
	    $self->copy_info_from($query);
	    return;
	  }

	  # This is just for testing
	  my $compilation = $self->compilation;
	  return $query_final unless $compilation;

	  if ($corpus_only) {
	    # TODO:
	    #   There is only a corpus query involved,
	    #   this may make some compile queries neglectable!
	  }

	  # Normalize the compile
	  # NOTE(review): only the truth of the normalized result is used; wrap()
	  # below is called on the stored compilation object - confirm.
	  my $compile = $compilation->normalize;
	  unless ($compile) {
	    $self->copy_info_from($compilation);
	    return;
	  }

	  # Serialize from compile
	  return $compilation->wrap($query_final);
	};


	# Prepare the query to work on segments.
	#
	# Appends a Krawfish::Koral::Compile::Node operation to the stored
	# compilation and then delegates to to_query().
	#
	# Params:
	#   $replicant_id - optional, passed through to to_query()
	#
	# Returns whatever to_query($replicant_id) returns.
	#
	# NOTE(review): a compilation has to be set before calling this, as its
	# methods are invoked without a check - confirm callers guarantee that.
	sub to_segments {
	  my ($self, $replicant_id) = @_;

	  # Get compilation object
	  my $compilation = $self->compilation;

	  # Check for a set limit: top_k is start_index + items_per_page of the
	  # last 'limit' operation and stays undef if there is none
	  my $top_k;
	  for my $operation ($compilation->operations) {
	    next unless $operation->type eq 'limit';
	    $top_k = $operation->start_index + $operation->items_per_page;
	  }

	  print_log('koral', 'Add wrapping node query') if DEBUG;

	  # Add wrapping node query
	  my $node = Krawfish::Koral::Compile::Node->new($top_k);
	  $compilation->add($node);

	  return $self->to_query($replicant_id);
	};



	# Inflation step.
	# Not implemented yet: the body is Perl's "..." (ellipsis) placeholder
	# statement, so calling this method throws an "Unimplemented" exception.
	sub inflate {
	...
	};


	# Serialization of KoralQuery.
	#
	# Returns a hash reference holding the to_koral_fragment() results of the
	# query and the corpus under the keys "query" and "corpus" (each only if
	# the part is set). The hash is passed to merge_info() before it is
	# returned. The compilation is not serialized here.
	sub to_koral_fragment {
	  my $self = shift;

	  my %koral;

	  # Set query and corpus object, if given
	  for my $part (qw(query corpus)) {
	    my $object = $self->$part or next;
	    $koral{$part} = $object->to_koral_fragment;
	  }

	  $self->merge_info(\%koral);

	  return \%koral;
	};


	# Stringification.
	#
	# Every part that is set is rendered as "name=[...]" using the part's own
	# to_string($id). The parts appear in the order compilation, corpus,
	# query and are joined by commas. With no part set, the empty string
	# is returned.
	#
	# Params:
	#   $id - optional, handed through unchanged to each part's to_string()
	sub to_string {
	  my ($self, $id) = @_;

	  my @list;

	  # The label of each part equals the name of its accessor
	  for my $part (qw(compilation corpus query)) {
	    my $object = $self->$part or next;
	    push @list, $part . '=[' . $object->to_string($id) . ']';
	  }

	  return join(',', @list);
	};



	# Find identical subqueries and replace outer queries with
	# - references or
	# - cached queries
	#
	# Params:
	#   $query - the query object; its replace_subqueries() method is called
	#            with a fresh reference store and a cache object
	#
	# Returns the passed $query object.
	sub replace_subqueries {
	  my ($self, $query) = @_;

	  # The reference store will collect signatures of subqueries
	  # to replace identical subqueries with reference pointers
	  my $refs = {};

	  # This file does not load Krawfish::Cache itself, so load it on demand
	  # unless the class has already been made available elsewhere
	  require Krawfish::Cache unless Krawfish::Cache->can('new');

	  # TODO: Load real cache!
	  # The cache is global and will replace subqueries that are
	  # already cached
	  my $cache = Krawfish::Cache->new;
	  $query->replace_subqueries($refs, $cache);

	  return $query;
	};



	1;


	__END__



	# TODO:
	# This is the new entry point!
	# Draft placed after __END__, so Perl does not compile it. The body is
	# the "..." (ellipsis) placeholder; the comment inside sketches the
	# intended steps.
	sub prepare_for_cluster {
	# ->normalize->finalize->refer
	...
	};

	# Draft placed after __END__, so Perl does not compile it. The body is
	# the "..." (ellipsis) placeholder; the comment inside sketches the
	# intended steps.
	sub prepare_for_node {
	# ->identify($dict)
	# WARN! This may require a new normalization, but it should be kept in mind that this
	# may also cause warnings to be added twice!
	...
	};

	# Draft placed after __END__, so Perl does not compile it. The body is
	# the "..." (ellipsis) placeholder; the comment inside sketches the
	# intended steps.
	sub prepare_for_segment {
	# ->cache->optimize($segment)
	...
	};



	# This introduces the normalization phase.
	#
	# Draft placed after __END__, so Perl does not compile it. It follows the
	# same steps as to_query(), but finishes with compilation->to_nodes()
	# instead of compilation->wrap().
	#
	# Params (taken from @_ in order):
	#   $replicant_id - optional node id for replication retrieval
	#
	# Returns the finalized query if no compilation is set, otherwise the
	# result of compilation->to_nodes($query_final); returns nothing when a
	# normalize or finalize call returns false.
	#
	# TODO:
	# It should probably return a Koral::* object that can be sent!
	sub to_nodes {
	my $self = shift;

	# Optionally pass a node id for replication retrieval
	my $replicant_id = shift;

	# Build a complete query object
	my $query;

	# A virtual corpus and a query is given
	if ($self->corpus && $self->query) {

	# Filter query by corpus
	$query = $self->query_builder->filter_by($self->query, $self->corpus);
	}

	# Only a query is given
	elsif ($self->query) {

	print_log('koral', 'Added live document filter') if DEBUG;

	# Add corpus filter for live documents
	$query = $self->query_builder->filter_by(
	$self->query,
	$self->corpus_builder->anywhere
	);
	}

	# Only a corpus query is given
	# NOTE(review): this branch is also taken when neither query nor corpus
	# is set, leaving $query undefined; to_query() falls back to
	# corpus_builder->anywhere in that case - confirm before reviving.
	else {

	# TODO:
	# This may have influence on the possible compile object!
	$query = $self->corpus;
	};

	# If request is focused on replication, filter to replicates
	if ($replicant_id) {
	$query = $self->query_builder->filter_by(
	$query,
	$self->corpus_builder->replicant_node($replicant_id)
	);
	}

	# Focus on primary data (currently disabled)
	else {
	# $query = $self->query_builder->filter_by(
	# $query,
	# $self->corpus_builder->primary_node
	# );
	}

	# Normalize the query; on failure copy the info and return nothing
	my $query_norm;
	unless ($query_norm = $query->normalize) {
	$self->copy_info_from($query);
	return;
	};

	# Finalize the query; on failure copy the info and return nothing
	my $query_final;
	unless ($query_final = $query_norm->finalize) {
	$self->copy_info_from($query);
	return;
	};

	# This is just for testing
	return $query_final unless $self->compilation;

	# Normalize the compile
	my $compile;
	unless ($compile = $self->compilation->normalize) {
	$self->copy_info_from($self->compilation);
	return;
	};

	# Serialize from compile
	return $self->compilation->to_nodes($query_final);
	};




	# TODO:
	# This is just temporary, because results are still a mess!
	# Stub placed after __END__, so Perl does not compile it. It only
	# unpacks its arguments ($self, $dict) and does nothing else.
	sub to_segments {
	my ($self, $dict) = @_;
	};