Fixed koral stringification and improved cluster architecture
diff --git a/lib/Krawfish/Cluster.pm b/lib/Krawfish/Cluster.pm
new file mode 100644
index 0000000..9c0c470
--- /dev/null
+++ b/lib/Krawfish/Cluster.pm
@@ -0,0 +1,15 @@
+package Krawfish::Cluster;
+use strict;
+use warnings;
+
+# Krawfish::Cluster sends queries to multiple nodes
+# and takes care of failures in responses
+
+sub new {
+  my ($class) = @_;
+  # Constructor: a fresh cluster starts with an empty node list
+  my %state = (nodes => []);
+  return bless \%state, $class;
+};
+
+1;
diff --git a/lib/Krawfish/Controller/Index.pm b/lib/Krawfish/Controller/Index.pm
index 6964305..9a1cbcc 100644
--- a/lib/Krawfish/Controller/Index.pm
+++ b/lib/Krawfish/Controller/Index.pm
@@ -1,5 +1,7 @@
package Krawfish::Controller::Index;
use Mojo::Base 'Mojolicious::Controller';
+use Krawfish::Cluster;
+use Krawfish::Koral;
use strict;
use warnings;
@@ -9,4 +11,70 @@
$c->render(json => $kq);
};
+sub search {
+  # Action: parse KoralQuery JSON, normalize it, query all nodes, render
+  my $c = shift;
+  my $json = $c->req->json; # The body is a plain string without ->json
+  # TODO:
+  #   This is just conceptually for the moment
+
+  # Read koral from json input
+  my $koral = Krawfish::Koral->new;
+
+  # There is something seriously wrong
+  unless ($koral->from_koral_query($json)) {
+    # Query can't be parsed!
+    return $c->reply->exception('Unable to parse KoralQuery');
+  };
+
+  # Create a response object
+  my $response = $koral->clone;
+  # TODO:
+  #   Or clone on normalization???
+
+  # Prepare passed query to nodes
+  my $node_koral = $koral->to_nodes;
+
+  # Something went wrong during normalization
+  unless ($node_koral) {
+    $response->copy_info_from($koral);
+    return $c->render(json => $response->to_koral_query);
+  };
+
+  # Nothing matches
+  if ($node_koral->is_nothing) {
+    $response->copy_info_from($koral);
+    warn 'Matches nowhere - no reason to send to nodes';
+    return $c->render(json => $response->to_koral_query);
+  };
+
+  # Get nodes object
+  my $cluster = Krawfish::Cluster->new;
+
+  # Send to all nodes
+  $node_koral->send(
+    $cluster => (
+
+      # This sub will be triggered for each node
+      sub {
+        my ($query, $node) = @_;
+
+        # Process the head data
+        $query->process_head($node->response->head);
+      },
+
+      # This sub will be triggered after all nodes were passed
+      sub {
+        my $query = shift;
+
+        # Add result to response
+        $response->{response} = $query->to_response;
+
+        # Return koral query response
+        return $c->render(json => $response->to_koral_query);
+      }
+    )
+  );
+};
+
1;
diff --git a/lib/Krawfish/Koral.pm b/lib/Krawfish/Koral.pm
index 87da869..59cf7bd 100644
--- a/lib/Krawfish/Koral.pm
+++ b/lib/Krawfish/Koral.pm
@@ -5,6 +5,7 @@
use Krawfish::Koral::Query::Builder;
use Krawfish::Koral::Corpus::Builder;
use Krawfish::Koral::Meta::Builder;
+use Krawfish::Koral::Meta;
use Krawfish::Koral::Document;
# Parse a koral query and transform to an actual
@@ -62,10 +63,12 @@
sub new {
my $class = shift;
my $self = bless {
- query => undef,
- corpus => undef,
- meta => undef,
- document => undef
+ query => undef, # The query definition
+ corpus => undef, # The vc definition
+ matches => undef, # List of match IDs
+ meta => undef, # The meta definitions
+ document => undef, # Document data to import
+ response => undef # Response object
}, $class;
return $self unless @_;
@@ -93,7 +96,7 @@
};
-# Get the builder
+# Get the query builder
sub query_builder {
Krawfish::Koral::Query::Builder->new;
};
@@ -110,7 +113,7 @@
};
-# Get the builder
+# Get the corpus builder
sub corpus_builder {
Krawfish::Koral::Corpus::Builder->new;
};
@@ -120,24 +123,19 @@
sub meta {
my $self = shift;
if ($_[0]) {
- $self->{meta} = shift;
+ $self->{meta} = Krawfish::Koral::Meta->new(@_);
return $self;
};
return $self->{meta};
};
-# Get the builder
+# Get the meta builder
sub meta_builder {
Krawfish::Koral::Meta::Builder->new;
};
-sub sorting {
- ...
-};
-
-
# sub response { ... };
sub from_koral_query {
@@ -145,6 +143,75 @@
};
+# Clone the query object (stub: the yada statement dies when called)
+sub clone {
+ ...
+};
+
+# This introduces the normalization phase. Returns the query prepared
+# for the nodes, or nothing if there is no query or normalization fails.
+sub to_nodes {
+  my $self = shift;
+
+  # Build a complete query object
+  my $query;
+
+  # A virtual corpus and a query is given
+  if ($self->corpus && $self->query) {
+
+    # Filter query by corpus
+    $query = $self->query_builder->filter_by($self->query, $self->corpus);
+  }
+
+  # Only a query is given
+  elsif ($self->query) {
+
+    # Add corpus filter for live documents
+    $query = $self->query_builder->filter_by(
+      $self->query,
+      $self->corpus_builder->any
+    );
+  }
+
+  # Only a corpus query is given
+  else {
+
+    # TODO:
+    #   This may have influence on the possible meta object!
+    $query = $self->corpus;
+  };
+
+  # Neither query nor corpus is set - nothing to normalize
+  return unless defined $query;
+
+  # Normalize the query
+  my $query_norm;
+  unless ($query_norm = $query->normalize) {
+    $self->copy_info_from($query);
+    return;
+  };
+
+  # Finalize the query
+  # NOTE(review): normalize is called twice here - was finalize intended?
+  my $query_final;
+  unless ($query_final = $query_norm->normalize) {
+    $self->copy_info_from($query);
+    return;
+  };
+
+  # This is just for testing
+  return $query_final unless $self->meta;
+
+  # Normalize the meta
+  unless ($self->meta->normalize) {
+    $self->copy_info_from($self->meta);
+    return;
+  };
+
+  # Serialize from meta
+  return $self->meta->to_nodes($query_final);
+};
+
# Serialization of KoralQuery
sub to_koral_query {
my $self = shift;
@@ -170,140 +237,10 @@
-# This introduces the normalization phase
-sub to_nodes {
- my $self = shift;
-
- my $query;
-
- # A virtual corpus and a query is given
- if ($self->corpus && $self->query) {
-
- # Filter query by corpus
- $query = $self->query_builder->filter_by($self->query, $self->corpus);
- }
-
- # Only a query is given
- elsif ($self->query) {
-
- # Add corpus filter for live documents
- $query = $self->query_builder->filter_by(
- $self->query,
- $self->corpus_builder->any
- );
- }
-
- # Only corpus query is given
- else {
- $query = $self->corpus;
- };
-
- # Normalize the query
- my $query_norm;
- unless ($query_norm = $query->normalize) {
- $self->copy_info_from($query);
- return;
- };
-
- # Finalize the query
- my $query_final;
- unless ($query_final = $query_norm->normalize) {
- $self->copy_info_from($query);
- return;
- };
-
- # Normalize the meta
- my $meta;
- unless ($meta = $self->meta->normalize) {
- $self->copy_info_from($self->meta);
- return;
- };
-
- # Get serialization for nodes
- return $self->meta->to_nodes($query_final);
-};
-# Normalize object
-sub normalize {
- my $self = shift;
-
- my $query;
-
- # Corpus and query are given - filter!
- if ($self->query && $self->corpus) {
-
- my $corpus = $self->corpus;
-
- # Add corpus filter
- $query = $self->query_builder->filter_by($self->query, $self->corpus);
-
- # Meta is defined
- if ($self->meta) {
-
- # TODO: Make this a filter query!
-
- # Wrap in sort filter if available
- $corpus = $self->meta->sort_filter($corpus);
- };
- }
-
- # Only corpus is given
- elsif ($self->corpus) {
- $query = $self->corpus;
-
- # Wrap in sort filter if available
-
- # TODO:
- # $query = $self->meta->sort_filter($query, $index) if $self->meta;
- }
-
- # Only query is given
- elsif ($self->query) {
-
- # Add corpus filter for live documents
- $query = $self->query_builder->filter_by(
- $self->query,
- $self->corpus_builder->any
- );
- };
-
-
- # If meta is defined, prepare results
- if ($self->meta) {
- $query = $self->meta->search_for($query);
- };
-
- # TODO:
- # The following operations will invalidate sort filtering:
- # - grouping
- # - aggregate (except result is already cached)
-
- # TODO:
- # if ($self->sorting && $self->sorting->filter) {
- # # Filter matches using a sort filter
- # $query = $self->query->filter_by($self->sorting->filter);
- # };
-
- # TODO:
- # - Find identical subqueries
- # - This is especially useful for VC filtering,
- # - Terms (PostingsList) will automatically avoid
- # lifting posting lists multiple times.
- #
- # That means: create a buffered version of $self->corpus
- #
- # TODO: Make this part of ->plan_for($index, $refs)
- #
- # $query->replace_references;
-
- # Prepare query
- return $query->normalize;
-};
-
-
# Get KoralQuery with results
sub to_result {
my ($self, $index) = @_;
@@ -452,33 +389,83 @@
my $self = shift;
my $str = '';
- # TODO:
- # Post normalization, this will only have
- # a query part, otherwise it will have
- # a corpus and a meta part
+ my @list = ();
- if ($self->corpus && $self->query) {
- $str .= 'filter(';
- $str .= $self->query->to_string;
- $str .= ',';
- $str .= $self->corpus->to_string;
- $str .= ')';
- }
- elsif ($self->corpus) {
- $str .= $self->corpus->to_string;
- }
- elsif ($self->query) {
- $str .= $self->query->to_string;
+ if ($self->meta) {
+ push @list, 'meta=[' . $self->meta->to_string . ']';
+ };
+ if ($self->corpus) {
+ push @list, 'corpus=[' . $self->corpus->to_string . ']';
+ };
+ if ($self->query) {
+ push @list, 'query=[' . $self->query->to_string . ']';
};
- # warn 'Stringification is not well defined';
- # TODO:
- # introduce ->normalize etc.
-
- return $str;
+ return join(',', @list);
};
1;
__END__
+
+
+sub sorting {
+ ...
+};
+
+
+# Normalize object
+sub normalize {
+ my $self = shift;
+
+ my $query;
+
+ # Corpus and query are given - filter!
+ if ($self->query && $self->corpus) {
+
+ my $corpus = $self->corpus;
+
+ # Add corpus filter
+ $query = $self->query_builder->filter_by($self->query, $self->corpus);
+
+ # Meta is defined
+ if ($self->meta) {
+
+ # TODO: Make this a filter query!
+
+ # Wrap in sort filter if available
+ $corpus = $self->meta->sort_filter($corpus);
+ };
+ }
+
+ # Only corpus is given
+ elsif ($self->corpus) {
+ $query = $self->corpus;
+
+ # Wrap in sort filter if available
+
+ # TODO:
+ # $query = $self->meta->sort_filter($query, $index) if $self->meta;
+ }
+
+ # Only query is given
+ elsif ($self->query) {
+
+ # Add corpus filter for live documents
+ $query = $self->query_builder->filter_by(
+ $self->query,
+ $self->corpus_builder->any
+ );
+ };
+
+
+ # If meta is defined, prepare results
+ if ($self->meta) {
+ $query = $self->meta->search_for($query);
+ };
+
+
+ # Prepare query
+ return $query->normalize;
+};
diff --git a/lib/Krawfish/Koral/Meta.pm b/lib/Krawfish/Koral/Meta.pm
index 3440e48..d74d44d 100644
--- a/lib/Krawfish/Koral/Meta.pm
+++ b/lib/Krawfish/Koral/Meta.pm
@@ -59,7 +59,7 @@
# 1. Introduce required information
# e.g. sort(field) => fields(field)
- my $aggregation = 0;
+ my $sort_filtering = 1;
for (my $i = 0; $i < @meta; $i++) {
# For all sort fields, it may be beneficial to
@@ -72,7 +72,12 @@
# There is at least one aggregation field
elsif ($meta[$i]->type eq 'aggregate') {
- $aggregation = 1;
+ $sort_filtering = 0;
+ }
+
+ # There is at least one group option
+ elsif ($meta[$i]->type eq 'group') {
+ $sort_filtering = 0;
}
# Remove any given sortfilter
@@ -141,9 +146,9 @@
$meta[-1] = $meta[-1]->normalize;
# 4. Optimize
- # No aggregation queries =>
+ # No aggregation or group queries =>
# add a sort filter at the end
- unless ($aggregation) {
+ if ($sort_filtering) {
push @meta, Krawfish::Koral::Meta::SortFilter->new;
};
@@ -153,17 +158,29 @@
return $self;
};
-# Create a Krawfish::Result::Meta::Node::* query
+
+# This will create a Krawfish::Result::Node::* query
sub to_nodes {
my ($self, $query) = @_;
# TODO:
# Don't forget the warnings etc.
+ # The order is probably:
+ # snippets(fields(aggregate(limit(sort()))))
+
# The meta query is expected to be normalized
- foreach (reverse $self->operands) {
+ foreach (reverse $self->operations) {
$query = $_->to_nodes($query);
};
+
+ return $query;
+};
+
+# Not implemented yet: unpacks its arguments, then the yada statement dies
+sub to_segment {
+ my ($self, $index) = @_;
+ ...
+};
diff --git a/lib/Krawfish/Koral/Meta/Aggregate.pm b/lib/Krawfish/Koral/Meta/Aggregate.pm
index 9d0043f..69fa2c1 100644
--- a/lib/Krawfish/Koral/Meta/Aggregate.pm
+++ b/lib/Krawfish/Koral/Meta/Aggregate.pm
@@ -1,4 +1,5 @@
package Krawfish::Koral::Meta::Aggregate;
+use Krawfish::Result::Node::Aggregate;
use List::MoreUtils qw/uniq/;
use strict;
use warnings;
@@ -34,8 +35,7 @@
sub to_nodes {
my ($self, $query) = @_;
- warn 'TODO';
- return $query;
+ return Krawfish::Result::Node::Aggregate->new($query, [$self->operations]);
};
diff --git a/lib/Krawfish/Koral/Meta/Fields.pm b/lib/Krawfish/Koral/Meta/Fields.pm
index 3792c7e..db5bf20 100644
--- a/lib/Krawfish/Koral/Meta/Fields.pm
+++ b/lib/Krawfish/Koral/Meta/Fields.pm
@@ -1,4 +1,5 @@
package Krawfish::Koral::Meta::Fields;
+use Krawfish::Result::Node::Fields;
use Krawfish::Util::String qw/squote/;
use List::MoreUtils qw/uniq/;
use strict;
@@ -38,10 +39,11 @@
};
+# TODO:
+# For the moment, I am not sure where "fields" act
sub to_nodes {
my ($self, $query) = @_;
- warn 'TODO';
- return $query;
+ return Krawfish::Result::Node::Fields->new($query, [$self->operations]);
};
diff --git a/lib/Krawfish/Koral/Meta/Sort.pm b/lib/Krawfish/Koral/Meta/Sort.pm
index fa6c3e7..b5ef039 100644
--- a/lib/Krawfish/Koral/Meta/Sort.pm
+++ b/lib/Krawfish/Koral/Meta/Sort.pm
@@ -1,4 +1,5 @@
package Krawfish::Koral::Meta::Sort;
+use Krawfish::Result::Node::Sort;
use List::MoreUtils qw/uniq/;
use strict;
use warnings;
@@ -49,8 +50,7 @@
sub to_nodes {
my ($self, $query) = @_;
- warn 'TODO';
- return $query;
+ return Krawfish::Result::Node::Sort->new($query, [$self->operations]);
};
diff --git a/lib/Krawfish/Result/Node.pm b/lib/Krawfish/Result/Node.pm
index 5781163..a53c6e4 100644
--- a/lib/Krawfish/Result/Node.pm
+++ b/lib/Krawfish/Result/Node.pm
@@ -31,6 +31,14 @@
};
+# Hand the head data down to the wrapped query; returns nothing itself
+sub process_head {
+  my $self = shift;
+  my $head = shift;
+  $self->{query}->process_head($head);
+  return;
+};
+
sub buffer {
return $_[0]->{buffer};
};
@@ -167,7 +175,7 @@
my ($self, $data) = @_;
foreach my $aggr (@{$self->{aggregation}}) {
- $aggr->aggregate($date);
+ $aggr->aggregate($data);
};
return 1;
diff --git a/lib/Krawfish/Result/Node/Aggregate.pm b/lib/Krawfish/Result/Node/Aggregate.pm
index 61f7f0f..840f5a5 100644
--- a/lib/Krawfish/Result/Node/Aggregate.pm
+++ b/lib/Krawfish/Result/Node/Aggregate.pm
@@ -1,7 +1,11 @@
-package Krawfish::Node::Aggregate;
+package Krawfish::Result::Node::Aggregate;
+use parent 'Krawfish::Result::Node';
use strict;
use warnings;
+# TODO:
+# Implement the aggregate() method on all Node::Aggregate::*
+
# May be renamed to
# - Krawfish::MultiSegment::Aggregate
# - Krawfish::MultiNodes::Aggregate
@@ -12,10 +16,71 @@
# match from that segment.
# Do this, until k is fine.
-# Distributet results are returned from each index
+# Distributed results are returned from each index
# in an aggregate data section followed by result lines.
# The result lines can be returned using next_current() etc.
# while the data aggregation section is returned by the first
# call.
+# Constructor: wraps a query together with an array ref of aggregates
+sub new {
+  my ($class, $query, $aggregates) = @_;
+  return bless {
+    query      => $query,
+    aggregates => $aggregates,
+    _fetched   => undef,
+    _result    => undef
+  }, $class;
+};
+
+# Read the header information of a node response, feed its aggregation
+# data to all registered aggregates and pass the head to the inner query
+sub process_head {
+  my $self = shift;
+  my $head = shift;
+
+  # Get aggregation data from head
+  my $aggr_data = $head->{aggregate};
+
+  # Every registered aggregate consumes the same head data
+  foreach my $aggregate (@{$self->{aggregates}}) {
+    $aggregate->aggregate($aggr_data);
+  };
+
+  # Go deeper
+  $self->{query}->process_head($head);
+};
+
+
+
+# Get result information: a hash ref mapping aggregate type to its data
+# Maybe "on final"
+sub result {
+  my $self = shift;
+
+  # Fetch all aggregation data from the types
+  my $result = {};
+  foreach my $op (@{$self->{aggregates}}) {
+    $result->{$op->type} = $op->aggregate;
+  };
+
+  # Without an explicit return the collected hash would be dropped
+  return $result;
+};
+
+# Advance the wrapped query; nothing else is done on this level
+sub next {
+  my $self = shift;
+  return $self->{query}->next;
+};
+
+
+# Stringify as aggr(<aggregates>:<inner query>)
+sub to_string {
+  my $self = shift;
+  my @aggregates = map { $_->to_string } @{$self->{aggregates}};
+  my $inner = $self->{query}->to_string;
+  return 'aggr(' . join(',', @aggregates) . ':' . $inner . ')';
+};
+
1;
diff --git a/lib/Krawfish/Result/Node/Fields.pm b/lib/Krawfish/Result/Node/Fields.pm
index 0921a7a..3d2a69c 100644
--- a/lib/Krawfish/Result/Node/Fields.pm
+++ b/lib/Krawfish/Result/Node/Fields.pm
@@ -1,14 +1,32 @@
package Krawfish::Result::Node::Fields;
+use parent 'Krawfish::Query';
+use Krawfish::Util::String qw/squote/;
use strict;
use warnings;
+# Krawfish::Result::Node::Fields actually does nothing. It's just a wrapper.
+# However, it may very well - like snippets - first collect matches and
+# then resend a request to the cluster for more information
+# (the kind of information is not specified yet).
+
+# TODO:
+# Fields should be part of the snippet generation mechanism!
+
sub new {
my $class = shift;
- return bless {
+ bless {
+ query => shift,
+ fields => shift
}, $class;
};
sub to_string {
+ my $self = shift;
+ return 'fields(' . join(',', map { squote($_) } @{$self->{fields}}) . ':' . $self->{query}->to_string . ')';
+};
+
+sub next {
+ $_[0]->{query}->next;
};
1;
diff --git a/lib/Krawfish/Result/Node/Sort.pm b/lib/Krawfish/Result/Node/Sort.pm
index 70c844d..c60fcd5 100644
--- a/lib/Krawfish/Result/Node/Sort.pm
+++ b/lib/Krawfish/Result/Node/Sort.pm
@@ -1,8 +1,7 @@
-package Krawfish::Node::Sort;
+package Krawfish::Result::Node::Sort;
use strict;
use warnings;
-
# This will simply mergesort the inmcoming
# streams using next and prepare 'criterion'
# for current.
@@ -15,11 +14,20 @@
sub new {
my $class = shift;
return bless {
+ query => shift,
+ sort => shift
}, $class;
};
+
sub to_string {
- ...
+ my $self = shift;
+ return 'sort(' . join(',', map { $_->to_string } @{$self->{sort}}) . ':' . $self->{query}->to_string . ')';
};
+sub next {
+  my $self = shift; # Delegate iteration to the wrapped query
+  return $self->{query}->next;
+};
+
1;
diff --git a/t/koral/filter.t b/t/koral/filter.t
index a3e512d..df3614a 100644
--- a/t/koral/filter.t
+++ b/t/koral/filter.t
@@ -31,18 +31,19 @@
$koral->query($kqb->term('bb'));
$koral->corpus($kcb->string('author')->eq('Peter'));
-is($koral->to_string, 'filter(bb,author=Peter)', 'Stringification');
+is($koral->to_string, 'corpus=[author=Peter],query=[bb]', 'Stringification');
-my $query = $koral->normalize->finalize->optimize($index);
-
+my $query = $koral->to_nodes->optimize($index);
# Can't match anywhere:
is($query->to_string, "[0]", 'Planned stringification');
$koral->corpus($kcb->string('author')->eq('Arthur'));
-is($koral->to_string, 'filter(bb,author=Arthur)', 'Stringification');
-$query = $koral->normalize->finalize->optimize($index);
+
+is($koral->to_string, 'corpus=[author=Arthur],query=[bb]', 'Stringification');
+
+$query = $koral->to_nodes->optimize($index);
# Can't match anywhere:
is($query->to_string, "filter('bb','author:Arthur')", 'Planned stringification');
diff --git a/t/koral/flow.t b/t/koral/flow.t
index cf7fe52..8946788 100644
--- a/t/koral/flow.t
+++ b/t/koral/flow.t
@@ -18,7 +18,9 @@
$mb->a_length,
),
$mb->fields('age'),
- $mb->sort_by('author')
+ $mb->sort_by(
+ $mb->s_field('author')
+ )
);
# Create query
@@ -43,9 +45,12 @@
)
);
-# Get the query
-# ok(my $query = $koral->to_nodes, 'Create complex query construct');
+is($koral->to_string, "meta=[aggr=[facets:['size','age'],freq,length],fields=['age'],sort=[field='author'<]],corpus=[1880&author=Goethe],query=[[/b./|aa][]cc]", 'Serialization');
+# Get the query
+ok(my $query = $koral->to_nodes, 'Create complex query construct');
+
+is($query->to_string, "fields('age','author','id':sort(field='author'<,field='id'<:aggr(length,freq,facets:['size','age']:filter(/b./|aa[]cc,1880&author=Goethe))))", 'Stringification');
done_testing;
__END__
diff --git a/t/koral/query.t b/t/koral/query.t
index 7d1a9bf..99a95ae 100644
--- a/t/koral/query.t
+++ b/t/koral/query.t
@@ -93,7 +93,7 @@
TODO: {
local $TODO = 'Test Serialization output';
- ok($koral = $koral->normalize->finalize, 'Finalize query');
+ ok($koral = $koral->to_nodes, 'Finalize query');
};
diff --git a/t/koral/serialization.t b/t/koral/serialization.t
index c549308..7aef59f 100644
--- a/t/koral/serialization.t
+++ b/t/koral/serialization.t
@@ -18,7 +18,7 @@
);
is($koral->to_string,
- '[Der][]<opennlp/c=NP>',
+ 'query=[[Der][]<opennlp/c=NP>]',
'Stringification');
my $serial = $koral->to_koral_query;
@@ -54,7 +54,8 @@
$koral->corpus($corpus_query);
is($koral->to_string,
- 'filter([Der][]<opennlp/c=NP>,author=Peter&pubDate>=2014-04-03)', 'Stringification');
+ 'corpus=[author=Peter&pubDate>=2014-04-03],query=[[Der][]<opennlp/c=NP>]',
+ 'Stringification');
$serial = $koral->to_koral_query;
diff --git a/t/plan/meta.t b/t/plan/meta.t
index 03d264d..1f8750f 100644
--- a/t/plan/meta.t
+++ b/t/plan/meta.t
@@ -21,9 +21,8 @@
)
);
-is($koral->to_string, '[Der]<opennlp/c=NP>');
+is($koral->to_string, 'query=[[Der]<opennlp/c=NP>]');
-diag 'Meta normalization not implemented yet';
done_testing;
__END__