Introduce concept for cluster search
diff --git a/lib/Krawfish/Cluster.pm b/lib/Krawfish/Cluster.pm
index 9c0c470..99b05bf 100644
--- a/lib/Krawfish/Cluster.pm
+++ b/lib/Krawfish/Cluster.pm
@@ -1,10 +1,13 @@
package Krawfish::Cluster;
+use Mojo::IOLoop;
use strict;
use warnings;
# Krawfish::Cluster queries to multiple nodes
# and takes care of failures in responses
+# See http://verdi.uwplse.org/
+
sub new {
my $class = shift;
bless {
@@ -12,4 +15,46 @@
}, $class;
};
+
+# Search for a query and return a response
+sub search_for {
+ my ($self, $query, $cb) = @_;
+
+ # This should probably open multiple websockets/unx-sockets in parallel
+ # https://stackoverflow.com/questions/13417000/synchronous-request-with-websockets
+ Mojo::IOLoop->delay(
+ sub {
+ my $delay = shift;
+ foreach my $node (@{$self->{nodes}}) {
+ $ua->post($node => json => $query => $delay->begin);
+ };
+ },
+ sub {
+ my $delay = shift;
+
+ # Iterate over all results
+ foreach (@_) {
+
+ # Responses have a head and a tail section
+ # In case, no aggregation or grouping is done,
+ # there is no head section.
+ # In case, there is grouping, there is no
+ # tail.
+ my $response = $_->res->json;
+
+ # Aggregate data, e.g. for grouping
+ $query->process_head($response->{head});
+
+ # Get through the matches
+ # TODO:
+ # This is, however, bad for merge sort!
+ $query->process_tail($response->{tail});
+ };
+ }
+ )->wait;
+
+ return $query->to_result;
+};
+
+
1;
diff --git a/lib/Krawfish/Controller/Index.pm b/lib/Krawfish/Controller/Index.pm
index 410a03e..f83ef9a 100644
--- a/lib/Krawfish/Controller/Index.pm
+++ b/lib/Krawfish/Controller/Index.pm
@@ -54,29 +54,17 @@
# Get nodes object
my $cluster = Krawfish::Cluster->new;
- # Send to all nodes
- $node_koral->send(
- $cluster => (
+ # Send query to all nodes
+ $cluster->search_for(
+ $node_koral => sub {
+ my $response = shift;
- # This sub will be triggered for each node
- sub {
- my ($query, $node) = @_;
+ # Add result to response
+ $response->{response} = $query->to_response;
- # Process the head data
- $query->process_head($node->response->head);
- },
-
- # This sub will triggered after all nodes were passed
- sub {
- my $query = shift;
-
- # Add result to response
- $response->{response} = $query->to_response;
-
- # Return koral query response
- return $c->render(json => $response->to_koral_query);
- }
- )
+ # Return koral query response
+ return $c->render(json => $response->to_koral_query);
+ }
);
};
diff --git a/lib/Krawfish/Posting.pm b/lib/Krawfish/Posting.pm
index ec175dd..fd3d912 100644
--- a/lib/Krawfish/Posting.pm
+++ b/lib/Krawfish/Posting.pm
@@ -111,13 +111,15 @@
return @classes;
};
+
# Return classes sorted by start position
sub get_classes_sorted {
my ($self, $nrs) = @_;
# The same as get_classes, but ordered by start position
return sort { $a->[1] <=> $b->[1] } $self->get_classes($nrs);
-}
+};
+
# This will be overwritten for at least cached buffers
# necessary for sorting
@@ -126,6 +128,7 @@
};
+# Clone the posting with all information
sub clone {
my $self = shift;
return __PACKAGE__->new(
@@ -136,7 +139,8 @@
);
}
-# Stringify
+
+# Stringification
sub to_string {
my $self = shift;
my $str = '[' .
@@ -152,6 +156,7 @@
};
+# Check if two postings are identical
sub is_identical {
my ($self, $comp) = @_;
return unless $comp;
diff --git a/lib/Krawfish/Posting/Bundle.pm b/lib/Krawfish/Posting/Bundle.pm
index 272da8b..cb8616b 100644
--- a/lib/Krawfish/Posting/Bundle.pm
+++ b/lib/Krawfish/Posting/Bundle.pm
@@ -7,8 +7,8 @@
# TODO:
# This is quite similar to K::P::Group
-# This is a container class for multiple
-# Krawfish::Posting objects
+# This is a container class for multiple Krawfish::Posting objects,
+# used for (among others) sorting.
# Constructor
sub new {
diff --git a/lib/Krawfish/Posting/Match.pm b/lib/Krawfish/Posting/Match.pm
index 299a13b..6062e43 100644
--- a/lib/Krawfish/Posting/Match.pm
+++ b/lib/Krawfish/Posting/Match.pm
@@ -1,10 +1,13 @@
package Krawfish::Posting::Match;
use parent 'Krawfish::Posting';
use Krawfish::Util::String qw/squote/;
-use JSON::XS;
use warnings;
use strict;
+
+# Matches are returned from searches and can be enriched
+# with various information
+
# Get or set field to match
sub fields {
my $self = shift;
@@ -36,7 +39,22 @@
};
};
+sub sorting_criteria;
+sub snippet;
+
+sub segment_id;
+
+sub match_id;
+
+
+# serialize to koralquery
+sub to_koral_query {
+ ...
+};
+
+
+# Stringification
sub to_string {
my $self = shift;
my $str = '[';
diff --git a/lib/Krawfish/Result/Cluster.pm b/lib/Krawfish/Result/Cluster.pm
deleted file mode 100644
index 41b974f..0000000
--- a/lib/Krawfish/Result/Cluster.pm
+++ /dev/null
@@ -1 +0,0 @@
-# See http://verdi.uwplse.org/
diff --git a/lib/Krawfish/Result/Group/Fields.pm b/lib/Krawfish/Result/Group/Fields.pm
index 82c442e..15eb364 100644
--- a/lib/Krawfish/Result/Group/Fields.pm
+++ b/lib/Krawfish/Result/Group/Fields.pm
@@ -6,9 +6,9 @@
use constant DEBUG => 0;
# This will group matches (especially document matches) by field
-# This is useful for document browsing.
+# This is useful e.g. for document browsing per corpus.
#
-# Because the groupiung is based on ranking, the sorting will be trivial.
+# Because the grouping is based on ranking, the sorting will be trivial.
sub new {
my $class = shift;
diff --git a/lib/Krawfish/Result/Node/Sort.pm b/lib/Krawfish/Result/Node/Sort.pm
index c60fcd5..0c9aa3d 100644
--- a/lib/Krawfish/Result/Node/Sort.pm
+++ b/lib/Krawfish/Result/Node/Sort.pm
@@ -1,33 +1,84 @@
package Krawfish::Result::Node::Sort;
+use Krawfish::Util::Heap;
use strict;
use warnings;
-# This will simply mergesort the inmcoming
-# streams using next and prepare 'criterion'
-# for current.
+# This will sort the incoming results using a heap
+# and the sort criteria.
+# This is obviously less efficient than a dynamic
+# mergesort, but for the moment, it's way simpler.
-# May need to return Krawfish::Posting::Sorted with a 'criterion' array.
-
-# Instead of next() followed by current(), this should use
-# next_current() and - for matches - next_match()
+# TODO:
+# May need to return Krawfish::Posting::Sorted with a 'criterion' array.
+# Instead of next() followed by current(), this should use
+# next_current() and - for matches - next_match()
sub new {
my $class = shift;
- return bless {
+ my $self = bless {
query => shift,
- sort => shift
+ sort => shift,
+ top_k => shift
}, $class;
+
+ $self->{heap} = Krawfish::Util::Heap->new($self->{top_k});
+
+ # Add criterion comparation method here
+ $self->{heap}->sort_by(
+ sub {
+ my ($obj_a, $obj_b) = @_;
+
+ my $criterion_a = $obj_a->{criterion};
+ my $criterion_b = $obj_b->{criterion};
+
+ for (my $i = 0; $i < @{$criterion_a}; $i++) {
+ if ($criterion_b->[$i]) {
+ return 1;
+ };
+ if ($criterion_a->[$i] < $criterion_b->[$i]) {
+ return -1;
+ }
+ elsif ($criterion_a->[$i] > $criterion_b->[$i]) {
+ return 1;
+ };
+ };
+ return -1;
+ }
+ );
+
+ return $self;
};
sub to_string {
my $self = shift;
- return 'sort(' . join(',', map { $_->to_string } @{$self->{sort}}) . ':' . $self->{query}->to_string . ')';
+ return 'sort(' .
+ join(',', map { $_->to_string }
+ @{$self->{sort}}) . ':' . $self->{query}->to_string . ')';
};
-sub next {
- $_[0]->{query}->next;
+
+# Process one tail
+sub process_tail {
+ my ($self, $tail) = @_;
+
+ # Iterate over all matches
+ foreach my $match (@$tail) {
+
+ # Enqueue as long as the list isn't full
+ unless ($self->{heap}->enqueue($match)) {
+ last;
+ };
+ };
+
+ $self->{query}->process_tail($tail);
};
+sub to_result {
+ ...
+};
+
+
+
1;
diff --git a/lib/Krawfish/Util/Heap.pm b/lib/Krawfish/Util/Heap.pm
new file mode 100644
index 0000000..e4c46ff
--- /dev/null
+++ b/lib/Krawfish/Util/Heap.pm
@@ -0,0 +1,41 @@
+package Krawfish::Util::Heap;
+use strict;
+use warnings;
+
+# Heap structure for top-k heap sort
+
+# TODO:
+# Use this as the base for PrioritySort
+
+
+sub new {
+ my $class = shift;
+ bless {
+ top_k => shift,
+ _sort => sub { $_[0] cmp $_[1] }
+ }, $class;
+};
+
+
+# Get or set sort method
+sub sort_by {
+ my $self = shift;
+ if (@_) {
+ $self->{_sort} = shift;
+ return $self;
+ };
+ return $self->{_sort};
+};
+
+
+sub enqueue {
+ ...
+};
+
+
+sub dequeue {
+ ...
+};
+
+
+1;