Improve ranking mechanism

Change-Id: I23add5381ba7099485e5883bebad4997962b82ba
diff --git a/lib/Krawfish/Cluster.pm b/lib/Krawfish/Cluster.pm
index 9edd54c..3387e25 100644
--- a/lib/Krawfish/Cluster.pm
+++ b/lib/Krawfish/Cluster.pm
@@ -21,7 +21,7 @@
 sub search_for {
   my ($self, $query, $cb) = @_;
 
-  # This should probably open multiple websockets/unx-sockets in parallel
+  # This should probably open multiple websockets/unix-sockets in parallel
   # https://stackoverflow.com/questions/13417000/synchronous-request-with-websockets
   Mojo::IOLoop->delay(
     sub {
diff --git a/lib/Krawfish/Index.pm b/lib/Krawfish/Index.pm
index c911021..d3dd5de 100644
--- a/lib/Krawfish/Index.pm
+++ b/lib/Krawfish/Index.pm
@@ -9,6 +9,10 @@
 
 use constant DEBUG => 0;
 
+# TODO:
+#   May need to be renamed to Krawfish::Node
+
+
 # This is the central object for index handling on node level.
 # A new document will be added by adding the following information:
 # - To the dynamic DICTIONARY
@@ -129,7 +133,19 @@
   my $dict = $self->dict;
 
   # Add field
-  if ($dict->add_field($field_term, $collation)) {
+  if (my $term_id = $dict->add_field($field_term, $collation)) {
+
+    # Field is meant to be sortable
+    if ($collation) {
+
+      # Propagate the field to all segments
+      foreach (@{$self->segments}) {
+
+        # Introduce ranking file
+        $_->field_ranks->introduce_rank($term_id, $collation)
+      };
+    };
+
     # Field is added to the dictionary
     return 1;
   };
diff --git a/lib/Krawfish/Index/Fields.pm b/lib/Krawfish/Index/Fields.pm
index 59acb8f..fe29b6e 100644
--- a/lib/Krawfish/Index/Fields.pm
+++ b/lib/Krawfish/Index/Fields.pm
@@ -1,7 +1,8 @@
 package Krawfish::Index::Fields;
 use Krawfish::Index::Fields::Doc;
+use Krawfish::Index::Fields::Ranks;
 use Krawfish::Index::Fields::Pointer;
-use Krawfish::Index::Rank::Fields;
+# use Krawfish::Index::Rank::Fields;
 use Krawfish::Log;
 use warnings;
 use strict;
@@ -43,7 +44,8 @@
   my $class = shift;
   bless {
     docs => [],
-    last_doc_id => -1
+    last_doc_id => -1,
+    ranks => {}
   }, $class;
 };
 
@@ -62,7 +64,6 @@
   # TODO:
   #   use Krawfish::Index::Store::V1::Fields->new;
   $self->{docs}->[$self->last_doc_id] = Krawfish::Index::Fields::Doc->new($doc);
-
   return $doc_id;
 };
 
diff --git a/lib/Krawfish/Index/Fields/Doc.pm b/lib/Krawfish/Index/Fields/Doc.pm
index 71e9ddb..109466d 100644
--- a/lib/Krawfish/Index/Fields/Doc.pm
+++ b/lib/Krawfish/Index/Fields/Doc.pm
@@ -35,7 +35,6 @@
     };
   } @$fields;
 
-
   # Add field data
   my @data = ();
   foreach (@sorted_fields) {
@@ -49,6 +48,7 @@
     push @data, $_->value if $_->type eq 'int' || $_->type eq 'store';
   };
 
+
   push @data, 'EOF';
 
   print_log('fields_doc', 'The fields are ' . join(',', map { defined $_ ? $_ : '?' } @data)) if DEBUG;
diff --git a/lib/Krawfish/Index/Fields/Rank.pm b/lib/Krawfish/Index/Fields/Rank.pm
new file mode 100644
index 0000000..7935d09
--- /dev/null
+++ b/lib/Krawfish/Index/Fields/Rank.pm
@@ -0,0 +1,23 @@
+package Krawfish::Index::Fields::Rank;
+use strict;
+use warnings;
+
+sub new {
+  my $class = shift;
+  bless {
+    collation => shift,
+    prefix => [],
+    suffix => [],
+    plain => []
+  }, $class;
+};
+
+
+# Add an entry to the plain list
+sub add {
+  my $self = shift;
+  my ($value, $doc_id) = @_;
+  push @{$self->{plain}}, [$value, $doc_id];
+};
+
+1;
diff --git a/lib/Krawfish/Index/Fields/Ranks.pm b/lib/Krawfish/Index/Fields/Ranks.pm
new file mode 100644
index 0000000..73a85b9
--- /dev/null
+++ b/lib/Krawfish/Index/Fields/Ranks.pm
@@ -0,0 +1,25 @@
+package Krawfish::Index::Fields::Ranks;
+use Krawfish::Index::Fields::Rank;
+use strict;
+use warnings;
+
+sub new {
+  my $class = shift;
+  bless {}, $class;
+};
+
+# Get the rank by
+sub by {
+  my ($self, $field_id) = @_;
+
+  # Field may be ranked or not
+  return $self->{$field_id};
+};
+
+sub introduce_rank {
+  my ($self, $field_id, $collation) = @_;
+  $self->{$field_id} = Krawfish::Index::Fields::Rank->new($collation);
+};
+
+
+1;
diff --git a/lib/Krawfish/Index/Segment.pm b/lib/Krawfish/Index/Segment.pm
index 72cc5ac..03a3d11 100644
--- a/lib/Krawfish/Index/Segment.pm
+++ b/lib/Krawfish/Index/Segment.pm
@@ -1,5 +1,6 @@
 package Krawfish::Index::Segment;
 use Krawfish::Index::Fields;
+use Krawfish::Index::Fields::Ranks;
 use Krawfish::Index::PostingsLive;
 use Krawfish::Index::PostingsList;
 use Krawfish::Index::Forward;
@@ -61,9 +62,9 @@
     $self->{file}
   );
 
-  # $self->{field_ranks} = Krawfish::Index::Fields::Ranks->new(
-  #  $self->{file}
-  #);
+  $self->{field_ranks} = Krawfish::Index::Fields::Ranks->new(
+    $self->{file}
+  );
 
   # Create a list of docid -> uuid mappers
   # This may be problematic as uuids may need to be uint64,
@@ -96,9 +97,9 @@
 
 
 # Alias for last doc
-sub max_rank {
-  $_[0]->{live}->next_doc_id - 1;
-};
+#sub max_rank {
+#  $_[0]->{live}->next_doc_id - 1;
+#};
 
 
 # Get subtokens
@@ -125,6 +126,11 @@
 };
 
 
+sub field_ranks {
+  $_[0]->{field_ranks};
+};
+
+
 # Return a postings list based on a term_id
 sub postings {
   my ($self, $term_id) = @_;
@@ -141,6 +147,7 @@
 };
 
 
+
 # Add a prepared document to the index
 sub add {
   my ($self, $doc) = @_;
@@ -167,19 +174,26 @@
   #   Rank fields!
   # $self->field_ranks();
 
-  # TODO:
-  #   Deal with sortables!
-
   # $self->invert->add()
 
   # Create term index for fields
   my $fields = $doc->fields;
+  my $ranks  = $self->field_ranks;
   foreach (@$fields) {
     next if $_->type eq 'store';
     if (DEBUG) {
       print_log('seg', 'Added field #' . $_->term_id . ' for doc_id=' . $doc_id);
     };
     $self->postings($_->term_id)->append($doc_id);
+
+    # The field is sortable
+    if ($_->sortable) {
+
+      # Add field value to ranking
+      my $ranked_by = $ranks->by($_->key_id);
+
+      $ranked_by->add($_->value, $doc_id) if $ranked_by;
+    };
   };
 
 
diff --git a/lib/Krawfish/Koral/Document.pm b/lib/Krawfish/Koral/Document.pm
index aa96468..945ebe6 100644
--- a/lib/Krawfish/Koral/Document.pm
+++ b/lib/Krawfish/Koral/Document.pm
@@ -63,11 +63,6 @@
 };
 
 
-sub sortable {
-  $_[0]->{sortable};
-};
-
-
 # Translate all terms into term_ids and
 # add unknown terms to the dictionary
 sub identify {
diff --git a/lib/Krawfish/Koral/Document/FieldBase.pm b/lib/Krawfish/Koral/Document/FieldBase.pm
index e8c25cb..e1f3559 100644
--- a/lib/Krawfish/Koral/Document/FieldBase.pm
+++ b/lib/Krawfish/Koral/Document/FieldBase.pm
@@ -11,7 +11,7 @@
 
 sub new {
   my $class = shift;
-  # key, value, key_id, key_value_id
+  # key, value, key_id, key_value_id, sortable
   bless { @_ }, $class;
 };
 
diff --git a/lib/Krawfish/Koral/Document/FieldString.pm b/lib/Krawfish/Koral/Document/FieldString.pm
index 0f67869..68715a6 100644
--- a/lib/Krawfish/Koral/Document/FieldString.pm
+++ b/lib/Krawfish/Koral/Document/FieldString.pm
@@ -16,10 +16,13 @@
 sub identify {
   my ($self, $dict) = @_;
 
+  # This will check, if the field is
+  # sortable
   return if $self->{key_id} && $self->{key_value_id};
 
   # Get or introduce new key term_id
   my $key  = '!' . $self->{key};
+
   $self->{key_id} = $dict->add_term($key);
 
   # Set sortable
diff --git a/lib/Krawfish/Koral/Document/Fields.pm b/lib/Krawfish/Koral/Document/Fields.pm
index 4c13c57..33833c8 100644
--- a/lib/Krawfish/Koral/Document/Fields.pm
+++ b/lib/Krawfish/Koral/Document/Fields.pm
@@ -60,5 +60,4 @@
 };
 
 
-
 1;
diff --git a/lib/Krawfish/Koral/Meta/Limit.pm b/lib/Krawfish/Koral/Meta/Limit.pm
index ed2dce6..7b3a922 100644
--- a/lib/Krawfish/Koral/Meta/Limit.pm
+++ b/lib/Krawfish/Koral/Meta/Limit.pm
@@ -33,8 +33,13 @@
   my ($self, $query) = @_;
 
   # For sampling, limiting has no effect
-  if ($query->type eq 'sort') {
-    $query->top_k($self->items_per_type);
+  if ($query->type eq 'sample') {
+
+    # WARNING:
+    #   This only holds true for the segment level,
+    #   on the cluster and node levels,
+    #   the limit (at least the top-k) is important.
+    $query->top_k($self->items_per_page);
     return $query;
   };
 
diff --git a/lib/Krawfish/Koral/Meta/Node/Aggregate.pm b/lib/Krawfish/Koral/Meta/Node/Aggregate.pm
index 27dacf2..1782fe3 100644
--- a/lib/Krawfish/Koral/Meta/Node/Aggregate.pm
+++ b/lib/Krawfish/Koral/Meta/Node/Aggregate.pm
@@ -3,6 +3,8 @@
 use strict;
 use warnings;
 
+# TODO:
+#   Identify() should probably first return a Segment::Aggregate object
 
 sub new {
   my $class = shift;
diff --git a/lib/Krawfish/Koral/Meta/Node/Sort.pm b/lib/Krawfish/Koral/Meta/Node/Sort.pm
index 20d6957..78d8552 100644
--- a/lib/Krawfish/Koral/Meta/Node/Sort.pm
+++ b/lib/Krawfish/Koral/Meta/Node/Sort.pm
@@ -3,13 +3,19 @@
 use strict;
 use warnings;
 
-use constant DEBUG => 1;
+use constant (
+  DEBUG => 1,
+  UNIQUE => 'id'
+);
 
 sub new {
   my $class = shift;
 
   if (DEBUG) {
-    print_log('kq_n_sort', 'Create sort query with ' . join(', ', map {$_ ? $_ : '?'} @_));
+    print_log(
+      'kq_n_sort', 'Create sort query with ' .
+        join(', ', map {$_ ? $_ : '?'} @_)
+      );
   };
 
   my $self = bless {
@@ -21,6 +27,11 @@
 };
 
 
+sub type {
+  'sort';
+};
+
+
 # Get identifiers
 sub identify {
   my ($self, $dict) = @_;
@@ -28,10 +39,10 @@
   my @identifier;
   foreach (@{$self->{sort}}) {
 
-    # Field may not exist in dictionary
-    my $field = $_->identify($dict);
-    if ($field) {
-      push @identifier, $field;
+    # Criterion may not exist in dictionary
+    my $criterion = $_->identify($dict);
+    if ($criterion) {
+      push @identifier, $criterion;
     };
   };
 
@@ -48,6 +59,7 @@
 };
 
 
+# Stringification
 sub to_string {
   my $self = shift;
   my $str = join(',', map { $_->to_string } @{$self->{sort}});
@@ -67,9 +79,13 @@
 sub optimize {
   my ($self, $segment) = @_;
 
-  warn 'Sorting not yet implemented';
+  my $query = $self->{query}->optimize($segment);
 
-  return $self->{query}->optimize($segment);
+  if ($query->max_freq == 0) {
+    return Krawfish::Query::Nothing->new;
+  };
+
+  return $self;
 };
 
 1;
diff --git a/lib/Krawfish/Koral/Meta/Node/Sort/Sample.pm b/lib/Krawfish/Koral/Meta/Node/Sort/Sample.pm
index 827b2c5..6c71886 100644
--- a/lib/Krawfish/Koral/Meta/Node/Sort/Sample.pm
+++ b/lib/Krawfish/Koral/Meta/Node/Sort/Sample.pm
@@ -7,7 +7,7 @@
   my $class = shift;
   bless {
     query => shift,
-    n => shift
+    top_k => shift
   }, $class;
 };
 
@@ -15,6 +15,18 @@
   'sample'
 };
 
+# Set or get the top_k limitation!
+sub top_k {
+  my $self = shift;
+  if (defined $_[0]) {
+    $self->{top_k} = shift;
+    return $self;
+  };
+  return $self->{top_k};
+};
+
+
+
 sub identify {
   my ($self, $dict) = @_;
 
@@ -35,13 +47,13 @@
 
   return Krawfish::Result::Segment::Sort::Sample->new(
     $query,
-    $self->{n}
+    $self->{top_k}
   )
 };
 
 
 sub to_string {
-  return 'sample(' . $_[0]->{n} . ':' . $_[0]->{query}->to_string . ')';
+  return 'sample(' . $_[0]->{top_k} . ':' . $_[0]->{query}->to_string . ')';
 };
 
 1;
diff --git a/lib/Krawfish/Koral/Meta/Sort.pm b/lib/Krawfish/Koral/Meta/Sort.pm
index a72ea18..0e2cb68 100644
--- a/lib/Krawfish/Koral/Meta/Sort.pm
+++ b/lib/Krawfish/Koral/Meta/Sort.pm
@@ -1,11 +1,15 @@
 package Krawfish::Koral::Meta::Sort;
-use Krawfish::Result::Node::Sort;
 use Krawfish::Koral::Meta::Node::Sort;
+use Krawfish::Koral::Meta::Sort::Field;
+use Krawfish::Koral::Meta::Type::Key;
 use Krawfish::Log;
 use strict;
 use warnings;
 
-use constant DEBUG => 1;
+use constant {
+  DEBUG => 1,
+  UNIQUE_ID => 'id'
+};
 
 # TODO:
 #   Support top_k setting from limit!
@@ -27,11 +31,17 @@
   bless {
     sort => [@_],
     top_k => undef,
-    filter => undef
+    filter => undef,
+    unique => UNIQUE_ID
   }, $class;
 };
 
 
+sub type {
+  'sort';
+};
+
+
 # Set or get the top_k limitation!
 sub top_k {
   my $self = shift;
@@ -43,6 +53,8 @@
 };
 
 
+# Use sort filter (only possible, in case no aggregation
+# or grouping is applied)
 sub filter {
   my $self = shift;
   if (defined $_[0]) {
@@ -83,10 +95,6 @@
 };
 
 
-sub type {
-  'sort';
-};
-
 
 # Remove duplicates
 sub normalize {
@@ -94,6 +102,13 @@
   my @unique;
   my %unique;
   my $sampling = 0;
+
+  # Add unique sorting to sort array
+  push @{$self->{sort}}, Krawfish::Koral::Meta::Sort::Field->new(
+    Krawfish::Koral::Meta::Type::Key->new($self->{unique})
+  );
+
+  # Normalize sorting
   foreach (@{$self->{sort}}) {
 
     # Sampling can't be combined with other sorting
@@ -104,16 +119,21 @@
       return $_;
     };
 
+    # Push unique sorting criteria to sorting array
     unless (exists $unique{$_->to_string}) {
       push @unique, $_;
       $unique{$_->to_string} = 1;
     };
   };
+
+  # Create unique sort
   @{$self->{sort}} = @unique;
+
   return $self;
 };
 
 
+# Wrap query object
 sub wrap {
   my ($self, $query) = @_;
   return Krawfish::Koral::Meta::Node::Sort->new(
diff --git a/lib/Krawfish/Koral/Meta/Sort/Field.pm b/lib/Krawfish/Koral/Meta/Sort/Field.pm
index 1928cbc..3e56878 100644
--- a/lib/Krawfish/Koral/Meta/Sort/Field.pm
+++ b/lib/Krawfish/Koral/Meta/Sort/Field.pm
@@ -1,4 +1,5 @@
 package Krawfish::Koral::Meta::Sort::Field;
+use Krawfish::Result::Segment::Sort::Field;
 use Krawfish::Util::String qw/squote/;
 use strict;
 use warnings;
@@ -6,7 +7,7 @@
 sub new {
   my $class = shift;
   bless {
-    field => shift,
+    field => shift, # Is a Koral::Meta::Type::Key
     desc => shift
   }, $class;
 };
@@ -19,6 +20,20 @@
   return $_[0]->{field};
 };
 
+sub desc {
+  return $_[0]->{desc};
+};
+
+sub optimize {
+  my ($self, $segment) = @_;
+
+  return Krawfish::Result::Segment::Sort::Field->new(
+    $segment,
+    $_[0]->{field}, # Should probably be a field_id!
+    $_[0]->{desc}
+  );
+};
+
 sub identify {
   my ($self, $dict) = @_;
   my $field = $self->{field}->identify($dict);
diff --git a/lib/Krawfish/Result/Segment/Sort.pm b/lib/Krawfish/Result/Segment/Sort.pm
index 5489a9d..1d6b4a2 100644
--- a/lib/Krawfish/Result/Segment/Sort.pm
+++ b/lib/Krawfish/Result/Segment/Sort.pm
@@ -50,6 +50,8 @@
 # TODO:
 #   It should be possible to add the sorting criteria.
 
+
+# Constructor
 sub new {
   my $class = shift;
   my %param = @_;
diff --git a/lib/Krawfish/Result/Segment/Sort/Field.pm b/lib/Krawfish/Result/Segment/Sort/Field.pm
index 85b400b..9ae4cfe 100644
--- a/lib/Krawfish/Result/Segment/Sort/Field.pm
+++ b/lib/Krawfish/Result/Segment/Sort/Field.pm
@@ -12,16 +12,16 @@
   my $class = shift;
 
   my $self = bless {
-    index => shift,
-    field => shift,
+    segment    => shift,
+    field      => shift,
     descending => shift // 0
   }, $class;
 
   # Get ranking
-  $self->{ranks} = $self->{index}->fields->ranked_by($field) or return;
+  # $self->{ranks} = $self->{index}->fields->ranked_by($field) or return;
 
   # Get maximum rank if descending order
-  $self->{max} = $self->{ranks}->max if $self->{descending};
+  # $self->{max} = $self->{ranks}->max if $self->{descending};
 
   return $self;
 };
@@ -29,13 +29,13 @@
 
 sub get {
   my $self = shift;
-  my $max = $ranking->max if $desc;
+#  my $max = $ranking->max if $desc;
 
   # Get stored rank
-  $rank = $ranking->get(shift);
+#  $rank = $ranking->get(shift);
 
   # Revert if maximum rank is set
-  return $max ? $max - $rank : $rank;
+#  return $max ? $max - $rank : $rank;
 };
 
 
diff --git a/lib/Krawfish/Result/Segment/Sort/MultiField.pm b/lib/Krawfish/Result/Segment/Sort/MultiField.pm
deleted file mode 100644
index 25159cb..0000000
--- a/lib/Krawfish/Result/Segment/Sort/MultiField.pm
+++ /dev/null
@@ -1,30 +0,0 @@
-package Krawfish::Result::Segment::Sort::MultiField;
-use strict;
-use warnings;
-
-# Sorting criterion for multi value field ranks.
-
-# See Krawfish::Result::Segment::Sort::Field
-
-sub new {
-  my $class = shift;
-
-  my $self = bless {
-    index => shift,
-    field => shift,
-    descending => shift // 0
-  }, $class;
-
-  # Get ranking
-  # TODO: Depending on the descending value, different rankings need to be loaded
-
-  return $self;
-};
-
-
-sub get {
-  my $self = shift;
-
-  # Get stored rank
-  return $self->{ranking}->get(shift);
-};
diff --git a/lib/Krawfish/Result/Sort/Random.pm b/lib/Krawfish/Result/Segment/Sort/Random.pm
similarity index 61%
rename from lib/Krawfish/Result/Sort/Random.pm
rename to lib/Krawfish/Result/Segment/Sort/Random.pm
index c302664..1fedc12 100644
--- a/lib/Krawfish/Result/Sort/Random.pm
+++ b/lib/Krawfish/Result/Segment/Sort/Random.pm
@@ -1 +1,3 @@
+# Return all elements in random order
+
 # https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle
diff --git a/lib/Krawfish/Result/Segment/Sort/Sample.pm b/lib/Krawfish/Result/Segment/Sort/Sample.pm
index 70def45..247e086 100644
--- a/lib/Krawfish/Result/Segment/Sort/Sample.pm
+++ b/lib/Krawfish/Result/Segment/Sort/Sample.pm
@@ -3,16 +3,21 @@
 use strict;
 use warnings;
 
-# https://en.wikipedia.org/wiki/Reservoir_sampling
-# https://webkist.wordpress.com/2008/10/01/reservoir-sampling-in-perl/
-# https://blogs.msdn.microsoft.com/spt/2008/02/05/reservoir-sampling/
-
-# A. Anagnostopoulos, A. Z. Broder, and D. Carmel. Sampling search-engine results. In Proc. of the Fourteenth International World Wide Web Conference, Chiba, Japan, 2005. ACM Press.
-
+# Sort all matches in random order and only return the top_k
+# results. This is implemented using reservoir sampling.
+# Difference to random sorting is, this won't randomly sort all
+# results, making paging possible.
 
 # WARNING:
-#   Sorting does not respect current_match of any nested query, that's why
-#   sorting is always separated from enriching!
+#   Sorting does not respect current_match of any nested query,
+#   that's why sorting is always separated from enriching!
+
+# See
+#   https://en.wikipedia.org/wiki/Reservoir_sampling
+#   https://webkist.wordpress.com/2008/10/01/reservoir-sampling-in-perl/
+#   https://blogs.msdn.microsoft.com/spt/2008/02/05/reservoir-sampling/
+#   A. Anagnostopoulos, A. Z. Broder, and D. Carmel. Sampling search-engine results. In Proc. of the Fourteenth International World Wide Web Conference, Chiba, Japan, 2005. ACM Press.
+
 
 use constant DEBUG => 1;
 
diff --git a/lib/Krawfish/Result/Segment/Sort/SubTerm.pm b/lib/Krawfish/Result/Segment/Sort/SubTerm.pm
index b86e964..111af25 100644
--- a/lib/Krawfish/Result/Segment/Sort/SubTerm.pm
+++ b/lib/Krawfish/Result/Segment/Sort/SubTerm.pm
@@ -1,7 +1,9 @@
-package Krawfish::Result::Segment::Sort::Field;
+package Krawfish::Result::Segment::Sort::SubTerm;
 use strict;
 use warnings;
 
+# This will sort based on a pre-ranked subterm
+
 sub new {
   my $class = shift;