Established second level sorting

Change-Id: Ie9433c62c6cf23a97059ebae214a29ae55681130
diff --git a/lib/Krawfish/Meta/Segment/Bundle.pm b/lib/Krawfish/Meta/Segment/Bundle.pm
new file mode 100644
index 0000000..4cf1812
--- /dev/null
+++ b/lib/Krawfish/Meta/Segment/Bundle.pm
@@ -0,0 +1,32 @@
+package Krawfish::Meta::Segment::Bundle;
+use parent 'Krawfish::Meta';
+
+sub current_bundle {
+  $_[0]->{current_bundle};
+};
+
+sub current_match {
+  $_[0]->{current_match};
+};
+
+sub current {
+  return $_[0]->{query}->current;
+};
+
+
+sub next_bundle {
+  ...
+};
+
+
+# These call methods in Posting::Bundle!
+sub next {
+  ...
+};
+
+sub max_freq {
+  $_[0]->{query}->max_freq;
+};
+
+
+1;
diff --git a/lib/Krawfish/Meta/Segment/BundleDocs.pm b/lib/Krawfish/Meta/Segment/BundleDocs.pm
index 121bc7c..b4bfe08 100644
--- a/lib/Krawfish/Meta/Segment/BundleDocs.pm
+++ b/lib/Krawfish/Meta/Segment/BundleDocs.pm
@@ -1,5 +1,5 @@
 package Krawfish::Meta::Segment::BundleDocs;
-use parent 'Krawfish::Meta';
+use parent 'Krawfish::Meta::Segment::Bundle';
 use Krawfish::Log;
 use Krawfish::Posting::DocBundle;
 use strict;
@@ -7,13 +7,17 @@
 
 # Bundle matches in the same document.
 
+# TODO:
+#   The problem with the current approch is, that next_bundle
+#   will bundle the next doc - without asking if the doc
+#   is relevant.
+
 use constant DEBUG => 1;
 
 sub new {
   my $class = shift;
   bless {
     query => shift,
-    next_item => undef,
     buffer => undef
   }, $class;
 };
@@ -27,12 +31,41 @@
     print_log('d_bundle', 'Get bundle');
   };
 
-  return $self->{current_bundle} if $self->{current_bundle};
+  return $self->{current_bundle};
+};
 
-  my $first = $self->{next_item};
 
-  # Need a next() first
-  return unless $first;
+# TODO:
+#   Implement next doc!
+sub next_doc {
+  ...
+};
+
+
+# Move to next bundle
+sub next_bundle {
+  my $self = shift;
+
+  # Reset current bundle
+  $self->{current_bundle} = undef;
+
+  my $first;
+
+  # There is a bundle on buffer
+  if ($self->{buffer}) {
+    $first = $self->{buffer};
+    $self->{buffer} = undef;
+  }
+
+  # Move forward
+  elsif ($self->{query}->next) {
+    $first = $self->{query}->current or return;
+  }
+
+  # Can't move forward
+  else {
+    return;
+  };
 
   my $bundle = Krawfish::Posting::DocBundle->new($first);
   my $query = $self->{query};
@@ -41,7 +74,6 @@
     print_log('d_bundle', 'Start bundle with ' . $first->to_string);
   };
 
-
   # There is a next entry
   my $next;
   while ($query->next) {
@@ -60,41 +92,6 @@
   };
 
   $self->{current_bundle} = $bundle;
-
-  return $bundle;
-};
-
-
-sub max_freq {
-  $_[0]->{query}->max_freq;
-};
-
-sub current {
-  return $_[0]->{query}->current;
-};
-
-
-# Move to next bundle
-sub next {
-  my $self = shift;
-  $self->{current_bundle} = undef;
-
-  if ($self->{buffer}) {
-    $self->{next_item} = $self->{buffer};
-    $self->{buffer} = undef;
-  }
-
-  # Move forward
-  elsif ($self->{query}->next) {
-    $self->{next_item} = $self->{query}->current;
-  }
-
-  # Can't move forward
-  else {
-    $self->{next_item} = undef;
-    return;
-  };
-
   return 1;
 };
 
diff --git a/lib/Krawfish/Meta/Segment/Sort.pm b/lib/Krawfish/Meta/Segment/Sort.pm
index ba49bcb..8add0f5 100644
--- a/lib/Krawfish/Meta/Segment/Sort.pm
+++ b/lib/Krawfish/Meta/Segment/Sort.pm
@@ -50,7 +50,7 @@
   my %param = @_;
 
   if (DEBUG) {
-    print_log('p_sort', 'Initiate sort object');
+    print_log('sort', 'Initiate sort object');
   };
 
   # TODO:
@@ -125,7 +125,7 @@
   my $query = $self->{query};
 
   if (DEBUG) {
-    print_log('p_sort', 'Initialize sort object');
+    print_log('sort', 'Initialize sort object');
   };
 
   # TODO:
@@ -139,35 +139,32 @@
 
   # Get maximum accepted rank from queue
   my $max_rank_ref = $self->{max_rank_ref};
-
-  my $last_doc_id = -1;
   my $queue = $self->{queue};
-
   my $sort = $self->{sort};
 
   if (DEBUG) {
-    print_log('p_sort', qq!Next Rank on field #! . $sort->term_id);
+    print_log('sort', qq!Next Rank on field #! . $sort->term_id);
   };
 
   # Store the last match buffered
   my ($match, $rank);
 
   # Init
-  $query->next;
+  $query->next_bundle;
 
   # Pass through all queries
-  while ($match = $query->current) {
+  while ($match = $query->current_bundle) {
 
     if (DEBUG) {
-      print_log('p_sort', 'Get next posting from ' . $query->to_string);
+      print_log('sort', 'Get next posting from ' . $query->to_string);
     };
 
     # Get stored rank
     $rank = $sort->rank_for($match->doc_id);
 
     if (DEBUG) {
-      print_log('p_sort', 'Rank for doc id ' . $match->doc_id . " is $rank");
-      print_log('p_sort', "Check rank $rank against max rank " . $$max_rank_ref);
+      print_log('sort', 'Rank for doc id ' . $match->doc_id . " is $rank");
+      print_log('sort', "Check rank $rank against max rank " . $$max_rank_ref);
     };
 
 
@@ -178,7 +175,7 @@
       $match = undef;
 
       if (DEBUG) {
-        print_log('p_sort', 'Move to next doc');
+        print_log('sort', 'Move to next doc');
       };
 
       # Skip to next document
@@ -193,21 +190,15 @@
     $queue->insert([$rank, 0, $bundle, $bundle->matches]) if $bundle;
 
     if (DEBUG) {
-      print_log('p_sort', 'Move to next position');
+      print_log('sort', 'Move to next position');
     };
 
-    # Move to next match
-    $query->next;
+    # Move to next bundle
+    $query->next_bundle;
   };
 
-
   my $array = $queue->reverse_array;
-  print_log('p_sort', 'Get list ranking of ' . Dumper($array)) if DEBUG;
-
-  # Get the rank reference (old)
-  # TODO:
-  #   Remove!
-  $self->{stack} = [$array];
+  print_log('sort', 'Get list ranking of ' . Dumper($array)) if DEBUG;
 
   # Get the rank reference (new);
   $self->{new_sorted} = $array;
@@ -217,7 +208,7 @@
 
 # Move to the next item in the bundled document list
 # and create the next bundle
-sub next {
+sub next_bundle {
   my $self = shift;
 
   # Initialize query - this will do a full run on the first field level!
@@ -229,7 +220,7 @@
   #
   #    if (DEBUG) {
   #      print_log(
-  #        'p_sort',
+  #        'sort',
   #        'top_k ' . $self->{top_k} . ' is reached at position ' . $self->{pos}
   #      );
   #    };
@@ -240,7 +231,7 @@
 
   if ($self->{pos_in_sort} >= @{$self->{new_sorted}}) {
     if (DEBUG) {
-      print_log('p_sort', 'No more elements in the priority array');
+      print_log('sort', 'No more elements in the priority array');
     };
 
     $self->{current_bundle} = undef;
@@ -254,31 +245,35 @@
   my $top_bundle = $self->{new_sorted}->[$pos];
 
   if (DEBUG) {
-    print_log('p_sort', "Move to next bundle at $pos, which is " . $top_bundle->[VALUE]);
+    print_log('sort', "Move to next bundle at $pos, which is " . $top_bundle->[VALUE]);
   };
 
   my $rank = $top_bundle->[RANK];
 
+  if (DEBUG) {
+    print_log('sort', "Create new bundle from prio at $pos starting with " .
+                $top_bundle->[VALUE]->to_string);
+  };
+
   # TODO:
   #   Add criterion to bundle
   my $new_bundle = Krawfish::Posting::Bundle->new($top_bundle->[VALUE]);
 
-
-  if (DEBUG) {
-    print_log('p_sort', "Get first bundle at $pos from prio " .
-                $top_bundle->[VALUE]->to_string);
-  };
-
   $pos++;
   for (; $pos < @{$self->{new_sorted}}; $pos++) {
     $top_bundle = $self->{new_sorted}->[$pos];
 
     if (DEBUG) {
-      print_log('p_sort', 'Check follow up from prio: ' . $top_bundle->[VALUE]->to_string);
+      print_log('sort', 'Check follow up from prio: ' . $top_bundle->[VALUE]->to_string);
     };
 
     # Add element to bundle
     if ($rank == $top_bundle->[RANK]) {
+
+      if (DEBUG) {
+        print_log('sort', "Both items have the same rank $rank");
+      };
+
       unless ($new_bundle->add($top_bundle->[VALUE])) {
         warn 'Unable to add bundle to new bundle';
       };
@@ -293,6 +288,10 @@
   # Get position
   $self->{pos_in_sort} = $pos;
 
+  if (DEBUG) {
+    print_log('sort', 'Current bundle is now ' . $new_bundle->to_string);
+  };
+
   # Set current bundle
   $self->{current_bundle} = $new_bundle;
 
@@ -302,45 +301,43 @@
 };
 
 
-sub next_bundle {
-  ...
-};
-
 sub current_bundle {
-  my $self = shift;
-  return $self->{current_bundle};
+  $_[0]->{current_bundle};
 };
 
 
 # Return the current match
 sub current {
-
-  if (DEBUG) {
-    print_log('p_sort', 'Current posting is ' . $_[0]->{current}->to_string);
-  };
-
-  $_[0]->{current};
+  #  if (DEBUG) {
+  #    print_log('sort', 'Current posting is ' . $_[0]->{current}->to_string);
+  #  };
+  #
+  #  $_[0]->{current};
+  ...
 };
 
 
 # Get the current match object
 sub current_match {
-  my $self = shift;
-  my $current = $self->current or return;
-  my $match = Krawfish::Koral::Result::Match->new(
-    doc_id  => $current->doc_id,
-    start   => $current->start,
-    end     => $current->end,
-    payload => $current->payload,
-  );
-
-  if (DEBUG) {
-    print_log('p_sort', 'Current match is ' . $match->to_string);
-  };
-
-  return $match;
+  #  my $self = shift;
+  #  my $current = $self->current or return;
+  #  my $match = Krawfish::Koral::Result::Match->new(
+  #    doc_id  => $current->doc_id,
+  #    start   => $current->start,
+  #    end     => $current->end,
+  #    payload => $current->payload,
+  #  );
+  #
+  #  if (DEBUG) {
+  #    print_log('sort', 'Current match is ' . $match->to_string);
+  #  };
+  #
+  #  return $match;
+  ...
 };
 
+
+
 sub to_string {
   my $self = shift;
   my $str = 'sort(';
@@ -381,7 +378,7 @@
 
     if (DEBUG) {
       print_log(
-        'p_sort',
+        'sort',
         'top_k ' . $self->{top_k} . ' is reached at position ' . $self->{pos}
       );
     };
@@ -402,7 +399,7 @@
 
     if (DEBUG) {
       print_log(
-        'p_sort',
+        'sort',
         'There is already a match in [sorted]: ' . $self->{current}->to_string,
       );
     };
@@ -412,7 +409,7 @@
 
   # Nothing presorted
   elsif (DEBUG) {
-    print_log('p_sort', 'There is no match in [sorted]');
+    print_log('sort', 'There is no match in [sorted]');
   };
 
   # Get the list values
@@ -421,7 +418,7 @@
   # This will get the level from the stack
   my $level = $#{$stack};
 
-  print_log('p_sort', "Check stack on current level $level") if DEBUG;
+  print_log('sort', "Check stack on current level $level") if DEBUG;
 
   # If the current list is empty, remove from stack
   while (scalar @$stack && (
@@ -429,20 +426,20 @@
       !scalar(@{$stack->[$level]->[0]})
     )) {
 
-    print_log('p_sort', "Stack is empty at least on level $level") if DEBUG;
+    print_log('sort', "Stack is empty at least on level $level") if DEBUG;
 
     pop @$stack;
     $level--;
 
     #if (DEBUG) {
-    #  print_log('p_sort', "Stack is reduced to level $level with " . Dumper($stack));
+    #  print_log('sort', "Stack is reduced to level $level with " . Dumper($stack));
     #};
   };
 
   # There is nothing to sort further
   unless (scalar @$stack) {
 
-    print_log('p_sort', 'There is nothing to sort further') if DEBUG;
+    print_log('sort', 'There is nothing to sort further') if DEBUG;
 
     $self->{current} = undef;
     return;
@@ -464,7 +461,7 @@
 
     if (DEBUG) {
       print_log(
-        'p_sort',
+        'sort',
         "Found $same matches at first node",
         "  on level $level in " . _string_array($stack->[$level])
       );
@@ -473,7 +470,7 @@
     # Get the identical elements from the list
     my @presort = splice(@{$stack->[$level]}, 0, $same - 1);
 
-    print_log('p_sort', 'Presort array is ' . _string_array(\@presort)) if DEBUG;
+    print_log('sort', 'Presort array is ' . _string_array(\@presort)) if DEBUG;
 
     # TODO:
     #   Push presort on the stack!
@@ -488,7 +485,7 @@
     #my ($field, $desc) = @{$self->{fields}->[$level + 1]};
 
     #if (DEBUG) {
-    #  print_log('p_sort', qq!Next Rank on field "$field"!);
+    #  print_log('sort', qq!Next Rank on field "$field"!);
     #};
 
     #$level++;
@@ -504,7 +501,7 @@
 
     if (DEBUG) {
       print_log(
-        'p_sort',
+        'sort',
         "Sorted array",
         "  on new level $level is " . _string_array($stack->[$level])
       );
@@ -514,13 +511,13 @@
   # There are matches on the list without identical ranks
 
 #  if (DEBUG) {
-#    print_log('p_sort', "Stack with level $level is " . Dumper($stack));
+#    print_log('sort', "Stack with level $level is " . Dumper($stack));
 #  };
 
   # Get the top list entry
   my $top = shift @{$stack->[$level]};
 
-  print_log('p_sort', 'Push value ' . $top->[VALUE]) if DEBUG;
+  print_log('sort', 'Push value ' . $top->[VALUE]) if DEBUG;
 
   # Push matches to result list
   push @{$self->{sorted}}, $top->[VALUE]->unbundle;
@@ -540,7 +537,7 @@
   my $self = shift;
 
   if (DEBUG) {
-    print_log('p_sort', 'Check for duplicates from index ' . $self->{pos});
+    print_log('sort', 'Check for duplicates from index ' . $self->{pos});
   };
 
   return $self->{list}->[$self->{pos}]->[1] || 1;
@@ -561,7 +558,7 @@
   warn 'DEPRECATED';
 
   if (DEBUG) {
-    print_log('p_sort', 'Heapsort list of length ' . scalar(@$sub_list) .
+    print_log('sort', 'Heapsort list of length ' . scalar(@$sub_list) .
                 qq! by field "$field" for top_k = $top_k!);
   };
 
diff --git a/lib/Krawfish/Meta/Segment/Sort/Sample.pm b/lib/Krawfish/Meta/Segment/Sort/Sample.pm
index 3b4efa9..48466aa 100644
--- a/lib/Krawfish/Meta/Segment/Sort/Sample.pm
+++ b/lib/Krawfish/Meta/Segment/Sort/Sample.pm
@@ -9,6 +9,9 @@
 # Difference to random sorting is, this won't randomly sort all
 # results, making paging possible.
 
+# When the number of matches is known in advance, another
+# approach may be valid.
+
 # WARNING:
 #   Sorting does not respect current_match of any nested query,
 #   that's why sorting is always separated from enriching!
diff --git a/lib/Krawfish/Meta/Segment/SortAfter.pm b/lib/Krawfish/Meta/Segment/SortAfter.pm
index 8fd7e79..9fe18bd 100644
--- a/lib/Krawfish/Meta/Segment/SortAfter.pm
+++ b/lib/Krawfish/Meta/Segment/SortAfter.pm
@@ -1,5 +1,7 @@
 package Krawfish::Meta::Segment::SortAfter;
-use parent 'Krawfish::Meta';
+use parent 'Krawfish::Meta::Segment::Bundle';
+use Data::Dumper;
+use Krawfish::Log;
 use strict;
 use warnings;
 
@@ -10,9 +12,20 @@
 # (because all matches are already retrieved)
 # and immediately stops, when top_k is reached.
 #
-# That also means, this respects next etc.
-# and doesn't do all the work in init() phase.
+# That also means, this does all the work in next_bundle()
+# instead of init().
 
+
+use constant {
+  DEBUG   => 1,
+  RANK    => 0,
+  SAME    => 1,
+  VALUE   => 2,
+  MATCHES => 3
+};
+
+
+# Constructor
 sub new {
   my $class = shift;
   my %param = @_;
@@ -36,31 +49,41 @@
     $max_rank_ref
   );
 
+  if (DEBUG) {
+    print_log('sort_after', 'Initiate follow up sort');
+  };
+
   bless {
-    query   => $query,
-    segment => $segment,
-    top_k   => $top_k,
-    sort    => $sort,
-    count   => 0 # number of (bundled) matches already served
+    query    => $query,
+    segment  => $segment,
+    top_k    => $top_k,
+    sort     => $sort,
+    max_rank => $segment->max_rank,
+    count    => 0 # number of (bundled) matches already served
   }, $class;
 };
 
 
 # Move to next bundle
-sub next {
+sub next_bundle {
   my $self = shift;
 
+  if (DEBUG) {
+    print_log('sort_after', 'Move to next bundle');
+  };
+
+  $_[0]->{current_bundle} = undef;
+
   # Already served enough
   if ($self->{count} > $self->{top_k}) {
-    $_[0]->{current_bundle} = undef;
     return;
   }
 
   # There are sorted bundles on the buffer
-  if (@{$self->{buffer}}) {
+  if ($self->{buffer} && @{$self->{buffer}}) {
 
     # This is also a bundle
-    $self->{current_bundle} = shift @{$self->{buffer}};
+    $self->{current_bundle} = shift(@{$self->{buffer}})->[VALUE];
 
     # Move forward by the length of the bundle
     $self->{count} += $self->{current_bundle}->size;
@@ -70,14 +93,82 @@
   };
 
   # Get a new bundle from the nested query
-  if ($self->{query}->next) {
-    my $next_bundle = $self->{query}->current_bundle;
+  unless ($self->{query}->next_bundle) {
+    return;
+  };
 
-    # 1. Split the bundle
-    # 2. Sort
-    # 3. add sorting criterion
+  if (DEBUG) {
+    print_log('Get next bundle from ' . $self->{query}->to_string);
+  };
+
+  my $next_bundle = $self->{query}->current_bundle;
+
+  # Next bundle is already sorted
+  if ($next_bundle->size == 1) {
+    ($self->{current_bundle}) = $next_bundle->unbundle;
+    return 1;
+  };
+
+  # Sort next bundle
+
+  # This should probably check for a simpler sorting
+  # algorithm for small data sets
+  my $rank;
+  my $sort = $self->{sort};
+  my $max_rank_ref = \(my $max_rank = $self->{max_rank});
+
+  if (DEBUG) {
+    print_log('sort_after', 'Sort nested bundle');
+  };
+
+  # Create initial priority queue
+  my $queue = Krawfish::Util::PriorityQueue::PerDoc->new(
+    $self->{top_k} - $self->{count},
+    $max_rank_ref
+  );
+
+  # Unbundle bundle and go through matches
+  # TODO:
+  #   This should be an iterator
+  foreach my $posting ($next_bundle->unbundle) {
+
+    if (DEBUG) {
+      print_log('sort_after', 'Get next posting from ' . $self->{query}->to_string);
+    };
+
+    # Get stored rank
+    $rank = $sort->rank_for($posting->doc_id);
+
+    # TODO:
+    #   Support next_doc() and preview_doc_id()
+    #
+    #        if ($rank > $$max_rank_ref) {
+    #          # Document is irrelevant
+    #          $match = undef;
+    #
+    #          if (DEBUG) {
+    #            print_log('sort', 'Move to next doc');
+    #          };
+    #
+    #          # Skip to next document
+    #          $query->next_doc;
+    #          CORE::next;
+    #        };
+
+    $queue->insert([$rank, 0, $posting, $posting->matches]);
     # 4. Push to buffer
   };
+
+  my $array = $queue->reverse_array;
+
+  print_log('sort_after', 'Get list ranking of ' . Dumper($array)) if DEBUG;
+
+  # This is also a bundle
+  $self->{current_bundle} = shift(@{$array})->[VALUE];
+
+  print_log('sort_after', 'New current bundle is ' . $self->{current_bundle}->to_string) if DEBUG;
+
+  $self->{buffer} = $array;
 };