Added some notes regarding subterm sorting Change-Id: I935efcfe519f2ff19ea497eb7712b5668e2c4411

commit: 5238ce5b789ca1ff642190b3afe2d4442bd191eb [log] [tgz]
author: Akron <nils@diewald-online.de> Mon Oct 09 15:30:55 2017 +0200
committer: Akron <nils@diewald-online.de> Mon Oct 09 15:30:55 2017 +0200
tree: e4b3c723fa150f438dd051397488a9e06075bb16
parent: 6f5e5f8db1dc308b0807ca6d9858245f76c55580 [diff]
diff --git a/lib/Krawfish/Meta/Cluster/Limit.pm b/lib/Krawfish/Meta/Cluster/Limit.pm
index e956e90..ac49c44 100644
--- a/lib/Krawfish/Meta/Cluster/Limit.pm
+++ b/lib/Krawfish/Meta/Cluster/Limit.pm

@@ -1,12 +1,66 @@
-package Krawfish::Meta::Cluster;
+package Krawfish::Meta::Limit;
+use parent 'Krawfish::Meta';
+use Krawfish::Log;
 use strict;
 use warnings;
 
+use constant DEBUG => 0;
+
+
+# Construct limitation collector
 sub new {
   my $class = shift;
   bless {
-    query => shift
+    query => shift,
+    start_index => shift,
+    items_per_page => shift // 0,
+    pos => 0
   }, $class;
 };
 
+
+# Next if in limited area
+sub next {
+  my $self = shift;
+  my $query = $self->{query};
+
+  # Move to start index
+  while ($self->{pos} < $self->{start_index}) {
+    $self->{pos}++;
+    $query->next or return;
+    print_log('limit', 'Ignore match at position ' . $self->{pos}) if DEBUG;
+  };
+
+  # Position is under limit
+  if (!$self->{items_per_page} ||
+        $self->{pos} < ($self->{start_index} + $self->{items_per_page})) {
+    $self->{pos}++;
+    print_log('limit', 'Collect match at position ' . $self->{pos}) if DEBUG;
+    return $query->next;
+  };
+  return;
+};
+
+
+# Get current element
+sub current {
+  $_[0]->{query}->current;
+};
+
+# May return a hash reference with information
+sub current_group {
+  ...
+};
+
+
+# Stringify collector
+sub to_string {
+  my $self = shift;
+  my $end = $self->{items_per_page} ? ($self->{start_index} + $self->{items_per_page}) : '';
+  my $str = 'resultLimit(';
+  $str .= '[' . $self->{start_index} . '-' . $end . ']:';
+  $str .= $self->{query}->to_string;
+  return $str . ')';
+};
+
 1;

diff --git a/lib/Krawfish/Meta/Limit.pm b/lib/Krawfish/Meta/Limit.pm
deleted file mode 100644
index ac49c44..0000000
--- a/lib/Krawfish/Meta/Limit.pm
+++ /dev/null

@@ -1,66 +0,0 @@
-package Krawfish::Meta::Limit;
-use parent 'Krawfish::Meta';
-use Krawfish::Log;
-use strict;
-use warnings;
-
-use constant DEBUG => 0;
-
-
-# Construct limitation collector
-sub new {
-  my $class = shift;
-  bless {
-    query => shift,
-    start_index => shift,
-    items_per_page => shift // 0,
-    pos => 0
-  }, $class;
-};
-
-
-# Next if in limited area
-sub next {
-  my $self = shift;
-  my $query = $self->{query};
-
-  # Move to start index
-  while ($self->{pos} < $self->{start_index}) {
-    $self->{pos}++;
-    $query->next or return;
-    print_log('limit', 'Ignore match at position ' . $self->{pos}) if DEBUG;
-  };
-
-  # Position is under limit
-  if (!$self->{items_per_page} ||
-        $self->{pos} < ($self->{start_index} + $self->{items_per_page})) {
-    $self->{pos}++;
-    print_log('limit', 'Collect match at position ' . $self->{pos}) if DEBUG;
-    return $query->next;
-  };
-  return;
-};
-
-
-# Get current element
-sub current {
-  $_[0]->{query}->current;
-};
-
-# May return a hash reference with information
-sub current_group {
-  ...
-};
-
-
-# Stringify collector
-sub to_string {
-  my $self = shift;
-  my $end = $self->{items_per_page} ? ($self->{start_index} + $self->{items_per_page}) : '';
-  my $str = 'resultLimit(';
-  $str .= '[' . $self->{start_index} . '-' . $end . ']:';
-  $str .= $self->{query}->to_string;
-  return $str . ')';
-};
-
-1;

diff --git a/lib/Krawfish/Meta/Segment/Enrich/SortCriterion.pm b/lib/Krawfish/Meta/Segment/Enrich/SortCriterion.pm
index 7f3bd1e..fefb287 100644
--- a/lib/Krawfish/Meta/Segment/Enrich/SortCriterion.pm
+++ b/lib/Krawfish/Meta/Segment/Enrich/SortCriterion.pm

@@ -1,3 +1,8 @@
+package Krawfish::Meta::Segment::Enrich::SortCriterion;
+use parent 'Krawfish::Meta';
+use warnings;
+use strict;
+
 # Enrich an item with sort criteria.
 # This is necessary to sort items beyond the segment.
 # The problem with this enrichment is,
@@ -18,6 +23,51 @@
 # 3. Go through all fields and collect values or term_ids
 # 4. Create criterion vectors per match based on these information
 #
+# But:
+# It may very well be possible to only enrich if required
+# on the node level.
+#
 # On the node level, the relevant criteria (top_k) will be inflated,
-# taken the ordering into account (which mean following matches may
+# taking the ordering into account, which means following matches may
 # have a lot of criteria in common.
+
+
+sub new {
+  my $class = shift;
+  bless {
+    query => shift,
+
+    # Store all criteria in sorted order,
+    # which may include terms and fields.
+    # This will also keep the direction
+    # and possibly the collation.
+    criteria => shift
+  }, $class
+};
+
+sub _init {
+  my $self = shift;
+
+  return if $self->{init}++;
+
+  # TODO:
+  #   Go through all criteria and collect required field IDs.
+  #   Bring required field IDs in order.
+  #   Create an array for field_id => criterion_position to
+  #   map the surface term to the criterion after fetching.
+  #   Remember the criterion position for optional term sorting.
+};
+
+
+sub current_match {
+  # TODO:
+  #   Create an empty list for sorting criteria.
+  #   a) Retrieve for the document id all the relevant fields
+  #      if there are fields to retrieve.
+  #      Add in the position of the criteria list.
+  #   b) The surface term is already retrieved and enriched.
+  #      Add in the position of the criteria list.
+};
+
+
+1;

diff --git a/lib/Krawfish/Meta/Segment/Sort.pm b/lib/Krawfish/Meta/Segment/Sort.pm
index 7e9cb94..868fe45 100644
--- a/lib/Krawfish/Meta/Segment/Sort.pm
+++ b/lib/Krawfish/Meta/Segment/Sort.pm

@@ -27,8 +27,8 @@
 #   They will always be sorted at the end.
 
 # TODO:
-#   Ranks should respect the ranking mechanism of FieldsRank and
-#   TermRank, where only even values are fine and odd values need
+#   Ranks should respect the ranking mechanism of FieldsRank,
+#   where only even values are fine and odd values need
 #   to be sorted in a separate step (this is still open for discussion).
 
 # TODO:
@@ -63,8 +63,6 @@
   # TODO:
   #   Check for mandatory parameters
   #
-  # TODO:
-  #   Check if query is a bundled query!
   my $query    = $param{query};
 
   unless ($query->isa('Krawfish::Meta::Segment::Bundle')) {

diff --git a/lib/Krawfish/Meta/Segment/Sort/SubTerm.pm b/lib/Krawfish/Meta/Segment/Sort/SubTerm.pm
index a677f6f..e2e173b 100644
--- a/lib/Krawfish/Meta/Segment/Sort/SubTerm.pm
+++ b/lib/Krawfish/Meta/Segment/Sort/SubTerm.pm

@@ -3,14 +3,26 @@
 use warnings;
 
 # This will sort based on a pre-ranked subterm
+# or rather a subterm list for a class
+#
+# A given node-wide vector_ref can be used to limit
+# the list of terms to check.
+#
+# As classes are in order, a sortafter on subterms
+# for further classes is only relevant in case
+# there are matches with identical ranks on this class.
 
 sub new {
   my $class = shift;
 
+  # TODO:
+  #   Possibly remember the collation
   my $self = bless {
-    index => shift,
-    suffix => shift // 0,
+    index      => shift,
+    suffix     => shift // 0,
     descending => shift // 0,
+    class      => shift // 0,
+    max_rank_vector_ref => shift // []
   }, $class;
 
   # Get ranking
@@ -22,15 +34,27 @@
   return $self;
 };
 
-sub get {
-  my $self = shift;
-  my $subterm_id = shift;
+
+# Check whether the rank of the match is smaller
+# than the given rank.
+sub rank_lt {
+  my ($self, $match) = shift;
+
+  # TODO:
+  #   For the requested class(es),
+  #   retrieve the subterm_ids.
+  #   This is similar to Enrich::Snippet retrieval,
+  #   as classes may have overlaps.
+  #   Go through all terms in either left-to-right (prefix)
+  #   or right-to-left (suffix) order and rank as long as
+  #   the terms are smaller than the rank vector.
+
   my $rank;
   if ($self->{suffix}) {
-    $rank = $self->{dict}->prefix_rank_by_subterm_id($subterm_id);
+    $rank = $self->{dict}->suffix_rank_by_subterm_id($subterm_id);
   }
   else {
-    $rank = $self->{dict}->suffix_rank_by_subterm_id($subterm_id);
+    $rank = $self->{dict}->prefix_rank_by_subterm_id($subterm_id);
   };
 
   # Revert if maximum rank is set
commit	5238ce5b789ca1ff642190b3afe2d4442bd191eb	[log] [tgz]
author	Akron <nils@diewald-online.de>	Mon Oct 09 15:30:55 2017 +0200
committer	Akron <nils@diewald-online.de>	Mon Oct 09 15:30:55 2017 +0200
tree	e4b3c723fa150f438dd051397488a9e06075bb16
parent	6f5e5f8db1dc308b0807ca6d9858245f76c55580 [diff]