Added some notes regarding subterm sorting
Change-Id: I935efcfe519f2ff19ea497eb7712b5668e2c4411
diff --git a/lib/Krawfish/Meta/Cluster/Limit.pm b/lib/Krawfish/Meta/Cluster/Limit.pm
index e956e90..ac49c44 100644
--- a/lib/Krawfish/Meta/Cluster/Limit.pm
+++ b/lib/Krawfish/Meta/Cluster/Limit.pm
@@ -1,12 +1,66 @@
-package Krawfish::Meta::Cluster;
+package Krawfish::Meta::Limit;
+use parent 'Krawfish::Meta';
+use Krawfish::Log;
use strict;
use warnings;
+use constant DEBUG => 0;
+
+
+# Construct limitation collector
sub new {
my $class = shift;
bless {
- query => shift
+ query => shift,
+ start_index => shift,
+ items_per_page => shift // 0,
+ pos => 0
}, $class;
};
+
+# Next if in limited area
+sub next {
+ my $self = shift;
+ my $query = $self->{query};
+
+ # Move to start index
+ while ($self->{pos} < $self->{start_index}) {
+ $self->{pos}++;
+ $query->next or return;
+ print_log('limit', 'Ignore match at position ' . $self->{pos}) if DEBUG;
+ };
+
+ # Position is under limit
+ if (!$self->{items_per_page} ||
+ $self->{pos} < ($self->{start_index} + $self->{items_per_page})) {
+ $self->{pos}++;
+ print_log('limit', 'Collect match at position ' . $self->{pos}) if DEBUG;
+ return $query->next;
+ };
+ return;
+};
+
+
+# Get current element
+sub current {
+ $_[0]->{query}->current;
+};
+
+# May return a hash reference with information
+sub current_group {
+ ...
+};
+
+
+# Stringify collector
+sub to_string {
+ my $self = shift;
+ my $end = $self->{items_per_page} ? ($self->{start_index} + $self->{items_per_page}) : '';
+ my $str = 'resultLimit(';
+ $str .= '[' . $self->{start_index} . '-' . $end . ']:';
+ $str .= $self->{query}->to_string;
+ return $str . ')';
+};
+
1;
diff --git a/lib/Krawfish/Meta/Limit.pm b/lib/Krawfish/Meta/Limit.pm
deleted file mode 100644
index ac49c44..0000000
--- a/lib/Krawfish/Meta/Limit.pm
+++ /dev/null
@@ -1,66 +0,0 @@
-package Krawfish::Meta::Limit;
-use parent 'Krawfish::Meta';
-use Krawfish::Log;
-use strict;
-use warnings;
-
-use constant DEBUG => 0;
-
-
-# Construct limitation collector
-sub new {
- my $class = shift;
- bless {
- query => shift,
- start_index => shift,
- items_per_page => shift // 0,
- pos => 0
- }, $class;
-};
-
-
-# Next if in limited area
-sub next {
- my $self = shift;
- my $query = $self->{query};
-
- # Move to start index
- while ($self->{pos} < $self->{start_index}) {
- $self->{pos}++;
- $query->next or return;
- print_log('limit', 'Ignore match at position ' . $self->{pos}) if DEBUG;
- };
-
- # Position is under limit
- if (!$self->{items_per_page} ||
- $self->{pos} < ($self->{start_index} + $self->{items_per_page})) {
- $self->{pos}++;
- print_log('limit', 'Collect match at position ' . $self->{pos}) if DEBUG;
- return $query->next;
- };
- return;
-};
-
-
-# Get current element
-sub current {
- $_[0]->{query}->current;
-};
-
-# May return a hash reference with information
-sub current_group {
- ...
-};
-
-
-# Stringify collector
-sub to_string {
- my $self = shift;
- my $end = $self->{items_per_page} ? ($self->{start_index} + $self->{items_per_page}) : '';
- my $str = 'resultLimit(';
- $str .= '[' . $self->{start_index} . '-' . $end . ']:';
- $str .= $self->{query}->to_string;
- return $str . ')';
-};
-
-1;
diff --git a/lib/Krawfish/Meta/Segment/Enrich/SortCriterion.pm b/lib/Krawfish/Meta/Segment/Enrich/SortCriterion.pm
index 7f3bd1e..fefb287 100644
--- a/lib/Krawfish/Meta/Segment/Enrich/SortCriterion.pm
+++ b/lib/Krawfish/Meta/Segment/Enrich/SortCriterion.pm
@@ -1,3 +1,8 @@
+package Krawfish::Meta::Segment::Enrich::SortCriterion;
+use parent 'Krawfish::Meta';
+use warnings;
+use strict;
+
# Enrich an item with sort criteria.
# This is necessary to sort items beyond the segment.
# The problem with this enrichment is,
@@ -18,6 +23,51 @@
# 3. Go through all fields and collect values or term_ids
# 4. Create criterion vectors per match based on these information
#
+# But:
+# It may very well be possible to only enrich if required
+# on the node level.
+#
# On the node level, the relevant criteria (top_k) will be inflated,
-# taken the ordering into account (which mean following matches may
+# taking the ordering into account (which means following matches may
# have a lot of criteria in common.
+
+
+sub new {
+ my $class = shift;
+ bless {
+ query => shift,
+
+ # Store all criteria in sorted order,
+ # which may include terms and fields.
+ # This will also keep the direction
+ # and possibly the collation.
+ criteria => shift
+ }, $class
+};
+
+sub _init {
+ my $self = shift;
+
+ return if $self->{init}++;
+
+ # TODO:
+ # Go through all criteria and collect required field IDs.
+ # Bring required field IDs in order.
+ # Create an array for field_id => criterion_position to
+ # map the surface term to the criterion after fetching.
+ # Remember the criterion position for optional term sorting.
+};
+
+
+sub current_match {
+ # TODO:
+ # Create an empty list for sorting criteria.
+ # a) Retrieve for the document id all the relevant fields
+ # if there are fields to retrieve.
+ # Add in the position of the criteria list.
+ # b) The surface term is already retrieved and enriched.
+ # Add in the position of the criteria list.
+};
+
+
+1;
diff --git a/lib/Krawfish/Meta/Segment/Sort.pm b/lib/Krawfish/Meta/Segment/Sort.pm
index 7e9cb94..868fe45 100644
--- a/lib/Krawfish/Meta/Segment/Sort.pm
+++ b/lib/Krawfish/Meta/Segment/Sort.pm
@@ -27,8 +27,8 @@
# They will always be sorted at the end.
# TODO:
-# Ranks should respect the ranking mechanism of FieldsRank and
-# TermRank, where only even values are fine and odd values need
+# Ranks should respect the ranking mechanism of FieldsRank,
+# where only even values are fine and odd values need
# to be sorted in a separate step (this is still open for discussion).
# TODO:
@@ -63,8 +63,6 @@
# TODO:
# Check for mandatory parameters
#
- # TODO:
- # Check if query is a bundled query!
my $query = $param{query};
unless ($query->isa('Krawfish::Meta::Segment::Bundle')) {
diff --git a/lib/Krawfish/Meta/Segment/Sort/SubTerm.pm b/lib/Krawfish/Meta/Segment/Sort/SubTerm.pm
index a677f6f..e2e173b 100644
--- a/lib/Krawfish/Meta/Segment/Sort/SubTerm.pm
+++ b/lib/Krawfish/Meta/Segment/Sort/SubTerm.pm
@@ -3,14 +3,26 @@
use warnings;
# This will sort based on a pre-ranked subterm
+# or rather a subterm list for a class
+#
+# A given node-wide vector_ref can be used to limit
+# the list of terms to check.
+#
+# As classes are in order, a sortafter on subterms
+# for further classes is only relevant in case
+# there are matches with identical ranks on this class.
sub new {
my $class = shift;
+ # TODO:
+ # Possibly remember the collation
my $self = bless {
- index => shift,
- suffix => shift // 0,
+ index => shift,
+ suffix => shift // 0,
descending => shift // 0,
+ class => shift // 0,
+ max_rank_vector_ref => shift // []
}, $class;
# Get ranking
@@ -22,15 +34,27 @@
return $self;
};
-sub get {
- my $self = shift;
- my $subterm_id = shift;
+
+# Check for the rank of the match if it is smaller
+# than the given rank.
+sub rank_lt {
+ my ($self, $match) = shift;
+
+ # TODO:
+ # For the requested class(es),
+ # retrieve the subterm_ids.
+ # This is similar to Enrich::Snippet retrieval,
+ # as classes may have overlaps.
+ # Go through all terms in either left-to-right (prefix)
+ # or right-to-left (suffix) order and rank as long as
+ # the terms are smaller than the rank vector.
+
my $rank;
if ($self->{suffix}) {
- $rank = $self->{dict}->prefix_rank_by_subterm_id($subterm_id);
+ $rank = $self->{dict}->suffix_rank_by_subterm_id($subterm_id);
}
else {
- $rank = $self->{dict}->suffix_rank_by_subterm_id($subterm_id);
+ $rank = $self->{dict}->prefix_rank_by_subterm_id($subterm_id);
};
# Revert if maximum rank is set