Improve composition on koral compilation
Change-Id: Iadb072c2b7f83656a1afcf89e2fb49a11c7026ca
diff --git a/lib/Krawfish/Compile.pm b/lib/Krawfish/Compile.pm
index 2f2c7f0..8efd01f 100644
--- a/lib/Krawfish/Compile.pm
+++ b/lib/Krawfish/Compile.pm
@@ -1,13 +1,32 @@
package Krawfish::Compile;
-use parent 'Krawfish::Query';
use Krawfish::Koral::Result::Match;
use Krawfish::Koral::Result;
use Krawfish::Log;
+use Role::Tiny;
use strict;
use warnings;
+# TODO:
+# It may be better to use Krawfish::Corpus instead
+#
+with 'Krawfish::Query';
+
+requires qw/current_match
+ match_from_query
+ compile
+ result/;
+
+# TODO:
+# result() should be in a separate
+# interface, so it is
+# usable in Aggregate::Base as well.
+
# Krawfish::Compile is the base class for all Compile queries.
+# TODO:
+# It may be beneficial to have group, aggregation, sort etc.
+# queries on the root level instead of the intermediate
+# compile level
use constant DEBUG => 0;
@@ -115,15 +134,6 @@
};
-# Shorthand for "search through"
-sub finalize {
-
- warn 'DEPRECATED';
- while ($_[0]->next) {};
- return $_[0];
-};
-
-
# Get result object
sub result {
my $self = shift;
diff --git a/lib/Krawfish/Compile/Cluster/Limit.pm b/lib/Krawfish/Compile/Cluster/Limit.pm
index ca51453..d7aa71a 100644
--- a/lib/Krawfish/Compile/Cluster/Limit.pm
+++ b/lib/Krawfish/Compile/Cluster/Limit.pm
@@ -1,8 +1,11 @@
package Krawfish::Compile::Cluster::Limit;
-use parent 'Krawfish::Compile';
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile';
+
use constant DEBUG => 0;
diff --git a/lib/Krawfish/Compile/Node.pm b/lib/Krawfish/Compile/Node.pm
index 4a1adf3..c148378 100644
--- a/lib/Krawfish/Compile/Node.pm
+++ b/lib/Krawfish/Compile/Node.pm
@@ -1,6 +1,7 @@
package Krawfish::Compile::Node;
use strict;
use warnings;
+use Role::Tiny;
# Create a class for result aggregation on the node level.
diff --git a/lib/Krawfish/Compile/Node/Aggregate.pm b/lib/Krawfish/Compile/Node/Aggregate.pm
index cc6ffe9..dce5624 100644
--- a/lib/Krawfish/Compile/Node/Aggregate.pm
+++ b/lib/Krawfish/Compile/Node/Aggregate.pm
@@ -1,7 +1,9 @@
package Krawfish::Compile::Node::Aggregate;
-use parent 'Krawfish::Compile::Node';
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Node';
# TODO:
# Implement the aggregate() method on all Node::Aggregate::*
diff --git a/lib/Krawfish/Compile/Node/Enrich/Fields.pm b/lib/Krawfish/Compile/Node/Enrich/Fields.pm
index b2f53f0..5f96229 100644
--- a/lib/Krawfish/Compile/Node/Enrich/Fields.pm
+++ b/lib/Krawfish/Compile/Node/Enrich/Fields.pm
@@ -1,5 +1,4 @@
package Krawfish::Compile::Node::Enrich::Fields;
-use parent 'Krawfish::Query';
use Krawfish::Util::String qw/squote/;
use strict;
use warnings;
diff --git a/lib/Krawfish/Compile/Segment/Aggregate.pm b/lib/Krawfish/Compile/Segment/Aggregate.pm
index 196693f..c40d7b2 100644
--- a/lib/Krawfish/Compile/Segment/Aggregate.pm
+++ b/lib/Krawfish/Compile/Segment/Aggregate.pm
@@ -1,8 +1,10 @@
package Krawfish::Compile::Segment::Aggregate;
-use parent 'Krawfish::Compile';
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile';
use constant DEBUG => 0;
@@ -10,6 +12,16 @@
# per match.
# TODO:
+# It may be necessary to introduce an "AggregateOnCorpus"
+# mechanism that first wraps the corpus before filtering.
+# This, however, will require the corpus to be referenced,
+# so the aggregation is not done multiple times.
+# This is necessary, e.g., to aggregate the number of tokens
+# in a corpus independent of the matches in this corpus -
+# a value that is needed to compute t-score or mutual information (MI).
+# See http://lingua.mtsu.edu/chinese-computing/docs/tscore.html
+
+# TODO:
# See https://www.elastic.co/guide/en/
# elasticsearch/reference/current/
# search-aggregations.html
diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Base.pm b/lib/Krawfish/Compile/Segment/Aggregate/Base.pm
index 2edddfa..3aa1fe1 100644
--- a/lib/Krawfish/Compile/Segment/Aggregate/Base.pm
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Base.pm
@@ -1,36 +1,19 @@
package Krawfish::Compile::Segment::Aggregate::Base;
use strict;
use warnings;
+use Role::Tiny;
-# Does not need anything in the object
-sub new {
- my $class = shift;
- bless \(my $self = ''), $class;
-};
-
-# Per default do nothing
-sub each_doc {
-};
-
-# Per default do nothing
-sub each_match {
-};
+requires qw/each_doc
+ each_match
+ on_finish
+ result
+ to_string/;
# Per default do nothing
sub on_finish {
+ $_[0];
};
-# Not implemented on base
-sub to_string {
- ...
-};
-
-sub collection {
- warn 'DEPRECATED';
- ...
-};
-
-
# Get result object
sub result {
my $self = shift;
@@ -38,7 +21,7 @@
$self->{result} = shift;
return $self;
};
- $self->{result} //= Krawfish::Koral::Result->new;
+ # $self->{result} //= Krawfish::Koral::Result->new;
return $self->{result};
};
diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Fields.pm b/lib/Krawfish/Compile/Segment/Aggregate/Fields.pm
index 1c85b18..ed96d86 100644
--- a/lib/Krawfish/Compile/Segment/Aggregate/Fields.pm
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Fields.pm
@@ -1,10 +1,13 @@
package Krawfish::Compile::Segment::Aggregate::Fields;
-use parent 'Krawfish::Compile::Segment::Aggregate::Base';
use Krawfish::Koral::Result::Aggregate::Fields;
use Krawfish::Util::String qw/squote/;
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Aggregate::Base';
+
use constant DEBUG => 0;
@@ -112,19 +115,6 @@
};
-# Return result
-#sub result {
- # Return fields
- # Example structure for year
- # {
- # 1997 => [4, 67],
- # 1998 => [5, 89],
- # 1999 => [3, 20]
- # }
-# $_[0]->{result};
-#};
-
-
# Stringification
sub to_string {
return 'fields:' . join(',', map { '#' . $_ } @{$_[0]->{field_keys}});
diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Frequencies.pm b/lib/Krawfish/Compile/Segment/Aggregate/Frequencies.pm
index 11f6c5a..cfc6087 100644
--- a/lib/Krawfish/Compile/Segment/Aggregate/Frequencies.pm
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Frequencies.pm
@@ -1,9 +1,11 @@
package Krawfish::Compile::Segment::Aggregate::Frequencies;
-use parent 'Krawfish::Compile::Segment::Aggregate::Base';
use Krawfish::Koral::Result::Aggregate::Frequencies;
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Aggregate::Base';
# Count the frequencies of all matches of the query
# per doc and per match.
@@ -53,12 +55,6 @@
};
-# Return result object
-sub result {
- $_[0]->{result};
-};
-
-
# Stringification
sub to_string {
'freq'
diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Length.pm b/lib/Krawfish/Compile/Segment/Aggregate/Length.pm
index 1242841..6c7cfce 100644
--- a/lib/Krawfish/Compile/Segment/Aggregate/Length.pm
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Length.pm
@@ -1,8 +1,10 @@
package Krawfish::Compile::Segment::Aggregate::Length;
-use parent 'Krawfish::Compile::Segment::Aggregate::Base';
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Aggregate::Base';
# This will check the hits length in subtokens -
# currently other word lengths are not supported
@@ -33,12 +35,6 @@
};
-# Return result object
-sub result {
- $_[0]->{result};
-};
-
-
# Stringification
sub to_string {
'length'
diff --git a/lib/Krawfish/Compile/Segment/Aggregate/TermFreq.pm b/lib/Krawfish/Compile/Segment/Aggregate/TermFreq.pm
index d76ebe3..51af73e 100644
--- a/lib/Krawfish/Compile/Segment/Aggregate/TermFreq.pm
+++ b/lib/Krawfish/Compile/Segment/Aggregate/TermFreq.pm
@@ -1,9 +1,11 @@
package Krawfish::Compile::Segment::Aggregate::TermFreq;
-use parent 'Krawfish::Compile::Segment::Aggregate::Base';
use Krawfish::Util::String qw/squote/;
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Aggregate::Base';
# Counts the frequency for each term in a TermFrequency
# query. This is necessary for co-occurrence search and the
@@ -59,6 +61,7 @@
$frew->{$term} = $self->{freq};
};
+
# Stringification
sub to_string {
return 'tfreq:' . squote($self->{term_query}->term);
diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Values.pm b/lib/Krawfish/Compile/Segment/Aggregate/Values.pm
index 763f517..6ea144e 100644
--- a/lib/Krawfish/Compile/Segment/Aggregate/Values.pm
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Values.pm
@@ -1,17 +1,16 @@
package Krawfish::Compile::Segment::Aggregate::Values;
-use parent 'Krawfish::Compile::Segment::Aggregate::Base';
use Krawfish::Koral::Result::Aggregate::Values;
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Aggregate::Base';
# TODO:
# Rename to FieldCalc or FieldSum
# TODO:
-# Support corpus classes
-
-# TODO:
# This is rather a group query or better:
# An aggregation on groups!
@@ -60,7 +59,7 @@
# Release for each doc
sub each_doc {
- my ($self, $current, $result) = @_;
+ my ($self, $current) = @_;
$self->_init;
@@ -98,12 +97,6 @@
};
-# Result
-sub result {
- $_[0]->{result};
-};
-
-
# Stringification
sub to_string {
return 'values:' . join(',', @{$_[0]->{field_ids}});
diff --git a/lib/Krawfish/Compile/Segment/Bundle.pm b/lib/Krawfish/Compile/Segment/Bundle.pm
index 6336697..2fdb0b4 100644
--- a/lib/Krawfish/Compile/Segment/Bundle.pm
+++ b/lib/Krawfish/Compile/Segment/Bundle.pm
@@ -1,9 +1,12 @@
package Krawfish::Compile::Segment::Bundle;
-use parent 'Krawfish::Compile';
+use Role::Tiny;
use Krawfish::Log;
use strict;
use warnings;
+requires qw/current_bundle
+ next_bundle/;
+
# This class represents bundles of postings
# (or bundles of bundles of postings) sorted by a certain criterion.
diff --git a/lib/Krawfish/Compile/Segment/BundleDocs.pm b/lib/Krawfish/Compile/Segment/BundleDocs.pm
index db13935..871d37c 100644
--- a/lib/Krawfish/Compile/Segment/BundleDocs.pm
+++ b/lib/Krawfish/Compile/Segment/BundleDocs.pm
@@ -1,9 +1,14 @@
package Krawfish::Compile::Segment::BundleDocs;
-use parent 'Krawfish::Compile::Segment::Bundle';
use Krawfish::Log;
use Krawfish::Posting::List;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Bundle';
+
+requires qw/next_bundle/;
+
# Bundle matches in the same document.
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Context.pm b/lib/Krawfish/Compile/Segment/Enrich/Context.pm
deleted file mode 100644
index 09feebd..0000000
--- a/lib/Krawfish/Compile/Segment/Enrich/Context.pm
+++ /dev/null
@@ -1,113 +0,0 @@
-package Krawfish::Compile::Segment::Enrich::Context;
-use parent 'Krawfish::Compile';
-use Krawfish::Log;
-use strict;
-use warnings;
-
-use constant DEBUG => 0;
-
-
-# DEPRECATED!!!
-
-
-# This will add context (only surface forms) to each match
-
-# TODO:
-# Context always needs to be left AND right, because
-# at least the surrounding elements context will expand
-# both sides at the same time!
-
-
-sub new {
- my $class = shift;
- bless {
- forward_obj => shift,
- query => shift,
-
- # TODO:
- # Should support
- # - surrounding elements
- # - left elements / right elements
- # - left tokens / right tokens
- # - left characters / right characters
- contextualize => shift, # Accept context object
- match => undef
- }, $class;
-};
-
-# Initialize forward counter
-sub _init {
- return if $_[0]->{forward_pointer};
-
- my $self = shift;
-
- print_log('e_context', 'Create forward pointer') if DEBUG;
-
- # Load the ranked list - may be too large for memory!
- $self->{forward_pointer} = $self->{forward_obj}->pointer;
-};
-
-
-sub current_match {
- my $self = shift;
-
- $self->_init;
-
- # Match is already set
- if ($self->{match}) {
- if (DEBUG) {
- print_log(
- 'e_context',
- 'Match already defined ' . $self->{match}->to_string
- );
- };
- return $self->{match};
- };
-
- # TODO:
- # may simply be $self->{query}->current_match
- my $match = $self->match_from_query;
-
- # Get forward pointer
- my $forward = $self->{forward_pointer};
-
- my $doc_id = $match->doc_id;
-
- unless ($forward->skip_doc($doc_id) == $doc_id) {
-
- # TODO: This should never happen!
- return;
- };
-
- # Get the context
- # TODO:
- # This may be retrieved as part of the snippet!
- my $left_context = $self->{contextualize}->left_context($match, $forward);
- my $right_context = $self->{contextualize}->right_context($match, $forward);
-
- # Add context to match
- $match->add(
- Krawfish::Posting::Match::Context->new(
- left => $left_context,
- right => $right_context
- ));
-
- return $match;
-};
-
-
-# Next match
-sub next {
- my $self = shift;
- $self->{match} = undef;
- return $self->{query}->next;
-};
-
-
-sub to_string {
- my $str = 'enrichContext(' . $self->{contextualize}->to_string . ':';
- $str .= $_[0]->{query}->to_string;
- return $str . ')';
-};
-
-1;
diff --git a/lib/Krawfish/Compile/Segment/Enrich/CorpusClasses.pm b/lib/Krawfish/Compile/Segment/Enrich/CorpusClasses.pm
index efa9a9c..601c72c 100644
--- a/lib/Krawfish/Compile/Segment/Enrich/CorpusClasses.pm
+++ b/lib/Krawfish/Compile/Segment/Enrich/CorpusClasses.pm
@@ -1,8 +1,10 @@
package Krawfish::Compile::Segment::Enrich::CorpusClasses;
-use parent 'Krawfish::Compile';
use Krawfish::Koral::Result::Enrich::CorpusClasses;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile';
# Constructor
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Fields.pm b/lib/Krawfish/Compile/Segment/Enrich/Fields.pm
index 8cb14f0..65e1240 100644
--- a/lib/Krawfish/Compile/Segment/Enrich/Fields.pm
+++ b/lib/Krawfish/Compile/Segment/Enrich/Fields.pm
@@ -1,9 +1,11 @@
package Krawfish::Compile::Segment::Enrich::Fields;
-use parent 'Krawfish::Compile';
use Krawfish::Koral::Result::Enrich::Fields;
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile';
use constant DEBUG => 0;
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
index ffae60b..5734a2b 100644
--- a/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
@@ -1,12 +1,14 @@
package Krawfish::Compile::Segment::Enrich::Snippet;
use strict;
use warnings;
-use parent 'Krawfish::Compile';
use Krawfish::Koral::Result::Enrich::Snippet;
# use Krawfish::Compile::Segment::Enrich::Snippet::Highlights;
use Krawfish::Koral::Document::Stream;
use Krawfish::Koral::Document::Subtoken;
use Krawfish::Log;
+use Role::Tiny;
+
+with 'Krawfish::Compile';
use constant DEBUG => 1;
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element/Pagebreak.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element/Pagebreak.pm
index 5c4dd16..24a937a 100644
--- a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element/Pagebreak.pm
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element/Pagebreak.pm
@@ -1,7 +1,9 @@
package Krawfish::Compile::Segment::Enrich::Snippet::Element::Pagebreak;
-use parent 'Krawfish::Compile::Segment::Enrich::Snippet::Element';
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Enrich::Snippet::Element';
# TODO:
# Probably remove this support and
diff --git a/lib/Krawfish/Compile/Segment/Enrich/SortCriterion.pm b/lib/Krawfish/Compile/Segment/Enrich/SortCriterion.pm
index b388f0e..7f2e08d 100644
--- a/lib/Krawfish/Compile/Segment/Enrich/SortCriterion.pm
+++ b/lib/Krawfish/Compile/Segment/Enrich/SortCriterion.pm
@@ -1,7 +1,9 @@
package Krawfish::Compile::Segment::Enrich::SortCriterion;
-use parent 'Krawfish::Compile';
use warnings;
use strict;
+use Role::Tiny;
+
+with 'Krawfish::Compile';
# Enrich an item with sort criteria.
# This is necessary to sort items beyond the segment.
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Terms.pm b/lib/Krawfish/Compile/Segment/Enrich/Terms.pm
index a5299a0..3c8cb82 100644
--- a/lib/Krawfish/Compile/Segment/Enrich/Terms.pm
+++ b/lib/Krawfish/Compile/Segment/Enrich/Terms.pm
@@ -1,9 +1,12 @@
package Krawfish::Compile::Segment::Enrich::Terms;
use Krawfish::Koral::Result::Enrich::Terms;
-use parent 'Krawfish::Compile';
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile';
+
# TODO:
# Potentially rename to ::Terms! or ::Classes!
diff --git a/lib/Krawfish/Compile/Segment/Exist.pm b/lib/Krawfish/Compile/Segment/Exist.pm
index 4b02063..a1c425d 100644
--- a/lib/Krawfish/Compile/Segment/Exist.pm
+++ b/lib/Krawfish/Compile/Segment/Exist.pm
@@ -1,7 +1,10 @@
package Krawfish::Compile::Segment::Exist;
-use parent 'Krawfish::Compile';
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile';
+
# Check, if a certain query results in at least one single
# posting.
diff --git a/lib/Krawfish/Compile/Segment/Group.pm b/lib/Krawfish/Compile/Segment/Group.pm
index 7a663c8..20ea239 100644
--- a/lib/Krawfish/Compile/Segment/Group.pm
+++ b/lib/Krawfish/Compile/Segment/Group.pm
@@ -1,10 +1,15 @@
package Krawfish::Compile::Segment::Group;
-use parent 'Krawfish::Compile';
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
-use constant DEBUG => 1;
+with 'Krawfish::Compile';
+
+requires qw/group/;
+
+use constant DEBUG => 0;
+
# Override to compile data
sub compile {
@@ -48,26 +53,14 @@
};
-
-# Get collection
-sub collection {
- warn 'DEPRECATED';
- $_[0]->{result};
-};
-
-
-# Get collection
+# Get group
+# TODO:
+# rename to group_result
sub group {
$_[0]->{group};
};
-# Get collection
-# sub result {
-# $_[0]->{result};
-# };
-
-
# Get current posting
sub current {
return $_[0]->{query}->current;
diff --git a/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm b/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm
index 529267f..57bbe0b 100644
--- a/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm
+++ b/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm
@@ -1,9 +1,11 @@
package Krawfish::Compile::Segment::Group::ClassFrequencies;
-use parent 'Krawfish::Compile';
use Krawfish::Koral::Result::Group::ClassFrequencies;
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Group';
use constant DEBUG => 0;
diff --git a/lib/Krawfish/Compile/Segment/Group/Fields.pm b/lib/Krawfish/Compile/Segment/Group/Fields.pm
index 4fa653c..2ec4f8a 100644
--- a/lib/Krawfish/Compile/Segment/Group/Fields.pm
+++ b/lib/Krawfish/Compile/Segment/Group/Fields.pm
@@ -1,10 +1,12 @@
package Krawfish::Compile::Segment::Group::Fields;
-use parent 'Krawfish::Compile::Segment::Group';
use Krawfish::Koral::Result::Group::Fields;
use Krawfish::Util::Constants qw/NOMOREDOCS/;
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Group';
use constant DEBUG => 1;
diff --git a/lib/Krawfish/Compile/Segment/Group/Spans.pm b/lib/Krawfish/Compile/Segment/Group/Spans.pm
index 7085162..7279e68 100644
--- a/lib/Krawfish/Compile/Segment/Group/Spans.pm
+++ b/lib/Krawfish/Compile/Segment/Group/Spans.pm
@@ -1,8 +1,10 @@
package Krawfish::Compile::Segment::Group::Spans;
-use parent 'Krawfish::Compile';
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Group';
# This may be generalizable, but for the moment
# It should make it possible to group the span positions
diff --git a/lib/Krawfish/Compile/Segment/Group/TermExistence.pm b/lib/Krawfish/Compile/Segment/Group/TermExistence.pm
index 009c6ed..8736432 100644
--- a/lib/Krawfish/Compile/Segment/Group/TermExistence.pm
+++ b/lib/Krawfish/Compile/Segment/Group/TermExistence.pm
@@ -1,7 +1,9 @@
package Krawfish::Compile::Segment::Group::TermExistence;
-use parent 'Krawfish::Compile';
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Group';
# The query works similar to Or-query, but only accepts term ids.
diff --git a/lib/Krawfish/Compile/Segment/Sort.pm b/lib/Krawfish/Compile/Segment/Sort.pm
index 825399a..1ba5613 100644
--- a/lib/Krawfish/Compile/Segment/Sort.pm
+++ b/lib/Krawfish/Compile/Segment/Sort.pm
@@ -1,5 +1,4 @@
package Krawfish::Compile::Segment::Sort;
-use parent 'Krawfish::Compile::Segment::Bundle';
use Krawfish::Util::String qw/squote/;
use Krawfish::Util::PriorityQueue::PerDoc;
use Krawfish::Koral::Result::Match;
@@ -8,6 +7,10 @@
use Data::Dumper;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile';
+with 'Krawfish::Compile::Segment::BundleDocs';
# This is the general sorting implementation based on ranks.
#
@@ -18,6 +21,9 @@
# during search in a sort filter.
# TODO:
+# Split up the roles for better compositionality
+
+# TODO:
# Currently this is limited to fields and works different to subterms.
# So this may need to be renamed to Sort/ByField and Sort/ByFieldAfter.
@@ -25,6 +31,11 @@
# It's possible that fields return a rank of 0, indicating that
# the field does not exist for the document.
# They will always be sorted at the end.
+# In that case these fields have to be looked up, in case they are
+# potentially in the result set (meaning they are ranked before/after
+# the last accepted rank field). If so, they need to be remembered.
+# After a sort turn, the non-ranked fields are sorted into the ranked
+# fields. The field can be reranked at any time.
# TODO:
# Ranks should respect the ranking mechanism of FieldsRan,
@@ -65,7 +76,7 @@
#
my $query = $param{query};
- unless ($query->isa('Krawfish::Compile::Segment::Bundle')) {
+ unless (Role::Tiny::does_role($query, 'Krawfish::Compile::Segment::Bundle')) {
warn 'The query is no bundled query';
return;
};
@@ -365,6 +376,7 @@
};
+# Stringification
sub to_string {
my $self = shift;
my $str = 'sort(';
diff --git a/lib/Krawfish/Compile/Segment/Sort/Filter.pm b/lib/Krawfish/Compile/Segment/Sort/Filter.pm
deleted file mode 100644
index 9f723d9..0000000
--- a/lib/Krawfish/Compile/Segment/Sort/Filter.pm
+++ /dev/null
@@ -1,122 +0,0 @@
-package Krawfish::Compile::Segment::Sort::Filter;
-use parent 'Krawfish::Corpus';
-use Krawfish::Log;
-use strict;
-use warnings;
-
-warn 'NOT USED YET';
-
-use constant DEBUG => 0;
-
-# This is a corpus query implementation
-#
-# TODO: Better move to Krawfish::Corpus
-
-sub new {
- my $class = shift;
- my %param = @_;
-
- my $ranking = $param{index}->fields->ranked_by($param{field});
- my $max = $ranking->max if $param{desc};
-
- bless {
- query => $param{query},
- max_rank_ref => $param{max_rank_ref},
- field => $param{field},
- desc => $param{desc},
- ranking => $ranking,
- max => $max,
- init => 0
- }, $class;
-};
-
-
-# Forward to next document
-sub next {
- my $self = shift;
-
- my $query = $self->{query};
-
- # Get next document
- while ($query->next) {
-
- # Check object
- return 1 if $self->_check;
- };
-
- # No next
- return;
-};
-
-
-# Check the document id for the rank
-sub _check {
- my $self = shift;
-
- # Maximum rank reference
- my $max_rank_ref = $self->{max_rank_ref};
-
- # Get the current doc_id
- my $query = $self->{query};
- my $current = $query->current;
- my $doc_id = $current->doc_id;
-
- # Get rank for field
- my $rank = $self->{ranking}->get($doc_id);
-
- # Invert rank if descending field is required
- $rank = $self->{max} - $rank if $self->{max};
-
- if (DEBUG) {
- print_log('vc_sort_filter', 'Current posting is ' . $current->to_string);
- };
-
- # Rank is smaller then required
- if ($rank <= $$max_rank_ref) {
-
- # Document is fine
- $self->{current} = $current;
- return 1;
- };
-
- if (DEBUG) {
- print_log('vc_sort_filter', $current->to_string . ' is filtered out');
- };
-
- $self->{current} = undef;
- return;
-};
-
-
-# Get current document
-sub current {
- $_[0]->{current};
-};
-
-
-# Skip to the relevant document
-sub skip_doc {
- my ($self, $doc_id) = @_;
-
- my $query = $self->{query};
-
- # Skip the document
- if ($query->skip_doc($doc_id)) {
-
- # Return the document id, if it matches
- return $doc_id if $query->_check;
-
- # Get the next matching element
- if ($self->next) {
-
- # return the document id
- return $self->{current}->doc_id;
- };
- };
-
- # Fail
- return;
-};
-
-
-1;
diff --git a/lib/Krawfish/Compile/Segment/Sort/Priority.pm b/lib/Krawfish/Compile/Segment/Sort/Priority.pm
deleted file mode 100644
index 926676d..0000000
--- a/lib/Krawfish/Compile/Segment/Sort/Priority.pm
+++ /dev/null
@@ -1,149 +0,0 @@
-package Krawfish::Compile::Segment::Sort::Priority;
-use Krawfish::Util::PriorityQueue;
-use Krawfish::Log;
-use Data::Dumper;
-use strict;
-use warnings;
-
-# WARNING!
-# THIS IS DEPRECATED IN FAVOR OF Segment::Sort and Segment::SortAfter
-
-
-use constant DEBUG => 0;
-
-sub new {
- my $class = shift;
- my %param = @_;
-
- my $query = $param{query};
- my $fields = $param{fields};
- my $field = $param{field};
- my $desc = $param{desc} ? 1 : 0;
- my $top_k = $param{top_k};
-
- my $max_rank_ref = $param{max_rank_ref};
-
- # Create priority queue
- my $queue = Krawfish::Util::PrioritySort->new($top_k, $max_rank_ref);
-
- return bless {
- field_rank => $fields->ranked_by($field),
- field => $field,
- desc => $desc,
- query => $query,
- queue => $queue,
- list => undef,
- pos => -1
- }, $class;
-};
-
-
-# Init queue
-sub _init {
- my $self = shift;
-
- return if $self->{init}++;
-
- my $field_rank = $self->{field_rank};
-
- my $max;
- # Get maximum rank if descending order
- if ($self->{desc}) {
- $max = $field_rank->max;
- };
-
- my $query = $self->{query};
- my $queue = $self->{queue};
- my $last_doc_id = -1;
- my $rank;
-
- # Pass through all queries
- while ($query->next) {
-
- if (DEBUG) {
- print_log('p_sort', 'Get next posting from ' . $query->to_string);
- };
-
- # Clone record
- my $record = $query->current->clone;
-
- # Fetch rank if doc_id changes
- if ($record->doc_id != $last_doc_id) {
-
- # Get stored rank
- $rank = $field_rank->get($record->doc_id);
-
- # Revert if maximum rank is set
- $rank = $max - $rank if $max;
- };
-
- if (DEBUG) {
- print_log('p_sort', 'Rank for doc id ' . $record->doc_id . " is $rank");
- };
-
- # Insert into priority queue
- $queue->insert([$rank, 0, $record]);
- };
-
- # Get the rank reference
- $self->{list} = $queue->reverse_array;
- $self->{length} = $queue->length;
-};
-
-
-# Get next element from list
-sub next {
- my $self = shift;
- $self->_init;
- if ($self->{pos}++ < $self->{length}) {
- return 1;
- };
- return;
-};
-
-
-# Get current element
-sub current {
- my $self = shift;
-
- # 2 is the index of the value
- if (DEBUG) {
- print_log('p_sort', 'Get match from index ' . $self->{pos});
- };
-
- return $self->{list}->[$self->{pos}]->[2];
-};
-
-
-# Return the number of duplicates of the current match
-sub duplicate_rank {
- my $self = shift;
-
- if (DEBUG) {
- print_log('p_sort', 'Check for duplicates from index ' . $self->{pos});
- };
-
- return $self->{list}->[$self->{pos}]->[1] || 1;
-};
-
-
-# This returns an additional data structure with key/value pairs
-# in sorted order to document the sort criteria.
-# Like: [[class_1 => 'cba'], [author => 'Goethe']]...
-# This is necessary for the cluster-merge-sort
-sub current_sort {
- ...
-};
-
-
-sub to_string {
- my $self = shift;
- my $str = 'prioritySort(';
- $str .= $self->{desc} ? '^' : 'v';
- $str .= ',' . $self->{field} . ':';
- $str .= $self->{query}->to_string;
- return $str . ')';
-};
-
-
-1;
diff --git a/lib/Krawfish/Compile/Segment/Sort/PriorityCascade.pm b/lib/Krawfish/Compile/Segment/Sort/PriorityCascade.pm
deleted file mode 100644
index 58806e4..0000000
--- a/lib/Krawfish/Compile/Segment/Sort/PriorityCascade.pm
+++ /dev/null
@@ -1,470 +0,0 @@
-package Krawfish::Compile::Segment::Sort::PriorityCascade;
-use parent 'Krawfish::Compile';
-use Krawfish::Util::String qw/squote/;
-use Krawfish::Util::PriorityQueue::PerDoc;
-use Krawfish::Koral::Result;
-use Krawfish::Posting::Bundle;
-use Krawfish::Log;
-use Data::Dumper;
-use strict;
-use warnings;
-
-# WARNING!
-# THIS IS DEPRECATED IN FAVOR OF Segment::Sort and Segment::SortAfter
-
-# This is only based on criteria that return ranks
-
-use constant {
- DEBUG => 0,
- RANK => 0,
- SAME => 1,
- VALUE => 2,
- MATCHES => 3
-};
-
-# TODO:
-# my $offset = $param{offset};
-# This may however not work in a multi-segment
-# or cluster scenario - so let's forget about it
-
-# TODO:
-# It's possible that fields return a rank of 0, indicating that
-# the field is not yet ranked.
-# In that case these fields have to be looked up, in case they are
-# potentially in the result set (meaning they are ranked before/after
-# the last accepted rank field). If so, they need to be remembered.
-# After a sort turn, the non-ranked fields are sorted in the ranked
-# fields. The field can be reranked any time.
-
-# TODO:
-# Ranks should respect the ranking mechanism of FieldsRank and
-# TermRank, where only even values are fine and odd values need
-# to be sorted in a separate step.
-
-sub new {
- my $class = shift;
- my %param = @_;
-
- # TODO:
- # Check for mandatory parameters
- my $query = $param{query};
-
- # This is the index element
- my $index = $param{index};
- my $top_k = $param{top_k};
-
- # This is the fields element
- # It has the structure [[field], [field, 1]]
- # where the second value is the descending marker
- my $fields = $param{fields};
- # TODO: Change to criterion!
-
- # For final field distinction, use unique field
- push @$fields, [$param{unique}];
-
- # The maximum ranking value may be used
- # by outside filters to know in advance,
- # if a document can't be part of the result set
- my $max_rank_ref;
- if (defined $param{max_rank_ref}) {
-
- # Get reference from definition
- $max_rank_ref = $param{max_rank_ref};
- }
- else {
-
- # Create a new reference
- $max_rank_ref = \(my $max_rank = $index->max_rank);
- };
-
- # Create initial priority queue
- my $queue = Krawfish::Util::PriorityQueue::PerDoc->new(
- $top_k,
- $max_rank_ref
- );
-
- # Construct
- return bless {
- fields => $fields,
- index => $index,
- top_k => $top_k,
- query => $query,
- queue => $queue,
- max_rank_ref => $max_rank_ref,
- stack => [], # All lists on a stack
- sorted => [],
- pos => 0
- }, $class;
-};
-
-
-# Initialize the sorting - this will do a full run!
-sub _init {
- my $self = shift;
-
- # Result already initiated
- return if $self->{init}++;
-
- my $query = $self->{query};
-
- # Get first sorting criterion
- my ($field, $desc) = @{$self->{fields}->[0]};
-
- # Get ranking
- my $ranking = $self->{index}->fields->ranked_by($field);
-
- # Get maximum rank if descending order
- my $max = $ranking->max if $desc;
-
- # Get maximum accepted rank from queue
- my $max_rank_ref = $self->{max_rank_ref};
-
- my $last_doc_id = -1;
- my $rank;
- my $queue = $self->{queue};
-
- # Store the last match buffered
- my $match;
-
- if (DEBUG) {
- print_log('p_sort', qq!Next Rank on field "$field"!);
- };
-
- # Pass through all queries
- while ($match || ($query->next && ($match = $query->current))) {
-
- if (DEBUG) {
- print_log('p_sort', 'Get next posting from ' . $query->to_string);
- };
-
- # Get stored rank
- $rank = $ranking->get($match->doc_id);
-
- # Revert if maximum rank is set
- $rank = $max - $rank if $max;
-
- if (DEBUG) {
- print_log('p_sort', 'Rank for doc id ' . $match->doc_id . " is $rank");
- };
-
- # Precheck if the match is relevant
- if ($rank <= $$max_rank_ref) {
-
- # Create new bundle of matches
- my $bundle = Krawfish::Posting::Bundle->new($match->clone);
-
- # Remember doc_id
- $last_doc_id = $match->doc_id;
- $match = undef;
-
- # Iterate over next queries
- while ($query->next) {
-
- # New match should join the bundle
- if ($query->current->doc_id == $last_doc_id) {
-
- # Add match to bundle
- $bundle->add($query->current);
- }
-
- # New match is new
- else {
-
- # Remember match for the next tome
- $match = $query->current;
- last;
- };
- };
-
- # Insert into priority queue
- $queue->insert([$rank, 0, $bundle, $bundle->length]) if $bundle;
- }
-
- # Document is irrelevant
- else {
- $match = undef;
- };
- };
-
- print_log('p_sort', 'Get list ranking') if DEBUG;
-
- # Get the rank reference
- $self->{stack} = [$queue->reverse_array];
-};
-
-
-# Move to the next item in the sorted list
-sub next {
- my $self = shift;
-
- if ($self->{pos}++ >= $self->{top_k}) {
-
- if (DEBUG) {
- print_log(
- 'p_sort',
- 'top_k ' . $self->{top_k} . ' is reached at position ' . $self->{pos}
- );
- };
-
- $self->{current} = undef;
- return;
- };
-
- # Initialize query - this will do a full run on the first field level!
- $self->_init;
-
- # There are sorted results in the result list
- if (scalar @{$self->{sorted}}) {
-
- # Make this current
- $self->{current} = shift @{$self->{sorted}};
-
- if (DEBUG) {
- print_log(
- 'p_sort',
- 'There is already a match in [sorted]: ' . $self->{current}->to_string,
- );
- };
-
- return 1;
- }
-
- # Nothing presorted
- elsif (DEBUG) {
- print_log('p_sort', 'There is no match in [sorted]');
- };
-
- # Get the list values
- my $stack = $self->{stack};
-
- # The result list is empty - sort next items
- # if ($self->{presorted}) {
- # };
-
- # This will get the level from the stack
- my $level = $#{$stack};
-
- print_log('p_sort', "Check stack on current level $level") if DEBUG;
-
- # If the current list is empty, remove from stack
- while (scalar @$stack && (
- !scalar(@{$stack->[$level]}) ||
- !scalar(@{$stack->[$level]->[0]})
- )) {
-
- print_log('p_sort', "Stack is empty at least on level $level") if DEBUG;
-
- pop @$stack;
- $level--;
-
- if (DEBUG) {
- print_log('p_sort', "Stack is reduced to level $level with " . Dumper($stack));
- };
- };
-
- # There is nothing to sort further
- unless (scalar @$stack) {
-
- print_log('p_sort', 'There is nothing to sort further') if DEBUG;
-
- $self->{current} = undef;
- return;
- };
-
- # while (my $same = $list->[0]->[SAME]) {
- # $list = $self->heap_sort();
- # };
-
- # TODO:
- # Depending on how many identical ranks exist,
- # here the next strategy should be chosen.
- # Either sort in place, or sort using heapsort again.
-
-
- # The first item in the current list has multiple identical ranks
- # As long as the first item in the list has duplicates,
- # order by the next level
- while ((my $same = ($stack->[$level]->[0]->[SAME] // 1)) > 1) {
-
- if (DEBUG) {
- print_log(
- 'p_sort',
- "Found $same matches at first node",
- " on level $level in " . _string_array($stack->[$level])
- );
- };
-
- # Get the identical elements from the list
- my @presort = splice(@{$stack->[$level]}, 0, $same - 1);
-
- print_log('p_sort', 'Presort array is ' . _string_array(\@presort)) if DEBUG;
- # TODO: Push presort on the stack!
-
- # This is the new top_k!
- # TODO: Check if this is really correct!
- my $top_k = $self->{top_k} - ($self->{pos} - 1);
-
- # Get next field to rank on level
- # level 0 is preinitialized, so it is one off
- my ($field, $desc) = @{$self->{fields}->[$level + 1]};
-
- if (DEBUG) {
- print_log('p_sort', qq!Next Rank on field "$field"!);
- };
-
- $level++;
-
- # TODO:
- # If the same count is smaller than X (at least top_k - pos)
- # do quicksort or something similar
- # if ($same < $top_k || $same < 128) {
- # }
- # else
- $stack->[$level] = $self->heap_sort($top_k, \@presort, $field, $desc);
- # };
-
- if (DEBUG) {
- print_log(
- 'p_sort',
- "Sorted array",
- " on new level $level is " . _string_array($stack->[$level])
- );
- };
- };
-
- # There are matches on the list without identical ranks
-
- if (DEBUG) {
- print_log('p_sort', "Stack with level $level is " . Dumper($stack));
- };
-
- # Get the top list entry
- my $top = shift @{$stack->[$level]};
-
- print_log('p_sort', 'Push value ' . $top->[VALUE]) if DEBUG;
-
- # Push matches to result list
- push @{$self->{sorted}}, $top->[VALUE]->unbundle;
-
- # Make the first match the current
- # TODO: Be aware! This is a BUNDLE!
- $self->{current} = shift @{$self->{sorted}};
- return 1;
-};
-
-
-sub _string_array {
- my $array = shift;
- my $str = '';
- foreach (@$array) {
- $str .= '[';
- $str .= 'R:' . $_->[RANK] . ';';
- $str .= ($_->[SAME] ? 'S:' . $_->[SAME] . ';' : '');
- $str .= ($_->[MATCHES] ? 'M:' . $_->[MATCHES] : '');
- $str .= ']';
- };
- return $str;
-};
-
-
-# Todo:
-# Accept an iterator, a ranking, and return an iterator
-sub heap_sort {
- my ($self, $top_k, $sub_list, $field, $desc) = @_;
-
- if (DEBUG) {
- print_log('p_sort', 'Heapsort list of length ' . scalar(@$sub_list) .
- qq! by field "$field" for top_k = $top_k!);
- };
-
- my $index = $self->{index};
- my $ranking = $index->fields->ranked_by($field);
-
- # Get maximum rank if descending order
- my $max = $ranking->max if $desc;
-
- # Get maximum rank
- my $max_rank = $index->max_rank;
- my $max_rank_ref = \$max_rank;
-
- # Create new priority queue
- my $queue = Krawfish::Util::PriorityQueue::PerDoc->new(
- $top_k,
- $max_rank_ref
- );
-
- my $rank;
-
- # Iterate over list
- foreach (@$sub_list) {
- my $bundle = $_->[VALUE];
-
- # Get stored rank
- $rank = $ranking->get($bundle->doc_id);
-
- # Revert if maximum rank is set
- $rank = $max - $rank if $max;
-
- # Insert into queue
- $queue->insert([$rank, 0, $bundle, $bundle->length]);
- };
-
- # Return reverse list
- return $queue->reverse_array;
-};
-
-
-# Return the current match
-sub current {
-
- if (DEBUG) {
- print_log('p_sort', 'Current posting is ' . $_[0]->{current}->to_string);
- };
-
- $_[0]->{current};
-};
-
-sub current_match {
- my $self = shift;
- my $current = $self->current or return;
- my $match = Krawfish::Koral::Result::Match->new(
- doc_id => $current->doc_id,
- start => $current->start,
- end => $current->end,
- payload => $current->payload,
- );
-
- if (DEBUG) {
- print_log('p_sort', 'Current match is ' . $match->to_string);
- };
-
- return $match;
-};
-
-# Return the number of duplicates of the current match
-sub duplicate_rank {
- my $self = shift;
-
- if (DEBUG) {
- print_log('p_sort', 'Check for duplicates from index ' . $self->{pos});
- };
-
- return $self->{list}->[$self->{pos}]->[1] || 1;
-};
-
-
-sub to_string {
- my $self = shift;
- my $str = 'resultSorted([';
- $str .= join(',', map { squote($_->[0]) . ($_->[1] ? '>' : '<') } @{$self->{fields}});
- $str .= ']';
- $str .= ',0-' . $self->{top_k} if $self->{top_k};
- $str .= ':' . $self->{query}->to_string;
- return $str . ')';
-};
-
-
-1;
-
-__END__
-
diff --git a/lib/Krawfish/Compile/Segment/Sort/Simple.pm b/lib/Krawfish/Compile/Segment/Sort/Simple.pm
deleted file mode 100644
index 5f438d2..0000000
--- a/lib/Krawfish/Compile/Segment/Sort/Simple.pm
+++ /dev/null
@@ -1,85 +0,0 @@
-package Krawfish::Compile::Segment::Sort::Simple;
-use Krawfish::Log;
-use strict;
-use warnings;
-
-warn 'NOT USED YET';
-
-# This should be used fur subsequent
-# sorting following the first pass
-
-# May use insertion sort for small numbers
-# of duplicates.
-
-# This may very well be a PrioritySort,
-# so initially there is a very simple
-# querier that only add rank and same elements
-# and subsequential they are ranked
-
-sub new {
- my $class = shift;
- my %param = @_;
-
- my $query = $param{query};
- my $fields = $param{fields};
- my $field = $param{field};
- my $desc = $param{desc} ? 1 : 0;
-
- my $top_k = $param{top_k};
-
- return bless {
- field_rank => $fields->ranked_by($field),
- field => $field,
- desc => $desc,
- query => $query,
- queue => $queue,
- list => undef,
- pos => -1
- }, $class;
-};
-
-sub next {
-
- # TODO:
- # In case the sorting before
- # results in a very bad configuration
- # (lots of duplicates in the final pos),
- # choose a different strategy!
-
- my $field_rank = $self->{field_rank};
-
- my $max;
- # Get maximum rank if descending order
- if ($self->{desc}) {
- $max = $field_rank->max;
- };
-
- my $query = $self->{query};
-
- while ($query->next) {
- if (DEBUG) {
- print_log('s_sort', 'Get next posting from ' . $query->to_string);
- };
-
- # The rank is totally fine
- if ($query->duplicate_rank == 1) {
- $self->{pos} = 0;
- $self->{list} = [$query->current];
- return 1;
- }
-
- # The rank has many duplicates
- else {
-
- # Sort elements!
- my $elements = $query->duplicate_rank;
- for (1..$elements) {
- $query->next;
-
-# # Clone record
-# my $record = $query->current->clone;
-
- };
- };
- };
-};
diff --git a/lib/Krawfish/Compile/Segment/SortAfter.pm b/lib/Krawfish/Compile/Segment/SortAfter.pm
index d1f3642..ee964b6 100644
--- a/lib/Krawfish/Compile/Segment/SortAfter.pm
+++ b/lib/Krawfish/Compile/Segment/SortAfter.pm
@@ -1,9 +1,17 @@
package Krawfish::Compile::Segment::SortAfter;
-use parent 'Krawfish::Compile::Segment::Sort';
use Data::Dumper;
use Krawfish::Log;
use strict;
use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Compile::Segment::Sort';
+
+# TODO:
+# Split this up, so it can be composed
+# using the same roles as ::Sort,
+# by changing the get_bundle_from_buffer
+# method.
# This sorting query is similar to
# Krawfish::Compile::Segment::Sort,
diff --git a/lib/Krawfish/Koral.pm b/lib/Krawfish/Koral.pm
index 6da68d4..3b2b272 100644
--- a/lib/Krawfish/Koral.pm
+++ b/lib/Krawfish/Koral.pm
@@ -120,6 +120,12 @@
# Compile part of the Koral object
+# TODO:
+# It may be better to have a separation of
+# - groupBy
+# - sortBy
+# - aggregateBy
+# - enrichBy
sub compilation {
my $self = shift;
if ($_[0]) {
diff --git a/t/compile/sort_priority.t b/t/compile/sort_priority.t
deleted file mode 100644
index 8cd8010..0000000
--- a/t/compile/sort_priority.t
+++ /dev/null
@@ -1,90 +0,0 @@
-use Test::More;
-use Test::Krawfish;
-use strict;
-use warnings;
-
-use_ok('Krawfish::Index');
-use_ok('Krawfish::Koral::Query::Builder');
-use_ok('Krawfish::Compile::Segment::Sort::Priority');
-
-my $index = Krawfish::Index->new;
-
-ok_index($index, {
- docID => 7,
- author => 'Carol'
-} => [qw/aa bb/], 'Add complex document');
-ok_index($index, {
- docID => 3,
- author => 'Arthur'
-} => [qw/aa bb cc/], 'Add complex document');
-ok_index($index, {
- docID => 1,
- author => 'Bob'
-} => [qw/aa bb cc/], 'Add complex document');
-
-my $kq = Krawfish::Koral::Query::Builder->new;
-
-my $query = $kq->bool_or('aa', 'bb');
-
-TODO: {
- local $TODO = 'Test further - priority sorting does not yet work distributed'
-};
-
-
-done_testing;
-__END__
-
-# Set maximum rank reference to the last doc id of the index
-my $max_rank = $index->max_rank;
-
-# Get sort object
-ok(my $sort = Krawfish::Result::Sort::Priority->new(
- query => $query->normalize->finalize->optimize($index),
- field => 'docID',
- fields => $index->fields,
- top_k => 2,
- max_rank_ref => \$max_rank
-), 'Create sort object');
-
-# This will be sorted by the doc id,
-# so the doc-id=1 document will show up first
-ok($sort->next, 'First next');
-
-is($sort->current->doc_id, 2, 'Obj');
-ok($sort->next, 'Next');
-is($sort->current->doc_id, 2, 'Obj');
-ok(!$sort->next, 'No more next');
-
-# Next try
-$max_rank = $index->max_rank;
-ok($sort = Krawfish::Result::Sort::Priority->new(
- query => $query->normalize->finalize->optimize($index),
- fields => $index->fields,
- field => 'docID',
- desc => 1,
- top_k => 3,
- max_rank_ref => \$max_rank
-), 'Create sort object');
-
-# Although top_k is set,
-# the list exceeds the limit
-ok($sort->next, 'First next');
-is($sort->current->doc_id, 0, 'Obj');
-is($sort->duplicate_rank, 2, 'Duplicates');
-ok($sort->next, 'Next');
-is($sort->current->doc_id, 0, 'Obj');
-is($sort->duplicate_rank, 1, 'Duplicates');
-ok($sort->next, 'No more next');
-is($sort->current->doc_id, 1, 'Obj');
-is($sort->duplicate_rank, 2, 'Duplicates');
-ok($sort->next, 'No more next');
-is($sort->current->doc_id, 1, 'Obj');
-is($sort->duplicate_rank, 1, 'Duplicates');
-ok(!$sort->next, 'No more next');
-
-is($sort->to_string, "prioritySort(^,docID:or('aa','bb'))", 'Stringification');
-
-done_testing;
-__END__
-
-
diff --git a/t/compile/sort_priority_cascade.t b/t/compile/sort_priority_cascade.t
deleted file mode 100644
index c519cf3..0000000
--- a/t/compile/sort_priority_cascade.t
+++ /dev/null
@@ -1,92 +0,0 @@
-use Test::More;
-use Test::Krawfish;
-use strict;
-use warnings;
-
-use_ok('Krawfish::Index');
-use_ok('Krawfish::Koral::Query::Builder');
-use_ok('Krawfish::Compile::Segment::Sort::PriorityCascade');
-use_ok('Krawfish::Compile::Segment::Enrich::Fields');
-
-my $index = Krawfish::Index->new;
-
-ok_index($index, {
- docID => 7,
- author => 'Arthur'
-} => [qw/aa bb/], 'Add complex document');
-ok_index($index, {
- docID => 3,
- author => 'Arthur'
-} => [qw/aa bb cc/], 'Add complex document');
-ok_index($index, {
- docID => 1,
- author => 'Bob'
-} => [qw/aa bb cc/], 'Add complex document');
-
-my $kq = Krawfish::Koral::Query::Builder->new;
-
-my $query = $kq->bool_or('aa', 'bb');
-
-
-TODO: {
- local $TODO = 'Test further - priority sorting does not yet work distributed'
-};
-
-
-done_testing;
-__END__
-
-# Set maximum rank reference to the last doc id of the index
-my $max_rank = $index->max_rank;
-
-
-# Get sort object
-ok(my $sort = Krawfish::Result::Sort::PriorityCascade->new(
- query => $query->normalize->finalize->optimize($index),
- index => $index,
- fields => [
- ['author'], # Order by author with highest priority
- ['docID'] # Then by doc id
- ],
- unique => 'docID',
- top_k => 3,
- max_rank_ref => \$max_rank
-), 'Create sort object');
-
-
-ok(my $sort_fields = Krawfish::Result::Segment::Enrich::Fields->new(
- $index,
- $sort,
- ['author', 'docID']
-), 'Create fields object');
-
-
-# This will be sorted by the doc id,
-# so the doc-id=1 document will show up first
-ok($sort_fields->next, 'First next');
-is($sort_fields->current_match->to_string,
- q![1:0-1|author='Arthur';docID='3']!, 'Match');
-is($sort_fields->current->doc_id, 1, 'DocID');
-
-
-ok($sort_fields->next, 'Next');
-is($sort_fields->current_match->to_string,
- q![1:1-2|author='Arthur';docID='3']!, 'Match');
-is($sort_fields->current->doc_id, 1, 'DocID');
-
-
-ok($sort_fields->next, 'Next');
-is($sort_fields->current_match->to_string,
- q![0:0-1|author='Arthur';docID='7']!, 'Match');
-is($sort_fields->current->doc_id, 0, 'DocID');
-
-
-ok(!$sort_fields->next, 'Next');
-
-TODO: {
- local $TODO = 'Test with unique field';
-};
-
-done_testing;
-__END__
-