Rename Meta to Compile Change-Id: Ief55855e2642a64672a840ece9d9fd876a49d373

commit: 5cf5fca327157df9d7c3aa74cc651e52b630d4db [log] [tgz]
author: Akron <nils@diewald-online.de> Mon Oct 09 19:01:47 2017 +0200
committer: Akron <nils@diewald-online.de> Mon Oct 09 19:01:47 2017 +0200
tree: 5f23c44dba64aeee9fe659f91393663c58d27d4a
parent: a3e70a02ac3490431340042e7915ee697ec188f2 [diff]
diff --git a/lib/Krawfish/Compile/Segment/Aggregate.pm b/lib/Krawfish/Compile/Segment/Aggregate.pm
new file mode 100644
index 0000000..0ad04fc
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Aggregate.pm

@@ -0,0 +1,136 @@
+package Krawfish::Compile::Segment::Aggregate;
+use parent 'Krawfish::Compile';
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+# TODO:
+#   It may be better to have one aggregator per match
+#   and one aggregator per doc.
+
+# Aggregate values of matches per document and
+# per match.
+
+# TODO:
+#   See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html
+
+# TODO: Sort all ops for each_match and each_doc support
+sub new {
+  my $class = shift;
+  my $result = {};
+  my $self = bless {
+    query       => shift,
+    ops         => shift,
+
+    last_doc_id => -1,
+    collection  => $result,
+    last_doc_id => -1,
+    finished    => 0
+  }, $class;
+
+  # The aggregation needs to trigger on each match
+  my (@each_doc, @each_match);
+  foreach my $op (@{$self->{ops}}) {
+    if ($op->can('each_match')) {
+      push @each_match, $op;
+    };
+
+    # The aggregation needs to trigger on each doc
+    if ($op->can('each_doc')) {
+      push @each_doc, $op;
+    };
+  };
+
+  $self->{each_doc} = \@each_doc;
+  $self->{each_match} = \@each_match;
+
+  return $self;
+};
+
+
+sub collection {
+  $_[0]->{collection};
+};
+
+
+# TODO:
+#   Add collection data to result document
+sub collect {
+  my ($self, $result) = @_;
+
+  # Add collect
+  $result->add_collection($self->collection);
+
+  # Collect result from nested query
+  $self->{query}->collect($result);
+  return $result;
+};
+
+
+# Iterate to the next result
+sub next {
+  my $self = shift;
+
+  # Get container object
+  my $collection = $self->collection;
+
+  # There is a next match
+  # TODO:
+  #   If there is no operand per match, only use next_doc
+  if ($self->{query}->next) {
+
+    # Get the current posting
+    my $current = $self->{query}->current;
+
+    if ($current->doc_id != $self->{last_doc_id}) {
+
+      # Collect data of current operation
+      foreach (@{$self->{each_doc}}) {
+        $_->each_doc($current, $collection);
+      };
+
+      # Set last doc to current doc
+      $self->{last_doc_id} = $current->doc_id;
+    };
+
+    # Collect data of current operation
+    foreach (@{$self->{each_match}}) {
+      $_->each_match($current, $collection);
+    };
+
+    return 1;
+  };
+
+  # Release on_finish event
+  unless ($self->{finished}) {
+    foreach (@{$self->{ops}}) {
+      $_->on_finish($collection);
+    };
+    $self->{finished} = 1;
+  };
+
+  return 0;
+};
+
+
+sub current {
+  return $_[0]->{query}->current;
+};
+
+
+sub to_string {
+  my $self = shift;
+  my $str = 'aggr(';
+  $str .= '[' . join(',', map { $_->to_string } @{$self->{ops}}) . ']:';
+  $str .= $self->{query}->to_string;
+  return $str . ')';
+};
+
+# Shorthand for "search through"
+sub finalize {
+  while ($_[0]->next) {};
+  return $_[0];
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Base.pm b/lib/Krawfish/Compile/Segment/Aggregate/Base.pm
new file mode 100644
index 0000000..efc4278
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Base.pm

@@ -0,0 +1,32 @@
+package Krawfish::Compile::Segment::Aggregate::Base;
+use strict;
+use warnings;
+
+# Does not need anything in the object
+sub new {
+  my $class = shift;
+  bless \(my $self = ''), $class;
+};
+
+# Per default do nothing
+sub each_doc {
+};
+
+# Per default do nothing
+sub each_match {
+};
+
+# Per default do nothing
+sub on_finish {
+};
+
+# Not implemented on base
+sub to_string {
+  ...
+};
+
+sub collection {
+  ...
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Fields.pm b/lib/Krawfish/Compile/Segment/Aggregate/Fields.pm
new file mode 100644
index 0000000..2887f70
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Fields.pm

@@ -0,0 +1,158 @@
+package Krawfish::Compile::Segment::Aggregate::Fields;
+use parent 'Krawfish::Compile::Segment::Aggregate::Base';
+use Krawfish::Koral::Result::Aggregate::Fields;
+use Krawfish::Util::String qw/squote/;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+
+# TODO:
+#   Simplify the counting by mapping the requested fields to
+#   an array, that points to a map.
+
+# TODO:
+#   Look for fast int => int hash maps
+#   http://java-performance.info/implementing-world-fastest-java-int-to-int-hash-map/
+#   http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
+#   https://gist.github.com/badboy/6267743
+
+# TODO:
+#   Support corpus classes!
+
+# TODO:
+#   In case the field has no rank, because it is a multivalued field,
+#   a different mechanism has to be used!
+
+# TODO: It may be beneficial to store example documents in the
+#   field ranks, too - so they don't need to be collected on the way ...
+#   See Group::Fields as well.
+#   For this, add a "witness" field
+
+# TODO:
+#   Field aggregates should be sortable either <asc> or <desc>,
+#   and should have a count limitation, may be even a start_index and an items_per_page
+
+
+sub new {
+  my $class = shift;
+  bless {
+    field_obj  => shift,
+    field_keys => [map { ref($_) ? $_->term_id : $_ } @{shift()}],
+
+    # TODO:
+    #   This needs to be an object, so it can be inflated again!
+    # collection => {}, # The buckets in memory
+
+    aggregation => Krawfish::Koral::Result::Aggregate::Fields->new,
+
+    freq    => 0,
+    field_freqs => {}
+  }, $class;
+};
+
+
+# Initialize field pointer
+sub _init {
+  return if $_[0]->{field_pointer};
+
+  my $self = shift;
+
+  print_log('aggr_fields', 'Create pointer on fields') if DEBUG;
+
+  # Load the ranked list - may be too large for memory!
+  $self->{field_pointer} = $self->{field_obj}->pointer;
+};
+
+
+# On every doc
+sub each_doc {
+  my ($self, $current) = @_;
+
+  $self->_init;
+
+  print_log('aggr_fields', 'Aggregate on fields') if DEBUG;
+
+  my $doc_id = $current->doc_id;
+
+  my $pointer = $self->{field_pointer};
+
+  # Set match frequencies to all remembered doc frequencies
+  my $aggr = $self->{aggregation};
+
+  # Skip to document in question
+  # TODO:
+  #   skip_doc should ALWAYS return either the document or NOMOREDOC!
+
+  if ($pointer->skip_doc($doc_id) == $doc_id) {
+
+    $aggr->flush;
+
+    # my $coll = $self->{collection};
+
+    # Get all requested fields
+    # my @fields;
+
+    if (DEBUG) {
+      print_log('aggr_fields', 'Look for frequencies for key ids ' .
+                  join(', ', map { '#' . $_ } @{$self->{field_keys}}) . " in doc $doc_id");
+    };
+
+    # Iterate over all fields
+    foreach my $field ($pointer->fields(@{$self->{field_keys}}))  {
+
+      # This should probably be a method in the fields pointer!
+      next if $field->type eq 'store';
+
+      # Increment occurrence
+      $aggr->incr_doc($field->key_id, $field->term_id);
+
+      if (DEBUG) {
+        print_log('aggr_fields', '#' . $field->term_id . ' has frequencies');
+      };
+    };
+  }
+
+  # Do not check rank
+  else {
+    $aggr->flush;
+  };
+};
+
+
+# On every match
+sub each_match {
+  $_[0]->{aggregation}->incr_match;
+};
+
+
+# finish the results
+sub on_finish {
+  my ($self, $collection) = @_;
+
+  $self->{aggregation}->flush;
+
+  $collection->{fields} = $self->{aggregation};
+};
+
+
+sub collection {
+  # Return fields
+  # Example structure for year
+  # {
+  #   1997 => [4, 67],
+  #   1998 => [5, 89],
+  #   1999 => [3, 20]
+  # }
+  $_[0]->{collection};
+};
+
+
+sub to_string {
+  return 'fields:' . join(',', map { '#' . $_ } @{$_[0]->{field_keys}});
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Frequencies.pm b/lib/Krawfish/Compile/Segment/Aggregate/Frequencies.pm
new file mode 100644
index 0000000..099c092
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Frequencies.pm

@@ -0,0 +1,31 @@
+package Krawfish::Compile::Segment::Aggregate::Frequencies;
+use parent 'Krawfish::Compile::Segment::Aggregate::Base';
+use Krawfish::Log;
+use strict;
+use warnings;
+
+# Count the frequencies of all matches of the query
+# per doc and per match
+
+# TODO:
+#   Support virtual corpus classes
+
+# Add to totalResources immediately
+sub each_doc {
+  $_[2]->{totalResources}++;
+};
+
+
+# Add to totalResults immediately
+sub each_match {
+  $_[2]->{totalResults}++;
+};
+
+
+# Stringification
+sub to_string {
+  'freq'
+};
+
+1;
+

diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Length.pm b/lib/Krawfish/Compile/Segment/Aggregate/Length.pm
new file mode 100644
index 0000000..87f9274
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Length.pm

@@ -0,0 +1,59 @@
+package Krawfish::Compile::Segment::Aggregate::Length;
+use parent 'Krawfish::Compile::Segment::Aggregate::Base';
+use Krawfish::Log;
+use strict;
+use warnings;
+
+# This will check the hits length in subtokens -
+# currently other word lengths are not supported
+
+# See https://en.wikipedia.org/wiki/Selection_algorithm
+# for algorithms to find the median or similar.
+
+
+use constant DEBUG => 0;
+
+sub new {
+  my $class = shift;
+  bless {
+    segment => shift,
+    query => shift,
+    min  => 32_000,
+    max  => 0,
+    sum  => 0,
+    freq => 0
+  }, $class;
+};
+
+
+# On every match
+sub each_match {
+  my ($self, $current) = @_;
+  my $length = $current->end - $current->start;
+  $self->{min} = $length < $self->{min} ? $length : $self->{min};
+  $self->{max} = $length > $self->{max} ? $length : $self->{max};
+  $self->{sum} += $length;
+  $self->{freq}++;
+};
+
+
+# Finish the aggregation
+sub on_finish {
+  my ($self, $collection) = @_;
+
+  return if $self->{freq} == 0;
+  $collection->{length} = {
+    min  => $self->{min},
+    max  => $self->{max},
+    sum  => $self->{sum},
+    avg  => $self->{sum} / $self->{freq}
+  };
+};
+
+
+# Stringification
+sub to_string {
+  'length'
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Aggregate/TermExistence.pm b/lib/Krawfish/Compile/Segment/Aggregate/TermExistence.pm
new file mode 100644
index 0000000..fe9ed04
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Aggregate/TermExistence.pm

@@ -0,0 +1,8 @@
+# Similar to TermFreq, but stops
+# calculating after one match in the virtual corpus.
+# This probably requires a single list of term queries, that can be
+# closed, once a match occurs.
+
+# Probably better suited in Group
+
+__END__

diff --git a/lib/Krawfish/Compile/Segment/Aggregate/TermFreq.pm b/lib/Krawfish/Compile/Segment/Aggregate/TermFreq.pm
new file mode 100644
index 0000000..d76ebe3
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Aggregate/TermFreq.pm

@@ -0,0 +1,67 @@
+package Krawfish::Compile::Segment::Aggregate::TermFreq;
+use parent 'Krawfish::Compile::Segment::Aggregate::Base';
+use Krawfish::Util::String qw/squote/;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+# Counts the frequency for each term in a TermFrequency
+# query. This is necessary for co-occurrence search and the
+# Glemm service.
+
+
+# TODO:
+#   This is rather a group query than an aggregation query.
+
+use constant DEBUG => 0;
+
+sub new {
+  my $class = shift;
+  my $self = bless {
+    index => shift,
+    term_query => shift,
+    freq => 0
+  }, $class;
+
+  # The term never occurs
+  unless ($self->{term}->next) {
+    $self->{term_query} = undef;
+  };
+
+  return $self;
+};
+
+sub each_doc {
+  my ($self, $current) = @_;
+
+  return unless $self->{term_query};
+
+  # Get the current doc_id from the VC
+  my $doc_id = $current->doc_id;
+
+  my $term = $self->{term_query};
+
+  # Check, if the term occurs in the doc
+  if ($term->current->doc_id == $doc_id || $term->skip_doc($doc_id) == $doc_id) {
+
+    # Add frequency in document to result
+    $self->{freq} += $term->freq_in_doc;
+  };
+};
+
+
+# Finish the result
+sub on_finish {
+  my ($self, $result) = @_;
+
+  my $term = $self->{term_query}->term;
+  my $freq = ($result->{freq} //= {});
+  $frew->{$term} = $self->{freq};
+};
+
+# Stringification
+sub to_string {
+  return 'tfreq:' . squote($self->{term_query}->term);
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Aggregate/TokenFreq.pm b/lib/Krawfish/Compile/Segment/Aggregate/TokenFreq.pm
new file mode 100644
index 0000000..e165bd3
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Aggregate/TokenFreq.pm

@@ -0,0 +1 @@
+# Similar to TermFreq but will count the number of tokens per document.

diff --git a/lib/Krawfish/Compile/Segment/Aggregate/Values.pm b/lib/Krawfish/Compile/Segment/Aggregate/Values.pm
new file mode 100644
index 0000000..71b0711
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Aggregate/Values.pm

@@ -0,0 +1,107 @@
+package Krawfish::Compile::Segment::Aggregate::Values;
+use parent 'Krawfish::Compile::Segment::Aggregate::Base';
+use Krawfish::Koral::Result::Aggregate::Values;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+# TODO:
+#   Rename to FieldCalc or FieldSum
+
+# TODO:
+#   Support corpus classes
+
+# TODO:
+#   This is rather a group query or better:
+#   An aggregation on groups!
+
+use constant {
+  DEBUG => 0
+};
+
+sub new {
+  my $class = shift;
+  my $self = bless {
+    fields_obj => shift,
+
+    # This needs to be numerical field ids!
+    field_ids =>  [map { ref($_) ? $_->term_id : $_ } @{shift()}],
+  }, $class;
+
+  # Initialize aggregator
+  $self->{aggregation} = Krawfish::Koral::Result::Aggregate::Values->new(
+    $self->{field_ids}
+  );
+
+  return $self;
+};
+
+
+# Initialize field pointer
+sub _init {
+  return if $_[0]->{field_pointer};
+
+  my $self = shift;
+
+  $self->{field_pointer} = $self->{fields_obj}->pointer;
+};
+
+
+# Release for each doc
+sub each_doc {
+  my ($self, $current, $result) = @_;
+
+  $self->_init;
+
+  if (DEBUG) {
+    print_log('aggr_values', 'Aggregate on field values');
+  };
+
+  # Get current document
+  my $doc_id = $current->doc_id;
+
+  my $pointer = $self->{field_pointer};
+
+  # Get aggregation information
+  my $aggr = $self->{aggregation};
+
+  # Move fields pointer to current document
+  if ($pointer->skip_doc($doc_id) == $doc_id) {
+
+    # collect values
+    my @values = $pointer->int_fields(@{$self->{field_ids}}) or return;
+
+    # Aggregate all values
+    foreach my $field (@values) {
+
+      # Aggregate value
+      $aggr->add($field->key_id, $field->value);
+    };
+  };
+};
+
+
+sub collection {
+  $_[0]->{collection};
+};
+
+
+# Stringification
+sub to_string {
+  return 'values:' . join(',', @{$_[0]->{field_ids}});
+};
+
+
+# Finish the aggregation
+sub on_finish {
+  my ($self, $collection) = @_;
+
+  # Summarize collection
+  $self->{aggregation}->summarize;
+
+  # Maybe push to collection instead
+  $collection->{values} = $self->{aggregation};
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Bundle.pm b/lib/Krawfish/Compile/Segment/Bundle.pm
new file mode 100644
index 0000000..6336697
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Bundle.pm

@@ -0,0 +1,117 @@
+package Krawfish::Compile::Segment::Bundle;
+use parent 'Krawfish::Compile';
+use Krawfish::Log;
+use strict;
+use warnings;
+
+
+# This class represents bundles of postings
+# (or bundles of bundles of postings) sorted by a certain criterion.
+
+
+use constant DEBUG => 0;
+
+
+# Bundle the current match
+sub current_bundle {
+  my $self = shift;
+
+  if (DEBUG) {
+    print_log('bundle', 'Get bundle');
+  };
+
+  return $self->{current_bundle};
+};
+
+
+# Get current match
+sub current_match {
+  $_[0]->{current_match};
+};
+
+
+# Get next bundle
+# This needs to be overwritten!
+sub next_bundle {
+  ...
+};
+
+
+
+# Move to the next posting in the list,
+# maybe in nested bundles.
+# These calls methods in Posting::Bundle!
+sub next {
+  my $self = shift;
+
+  if (DEBUG) {
+    print_log('bundle', 'Move to next posting');
+  };
+
+  # Get current bundle
+  my $bundle = $self->current_bundle;
+
+  # Check next in bundle
+  while (!$bundle || !$bundle->next) {
+
+    if (DEBUG) {
+      if (!$bundle) {
+        print_log('bundle', 'Current bundle does not exist yet or there is none');
+      }
+      else {
+        print_log('bundle', 'There is no more entry in current bundle');
+      };
+
+      print_log('bundle', 'Move to next bundle');
+    };
+
+
+    # There are more bundles
+    if ($self->next_bundle) {
+      $bundle = $self->current_bundle;
+      if (DEBUG) {
+        print_log('bundle', 'Current bundle to check is ' . $bundle->to_string);
+      };
+    }
+
+    # There are no more bundles
+    else {
+
+      if (DEBUG) {
+        print_log('bundle', 'No more bundles');
+      };
+
+      $self->{current} = undef;
+      return 0;
+    };
+  };
+
+  $self->{current} = $bundle->current;
+
+  if (DEBUG) {
+    print_log('bundle', 'Set current posting to ' . $self->{current}->to_string);
+  };
+
+  return 1;
+};
+
+
+# Return the current match
+sub current {
+  my $self = shift;
+  if (DEBUG) {
+    print_log('bundle', 'Current posting is ' . $self->{current}->to_string);
+  };
+
+  $self->{current};
+};
+
+
+
+# Get frequency
+sub max_freq {
+  $_[0]->{query}->max_freq;
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/BundleDocs.pm b/lib/Krawfish/Compile/Segment/BundleDocs.pm
new file mode 100644
index 0000000..db13935
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/BundleDocs.pm

@@ -0,0 +1,139 @@
+package Krawfish::Compile::Segment::BundleDocs;
+use parent 'Krawfish::Compile::Segment::Bundle';
+use Krawfish::Log;
+use Krawfish::Posting::List;
+use strict;
+use warnings;
+
+# Bundle matches in the same document.
+
+# TODO:
+#   The problem with the current approch is, that next_bundle
+#   will bundle the next doc - without asking if the doc
+#   is relevant.
+
+use constant DEBUG => 0;
+
+sub new {
+  my $class = shift;
+  bless {
+    query => shift,
+    buffer => undef
+  }, $class;
+};
+
+
+# Clone query
+sub clone {
+  my $self = shift;
+  __PACKAGE__->new(
+    $self->{query}->clone
+  );
+};
+
+
+# TODO:
+#   Implement next doc!
+sub next_doc {
+  ...
+};
+
+
+# Move to next bundle
+sub next_bundle {
+  my $self = shift;
+
+  # Reset current bundle
+  $self->{current_bundle} = undef;
+
+  my $first;
+
+  # There is a bundle on buffer
+  if ($self->{buffer}) {
+
+
+    if (DEBUG) {
+      print_log('d_bundle', 'There is a buffered posting for first');
+    };
+
+    $first = $self->{buffer};
+    $self->{buffer} = undef;
+  }
+
+  # Move forward
+  elsif ($self->{query}->next) {
+
+    if (DEBUG) {
+      print_log('d_bundle', 'Get new posting for first');
+    };
+
+    unless ($first = $self->{query}->current) {
+
+      if (DEBUG) {
+        print_log('d_bundle', 'There is no more posting');
+      };
+
+      return;
+    };
+  }
+
+  # Can't move forward
+  else {
+
+    if (DEBUG) {
+      print_log('d_bundle', 'No postings to move forward');
+    };
+
+    return;
+  };
+
+  my $bundle = Krawfish::Posting::List->new($first);
+  my $query = $self->{query};
+
+  if (DEBUG) {
+    print_log('d_bundle', 'Start bundle with ' . $first->to_string);
+  };
+
+  # There is a next entry
+  my $next;
+  while ($query->next) {
+    $next = $query->current;
+
+    # Documents are identical - bundle
+    if ($next->doc_id == $first->doc_id) {
+
+      if (DEBUG) {
+        print_log('d_bundle', 'Posting ' . $next->to_string . ' has identical doc id');
+      };
+
+      $bundle->add($next);
+    }
+
+    # Remember the next bundle
+    else {
+
+      if (DEBUG) {
+        print_log('d_bundle', 'Posting ' . $next->to_string . ' has different doc id');
+      };
+
+      $self->{buffer} = $next;
+      last;
+    };
+  };
+
+  if (DEBUG) {
+    print_log('d_bundle', 'Current bundle is ' . $bundle->to_string);
+  };
+
+  $self->{current_bundle} = $bundle;
+  return 1;
+};
+
+
+# Stringification
+sub to_string {
+  'bundleDocs(' . $_[0]->{query}->to_string . ')';
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Context.pm b/lib/Krawfish/Compile/Segment/Enrich/Context.pm
new file mode 100644
index 0000000..09feebd
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Context.pm

@@ -0,0 +1,113 @@
+package Krawfish::Compile::Segment::Enrich::Context;
+use parent 'Krawfish::Compile';
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+
+# DEPRECATED!!!
+
+
+# This will add context (only surface forms) to each match
+
+# TODO:
+#   Context always needs to be left AND right, because
+#   at least the surrounding elements context will expand
+#   both sides at the same time!
+
+
+sub new {
+  my $class = shift;
+  bless {
+    forward_obj => shift,
+    query => shift,
+
+    # TODO:
+    #   Should support
+    #   - surrounding elements
+    #   - left elements / right elements
+    #   - left tokens / right tokens
+    #   - left characters / right characters
+    contextualize  => shift, # Accept context object
+    match => undef
+  }, $class;
+};
+
+# Initialize forward counter
+sub _init {
+  return if $_[0]->{forward_pointer};
+
+  my $self = shift;
+
+  print_log('e_context', 'Create forward pointer') if DEBUG;
+
+  # Load the ranked list - may be too large for memory!
+  $self->{forward_pointer} = $self->{forward_obj}->pointer;
+};
+
+
+sub current_match {
+  my $self = shift;
+
+  $self->_init;
+
+  # Match is already set
+  if ($self->{match}) {
+    if (DEBUG) {
+      print_log(
+        'e_context',
+        'Match already defined ' . $self->{match}->to_string
+      );
+    };
+    return $self->{match};
+  };
+
+  # TODO:
+  #   may simply be $self->{query}->current_match
+  my $match = $self->match_from_query;
+
+  # Get forward pointer
+  my $forward = $self->{forward_pointer};
+
+  my $doc_id = $match->doc_id;
+
+  unless ($forward->skip_doc($doc_id) == $doc_id) {
+
+    # TODO: This should never happen!
+    return;
+  };
+
+  # Get the context
+  # TODO:
+  #   This may be retrieved as part of the snippet!
+  my $left_context = $self->{contextualize}->left_context($match, $forward);
+  my $right_context = $self->{contextualize}->right_context($match, $forward);
+
+  # Add context to match
+  $match->add(
+    Krawfish::Posting::Match::Context->new(
+      left => $left_context,
+      right => $right_context
+    ));
+
+  return $match;
+};
+
+
+# Next match
+sub next {
+  my $self = shift;
+  $self->{match} = undef;
+  return $self->{query}->next;
+};
+
+
+sub to_string {
+  my $str = 'enrichContext(' . $self->{contextualize}->to_string . ':';
+  $str .= $_[0]->{query}->to_string;
+  return $str . ')';
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Fields.pm b/lib/Krawfish/Compile/Segment/Enrich/Fields.pm
new file mode 100644
index 0000000..a201418
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Fields.pm

@@ -0,0 +1,135 @@
+package Krawfish::Compile::Segment::Enrich::Fields;
+use parent 'Krawfish::Compile';
+use Krawfish::Koral::Result::Enrich::Fields;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+# This will enrich each match with specific field information
+# Needs to be called on the segment level
+
+# Constructor
+sub new {
+  my $class = shift;
+  bless {
+    field_obj => shift,
+    query     => shift,
+
+    # Could be treated like in Aggregate/Fields or Group/Fields!
+    fields    => shift, # Expects to be numerical sorted field identifier
+    match     => undef,
+    pointer   => undef,
+    last_doc_id => -1
+  }, $class;
+};
+
+sub _init {
+  my $self = shift;
+
+  # Pointer is already initiated
+  return if $self->{init}++;
+
+  # Match is already set
+  if (DEBUG) {
+    print_log(
+      'r_fields',
+      'Initiate pointer to fields'
+    );
+  };
+
+  $self->{pointer} = $self->{field_obj}->pointer;
+  return;
+};
+
+
+sub pointer {
+  $_[0]->{pointer};
+};
+
+
+# Get current match
+sub current_match {
+  my $self = shift;
+
+  $self->_init;
+
+  # Match is already set
+  if ($self->{match}) {
+    if (DEBUG) {
+      print_log(
+        'r_fields',
+        'Match already defined ' . $self->{match}->to_string
+      );
+    };
+    return $self->{match};
+  };
+
+  my $match = $self->match_from_query;
+
+
+  # Match is in the same document as before
+  if ($match->doc_id == $self->{last_doc_id}) {
+
+    # Create an enrichment
+    $match->add(
+      Krawfish::Koral::Result::Enrich::Fields->new(@{$self->{last_fields}})
+      );
+
+    # The document has no associated fields
+    return ($self->{match} = $match);
+  };
+
+  # Retrieve from data
+
+  # Get fields object
+  my $fields = $self->{pointer};
+
+  # Move to document in field stream
+  my $fields_doc_id = $fields->skip_doc($match->doc_id);
+  if ($fields_doc_id != $match->doc_id) {
+
+    if (DEBUG) {
+      print_log('r_fields', 'Match doc id #' . $match->doc_id .
+                  ' mismatches fields doc id #' . $fields_doc_id);
+    };
+
+    # The document has no associated fields
+    return ($self->{match} = $match);
+  };
+
+  # Get the fields from the fields stream
+  my @fields = $fields->fields(@{$self->{fields}});
+
+  $self->{last_doc_id} = $match->doc_id;
+  $self->{last_fields} = [@fields];
+
+  # Create an enrichment
+  $match->add(
+    Krawfish::Koral::Result::Enrich::Fields->new(@fields)
+    );
+
+  $self->{match} = $match;
+
+  print_log('r_fields', 'Match now contains data for ' . join(', ', @fields)) if DEBUG;
+
+  return $match;
+};
+
+
+# Next match
+sub next {
+  my $self = shift;
+  $self->{match} = undef;
+  return $self->{query}->next;
+};
+
+
+sub to_string {
+  my $str = 'enrichFields(' . join(',', @{$_[0]->{fields}}) . ':';
+  $str .= $_[0]->{query}->to_string;
+  return $str . ')';
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
new file mode 100644
index 0000000..8de0052
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm

@@ -0,0 +1,126 @@
+package Krawfish::Compile::Segment::Enrich::Snippet;
+use parent 'Krawfish::Compile';
+use Krawfish::Koral::Result::Enrich::Snippet;
+# use Krawfish::Compile::Segment::Enrich::Snippet::Highlights;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+# TODO:
+#   It may be more efficient to first collect all required
+#   annotations (for decoration, context, hit etc.) and
+#   then iterate over left context, hit, right context
+#   and get all annotations per token at a time
+
+
+sub new {
+  my $class = shift;
+  # query
+  # fwd_obj
+  # left
+  # right
+  # hit
+  return bless { @_ }, $class;
+};
+
+
+# Initialize forward index
+sub _init {
+  return if $_[0]->{_init}++;
+
+  my $self = shift;
+  $self->{fwd_pointer} = $self->{fwd_obj}->pointer;
+};
+
+
+# Iterated through the ordered matches
+sub next {
+  my $self = shift;
+  $self->_init;
+  $self->{match} = undef;
+  return $self->{query}->next;
+};
+
+
+# Return the current match
+sub current_match {
+  my $self = shift;
+
+  print_log('c_snippet', 'Get current match') if DEBUG;
+
+  # Match is already set
+  return $self->{match} if $self->{match};
+
+  # Get current match from query
+  my $match = $self->match_from_query;
+
+  print_log('c_snippet', 'match is ' . $match) if DEBUG;
+
+  # Get forward query
+  my $forward = $self->{fwd_pointer};
+
+  # TODO:
+  #   Fetch preceding context
+
+  my $doc_id = $match->doc_id;
+  unless ($forward->skip_doc($doc_id) == $doc_id) {
+
+    # TODO: This should never happen!
+    return;
+  };
+
+  if (DEBUG) {
+    print_log('c_snippet', 'Move to match doc position');
+  };
+
+
+  # Move pointer to start position of match
+  unless ($forward->skip_pos($match->start)) {
+
+    # This should never happen!
+    return;
+  };
+
+  # Get data from hit
+  my $hit_data = $self->{hit}->content($match, $forward);
+
+  if (DEBUG) {
+    print_log('c_snippet', 'Move to match position');
+  };
+
+  # Create snippet posting
+  my $snippet = Krawfish::Koral::Result::Enrich::Snippet->new(
+    hit_ids => $hit_data
+  );
+
+  # Add snippet to match
+  $match->add($snippet);
+
+  # Deal with left
+  # Deal with hit
+  # Deal with right
+
+  $self->{match} = $match;
+  return $match;
+};
+
+
+# Stringification
+sub to_string {
+  my $self = shift;
+  my $str = 'snippet(';
+  if ($self->{left}) {
+    $str .= $self->{left}->to_string . ',';
+  };
+  if ($self->{right}) {
+    $str .= $self->{right}->to_string . ',';
+  };
+  $str .= $self->{hit}->to_string . ':';
+  $str .= $self->{query}->to_string;
+  return $str . ')';
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Context.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Context.pm
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Context.pm


diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Context/Span.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Context/Span.pm
new file mode 100644
index 0000000..4ebd8ce
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Context/Span.pm

@@ -0,0 +1,145 @@
+package Krawfish::Compile::Segment::Enrich::Snippet::Context::Span;
+use Krawfish::Posting::Forward;
+use strict;
+use warnings;
+
+sub new {
+  my $class = shift;
+  # foundry_id, layer_id, anno_id, count
+  #
+  # The number of elements left or right
+  # to the match. Defaults to 0
+  # (so: only expand to the element)
+  #   count
+
+  # Maximum number of tokens
+  #   max
+  bless { @_ }, $class;
+};
+
+
+# Get left context
+sub left {
+  my ($self, $match, $pointer) = @_;
+
+  # The context as an array of preceeding strings and term_ids
+  my @context = ();
+
+  # Do not search beyond maximum tokens
+  my $max        = $self->{max_left};
+  my $count      = $self->{count_left};
+  my $last_match = undef;
+
+  # Get the start position of the match
+  # Move to that position (may be before current pointer position
+  unless ($pointer->skip_pos($match->start)) {
+    warn "pointer currently can't be repositioned";
+  };
+
+  # Remember the match position
+  my $remember_position = $pointer->current;
+
+  # As long as it is allowed, iterate through tokens
+  while ($max-- > 0) {
+
+    # Get current forward token
+    my $current = $pointer->current;
+    my $pos = $current->pos;
+
+    # Check if the token is relevant for annotation
+    if (my $anno = $current->annotation(
+      $self->{foundry_id},
+      $self->{layer_id},
+      $self->{anno_id},
+    )) {
+      # Annotation was found - get span length!
+
+      # Get the first annotation (that has the shortest span length)
+      # TODO:
+      #   It may be beneficial to check, if there is a better length,
+      #   e.g. one that surrounds the match
+      my $anno_data = $anno->[-1];
+
+      # The first part is the end position
+      my $anno_end = $anno_data->[0];
+
+      # TODO:
+      #   DO something with $anno_end
+
+      # Element was found
+      if ($count-- < 0) {
+
+        # The span is not the start of the match
+        # (in which the part is already in the match)
+        if ($pos != $match->start) {
+          # Add token to context
+          unshift @context, Krawfish::Posting::Forward->new(
+            term_id        => $current->term_id,
+
+            # Preceding data may need to be cut!
+            preceding_data => $current->preceding_data
+          );
+        };
+
+        # The requirement is fulfilled
+        $last_match = undef;
+        last;
+      };
+
+      # remember last match to gracefully cut all matches
+      $last_match = $max;
+    }
+
+    # Element is not found at position
+    else {
+
+      # The token is part of the match (and therefore not part of the context)
+      if ($pos == $match->start) {
+        # Add token to context
+        unshift @context, Krawfish::Posting::Forward->new(
+          term_id        => 0,
+          preceding_data => $current->preceding_data
+        );
+      };
+
+      # Add token to context
+      unshift @context, Krawfish::Posting::Forward->new(
+        term_id        => $current->term_id,
+        preceding_data => $current->preceding_data
+      );
+
+      # Maximum tokens exceeded
+      if ($max-- < 1) {
+        last;
+      };
+
+      # Move backwards
+      $pointer->prev;
+    };
+  };
+
+  # There is a last match to trim
+  if ($last_match) {
+    # TODO:
+    #   Cut the context array to the last match
+    # TODO:
+    #   Cut the last preceding data part as well!
+  };
+
+  # TODO
+  #   Revert array
+};
+
+sub right {
+  ...
+};
+
+sub to_string {
+  my $self = shift;
+  my $str = 'span(';
+  $str .= '#' . $self->{foundry_id} . '/#' . $self->{layer_id} . '=#' . $self->{anno_id} . ',';
+  return $str . $self->{count} . ',' . $self->{max} . ')';
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element.pm
new file mode 100644
index 0000000..163213a
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element.pm

@@ -0,0 +1,24 @@
+package Krawfish::Compile::Segment::Snippet::Element;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+sub start_char {
+  ...
+};
+sub end_char {
+  ...
+};
+
+sub open_html {
+  ...
+};
+sub close_html {
+  ...
+};
+
+1;
+
+__END__
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element/Pagebreak.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element/Pagebreak.pm
new file mode 100644
index 0000000..036d69d
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Element/Pagebreak.pm

@@ -0,0 +1,30 @@
+package Krawfish::Compile::Segment::Enrich::Snippet::Element::Pagebreak;
+use parent 'Krawfish::Compile::Segment::Enrich::Snippet::Element';
+use strict;
+use warnings;
+
+sub new {
+  my $self = shift;
+  bless {
+    start_char => shift,
+    page_after => shift
+  }, $class;
+};
+
+sub start_char {
+  return $_[0]->{start_char};
+};
+
+sub end_char {
+  return $_[0]->{end_char};
+};
+
+sub open_html {
+  return '<span class="pagebreak" data-page-after="' . $self->{page_after} . '"></span>';
+};
+
+sub close_html {
+  return '';
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Highlights.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Highlights.pm
new file mode 100644
index 0000000..0f8bfb3
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Highlights.pm

@@ -0,0 +1,564 @@
+package Krawfish::Compile::Segment::Enrich::Snippet::Highlights;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+# -1 is match highlight
+# $annotation_nr_counter = 256;
+# $relation_number_counter = 2048;
+# $identifier_number_counter = -2;
+
+# private HashMap<Integer, String> annotationNumber = new HashMap<>(16);
+# private HashMap<Integer, Relation> relationNumber = new HashMap<>(16);
+# private HashMap<Integer, Integer> identifierNumber = new HashMap<>(16);
+
+sub new {
+  my $class = shift;
+  bless {
+    highlights => shift,
+    segments   => shift,
+    list       => [], # Combined array
+    stack      => [] # Stack for balancing the elements
+  }, $class;
+};
+
+
+sub clear {
+  ...
+};
+
+
+sub add_open {
+  my $self = shift;
+};
+
+sub process {
+  ...
+};
+
+1;
+
+__END__
+
+
+sub parse_simple {
+  my $self = shift;
+
+  my $segments = $self->{segments};
+
+  # TODO:
+  #   In Krill, offsets are collected in advance,
+  #   but I guess it's cleaner to do on the fly
+
+  print_log('c_highl', 'Process highlight stack') if DEBUG;
+
+  my @highlights = ();
+
+  # my $start_seg = $segments->get($match->doc_id, $match->start);
+  # my $end_seg   = $segments->get($match->doc_id, $match->end - 1);
+
+  # Add match as highlight
+  push @highlights, _highlight($match->start, $match->end, -1);
+
+  # TODO:
+  #   Check that highlights are between these values
+  # my $start_pos = $match->start;
+  # my $end_pos = $match->end;
+
+  # TODO:
+  #   Filter multiple identifiers, that may be introduced and would
+  #   result in invalid xml
+  # this._filterMultipleIdentifiers();
+
+  my @open_list  = sort _open_sort @highlights;
+  my @close_list = sort _close_sort @highlights;
+
+  # Final highlight stack
+  my @stack = ();
+
+  # Create sorted stack unless both lists are empty
+  while (scalar @open_list || scalar @close_list) {
+
+    # Shortcut for list ending
+    if (!scalar @open_list) {
+      push @stack, map { $_->[4] = } @close_list;
+      last;
+    }
+
+    # Not sure about this - but may happen
+    elsif (!scalar @close_list) {
+      last;
+    };
+
+    # Type 0: Textual data
+    # Type 1: Opening
+    # Type 2: Closing
+
+    # Check if the opening tag starts before the closing tag ends
+    if ($open_list[0]->[0] < $close_list[0]->[1]) {
+
+      # Clone highlight
+      my $element = [@{(shift(@open_list))}];
+
+      # Set element to be terminal
+      $element->[3] = 1; # terminal
+      $element->[4] = 1; # Opening
+      push @stack, $element;
+    }
+
+    # No - then close
+    else {
+      my $element = shift(@close_list);
+      $element->[4] = 2; # closing
+      push @stack, $element;
+    };
+  };
+
+  $self->{stack} = \@stack; # is a position stack!
+
+  # TODO:
+  #   Problem to solve is now discontinuing elements!
+
+  return $self;
+};
+
+
+sub _highlight {
+  my ($start, $end, $class, $terminal) = @_;
+  return [$start, $end, $class, $terminal // 0];
+};
+
+# Sort opening tags by start, end and class number
+sub _open_sort {
+
+  # Compare start position
+  if ($a->start > $b->start) {
+    return 1;
+  }
+  elsif ($a->start == $b->start) {
+    # Compare end position
+    if ($a->end > $b->end) {
+      return -1;
+    }
+    elsif ($a->end == $b->end) {
+      # Compare class number
+      if ($a->[2] > $b->[2]) {
+        return 1;
+      }
+      elsif ($a->[2] < $b->[2]) {
+        return -1;
+      };
+      return 0;
+    };
+  }
+  return -1;
+};
+
+
+# Sort closing tags by end and start
+sub _close_sort {
+
+  # Compare end positions
+  if ($a->[1] > $b->[1]) {
+    return 1;
+  }
+  elsif ($a->[1] == $b->[1]) {
+
+    # Compare start position
+    if ($a->[0] < $b->[0]) {
+      return 1;
+    }
+    elsif ($a->[0] == $b->[0]) {
+      return 0;
+    };
+    return -1;
+  };
+  return -1;
+};
+
+
+
+
+
+1;
+
+
+
+
+__END__
+
+sub parse {
+  my ($self, $match) = @_;
+
+  my $segments = $self->{segments};
+
+  # Collect offsets for match
+  # TODO: In Krill, offsets are collected in advance,
+  # but I guess it's cleaner to do on the fly
+  my $start_seg = $segments->get($match->doc_id, $match->start);
+  my $end_seg   = $segments->get($match->doc_id, $match->end - 1);
+
+  # TODO: Collect offsets for inner match
+
+  # match number
+
+  # TODO: Parse identifier string
+
+  # $self->add_open(0, $start_seg->[0]);
+  # $self->add_close(0, $end_seg->[1]);
+
+  # foreach (@highlights) {
+  #   if ($_->start >= $match->start && $_->end <= $self->end) {
+  #
+  #   };
+  # }
+
+  my $stack = $self->_process_highlight_stack;
+  return $self->{list};
+};
+
+
+sub _process_highlight_spans {
+  my $self = shift;
+
+  # TODO:
+  #   Check potential start and end characters here
+
+  my $identifier = undef;
+
+  # my $array = $self->_process_offset_chars($match->doc_id);
+
+  # foreach my $highlight (@{$self->highlights}) {
+  #   my $start = $self->{segments}->get($match->doc_id, $highlight->start);
+  #   my $end = $self->{segments}->get($match->doc_id, $highlight->end);
+
+  #  return if $start < 0 || $end < 0;
+  #  $self->{span}->add($start, $end, $highlight->nr);
+  # };
+};
+
+
+# TODO: Process context, primary data
+sub _process_offset_chars {
+#  my $self = shift;
+  #  if ($context) {}
+};
+
+sub _process_highlight_stack {
+  my $self = shift;
+
+  print_log('c_highl', 'Process highlight stack') if DEBUG;
+
+  my @open_list = ();
+  my @close_list = ();
+
+  # TODO:
+  #   Filter multiple identifiers, that may be introduced and would
+  #   result in invalid xml
+  # this._filterMultipleIdentifiers();
+
+  my @highlights = @_;
+
+  push @open_list,  @highlights;
+  push @close_list, @highlights;
+
+  @open_list  = sort _open_sort  @open_list;
+  @close_list = sort _close_sort @open_list;
+
+  my @stack = ();
+
+  # Create sorted stack unless both lists are empty
+  while (scalar @open_list || scalar @close_list) {
+    if (!scalar @open_list) {
+      push @stack, @close_list;
+      last;
+    }
+
+    # Not sure about this - but may happen
+    elsif (!scalar @close_list) {
+      last;
+    };
+
+    if ($open_list[0]->start < $close_list[0]->end) {
+
+      my $e = (shift(@open_list))->clone;
+      $e->[3] = 1;
+      push @stack, $e;
+    }
+    else {
+      push @stack, shift(@close_list)
+    };
+  };
+  return \@stack;
+};
+
+sub add_close {
+  my $self = shift;
+  my ($nr, $end) = @_;
+
+  $self->{temp_stack} = [];
+
+  # Check if there is an opening tag
+  unless ($self->{stack}->[0]) {
+    warn 'Nothing to close on stack';
+    return;
+  };
+
+  if (DEBUG) {
+    print_log(
+      'c_highl',
+      "Stack for checkinmg with class $nr is " .
+        join('|', @{$self->{stack}})
+      );
+  };
+
+
+  # Class number of the last element
+  my $eold  = pop @{$self->{stack}};
+
+  my $last_combinator;
+
+  # the closing element is not balanced, i.e. the last element differs
+  while ($eold != $nr) {
+
+    # Get last element
+    $last_combinator = $self->{list}->[-1];
+
+    if (DEBUG) {
+      print_log(
+        'c_highl',
+        'Closing element is unbalanced - ' .
+          $eold . ' != ' . $nr . ' with last combinator ' .
+          join('|',
+               $last_combinator->{type},
+               $last_combinator->{nr},
+               $last_combinator->{chars}
+             )
+        );
+    };
+
+    # combinator is opening and the number is not equal to the last
+    # element on the balanceStack
+    if ($last_combinator->{type} == 1 && $last_combinator->{nr} == $eold) {
+
+      # Remove the last element - it's empty and uninteresting!
+      pop @{$self->{list}};
+    }
+
+    # combinator is either closing (??) or another opener
+    else {
+
+      print_log('c_highl', "Close element a) $eold") if DEBUG;
+
+      # Add close element of the unclosed element
+      # This will be continued
+      push @{$self->{list}}, Krawfish::Collection::Snippet::Highlights::Combinator->new_node(
+        2, $eold, 0
+      );
+    };
+
+    # add this element number temporarily on the stack
+    push @{$self->{temp_stack}}, $eold;
+
+    # Check next element
+    $eold = pop @{$self->{stack}};
+  };
+
+  # Get last combinator on the stack
+  $last_combinator = $self->{list}->[-1];
+
+  if (DEBUG) {
+    print_log(
+      'c_highl',
+      "LastComb: " .
+        join('|',
+             $last_combinator->{type},
+             $last_combinator->{nr},
+             $last_combinator->{chars}
+           ) .
+           " for $nr"
+         );
+  };
+
+  if ($last_combinator->{type} == 1 && $last_combinator->{nr} == $nr) {
+
+    while ($last_combinator->{type} == 1 && $last_combinator->{nr} == $nr) {
+      # Remove the damn thing - It's empty and uninteresting!
+      pop @{$self->{list}};
+      $last_combinator = $self->{list}->[-1];
+    };
+  }
+
+  else {
+    print_log('c_highl', "Close element b) $nr") if DEBUG;
+
+    # Add closer
+    push @{$self->{list}}, Krawfish::Collection::Snippet::Highlights::Combinator->new_node(
+      2, $eold, 1
+    );
+  };
+
+  for my $e (@{$self->{temp_stack}}) {
+    print_log('c_highl', "Reopen element $e") if DEBUG;
+    push @{$self->{list}}, Krawfish::Collection::Snippet::Highlights::Combinator->new_node(
+      1, $e
+    );
+
+    push @{$self->{stack}}, $e;
+  };
+};
+
+
+sub get_first {
+  $_[0]->{list}->[0];
+};
+
+
+sub get_last {
+  $_[0]->{list}->[-1];
+};
+
+
+sub get {
+  $_[0]->{list}->[$_[1]];
+};
+
+
+sub size {
+  scalar @{$_[0]->{list}}
+};
+
+
+# Add textual element
+sub add_string {
+  my ($self, $string) = @_;
+  my $element = Krawfish::Collection::Snippet::Highlights::Combinator->new_text(
+    $string
+  );
+  push @{$self->{list}}, $element;
+};
+
+
+# Open element
+sub add_open {
+  my ($self, $number) = @_;
+
+  my $element = Krawfish::Collection::Snippet::Highlights::Combinator->new_node(
+    1 => $number
+  );
+  push @{$self->{list}}, $element;
+  push @{$self->{stack}}, $number;
+};
+
+
+sub to_string {
+  my $self = shift;
+  my $str = '';
+  foreach (@{$self->{list}}) {
+    $str .= $_->to_string . "\n";
+  };
+  return $str;
+};
+
+
+package Krawfish::Collection::Snippet::Highlights::Combinator;
+use strict;
+use warnings;
+
+# Type 0: Textual data
+# Type 1: Opening
+# Type 2: Closing
+
+# Constructor for nodes
+sub new_node {
+  my $class = shift;
+  my $self = bless {
+    type => shift,         # byte
+    nr => shift,           # integer
+    terminal => shift // 1, # boolean
+    chars => ''
+  }, $class;
+
+  # Terminal elements are closed and won't be reopened
+
+  return $self;
+};
+
+
+# Constructor for textual data
+sub new_text {
+  my $class = shift;
+  bless {
+    type => 0,
+    chars => shift,
+    nr => 0,
+    terminal => 1
+  }, $class;
+};
+
+
+# TODO: This may not be set here
+sub to_bracket {
+  my $self = shift;
+  my $match = shift;
+
+  my $str = '';
+
+  # Closing bracket
+  if ($self->{type} == 2) {
+
+    # Close matching element
+    if ($self->{nr} == -1) {
+      return ']';
+    };
+
+    # Close matching highlight, relation, span ...
+    return '}';
+  }
+
+  elsif ($self->{type} == 1) {
+    if ($self->{nr} == -1) {
+      $str .= '[';
+    }
+
+    # Is identifier
+    elsif ($self->{nr} < -1) {
+      $str .= '{#' . $match->class_id($self->{nr}) . ':';
+    }
+
+    # Highlight, relation, Span
+    else {
+      $str .= '{';
+
+      # Todo: Use highlight directive
+
+      if ($self->{nr} >= 256) {
+
+        # Is an annotation?
+        if ($self->{nr} < 2048) {
+          $str .= $match->annotation_id($self->{nr});
+        }
+
+        # Relation
+        else {
+          my $rel = $match->relation_id($self->{nr});
+          $str .= $rel->annotation;
+          $str .= '>';
+        }
+      }
+
+      # Highlight
+      elsif ($self->{nr} != 0) {
+        $str .= $self->{nr} . ':';
+      }
+
+      return $str;
+    };
+
+    return $self->{chars};
+  };
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Hit.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Hit.pm
new file mode 100644
index 0000000..a22fd95
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Hit.pm

@@ -0,0 +1,54 @@
+package Krawfish::Compile::Segment::Enrich::Snippet::Hit;
+use Krawfish::Koral::Document::Subtoken;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+sub new {
+  my $class = shift;
+  bless {
+    @_
+  }, $class;
+};
+
+sub content {
+  my ($self, $match, $forward) = @_;
+
+  if ($match->start != $forward->pos) {
+    warn 'The current position is not at the start position of the match';
+    return;
+  };
+
+  my @data;
+  my $length = $match->end - $match->start;
+  while ($length > 0) {
+
+    # Get the current token
+    my $current = $forward->current;
+
+    # Add token to text
+    push @data, Krawfish::Koral::Document::Subtoken->new_by_term_id(
+      $current->preceding_data,
+      $current->term_id
+    );
+
+    # Get the surface data
+    $length--;
+    $forward->next or last;
+  };
+
+  if (DEBUG) {
+    print_log('c_snippet', 'Add snippet match data: ' . join(',', @data));
+  };
+
+  return \@data;
+};
+
+sub to_string {
+  'hit';
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Spans.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Spans.pm
new file mode 100644
index 0000000..f7ad380
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Spans.pm

@@ -0,0 +1,32 @@
+package Krawfish::Compile::Segment::Enrich::Snippet::Spans;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+sub new {
+  my $class = shift;
+  bless {
+    elements => [],
+    text => ''
+  }, $class;
+};
+
+sub add_element {
+  my ($self, $element) = @_;
+  push @{$self->{elements}}, $element;
+};
+
+sub add_text {
+  my ($self, $text) = @_;
+  $self->{text} .= $text;
+  return $self;
+};
+
+sub to_html {
+  my $self = shift;
+  return $self->{text};
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/SortCriterion.pm b/lib/Krawfish/Compile/Segment/Enrich/SortCriterion.pm
new file mode 100644
index 0000000..b388f0e
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/SortCriterion.pm

@@ -0,0 +1,73 @@
+package Krawfish::Compile::Segment::Enrich::SortCriterion;
+use parent 'Krawfish::Compile';
+use warnings;
+use strict;
+
+# Enrich an item with sort criteria.
+# This is necessary to sort items beyond the segment.
+# The problem with this enrichment is,
+# that it needs to augment the sorted items after sorting,
+# so they are not in a proper order to go through
+# the fields lists (for example) to collect the field values
+# or through the forward index to collect term_ids (though
+# this may be a different API).
+#
+# A proper way to do this would be to go through the sorted
+# lists and create a new sorted list in doc order (or to somehow
+# keep match order) to make it possible to enrich with all
+# sorting criteria.
+#
+# 1. For Fields: Create a list of all docs to enrich in doc_id order
+#    (Ignore duplicates)
+# 2. Prepare all requested fields in field order
+# 3. Go through all fields and collect values or term_ids
+# 4. Create criterion vectors per match based on these information
+#
+# But:
+# It may very well be possible to only enrich if required
+# on the node level.
+#
+# On the node level, the relevant criteria (top_k) will be inflated,
+# taken the ordering into account (which means following matches may
+# have a lot of criteria in common.
+
+
+sub new {
+  my $class = shift;
+  bless {
+    query => shift,
+
+    # Store all criteria in sorted order,
+    # which may include terms and fields.
+    # This will also keep the direction
+    # and possibly the collation.
+    criteria => shift
+  }, $class
+};
+
+sub _init {
+  my $self = shift;
+
+  return if $self->{init}++;
+
+  # TODO:
+  #   Go through all criteria and collect required field IDs.
+  #   Bring required field IDs in order.
+  #   Create an array for field_id => criterion_position to
+  #   map the surface term to the criterion after fetching.
+  #   Remember the criterion position for optional term sorting.
+};
+
+
+sub current_match {
+  # TODO:
+  #   Create an empty list for sorting criteria.
+  #   a) Retrieve for the document id all the relevant fields
+  #      if there are fields to retrieve.
+  #      Add in the position of the criteria list.
+  #   b) The surface term is already retrieved and enriched.
+  #      Add in the position of the criteria list.
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Enrich/TermFreq.pm b/lib/Krawfish/Compile/Segment/Enrich/TermFreq.pm
new file mode 100644
index 0000000..452a385
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/TermFreq.pm

@@ -0,0 +1,2 @@
+# Enrich all matches (per doc) with frequencies of terms
+# in the specific document, e.g. number of token, number of sentences etc.

diff --git a/lib/Krawfish/Compile/Segment/Enrich/Terms.pm b/lib/Krawfish/Compile/Segment/Enrich/Terms.pm
new file mode 100644
index 0000000..a5299a0
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Enrich/Terms.pm

@@ -0,0 +1,234 @@
+package Krawfish::Compile::Segment::Enrich::Terms;
+use Krawfish::Koral::Result::Enrich::Terms;
+use parent 'Krawfish::Compile';
+use Krawfish::Log;
+use strict;
+use warnings;
+
+# TODO:
+#   Potentially rename to ::Terms! or ::Classes!
+
+# Enrich each match with all term ids for a specific region and
+# for a specific class
+
+# TODO:
+#   Make this usable for Krawfish::Compile::Group::Classes
+#   and Krawfish::Compile::Sort::Classes
+#   by supporting $nrs instead of $nr
+#
+# TODO:
+#   Classes may overlap, so subtokens should be cached/buffered
+#   or rather memoized!
+#   This also means it's necessary to retrieve term-ids without gaps
+#   so the next match can retrieve the term ids overlapping
+#
+# TODO:
+#   For a class there may be more than one start+end values.
+#   For sorting, gaps should be recognized and marked
+#   with a term_id=0.
+#   For sorting it's important to remember that!
+
+use constant {
+  DEBUG     => 0,
+  NR        => 0,
+  START_POS => 1,
+  END_POS   => 2,
+};
+
+
+sub new {
+  my $class = shift;
+  bless {
+    forward_obj => shift,
+    query => shift,
+    nrs   => shift // [0],
+    match => undef
+  }, $class;
+};
+
+sub to_string {
+  my $self = shift;
+  'terms(' . join(',', @{$self->{nrs}}) . ':'
+    . $self->{query}->to_string . ')'
+};
+
+
+sub _init {
+  my $self = shift;
+
+  return if $self->{init}++;
+
+  if (DEBUG) {
+    print_log(
+      'r_terms',
+      'Initiate pointer to forward index'
+    );
+  };
+
+  # The pointer can move backwards if necessary
+  $self->{pointer} = $self->{forward_obj}->pointer;
+};
+
+
+sub pointer {
+  $_[0]->{pointer};
+};
+
+
+
+# Next match
+sub next {
+  my $self = shift;
+  $self->{match} = undef;
+  return $self->{query}->next;
+};
+
+
+# Get the current match
+sub current_match {
+  my $self = shift;
+
+  $self->_init;
+
+  # Current match is already defined
+  if ($self->{match}) {
+
+    # Return match
+    return $self->{match};
+  };
+
+  # Get match based on current query position
+  my $match = $self->match_from_query;
+
+  if (DEBUG) {
+    print_log(
+      'r_terms',
+      'Get match from query'
+    );
+  };
+
+  # Get classes of the match
+  my @classes = $match->get_classes($self->{nrs});
+
+  # No classes found in match
+  return $match unless @classes;
+
+  # This only contains classes requested,
+  # but potentially multiple times
+
+  # First retrieve term ids
+  my $start = $classes[0]->[START_POS];
+  my $end   = $classes[0]->[END_POS];
+  foreach (@classes[1 .. $#classes]) {
+    $start = $_->[START_POS] if $_->[START_POS] < $start;
+    $end = $_->[END_POS] if $_->[END_POS] > $end;
+  };
+
+  if (DEBUG) {
+    print_log(
+      'r_terms',
+      "Retrieve subtokens for class position $start-$end"
+    );
+  };
+
+  # TODO:
+  #   Instead of using pointer directly,
+  #   this should use a forward buffer
+  #   with a yet to be defined API
+
+  my $pointer = $self->pointer;
+
+  # Skip to current document
+  my @term_ids = ();
+
+  my $doc_id = $match->doc_id;
+  if ($pointer->skip_doc($doc_id) == $doc_id &&
+
+        # Skip to current position
+        $pointer->skip_pos($start)) {
+
+    if (DEBUG) {
+      print_log('r_terms', "Pointer is repositioned");
+    };
+
+
+    # Collect all relevant subtoken term ids
+    for (my $i = $start; $i < $end; $i++) {
+      # Add terms to list
+      my $current = $pointer->current or return $match;
+      push @term_ids, $current->term_id;
+
+      # Move to next subtoken
+      $pointer->next;
+    };
+  }
+
+  # Document not available
+  else {
+    # Nothing to add
+    return $match;
+  };
+
+  if (DEBUG) {
+    print_log(
+      'r_terms',
+      'Retrieved terms are ' . join(',', @term_ids)
+    );
+  };
+
+  # Add lists of term_ids
+  # Structure is
+  # {
+  #   class1 => [id,id,0,id,id],
+  #   class2 => [...]
+  # }
+  # WARNING:
+  #   Gaps in classes are marked with 0!
+  my %term_id_per_class;
+  foreach my $class (@classes) {
+
+    if (DEBUG) {
+      print_log(
+        'r_terms',
+        'Add term ids for class ' . $class->[NR] .
+          ' with theoretical start at ' . $start
+      );
+    };
+
+    # Get the term vector of the class
+    my $term_ids = ($term_id_per_class{$class->[NR]} //= []);
+
+    # Foreach position, set the term_id
+    foreach my $pos ($class->[START_POS] .. ($class->[END_POS] - 1)) {
+
+      # Get the position without offset
+      $pos = $start - $pos;
+
+      # Copy the term id from the retrieved list
+      $term_ids->[$pos] = $term_ids[$pos];
+    };
+  };
+
+  # Because this may introduce zeros at the beginning,
+  # all classes need to be trimmed again
+  foreach my $class_nr (keys %term_id_per_class) {
+    while (defined $term_id_per_class{$class_nr}->[0] &&
+             $term_id_per_class{$class_nr}->[0] == 0) {
+      shift @{$term_id_per_class{$class_nr}};
+    };
+  };
+
+  # Add term id information per class
+  $match->add(
+    Krawfish::Koral::Result::Enrich::Terms->new(
+      \%term_id_per_class
+    )
+  );
+
+  # Set match
+  $self->{match} = $match;
+};
+
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/EnrichGroup/Values.pm b/lib/Krawfish/Compile/Segment/EnrichGroup/Values.pm
new file mode 100644
index 0000000..f94461c
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/EnrichGroup/Values.pm

@@ -0,0 +1,4 @@
+# Add per group values from fields,
+# like in a group on documents add the min and max values
+# of a field, e.g. the date span, or the total number
+# of sentences in a corpus.

diff --git a/lib/Krawfish/Compile/Segment/Group/AnnotationClasses.pm b/lib/Krawfish/Compile/Segment/Group/AnnotationClasses.pm
new file mode 100644
index 0000000..bba8b9e
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Group/AnnotationClasses.pm

@@ -0,0 +1,11 @@
+# This should make it possible to search for classes
+# and group based on the annotations at the certain range.
+# This, however, is probably quite tricky as
+# there is no simple position based forward index with
+# term_ids for annotations, meaning that this
+# has to check the annotations in the complete forward index,
+# probably making this unusable slow.
+# but who knows ...
+
+# A query like
+# group_by_annotation_classes(1,"opennlp","p","Der {1:[]} Mann")

diff --git a/lib/Krawfish/Compile/Segment/Group/Characters.pm b/lib/Krawfish/Compile/Segment/Group/Characters.pm
new file mode 100644
index 0000000..e5b1da7
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Group/Characters.pm

@@ -0,0 +1,66 @@
+package Krawfish::Compile::Segment::Group::Character;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+
+# This groups on prefix or suffixes of subterms.
+# Necessary to support "Ansicht nach Wortendungen" for example.
+# It's possible to first group on terms and then - per term,
+# request the term surface in the dictionary and group by
+# the result.
+
+
+use constant DEBUG => 0;
+
+sub new {
+  my $class = shift;
+  bless {
+    segments   => shift, # Krawfish::Index::Segments object
+    # TODO: May as well be a subtoken object
+    from_start => shift,  # boolean - otherwise from end
+    char_count => shift
+    nrs => [@_]
+  }, $class;
+};
+
+
+sub get_group {
+  my ($self, $match) = @_;
+
+  # Get all classes from the match
+  my @classes = $match->get_classes($self->{nrs});
+
+  my $segments = $self->{segments};
+
+  my %group;
+
+  # Classes have nr, start, end
+  foreach my $class (sort { $a->start <=> $b->start } @classes) {
+
+    if ($self->{from_start}) {
+
+      # This will retrieve the segment from the segments stream
+      my $segment = $stream->get($match->doc_id, $class->start);
+
+      if ($segment->)
+
+        # The character count can be satisfied by the
+      my $first_chars = $segment->first_chars;
+
+      if (length($first_chars) <= $self->{char_count} {
+        substr($first_chars);
+      }
+      
+      # Check, if the segment only spans one segment
+      if ($class->end != $class->start+1) {
+        
+      };
+    }
+    else {
+      ...
+    };
+  };
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm b/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm
new file mode 100644
index 0000000..529267f
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm

@@ -0,0 +1,215 @@
+package Krawfish::Compile::Segment::Group::ClassFrequencies;
+use parent 'Krawfish::Compile';
+use Krawfish::Koral::Result::Group::ClassFrequencies;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+# Aggregate by content information, for example,
+# based on a certain class
+#
+# TODO:
+#   Currently this only works for surface term_ids, but it may very well collect
+#   arbitrary annotations! In that case, multiple annotations and different
+#   annotation lengths have to be taken into account.
+#
+# TODO:
+#   The special case of class 0 needs to be treated.
+#
+# TODO:
+#   Support virtual corpus classes
+
+sub new {
+  my $class = shift;
+  my $self = bless {
+    forward_obj => shift,
+    query       => shift,
+    classes     => shift,
+    class_freq  => {},
+    term_cache  => {},
+    last_doc_id => -1
+  }, $class;
+
+  $self->{groups} = Krawfish::Koral::Result::Group::ClassFrequencies->new(
+    $self->{classes}
+  );
+
+  return $self;
+};
+
+
+# Initialize forward counter
+sub _init {
+  return if $_[0]->{forward_pointer};
+
+  my $self = shift;
+
+  print_log('g_class_freq', 'Create forward pointer') if DEBUG;
+
+  # Load the ranked list - may be too large for memory!
+  $self->{forward_pointer} = $self->{forward_obj}->pointer;
+};
+
+
+# Shorthand for "search through"
+sub finalize {
+  while ($_[0]->next) {};
+  return $_[0];
+};
+
+
+# Move to next match
+sub next {
+  my $self = shift;
+
+  $self->_init;
+
+  # No more matches
+  return unless $self->{query}->next;
+
+  # Get the current posting
+  my $current = $self->{query}->current;
+
+  my $groups = $self->{groups};
+  my $pointer = $self->{forward_pointer};
+
+  # Get the current doc_id and move to it
+  my $doc_id = $current->doc_id;
+
+  # Current doc_id differ - move forward
+  if ($doc_id != $self->{last_doc_id}) {
+
+    # Skip to doc
+    if ($pointer->skip_doc($doc_id) != $doc_id) {
+
+      # This should never happen, as for all docs there is a
+      # forward index!
+      return;
+    };
+
+    if (DEBUG) {
+      print_log('g_class_freq', "Moved forward index to $doc_id");
+    };
+
+    # Remember the last document
+    $self->{last_doc_id} = $doc_id;
+  };
+
+  # Remember terms (for overlap)
+  # with the structure pos -> term_id
+  # my $cache = $self->{term_cache};
+
+  # Collect classes - have the structure
+  # $classes[1] = [...term_id,0,0,term_id,...]
+  my @classes = ();
+  foreach (@{$self->{classes}}) {
+    $classes[$_] = []; # TODO: Length is normally the length of the match
+  };
+  my @term_cache = ();
+
+  # Get offset to simplify classes
+  my $offset = $current->start;
+
+  # TODO:
+  #   There may be overlaps within following
+  #   matches, so under certain circumstances the pointer
+  #   needs to move backwards.
+  #   It may be enough to do that at the beginning here.
+  #   Or instead of skip_pos, the forward pointer can
+  #   reposition automatically.
+
+  # Get class payloads
+  my @class_infos = $current->get_classes($self->{classes});
+
+  # Retrieve the requested classes for the current posting
+  foreach my $class_info (@class_infos) {
+
+    # Check class info
+    my ($nr, $start, $end) = @{$class_info};
+
+    # Iterate over the class and collect term ids
+    for (my $i = $start; $i < $end; $i++) {
+
+      # The relative position of the class in the match
+      my $rel_pos = $i - $offset;
+
+      # Check if term already retrieved
+      if ($term_cache[$rel_pos]) {
+
+        # Copy to class
+        $classes[$nr]->[$rel_pos] = $term_cache[$rel_pos];
+      }
+
+      # TODO:
+      #   Check for term cache
+      #   if ($term_cache->{$start})
+
+      # Retrieve term id
+      else {
+
+        # Collect all terms from start to end
+        if ($pointer->skip_pos($i)) {
+
+          # Add term id to class in correct order
+          my $term_id = $pointer->current->term_id;
+
+          if (DEBUG) {
+            print_log(
+              'g_class_freq',
+              "Term id at position $i is #" . $term_id);
+          };
+
+
+          # Set term_id at relative position in term_cache
+          $term_cache[$rel_pos] = $classes[$nr]->[$rel_pos] = $term_id;
+        };
+      }
+    };
+  };
+
+  if (DEBUG) {
+    print_log('g_class_freq', 'term cache is ' . join(',', @term_cache));
+  };
+
+  # The signature has the structure [class, term_id*]+
+  my @sig = ();
+
+  # Iterate over all classes
+  foreach my $nr (@{$self->{classes}}) {
+    push @sig, $nr;
+
+    foreach (@{$classes[$nr]}) {
+      push @sig, $_ if defined $_; # Add all set term_ids
+    };
+
+    push @sig, 0;
+  };
+
+  # Increment per match
+  $self->{groups}->incr_match(\@sig);
+
+  return 1;
+};
+
+
+sub current {
+  return $_[0]->{query}->current;
+};
+
+
+# Get collection
+sub collection {
+  $_[0]->{groups};
+};
+
+
+sub to_string {
+  my $self = shift;
+  my $str = 'gClassFreq(' . join(',', @{$self->{classes}}) .
+    ':' . $self->{query}->to_string . ')';
+  return $str;
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Group/Classes.pm b/lib/Krawfish/Compile/Segment/Group/Classes.pm
new file mode 100644
index 0000000..af286ed
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Group/Classes.pm

@@ -0,0 +1,121 @@
+package Krawfish::Compile::Segment::Group::Classes;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+# TODO:
+#   The name is somehow misleading, as this will only
+#   group by surface terms.
+
+# TODO:
+#   Refer to Krawfish::Compile::Segment::TermIDs!
+
+use constant {
+  DEBUG => 0,
+    NR => 0,
+    START_POS => 1,
+    END_POS => 2,
+};
+
+
+sub new {
+  my $class = shift;
+  bless {
+    segment => shift,
+    nrs => @_ ? [sort @_] : undef,
+    groups => {},
+  }, $class;
+};
+
+
+# Get the group signature for each match
+sub get_group {
+  my ($self, $match) = @_;
+
+  # Get all classes from the match
+  # Classes need to be sorted by start position
+  # to be retrievable, in case the subtokens-Stream
+  # is implemented as a postingslist (probably not)
+  my @classes = $match->get_classes_sorted($self->{nrs});
+
+  my $subtokens = $self->{segment}->subtokens;
+
+  my %class_group;
+
+  # Classes have nr, start, end
+  foreach my $class (@classes) {
+
+    # WARNING! CLASSES MAY OVERLAP SO SUBTOKENS SHOULD BE CACHED OR BUFFERED!
+
+    # Get start position
+    my $start = $class->[START_POS];
+
+    my @seq = ();
+
+    # Receive subtoken
+    my $subt = $subtokens->get($match->doc_id, $start);
+
+    # Push term id to subtoken
+    # TODO: A subtoken should have accessors
+    push (@seq, $subt->[2]);
+
+    while ($start < ($class->[END_POS] -1)) {
+      $subt = $subtokens->get($match->doc_id, ++$start);
+
+      # Push subterm id to subtoken
+      push (@seq, $subt->[2]);
+    };
+
+    # Class not yet set
+    unless ($class_group{$class->[NR]}) {
+      $class_group{$class->[NR]} = join('___', @seq);
+    }
+
+    # There is a gap in the class, potentially an overlap!
+    # TODO: Resolve overlaps!
+    # TODO: Mark gaps!
+    else {
+      $class_group{$class->[NR]} .= '___' . join('___', @seq);
+    };
+  };
+
+  my $string = '';
+  foreach (sort {$a <=> $b} keys %class_group) {
+    $string .= $_ .':' . $class_group{$_} . ';';
+  };
+
+  return $string;
+};
+
+
+# Return group info as hash
+sub to_hash {
+  my ($self, $signature, $doc_freq, $freq) = @_;
+
+  # TODO:
+  #   This can't work!
+  # Get dictionary object to convert terms to term id
+  # my $dict = $self->{segment}->dict;
+
+  my %hash = ();
+  while ($signature =~ /\G(\d+):(.+?);/g) {
+
+  #  if (DEBUG) {
+  #    print_log('g_class', "Build class $1 for signature $2");
+  #  };
+
+    $hash{"class_$1"} = [ split('___', $2)];
+  };
+  $hash{freq} = $freq if defined $freq;
+  $hash{doc_freq} = $doc_freq;
+  return \%hash;
+};
+
+
+sub to_string {
+  my $str = 'classes';
+  $str .= $_[0]->{nrs} ? '[' . join(',', @{$_[0]->{nrs}}) . ']' : '';
+  return $str;
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Group/Fields.pm b/lib/Krawfish/Compile/Segment/Group/Fields.pm
new file mode 100644
index 0000000..75884b2
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Group/Fields.pm

@@ -0,0 +1,175 @@
+package Krawfish::Compile::Segment::Group::Fields;
+use parent 'Krawfish::Compile';
+use Krawfish::Koral::Result::Group::Fields;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+# This will group matches (especially document matches) by field
+# This is useful e.g. for document browsing per corpus.
+#
+# Because the grouping is based on ranking, the sorting will be trivial.
+#
+# TODO:
+#   For some mechanisms, it is not necessary to count all occurrences,
+#   e.g. to get all keywords used in a certain virtual corpus or all
+#   used annotations.
+
+sub new {
+  my $class = shift;
+  my ($field_obj, $query, $fields) = @_;
+  my $self = bless {
+    field_obj  => $field_obj,
+    query      => $query,
+    field_keys => [map { ref($_) ? $_->term_id : $_ } @$fields],
+    last_doc_id => -1
+  }, $class;
+
+  # Initialize group object
+  $self->{groups} = Krawfish::Koral::Result::Group::Fields->new($self->{field_keys});
+
+  return $self;
+};
+
+
+# Initialize field pointer
+sub _init {
+  return if $_[0]->{field_pointer};
+
+  my $self = shift;
+
+  print_log('g_fields', 'Create pointer on fields') if DEBUG;
+
+  # Load the ranked list - may be too large for memory!
+  $self->{field_pointer} = $self->{field_obj}->pointer;
+};
+
+
+sub to_string {
+  my $self = shift;
+  my $str = 'gFields(' . join(',', map { '#' . $_ } @{$self->{field_keys}}) .
+    ':' . $self->{query}->to_string . ')';
+  return $str;
+};
+
+
+# Shorthand for "search through"
+sub finalize {
+  while ($_[0]->next) {};
+  return $_[0];
+};
+
+
+# Iterate to the next result
+sub next {
+  my $self = shift;
+
+  $self->_init;
+
+  my $groups = $self->{groups};
+  my $pointer = $self->{field_pointer};
+
+  # There is a next match
+  if ($self->{query}->next) {
+
+    # Get the current posting
+    my $current = $self->{query}->current;
+
+    if ($current->doc_id != $self->{last_doc_id}) {
+
+      # Flush old information
+      $groups->flush;
+
+      my $doc_id = $pointer->skip_doc($current->doc_id);
+
+      # There are no fields for this doc
+      next if $doc_id != $current->doc_id;
+
+      # Due to multivalued fields,
+      # a document can yield a permutation of
+      # patterns, so we recognize this
+      my @patterns = ();
+      my @field_keys = @{$self->{field_keys}};
+
+      # Ignore stored fields
+      my @field_objs = grep { $_->type ne 'store' } $pointer->fields(@field_keys);
+
+      my ($key_pos, $val_pos) = (0,0);
+
+      # Iterate through both lists and create a pattern
+      # Pattern may occur because fields can have multiple values
+      while ($key_pos < @field_keys) {
+
+        # There are no more values for the position
+        if (!$field_objs[$val_pos]) {
+          # Add ignorable null term
+          unless (@{$patterns[$key_pos]}) {
+            push @{$patterns[$key_pos]}, 0;
+          };
+          $key_pos++;
+        }
+
+        # Key identifier are matching
+        elsif ($field_keys[$key_pos] == $field_objs[$val_pos]->key_id) {
+
+          # Add key to pattern
+          $patterns[$key_pos] //= [];
+          push @{$patterns[$key_pos]}, $field_objs[$val_pos]->term_id;
+          $val_pos++;
+        }
+
+        # Forward key position
+        elsif ($field_keys[$key_pos] < $field_objs[$val_pos]->key_id) {
+
+          # Add ignorable null term
+          unless (@{$patterns[$key_pos]}) {
+            push @{$patterns[$key_pos]}, 0;
+          };
+          $key_pos++;
+        }
+
+        # $field_keys[$key_pos] > $field_objs[$val_pos]->key_id
+        else {
+
+          # I don't know if this can happen
+          $val_pos++;
+        };
+      };
+
+      # This adds
+      $groups->incr_doc(\@patterns);
+
+      # Set last doc to current doc
+      $self->{last_doc_id} = $current->doc_id;
+    };
+
+    # Add to frequencies
+    $groups->incr_match;
+
+    return 1;
+  };
+
+  # Flush cached results
+  $groups->flush;
+
+  return 0;
+};
+
+
+sub current {
+  return $_[0]->{query}->current;
+};
+
+
+# Get collection
+sub collection {
+  $_[0]->{groups};
+};
+
+
+1;
+
+
+__END__

diff --git a/lib/Krawfish/Compile/Segment/Group/Spans.pm b/lib/Krawfish/Compile/Segment/Group/Spans.pm
new file mode 100644
index 0000000..7085162
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Group/Spans.pm

@@ -0,0 +1,59 @@
+package Krawfish::Compile::Segment::Group::Spans;
+use parent 'Krawfish::Compile';
+use Krawfish::Log;
+use strict;
+use warnings;
+
+# This may be generalizable, but for the moment
+# It should make it possible to group the span positions
+# of a query based on a nesting query.
+#
+# The idea is to make the following possible:
+# Search for a term in sentences (like "{1:contains(<s>, {2:'baum'})}") and
+# based on the position and length of 1 and 2,
+# a result like
+#
+#     0: 5
+#     1: 7
+#   100: 2
+#
+# can be returned, where each class 1 is sliced in
+# 100 pieces and for each piece there is a dot, in case
+# class 2 occurs in that slice.
+#
+# By doing that it's easy to visualize the position of expressions
+# in sentences or documents etc.
+#
+# For example to answer questions like 'where in documents does
+# the phrase "Herzlichen Dank" occur?'
+#
+# If the span spans more than 1 slice, the result can be
+#
+#   0_2: 1
+#   0_3: 4
+#   4: 6
+#
+# etc. In case the second class is not nested in the first
+# class, this is not counted at all (as this would result
+# in weird data regarding the slice sizes).
+
+sub new {
+  my $class = shift;
+  my %param = @_;
+  bless {
+    slices => $param{slices} // 100,
+    wrap_clas => $param{wrap_class} // 1,
+    embedded_class => $param{embedded_class} // 2
+  }, $class;
+};
+
+# Get the group signature for each match
+# May well be renamed to get_signature
+sub get_group {
+  my $self = shift;
+  my $slice_start = 0;
+  my $slice_end = 0;
+  return $slice_start . '_' . $slice_end;
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Group/TermExistence.pm b/lib/Krawfish/Compile/Segment/Group/TermExistence.pm
new file mode 100644
index 0000000..009c6ed
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Group/TermExistence.pm

@@ -0,0 +1,159 @@
+package Krawfish::Compile::Segment::Group::TermExistence;
+use parent 'Krawfish::Compile';
+use strict;
+use warnings;
+
+# The query works similar to Or-query, but only accepts term ids.
+
+sub new {
+  my $class = shift;
+  bless {
+    term_id   => shift,  # Term Query
+    term_ids  => shift,  # Optional TermExistence-Query
+    filter    => undef,
+    existence => []
+  }, $class;
+};
+
+sub _init {
+  ...
+};
+
+
+# TODO:
+#   Think about when next() is called, as it needs to be called on term_ids as well ...
+#   Mabe this should be done in _init as a while query somehow.
+sub next {
+  my $self = shift;
+
+  # Get the current document in the VC
+  my $filter = $self->{filter};
+  my $doc_id = $filter->doc_id;
+
+  # The next document to look for in the VC
+  my $next_doc_id;
+
+
+  # Check the single term_id for existence
+
+  # The simple term does not exist
+  my $term = $self->{term_id};
+  if (!$term) {
+    # Do nothing
+  }
+
+  # Should never happen
+  elsif (!$term->current) {
+    $self->{term_id} = undef;
+  }
+
+  # Term exists and can be checked
+  else {
+
+    # Is the VC document beyond the current document id
+    if ($doc_id > $term->doc_id) {
+
+      # Move the term document to the VC document
+      $term->skip_doc($doc_id);
+    };
+
+    # Are both terms in the same document?
+    if ($term->doc_id == $doc_id) {
+
+      # Add this term to existence
+      $self->exists($term->term_id);
+
+      # Close posting
+      $term->close;
+
+      # Do not check any further
+      $self->{term_id} = undef;
+    }
+
+    # Current term document is beyond current VC doc
+    else {
+      $next_doc_id = $term->doc_id;
+    };
+  };
+
+
+  # Check the complex term_ids for existence
+
+  my $terms = $self->{term_ids};
+
+  if (!$terms) {
+    # Do nothing
+  }
+
+  # Should never happen
+  elsif (!$terms->current) {
+    $self->{term_ids} = undef;
+  }
+
+  else {
+
+    # When there is a complex query, move on
+    if ($doc_id > $terms->doc_id) {
+      $terms->skip_doc($doc_id);
+    };
+
+    # There are no further matches
+    unless ($terms->current) {
+
+      # Merge existence values
+      $self->exists($terms->existence);
+      $terms->close;
+      $self->{term_ids} = undef;
+    }
+
+    # Current terms are beyond current VC doc
+    else {
+
+      # Remember the next relevant document id
+      if (!$next_doc_id || $next_doc_id > $term->doc_id) {
+        $next_doc_id = $term->doc_id;
+      };
+    };
+  };
+
+  # There is a next document id defined - move on
+  if (defined $next_doc_id) {
+
+    # Move the VC stream to the next relevant position
+    if ($filter->skip_doc($next_doc_id)) {
+
+      # It's fine
+      return 1;
+    };
+  };
+
+  return 0;
+};
+
+
+# Add term ids to existence list
+sub exists {
+  my ($self, $term_id) = @_;
+
+  if (ref $term_id) {
+    push @{$terms->existence}, @$term_id;
+  }
+  else {
+    push @{$terms->existence}, $term_id;
+  };
+};
+
+
+# Return list of existing term ids
+sub existence {
+  return $self->{existence}
+};
+
+
+sub filter_by {
+  ...
+    # It is relevant to filter The query - but one filter may be enough
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Sort.pm b/lib/Krawfish/Compile/Segment/Sort.pm
new file mode 100644
index 0000000..3c69732
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort.pm

@@ -0,0 +1,377 @@
+package Krawfish::Compile::Segment::Sort;
+use parent 'Krawfish::Compile::Segment::Bundle';
+use Krawfish::Util::String qw/squote/;
+use Krawfish::Util::PriorityQueue::PerDoc;
+use Krawfish::Koral::Result::Match;
+use Krawfish::Posting::Bundle;
+use Krawfish::Log;
+use Data::Dumper;
+use strict;
+use warnings;
+
+# This is the general sorting implementation based on ranks.
+#
+# It establishes a top-k HeapSort approach and expects bundles
+# of matches to sort by a rank. It returns bundles.
+#
+# A given segment-wide $max_rank_ref can be used to ignore documents
+# during search in a sort filter.
+
+# TODO:
+#   Currently this is limited to fields and works different to subterms.
+#   So this may need to be renamed to Sort/ByField and Sort/ByFieldAfter.
+
+# TODO:
+#   It's possible that fields return a rank of 0, indicating that
+#   the field does not exist for the document.
+#   They will always be sorted at the end.
+
+# TODO:
+#   Ranks should respect the ranking mechanism of FieldsRan,
+#   where only even values are fine and odd values need
+#   to be sorted in a separate step (this is still open for discussion).
+
+# TODO:
+#   It may be beneficial to have the binary heap space limited
+#   and do a quickselect whenever the heap is full - to prevent full
+#   sort, see
+#   http://lemire.me/blog/2017/06/14/quickselect-versus-binary-heap-for-top-k-queries/
+#   https://plus.google.com/+googleguava/posts/QMD74vZ5dxc
+#   although
+#   http://lemire.me/blog/2017/06/21/top-speed-for-top-k-queries/
+#   says its irrelevant
+
+
+use constant {
+  DEBUG   => 0,
+  RANK    => 0,
+  SAME    => 1,
+  VALUE   => 2,
+  MATCHES => 3
+};
+
+
+# Constructor
+sub new {
+  my $class = shift;
+  my %param = @_;
+
+  if (DEBUG) {
+    print_log('sort', 'Initiate sort object');
+  };
+
+  # TODO:
+  #   Check for mandatory parameters
+  #
+  my $query    = $param{query};
+
+  unless ($query->isa('Krawfish::Compile::Segment::Bundle')) {
+    warn 'The query is no bundled query';
+    return;
+  };
+
+  my $segment  = $param{segment};
+  my $top_k    = $param{top_k};
+
+  # This is the sort criterion
+  my $sort     = $param{sort};
+
+  # Set top_k if not yet set
+  # - to be honest, heap sort is probably not the
+  # best approach for a full search
+  $top_k //= $segment->max_rank;
+
+  # The maximum ranking value may be used
+  # by outside filters to know in advance,
+  # if a document can't be part of the result set
+  my $max_rank_ref;
+  if (defined $param{max_rank_ref}) {
+
+    # Get reference from definition
+    $max_rank_ref = $param{max_rank_ref};
+  }
+  else {
+
+    # Create a new reference
+    $max_rank_ref = \(my $max_rank = $segment->max_rank);
+  };
+
+  # Create initial priority queue
+  my $queue = Krawfish::Util::PriorityQueue::PerDoc->new(
+    $top_k,
+    $max_rank_ref
+  );
+
+  # Construct
+  return bless {
+    segment      => $segment,
+    top_k        => $top_k,
+    query        => $query,
+    queue        => $queue,
+    max_rank_ref => $max_rank_ref,
+
+    # TODO:
+    #   Rename to criterion
+    sort         => $sort,
+
+    buffer       => undef,
+    pos_in_sort  => 0,
+
+    # Default starts
+    stack        => [],  # All lists on a stack
+    sorted       => [],
+    pos          => 0 # Number of matches already served
+  }, $class;
+};
+
+sub clone {
+  my $self = shift;
+  __PACKAGE__->new(
+    query   => $self->{query}->clone,
+    segment => $self->{segment},
+    top_k   => $self->{top_k},
+    sort    => $self->{sort},
+    max_rank_ref => $self->{max_rank_ref}
+  );
+};
+
+
+# Initialize the sorting - this will do a full run!
+sub _init {
+  my $self = shift;
+
+  # Result already initiated
+  return if $self->{init}++;
+
+  my $query = $self->{query};
+
+  if (DEBUG) {
+    print_log('sort', 'Initialize sort object');
+  };
+
+  # TODO:
+  #   Make this work for terms as well!
+
+  # TODO:
+  #   This currently only works for fields,
+  #   because it bundles document ids.
+  #   The prebundling of documents may be
+  #   done in a separated step.
+
+  # Get maximum accepted rank from queue
+  my $max_rank_ref = $self->{max_rank_ref};
+  my $queue = $self->{queue};
+  my $sort = $self->{sort};
+
+  if (DEBUG) {
+    print_log('sort', qq!Next Rank on field #! . $sort->term_id);
+  };
+
+  # Store the last match buffered
+  my ($match, $rank);
+
+  # Init
+  $query->next_bundle;
+
+  # Pass through all queries
+  while ($match = $query->current_bundle) {
+
+    if (DEBUG) {
+      print_log('sort', 'Get next posting from ' . $query->to_string);
+    };
+
+    # Get stored rank
+    $rank = $sort->rank_for($match->doc_id);
+
+    if (DEBUG) {
+      print_log('sort', 'Rank for doc id ' . $match->doc_id . " is $rank");
+      print_log('sort', "Check rank $rank against max rank " . $$max_rank_ref);
+    };
+
+
+    # Precheck if the match is relevant
+    if ($rank > $$max_rank_ref) {
+
+      # Document is irrelevant
+      $match = undef;
+
+      if (DEBUG) {
+        print_log('sort', 'Move to next doc');
+      };
+
+      # Skip to next document
+      $query->next_doc;
+      CORE::next;
+    };
+
+    # Get current bundle
+    my $bundle = $query->current_bundle;
+
+    # Insert bundle into priority queue with length information
+    $queue->insert([$rank, 0, $bundle, $bundle->matches]) if $bundle;
+
+    if (DEBUG) {
+      print_log('sort', 'Move to next bundle');
+    };
+
+    # Move to next bundle
+    $query->next_bundle;
+  };
+
+  my $array = $queue->reverse_array;
+  if (DEBUG && 0) {
+    print_log('sort', 'Get list ranking of ' . Dumper($array));
+  };
+
+  # Get the rank reference (new);
+  $self->{buffer} = $array;
+};
+
+
+
+# Move to the next item in the bundled document list
+# and create the next bundle
+sub next_bundle {
+  my $self = shift;
+
+  # Initialize query - this will do a full run on the first field level!
+  $self->_init;
+
+  if ($self->{pos_in_sort} >= @{$self->{buffer}}) {
+    if (DEBUG) {
+      print_log('sort', 'No more elements in the priority array');
+    };
+
+    $self->{current_bundle} = undef;
+    return;
+  };
+
+  # Set current bundle
+  $self->{current_bundle} = $self->get_bundle_from_buffer;
+
+  # Remember the number of entries
+  $self->{pos} += $self->{current_bundle}->matches;
+  return 1;
+};
+
+
+# Get the top bundle from buffer
+sub get_bundle_from_buffer {
+  my $self = shift;
+
+  # Iterate over the next elements with identical ranks
+  my $pos = $self->{pos_in_sort};
+
+  # Get the top entry
+  my $top_bundle = $self->{buffer}->[$pos];
+
+  if (DEBUG) {
+    print_log('sort_after', "Move to next bundle at $pos, which is " .
+                $top_bundle->[VALUE]);
+  };
+
+  my $rank = $top_bundle->[RANK];
+
+  if (DEBUG) {
+    print_log('sort_after', "Create new bundle from prio at $pos starting with " .
+                $top_bundle->[VALUE]->to_string);
+  };
+
+  # Initiate new bundle
+  my $new_bundle = Krawfish::Posting::Bundle->new($top_bundle->[VALUE]);
+
+  $pos++;
+  for (; $pos < @{$self->{buffer}}; $pos++) {
+    $top_bundle = $self->{buffer}->[$pos];
+
+    if (DEBUG) {
+      print_log('sort', 'Check follow up from prio: ' . $top_bundle->[VALUE]->to_string);
+    };
+
+    # Add element to bundle
+    if ($rank == $top_bundle->[RANK]) {
+
+      if (DEBUG) {
+        print_log('sort', "Both items have the same rank $rank");
+      };
+
+      unless ($new_bundle->add($top_bundle->[VALUE])) {
+        warn 'Unable to add bundle to new bundle';
+      };
+    }
+
+    # Stop
+    else {
+      last;
+    };
+  };
+
+  # Get position
+  $self->{pos_in_sort} = $pos;
+
+  if (DEBUG) {
+    print_log('sort', 'Current bundle is now ' . $new_bundle->to_string);
+  };
+
+  return $new_bundle;
+};
+
+
+# Get the current match object
+sub current_match {
+  #  my $self = shift;
+  #  my $current = $self->current or return;
+  #  my $match = Krawfish::Koral::Result::Match->new(
+  #    doc_id  => $current->doc_id,
+  #    start   => $current->start,
+  #    end     => $current->end,
+  #    payload => $current->payload,
+  #  );
+  #
+  #  if (DEBUG) {
+  #    print_log('sort', 'Current match is ' . $match->to_string);
+  #  };
+  #
+  #  return $match;
+  ...
+};
+
+
+# Return the current posting
+sub current {
+  my $self = shift;
+  if (DEBUG) {
+    print_log('sort', 'Current posting is ' . $self->{current}->to_string);
+  };
+
+  $self->{current};
+};
+
+
+sub to_string {
+  my $self = shift;
+  my $str = 'sort(';
+  $str .= $self->{sort}->to_string;
+  $str .= ',0-' . $self->{top_k} if $self->{top_k};
+  $str .= ':' . $self->{query}->to_string;
+  return $str . ')';
+};
+
+
+sub _string_array {
+  my $array = shift;
+  my $str = '';
+  foreach (@$array) {
+    $str .= '[';
+    $str .= 'R:' . $_->[RANK] . ';';
+    $str .= ($_->[SAME] ? 'S:' . $_->[SAME] . ';' : '');
+    $str .= ($_->[MATCHES] ? 'M:' . $_->[MATCHES] : '');
+    $str .= ']';
+  };
+  return $str;
+};
+
+
+1;
+
+__END__

diff --git a/lib/Krawfish/Compile/Segment/Sort/Alphabet.pm b/lib/Krawfish/Compile/Segment/Sort/Alphabet.pm
new file mode 100644
index 0000000..25d805f
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/Alphabet.pm

@@ -0,0 +1,58 @@
+package Krawfish::Compile::Segment::Sort::Alphabet;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+warn 'NOT USED YET';
+
+use constant DEBUG => 0;
+
+# Sort result alphabetically.
+# Requires a class or the match - and based on the start + end, the term_ids are requested
+# and added per match as an array.
+# Then the matches are sorted one term_id position after the other.
+#
+# In case the ordering is reverse alphabetically, the term_id array is reversed as well.
+#
+# In case the term_id array has no equal length, the shorter array is preferred.
+#
+# EXAMPLE:
+#   match1: [term_1, term_2, term_3]
+#   match2: [term_1, term_2, term_3]
+#
+# This is necessary for all alphabetical sortings!
+
+
+# ---- old:
+# Sort by characters of a certain segment (either the first or the last).
+# This will require to open the offset file to get the first two characters
+# for bucket sorting per token and then request the
+# forward index (the offset is already liftet and may be stored in the buckets
+# as well) for fine grained sorting!
+
+# TODO:
+#   This will need to pass sorting criteria as strings for cluster sorting.
+#   At least for the top k matches.
+
+sub new {
+  my $class = shift;
+  my $self = bless {
+    query => shift,
+    index => shift,
+    # top_k => shift
+  }, $class;
+
+  my $dict = $self->dict;
+  my $subt = $self->subtokens;
+
+  while ($query->next) {
+    my $element = $query->current->clone;
+
+    # Get the subtoken info
+    my $x = $subt->get($element->doc_id, $element->start);
+
+    # Add element with id for sorting
+    push @record, [$element, $x->[2]]
+  };
+
+};

diff --git a/lib/Krawfish/Compile/Segment/Sort/Criterion/Rank.pm b/lib/Krawfish/Compile/Segment/Sort/Criterion/Rank.pm
new file mode 100644
index 0000000..ac6710d
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/Criterion/Rank.pm

@@ -0,0 +1,44 @@
+package Krawfish::Compile::Sort::Criterion::Field;
+use strict;
+use warnings;
+
+warn 'NOT USED YET';
+
+# TODO:
+#   The same criterion for K::Result::Node::Field
+#   will introduce field fetching etc.
+
+# Constructor
+sub new {
+  my $self = shift;
+  my ($index, $field, $desc) = @_;
+
+  bless {
+    field => $field,
+    desc => $desc,
+    ranking => $index->fields->ranked_by($field),
+    max => $self->{ranking}->max if $desc
+  }, $class;
+};
+
+
+# Get the rank of the match
+sub rank {
+  my ($self, $match) = @_;
+
+  # Get rank from match
+  my $rank = $self->{ranking}->get($match->doc_id);
+  return $self->{max} ? ($self->{max} - $rank) : $rank;
+};
+
+
+# Serialize to string
+sub to_string {
+  my $self = shift;
+  my $str = 'field=';
+  $str .= $self->{field};
+  $str .= $self->{desc} ? '>' : '<';
+  return $str;
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Sort/Field.pm b/lib/Krawfish/Compile/Segment/Sort/Field.pm
new file mode 100644
index 0000000..ac441e4
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/Field.pm

@@ -0,0 +1,90 @@
+package Krawfish::Compile::Segment::Sort::Field;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+# TODO:
+#   Use this an instantiate it directly with
+#   a ranking!
+
+# Sorting criterion for field ranks.
+
+# TODO:
+#   Probably not only support ranks but all kinds of sorting
+#   by having a get_lt() API that also works for strings!
+
+# TODO:
+#   Return max rank for unknown fields!
+
+sub new {
+  my $class = shift;
+
+  my ($segment, $field_id, $descending) = @_;
+
+  # Get ranking
+  my $rank = $segment->field_ranks->by($field_id);
+
+  return unless $rank;
+
+  my $self = bless {
+    field_id => $field_id,
+    desc     => $descending,
+    max_rank => $rank->max_rank
+  }, $class;
+
+  # Get fields in descending order
+  if ($descending) {
+
+    # This may be a real descending order file
+    # or a reversed single-valued ascending order file
+    $self->{rank} = $rank->descending or return;
+  }
+
+  # Get fields in ascending order
+  else {
+    $self->{rank} = $rank->ascending or return;
+  };
+
+  return $self;
+};
+
+sub type {
+  'field';
+};
+
+
+# Get the rank for this criterion
+sub rank_for {
+  my ($self, $doc_id) = @_;
+
+  return $self->{rank}->rank_for($doc_id);
+
+  # Get rank if rank is littler than value
+  # my $value = shift;
+  # return $self->{rank}->rank_doc_lt($doc_id, $value);
+  #  my $max = $ranking->max if $desc;
+};
+
+
+sub criterion {
+  $_[0]->{field_id};
+};
+
+sub max_rank {
+  $_[0]->{max_rank}
+}
+
+sub term_id {
+  $_[0]->{field_id};
+};
+
+# TODO:
+#   This may need to be an inflatable
+sub to_string {
+  my $self = shift;
+  return 'field=#' . $self->{field_id} . ($_->{desc} ? '>' : '<')
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Sort/Filter.pm b/lib/Krawfish/Compile/Segment/Sort/Filter.pm
new file mode 100644
index 0000000..9f723d9
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/Filter.pm

@@ -0,0 +1,122 @@
+package Krawfish::Compile::Segment::Sort::Filter;
+use parent 'Krawfish::Corpus';
+use Krawfish::Log;
+use strict;
+use warnings;
+
+warn 'NOT USED YET';
+
+use constant DEBUG => 0;
+
+# This is a corpus query implementation
+#
+# TODO: Better move to Krawfish::Corpus
+
+sub new {
+  my $class = shift;
+  my %param = @_;
+
+  my $ranking = $param{index}->fields->ranked_by($param{field});
+  my $max = $ranking->max if $param{desc};
+
+  bless {
+    query        => $param{query},
+    max_rank_ref => $param{max_rank_ref},
+    field        => $param{field},
+    desc         => $param{desc},
+    ranking      => $ranking,
+    max          => $max,
+    init         => 0
+  }, $class;
+};
+
+
+# Forward to next document
+sub next {
+  my $self = shift;
+
+  my $query = $self->{query};
+
+  # Get next document
+  while ($query->next) {
+
+    # Check object
+    return 1 if $self->_check;
+  };
+
+  # No next
+  return;
+};
+
+
+# Check the document id for the rank
+sub _check {
+  my $self = shift;
+
+  # Maximum rank reference
+  my $max_rank_ref = $self->{max_rank_ref};
+
+  # Get the current doc_id
+  my $query = $self->{query};
+  my $current = $query->current;
+  my $doc_id = $current->doc_id;
+
+  # Get rank for field
+  my $rank = $self->{ranking}->get($doc_id);
+
+  # Invert rank if descending field is required
+  $rank = $self->{max} - $rank if $self->{max};
+
+  if (DEBUG) {
+    print_log('vc_sort_filter', 'Current posting is ' . $current->to_string);
+  };
+
+  # Rank is smaller then required
+  if ($rank <= $$max_rank_ref) {
+
+    # Document is fine
+    $self->{current} = $current;
+    return 1;
+  };
+
+  if (DEBUG) {
+    print_log('vc_sort_filter', $current->to_string . ' is filtered out');
+  };
+
+  $self->{current} = undef;
+  return;
+};
+
+
+# Get current document
+sub current {
+  $_[0]->{current};
+};
+
+
+# Skip to the relevant document
+sub skip_doc {
+  my ($self, $doc_id) = @_;
+
+  my $query = $self->{query};
+
+  # Skip the document
+  if ($query->skip_doc($doc_id)) {
+
+    # Return the document id, if it matches
+    return $doc_id if $query->_check;
+
+    # Get the next matching element
+    if ($self->next) {
+
+      # return the document id
+      return $self->{current}->doc_id;
+    };
+  };
+
+  # Fail
+  return;
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Sort/No.pm b/lib/Krawfish/Compile/Segment/Sort/No.pm
new file mode 100644
index 0000000..61f3b90
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/No.pm

@@ -0,0 +1,35 @@
+package Krawfish::Compile::Segment::Sort::No;
+use strict;
+use warnings;
+
+# This is a dummy sorting criterion that is
+# used for fields that are either not sortable
+# or for fields not in the dictionary.
+# As the first may be an indication for an error
+# in the index design,
+
+sub new {
+  my $class = shift;
+  return bless {
+    field   => shift,
+    desc    => shift
+  }, $class;
+};
+
+
+# Get the rank for this criterion
+sub rank_for {
+  return 0;
+};
+
+
+sub criterion {
+  $_[0]->{field};
+};
+
+sub to_string {
+  my $self = shift;
+  return 'field=' . $self->{field} . ($_->{desc} ? '>' : '<')
+};
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Sort/Priority.pm b/lib/Krawfish/Compile/Segment/Sort/Priority.pm
new file mode 100644
index 0000000..926676d
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/Priority.pm

@@ -0,0 +1,149 @@
+package Krawfish::Compile::Segment::Sort::Priority;
+use Krawfish::Util::PriorityQueue;
+use Krawfish::Log;
+use Data::Dumper;
+use strict;
+use warnings;
+
+# WARNING!
+# THIS IS DEPRECATED IN FAVOR OF Segment::Sort and Segment::SortAfter
+
+
+use constant DEBUG => 0;
+
+sub new {
+  my $class = shift;
+  my %param = @_;
+
+  my $query  = $param{query};
+  my $fields = $param{fields};
+  my $field  = $param{field};
+  my $desc   = $param{desc} ? 1 : 0;
+  my $top_k  = $param{top_k};
+
+  my $max_rank_ref = $param{max_rank_ref};
+
+  # Create priority queue
+  my $queue = Krawfish::Util::PrioritySort->new($top_k, $max_rank_ref);
+
+  return bless {
+    field_rank => $fields->ranked_by($field),
+    field => $field,
+    desc => $desc,
+    query => $query,
+    queue => $queue,
+    list => undef,
+    pos => -1
+  }, $class;
+};
+
+
+# Init queue
+sub _init {
+  my $self = shift;
+
+  return if $self->{init}++;
+
+  my $field_rank = $self->{field_rank};
+
+  my $max;
+  # Get maximum rank if descending order
+  if ($self->{desc}) {
+    $max = $field_rank->max;
+  };
+
+  my $query = $self->{query};
+  my $queue = $self->{queue};
+  my $last_doc_id = -1;
+  my $rank;
+
+  # Pass through all queries
+  while ($query->next) {
+
+    if (DEBUG) {
+      print_log('p_sort', 'Get next posting from ' . $query->to_string);
+    };
+
+    # Clone record
+    my $record = $query->current->clone;
+
+    # Fetch rank if doc_id changes
+    if ($record->doc_id != $last_doc_id) {
+
+      # Get stored rank
+      $rank = $field_rank->get($record->doc_id);
+
+      # Revert if maximum rank is set
+      $rank = $max - $rank if $max;
+    };
+
+    if (DEBUG) {
+      print_log('p_sort', 'Rank for doc id ' . $record->doc_id . " is $rank");
+    };
+
+    # Insert into priority queue
+    $queue->insert([$rank, 0, $record]);
+  };
+
+  # Get the rank reference
+  $self->{list} = $queue->reverse_array;
+  $self->{length} = $queue->length;
+};
+
+
+# Get next element from list
+sub next {
+  my $self = shift;
+  $self->_init;
+  if ($self->{pos}++ < $self->{length}) {
+    return 1;
+  };
+  return;
+};
+
+
+# Get current element
+sub current {
+  my $self = shift;
+
+  # 2 is the index of the value
+  if (DEBUG) {
+    print_log('p_sort', 'Get match from index ' . $self->{pos});
+  };
+
+  return $self->{list}->[$self->{pos}]->[2];
+};
+
+
+# Return the number of duplicates of the current match
+sub duplicate_rank {
+  my $self = shift;
+
+  if (DEBUG) {
+    print_log('p_sort', 'Check for duplicates from index ' . $self->{pos});
+  };
+
+  return $self->{list}->[$self->{pos}]->[1] || 1;
+};
+
+
+# This returns an additional data structure with key/value pairs
+# in sorted order to document the sort criteria.
+# Like: [[class_1 => 'cba'], [author => 'Goethe']]...
+# This is necessary for the cluster-merge-sort
+sub current_sort {
+  ...
+};
+
+
+sub to_string {
+  my $self = shift;
+  my $str = 'prioritySort(';
+  $str .= $self->{desc} ? '^' : 'v';
+  $str .= ',' . $self->{field} . ':';
+  $str .= $self->{query}->to_string;
+  return $str . ')';
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Sort/PriorityCascade.pm b/lib/Krawfish/Compile/Segment/Sort/PriorityCascade.pm
new file mode 100644
index 0000000..58806e4
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/PriorityCascade.pm

@@ -0,0 +1,470 @@
+package Krawfish::Compile::Segment::Sort::PriorityCascade;
+use parent 'Krawfish::Compile';
+use Krawfish::Util::String qw/squote/;
+use Krawfish::Util::PriorityQueue::PerDoc;
+use Krawfish::Koral::Result;
+use Krawfish::Posting::Bundle;
+use Krawfish::Log;
+use Data::Dumper;
+use strict;
+use warnings;
+
+# WARNING!
+# THIS IS DEPRECATED IN FAVOR OF Segment::Sort and Segment::SortAfter
+
+# This is only based on criteria that return ranks
+
+use constant {
+  DEBUG   => 0,
+  RANK    => 0,
+  SAME    => 1,
+  VALUE   => 2,
+  MATCHES => 3
+};
+
+# TODO:
+#   my $offset = $param{offset};
+#   This may however not work in a multi-segment
+#   or cluster scenario - so let's forget about it
+
+# TODO:
+#   It's possible that fields return a rank of 0, indicating that
+#   the field is not yet ranked.
+#   In that case these fields have to be looked up, in case they are
+#   potentially in the result set (meaning they are ranked before/after
+#   the last accepted rank field). If so, they need to be remembered.
+#   After a sort turn, the non-ranked fields are sorted in the ranked
+#   fields. The field can be reranked any time.
+
+# TODO:
+#   Ranks should respect the ranking mechanism of FieldsRank and
+#   TermRank, where only even values are fine and odd values need
+#   to be sorted in a separate step.
+
+sub new {
+  my $class = shift;
+  my %param = @_;
+
+  # TODO:
+  #   Check for mandatory parameters
+  my $query = $param{query};
+
+  # This is the index element
+  my $index = $param{index};
+  my $top_k = $param{top_k};
+
+  # This is the fields element
+  # It has the structure [[field], [field, 1]]
+  # where the second value is the descending marker
+  my $fields = $param{fields};
+  # TODO: Change to criterion!
+
+  # For final field distinction, use unique field
+  push @$fields, [$param{unique}];
+
+  # The maximum ranking value may be used
+  # by outside filters to know in advance,
+  # if a document can't be part of the result set
+  my $max_rank_ref;
+  if (defined $param{max_rank_ref}) {
+
+    # Get reference from definition
+    $max_rank_ref = $param{max_rank_ref};
+  }
+  else {
+
+    # Create a new reference
+    $max_rank_ref = \(my $max_rank = $index->max_rank);
+  };
+
+  # Create initial priority queue
+  my $queue = Krawfish::Util::PriorityQueue::PerDoc->new(
+    $top_k,
+    $max_rank_ref
+  );
+
+  # Construct
+  return bless {
+    fields       => $fields,
+    index        => $index,
+    top_k        => $top_k,
+    query        => $query,
+    queue        => $queue,
+    max_rank_ref => $max_rank_ref,
+    stack        => [],  # All lists on a stack
+    sorted       => [],
+    pos          => 0
+  }, $class;
+};
+
+
+# Initialize the sorting - this will do a full run!
+sub _init {
+  my $self = shift;
+
+  # Result already initiated
+  return if $self->{init}++;
+
+  my $query = $self->{query};
+
+  # Get first sorting criterion
+  my ($field, $desc) = @{$self->{fields}->[0]};
+
+  # Get ranking
+  my $ranking = $self->{index}->fields->ranked_by($field);
+
+  # Get maximum rank if descending order
+  my $max = $ranking->max if $desc;
+
+  # Get maximum accepted rank from queue
+  my $max_rank_ref = $self->{max_rank_ref};
+
+  my $last_doc_id = -1;
+  my $rank;
+  my $queue = $self->{queue};
+
+  # Store the last match buffered
+  my $match;
+
+  if (DEBUG) {
+    print_log('p_sort', qq!Next Rank on field "$field"!);
+  };
+
+  # Pass through all queries
+  while ($match || ($query->next && ($match = $query->current))) {
+
+    if (DEBUG) {
+      print_log('p_sort', 'Get next posting from ' . $query->to_string);
+    };
+
+    # Get stored rank
+    $rank = $ranking->get($match->doc_id);
+
+    # Revert if maximum rank is set
+    $rank = $max - $rank if $max;
+
+    if (DEBUG) {
+      print_log('p_sort', 'Rank for doc id ' . $match->doc_id . " is $rank");
+    };
+
+    # Precheck if the match is relevant
+    if ($rank <= $$max_rank_ref) {
+
+      # Create new bundle of matches
+      my $bundle = Krawfish::Posting::Bundle->new($match->clone);
+
+      # Remember doc_id
+      $last_doc_id = $match->doc_id;
+      $match = undef;
+
+      # Iterate over next queries
+      while ($query->next) {
+
+        # New match should join the bundle
+        if ($query->current->doc_id == $last_doc_id) {
+
+          # Add match to bundle
+          $bundle->add($query->current);
+        }
+
+        # New match is new
+        else {
+
+          # Remember match for the next tome
+          $match = $query->current;
+          last;
+        };
+      };
+
+      # Insert into priority queue
+      $queue->insert([$rank, 0, $bundle, $bundle->length]) if $bundle;
+    }
+
+    # Document is irrelevant
+    else {
+      $match = undef;
+    };
+  };
+
+  print_log('p_sort', 'Get list ranking') if DEBUG;
+
+  # Get the rank reference
+  $self->{stack} = [$queue->reverse_array];
+};
+
+
+# Move to the next item in the sorted list
+sub next {
+  my $self = shift;
+
+  if ($self->{pos}++ >= $self->{top_k}) {
+
+    if (DEBUG) {
+      print_log(
+        'p_sort',
+        'top_k ' . $self->{top_k} . ' is reached at position ' . $self->{pos}
+      );
+    };
+
+    $self->{current} = undef;
+    return;
+  };
+
+  # Initialize query - this will do a full run on the first field level!
+  $self->_init;
+
+  # There are sorted results in the result list
+  if (scalar @{$self->{sorted}}) {
+
+    # Make this current
+    $self->{current} = shift @{$self->{sorted}};
+
+    if (DEBUG) {
+      print_log(
+        'p_sort',
+        'There is already a match in [sorted]: ' . $self->{current}->to_string,
+      );
+    };
+
+    return 1;
+  }
+
+  # Nothing presorted
+  elsif (DEBUG) {
+    print_log('p_sort', 'There is no match in [sorted]');
+  };
+
+  # Get the list values
+  my $stack = $self->{stack};
+
+  # The result list is empty - sort next items
+  #  if ($self->{presorted}) {
+  #  };
+
+  # This will get the level from the stack
+  my $level = $#{$stack};
+
+  print_log('p_sort', "Check stack on current level $level") if DEBUG;
+
+  # If the current list is empty, remove from stack
+  while (scalar @$stack && (
+    !scalar(@{$stack->[$level]}) ||
+      !scalar(@{$stack->[$level]->[0]})
+    )) {
+
+    print_log('p_sort', "Stack is empty at least on level $level") if DEBUG;
+
+    pop @$stack;
+    $level--;
+
+    if (DEBUG) {
+      print_log('p_sort', "Stack is reduced to level $level with " . Dumper($stack));
+    };
+  };
+
+  # There is nothing to sort further
+  unless (scalar @$stack) {
+
+    print_log('p_sort', 'There is nothing to sort further') if DEBUG;
+
+    $self->{current} = undef;
+    return;
+  };
+
+  # while (my $same = $list->[0]->[SAME]) {
+  #   $list = $self->heap_sort();
+  # };
+
+  # TODO:
+  #   Depending on how many identical ranks exist,
+  #   here the next strategy should be chosen.
+  #   Either sort in place, or sort using heapsort again.
+
+
+  # The first item in the current list has multiple identical ranks
+  # As long as the first item in the list has duplicates,
+  # order by the next level
+  while ((my $same = ($stack->[$level]->[0]->[SAME] // 1)) > 1) {
+
+    if (DEBUG) {
+      print_log(
+        'p_sort',
+        "Found $same matches at first node",
+        "  on level $level in " . _string_array($stack->[$level])
+      );
+    };
+
+    # Get the identical elements from the list
+    my @presort = splice(@{$stack->[$level]}, 0, $same - 1);
+
+    print_log('p_sort', 'Presort array is ' . _string_array(\@presort)) if DEBUG;
+    # TODO: Push presort on the stack!
+
+    # This is the new top_k!
+    # TODO: Check if this is really correct!
+    my $top_k = $self->{top_k} - ($self->{pos} - 1);
+
+    # Get next field to rank on level
+    # level 0 is preinitialized, so it is one off
+    my ($field, $desc) = @{$self->{fields}->[$level + 1]};
+
+    if (DEBUG) {
+      print_log('p_sort', qq!Next Rank on field "$field"!);
+    };
+
+    $level++;
+
+    # TODO:
+    #   If the same count is smaller than X (at least top_k - pos)
+    #   do quicksort or something similar
+    # if ($same < $top_k || $same < 128) {
+    # }
+    # else
+    $stack->[$level] = $self->heap_sort($top_k, \@presort, $field, $desc);
+    # };
+
+    if (DEBUG) {
+      print_log(
+        'p_sort',
+        "Sorted array",
+        "  on new level $level is " . _string_array($stack->[$level])
+      );
+    };
+  };
+
+  # There are matches on the list without identical ranks
+
+  if (DEBUG) {
+    print_log('p_sort', "Stack with level $level is " . Dumper($stack));
+  };
+
+  # Get the top list entry
+  my $top = shift @{$stack->[$level]};
+
+  print_log('p_sort', 'Push value ' . $top->[VALUE]) if DEBUG;
+
+  # Push matches to result list
+  push @{$self->{sorted}}, $top->[VALUE]->unbundle;
+
+  # Make the first match the current
+  # TODO: Be aware! This is a BUNDLE!
+  $self->{current} = shift @{$self->{sorted}};
+  return 1;
+};
+
+
+sub _string_array {
+  my $array = shift;
+  my $str = '';
+  foreach (@$array) {
+    $str .= '[';
+    $str .= 'R:' . $_->[RANK] . ';';
+    $str .= ($_->[SAME] ? 'S:' . $_->[SAME] . ';' : '');
+    $str .= ($_->[MATCHES] ? 'M:' . $_->[MATCHES] : '');
+    $str .= ']';
+  };
+  return $str;
+};
+
+
+# Todo:
+#   Accept an iterator, a ranking, and return an iterator
+sub heap_sort {
+  my ($self, $top_k, $sub_list, $field, $desc) = @_;
+
+  if (DEBUG) {
+    print_log('p_sort', 'Heapsort list of length ' . scalar(@$sub_list) .
+                qq! by field "$field" for top_k = $top_k!);
+  };
+
+  my $index = $self->{index};
+  my $ranking = $index->fields->ranked_by($field);
+
+  # Get maximum rank if descending order
+  my $max = $ranking->max if $desc;
+
+  # Get maximum rank
+  my $max_rank = $index->max_rank;
+  my $max_rank_ref = \$max_rank;
+
+  # Create new priority queue
+  my $queue = Krawfish::Util::PriorityQueue::PerDoc->new(
+    $top_k,
+    $max_rank_ref
+  );
+
+  my $rank;
+
+  # Iterate over list
+  foreach (@$sub_list) {
+    my $bundle = $_->[VALUE];
+
+    # Get stored rank
+    $rank = $ranking->get($bundle->doc_id);
+
+    # Revert if maximum rank is set
+    $rank = $max - $rank if $max;
+
+    # Insert into queue
+    $queue->insert([$rank, 0, $bundle, $bundle->length]);
+  };
+
+  # Return reverse list
+  return $queue->reverse_array;
+};
+
+
+# Return the current match
+sub current {
+
+  if (DEBUG) {
+    print_log('p_sort', 'Current posting is ' . $_[0]->{current}->to_string);
+  };
+
+  $_[0]->{current};
+};
+
+sub current_match {
+  my $self = shift;
+  my $current = $self->current or return;
+  my $match = Krawfish::Koral::Result::Match->new(
+    doc_id  => $current->doc_id,
+    start   => $current->start,
+    end     => $current->end,
+    payload => $current->payload,
+  );
+
+  if (DEBUG) {
+    print_log('p_sort', 'Current match is ' . $match->to_string);
+  };
+
+  return $match;
+};
+
+# Return the number of duplicates of the current match
+sub duplicate_rank {
+  my $self = shift;
+
+  if (DEBUG) {
+    print_log('p_sort', 'Check for duplicates from index ' . $self->{pos});
+  };
+
+  return $self->{list}->[$self->{pos}]->[1] || 1;
+};
+
+
+sub to_string {
+  my $self = shift;
+  my $str = 'resultSorted([';
+  $str .= join(',', map { squote($_->[0]) . ($_->[1] ? '>' : '<') } @{$self->{fields}});
+  $str .= ']';
+  $str .= ',0-' . $self->{top_k} if $self->{top_k};
+  $str .= ':' . $self->{query}->to_string;
+  return $str . ')';
+};
+
+
+1;
+
+__END__
+

diff --git a/lib/Krawfish/Compile/Segment/Sort/Random.pm b/lib/Krawfish/Compile/Segment/Sort/Random.pm
new file mode 100644
index 0000000..038ca8d
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/Random.pm

@@ -0,0 +1,5 @@
+# Return all elements in random order
+# This may, however, not be necessary having Sample
+
+# https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle
+# https://lemire.me/blog/2017/09/26/benchmarking-algorithms-to-visit-all-values-in-an-array-in-random-order/

diff --git a/lib/Krawfish/Compile/Segment/Sort/Sample.pm b/lib/Krawfish/Compile/Segment/Sort/Sample.pm
new file mode 100644
index 0000000..674527e
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/Sample.pm

@@ -0,0 +1,160 @@
+package Krawfish::Compile::Segment::Sort::Sample;
+use Krawfish::Koral::Result::Match;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+# Sort all matches in random order and only return the top_k
+# results. This is implemented using reservoir sampling.
+# Difference to random sorting is, this won't randomly sort all
+# results, making paging possible.
+
+# When the number of matches is known in advance, another
+# approach may be valid.
+
+# WARNING:
+#   Sorting does not respect current_match of any nested query,
+#   that's why sorting is always separated from enriching!
+
+# See
+#   https://en.wikipedia.org/wiki/Reservoir_sampling
+#   https://webkist.wordpress.com/2008/10/01/reservoir-sampling-in-perl/
+#   https://blogs.msdn.microsoft.com/spt/2008/02/05/reservoir-sampling/
+#   A. Anagnostopoulos, A. Z. Broder, and D. Carmel. Sampling search-engine results. In Proc. of the Fourteenth International World Wide Web Conference, Chiba, Japan, 2005. ACM Press.
+
+
+use constant DEBUG => 1;
+
+# Create a sample sort of k elements in the list
+sub new {
+  my $class = shift;
+  bless {
+    query     => shift,
+    n         => shift, # Size of the sample
+    k         => 0,     # Items already seen
+    reservoir => [],
+    current => undef
+  }, $class;
+};
+
+
+sub max_freq {
+  my $self = shift;
+  my $n = $self->{query}->max_freq;
+  $n = $n < $self->{n} ? $n : $self->{n};
+  return $n;
+};
+
+# Initialize reservoir
+sub _init {
+  my $self = shift;
+
+  return if $self->{k};
+
+  if (DEBUG) {
+    print_log('r_s_sample', 'Initialize sampling, meaning iterate over all items');
+  };
+
+  while ($self->{query}->next) {
+
+    # Seen next item
+    $self->{k}++;
+
+    if (DEBUG) {
+      print_log('r_s_sample', 'Found item ' . $self->{k});
+    };
+
+    # The reservoir is not filled up yet
+    if ($self->{k} <= $self->{n}) {
+
+      if (DEBUG) {
+        print_log('r_s_sample', 'Add item ' . $self->{k} . ' to reservoir');
+      };
+
+      # Add current match to reservoir
+      my $current = $self->{query}->current;
+      push @{$self->{reservoir}}, $current;
+    }
+
+    # Check if the item should replace another item in the reservoir
+    elsif (rand(1) <= ($self->{n}/$self->{k})) {
+
+      my $item = int(rand($self->{n}));
+
+      if (DEBUG) {
+        print_log('r_s_sample', $self->{n} . ' == ' . scalar @{$self->{reservoir}});
+        print_log('r_s_sample', "Overwrite item $item with item " . $self->{k});
+      };
+
+      # Replace random match in reservoir
+      my $current = $self->{query}->current;
+
+      # TODO:
+      #   Check if $self->{n} is here equivalent to scalar @{$self->{reservoir}}
+      $self->{reservoir}->[$item] = $current;
+    }
+
+    elsif (DEBUG) {
+      print_log('r_s_sample', 'Ignore item ' . $self->{k});
+    };
+  };
+
+  return;
+};
+
+
+# Move to next item
+sub next {
+  my $self = shift;
+
+  # Fill reservoir
+  $self->_init;
+
+  # Get match from reservoir
+  my $current = shift @{$self->{reservoir}};
+
+  # There is no more match in reservoir
+  unless ($current) {
+    $self->{current} = undef;
+    return;
+  };
+
+  # Set current match
+  $self->{current} = $current;
+  return 1;
+};
+
+
+sub current {
+  $_[0]->{current};
+};
+
+
+sub match_from_query {
+  ...
+};
+
+
+sub current_match {
+  my $self = shift;
+  my $current = $self->current or return;
+  my $match = Krawfish::Koral::Result::Match->new(
+    doc_id  => $current->doc_id,
+    start   => $current->start,
+    end     => $current->end,
+    payload => $current->payload,
+  );
+
+  if (DEBUG) {
+    print_log('sort_sample', 'Current match is ' . $match->to_string);
+  };
+
+  return $match;
+};
+
+sub to_string {
+  'sample(' . $_[0]->{n} . ':' . $_[0]->{query}->to_string . ')';
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Sort/Simple.pm b/lib/Krawfish/Compile/Segment/Sort/Simple.pm
new file mode 100644
index 0000000..5f438d2
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/Simple.pm

@@ -0,0 +1,85 @@
+package Krawfish::Compile::Segment::Sort::Simple;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+warn 'NOT USED YET';
+
+# This should be used fur subsequent
+# sorting following the first pass
+
+# May use insertion sort for small numbers
+# of duplicates.
+
+# This may very well be a PrioritySort,
+# so initially there is a very simple
+# querier that only add rank and same elements
+# and subsequential they are ranked
+
+sub new {
+  my $class = shift;
+  my %param = @_;
+
+  my $query = $param{query};
+  my $fields  = $param{fields};
+  my $field  = $param{field};
+  my $desc = $param{desc} ? 1 : 0;
+
+  my $top_k = $param{top_k};
+
+  return bless {
+    field_rank => $fields->ranked_by($field),
+    field => $field,
+    desc => $desc,
+    query => $query,
+    queue => $queue,
+    list => undef,
+    pos => -1
+  }, $class;
+};
+
+sub next {
+
+  # TODO:
+  #   In case the sorting before
+  #   results in a very bad configuration
+  #   (lots of duplicates in the final pos),
+  #   choose a different strategy!
+
+  my $field_rank = $self->{field_rank};
+
+  my $max;
+  # Get maximum rank if descending order
+  if ($self->{desc}) {
+    $max = $field_rank->max;
+  };
+
+  my $query = $self->{query};
+
+  while ($query->next) {
+    if (DEBUG) {
+      print_log('s_sort', 'Get next posting from ' . $query->to_string);
+    };
+
+    # The rank is totally fine
+    if ($query->duplicate_rank == 1) {
+      $self->{pos} = 0;
+      $self->{list} = [$query->current];
+      return 1;
+    }
+
+    # The rank has many duplicates
+    else {
+
+      # Sort elements!
+      my $elements = $query->duplicate_rank;
+      for (1..$elements) {
+        $query->next;
+
+#        # Clone record
+#        my $record = $query->current->clone;
+
+      };
+    };
+  };
+};

diff --git a/lib/Krawfish/Compile/Segment/Sort/SubTerm.pm b/lib/Krawfish/Compile/Segment/Sort/SubTerm.pm
new file mode 100644
index 0000000..dc79207
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/SubTerm.pm

@@ -0,0 +1,65 @@
+package Krawfish::Compile::Segment::Sort::SubTerm;
+use strict;
+use warnings;
+
+# This will sort based on a pre-ranked subterm
+# or rather a subterm list for a class
+#
+# A given node-wide vector_ref can be used to limit
+# the list of terms to check.
+#
+# As classes are in order, a sortafter on subterms
+# for further classes are only relevant in case
+# there are matches with identical ranks on this class.
+
+sub new {
+  my $class = shift;
+
+  # TODO:
+  #   Possibly remember the collation
+  my $self = bless {
+    index      => shift,
+    suffix     => shift // 0,
+    descending => shift // 0,
+    class      => shift // 0,
+    max_rank_vector_ref => shift // []
+  }, $class;
+
+  # Get ranking
+  $self->{dict} = $self->{index}->dictionary or return;
+
+  # Get maximum rank if descending order
+  $self->{max} = $self->{ranks}->max if $self->{descending};
+
+  return $self;
+};
+
+
+# Check for the rank of the match if it is smaller
+# than the given rank.
+sub rank_lt {
+  my ($self, $match) = shift;
+
+  # TODO:
+  #   For the requested class(es),
+  #   retrieve the subterm_ids.
+  #   This is similar to Enrich::Snippet retrieval,
+  #   as classes may have overlaps.
+  #   go through all terms in either left-to-right (prefix)
+  #   or right-to-left (suffix) order and rank as long as
+  #   the terms are littler than the rank vector
+
+  my $rank;
+  if ($self->{suffix}) {
+    $rank = $self->{dict}->suffix_rank_by_subterm_id($subterm_id);
+  }
+  else {
+    $rank = $self->{dict}->prefix_rank_by_subterm_id($subterm_id);
+  };
+
+  # Revert if maximum rank is set
+  return $max ? $max - $rank : $rank;
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/Sort/Substring.pm b/lib/Krawfish/Compile/Segment/Sort/Substring.pm
new file mode 100644
index 0000000..c20e454
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/Sort/Substring.pm

@@ -0,0 +1,36 @@
+package Krawfish::Compile::Segment::Sort::Substring;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+warn 'NOT USED YET';
+
+# To support C2-Wort-Type sorting based on word endings,
+# It's necessary to support sorting based on substrings.
+#
+# EXAMPLE:
+#   Sort based on the last two characters of a word in the
+#   correct order: substring(-2,2)
+#
+#   match1: D[er] al[te] Ma[nn]
+#   match2: D[er] gu[te] Schäf[er]
+#
+# This requires that all terms of a class are fetched from
+# the dictionary (or at least X characters).
+# Equal sequences will receive the same rank and can then be
+# sorted alphabetically in the next run.
+# Equal may mean that this is case insensitive.
+
+sub new {
+  my $class = shift;
+  bless {
+    offset  => shift, # Supports negative offset
+    length  => shift,
+    reverse => shift,  # The substring is read from right to left
+    caseinsensitive => shift,
+    resolve_diacritiques => shift
+  }, $class;
+};
+
+
+1;

diff --git a/lib/Krawfish/Compile/Segment/SortAfter.pm b/lib/Krawfish/Compile/Segment/SortAfter.pm
new file mode 100644
index 0000000..d1f3642
--- /dev/null
+++ b/lib/Krawfish/Compile/Segment/SortAfter.pm

@@ -0,0 +1,196 @@
+package Krawfish::Compile::Segment::SortAfter;
+use parent 'Krawfish::Compile::Segment::Sort';
+use Data::Dumper;
+use Krawfish::Log;
+use strict;
+use warnings;
+
+# This sorting query is similar to
+# Krawfish::Compile::Segment::Sort,
+# But it already expects sorted, bundled postings,
+# does not support $max_rank_ref
+# (because all matches are already retrieved),
+# and immediately stops, when top_k is reached.
+#
+# That also means, this does all the work in next_bundle()
+# instead of init().
+
+
+use constant {
+  DEBUG   => 0,
+  RANK    => 0,
+  SAME    => 1,
+  VALUE   => 2,
+  MATCHES => 3
+};
+
+
+# Constructor
+sub new {
+  my $class = shift;
+  my %param = @_;
+
+  my $query    = $param{query};
+  my $segment  = $param{segment};
+  my $top_k    = $param{top_k};
+
+  # This is the sort criterion
+  my $sort     = $param{sort};
+
+  $top_k //= $segment->max_rank;
+
+  if (DEBUG) {
+    print_log('sort_after', 'Initiate follow up sort');
+  };
+
+  bless {
+    query       => $query,
+    segment     => $segment,
+    top_k       => $top_k,
+    sort        => $sort,
+    max_rank    => $segment->max_rank,
+    pos_in_sort => 0, # Current position in sorted heap
+    pos         => 0  # Number of (bundled) matches already served
+  }, $class;
+};
+
+
+# Move to next bundle
+sub next_bundle {
+  my $self = shift;
+
+  if (DEBUG) {
+    print_log('sort_after', 'Move to next bundle');
+  };
+
+  $_[0]->{current_bundle} = undef;
+
+  # Already served enough
+  if ($self->{pos} > $self->{top_k}) {
+    return;
+  }
+
+  # There are sorted bundles on the buffer
+  if ($self->{buffer}) {
+
+    # The buffer is not exceeded yet
+    if ($self->{pos_in_sort} < @{$self->{buffer}}) {
+
+      $self->{current_bundle} = $self->get_bundle_from_buffer;
+
+      # Get the number of matches in the bundle
+      $self->{pos} += $self->{current_bundle}->matches;
+
+      # Fine
+      return 1;
+    };
+
+    # Buffer is exceeded - reset
+    $self->{buffer} = undef;
+    $self->{pos_in_sort} = 0;
+  };
+
+  # Get a new bundle from the nested query
+  unless ($self->{query}->next_bundle) {
+    return;
+  };
+
+  if (DEBUG) {
+    print_log('Get next bundle from ' . $self->{query}->to_string);
+  };
+
+  my $next_bundle = $self->{query}->current_bundle;
+
+  # Next bundle is already sorted
+  if ($next_bundle->size == 1) {
+
+    # Do nothing
+    $self->{current_bundle} = $next_bundle;
+    return 1;
+  };
+
+  # Sort next bundle
+
+  # This should probably check for a simpler sorting
+  # algorithm for small data sets
+  my $rank;
+  my $sort = $self->{sort};
+  my $max_rank_ref = \(my $max_rank = $self->{max_rank});
+
+  if (DEBUG) {
+    print_log('sort_after', 'Sort nested bundle');
+  };
+
+  # Create initial priority queue
+  my $queue = Krawfish::Util::PriorityQueue::PerDoc->new(
+    $self->{top_k} - $self->{pos},
+    $max_rank_ref
+  );
+
+  # Unbundle bundle and go through matches
+  for (my $i = 0; $i < $next_bundle->size; $i++) {
+
+    # Get item from list
+    my $posting = $next_bundle->item($i);
+
+    if (DEBUG) {
+      print_log('sort_after', 'Get next posting from ' . $self->{query}->to_string);
+    };
+
+    # Get stored rank
+    $rank = $sort->rank_for($posting->doc_id);
+
+    # Checking for $$max_rank_ref is not useful here,
+    # as the bundles are already bundled and skipping bundles
+    # using next_doc() and preview_doc_id() is not beneficial.
+
+    $queue->insert([$rank, 0, $posting, $posting->matches]);
+  };
+
+  # Get the sorted array (which has still the ranking structure etc.)
+  my $array = $queue->reverse_array;
+
+  print_log('sort_after', 'Get list ranking of ' . Dumper($array)) if DEBUG;
+
+  if (DEBUG) {
+    print_log(
+      'sort_after',
+      'New current bundle is ' . $self->{current_bundle}->to_string
+    );
+  };
+
+  # Store the sorted bundle in the buffer
+  $self->{buffer} = $array;
+
+  # Set current bundle
+  $self->{current_bundle} = $self->get_bundle_from_buffer;
+
+  # Remember the number of entries
+  $self->{pos} += $self->{current_bundle}->matches;
+  return 1;
+};
+
+
+# Clone query
+sub clone {
+  my $self = shift;
+  __PACKAGE__->new(
+    query   => $self->{query}->clone,
+    segment => $self->{segment},
+    top_k   => $self->{top_k},
+    sort    => $self->{sort}
+  );
+};
+
+
+# Stringification
+sub to_string {
+  my $self = shift;
+  my $str = 'sort(';
+  $str .= $self->{sort}->to_string;
+  $str .= ',0-' . $self->{top_k} if $self->{top_k};
+  $str .= ':' . $self->{query}->to_string;
+  return $str . ')';
+};
+
+1;
commit	5cf5fca327157df9d7c3aa74cc651e52b630d4db	[log] [tgz]
author	Akron <nils@diewald-online.de>	Mon Oct 09 19:01:47 2017 +0200
committer	Akron <nils@diewald-online.de>	Mon Oct 09 19:01:47 2017 +0200
tree	5f23c44dba64aeee9fe659f91393663c58d27d4a
parent	a3e70a02ac3490431340042e7915ee697ec188f2 [diff]