Merge groups on node level

Change-Id: Ibbfa35eb0ce51341eabbabe56f68dab8e8fae9aa
diff --git a/lib/Krawfish/Compile/Node.pm b/lib/Krawfish/Compile/Node.pm
index 3d588b9..364f0cf 100644
--- a/lib/Krawfish/Compile/Node.pm
+++ b/lib/Krawfish/Compile/Node.pm
@@ -18,6 +18,9 @@
 #   Add a timeout! Just in case ...!
 
 # TODO:
+#   Merge warnings, errors, messages!
+
+# TODO:
 #   Introduce max_rank_ref!
 
 # This may be less efficient than a dynamic
@@ -32,7 +35,7 @@
 # - Krawfish::MultiNodes::*
 
 
-use constant DEBUG => 0;
+use constant DEBUG => 1;
 
 
 # Constructor
@@ -58,7 +61,7 @@
     my $segment_query = $query->optimize($seg);
 
     if (DEBUG) {
-      print_log('node', 'Add query ' . $segment_query->to_string . ' to merge');
+      print_log('cmp_node', 'Add query ' . $segment_query->to_string . ' to merge');
     };
 
     # There are results expected
@@ -69,7 +72,13 @@
 
   $self->{segment_queries} = \@segment_queries;
 
-  # Add criterion comparation method here
+  # Query does not require sorted result
+  if (Role::Tiny::does_role($query, 'Krawfish::Koral::Compile::Node::Group')) {
+    $self->{top_k} = 0;
+    return $self;
+  };
+
+  # Add criterion comparation method
   $self->{prio} = Array::Queue::Priority->new(
     sort_cb => sub {
       my ($match_a, $match_b) = @_;
@@ -83,6 +92,7 @@
       return $crit_a->compare($crit_b);
     }
   );
+
   return $self;
 };
 
@@ -94,16 +104,17 @@
   return if $self->{init}++;
 
   if (DEBUG) {
-    print_log('node', 'Initialize sorting queue');
+    print_log('cmp_node', 'Initialize node response');
   };
 
+  # Priority queue if sorting is required, per default with size $n
+  my $prio = $self->{prio};
+
   my $i = 0;
   my $n = scalar @{$self->{segment_queries}};
 
-  # Priority queue, per default with size $n
-  my $prio = $self->{prio};
-
-  # Iterate over all segments until the prio is full
+  # Iterate over all segments - either for grouping
+  # or (in case of sorting) until the prio is full
   #
   # TODO:
   #   This needs to be done in parallel, as the initial
@@ -113,11 +124,23 @@
     # Get query from segment
     my $seg_q = $self->{segment_queries}->[$i];
 
+    # Do grouping!
+    unless ($prio) {
+
+      if (DEBUG) {
+        print_log('cmp_node', "Finalize query at channel $i");
+      };
+
+      # Search through all results
+      $seg_q->finalize;
+      next;
+    };
+
     # There is a next item from the segment
     if ($seg_q->next) {
 
       if (DEBUG) {
-        print_log('node', "Init query at channel $i");
+        print_log('cmp_node', "Init query at channel $i");
       };
 
       # Enqueue and remember the segment/channel
@@ -125,7 +148,7 @@
       $prio->add([$seg_q->current_match, $i]);
 
       if (DEBUG) {
-        print_log('node', "Added match " . $seg_q->current_match->to_string);
+        print_log('cmp_node', "Added match " . $seg_q->current_match->to_string);
       };
     }
 
@@ -133,7 +156,7 @@
     else {
 
       if (DEBUG) {
-        print_log('node', "Remove query at channel $i");
+        print_log('cmp_node', "Remove query at channel $i");
       };
 
       # Remove segment query
@@ -144,17 +167,19 @@
     };
   };
 
+  return unless $self->{prio};
+
   # Resize the priority queue
   # $prio->size($n);
 
   if (DEBUG) {
     print_log(
-      'node',
+      'cmp_node',
       'Array: ' . join(',', map { $_->[0]->to_string } @{$prio->queue})
     );
   };
 
-  $self->{prio} = $prio;
+  # $self->{prio} = $prio;
 };
 
 
@@ -165,7 +190,7 @@
   $self->_init;
 
   # There is no next
-  return if $self->{pos} > $self->{top_k} -1;
+  return if !$self->{prio} || $self->{pos} > $self->{top_k} -1;
 
   # Get next match from list
   # TODO: dequeue
@@ -194,7 +219,7 @@
 
   if (DEBUG) {
     print_log(
-      'node',
+      'cmp_node',
       'Array: ' . join(',', map { $_->[0]->to_string } @{$self->{prio}->queue})
     );
   };
@@ -220,13 +245,13 @@
 
   my $result = $self->result;
 
-  print_log('node','Compile result') if DEBUG;
+  print_log('cmp_node','Compile result') if DEBUG;
 
   my $k = $self->{top_k};
 
   # Get next match from list
   # TODO: dequeue
-  while ($k--) {
+  while ($k-- > 0) {
     my $entry = $self->{prio}->remove;
 
     # No more entries
@@ -256,22 +281,69 @@
 };
 
 
+# Group data
+sub group {
+  my $self = shift;
+
+  $self->_init;
+
+  if (DEBUG) {
+    print_log('cmp_node', 'Group data');
+  };
+
+  my $result = $self->result;
+
+  if (DEBUG && $result->{group}) {
+    print_log('cmp_node', 'Group is already done is already done');
+  };
+
+  # Aggregation already collected
+  return $result if $result->group;
+
+  # Iterate over all queries
+  foreach my $seg_q (@{$self->{segment_queries}}) {
+
+    # Check for compilation role
+    if (Role::Tiny::does_role($seg_q, 'Krawfish::Compile::Segment::Group')) {
+      if (DEBUG) {
+        print_log('cmp_node', 'Add result from ' . ref($seg_q));
+      };
+
+      # Merge aggregations
+      my $group = $seg_q->group;
+
+      if (DEBUG) {
+        use Data::Dumper;
+        print_log('cmp_node', 'Merge result: ' . ref($group) . ':' . $group->to_string);
+      };
+
+      # Merge group
+      $result->merge_group($group);
+
+      if (DEBUG) {
+        print_log('cmp_node', 'Groups merged');
+      };
+    };
+  };
+
+  return $result;
+};
+
+
 # Get aggregation data only
-# TODO:
-#   Identical with ::Compile
 sub aggregate {
   my $self = shift;
 
   $self->_init;
 
   if (DEBUG) {
-    print_log('node', 'Aggregate data');
+    print_log('cmp_node', 'Aggregate data');
   };
 
   my $result = $self->result;
 
   if (DEBUG && @{$result->{aggregation}}) {
-    print_log('node', 'Aggregation is already done');
+    print_log('cmp_node', 'Aggregation is already done');
   };
 
   # Aggregation already collected
@@ -283,19 +355,19 @@
     # Check for compilation role
     if (Role::Tiny::does_role($seg_q, 'Krawfish::Compile::Segment')) {
       if (DEBUG) {
-        print_log('node', 'Add result from ' . ref($seg_q));
+        print_log('cmp_node', 'Add result from ' . ref($seg_q));
       };
 
       # Merge aggregations
       my $aggregate = $seg_q->aggregate;
       if (DEBUG) {
         use Data::Dumper;
-        print_log('node', 'Merge result ' . $aggregate->to_string);
+        print_log('cmp_node', 'Merge result ' . $aggregate->to_string);
       };
       $result->merge_aggregation($aggregate);
 
       if (DEBUG) {
-        print_log('node', 'Result merged');
+        print_log('cmp_node', 'Result merged');
       };
     };
   };
diff --git a/lib/Krawfish/Compile/Segment.pm b/lib/Krawfish/Compile/Segment.pm
index 73b7d72..101a717 100644
--- a/lib/Krawfish/Compile/Segment.pm
+++ b/lib/Krawfish/Compile/Segment.pm
@@ -30,7 +30,7 @@
 
   if (DEBUG) {
     print_log(
-      'compile',
+      'cmp_seg',
       'Current match requested by ' . ref($self)
       );
   };
@@ -39,7 +39,7 @@
 
   if (DEBUG) {
     print_log(
-      'compile',
+      'cmp_seg',
       'Current match is ' . $match->to_string
       );
   };
@@ -53,7 +53,7 @@
   my $self = shift;
 
   if (DEBUG) {
-    print_log('compile', 'Get current from ' . ref $self);
+    print_log('cmp_seg', 'Get current from ' . ref $self);
   };
 
   return $self->{current} // $self->{query}->current;
@@ -68,7 +68,7 @@
 sub match_from_query {
   my $self = shift;
 
-  print_log('compile', 'Get match from query as ' . ref($self)) if DEBUG;
+  print_log('cmp_seg', 'Get match from query as ' . ref($self)) if DEBUG;
 
   my $match;
 
@@ -84,14 +84,14 @@
   # Not yet defined
   unless ($match) {
 
-    print_log('compile', 'No match found from ' . ref($self->{query})) if DEBUG;
+    print_log('cmp_seg', 'No match found from ' . ref($self->{query})) if DEBUG;
 
     # Get current object
     my $current = $self->current;
 
     unless ($current) {
       print_log(
-        'compile',
+        'cmp_seg',
         'No current definable from ' .
           ref($self)) if DEBUG;
       return;
@@ -99,7 +99,7 @@
 
     if (DEBUG) {
       print_log(
-        'compile',
+        'cmp_seg',
         'Current posting is from '. $self->{query}->to_string
       );
     };
@@ -156,7 +156,7 @@
   # This is rather for testing purposes
 
   if (DEBUG) {
-    print_log('compile', 'Compile aggregation with ' . ref($self));
+    print_log('cmp_seg', 'Compile aggregation with ' . ref($self));
   };
 
   # Get result object
@@ -166,7 +166,7 @@
   while ($self->next) {
     if (DEBUG) {
       print_log(
-        'compile',
+        'cmp_seg',
         'Add match ' . $self->current_match->to_string
       );
     };
@@ -179,7 +179,7 @@
 
   if (DEBUG) {
     print_log(
-      'compile',
+      'cmp_seg',
       'Result is ' . $result
     );
   };
@@ -214,12 +214,12 @@
   my $query = $self->{query};
 
   if (DEBUG) {
-    print_log('compile', 'Check if ' . ref($query) . ' does compiling');
+    print_log('cmp_seg', 'Check if ' . ref($query) . ' does compiling');
   };
 
   if (Role::Tiny::does_role($query, __PACKAGE__)) {
     if (DEBUG) {
-      print_log('compile', 'Add result from ' . ref($query));
+      print_log('cmp_seg', 'Add result from ' . ref($query));
     };
     $query->result($result)->aggregate;
   };
diff --git a/lib/Krawfish/Compile/Segment/Group.pm b/lib/Krawfish/Compile/Segment/Group.pm
index 877a81c..8b06ba1 100644
--- a/lib/Krawfish/Compile/Segment/Group.pm
+++ b/lib/Krawfish/Compile/Segment/Group.pm
@@ -8,7 +8,7 @@
 
 requires qw/group/;
 
-use constant DEBUG => 0;
+use constant DEBUG => 1;
 
 
 # Override to compile data
@@ -23,14 +23,7 @@
   my $result = $self->result;
 
   # Add all results
-  while ($self->next) {
-    if (DEBUG) {
-      print_log(
-        'compile',
-        'Check match ' . $self->current->to_string
-      );
-    };
-  };
+  $self->finalize;
 
   # Set group to result
   $result->group(
@@ -54,13 +47,27 @@
 
 
 # Get group
-# TODO:
-#   rename to group_result
 sub group {
   $_[0]->{group};
 };
 
 
+# Finalize query
+sub finalize {
+  my $self = shift;
+
+  if (DEBUG) {
+    print_log(
+      'group',
+      'Finalize query for grouping'
+    );
+  };
+
+  while ($self->next) {};
+  return $self;
+};
+
+
 # Get current posting
 sub current {
   return $_[0]->{query}->current;
diff --git a/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm b/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm
index 57bbe0b..417fc13 100644
--- a/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm
+++ b/lib/Krawfish/Compile/Segment/Group/ClassFrequencies.pm
@@ -3,7 +3,7 @@
 use Krawfish::Log;
 use strict;
 use warnings;
-use Role::Tiny;
+use Role::Tiny::With;
 
 with 'Krawfish::Compile::Segment::Group';
 
@@ -55,10 +55,8 @@
 };
 
 
-# Shorthand for "search through"
-sub finalize {
-  while ($_[0]->next) {};
-  return $_[0];
+sub clone {
+  ...
 };
 
 
diff --git a/lib/Krawfish/Compile/Segment/Group/Fields.pm b/lib/Krawfish/Compile/Segment/Group/Fields.pm
index 7e1bd2e..0a6019e 100644
--- a/lib/Krawfish/Compile/Segment/Group/Fields.pm
+++ b/lib/Krawfish/Compile/Segment/Group/Fields.pm
@@ -4,11 +4,11 @@
 use Krawfish::Log;
 use strict;
 use warnings;
-use Role::Tiny;
+use Role::Tiny::With;
 
 with 'Krawfish::Compile::Segment::Group';
 
-use constant DEBUG => 0;
+use constant DEBUG => 1;
 
 # This will group matches (especially document matches) by field
 # This is useful e.g. for document browsing per corpus.
@@ -42,6 +42,17 @@
 };
 
 
+# Clone query
+sub clone {
+  my $self = shift;
+  return __PACKAGE__->new(
+    $self->{field_obj},
+    $self->{query},
+    $self->{field_keys}
+  );
+};
+
+
 # Initialize field pointer
 sub _init {
   return if $_[0]->{field_pointer};
@@ -135,6 +146,13 @@
         # Key identifier are matching
         elsif ($field_keys[$key_pos]->key_id == $field_objs[$val_pos]->key_id) {
 
+          if (DEBUG) {
+            print_log(
+              'g_fields',
+              'Key at ' . $key_pos . ' is ' . $field_keys[$key_pos]->key_id .
+                ' which is equal to ' . $field_objs[$val_pos]->key_id);
+          };
+
           # Add key to pattern
           $patterns[$key_pos] //= [];
           push @{$patterns[$key_pos]}, $field_objs[$val_pos]->term_id;
diff --git a/lib/Krawfish/Compile/Segment/Group/Spans.pm b/lib/Krawfish/Compile/Segment/Group/Spans.pm
index 7279e68..c08b25b 100644
--- a/lib/Krawfish/Compile/Segment/Group/Spans.pm
+++ b/lib/Krawfish/Compile/Segment/Group/Spans.pm
@@ -2,7 +2,7 @@
 use Krawfish::Log;
 use strict;
 use warnings;
-use Role::Tiny;
+use Role::Tiny::With;
 
 with 'Krawfish::Compile::Segment::Group';
 
@@ -58,4 +58,9 @@
   return $slice_start . '_' . $slice_end;
 };
 
+
+sub clone {
+  ...
+};
+
 1;
diff --git a/lib/Krawfish/Compile/Segment/Group/TermExistence.pm b/lib/Krawfish/Compile/Segment/Group/TermExistence.pm
index 8736432..d692377 100644
--- a/lib/Krawfish/Compile/Segment/Group/TermExistence.pm
+++ b/lib/Krawfish/Compile/Segment/Group/TermExistence.pm
@@ -1,7 +1,7 @@
 package Krawfish::Compile::Segment::Group::TermExistence;
 use strict;
 use warnings;
-use Role::Tiny;
+use Role::Tiny::With;
 
 with 'Krawfish::Compile::Segment::Group';
 
@@ -21,6 +21,10 @@
   ...
 };
 
+sub clone {
+  ...
+};
+
 
 # TODO:
 #   Think about when next() is called, as it needs to be called on term_ids as well ...
diff --git a/lib/Krawfish/Koral/Compile/Node/Group.pm b/lib/Krawfish/Koral/Compile/Node/Group.pm
new file mode 100644
index 0000000..d34c5ea
--- /dev/null
+++ b/lib/Krawfish/Koral/Compile/Node/Group.pm
@@ -0,0 +1,10 @@
+package Krawfish::Koral::Compile::Node::Group;
+use strict;
+use warnings;
+use Role::Tiny;
+
+requires qw/to_string
+            identify
+            optimize/;
+
+1;
diff --git a/lib/Krawfish/Koral/Compile/Node/Group/ClassFrequencies.pm b/lib/Krawfish/Koral/Compile/Node/Group/ClassFrequencies.pm
index 93bf0c7..3abe798 100644
--- a/lib/Krawfish/Koral/Compile/Node/Group/ClassFrequencies.pm
+++ b/lib/Krawfish/Koral/Compile/Node/Group/ClassFrequencies.pm
@@ -2,10 +2,14 @@
 use Krawfish::Compile::Segment::Group::ClassFrequencies;
 use Krawfish::Util::String qw/squote/;
 use Krawfish::Compile::Segment::Nowhere;
+use Role::Tiny::With;
 use strict;
 use warnings;
 
 
+with 'Krawfish::Koral::Compile::Node::Group';
+
+
 # Create new enrichment object for fields
 sub new {
   my $class = shift;
diff --git a/lib/Krawfish/Koral/Compile/Node/Group/Fields.pm b/lib/Krawfish/Koral/Compile/Node/Group/Fields.pm
index b2dfda4..e7aedb7 100644
--- a/lib/Krawfish/Koral/Compile/Node/Group/Fields.pm
+++ b/lib/Krawfish/Koral/Compile/Node/Group/Fields.pm
@@ -2,10 +2,14 @@
 use Krawfish::Compile::Segment::Group::Fields;
 use Krawfish::Util::String qw/squote/;
 use Krawfish::Compile::Segment::Nowhere;
+use Role::Tiny::With;
 use strict;
 use warnings;
 
 
+with 'Krawfish::Koral::Compile::Node::Group';
+
+
 # Create new enrichment object for fields
 sub new {
   my $class = shift;
diff --git a/lib/Krawfish/Koral/Result.pm b/lib/Krawfish/Koral/Result.pm
index 9eaa5e9..5a37452 100644
--- a/lib/Krawfish/Koral/Result.pm
+++ b/lib/Krawfish/Koral/Result.pm
@@ -80,7 +80,6 @@
       print_log('k_result', 'Add aggregation data for ' . $new_aggr->key);
     };
 
-
     # Introduce aggregation
     $self->add_aggregation($new_aggr);
   };
@@ -88,6 +87,34 @@
   return;
 };
 
+# Merge groups
+sub merge_group {
+  my ($self, $group) = @_;
+
+  if (DEBUG) {
+    print_log('k_result', 'Merge group data');
+  };
+
+  # Merge with existing group
+  if ($self->{group}) {
+
+    if ($self->{group}->key ne $group->key) {
+      $self->add_error(000 => 'Groups are not compatible');
+      delete $self->{group};
+      return;
+    };
+
+    # Merge
+    $self->{group}->merge($group);
+  }
+
+  # Establish first group
+  else {
+    $self->{group} = $group;
+  };
+
+ return;
+};
 
 # Get aggregations
 sub aggregation {
diff --git a/lib/Krawfish/Koral/Result/Aggregate.pm b/lib/Krawfish/Koral/Result/Aggregate.pm
index 695ce97..956aafb 100644
--- a/lib/Krawfish/Koral/Result/Aggregate.pm
+++ b/lib/Krawfish/Koral/Result/Aggregate.pm
@@ -3,7 +3,13 @@
 use warnings;
 use Role::Tiny;
 
-requires qw/key merge/;
+# TODO: Identical to Result::Group
+
+requires qw/key
+            merge
+            inflate
+            to_string
+            to_koral_fragment/;
 
 # Finish the calculation
 sub on_finish {
diff --git a/lib/Krawfish/Koral/Result/Group.pm b/lib/Krawfish/Koral/Result/Group.pm
index b199c31..2d81bc4 100644
--- a/lib/Krawfish/Koral/Result/Group.pm
+++ b/lib/Krawfish/Koral/Result/Group.pm
@@ -1,18 +1,20 @@
 package Krawfish::Koral::Result::Group;
-use Role::Tiny::With;
+use Role::Tiny;
 use strict;
 use warnings;
 
-with 'Krawfish::Koral::Report';
-with 'Krawfish::Koral::Result::Inflatable';
+# TODO: Identical to Result::Aggregate
+
+requires qw/key
+            merge
+            inflate
+            to_string
+            to_koral_fragment/;
 
 # This will be returned by a Group search
 # It needs a to_hash method,
 # does not require start, end etc ...
 
-# TODO:
-#   This is quite similar to K::P::Bundle
-
 # With a witness, the group has:
 # {
 #   criterion => [freq, doc_freq, match]
@@ -31,17 +33,9 @@
 #   criterion => [freq, doc_freq, match, freq, doc_freq, match, freq, doc_freq, match ...]
 # }
 
-
-sub freq {
-  ...
-};
-
-sub doc_freq {
-  ...
-};
-
-sub to_hash {
-  ...
+sub on_finish {
+  $_[0];
 };
 
 1;
+
diff --git a/lib/Krawfish/Koral/Result/Group/Fields.pm b/lib/Krawfish/Koral/Result/Group/Fields.pm
index 868a0bc..765b2d5 100644
--- a/lib/Krawfish/Koral/Result/Group/Fields.pm
+++ b/lib/Krawfish/Koral/Result/Group/Fields.pm
@@ -1,5 +1,6 @@
 package Krawfish::Koral::Result::Group::Fields;
 use Krawfish::Util::PatternList qw/pattern_list/;
+use Data::Dumper;
 use Role::Tiny::With;
 use Krawfish::Util::Bits;
 use Krawfish::Log;
@@ -8,8 +9,9 @@
 use warnings;
 
 with 'Krawfish::Koral::Result::Inflatable';
+with 'Krawfish::Koral::Result::Group';
 
-use constant DEBUG => 0;
+use constant DEBUG => 1;
 
 # Group on a sequence of field values
 
@@ -89,12 +91,42 @@
   };
 };
 
+
 # On finish, flush the cache
 sub on_finish {
   $_[0]->flush;
   $_[0];
 };
 
+
+# Merge groups
+sub merge {
+  my ($self, $group) = @_;
+  my $est_group = $self->{group};
+  my $new_group = $group->{group};
+
+  # Get groups
+  foreach my $signature (keys %{$new_group}) {
+    $est_group->{$signature} //= {};
+
+    if (DEBUG) {
+      print_log('p_g_fields','Result: ' . Dumper $new_group);
+    };
+
+    # Iterate over all existing groups
+    foreach my $flag (keys %{$new_group->{$signature}}) {
+
+      my $value = ($est_group->{$signature}->{$flag} //= [0,0]);
+      my $freq = $new_group->{$signature}->{$flag};
+
+      $value->[0] += $freq->[0];
+      $value->[1] += $freq->[1];
+    };
+  };
+};
+
+
+
 # Translate this to terms
 sub inflate {
   my ($self, $dict) = @_;