Finished design of group aggregations
Change-Id: I2373fb0850770fb555f34572948d96f875257203
diff --git a/lib/Krawfish/Compile/Node.pm b/lib/Krawfish/Compile/Node.pm
index a8015d9..bc39587 100644
--- a/lib/Krawfish/Compile/Node.pm
+++ b/lib/Krawfish/Compile/Node.pm
@@ -34,7 +34,7 @@
# - Krawfish::MultiNodes::*
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# Constructor
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
index 65599bf..cec25b7 100644
--- a/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
@@ -14,7 +14,7 @@
with 'Krawfish::Compile::Segment';
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# TODO:
diff --git a/lib/Krawfish/Compile/Segment/Group.pm b/lib/Krawfish/Compile/Segment/Group.pm
index 8b06ba1..49adc22 100644
--- a/lib/Krawfish/Compile/Segment/Group.pm
+++ b/lib/Krawfish/Compile/Segment/Group.pm
@@ -8,7 +8,7 @@
requires qw/group/;
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# Override to compile data
diff --git a/lib/Krawfish/Compile/Segment/Group/Aggregate.pm b/lib/Krawfish/Compile/Segment/Group/Aggregate.pm
index 3840ca3..006a871 100644
--- a/lib/Krawfish/Compile/Segment/Group/Aggregate.pm
+++ b/lib/Krawfish/Compile/Segment/Group/Aggregate.pm
@@ -1,12 +1,9 @@
package Krawfish::Compile::Segment::Group::Aggregate;
use strict;
use warnings;
-use Krawfish::Compile::Query::Nowhere;
-use Role::Tiny::With;
+use Krawfish::Compile::Segment::Nowhere;
-with 'Krawfish::Compile::Segment';
-
-use constant DEBUG;
+use constant DEBUG => 0;
# Aggregate values of groups per document and per match
diff --git a/lib/Krawfish/Compile/Segment/Group/Fields.pm b/lib/Krawfish/Compile/Segment/Group/Fields.pm
index 01a42dd..3b67e83 100644
--- a/lib/Krawfish/Compile/Segment/Group/Fields.pm
+++ b/lib/Krawfish/Compile/Segment/Group/Fields.pm
@@ -8,7 +8,7 @@
with 'Krawfish::Compile::Segment::Group';
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# This will group matches (especially document matches) by field
# This is useful e.g. for document browsing per corpus.
@@ -41,9 +41,8 @@
field_obj => $field_obj,
query => $query,
field_keys => $fields,
-
- # The Aggregation object is of type Group::Aggregate
aggr => $aggr,
+
last_doc_id => -1,
finished => 0
}, $class;
@@ -55,6 +54,17 @@
};
+# Get or set the aggregation object (of type Group::Aggregate)
+sub aggregation {
+  my $self = shift;
+
+  # Without a (true) argument, act as getter
+  return $self->{aggr} unless $_[0];
+  $self->{aggr} = shift;
+  return $self;
+};
+
+
# Clone query
sub clone {
my $self = shift;
@@ -62,7 +72,7 @@
$self->{field_obj},
$self->{query},
$self->{field_keys},
- $self->{aggr}->clone
+ $self->{aggr} ? $self->{aggr}->clone : undef
);
};
@@ -88,7 +98,7 @@
$str .= join(',', map { $_->to_string($id) } @{$self->{field_keys}});
if ($self->{aggr}) {
- $str .= ';', $self->{aggr}->to_string;
+ $str .= ';'. $self->{aggr}->to_string;
};
$str .= ':' . $self->{query}->to_string($id) . ')';
diff --git a/lib/Krawfish/Compile/Segment/Sort/Field.pm b/lib/Krawfish/Compile/Segment/Sort/Field.pm
index 38a2b9a..106de49 100644
--- a/lib/Krawfish/Compile/Segment/Sort/Field.pm
+++ b/lib/Krawfish/Compile/Segment/Sort/Field.pm
@@ -3,7 +3,7 @@
use strict;
use warnings;
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# TODO:
# Use this an instantiate it directly with
diff --git a/lib/Krawfish/Koral.pm b/lib/Krawfish/Koral.pm
index e73a45c..8a80d72 100644
--- a/lib/Krawfish/Koral.pm
+++ b/lib/Krawfish/Koral.pm
@@ -14,7 +14,7 @@
with 'Krawfish::Koral::Report';
with 'Krawfish::Koral::Result::Inflatable';
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# Parse a koral query object and transform to an
# actual index query.
diff --git a/lib/Krawfish/Koral/Compile.pm b/lib/Krawfish/Koral/Compile.pm
index dd9cd34..c279997 100644
--- a/lib/Krawfish/Koral/Compile.pm
+++ b/lib/Krawfish/Koral/Compile.pm
@@ -35,15 +35,16 @@
# Support aggregations on groups!
our %COMPILE_ORDER = (
- limit => 1,
- cluster_merge => 2,
- node_merge => 3,
- sort => 4,
- sample => 5,
- enrich => 6,
- group => 7,
- aggregate => 8,
- filter => 9
+ limit => 1,
+ cluster_merge => 2,
+ node_merge => 3,
+ sort => 4,
+ sample => 5,
+ enrich => 6,
+ group_aggregate => 7,
+ group => 8,
+ aggregate => 9,
+ filter => 10
);
@@ -147,17 +148,18 @@
# and take the first value for single values
# start_index=0 + start_index=2 => start_index=0
#
- # 3. Remove duplicates
- # aggr_freq + aggr_freq => - aggr_freq
for (my $i = 1; $i < @compile; $i++) {
+ # 3. Remove duplicates
+ # aggr_freq + aggr_freq => - aggr_freq
# Consecutive types are identical, join
if ($compile[$i]->type eq $compile[$i-1]->type) {
# Join fields or aggregations
if ($compile[$i]->type eq 'enrich' ||
+ $compile[$i]->type eq 'sort' ||
$compile[$i]->type eq 'aggregate' ||
- $compile[$i]->type eq 'sort'
+ $compile[$i]->type eq 'group_aggregate'
) {
# The first operations have higher precedence
diff --git a/lib/Krawfish/Koral/Compile/Aggregate.pm b/lib/Krawfish/Koral/Compile/Aggregate.pm
index 9c275e8..0862bcb 100644
--- a/lib/Krawfish/Koral/Compile/Aggregate.pm
+++ b/lib/Krawfish/Koral/Compile/Aggregate.pm
@@ -10,6 +10,11 @@
# TODO:
# Check that only valid aggregate objects are passed
+# TODO:
+# If a change is made here, check whether
+# Krawfish::Koral::Compile::Group::Aggregate
+# needs to be changed as well.
+
our %AGGR_ORDER = (
'length' => 1,
'freq' => 2,
diff --git a/lib/Krawfish/Koral/Compile/Builder.pm b/lib/Krawfish/Koral/Compile/Builder.pm
index 8ba9368..6aa7e93 100644
--- a/lib/Krawfish/Koral/Compile/Builder.pm
+++ b/lib/Krawfish/Koral/Compile/Builder.pm
@@ -15,6 +15,7 @@
use Krawfish::Koral::Compile::Aggregate::Length;
use Krawfish::Koral::Compile::Aggregate::Values;
use Krawfish::Koral::Compile::Group;
+use Krawfish::Koral::Compile::Group::Aggregate;
use Krawfish::Koral::Compile::Group::Fields;
use Krawfish::Koral::Compile::Group::ClassFrequencies;
@@ -61,7 +62,8 @@
# Aggregate on groups
sub group_aggregate {
- return;
+ my $self = shift;
+ return Krawfish::Koral::Compile::Group::Aggregate->new(@_);
};
diff --git a/lib/Krawfish/Koral/Compile/Group.pm b/lib/Krawfish/Koral/Compile/Group.pm
index 5d5250e..4f00af8 100644
--- a/lib/Krawfish/Koral/Compile/Group.pm
+++ b/lib/Krawfish/Koral/Compile/Group.pm
@@ -10,11 +10,12 @@
}, $class;
};
-
sub criterion {
$_[0]->{criterion};
};
+
+# Type is group
sub type {
'group';
};
@@ -27,19 +28,21 @@
my ($self, $query) = @_;
# Group by
- return $self->criterion->wrap($query);
+ my $wrap = $self->criterion->wrap($query);
};
-# Normalize aggregations
+# Normalize group
sub normalize {
$_[0];
};
+# Stringification
sub to_string {
my $self = shift;
return 'group=[' . $self->criterion->to_string . ']';
};
+
1;
diff --git a/lib/Krawfish/Koral/Compile/Group/Aggregate.pm b/lib/Krawfish/Koral/Compile/Group/Aggregate.pm
new file mode 100644
index 0000000..a6a58e3
--- /dev/null
+++ b/lib/Krawfish/Koral/Compile/Group/Aggregate.pm
@@ -0,0 +1,118 @@
+package Krawfish::Koral::Compile::Group::Aggregate;
+use Krawfish::Koral::Compile::Node::Group::Aggregate;
+use Krawfish::Log;
+use List::MoreUtils qw/uniq/;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+# TODO:
+# Check that only valid aggregate objects are passed
+
+# TODO:
+# If a change is made here, check whether
+# Krawfish::Koral::Compile::Aggregate
+# needs to be changed as well.
+
+# Prepare Group Aggregations
+
+# Constructor - the object is a blessed list of all passed operations
+sub new {
+  my ($class, @operations) = @_;
+  return bless [@operations], $class;
+};
+
+
+# Type identifier of the operation
+sub type {
+  return 'group_aggregate';
+};
+
+
+# Get the list of operations or replace it with the passed operations
+# TODO:
+# Identical to Compile::Aggregate
+sub operations {
+  my ($self, @ops) = @_;
+
+  # The getter returns the list (not a reference)
+  return @$self unless @ops;
+  @$self = @ops;
+  return $self;
+};
+
+
+# Wrap the query in a node-level group aggregation
+sub wrap {
+  my $self = shift;
+  my $query = shift;
+
+  print_log('kq_gaggr', 'Wrap operation ' . join(',', @$self)) if DEBUG;
+
+  # Hand over a copy of the operation list
+  my @operations = $self->operations;
+
+  return Krawfish::Koral::Compile::Node::Group::Aggregate->new(
+    $query,
+    \@operations
+  );
+};
+
+
+# Normalize aggregations
+# This is similar to Compile::Aggregate, but does not sort
+# aggregations to keep columns intact.
+# Consecutive operations sharing the same type are collapsed:
+# 'fields' and 'values' operations are merged, for all other
+# types only the first operation is kept.
+# Returns the object itself.
+sub normalize {
+  my $self = shift;
+
+  my @ops = @$self;
+
+  # Nothing to normalize
+  return $self unless @ops;
+
+  # Check for doubles
+  for (my $i = 1; $i < @ops; $i++) {
+    # Two consecutive operations have the same type
+    if ($ops[$i]->type eq $ops[$i-1]->type) {
+
+      # Merge fields or values
+      if ($ops[$i]->type eq 'fields' || $ops[$i]->type eq 'values') {
+        $ops[$i-1]->operations(
+          $ops[$i-1]->operations,
+          $ops[$i]->operations
+        );
+      };
+
+      # Remove double operation and recheck the same index,
+      # as the following operation has moved up
+      splice(@ops, $i, 1);
+      $i--;
+
+      CORE::next;
+    };
+
+    # Normalize when no longer consecutive operations
+    # can be expected
+    $ops[$i-1] = $ops[$i-1]->normalize;
+  };
+
+  # Normalize last operation
+  $ops[-1] = $ops[-1]->normalize;
+  $self->operations(@ops);
+  return $self;
+};
+
+
+# Stringification in the form 'gaggr=[op,op,...]'
+sub to_string {
+  my ($self, $id) = @_;
+  return sprintf 'gaggr=[%s]', join(',', map { $_->to_string($id) } @$self);
+};
+
+
+1;
diff --git a/lib/Krawfish/Koral/Compile/Group/Fields.pm b/lib/Krawfish/Koral/Compile/Group/Fields.pm
index 24a2723..6d95694 100644
--- a/lib/Krawfish/Koral/Compile/Group/Fields.pm
+++ b/lib/Krawfish/Koral/Compile/Group/Fields.pm
@@ -43,16 +43,18 @@
};
};
@$self = @unique;
+
return $self;
};
sub wrap {
my ($self, $query) = @_;
+
return Krawfish::Koral::Compile::Node::Group::Fields->new(
$query,
[$self->operations]
- )
+ );
};
diff --git a/lib/Krawfish/Koral/Compile/Node.pm b/lib/Krawfish/Koral/Compile/Node.pm
index aa4fb9e..e2ed577 100644
--- a/lib/Krawfish/Koral/Compile/Node.pm
+++ b/lib/Krawfish/Koral/Compile/Node.pm
@@ -6,7 +6,7 @@
# Koral class to join query results on node level
-use constant DEBUG => 1;
+use constant DEBUG => 0;
sub new {
my $class = shift;
diff --git a/lib/Krawfish/Koral/Compile/Node/Group/Aggregate.pm b/lib/Krawfish/Koral/Compile/Node/Group/Aggregate.pm
new file mode 100644
index 0000000..4312022
--- /dev/null
+++ b/lib/Krawfish/Koral/Compile/Node/Group/Aggregate.pm
@@ -0,0 +1,94 @@
+package Krawfish::Koral::Compile::Node::Group::Aggregate;
+use Krawfish::Compile::Segment::Group::Aggregate;
+use Krawfish::Compile::Segment::Nowhere;
+use strict;
+use warnings;
+
+# TODO:
+# Identify() should probably first return a Segment::Aggregate object
+
+sub new {
+  my ($class, $query, $aggregates) = @_;
+  # Store the nested query and the list of aggregations
+  return bless {
+    query => $query, aggregates => $aggregates
+  }, $class;
+};
+
+
+# Identify the aggregations and the nested query
+sub identify {
+  my ($self, $dict) = @_;
+
+  # Collect all aggregations that can be identified,
+  # as fields may not exist in the dictionary.
+  # TODO:
+  #   Aggregations that can't be identified should
+  #   introduce empty aggregations with names
+  #   as placeholders!
+  my @identifier = grep { $_ } map {
+    scalar $_->identify($dict)
+  } @{$self->{aggregates}};
+
+  # Identify the nested query
+  my $query = $self->{query}->identify($dict);
+  $self->{query} = $query;
+
+  # Without any aggregations left,
+  # only the query is returned
+  unless (@identifier) {
+    return $query;
+  };
+
+  $self->{aggregates} = \@identifier;
+
+  return $self;
+};
+
+
+# Optimize the nested query and attach the optimized
+# aggregations to it, in case it is a group query
+sub optimize {
+  my ($self, $segment) = @_;
+
+  my $query = $self->{query}->optimize($segment);
+
+  # There is nothing to query - return nothing
+  # TODO:
+  #   It may be required to have some default
+  #   null-values for aggregation that need to
+  #   be returned.
+  return Krawfish::Compile::Segment::Nowhere->new
+    if $query->max_freq == 0;
+
+  # Optimize all aggregation objects into a new list,
+  # as the aggregates can't be overwritten because of
+  # reoptimization on nodes
+  my @optimized = map {
+    $_->optimize($segment)
+  } @{$self->{aggregates}};
+
+  # Aggregations are only set on group queries
+  return $query
+    unless $query->does('Krawfish::Compile::Segment::Group');
+
+  # Create aggregation query with all aggregations
+  # and set it for the group query
+  $query->aggregation(
+    Krawfish::Compile::Segment::Group::Aggregate->new(\@optimized)
+  );
+
+  return $query;
+};
+
+
+# Stringification in the form 'gaggr(aggr,aggr,...:query)'
+sub to_string {
+  my ($self, $id) = @_;
+  my $aggr = join ',', map { $_->to_string($id) } @{$self->{aggregates}};
+  my $query = $self->{query}->to_string($id);
+  return 'gaggr(' . $aggr . ':' . $query . ')';
+};
+
+
+1;
diff --git a/lib/Krawfish/Koral/Compile/Node/Merge.pm b/lib/Krawfish/Koral/Compile/Node/Merge.pm
index ad4b143..0b16427 100644
--- a/lib/Krawfish/Koral/Compile/Node/Merge.pm
+++ b/lib/Krawfish/Koral/Compile/Node/Merge.pm
@@ -17,7 +17,7 @@
# and is different to all other
# Krawfish::Koral::Compile::Node::* queries
-use constant DEBUG => 1;
+use constant DEBUG => 0;
sub new {
diff --git a/lib/Krawfish/Koral/Query/Term.pm b/lib/Krawfish/Koral/Query/Term.pm
index e4e5dd2..69dbd86 100644
--- a/lib/Krawfish/Koral/Query/Term.pm
+++ b/lib/Krawfish/Koral/Query/Term.pm
@@ -27,7 +27,7 @@
# The regex is valid for the value in case it is given.
# Otherwise it's valid for the key.
-use constant DEBUG => 1;
+use constant DEBUG => 0;
sub new {
my $class = shift;
diff --git a/lib/Krawfish/Koral/Query/TermGroup.pm b/lib/Krawfish/Koral/Query/TermGroup.pm
index 7687353..3e9766a 100644
--- a/lib/Krawfish/Koral/Query/TermGroup.pm
+++ b/lib/Krawfish/Koral/Query/TermGroup.pm
@@ -66,7 +66,7 @@
# {1:[marmot/m=case:dat]}|{2:[marmot/m=gender:masc]}|{3:[marmot/m=number:sg]}
-use constant DEBUG => 1;
+use constant DEBUG => 0;
sub new {
my $class = shift;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm
index 96f4800..24147e0 100644
--- a/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm
@@ -21,7 +21,7 @@
type
clone/;
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# TODO:
# Have common methods with
diff --git a/lib/Krawfish/Koral/Result/Group/Fields.pm b/lib/Krawfish/Koral/Result/Group/Fields.pm
index 85e5e5e..0f6a01a 100644
--- a/lib/Krawfish/Koral/Result/Group/Fields.pm
+++ b/lib/Krawfish/Koral/Result/Group/Fields.pm
@@ -11,7 +11,7 @@
with 'Krawfish::Koral::Result::Inflatable';
with 'Krawfish::Koral::Result::Group';
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# Group on a sequence of field values
diff --git a/lib/Krawfish/Koral/Util/Boolean.pm b/lib/Krawfish/Koral/Util/Boolean.pm
index 2bb5187..48a0a04 100644
--- a/lib/Krawfish/Koral/Util/Boolean.pm
+++ b/lib/Krawfish/Koral/Util/Boolean.pm
@@ -12,7 +12,7 @@
# - Koral::Query::TermGroup
# - Koral::Query::Or
-use constant DEBUG => 1;
+use constant DEBUG => 0;
requires qw/bool_and_query
bool_or_query/;
diff --git a/lib/Krawfish/Query/Unique.pm b/lib/Krawfish/Query/Unique.pm
index 63fd0ca..5547792 100644
--- a/lib/Krawfish/Query/Unique.pm
+++ b/lib/Krawfish/Query/Unique.pm
@@ -8,7 +8,7 @@
# Filter duplicate postings
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# Constructor
sub new {