Improve design of group aggregations
Change-Id: I9d626d32472eef54e5eba1e3a943b2644f138a5b
diff --git a/lib/Krawfish/Koral/Compile/Builder.pm b/lib/Krawfish/Koral/Compile/Builder.pm
index 6e8a221..8ba9368 100644
--- a/lib/Krawfish/Koral/Compile/Builder.pm
+++ b/lib/Krawfish/Koral/Compile/Builder.pm
@@ -59,6 +59,12 @@
};
+# Aggregate on groups
+#
+# Placeholder: the body is not implemented yet. Any arguments
+# passed in are ignored and a bare return is issued, which yields
+# the empty list in list context and undef in scalar context.
+sub group_aggregate {
+ return;
+};
+
+
# Some aggregation types
# Aggregate frequencies
sub a_frequencies {
diff --git a/lib/Krawfish/Koral/Result/Aggregate.pm b/lib/Krawfish/Koral/Result/Aggregate.pm
index 956aafb..0330422 100644
--- a/lib/Krawfish/Koral/Result/Aggregate.pm
+++ b/lib/Krawfish/Koral/Result/Aggregate.pm
@@ -5,6 +5,12 @@
# TODO: Identical to Result::Group
+# TODO:
+# This should probably be abstract in the sense
+# that all aggregation methods should
+# make use of the same "flags" storing mechanism
+# and should reuse the pattern mechanism for groups.
+
requires qw/key
merge
inflate
diff --git a/lib/Krawfish/Koral/Result/Aggregate/Frequencies.pm b/lib/Krawfish/Koral/Result/Aggregate/Frequencies.pm
index 28ce295..1d7baaf 100644
--- a/lib/Krawfish/Koral/Result/Aggregate/Frequencies.pm
+++ b/lib/Krawfish/Koral/Result/Aggregate/Frequencies.pm
@@ -11,11 +11,13 @@
# TODO:
# requires a merge() method
-# This calculates frequencies for all classes
+# This calculates frequencies for all corpus classes
# TODO:
-# Instead of keys a byte-trie may in the end
+# Instead of keys a bit-trie or a list may in the end
# be the most efficient data structure.
+# This should probably be abstracted and used by all
+# Aggregate objects.
# Constructor
sub new {
diff --git a/lib/Krawfish/Koral/Result/Group/Aggregate.pm b/lib/Krawfish/Koral/Result/Group/Aggregate.pm
new file mode 100644
index 0000000..98aac42
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Group/Aggregate.pm
@@ -0,0 +1,48 @@
+package Krawfish::Koral::Result::Group::Aggregate;
+use strict;
+use warnings;
+
+# Return an object to collect aggregations
+# for a group
+
+# TODO:
+# All aggregations on groups should add their data here
+
+# TODO:
+# Join with Krawfish::Koral::Util::Row
+
+# TODO:
+# Join with Krawfish::Koral::Result::Aggregate
+
+# The structure should be:
+
+# {
+# $criterion => {
+# $flag => {
+# $aggregate => []
+# }
+# }
+# }
+
+# TODO:
+# Flags should probably be in a sorted list
+# with objects per Group::Aggregate
+
+# TODO:
+# Each aggregation method may have an index,
+# so they can directly access their data in the array.
+
+# Constructor
+# Creates an empty, hash-based object to collect aggregations
+# for a group. Arguments after the class name are ignored.
+sub new {
+  my ($class) = @_;
+  return bless({}, $class);
+};
+
+
+# Get the data list stored for a flags value.
+#
+# The list is kept directly on the object under the key $flags.
+# If nothing is stored (or the stored value is undefined), a new
+# empty array is created, stored and returned, so callers can
+# write into the returned reference right away.
+#
+# Returns an array reference.
+sub flags {
+  my $self  = shift;
+  my $flags = shift;
+
+  # Lazily initialize the slot for these flags
+  unless (defined $self->{$flags}) {
+    $self->{$flags} = [];
+  };
+
+  return $self->{$flags};
+};
+
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Group/Aggregates.pm b/lib/Krawfish/Koral/Result/Group/Aggregates.pm
new file mode 100644
index 0000000..5fb27e1
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Group/Aggregates.pm
@@ -0,0 +1,57 @@
+package Krawfish::Koral::Result::Group::Aggregates;
+use Krawfish::Koral::Result::Group::Aggregate;
+use strict;
+use warnings;
+
+# Store and retrieve group objects based on group definitions
+# This is an intermediate to Group::Aggregate
+
+# Constructor
+# Creates an empty registry; aggregation objects are added to it
+# on demand, keyed by group signature. Arguments after the class
+# name are ignored.
+sub new {
+  my ($class) = @_;
+  return bless({}, $class);
+};
+
+# Convert a group definition to a signature
+#
+# Expects an array reference of scalars and returns a string that
+# can be used as a hash key. The elements are joined with '_'.
+# A '\' or '_' inside an element is escaped with a leading '\',
+# so that different groups like ['a_b', 'c'] and ['a', 'b_c'] do
+# not end up with the same signature. For elements that contain
+# neither character the result equals a plain join('_', ...).
+#
+# Limitation: [] and [''] both map to the empty signature.
+sub group_to_sig {
+  my $group = shift;
+
+  my @parts;
+  foreach my $element (@$group) {
+
+    # Work on a copy - the caller's group must stay untouched
+    (my $escaped = $element) =~ s/([\\_])/\\$1/g;
+    push @parts, $escaped;
+  };
+
+  return join('_', @parts);
+};
+
+
+# Convert a signature to a group definition
+#
+# Inverse of group_to_sig(): splits the signature at every
+# unescaped '_' and removes the escaping again. Empty elements,
+# including trailing ones, are preserved. An empty or undefined
+# signature yields an empty group.
+#
+# Returns an array reference.
+sub sig_to_group {
+  my $sig = shift;
+
+  return [] unless defined $sig && length $sig;
+
+  my @group;
+  my $current = '';
+  my $escaped = 0;
+
+  foreach my $char (split(//, $sig)) {
+
+    # The previous character was the escape symbol,
+    # so this one is taken literally
+    if ($escaped) {
+      $current .= $char;
+      $escaped = 0;
+    }
+
+    # Escape symbol - treat the next character literally
+    elsif ($char eq '\\') {
+      $escaped = 1;
+    }
+
+    # Unescaped separator - the current element is complete
+    elsif ($char eq '_') {
+      push @group, $current;
+      $current = '';
+    }
+
+    else {
+      $current .= $char;
+    };
+  };
+
+  # The last element has no trailing separator.
+  # A dangling escape symbol at the end of a malformed
+  # signature is silently dropped.
+  push @group, $current;
+
+  return \@group;
+};
+
+
+# Return the aggregation objects for a list of group patterns.
+# Accepts a list of patterns, see Krawfish::Util::PatternList
+#
+# For each pattern a signature is computed. The object already
+# stored under that signature is reused; otherwise a new
+# Krawfish::Koral::Result::Group::Aggregate is created and stored.
+#
+# Returns an array reference with one object per pattern, in the
+# order of the patterns.
+#
+# NOTE: An empty pattern list yields an empty result - no default
+# object is created in that case.
+sub aggregates {
+  my ($self, $pattern_list) = @_;
+
+  my @collected;
+  for my $pattern (@$pattern_list) {
+
+    # The signature serves as the storage key of the group
+    my $key = group_to_sig($pattern);
+
+    # First request for this group - create its object
+    $self->{$key} = Krawfish::Koral::Result::Group::Aggregate->new
+      unless exists $self->{$key};
+
+    push @collected, $self->{$key};
+  };
+
+  return \@collected;
+};
+
+
+
+1;
diff --git a/lib/Krawfish/Util/PatternList.pm b/lib/Krawfish/Util/PatternList.pm
index 7fbe3fa..7f2749b 100644
--- a/lib/Krawfish/Util/PatternList.pm
+++ b/lib/Krawfish/Util/PatternList.pm
@@ -5,6 +5,21 @@
use strict;
use warnings;
+# This is used by
+# Krawfish::Koral::Result::Group::Fields
+
+# TODO:
+# This should probably be relocated to
+# Krawfish::Koral::Util::*
+
+# TODO:
+# Instead of "list" it may be renamed to "group".
+
+our @EXPORT = qw/pattern_list/;
+
+use constant DEBUG => 0;
+
+
# Based on a pattern, this creates a list.
#
# Expect a list of structure
@@ -17,19 +32,6 @@
# 1,3,2
# 1,5,2
# 1,8,2
-
-# This is used by
-# Krawfish::Koral::Result::Group::Fields
-
-# TODO:
-# This should probably be relocated to
-# Krawfish::Koral::Util::*
-
-our @EXPORT = qw/pattern_list/;
-
-use constant DEBUG => 0;
-
-
sub pattern_list {
my @list = @_;
diff --git a/t/compile/segment/group_fields_aggregate_values.t b/t/compile/segment/group_fields_aggregate_values.t
index 3384b58..d39afd8 100644
--- a/t/compile/segment/group_fields_aggregate_values.t
+++ b/t/compile/segment/group_fields_aggregate_values.t
@@ -4,6 +4,28 @@
use strict;
use warnings;
+use_ok('Krawfish::Koral::Result::Group::Aggregates');
+
+my $aggrs = Krawfish::Koral::Result::Group::Aggregates->new;
+
+# Request the aggregation objects for three group definitions
+ok(my $aggr = $aggrs->aggregates(
+  [
+    [qw/a b c/],
+    [qw/d e f/],
+    [qw/g h i/]
+  ]), 'Get aggregates for three groups');
+
+# First group
+ok($aggr->[0], 'First group defined');
+
+# Set first value at flags 4
+# This goes through the flags() accessor instead of reaching
+# into the object, so the test does not depend on the storage layout
+$aggr->[0]->flags(4)->[0] = 2;
+
+# Get group with same signature -
+# this has to be the object that was modified above
+is($aggrs->aggregates([[qw/a b c/]])->[0]->flags(4)->[0], 2,
+   'Same signature returns the same aggregate');
+
+
use_ok('Krawfish::Index');
use_ok('Krawfish::Koral');
@@ -15,7 +37,11 @@
$mb->group_by(
$mb->g_fields('author')
),
- $mb->aggregate(
+
+ # Group aggregates need a different name,
+ # as match number etc. may
+ # need to be aggregated globally in addition
+ $mb->group_aggregate(
$mb->a_values('size')
)
);