Merge value aggregates on node level
Change-Id: I262df6c5a301bf3ede758bcb0db0e304657c66b9
diff --git a/lib/Krawfish/Koral/Result.pm b/lib/Krawfish/Koral/Result.pm
index 59a0752..9eaa5e9 100644
--- a/lib/Krawfish/Koral/Result.pm
+++ b/lib/Krawfish/Koral/Result.pm
@@ -1,5 +1,6 @@
package Krawfish::Koral::Result;
use Role::Tiny::With;
+use Krawfish::Log;
with 'Krawfish::Koral::Report';
with 'Krawfish::Koral::Result::Inflatable';
use strict;
@@ -15,6 +16,8 @@
# and remove the intermediate compile
# directive!
+use constant DEBUG => 0;
+
# Constructor
sub new {
my $class = shift;
@@ -54,7 +57,11 @@
my $aggregates = $self->{aggregation};
# Check all aggregations
- AGGR: foreach my $new_aggr (@{$result->{aggregation}}) {
+ AGGR: foreach my $new_aggr (@{$result->{aggregation}}) {
+
+ if (DEBUG) {
+ print_log('k_result', 'Merge aggregation data for ' . $new_aggr->key);
+ };
# Merge with existing aggregation
foreach my $est_aggr (@$aggregates) {
@@ -64,10 +71,16 @@
# Merge
$est_aggr->merge($new_aggr);
+
next AGGR;
};
};
+ if (DEBUG) {
+ print_log('k_result', 'Add aggregation data for ' . $new_aggr->key);
+ };
+
+
# Introduce aggregation
$self->add_aggregation($new_aggr);
};
@@ -99,6 +112,10 @@
my ($self, $id) = @_;
my $str = '';
+ if (DEBUG) {
+ print_log('k_result', 'Stringify result');
+ };
+
# Add aggregation
if (@{$self->{aggregation}}) {
$str .= '[aggr=';
diff --git a/lib/Krawfish/Koral/Result/Aggregate/Fields.pm b/lib/Krawfish/Koral/Result/Aggregate/Fields.pm
index 913d00b..ad8d9a2 100644
--- a/lib/Krawfish/Koral/Result/Aggregate/Fields.pm
+++ b/lib/Krawfish/Koral/Result/Aggregate/Fields.pm
@@ -257,20 +257,20 @@
sub to_string {
my ($self, $ids) = @_;
+ my $str = '[fields=';
+
# IDs not supported
if ($ids) {
# warn 'ID based stringification currently not supported';
- return '';
+ return $str . '#?]';
};
# No terms yet
unless ($self->{fields_terms}) {
# warn 'ID based stringification currently not supported';
- return '';
+ return $str . '#?]';
};
- my $str = '[fields=';
-
my @classes = @{$self->_to_classes};
my $first = 0;
foreach (my $i = 0; $i < @classes; $i++) {
diff --git a/lib/Krawfish/Koral/Result/Aggregate/Length.pm b/lib/Krawfish/Koral/Result/Aggregate/Length.pm
index 1d60361..9699268 100644
--- a/lib/Krawfish/Koral/Result/Aggregate/Length.pm
+++ b/lib/Krawfish/Koral/Result/Aggregate/Length.pm
@@ -14,9 +14,6 @@
# It may very vell also support query
# classes.
-# TODO:
-# Implement merge()
-
use constant {
MIN_INIT_VALUE => 32_000
};
@@ -52,7 +49,30 @@
# Merge aggregation results on node level
sub merge {
- ...
+ my ($self, $aggr) = @_;
+
+ my $length = ($self->{length} //= {});
+
+ foreach my $flag (keys %{$aggr->{length}}) {
+ my $l_flag = $length->{$flag};
+ my $a_flag = $aggr->{length}->{$flag};
+
+ if (!defined $l_flag) {
+ # Set flag
+ $length->{$flag} = {
+ min => $a_flag->{min},
+ max => $a_flag->{max},
+ sum => $a_flag->{sum},
+ freq => $a_flag->{freq}
+ };
+ }
+ else {
+ $l_flag->{min} = $a_flag->{min} < $l_flag->{min} ? $a_flag->{min} : $l_flag->{min};
+ $l_flag->{max} = $a_flag->{max} > $l_flag->{max} ? $a_flag->{max} : $l_flag->{max};
+ $l_flag->{sum} += $a_flag->{sum};
+ $l_flag->{freq} += $a_flag->{freq};
+ };
+ };
};
diff --git a/lib/Krawfish/Koral/Result/Aggregate/Values.pm b/lib/Krawfish/Koral/Result/Aggregate/Values.pm
index c2a129d..e84e606 100644
--- a/lib/Krawfish/Koral/Result/Aggregate/Values.pm
+++ b/lib/Krawfish/Koral/Result/Aggregate/Values.pm
@@ -3,6 +3,8 @@
use warnings;
use Role::Tiny::With;
use Krawfish::Util::Bits;
+use Krawfish::Log;
+use Data::Dumper;
with 'Krawfish::Koral::Result::Inflatable';
with 'Krawfish::Koral::Result::Aggregate';
@@ -13,11 +15,9 @@
# What's the difference between a corpus and a rewritten
# corpus in regards to number of sentences.
-# TODO:
-# Implement merge()
-
use constant {
- MIN_INIT_VALUE => 32_000
+ MIN_INIT_VALUE => 32_000,
+ DEBUG => 0
};
sub new {
@@ -25,13 +25,13 @@
my $self = bless {
field_ids => shift,
flags => shift,
- fields => {},
+ values => {},
field_terms => undef
}, $class;
# Initiate aggregation maps for each field
foreach (@{$self->{field_ids}}) {
- $self->{fields}->{$_} = {};
+ $self->{values}->{$_} = {};
};
return $self;
@@ -44,7 +44,61 @@
# Merge aggregation results on node level
sub merge {
- ...
+ my ($self, $aggr) = @_;
+
+ if (DEBUG) {
+ print_log(
+ 'k_r_a_values',
+ 'Aggr: ' . Dumper($self),
+ 'New: ' . Dumper($aggr));
+ };
+
+
+ my $value = ($self->{values} //= {});
+
+ # Iterate over all fields
+ foreach my $field (keys %{$aggr->{values}}) {
+
+ $value = ($value->{$field} //= {});
+
+ # Iterate over flags
+ foreach my $flag (keys %{$aggr->{values}->{$field}}) {
+
+ if (DEBUG) {
+ print_log('k_r_a_values', 'Merge #' . $field . ':' . $flag);
+ };
+
+ # Get flag fvalue
+ my $a_flag = $aggr->{values}->{$field}->{$flag};
+
+ if (!exists $value->{$flag} || !defined $value->{$flag}->{min}) {
+
+ # Set flag
+ $value->{$flag} = {
+ min => $a_flag->{min},
+ max => $a_flag->{max},
+ sum => $a_flag->{sum},
+ freq => $a_flag->{freq}
+ };
+
+ if (DEBUG) {
+ print_log('k_r_a_values', 'a: ' . Dumper $a_flag);
+ };
+ }
+ else {
+
+ if (DEBUG) {
+ print_log('k_r_a_values', 'b: ' . Dumper $a_flag);
+ };
+
+ my $l_flag = $value->{$flag};
+ $l_flag->{min} = $a_flag->{min} < $l_flag->{min} ? $a_flag->{min} : $l_flag->{min};
+ $l_flag->{max} = $a_flag->{max} > $l_flag->{max} ? $a_flag->{max} : $l_flag->{max};
+ $l_flag->{sum} += $a_flag->{sum};
+ $l_flag->{freq} += $a_flag->{freq};
+ };
+ };
+ };
};
@@ -53,7 +107,7 @@
my ($self, $field_id, $value, $flags) = @_;
# Get field of interest
- my $aggr = $self->{fields}->{$field_id};
+ my $aggr = $self->{values}->{$field_id};
my $aggr_flag = $aggr->{$flags} //= {
min => MIN_INIT_VALUE,
@@ -73,10 +127,10 @@
sub inflate {
my ($self, $dict) = @_;
- my $fields = $self->{fields};
+ my $values = $self->{values};
- my %fields;
- foreach my $field_id (keys %{$fields}) {
+ my %values;
+ foreach my $field_id (keys %{$values}) {
my $field_term = $dict->term_by_term_id($field_id);
@@ -85,10 +139,10 @@
# this may be a direct feature of the dictionary instead
# $field_term =~ s/^!//;
$field_term = substr($field_term, 1); # ~ s/^!//;
- $fields{$field_term} = $fields->{$field_id};
+ $values{$field_term} = $values->{$field_id};
};
- $self->{field_terms} = \%fields;
+ $self->{field_terms} = \%values;
return $self;
};
@@ -99,16 +153,29 @@
my @classes;
- my $fields = $self->{field_terms};
+ my $values = $self->{field_terms};
- # Iterate over fields
- foreach my $field (keys %$fields) {
+ if (DEBUG) {
+ print_log('k_r_values', 'Make classes');
+ };
- my $flags = $fields->{$field};
+ # Iterate over values
+ foreach my $field (keys %$values) {
+
+ if (DEBUG) {
+ print_log('k_r_values', 'Field is ' . $field);
+ };
+
+ my $flags = $values->{$field};
# Iterate over flags
foreach my $flag (keys %$flags) {
+ if (DEBUG) {
+ print_log('k_r_values', 'Flag is ' . $flag . '|' . bitstring($flag));
+ };
+
+
# Iterate over classes
foreach my $class (flags_to_classes($flag)) {
@@ -145,19 +212,20 @@
sub to_string {
my ($self, $ids) = @_;
+ my $str = '[values=';
+
# IDs not supported
if ($ids) {
- warn 'ID based stringification currently not supported';
- return '';
+ # warn 'ID based stringification currently not supported';
+ return $str . '#?]';
};
# No terms yet
unless ($self->{field_terms}) {
- warn 'ID based stringification currently not supported';
- return '';
+ # warn 'ID based stringification currently not supported';
+ return $str . '#?]';
};
- my $str = '[values=';
my @classes = @{$self->_to_classes};
my $first = 0;
@@ -165,12 +233,12 @@
$str .= $i == 0 ? 'total' : 'inCorpus-' . $i;
$str .= ':[';
- my $fields = $classes[$i];
+ my $values = $classes[$i];
- foreach my $field (sort keys %$fields) {
+ foreach my $field (sort keys %$values) {
$str .= $field . ':';
- my $values = $fields->{$field};
+ my $values = $values->{$field};
$str .= '[';
$str .= 'sum:' . $values->{sum} . ',';
@@ -201,10 +269,10 @@
my $first = 0;
foreach (my $i = 0; $i < @classes; $i++) {
my $val = $aggr->{$i == 0 ? 'total' : 'inCorpus-' . $i} = {};
- my $fields = $classes[$i];
+ my $values = $classes[$i];
- foreach my $field (sort keys %$fields) {
- my $values = $fields->{$field};
+ foreach my $field (sort keys %$values) {
+ my $values = $values->{$field};
# Set values per field
$val->{$field} = {