Fixed aggregation on tests

commit: 9005af6615a25ecf6bf2585e826de9f246c1f0bf [log] [tgz]
author: Akron <nils@diewald-online.de> Tue Aug 15 15:26:24 2017 +0200
committer: Akron <nils@diewald-online.de> Tue Aug 15 15:26:24 2017 +0200
tree: dbcf62d6771668645f2bd8912d271124ce0108e5
parent: 79204af0e557b5bbf1aa27018e55a823043ca4a3 [diff]
diff --git a/lib/Krawfish/Controller/Index.pm b/lib/Krawfish/Controller/Index.pm
index 2ce6811..c5e2632 100644
--- a/lib/Krawfish/Controller/Index.pm
+++ b/lib/Krawfish/Controller/Index.pm

@@ -18,6 +18,19 @@
 };
 
 
+sub commit_info {
+  my $c = shift;
+  my $commit_id = $c->stash('commit_id');
+
+  # List all commits
+  unless ($commit_id) {
+    ...
+  };
+
+  # List information on one specific commit
+  ...
+};
+
 # The search API first searches for matches, then retrieves information
 # per match identical to the match API
 sub search {

diff --git a/lib/Krawfish/Corpus/Any.pm b/lib/Krawfish/Corpus/Any.pm
index 2d2c010..e25e6e3 100644
--- a/lib/Krawfish/Corpus/Any.pm
+++ b/lib/Krawfish/Corpus/Any.pm

@@ -7,7 +7,7 @@
 use strict;
 use warnings;
 
-use constant DEBUG => 1;
+use constant DEBUG => 0;
 
 
 # Construct query on live documents

diff --git a/lib/Krawfish/Index.pm b/lib/Krawfish/Index.pm
index 2ebfa92..f1e047c 100644
--- a/lib/Krawfish/Index.pm
+++ b/lib/Krawfish/Index.pm

@@ -72,6 +72,9 @@
 # TODO:
 #   Maybe 65.535 documents are enough per segment ...
 
+# TODO:
+#   Commits need to be logged and, per commit, information
+#   regarding newly added documents needs to be accessible.
 
 # Construct a new index object
 sub new {

diff --git a/lib/Krawfish/Index/Dictionary.pm b/lib/Krawfish/Index/Dictionary.pm
index d04905c..e2f5d3e 100644
--- a/lib/Krawfish/Index/Dictionary.pm
+++ b/lib/Krawfish/Index/Dictionary.pm

@@ -142,7 +142,7 @@
 #   requested, for example, by the term_id API for co-occurrence search.
 #   That's why all subterms need to be stored as well.
 
-use constant DEBUG => 1;
+use constant DEBUG => 0;
 
 sub new {
   my $class = shift;

diff --git a/lib/Krawfish/Index/Segment.pm b/lib/Krawfish/Index/Segment.pm
index 681841a..6488965 100644
--- a/lib/Krawfish/Index/Segment.pm
+++ b/lib/Krawfish/Index/Segment.pm

@@ -32,7 +32,7 @@
 #  ([leaf-backref][prefix-rank][suffix-rank])*
 
 
-use constant DEBUG => 1;
+use constant DEBUG => 0;
 
 sub new {
   my $class = shift;

diff --git a/lib/Krawfish/Posting/Aggregate/Fields.pm b/lib/Krawfish/Posting/Aggregate/Fields.pm
index 6a073f9..82cd32b 100644
--- a/lib/Krawfish/Posting/Aggregate/Fields.pm
+++ b/lib/Krawfish/Posting/Aggregate/Fields.pm

@@ -1,10 +1,21 @@
 package Krawfish::Posting::Aggregate::Fields;
+use Krawfish::Log;
 use strict;
 use warnings;
 
+# This remembers facets for multiple classes,
+# both using ids and terms
+
+
 # TODO:
 #   This should be part of Koral::Result!
 
+# TODO:
+#   It may be beneficial to deal with Koral::Type here,
+#   so inflate() would be an action directly done in Koral::Type
+
+use constant DEBUG => 1;
+
 sub new {
   my $class = shift;
   bless {
@@ -14,6 +25,8 @@
   }, $class;
 };
 
+
+# Increment the field frequency for each field in the current doc
 sub incr_doc {
   my ($self, $key_id, $field_id) = @_;
 
@@ -26,16 +39,29 @@
   # Increase doc frequency for the key
   $field_freq->[0]++;
 
-  # Remember
+  if (DEBUG) {
+    print_log('p_a_facets', 'Increment doc frequency for ' . $key_id . ':' . $field_id);
+  };
+
+
+  # Remember the frequency.
+  # The problem here is that the remembered frequencies are only loosely
+  # coupled to the frequency of the field. This may be problematic.
   push @{$self->{cache}}, $field_freq;
 };
 
 
+# Increment the field frequency for each field per match
 sub incr_match {
   $_[0]->{freq}++;
+
+  if (DEBUG) {
+    print_log('p_a_facets', 'Increment match frequency');
+  };
 };
 
 
+# Flush all frequency information remembered
 sub flush {
   my $self = shift;
 
@@ -46,11 +72,16 @@
 
     $self->{cache} = [];
     $self->{freq} = 0;
+
+    if (DEBUG) {
+      print_log('p_a_facets', 'Flush field frequency for all remembered frequencies');
+    };
   };
 };
 
 
-sub to_terms {
+# Translate this to terms
+sub inflate {
   my ($self, $dict) = @_;
 
   # Get fields
@@ -59,14 +90,21 @@
 
   # Iterate over field identifier
   foreach my $field_id (keys %$fields) {
+
+    # Request the term from the dictionary
     my $field_term = $dict->term_by_term_id($field_id);
 
+    # Remove the term marker
+    # TODO:
+    #   this may be a direct feature of the dictionary instead
     $field_term =~ s/^!//;
     my $aggr = ($fields{$field_term} //= {});
 
     # Get facets for field
     my $values = $fields->{$field_id};
     foreach my $value (keys %$values) {
+
+      # Get the facet term for the value id from the dictionary
       my $facet = $dict->term_by_term_id($value);
       $facet =~ s/^\+$field_term://;
 

diff --git a/lib/Krawfish/Query/Filter.pm b/lib/Krawfish/Query/Filter.pm
index 8c00f11..2f572dc 100644
--- a/lib/Krawfish/Query/Filter.pm
+++ b/lib/Krawfish/Query/Filter.pm

@@ -4,7 +4,7 @@
 use strict;
 use warnings;
 
-use constant DEBUG => 1;
+use constant DEBUG => 0;
 
 # Filters a term to check, if it is
 # in a supported document

diff --git a/lib/Krawfish/Result/Segment/Aggregate/Facets.pm b/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
index 58b711a..c011e95 100644
--- a/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
+++ b/lib/Krawfish/Result/Segment/Aggregate/Facets.pm

@@ -59,7 +59,7 @@
 
   my $self = shift;
 
-  print_log('aggr_facets', 'Load fields') if DEBUG;
+  print_log('aggr_facets', 'Create pointer on fields') if DEBUG;
 
   # Load the ranked list - may be too large for memory!
   $self->{field_pointer} = $self->{field_obj}->pointer;
@@ -74,17 +74,19 @@
 
   print_log('aggr_facets', 'Aggregate on fields') if DEBUG;
 
-
   my $doc_id = $current->doc_id;
 
   my $pointer = $self->{field_pointer};
 
   # Set match frequencies to all remembered doc frequencies
   my $aggr = $self->{aggregation};
-  $aggr->flush;
 
   # Skip to document in question
-  if ($pointer->skip_doc($doc_id)) {
+  # TODO:
+  #   skip_doc should ALWAYS return either the document or NOMOREDOC!
+  if ($pointer->skip_doc($doc_id) != -1) {
+
+    $aggr->flush;
 
     my $coll = $self->{collection};
 
@@ -92,8 +94,8 @@
     my @fields;
 
     if (DEBUG) {
-      print_log('aggr_facets', 'Look for frequencies for ' .
-                  join(',', @{$self->{field_keys}}));
+      print_log('aggr_facets', 'Look for frequencies for key ids ' .
+                  join(',', @{$self->{field_keys}}) . " in $doc_id");
     };
 
     # Iterate over all fields
commit	9005af6615a25ecf6bf2585e826de9f246c1f0bf	[log] [tgz]
author	Akron <nils@diewald-online.de>	Tue Aug 15 15:26:24 2017 +0200
committer	Akron <nils@diewald-online.de>	Tue Aug 15 15:26:24 2017 +0200
tree	dbcf62d6771668645f2bd8912d271124ce0108e5
parent	79204af0e557b5bbf1aa27018e55a823043ca4a3 [diff]