Fixed aggregation on
tests
diff --git a/lib/Krawfish/Controller/Index.pm b/lib/Krawfish/Controller/Index.pm
index 2ce6811..c5e2632 100644
--- a/lib/Krawfish/Controller/Index.pm
+++ b/lib/Krawfish/Controller/Index.pm
@@ -18,6 +18,19 @@
};
+sub commit_info {
+ my $c = shift;
+ my $commit_id = $c->stash('commit_id');
+
+ # List all commits
+ unless ($commit_id) {
+ ...
+ };
+
+ # List information on one specific commit
+ ...
+};
+
# The search API first searches for matches, then retrieves information
# per match identical to the match API
sub search {
diff --git a/lib/Krawfish/Corpus/Any.pm b/lib/Krawfish/Corpus/Any.pm
index 2d2c010..e25e6e3 100644
--- a/lib/Krawfish/Corpus/Any.pm
+++ b/lib/Krawfish/Corpus/Any.pm
@@ -7,7 +7,7 @@
use strict;
use warnings;
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# Construct query on live documents
diff --git a/lib/Krawfish/Index.pm b/lib/Krawfish/Index.pm
index 2ebfa92..f1e047c 100644
--- a/lib/Krawfish/Index.pm
+++ b/lib/Krawfish/Index.pm
@@ -72,6 +72,9 @@
# TODO:
# Maybe 65.535 documents are enough per segment ...
+# TODO:
+# Commits need to be logged and, per commit, information
+# regarding newly added documents needs to be accessible.
# Construct a new index object
sub new {
diff --git a/lib/Krawfish/Index/Dictionary.pm b/lib/Krawfish/Index/Dictionary.pm
index d04905c..e2f5d3e 100644
--- a/lib/Krawfish/Index/Dictionary.pm
+++ b/lib/Krawfish/Index/Dictionary.pm
@@ -142,7 +142,7 @@
# requested, for example, by the term_id API for co-occurrence search.
# That's why all subterms need to be stored as well.
-use constant DEBUG => 1;
+use constant DEBUG => 0;
sub new {
my $class = shift;
diff --git a/lib/Krawfish/Index/Segment.pm b/lib/Krawfish/Index/Segment.pm
index 681841a..6488965 100644
--- a/lib/Krawfish/Index/Segment.pm
+++ b/lib/Krawfish/Index/Segment.pm
@@ -32,7 +32,7 @@
# ([leaf-backref][prefix-rank][suffix-rank])*
-use constant DEBUG => 1;
+use constant DEBUG => 0;
sub new {
my $class = shift;
diff --git a/lib/Krawfish/Posting/Aggregate/Fields.pm b/lib/Krawfish/Posting/Aggregate/Fields.pm
index 6a073f9..82cd32b 100644
--- a/lib/Krawfish/Posting/Aggregate/Fields.pm
+++ b/lib/Krawfish/Posting/Aggregate/Fields.pm
@@ -1,10 +1,21 @@
package Krawfish::Posting::Aggregate::Fields;
+use Krawfish::Log;
use strict;
use warnings;
+# This remembers facets for multiple classes,
+# both using ids and terms
+
+
# TODO:
# This should be part of Koral::Result!
+# TODO:
+# It may be beneficial to deal with Koral::Type here,
+# so inflate() would be an action directly done in Koral::Type
+
+use constant DEBUG => 1;
+
sub new {
my $class = shift;
bless {
@@ -14,6 +25,8 @@
}, $class;
};
+
+# Increment the field frequency for each field in the current doc
sub incr_doc {
my ($self, $key_id, $field_id) = @_;
@@ -26,16 +39,29 @@
# Increase doc frequency for the key
$field_freq->[0]++;
- # Remember
+ if (DEBUG) {
+ print_log('p_a_facets', 'Increment doc frequency for ' . $key_id . ':' . $field_id);
+ };
+
+
+ # Remember the frequency
+ # The problem here is that the remembered frequencies are only loosely
+ # coupled to the field frequency of the field. This may be problematic.
push @{$self->{cache}}, $field_freq;
};
+# Increment the field frequency for each field per match
sub incr_match {
$_[0]->{freq}++;
+
+ if (DEBUG) {
+ print_log('p_a_facets', 'Increment match frequency');
+ };
};
+# Flush all frequency information remembered
sub flush {
my $self = shift;
@@ -46,11 +72,16 @@
$self->{cache} = [];
$self->{freq} = 0;
+
+ if (DEBUG) {
+ print_log('p_a_facets', 'Flush field frequency for all remembered frequencies');
+ };
};
};
-sub to_terms {
+# Translate this to terms
+sub inflate {
my ($self, $dict) = @_;
# Get fields
@@ -59,14 +90,21 @@
# Iterate over field identifier
foreach my $field_id (keys %$fields) {
+
+ # Request the term from the dictionary
my $field_term = $dict->term_by_term_id($field_id);
+ # Remove the term marker
+ # TODO:
+ # this may be a direct feature of the dictionary instead
$field_term =~ s/^!//;
my $aggr = ($fields{$field_term} //= {});
# Get facets for field
my $values = $fields->{$field_id};
foreach my $value (keys %$values) {
+
+ # Get the facet term from the dictionary
my $facet = $dict->term_by_term_id($value);
$facet =~ s/^\+$field_term://;
diff --git a/lib/Krawfish/Query/Filter.pm b/lib/Krawfish/Query/Filter.pm
index 8c00f11..2f572dc 100644
--- a/lib/Krawfish/Query/Filter.pm
+++ b/lib/Krawfish/Query/Filter.pm
@@ -4,7 +4,7 @@
use strict;
use warnings;
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# Filters a term to check, if it is
# in a supported document
diff --git a/lib/Krawfish/Result/Segment/Aggregate/Facets.pm b/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
index 58b711a..c011e95 100644
--- a/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
+++ b/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
@@ -59,7 +59,7 @@
my $self = shift;
- print_log('aggr_facets', 'Load fields') if DEBUG;
+ print_log('aggr_facets', 'Create pointer on fields') if DEBUG;
# Load the ranked list - may be too large for memory!
$self->{field_pointer} = $self->{field_obj}->pointer;
@@ -74,17 +74,19 @@
print_log('aggr_facets', 'Aggregate on fields') if DEBUG;
-
my $doc_id = $current->doc_id;
my $pointer = $self->{field_pointer};
# Set match frequencies to all remembered doc frequencies
my $aggr = $self->{aggregation};
- $aggr->flush;
# Skip to document in question
- if ($pointer->skip_doc($doc_id)) {
+ # TODO:
+ # skip_doc should ALWAYS return either the document or NOMOREDOC!
+ if ($pointer->skip_doc($doc_id) != -1) {
+
+ $aggr->flush;
my $coll = $self->{collection};
@@ -92,8 +94,8 @@
my @fields;
if (DEBUG) {
- print_log('aggr_facets', 'Look for frequencies for ' .
- join(',', @{$self->{field_keys}}));
+ print_log('aggr_facets', 'Look for frequencies for key ids ' .
+ join(',', @{$self->{field_keys}}) . " in $doc_id");
};
# Iterate over all fields