Added facet search functionality
diff --git a/lib/Krawfish/Index/Dictionary.pm b/lib/Krawfish/Index/Dictionary.pm
index 45e4e6a..f663d7a 100644
--- a/lib/Krawfish/Index/Dictionary.pm
+++ b/lib/Krawfish/Index/Dictionary.pm
@@ -6,7 +6,7 @@
# TODO: Use Storable
-use constant DEBUG => 1;
+use constant DEBUG => 0;
sub new {
my $class = shift;
diff --git a/lib/Krawfish/Index/Fields.pm b/lib/Krawfish/Index/Fields.pm
index 4cc37e3..6a8fa03 100644
--- a/lib/Krawfish/Index/Fields.pm
+++ b/lib/Krawfish/Index/Fields.pm
@@ -1,7 +1,10 @@
package Krawfish::Index::Fields;
+use Krawfish::Log;
use strict;
use warnings;
+use constant DEBUG => 0;
+
sub new {
my $class = shift;
bless {
@@ -20,6 +23,11 @@
# Preset fields with doc_id
my $fields = ($self->{array}->[$doc_id] //= {});
+ print_log(
+ 'fields',
+ 'Store field ' . $key . ':' . $value . ' for ' . $doc_id
+ ) if DEBUG;
+
# TODO:
# This needs to have information whether it's a string
# or an integer (mainly for sorting)
@@ -30,7 +38,18 @@
my $self = shift;
my $doc_id = shift;
my $doc = $self->{array}->[$doc_id];
- return $doc->{$_[0]} if @_;
+
+ # Get specific field
+ if (@_) {
+ print_log(
+ 'fields',
+ 'Get field ' . $_[0] . ' for ' . $doc_id
+ ) if DEBUG;
+
+ return $doc->{$_[0]} ;
+ };
+
+ # Get all fields
return $doc;
};
@@ -62,6 +81,11 @@
sub docs_ranked {
my ($self, $field) = @_;
+ print_log(
+ 'fields',
+ 'Get rank vector for ' . $field
+ ) if DEBUG;
+
# TODO:
# Currently ranks are set absolutely - but they should be set
# multiple times to make sorts for multiple fields
@@ -69,8 +93,11 @@
# TODO: Check if the field needs to be sorted
# numerically or based on a collation
- # Lookup at disk
- return @{$self->{ranks}->{$field}} if $self->{ranks}->{$field};
+ if ($self->{ranks}->{$field}) {
+
+ # Lookup at disk
+ return @{$self->{ranks}->{$field}};
+ };
# TODO:
# $max_rank is important, because it indicates
@@ -84,6 +111,13 @@
# Store ranks for the future
$self->{ranks}->{$field} = [$max_rank, $ranked];
+ if (DEBUG) {
+ print_log(
+ 'fields',
+ 'Return rank vector for ' . $field . ' with ' . join(',', @$ranked)
+ );
+ };
+
# Return ranked list
return @{$self->{ranks}->{$field}};
};
diff --git a/lib/Krawfish/Index/PostingsList.pm b/lib/Krawfish/Index/PostingsList.pm
index 55f7f93..7d83684 100644
--- a/lib/Krawfish/Index/PostingsList.pm
+++ b/lib/Krawfish/Index/PostingsList.pm
@@ -3,7 +3,7 @@
use Krawfish::Log;
use strict;
use warnings;
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# TODO: Use different PostingsList for different term types
#
diff --git a/lib/Krawfish/Search/FieldFacets.pm b/lib/Krawfish/Search/FieldFacets.pm
index 0edae49..9f4a3af 100644
--- a/lib/Krawfish/Search/FieldFacets.pm
+++ b/lib/Krawfish/Search/FieldFacets.pm
@@ -1,7 +1,10 @@
package Krawfish::Search::FieldFacets;
+use Krawfish::Log;
use strict;
use warnings;
+use constant DEBUG => 0;
+
# This search construct will collect frequencies for fields
# in buckets to make facet search possible
@@ -27,8 +30,9 @@
# Preload ranks
foreach (@{$self->{facet_fields}}) {
- # These may already be loaded in memory (for facets or sorting)
+ print_log('facet', 'Preload field_rank for ' . $_) if DEBUG;
+ # These may already be loaded in memory (for facets or sorting)
(my $max_rank, $self->{ranks}->{$_}) = $fields->docs_ranked($_);
};
@@ -42,12 +46,14 @@
my $field;
+ # Next query
if ($self->{query}->next) {
my $current = $self->{query}->current;
-
my $doc_id = $current->doc_id;
my $last_doc_id = $self->{doc_id};
+ print_log('facet', "Get facet info for $doc_id") if DEBUG;
+
# Iterate over all fields and collect ranks
foreach $field (@{$self->{facet_fields}}) {
@@ -55,12 +61,15 @@
my $rank = $self->{ranks}->{$field}->[$doc_id];
# The rank may be ordered ordinally or lexicographic
+ print_log('facet', " '$field' has rank $rank") if DEBUG;
+
# Field exists for document
if ($rank != 0) {
- # Get the field bucket from memor
- my $bucket = $self->{buckets}->{$field};
- $bucket //= [];
+ # Get the field bucket from memory
+ my $bucket = ($self->{buckets}->{$field} //= []);
+
+ print_log('facet', ' bucket is initialized') if DEBUG;
# This will contain 'doc_freq', 'freq', and an example 'doc_id'
my $freq_bucket = $bucket->[$rank] //= [0, 0, $doc_id];
@@ -74,6 +83,11 @@
# Increment occurrence frequency
$freq_bucket->[1]++;
+ print_log(
+ 'facet',
+ " '$field' has frequencies " .
+ $freq_bucket->[0] . '/' . $freq_bucket->[1]
+ ) if DEBUG;
};
};
@@ -103,7 +117,9 @@
my $fields = $self->{index}->fields;
# Iterate over all ranked buckets of the field
- foreach my $rank (@$bucket) {
+ foreach my $rank (grep { defined $_ } @$bucket) {
+
+ print_log('facet', "Get rank $rank for $field") if DEBUG;
# Get information from rank
my ($doc_freq, $freq, $example_doc_id) = @$rank;