blob: 15eb364c4590cf96c88672520daa12b76e9408b0 [file] [log] [blame]
package Krawfish::Result::Group::Fields;
use Krawfish::Log;
use strict;
use warnings;
use constant DEBUG => 0;
# This will group matches (especially document matches) by field
# This is useful e.g. for document browsing per corpus.
#
# Because the grouping is based on ranking, the sorting will be trivial.
# Constructor.
#
# Arguments (positional, after the class name):
#   1. the index object (its ->fields accessor is used later on)
#   2. an array reference with the names of the fields to group by
#
# The ranks and the example documents are left undefined here;
# they are filled in lazily by _init.
sub new {
  my ($class, $index, $fields) = @_;

  my %self = (
    index  => $index,
    fields => $fields,
    groups => {},

    # Rankings per field, lifted on first use
    ranks  => undef,

    # Store all example docs per field position at rank position
    # in an array [[example_doc_nr,example_doc_nr,...], [...], ...]
    example_docs => undef
  );

  return bless \%self, $class;
};
# Initialize group fetching.
#
# Lazily lifts the rankings for all requested fields from the index and
# prepares one (initially empty) example document list per ranked field.
# Does nothing if the ranks were already lifted by an earlier call.
#
# Fields for which the index returns no ranking are dropped, and
# $self->{fields} is rewritten to the remaining fields. The three lists
# $self->{ranks}, $self->{example_docs} and $self->{fields} are filled in
# lockstep, so the same position refers to the same field in all of them.
sub _init {
  return if $_[0]->{ranks};

  my $self = shift;

  print_log('group_fields', 'Get ranks for fields') if DEBUG;

  # Get fields object
  my $fields = $self->{index}->fields;

  # Lift ranks for each relevant field
  # (may already be lifted for another job ...)
  # and initialize example docs
  my $ranks        = ($self->{ranks} = []);
  my $example_docs = ($self->{example_docs} = []);
  my @fields = ();

  foreach my $field (@{$self->{fields}}) {

    print_log('group_fields', "Lift the ranks for '$field'") if DEBUG;

    # Fetch rank - fields without a ranking are skipped
    if (my $rankings = $fields->ranked_by($field)) {
      push @$ranks, $rankings;

      # Keep the example docs aligned with the compacted rank list,
      # not with the position of the field in the original request
      push @$example_docs, [];
      push @fields, $field;
    };
  };

  # In case unranked fields were requested, the field request
  # needs to be rewritten.
  # WARNING: The user needs to be notified about this somehow ...
  $self->{fields} = \@fields;
};
# Get the group signature for a match.
# May well be renamed to "get_signature".
#
# The signature is built from the rank of the match's document in every
# lifted ranking, joined by '___'. As a side effect, the first document
# seen for a rank is remembered as the example document of that rank
# (per ranking position), so the surface field value can be retrieved later.
sub get_group {
  my ($self, $current) = @_;

  # Make sure the rankings are lifted
  $self->_init;

  my $doc_id   = $current->doc_id;
  my $examples = $self->{example_docs};

  # Ranks of the document, in the order of the rankings
  my @signature;
  my $pos = 0;

  for my $ranking (@{ $self->{ranks} }) {

    # Rank of the match's document in this ranking
    my $rank = $ranking->get($doc_id);

    # Only the first document seen for a rank is kept as the example
    $examples->[$pos]->[$rank] //= $doc_id;
    $pos++;

    push @signature, $rank;
  };

  # Create signature string
  return join '___', @signature;
};
# Return group info as a hash reference.
#
# Arguments:
#   $signature - '___'-joined ranks, as created by get_group
#   $doc_freq  - stored under the key 'doc_freq'
#   $freq      - stored under the key 'freq', but only if defined
#
# For every rank in the signature, the example document remembered for
# that rank is used to fetch the field value from the index' fields
# object; the value is stored under the name of the field.
sub to_hash {
  my ($self, $signature, $doc_freq, $freq) = @_;

  # Field names, field storage and remembered example documents
  my $field_names  = $self->{fields};
  my $fields_obj   = $self->{index}->fields;
  my $example_docs = $self->{example_docs};

  # One rank per field position
  my @ranks = split('___', $signature);

  # Store frequency information
  my %hash = (doc_freq => $doc_freq);
  if (defined $freq) {
    $hash{freq} = $freq;
  };

  print_log('group_field', "Create hash for $signature") if DEBUG;

  # Iterate over all ranks in the signature
  foreach my $pos (0 .. $#ranks) {

    # Example document remembered for this rank at this field position
    my $doc_id = $example_docs->[$pos]->[$ranks[$pos]];

    print_log('group_field', "Example doc is $doc_id") if DEBUG;

    # Map the field name to the value found in the example document
    my $name = $field_names->[$pos];
    $hash{$name} = $fields_obj->get($doc_id, $name);
  };

  return \%hash;
};
# Serialize the grouping criterion as 'fields[name1,name2,...]',
# listing the field names currently stored in the object.
sub to_string {
  my ($self) = @_;
  return 'fields[' . join(',', @{ $self->{fields} }) . ']';
};
1;