blob: 94be22349a48b313e41b5fbf415fe43de987b816 [file] [log] [blame]
package Krawfish::Index::PostingPointer;
use parent 'Krawfish::Query';
use Krawfish::Log;
use Krawfish::Posting::Data;
use Krawfish::Util::Constants qw/NOMOREDOCS/;
use Krawfish::Posting;
use Scalar::Util qw/refaddr/;
use strict;
use warnings;
# Moving pointer in a posting list.
use constant {
DEBUG => 0,
DOC_ID => 0
};
# TODO:
# Implement skipping efficiently!!!
# TODO:
# Implement next_doc efficiently!!!
# TODO:
# Implement freq_in_doc efficiently!!!
# TODO:
# Add direct access to doc_id!
# TODO:
# Use Stream::Finger instead of PostingPointer
# TODO:
# Return different posting types using current
# Constructor
sub new {
my $class = shift;
bless {
list => shift,
pos => -1
}, $class;
};
# Get frequency of the list
# (probably copy value when posting pointer is lifted)
sub freq {
$_[0]->{list}->freq;
};
# Get the term id
# (probably copy value when posting pointer is lifted)
sub term_id {
$_[0]->{list}->term_id;
};
# Move to next posting
sub next {
my $self = shift;
my $pos = $self->{pos}++;
return ($pos + 1) < $self->freq ? 1 : 0;
};
# Get the frequency of the term in the document
# This is just a temporary implementation
sub freq_in_doc {
my $self = shift;
print_log('ppointer', refaddr($self) .
': TEMP SLOW Get the frequency of the term in the doc') if DEBUG;
# This is the doc_id
my $current_doc_id = $self->current->doc_id;
my $pos = $self->{pos};
my $freq = 0;
my $all_freq = $self->freq;
# Move to the start of the document
while ($pos > 0 && ($self->{list}->at($pos-1)->[DOC_ID] == $current_doc_id)) {
$pos--;
};
# Move to the end of the document
while ($pos < $self->freq && ($self->{list}->at($pos++)->[DOC_ID] == $current_doc_id)) {
$freq++;
};
# Return the frequency
return $freq;
};
# Get the current position in the list
sub pos {
return $_[0]->{pos};
};
# This does NOT return a posting, so it may be called differently
# This is called by different term types - so this could be named current_data
sub current {
my $self = shift;
my $data = $self->{list}->at($self->pos) or return;
Krawfish::Posting::Data->new(
$data
);
};
# Potentially close pointer
sub close {
...
};
# Skip to a certain document,
# return the new doc_id
sub skip_doc {
my ($self, $target_doc_id) = @_;
if (DEBUG) {
print_log('ppointer', refaddr($self) . ': TEMP SLOW Skip to chosen document');
};
while (!$self->current || $self->current->doc_id < $target_doc_id) {
$self->next or return NOMOREDOCS;
};
return $self->current->doc_id;
};
# Skip to a certain position in the list
sub skip_pos {
my ($self, $target_pos) = @_;
if (DEBUG) {
print_log('ppointer', refaddr($self) . ': TEMP SLOW Skip to chosen position or after');
};
unless ($self->current) {
$self->next or return;
};
my $current = $self->current;
my $start_doc_id = $current->doc_id;
while ($start_doc_id == $current->doc_id && $current->start <= $target_pos) {
$self->next or return;
$current = $self->current;
};
return $current->start;
};
1;