blob: acc5519a518deb96da49a9baed5092cb36e833d4 [file] [log] [blame]
Akron73ca2452016-11-20 17:09:39 +01001package Krawfish::Index::PostingPointer;
Akron1f3feac2017-05-05 17:05:45 +02002use parent 'Krawfish::Query';
3use Krawfish::Log;
Akron448bca92017-05-06 18:01:05 +02004use Krawfish::Posting::Data;
Akron73ca2452016-11-20 17:09:39 +01005use Krawfish::Posting;
Akronc40598b2017-08-07 12:13:34 +02006use Scalar::Util qw/refaddr/;
Akron73ca2452016-11-20 17:09:39 +01007use strict;
8use warnings;
9
Akron1f3feac2017-05-05 17:05:45 +020010use constant {
Akronc40598b2017-08-07 12:13:34 +020011 DEBUG => 1,
Akrondd024992017-05-07 13:02:06 +020012 DOC_ID => 0
Akron1f3feac2017-05-05 17:05:45 +020013};
14
# Points to a position in a postings list.

# TODO: Implement skipping efficiently!
# TODO: Implement next_doc efficiently!
# TODO: Implement freq_in_doc efficiently!
# TODO: Add direct access to doc_id!
# TODO: Use Stream::Finger instead of PostingPointer
# TODO: Return different posting types using current
Akron73ca2452016-11-20 17:09:39 +010026
# Construct a new pointer on a postings list.
#
# Parameters:
#   $list - the postings list object the pointer walks over
#
# The pointer starts at position -1; the first call to next()
# moves it to index 0.
sub new {
  my ($class, $list) = @_;

  my $self = {
    list => $list,
    pos  => -1
  };

  return bless $self, $class;
};
34
# Frequency as reported by the underlying list's freq() method
sub freq {
  my $self = shift;
  return $self->{list}->freq;
};
38
Akron1f3feac2017-05-05 17:05:45 +020039
# Term as reported by the underlying list's term() method
sub term {
  my $self = shift;
  return $self->{list}->term;
};
44
Akron1f3feac2017-05-05 17:05:45 +020045
# Term identifier as reported by the underlying list's term_id() method
sub term_id {
  my $self = shift;
  return $self->{list}->term_id;
};
49
Akron1f3feac2017-05-05 17:05:45 +020050
# Move the pointer forward by one position.
#
# Returns 1 if the new position is still below the list's
# frequency, otherwise 0. The position is incremented in
# both cases.
sub next {
  my $self = shift;

  # Advance first, then test the new position
  $self->{pos}++;

  return $self->{pos} < $self->freq ? 1 : 0;
};
57
Akron854726b2016-12-05 14:39:07 +010058
# Get the frequency of the term in the current document.
# This is just a temporary (slow) implementation that scans
# the list around the current position.
#
# Returns the number of consecutive entries in the list that
# carry the same doc_id as the current posting, or 0 if the
# pointer is not on a posting (current() returns nothing).
# The pointer position itself is not changed.
sub freq_in_doc {
  my $self = shift;

  print_log('ppointer', refaddr($self) .
              ': TEMP SLOW Get the frequency of the term in the doc') if DEBUG;

  # Without a current posting there is no document to count in
  my $current = $self->current or return 0;

  # This is the doc_id
  my $current_doc_id = $current->doc_id;

  # Fetch loop invariants only once
  my $list     = $self->{list};
  my $all_freq = $self->freq;

  # Work on a copy, so the pointer stays where it is
  my $pos = $self->{pos};

  # Move to the start of the document
  while ($pos > 0 && $list->at($pos - 1)->[DOC_ID] == $current_doc_id) {
    $pos--;
  };

  # Move to the end of the document and count on the way
  my $freq = 0;
  while ($pos < $all_freq && $list->at($pos)->[DOC_ID] == $current_doc_id) {
    $freq++;
    $pos++;
  };

  # Return the frequency
  return $freq;
};
87
Akron854726b2016-12-05 14:39:07 +010088
# Current position of the pointer in the list
# (-1 as long as next() was never called)
sub pos {
  my ($self) = @_;
  return $self->{pos};
};
92
Akron1f3feac2017-05-05 17:05:45 +020093
# Get the data at the current position, wrapped in a
# Krawfish::Posting::Data object.
#
# This does NOT return a posting, so it may be called differently.
# It is called by different term types - so it could be
# named current_data.
#
# Returns nothing if the list yields no (true) value for the
# current position.
sub current {
  my ($self) = @_;

  my $raw = $self->{list}->at($self->pos);
  return unless $raw;

  return Krawfish::Posting::Data->new($raw);
};
105
Akron73ca2452016-11-20 17:09:39 +0100106
# Close the pointer.
# Not implemented yet - calling this method dies with
# an "Unimplemented" exception (yada yada statement).
sub close {
  ...;
};
110
Akron73ca2452016-11-20 17:09:39 +0100111
Akron1563b0c2017-08-10 19:58:04 +0200112#sub list {
113# return $_[0]->{list};
114#};
Akron73ca2452016-11-20 17:09:39 +0100115
116
# Skip to a certain document.
#
# Moves the pointer forward until the current posting has a
# doc_id that is not smaller than the requested one and returns
# that doc_id. Returns nothing if next() reports that the list
# is exhausted before such a posting is found.
sub skip_doc {
  my ($self, $target_doc_id) = @_;

  print_log('ppointer', refaddr($self) . ': TEMP SLOW Skip to chosen document') if DEBUG;

  while (1) {
    my $posting = $self->current;

    # Stop as soon as the pointer is on a posting at or after the target
    if ($posting && !($posting->doc_id < $target_doc_id)) {
      return $posting->doc_id;
    };

    # No posting yet or still in front of the target
    return unless $self->next;
  };
};
129
Akron576ebfc2017-08-06 22:50:15 +0200130
# Skip forward inside the current document.
#
# Moves the pointer forward while the posting stays in the
# document the skip started in and its start position is not
# greater than $pos. Returns the start position of the posting
# the pointer ends on, or nothing if next() reports that the
# list is exhausted.
#
# NOTE(review): the loop advances while start <= $pos, so inside
# the document the pointer ends strictly after $pos, although the
# log message says "position or after" - confirm which is intended.
# NOTE(review): if the document changes, the returned start belongs
# to a posting of another document - verify that callers expect this.
sub skip_pos {
  my ($self, $target_pos) = @_;
  print_log('ppointer', refaddr($self) . ': TEMP SLOW Skip to chosen position or after')
    if DEBUG;

  # Not on a posting yet - try to move to one
  if (!$self->current) {
    return unless $self->next;
  };

  my $posting = $self->current;
  my $initial_doc_id = $posting->doc_id;

  while (1) {

    # Left the document the skip started in
    last if $initial_doc_id != $posting->doc_id;

    # Moved beyond the requested position
    last if $posting->start > $target_pos;

    return unless $self->next;
    $posting = $self->current;
  };

  return $posting->start;
};
150
Akron73ca2452016-11-20 17:09:39 +01001511;