| Akron | 73ca245 | 2016-11-20 17:09:39 +0100 | [diff] [blame] | 1 | package Krawfish::Index::PostingPointer; |
| Akron | 1f3feac | 2017-05-05 17:05:45 +0200 | [diff] [blame] | 2 | use parent 'Krawfish::Query'; |
| 3 | use Krawfish::Log; |
| Akron | 448bca9 | 2017-05-06 18:01:05 +0200 | [diff] [blame] | 4 | use Krawfish::Posting::Data; |
| Akron | 73ca245 | 2016-11-20 17:09:39 +0100 | [diff] [blame] | 5 | use Krawfish::Posting; |
| Akron | c40598b | 2017-08-07 12:13:34 +0200 | [diff] [blame] | 6 | use Scalar::Util qw/refaddr/; |
| Akron | 73ca245 | 2016-11-20 17:09:39 +0100 | [diff] [blame] | 7 | use strict; |
| 8 | use warnings; |
| 9 | |
# DEBUG:  when true, the temporary slow methods announce themselves
#         through print_log
# DOC_ID: index of the document id inside a raw entry as returned
#         by the list's at() method
use constant DEBUG  => 1;
use constant DOC_ID => 0;
| 14 | |
| 15 | # TODO: Implement skipping efficiently!!! |
| 16 | # TODO: Implement next_doc efficiently!!! |
| 17 | # TODO: Implement freq_in_doc efficiently!!! |
| Akron | dd02499 | 2017-05-07 13:02:06 +0200 | [diff] [blame] | 18 | # TODO: Add direct access to doc_id! |
| Akron | 1f3feac | 2017-05-05 17:05:45 +0200 | [diff] [blame] | 19 | |
| Akron | 91b0e47 | 2016-12-05 17:07:50 +0100 | [diff] [blame] | 20 | # TODO: Use Stream::Finger instead of PostingPointer |
| 21 | |
| Akron | 73ca245 | 2016-11-20 17:09:39 +0100 | [diff] [blame] | 22 | # Points to a position in a postings list |
| 23 | |
| 24 | # TODO: Return different posting types |
| Akron | 349747d | 2016-12-05 11:05:53 +0100 | [diff] [blame] | 25 | # Using current |
| Akron | 73ca245 | 2016-11-20 17:09:39 +0100 | [diff] [blame] | 26 | |
# Constructor: create a pointer over the given postings list.
# The position starts at -1, so the first call to next()
# moves the pointer onto index 0.
sub new {
  my ($class, $list) = @_;

  my %self = (
    list => $list,
    pos  => -1
  );

  return bless \%self, $class;
};
| 34 | |
# Frequency as reported by the underlying postings list
sub freq {
  my ($self) = @_;
  return $self->{list}->freq;
};
| 38 | |
| Akron | 1f3feac | 2017-05-05 17:05:45 +0200 | [diff] [blame] | 39 | |
| 40 | # Get the term from the list |
sub term {
  my ($self) = @_;

  # Delegate to the underlying postings list
  return $self->{list}->term;
};
| 44 | |
| Akron | 1f3feac | 2017-05-05 17:05:45 +0200 | [diff] [blame] | 45 | |
# Get the term id from the list
sub term_id {
  my ($self) = @_;
  return $self->{list}->term_id;
};
| 49 | |
| Akron | 1f3feac | 2017-05-05 17:05:45 +0200 | [diff] [blame] | 50 | |
| 51 | # Forward position |
sub next {
  my ($self) = @_;

  # The position is advanced unconditionally,
  # even when the pointer is already behind the last posting
  my $new_pos = ++$self->{pos};

  # Report 1 while the new position is inside the list, 0 otherwise
  return 1 if $new_pos < $self->freq;
  return 0;
};
| 57 | |
| Akron | 854726b | 2016-12-05 14:39:07 +0100 | [diff] [blame] | 58 | |
| Akron | 1f3feac | 2017-05-05 17:05:45 +0200 | [diff] [blame] | 59 | # Get the frequency of the term in the document |
| 60 | # This is just a temporary implementation |
sub freq_in_doc {
  my $self = shift;

  print_log('ppointer', refaddr($self) .
              ': TEMP SLOW Get the frequency of the term in the doc') if DEBUG;

  # Without a current posting there is no document to count in
  # (current() returns nothing when the list has no entry at the position)
  my $current = $self->current or return 0;

  # This is the doc_id
  my $current_doc_id = $current->doc_id;

  my $list     = $self->{list};
  my $all_freq = $self->freq;

  # Work on a copy, so the pointer itself is not moved
  my $pos = $self->{pos};

  # Move to the start of the document
  while ($pos > 0 && $list->at($pos - 1)->[DOC_ID] == $current_doc_id) {
    $pos--;
  };

  # Count all entries up to the end of the document
  my $freq = 0;
  while ($pos < $all_freq && $list->at($pos)->[DOC_ID] == $current_doc_id) {
    $freq++;
    $pos++;
  };

  # Return the frequency
  return $freq;
};
| 87 | |
| Akron | 854726b | 2016-12-05 14:39:07 +0100 | [diff] [blame] | 88 | |
# Accessor for the pointer's index in the postings list
# (-1 as long as next() was not called on a fresh pointer)
sub pos {
  my ($self) = @_;
  return $self->{pos};
};
| 92 | |
| Akron | 1f3feac | 2017-05-05 17:05:45 +0200 | [diff] [blame] | 93 | |
# This does NOT return a posting but a posting data object,
# so it should perhaps get a different name.
# It is called by different term types - so it could be named current_data
sub current {
  my $self = shift;

  # A fresh pointer sits at position -1. Never hand a negative index
  # to the list: a list backed by a plain Perl array would resolve -1
  # to its LAST entry instead of nothing
  # NOTE(review): at() is not visible here - confirm how it treats -1
  my $pos = $self->pos;
  return if $pos < 0;

  # No entry at this position (e.g. behind the end of the list)
  my $data = $self->{list}->at($pos) or return;

  # Wrap the raw entry
  return Krawfish::Posting::Data->new($data);
};
| 105 | |
| Akron | 73ca245 | 2016-11-20 17:09:39 +0100 | [diff] [blame] | 106 | |
# Close the pointer.
# Not implemented yet: the ellipsis statement below throws
# an "Unimplemented" exception when the method is called.
sub close {
  ...;
};
| 110 | |
| Akron | 73ca245 | 2016-11-20 17:09:39 +0100 | [diff] [blame] | 111 | |
| Akron | 1563b0c | 2017-08-10 19:58:04 +0200 | [diff] [blame] | 112 | #sub list { |
| 113 | # return $_[0]->{list}; |
| 114 | #}; |
| Akron | 73ca245 | 2016-11-20 17:09:39 +0100 | [diff] [blame] | 115 | |
| 116 | |
| Akron | c4bf5fb | 2017-07-18 02:20:40 +0200 | [diff] [blame] | 117 | # Skip to a certain document, return the current |
| 118 | # doc_id |
sub skip_doc {
  my ($self, $doc_id) = @_;

  print_log('ppointer', refaddr($self) . ': TEMP SLOW Skip to chosen document') if DEBUG;

  # Fetch the current posting only once per step instead of
  # building a new data object for every check
  my $current = $self->current;

  # Advance as long as there is no posting (e.g. the pointer was not
  # moved yet) or the posting lies in front of the requested document
  while (!$current || $current->doc_id < $doc_id) {

    # The list is exhausted - return nothing
    $self->next or return;
    $current = $self->current;
  };

  return $current->doc_id;
};
| 129 | |
| Akron | 576ebfc | 2017-08-06 22:50:15 +0200 | [diff] [blame] | 130 | |
# Advance the pointer inside the current document in relation
# to the given position and return the start of the posting
# the pointer rests on afterwards.
# Returns nothing if the list is exhausted on the way.
sub skip_pos {
  my ($self, $pos) = @_;
  print_log('ppointer', refaddr($self) . ': TEMP SLOW Skip to chosen position or after')
    if DEBUG;

  # Move to a posting first, if there is no current one
  if (!$self->current) {
    return unless $self->next;
  };

  my $posting = $self->current;
  my $doc_id  = $posting->doc_id;

  # Stop as soon as the document changes or the posting starts
  # behind the requested position.
  # NOTE(review): postings starting exactly at $pos are skipped as well,
  # although the log message says "position or after" - confirm intent
  until ($posting->doc_id != $doc_id || $posting->start > $pos) {
    return unless $self->next;
    $posting = $self->current;
  };

  return $posting->start;
};
| 150 | |
| Akron | 73ca245 | 2016-11-20 17:09:39 +0100 | [diff] [blame] | 151 | 1; |