# blob: 3eb3d74dee2594fe6c9aee4f950456694a59301f
package Krawfish::Index::TokensList;
use strict;
use warnings;

# Compile-time debug switch: while this is 0, the statements guarded
# by DEBUG in this file are never executed.
use constant DEBUG => 0;

# This is a special PostingsList that stores the length of tokens
# in segments.
#
# It may also be used for extensions and distances measured in tokens
# (instead of segments).
#
# That's why this postings list has a special API for extensions
# and word distances.
#
# Structure may be: ([docid-delta]([seg-pos-delta][length-varbit])*)*
#
# The problem is that this structure makes it impossible to move
# back and forth.

# Construct a new, empty tokens list.
#
# Arguments (positional, after the class name):
#   index_file - stored verbatim under the 'index_file' key
#   foundry    - stored verbatim under the 'foundry' key
#
# Both values are undef when omitted. The in-memory entry array starts
# empty and the cursor 'pos' starts at -1, i.e. in front of the first
# entry.
#
# Returns the blessed object.
sub new {
  my ($class, $index_file, $foundry) = @_;

  my $self = {
    array      => [],
    pos        => -1,
    index_file => $index_file,
    foundry    => $foundry,
  };

  return bless $self, $class;
}
29
# Append one token occurrence to the in-memory list.
#
#   $token  - only used for the debug message; it is not stored
#   $doc_id - first element of the stored entry
#   $pos    - second element of the stored entry
#   $end    - third element of the stored entry (may be omitted)
#
# The entry is stored as [$doc_id, $pos, $end]. Returns the result of
# push(), i.e. the new number of entries.
#
# NOTE(review): print_log() is neither defined nor imported in this
# file, so switching DEBUG on would make this call fail - confirm where
# it is expected to come from.
sub append {
  my ($self, $token, $doc_id, $pos, $end) = @_;

  if (DEBUG) {
    print_log(
      'toklist',
      "Appended $token with $doc_id, $pos" . ($end ? "-$end" : '')
    );
  }

  return push @{ $self->{array} }, [$doc_id, $pos, $end];
}
36
# Forward declaration only: this file provides no body for next(), so
# calling it fails at runtime unless a definition is supplied elsewhere.
# Presumably meant to advance the cursor to the next entry - TODO confirm.
sub next;
38
# Return the current cursor position of the list, i.e. the value kept
# under the 'pos' key (new() initialises it to -1).
sub pos {
  my ($self) = @_;
  return $self->{pos};
}
42
# Return the entry at the current cursor position, i.e. the
# [$doc_id, $pos, $end] array reference stored by append().
#
# Returns undef while the cursor is in front of the first entry (pos is
# -1 after construction) or behind the last one. A negative index is
# rejected explicitly, because Perl would otherwise count it from the
# end of the array and hand out the last entry.
sub token {
  my ($self) = @_;

  my $index = $self->pos;

  return $index < 0 ? undef : $self->{array}->[$index];
}
46
47
# Forward declarations only: this file provides no bodies for the
# following methods, so calling them fails at runtime unless
# definitions are supplied elsewhere.

# Presumably the number of entries in the list - TODO confirm.
sub freq;

# Presumably moves the cursor forward to a given document - TODO confirm.
sub skip_to_doc;

# Presumably moves the cursor forward to a given position - TODO confirm.
sub skip_to_pos;
53
Akron555de3b2017-01-17 00:27:29 +010054
# Get an array of start positions that are in the range of min/max,
# starting with the lowest.
#
#   $start     - position to extend from
#                (presumably the start of a span - TODO confirm)
#   $min, $max - bounds of the extension range
#
# Not implemented yet: the yada-yada statement below dies with
# "Unimplemented" as soon as the method is called.
sub extend_to_left {
  my ($self, $start, $min, $max) = @_;

  # Returns an array of start positions
  ...;
}
62
# Get an array of end positions that are in the range of min/max,
# starting with the lowest.
#
#   $end       - position to extend from
#                (presumably the end of a span - TODO confirm)
#   $min, $max - bounds of the extension range
#
# Not implemented yet: the yada-yada statement below dies with
# "Unimplemented" as soon as the method is called.
sub extend_to_right {
  my ($self, $end, $min, $max) = @_;

  # Returns an array of end positions
  ...;
}
70
# Check if the number of tokens between end and start
# is in the given range.
#
# This is necessary for token distance queries such as
#   a []{2,3} b
#
#   $end       - presumably the end of the first operand - TODO confirm
#   $start     - presumably the start of the second operand - TODO confirm
#   $min, $max - bounds of the accepted number of tokens
#
# Not implemented yet: the yada-yada statement below dies with
# "Unimplemented" as soon as the method is called.
sub check_tokens_between {
  my ($self, $end, $start, $min, $max) = @_;

  # First check if this is even possible based on segments,
  # then check on tokens
  ...;
}
83
84
# A module file has to end with a true value.
1;

__END__