blob: 699efe2616bd9615da0f2cba891d125d065de5c0 [file] [log] [blame]
Akrona604ddd2016-11-24 12:29:59 +01001package Krawfish::Corpus;
Akron71fc0ec2017-11-02 17:34:21 +01002use strict;
3use warnings;
Akron21143fe2017-11-06 19:54:30 +01004use Krawfish::Util::Constants qw/NOMOREDOCS/;
Akronec351652017-11-01 16:04:38 +01005use Role::Tiny;
Akron71fc0ec2017-11-02 17:34:21 +01006use Krawfish::Log;
7
Akronec351652017-11-01 16:04:38 +01008requires qw/current
9 next
10 next_doc
11 skip_doc
12 same_doc
13 clone
14 max_freq
15 to_string
16 /;
Akrona604ddd2016-11-24 12:29:59 +010017
# Krawfish::Corpus is the base role for all corpus queries.
19
Akronec351652017-11-01 16:04:38 +010020use constant DEBUG => 0;
21
# Current posting object (nothing is returned if no document is set)
sub current {
  my ($self) = @_;

  # Build a posting for the document this query currently points to.
  # Returns nothing (undef / empty list) as long as no doc_id is set.
  my $doc_id = $self->{doc_id};
  return if !defined $doc_id;

  # The posting carries the doc id together with the stored flags
  my %fields = (
    doc_id => $doc_id,
    flags  => $self->{flags}
  );
  return Krawfish::Posting->new(%fields);
};
31
32
# Move to the next document. Defaults to next();
# overwrite in queries that need a dedicated implementation.
sub next_doc {
  my ($self) = @_;

  # Advancing by document is delegated to the plain next() step
  # and hands back whatever next() returns.
  return $self->next;
};
37
38
# Skip to (or beyond) a certain doc id.
# This is a generic fallback and should be overwritten
# by more efficient implementations.
sub skip_doc {
  my ($self, $target_doc_id) = @_;

  # Advance document by document until the current doc id is equal to
  # or greater than $target_doc_id.
  #
  # Returns the doc id that was reached, or NOMOREDOCS if next_doc()
  # fails before the target is reached.

  if (DEBUG) {
    # None of the use lines in this file import refaddr() by name,
    # so load Scalar::Util here and call it fully qualified.
    require Scalar::Util;
    print_log(
      'corpus',
      Scalar::Util::refaddr($self) . ': skip to doc id ' . $target_doc_id
    );
  };

  # Fetch the posting once per step - the default current()
  # builds a fresh posting object on every call.
  my $posting = $self->current;

  while (!$posting || $posting->doc_id < $target_doc_id) {
    $self->next_doc or return NOMOREDOCS;
    $posting = $self->current;
  };

  return $posting->doc_id;
};
53
54
55
56# Move both operands to the same document
sub same_doc {
  my ($self, $second) = @_;

  # Align this query and $second on a common document.
  # Returns 1 once both point to the same doc id and nothing if
  # either side has no current posting or runs out of documents.

  my $own = $self->current;
  return unless $own;

  my $other = $second->current;
  return unless $other;

  # Alternate skipping until both doc ids agree
  until ($own->doc_id == $other->doc_id) {
    print_log('corpus', 'Current span is not in docs') if DEBUG;

    if ($own->doc_id < $other->doc_id) {

      # This query lags behind - catch up with the second operand
      print_log('corpus', 'Forward first') if DEBUG;
      return if $self->skip_doc($other->doc_id) == NOMOREDOCS;
      $own = $self->current;
    }
    else {

      # The second operand lags behind - catch up with this query
      print_log('corpus', 'Forward second') if DEBUG;
      return if $second->skip_doc($own->doc_id) == NOMOREDOCS;
      $other = $second->current;
    };
  };

  return 1;
};
88
89
90# Per default every operation is complex
sub complex {
  # Default answer for every operation: it is complex (true).
  1;
};
94
95
Akron255a8502017-11-12 20:07:34 +010096# Stop compilation of results in non-compile queries
sub compile {
  # Default implementation: does nothing and reports a true value.
  return 1;
};
100
101
102
Akrona604ddd2016-11-24 12:29:59 +01001031;