blob: 1c85c07878b758b9e9bc4dd0eb17141dfe69d612 [file] [log] [blame]
package Krawfish::Query;
use strict;
use warnings;
use Role::Tiny;
use Krawfish::Log;
use Krawfish::Posting::Span;
use Scalar::Util qw/blessed refaddr/;

# Compose the Krawfish::Corpus role into this role
with 'Krawfish::Corpus';

# Methods that have to be available in every package consuming this role.
# NOTE(review): skip_pos is listed here although this file also
# defines a skip_pos itself - confirm the requirement is intentional.
requires qw/skip_pos
            filter_by
            requires_filter/;
Akrond1f2e8b2016-11-15 22:11:53 +010013
Akrona588d072017-10-13 14:45:34 +020014
# Krawfish::Query is the base role for all span queries.
16
# TODO:
#   Use a boolean init value to indicate that a
#   query needs a call to next() first
20
# Switch for the print_log() debug output in this file (0 disables it)
use constant DEBUG => 0;
Akron6ff7b482017-02-09 01:29:29 +010022
# Current span posting object.
#
# Returns a new Krawfish::Posting::Span built from the doc_id, start,
# end, payload and flags slots of the query object. Returns nothing
# (undef in scalar context) as long as no doc_id is set.
#
# TODO: May have an offset value as well
sub current {
  my $self = shift;

  # Without a document id there is no current posting
  return unless defined $self->{doc_id};

  return Krawfish::Posting::Span->new(
    doc_id  => $self->{doc_id},
    start   => $self->{start},
    end     => $self->{end},
    payload => $self->{payload},
    flags   => $self->{flags}
  );
};
37
Akron0c998cc2017-07-19 03:29:37 +020038
# Move forward until the current posting lies in a following document.
# This is only relevant for term posting lists.
#
# Returns 1 as soon as the current posting has a doc_id greater than
# the doc_id the query was positioned on when called. Returns nothing
# if there is no current posting to start from, or if next() fails
# before such a posting is reached.
#
# TODO:
#   There may be the need to
#   have an _init value
sub next_doc {
  my $self = shift;

  my $start = $self->current or return;
  my $start_doc_id = $start->doc_id;

  if (DEBUG) {
    print_log('query', refaddr($self) . ": go to next doc following $start_doc_id");
  };

  # Always advance at least once and keep advancing
  # while still inside the document we started in
  while ($self->next) {
    return 1 if $self->current->doc_id > $start_doc_id;
  };

  # next() failed before a following document was reached
  return;
};
60
Akronc4bf5fb2017-07-18 02:20:40 +020061
# Skip to (or beyond) a certain position in the doc.
#
# Advances with next() as long as the current posting lies in the
# document the query was positioned in when called and starts
# before $target_pos.
#
# Returns 1 if the current posting is in that same document and does
# not start before $target_pos. Returns nothing if there is no current
# posting, the document is left, or next() fails.
#
# TODO:
#   This behaviour should be improved!
sub skip_pos {
  my ($self, $target_pos) = @_;
  my $current = $self->current or return;
  my $doc_id = $current->doc_id;

  while (($current = $self->current) && $current->doc_id == $doc_id) {

    # The requested position is reached (or passed)
    return 1 unless $current->start < $target_pos;

    print_log('query', "Skip " . $current->to_string .
                " to pos $target_pos in doc id $doc_id") if DEBUG;

    # Give up once next() fails - if a failing next() left the
    # current posting untouched, this loop would never terminate
    $self->next or return;
  };

  return;
};
86
87
# TODO:
#   This is a value that should probably be stored
#   at span-beginnings and can help to jump through very long
#   sequences of spans.
#
# Not implemented yet: the body is Perl's "..." placeholder statement,
# so calling this method dies with an "Unimplemented" error.
sub max_length {
  ...
};
95
96
# Default for queries that cannot report a frequency in the document:
# no frequency is computed here, the method only emits a warning
# pointing to PostingPointer.
#
# NOTE(review): there is no explicit return, so the method evaluates
# to whatever warn yields - confirm no caller treats it as a frequency.
sub freq_in_doc {
  warn 'freq_in_doc only supported for term queries (see PostingPointer)';
};
100
101
# Get current match.
#
# This default implementation has no match to offer and always
# returns undef. The explicit "return undef" is kept on purpose:
# in list context callers receive a one-element list holding undef,
# which a bare "return" would change to an empty list.
sub current_match {
  return undef;
};
106
Akronc4bf5fb2017-07-18 02:20:40 +0200107
108
# Lose all information about the query.
#
# Not yet implemented: the body is empty, so calling
# the method currently does nothing and returns nothing.
sub close {
  # Not yet implemented
};


1;