blob: 67b4966ef7189f58472ee91d11335d5db76e9d90 [file] [log] [blame]
Akron5e4b2972017-08-05 20:59:48 +02001package Krawfish::Query::Match;
Akron5e4b2972017-08-05 20:59:48 +02002use strict;
3use warnings;
Akron71fc0ec2017-11-02 17:34:21 +01004use Role::Tiny::With;
5use Krawfish::Log;
Akron1ec9b8e2017-12-12 15:09:15 +01006use Krawfish::Util::Bits;
Akron5e4b2972017-08-05 20:59:48 +02007
Akron71fc0ec2017-11-02 17:34:21 +01008with 'Krawfish::Query';
Akrona588d072017-10-13 14:45:34 +02009
10# Get posting by doc id plus position and length.
11
Akronc40598b2017-08-07 12:13:34 +020012use constant DEBUG => 0;
Akron5e4b2972017-08-05 20:59:48 +020013
Akrona588d072017-10-13 14:45:34 +020014
15# Constructor
sub new {
  # Build a match query from positional arguments:
  #   doc     - document stream object; this class calls next, current,
  #             clone and to_string on it
  #   start   - stored as given
  #   end     - stored as given
  #   payload - optional object; to_string is called on it when set
  #   flags   - optional bit vector, inspected in to_string
  # Arguments that are not passed end up as undef in the object.
  my ($class, $doc, $start, $end, $payload, $flags) = @_;

  my %match = (
    doc     => $doc,
    start   => $start,
    end     => $end,
    payload => $payload,
    flags   => $flags,
  );

  return bless \%match, $class;
};
26
Akrona588d072017-10-13 14:45:34 +020027
28# Clone query
sub clone {
  # Return a new match query built through this package's constructor.
  # Only the document stream is cloned; start, end, payload and flags
  # are handed over as they are (the payload reference is shared).
  # The init counter is not carried over to the copy.
  my ($self) = @_;

  my @args = (
    $self->{doc}->clone,
    @{$self}{qw/start end payload flags/}
  );

  return __PACKAGE__->new(@args);
};
Akron5e4b2972017-08-05 20:59:48 +020039
Akrona588d072017-10-13 14:45:34 +020040
41# Initialize
sub _init {
  my $self = shift;

  # Only the very first call does any work;
  # the counter is incremented on every call
  return if $self->{init}++;

  print_log('match', 'Init ' . $self->{doc}->to_string) if DEBUG;

  # Advance the document stream once
  $self->{doc}->next;
};
49
50
Akrona588d072017-10-13 14:45:34 +020051# Move to next posting
sub next {
  # Move to the next match.
  # Returns 1 when the document stream has a current entry
  # (its doc_id is remembered in $self->{doc_id}),
  # and returns nothing once the stream is exhausted.
  my ($self) = @_;

  # Make sure the document stream was advanced at least once
  $self->_init;

  if (DEBUG) {
    print_log('match', 'Check next valid match');
  };

  my $posting = $self->{doc}->current;

  # Document stream has no current entry
  unless ($posting) {
    $self->{doc_id} = undef;
    if (DEBUG) {
      print_log('match', 'No more document');
    };
    return;
  };

  if (DEBUG) {
    print_log('match', 'Document ' . $posting->doc_id . ' is valid');
  };

  # Remember the document of the current match
  $self->{doc_id} = $posting->doc_id;

  # TODO:
  #   probably check if start and end is in a valid area
  #   $self->{start} = $self->start;
  #   $self->{end} = $self->end;

  # $self->{payload} = $current->payload->add(
  #   0,
  #   $self->{number},
  #   $self->{start},
  #   $self->{end}
  # );

  # Advance the document stream for the following call
  $self->{doc}->next;

  if (DEBUG) {
    print_log('match', 'Defined match ' . $self->current->to_string);
  };

  return 1;
};
88
89
Akrona588d072017-10-13 14:45:34 +020090# Get maximum frequency
sub max_freq {
  # Always 1: a match can only occur once
  # (although this requires a filter!)
  return 1;
};
96
97
Akrona588d072017-10-13 14:45:34 +020098# Stringification
sub to_string {
  # Serialize as
  #   [[<doc>:<start>-<end>]]
  # with an optional "!<classes>" part (when any of the lower 15 flag
  # bits is set) and an optional "$<payload>" part (when a payload exists).
  my ($self) = @_;

  # Mask covering the lower 15 bits of the flags
  my $class_mask = 0b0111_1111_1111_1111;

  my $doc_str = $self->{doc}->to_string;
  my $out = "[[${doc_str}:$self->{start}-$self->{end}";

  # In case a class != 0 is set - serialize
  if ($self->{flags} && ($self->{flags} & $class_mask)) {
    my $classes = join(',', flags_to_classes($self->{flags}));
    $out .= '!' . $classes;
  };

  if ($self->{payload}) {
    $out .= '$' . $self->{payload}->to_string;
  };

  return $out . ']]';
};
112
113
Akrona588d072017-10-13 14:45:34 +0200114# Filter query by VC
115# This is useful to, e.g.,
116# make sure the document is live
sub filter_by {
  # Restrict this query to a corpus: the document stream is replaced by
  # a Krawfish::Corpus::And object built from the current stream and a
  # clone of the passed corpus.
  #
  #   $corpus - object providing a clone method
  #
  # Returns the query itself, so calls can be chained.
  my ($self, $corpus) = @_;

  # This file does not load the class at compile time, so load it on
  # first use instead of relying on another module having done so.
  # A runtime require is a no-op once the module is loaded.
  require Krawfish::Corpus::And;

  # TODO:
  #   Check always that the query isn't moved forward yet!
  $self->{doc} = Krawfish::Corpus::And->new(
    $self->{doc},
    $corpus->clone
  );

  return $self;
};
128
Akron2bc94da2017-10-27 15:20:36 +0200129
130# Requires filter
sub requires_filter {
  # Always returns 0
  return 0;
};
134
135
Akron5e4b2972017-08-05 20:59:48 +02001361;