| Akron | 5e4b297 | 2017-08-05 20:59:48 +0200 | [diff] [blame] | 1 | package Krawfish::Query::Match; |
| 2 | use parent 'Krawfish::Query'; |
| 3 | use Krawfish::Log; |
| 4 | use strict; |
| 5 | use warnings; |
| 6 | |
| Akron | c40598b | 2017-08-07 12:13:34 +0200 | [diff] [blame] | 7 | use constant DEBUG => 0; |
| Akron | 5e4b297 | 2017-08-05 20:59:48 +0200 | [diff] [blame] | 8 | |
# Constructor.
# Expects, in this order:
#   - the document query object the match is bound to (this class calls
#     next, current, clone and to_string on it),
#   - the start value of the match,
#   - the end value of the match.
# Missing arguments are stored as undef.
sub new {
  my ($class, $doc, $start, $end) = @_;

  my %self = (
    doc   => $doc,
    start => $start,
    end   => $end,
  );

  return bless \%self, $class;
}
| 17 | |
# Create an independent copy of this match query.
# The wrapped document query is cloned as well, while start and end are
# copied as they are. The copy is built through the class of the
# invocant, so objects of derived classes remain instances of their own
# class instead of being downgraded to the base class.
# Only doc, start and end are handed to the constructor, so iteration
# state (the init counter and the current doc_id) is not carried over.
sub clone {
  my $self = shift;

  return ref($self)->new(
    $self->{doc}->clone,
    $self->{start},
    $self->{end}
  );
}
| Akron | 5e4b297 | 2017-08-05 20:59:48 +0200 | [diff] [blame] | 26 | |
# Start the wrapped document query on first use.
# The init counter is incremented on every call; only the call that finds
# it unset moves the document query forward, all later calls return
# without doing anything. On that first call the result of the document
# query's next() is returned.
sub init {
  my ($self) = @_;

  # Already initialized by an earlier call
  return if $self->{init}++;

  print_log('match', 'Init ' . $self->{doc}->to_string) if DEBUG;

  return $self->{doc}->next;
}
| 34 | |
| 35 | |
# Forward to the next match.
# Makes sure the query is initialized, then looks at the current posting
# of the wrapped document query:
#   - if there is none, doc_id is reset to undef and nothing is returned;
#   - otherwise its doc_id is remembered, start and end are re-read through
#     their accessors, the document query is advanced for the following
#     call, and 1 is returned.
sub next {
  my ($self) = @_;

  $self->init;

  if (DEBUG) {
    print_log('match', 'Check next valid match');
  }

  my $posting = $self->{doc}->current;

  # The document query has nothing (more) to offer
  unless ($posting) {
    print_log('match', 'No more document') if DEBUG;
    $self->{doc_id} = undef;
    return;
  }

  if (DEBUG) {
    print_log('match', 'Document ' . $posting->doc_id . ' is valid');
  }

  $self->{doc_id} = $posting->doc_id;
  $self->{start}  = $self->start;
  $self->{end}    = $self->end;

  # Payload handling is currently disabled:
  # $self->{payload} = $current->payload->add(
  #   0,
  #   $self->{number},
  #   $self->{start},
  #   $self->{end}
  # );

  # Advance only after the current posting was consumed
  $self->{doc}->next;

  if (DEBUG) {
    print_log('match', 'Defined match ' . $self->current->to_string);
  }

  return 1;
}
| 70 | |
| 71 | |
# A match can occur at most once, so the maximum frequency is always 1
# (although this requires a filter!).
sub max_freq {
  return 1;
}
| 76 | |
| 77 | |
# Serialize the query in the form "[[<doc>:<start>-<end>]]", where <doc>
# is the string form of the wrapped document query and <start>/<end> are
# read through the accessors.
sub to_string {
  my ($self) = @_;

  my $doc_string = $self->{doc}->to_string;
  my $from       = $self->start;
  my $to         = $self->end;

  return '[[' . $doc_string . ':' . $from . '-' . $to . ']]';
}
| 82 | |
| 83 | |
# Accessor: the start value stored for this match.
sub start {
  my ($self) = @_;
  return $self->{start};
}
| 87 | |
| 88 | |
# Accessor: the end value stored for this match.
sub end {
  my ($self) = @_;
  return $self->{end};
}
| 92 | |
| 93 | |
# Restrict the wrapped document query by a corpus query.
# This is useful to, e.g., make sure the document is live.
# The current document query and a clone of the passed corpus are combined
# in a Krawfish::Corpus::And object, which replaces the document query.
# Returns the invocant, so calls can be chained.
#
# NOTE(review): Krawfish::Corpus::And is not loaded in the visible part of
# this file - confirm it is loaded elsewhere before this method is called.
sub filter_by {
  my $self   = shift;
  my $corpus = shift;

  # TODO: Check always that the query isn't moved forward yet!
  my $restricted = Krawfish::Corpus::And->new(
    $self->{doc},
    $corpus->clone
  );

  $self->{doc} = $restricted;

  return $self;
}
| 102 | |
| 103 | 1; |