| Akron | 5e4b297 | 2017-08-05 20:59:48 +0200 | [diff] [blame] | 1 | package Krawfish::Query::Match; |
| Akron | 5e4b297 | 2017-08-05 20:59:48 +0200 | [diff] [blame] | 2 | use strict; |
| 3 | use warnings; |
| Akron | 71fc0ec | 2017-11-02 17:34:21 +0100 | [diff] [blame] | 4 | use Role::Tiny::With; |
| 5 | use Krawfish::Log; |
| Akron | 1ec9b8e | 2017-12-12 15:09:15 +0100 | [diff] [blame] | 6 | use Krawfish::Util::Bits; |
| Akron | 5e4b297 | 2017-08-05 20:59:48 +0200 | [diff] [blame] | 7 | |
| Akron | 71fc0ec | 2017-11-02 17:34:21 +0100 | [diff] [blame] | 8 | with 'Krawfish::Query'; |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 9 | |
# Get posting by doc plus start and end position.
| 11 | |
| Akron | c40598b | 2017-08-07 12:13:34 +0200 | [diff] [blame] | 12 | use constant DEBUG => 0; |
| Akron | 5e4b297 | 2017-08-05 20:59:48 +0200 | [diff] [blame] | 13 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 14 | |
# Constructor
#
# Positional arguments, in order: doc, start, end, payload, flags.
# Arguments that are not passed are stored as undef, so the object
# always carries all five keys.
# Returns the new object, blessed into the invoking class.
sub new {
  my ($class, $doc, $start, $end, $payload, $flags) = @_;

  my %match = (
    doc     => $doc,
    start   => $start,
    end     => $end,
    payload => $payload,
    flags   => $flags,
  );

  return bless \%match, $class;
};
| 26 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 27 | |
# Clone query
#
# Builds a new object of this package from a clone of the wrapped doc;
# start, end, payload and flags are handed over as they are
# (they are not copied themselves).
sub clone {
  my $self = shift;

  # The doc clone is created directly in the argument list,
  # followed by the remaining four fields in constructor order
  return __PACKAGE__->new(
    $self->{doc}->clone,
    @{$self}{qw/start end payload flags/}
  );
};
| Akron | 5e4b297 | 2017-08-05 20:59:48 +0200 | [diff] [blame] | 39 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 40 | |
# Initialize
#
# On the first call, advance the wrapped doc once. Every later call
# returns immediately without touching the doc.
sub _init {
  my $self = shift;

  # Post-increment: the counter is false only on the very first call
  return if $self->{init}++;

  print_log('match', 'Init ' . $self->{doc}->to_string) if DEBUG;

  return $self->{doc}->next;
};
| 49 | |
| 50 | |
# Move to next posting
#
# Reads the current posting of the wrapped doc, remembers its doc id
# and advances the wrapped doc by one step.
# Returns 1 if a posting was available. Otherwise the stored doc id
# is reset to undef and nothing is returned.
sub next {
  my $self = shift;

  # Make sure the wrapped doc was advanced once before reading from it
  $self->_init;

  if (DEBUG) {
    print_log('match', 'Check next valid match');
  };

  my $docs    = $self->{doc};
  my $posting = $docs->current;

  if ($posting) {

    if (DEBUG) {
      print_log('match', 'Document ' . $posting->doc_id . ' is valid');
    };

    $self->{doc_id} = $posting->doc_id;

    # TODO:
    #   probably check if start and end is in a valid area
    # $self->{start} = $self->start;
    # $self->{end} = $self->end;

    # $self->{payload} = $current->payload->add(
    #   0,
    #   $self->{number},
    #   $self->{start},
    #   $self->{end}
    # );

    # Step forward, so the following call sees the following posting
    $docs->next;

    if (DEBUG) {
      print_log('match', 'Defined match ' . $self->current->to_string);
    };

    return 1;
  };

  # Nothing left to read
  $self->{doc_id} = undef;

  if (DEBUG) {
    print_log('match', 'No more document');
  };

  return;
};
| 88 | |
| 89 | |
# Get maximum frequency
#
# Always 1, as a match can only occur once
# (although this requires a filter!)
sub max_freq {
  return 1;
};
| 96 | |
| 97 | |
# Stringification
#
# Serializes the match as "[[<doc>:<start>-<end>]]". Before the closing
# brackets, "!<classes>" is added when any of the lower 15 bits of the
# flags is set, followed by "$<payload>" when a payload is given.
sub to_string {
  my $self = shift;

  my $doc_str = $self->{doc}->to_string;
  my $out     = "[[${doc_str}:$self->{start}-$self->{end}";

  # In case a class != 0 is set - serialize
  if ($self->{flags} && ($self->{flags} & 0b0111_1111_1111_1111)) {
    my @classes = flags_to_classes($self->{flags});
    $out .= '!' . join(',', @classes);
  };

  my $payload = $self->{payload};
  $out .= '$' . $payload->to_string if $payload;

  return $out . ']]';
};
| 112 | |
| 113 | |
# Filter query by VC
#
# Replaces the wrapped doc with a Krawfish::Corpus::And built from the
# previous doc and a clone of $corpus. This is useful to, e.g.,
# make sure the document is live.
#
# Arguments:
#   $corpus - object providing a clone method
# Returns the invocant, so calls can be chained.
sub filter_by {
  my ($self, $corpus) = @_;

  # This file does not load Krawfish::Corpus::And on its own.
  # Require it on demand, so the constructor call below does not depend
  # on some other module having loaded the class before.
  # require does nothing in case the module is already loaded.
  require Krawfish::Corpus::And;

  # TODO:
  #   Check always that the query isn't moved forward yet!
  $self->{doc} = Krawfish::Corpus::And->new(
    $self->{doc},
    $corpus->clone
  );

  return $self;
};
| 128 | |
| Akron | 2bc94da | 2017-10-27 15:20:36 +0200 | [diff] [blame] | 129 | |
# Requires filter
#
# Always returns 0.
sub requires_filter {
  return 0;
};
| 134 | |
| 135 | |
| Akron | 5e4b297 | 2017-08-05 20:59:48 +0200 | [diff] [blame] | 136 | 1; |