| Akron | d1f2e8b | 2016-11-15 22:11:53 +0100 | [diff] [blame] | 1 | package Krawfish::Query; |
| Akron | 71fc0ec | 2017-11-02 17:34:21 +0100 | [diff] [blame] | 2 | use strict; |
| 3 | use warnings; | ||||
| Akron | 7aed51c | 2017-10-31 16:23:49 +0100 | [diff] [blame] | 4 | use Role::Tiny; |
| Akron | 71fc0ec | 2017-11-02 17:34:21 +0100 | [diff] [blame] | 5 | use Krawfish::Log; |
| 6 | use Krawfish::Posting::Span; | ||||
| 7 | use Scalar::Util qw/blessed refaddr/; | ||||
| 8 | |||||
| Akron | ec35165 | 2017-11-01 16:04:38 +0100 | [diff] [blame] | 9 | with 'Krawfish::Corpus'; |
| 10 | requires qw/skip_pos | ||||
| Akron | 7aed51c | 2017-10-31 16:23:49 +0100 | [diff] [blame] | 11 | filter_by |
| Akron | ec35165 | 2017-11-01 16:04:38 +0100 | [diff] [blame] | 12 | requires_filter/; |
| Akron | d1f2e8b | 2016-11-15 22:11:53 +0100 | [diff] [blame] | 13 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 14 | |
# Krawfish::Query is the base role (see Role::Tiny) for all span queries.
| 16 | |||||
| Akron | a508658 | 2017-10-21 18:00:12 +0200 | [diff] [blame] | 17 | # TODO: |
| 18 | # Use a boolean init value to indicate a | ||||
| 19 | # query needs a next first | ||||
| 20 | |||||
| Akron | 94256e6 | 2017-10-10 17:29:18 +0200 | [diff] [blame] | 21 | use constant DEBUG => 0; |
| Akron | 6ff7b48 | 2017-02-09 01:29:29 +0100 | [diff] [blame] | 22 | |
# Build the span posting object for the current position.
# Returns nothing as long as no document is set,
# i.e. while $self->{doc_id} is undefined.
sub current {
  my ($self) = @_;

  # Only a positioned query has a current posting
  if (defined $self->{doc_id}) {

    # TODO: May have an offset value as well
    return Krawfish::Posting::Span->new(
      doc_id  => $self->{doc_id},
      start   => $self->{start},
      end     => $self->{end},
      payload => $self->{payload},
      flags   => $self->{flags}
    );
  };

  return;
};
| 37 | |||||
| Akron | 0c998cc | 2017-07-19 03:29:37 +0200 | [diff] [blame] | 38 | |
# Move forward to the first posting that lies in a document
# with a higher doc id than the current one.
# Returns 1 on success. Returns nothing if there is no current
# posting or if next() fails before a later document is reached.
# This is only relevant for term posting lists.
sub next_doc {
  my ($self) = @_;

  # TODO:
  #   There may be the need to
  #   have an _init value

  # Without a current posting there is no document to leave
  my $start_posting = $self->current;
  return unless $start_posting;
  my $left_doc_id = $start_posting->doc_id;

  print_log('query', refaddr($self) . ": go to next doc following $left_doc_id")
    if DEBUG;

  # Advance at least once and keep going until a later document shows up
  while (1) {
    return unless $self->next;
    last if $self->current->doc_id > $left_doc_id;
  };

  return 1;
};
| 60 | |||||
| Akron | c4bf5fb | 2017-07-18 02:20:40 +0200 | [diff] [blame] | 61 | |
# Skip to (or beyond) a certain position in the current document.
#
# Takes the target position. Advances with next() as long as the
# current posting lies in the document the query was positioned in
# on entry and starts before the target position.
#
# Returns 1 if the current posting is in that document and does not
# start before the target position. Returns nothing if there is no
# current posting, if the query left the document, or if next() failed.
# TODO:
#   This behaviour should be improved!
sub skip_pos {
  my ($self, $target_pos) = @_;
  my $current = $self->current or return;
  my $doc_id = $current->doc_id;

  while (($current = $self->current) && $current->doc_id == $doc_id) {

    # Positioned at or beyond the target in the same document
    return 1 unless $current->start < $target_pos;

    if (DEBUG) {
      print_log('query', "Skip " . $current->to_string .
                  " to pos $target_pos in doc id $doc_id");
    };

    # Stop as soon as next() fails. Otherwise a query that keeps its
    # last posting after exhaustion would be revisited forever.
    $self->next or return;
  };

  return;
};
| 86 | |||||
| 87 | |||||
# Not implemented yet - calling it dies with "Unimplemented".
# TODO:
#   This is a value that should probably be stored
#   at span-beginnings and can help to jump through very long
#   sequences of spans
sub max_length {
  # Explicit form of the yada yada statement (...)
  die 'Unimplemented';
};
| 95 | |||||
| 96 | |||||
# Frequency lookup is not available here.
# Emits a warning and hands back the return value of warn.
sub freq_in_doc {
  my $message = 'freq_in_doc only supported for term queries (see PostingPointer)';
  return warn $message;
};
| 100 | |||||
| 101 | |||||
# Get the current match.
# This implementation has no match to offer and always returns
# undef - a single undef value in list context as well.
sub current_match {
  my $no_match = undef;
  return $no_match;
};
| 106 | |||||
| Akron | c4bf5fb | 2017-07-18 02:20:40 +0200 | [diff] [blame] | 107 | |
| 108 | |||||
# Lose all information about the query.
# Not yet implemented - does nothing and returns nothing.
sub close {
  return;
};
| 113 | |||||
| 114 | |||||
| Akron | d1f2e8b | 2016-11-15 22:11:53 +0100 | [diff] [blame] | 115 | 1; |