| Akron | 31e088b | 2017-09-29 14:48:49 +0200 | [diff] [blame] | 1 | package Krawfish::Meta::Segment::Group::TermExistence; |
| 2 | use parent 'Krawfish::Meta'; |
| Akron | 79204af | 2017-08-15 12:32:51 +0200 | [diff] [blame] | 3 | use strict; |
| 4 | use warnings; |
| 5 | |
| 6 | # The query works similarly to an Or-query, but only accepts term ids. |
| 7 | |
# Construct a new TermExistence query.
#
# Positional arguments:
#   1. term_id  - a single term query to check for existence (may be undef)
#   2. term_ids - optional nested TermExistence query (may be undef)
#
# The filter slot starts out undefined and the list of existing
# term ids starts out empty.
sub new {
  my ($class, $term_id, $term_ids) = @_;

  my %self = (
    term_id   => $term_id,    # Term Query
    term_ids  => $term_ids,   # Optional TermExistence-Query
    filter    => undef,
    existence => [],
  );

  return bless \%self, $class;
};
| 17 | |
# Initialization hook - not implemented yet.
# The body is only the `...` placeholder statement, so calling this
# method dies with an "Unimplemented" error.
sub _init {
  ...
};
| 21 | |
| 22 | |
# TODO:
#   Think about when next() is called, as it needs to be called on term_ids as well ...
#   Maybe this should be done in _init as a while query somehow.
#
# Move the filter (VC) stream to the next document that may contain
# one of the terms still being looked for.
#
# The current document of $self->{filter} is read, then the single term
# ($self->{term_id}) and the nested query ($self->{term_ids}) are moved
# up to that document. A single term found in the filter document is
# recorded via exists(), closed and dropped. A nested query that has no
# further matches has its existence list merged via exists() and is
# closed and dropped as well.
#
# Returns 1 if a further candidate document exists and the filter
# could be moved to it, 0 otherwise.
sub next {
  my $self = shift;

  # Get the current document in the VC
  my $filter = $self->{filter};
  my $doc_id = $filter->doc_id;

  # The next document to look for in the VC
  my $next_doc_id;


  # Check the single term_id for existence
  my $term = $self->{term_id};

  if (!$term) {
    # There is no simple term (left) to check - do nothing
  }

  # Should never happen
  elsif (!$term->current) {
    $self->{term_id} = undef;
  }

  # Term exists and can be checked
  else {

    # Is the VC document beyond the current document id
    if ($doc_id > $term->doc_id) {

      # Move the term document to the VC document
      $term->skip_doc($doc_id);
    };

    # Are both terms in the same document?
    if ($term->doc_id == $doc_id) {

      # Add this term to existence
      $self->exists($term->term_id);

      # Close posting
      $term->close;

      # Do not check any further
      $self->{term_id} = undef;
    }

    # Current term document is beyond current VC doc
    else {
      $next_doc_id = $term->doc_id;
    };
  };


  # Check the complex term_ids for existence
  my $terms = $self->{term_ids};

  if (!$terms) {
    # There is no complex query (left) to check - do nothing
  }

  # Should never happen
  elsif (!$terms->current) {
    $self->{term_ids} = undef;
  }

  else {

    # When there is a complex query, move on
    if ($doc_id > $terms->doc_id) {
      $terms->skip_doc($doc_id);
    };

    # There are no further matches
    unless ($terms->current) {

      # Merge existence values
      $self->exists($terms->existence);
      $terms->close;
      $self->{term_ids} = undef;
    }

    # Current terms are beyond current VC doc
    else {

      # Remember the next relevant document id.
      # This has to read the document of the complex query ($terms),
      # not of the single term, which may already be undefined here.
      my $terms_doc_id = $terms->doc_id;
      if (!defined $next_doc_id || $next_doc_id > $terms_doc_id) {
        $next_doc_id = $terms_doc_id;
      };
    };
  };

  # There is a next document id defined - move on
  if (defined $next_doc_id) {

    # Move the VC stream to the next relevant position
    if ($filter->skip_doc($next_doc_id)) {

      # It's fine
      return 1;
    };
  };

  return 0;
};
| 132 | |
| 133 | |
# Add term ids to the existence list.
#
# Accepts either a single term id or a reference to a list of term ids
# (as returned by existence() of a nested query). A reference is
# flattened, so the existence list only ever contains plain term ids.
sub exists {
  my ($self, $term_id) = @_;

  # The list is stored on the object itself
  my $existence = $self->{existence};

  if (ref $term_id) {
    push @$existence, @$term_id;
  }
  else {
    push @$existence, $term_id;
  };
};
| 145 | |
| 146 | |
# Return the list of existing term ids.
#
# The returned value is the array reference stored on the object,
# not a copy.
sub existence {
  my $self = shift;
  return $self->{existence};
};
| 151 | |
| 152 | |
# Not implemented yet: the body starts with the `...` placeholder
# statement, so calling this method dies with an "Unimplemented" error.
# NOTE(review): next() reads $self->{filter}, which new() initializes
# to undef - presumably this method is meant to set it; confirm.
sub filter_by {
  ...
  # It is relevant to filter the query - but one filter may be enough
};
| 157 | |
| 158 | |
| 159 | 1; |