| Akron | ba87cba | 2016-10-24 03:13:51 +0200 | [diff] [blame] | 1 | package Krawfish::Posting; |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 2 | use overload '""' => sub { $_[0]->to_string }, fallback => 1; |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 3 | use Krawfish::Util::Bits; |
| Akron | 875cc33 | 2016-11-15 13:06:58 +0100 | [diff] [blame] | 4 | use Krawfish::Posting::Payload; |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 5 | use Krawfish::Log; |
| 6 | use bytes; |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 7 | use strict; |
| 8 | use warnings; |
| 9 | |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 10 | use constant DEBUG => 0; |
| Akron | 90225de | 2017-10-19 18:33:03 +0200 | [diff] [blame] | 11 | |
| Akron | 875cc33 | 2016-11-15 13:06:58 +0100 | [diff] [blame] | 12 | # Constructor |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 13 | sub new { |
| 14 | my $class = shift; |
| 15 | bless { @_ }, $class; |
| 16 | }; |
| 17 | |
| Akron | 875cc33 | 2016-11-15 13:06:58 +0100 | [diff] [blame] | 18 | |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 19 | # Current document |
| Akron | f0d514a | 2016-11-01 14:16:25 +0100 | [diff] [blame] | 20 | sub doc_id { |
| 21 | return $_[0]->{doc_id}; |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 22 | }; |
| 23 | |
| 24 | |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 25 | # Corpus classes |
| Akron | 6fc5b71 | 2017-10-24 14:48:39 +0200 | [diff] [blame^] | 26 | sub flags { |
| Akron | f0a7771 | 2017-10-18 16:39:18 +0200 | [diff] [blame] | 27 | my ($self, $flags) = @_; |
| 28 | |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 29 | |
| 30 | # Class 0 is set per default |
| 31 | $self->{flags} //= 0b1000_0000_0000_0000; |
| 32 | |
| 33 | return $self->{flags} unless defined $flags; |
| 34 | return $self->{flags} & $flags; |
| Akron | f0a7771 | 2017-10-18 16:39:18 +0200 | [diff] [blame] | 35 | }; |
| 36 | |
| 37 | |
| Akron | 90225de | 2017-10-19 18:33:03 +0200 | [diff] [blame] | 38 | # Returns a list of matching query corpus classes |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 39 | sub corpus_classes { |
| Akron | f0a7771 | 2017-10-18 16:39:18 +0200 | [diff] [blame] | 40 | my ($self, $query_flags) = @_; |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 41 | |
| 42 | # Returns all flags requested and all flags existing |
| Akron | 6fc5b71 | 2017-10-24 14:48:39 +0200 | [diff] [blame^] | 43 | my $intersect = $self->flags($query_flags); |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 44 | |
| Akron | f0a7771 | 2017-10-18 16:39:18 +0200 | [diff] [blame] | 45 | my @list = (); |
| 46 | |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 47 | if (DEBUG) { |
| 48 | print_log( |
| 49 | 'post', |
| 50 | 'Intersection between stored and queried classes is <'. |
| 51 | reverse(bitstring($intersect)) . '>' |
| 52 | ); |
| 53 | }; |
| 54 | |
| 55 | # Remove zero class |
| 56 | $intersect &= 0b0111_1111_1111_1111; |
| 57 | |
| 58 | # Initialize move variable |
| 59 | my $move = 0b0100_0000_0000_0000; |
| 60 | |
| 61 | my $i = 1; |
| 62 | |
| 63 | # As long as there a set bits ... |
| 64 | while ($intersect) { |
| 65 | |
| 66 | if (DEBUG) { |
| 67 | print_log( |
| 68 | 'post', |
| 69 | 'Check move ' . reverse(bitstring($move)) . ' and intersect ' . |
| 70 | reverse(bitstring($intersect)) |
| 71 | ); |
| Akron | f0a7771 | 2017-10-18 16:39:18 +0200 | [diff] [blame] | 72 | }; |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 73 | |
| 74 | if ($intersect & $move) { |
| 75 | if (DEBUG) { |
| 76 | print_log( |
| 77 | 'post', |
| 78 | 'Move ' . reverse(bitstring($move)) . ' matches ' . reverse(bitstring($intersect)) |
| 79 | ); |
| 80 | }; |
| 81 | push @list, $i; |
| 82 | $intersect &= ~$move; |
| 83 | }; |
| 84 | $move >>= 1; |
| 85 | $i++; |
| Akron | f0a7771 | 2017-10-18 16:39:18 +0200 | [diff] [blame] | 86 | }; |
| 87 | |
| 88 | # Return list of valid classes |
| 89 | return @list; |
| 90 | }; |
| 91 | |
| Akron | d8540bd | 2017-02-06 15:05:26 +0100 | [diff] [blame] | 92 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 93 | # Check if two postings are identical |
| 94 | # WARNING: |
| 95 | # This should compare payloads separately, |
| 96 | # because classes may be in different order, |
| 97 | # though resulting in identical postings |
| Akron | 6fc5b71 | 2017-10-24 14:48:39 +0200 | [diff] [blame^] | 98 | # TODO: |
| 99 | # Serialization is also bad for flags!!! |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 100 | sub same_as { |
| 101 | my ($self, $comp) = @_; |
| 102 | return unless $comp; |
| 103 | return $self->to_string eq $comp->to_string; |
| 104 | }; |
| 105 | |
| 106 | |
| Akron | e091453 | 2017-07-29 19:53:10 +0200 | [diff] [blame] | 107 | # Stringification |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 108 | sub to_string { |
| 109 | my $self = shift; |
| Akron | e1a8a1b | 2017-10-20 16:51:09 +0200 | [diff] [blame] | 110 | my $str = '[' . $self->{doc_id}; |
| Akron | 875cc33 | 2016-11-15 13:06:58 +0100 | [diff] [blame] | 111 | |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 112 | # In case a class != 0 is set - serialize |
| Akron | 6fc5b71 | 2017-10-24 14:48:39 +0200 | [diff] [blame^] | 113 | if ($self->flags & 0b0111_1111_1111_1111) { |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 114 | $str .= '!' . join(',', $self->corpus_classes); |
| Akron | 901bc37 | 2017-10-20 11:44:08 +0200 | [diff] [blame] | 115 | }; |
| 116 | |
| Akron | e1a8a1b | 2017-10-20 16:51:09 +0200 | [diff] [blame] | 117 | $str . ']'; |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 118 | }; |
| 119 | |
| Akron | d8540bd | 2017-02-06 15:05:26 +0100 | [diff] [blame] | 120 | |
| Akron | e1a8a1b | 2017-10-20 16:51:09 +0200 | [diff] [blame] | 121 | |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 122 | 1; |