| Akron | ba87cba | 2016-10-24 03:13:51 +0200 | [diff] [blame] | 1 | package Krawfish::Posting; |
| Akron | 71fc0ec | 2017-11-02 17:34:21 +0100 | [diff] [blame] | 2 | use strict; |
| 3 | use warnings; |
| Akron | 5df7d94 | 2017-11-01 17:42:34 +0100 | [diff] [blame] | 4 | use Role::Tiny; |
| Akron | 71fc0ec | 2017-11-02 17:34:21 +0100 | [diff] [blame] | 5 | use overload '""' => sub { $_[0]->to_string }, fallback => 1; |
| 6 | use Krawfish::Util::Bits; |
| 7 | use Krawfish::Posting::Payload; |
| 8 | use Krawfish::Log; |
| 9 | use bytes; |
| 10 | |
| Akron | 5df7d94 | 2017-11-01 17:42:34 +0100 | [diff] [blame] | 11 | requires qw/doc_id |
| 12 | flags |
| 13 | corpus_classes |
| 14 | same_as |
| 15 | to_string |
| 16 | clone/; |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 17 | |
| Akron | ce24263 | 2017-11-23 17:19:10 +0100 | [diff] [blame] | 18 | with 'Krawfish::Posting::Ranks'; |
| 19 | |
| Akron | ba0952d | 2017-10-23 18:59:55 +0200 | [diff] [blame] | 20 | use constant DEBUG => 0; |
| Akron | 90225de | 2017-10-19 18:33:03 +0200 | [diff] [blame] | 21 | |
# Constructor
#
# Takes the invoking class followed by a flat list of key/value pairs
# and returns a hash-based object blessed into that class, holding
# exactly those pairs.
sub new {
  my ($class, @fields) = @_;
  return bless { @fields }, $class;
};
| 27 | |
| Akron | 875cc33 | 2016-11-15 13:06:58 +0100 | [diff] [blame] | 28 | |
# Current document
#
# Read-only accessor: returns the value stored under the
# 'doc_id' key of the posting.
sub doc_id {
  my $self = shift;
  return $self->{doc_id};
};
| 33 | |
| 34 | |
# Flags of the posting
#
# Without an argument, returns the stored flags value.
# With a flags argument, returns the bitwise AND of the stored
# flags and the argument.
#
# Note that the stored value is initialised on first access
# if it was not defined before.
sub flags {
  my $self = shift;
  my $mask = shift;

  # Class 0 is set per default
  my $stored = ($self->{flags} //= 0b1000_0000_0000_0000);

  return defined $mask ? $stored & $mask : $stored;
};
| 46 | |
| 47 | |
# Returns a list of matching query corpus classes
#
# $query_flags is optional. If given, only flags that are set both
# on the posting and in $query_flags are considered; if omitted,
# all flags of the posting are considered (see flags()).
# The bit representing class 0 is always masked out before the
# remaining flags are handed to flags_to_classes(), whose result
# is returned unchanged.
sub corpus_classes {
  my ($self, $query_flags) = @_;

  # Returns all flags requested and all flags existing
  my $intersect = $self->flags($query_flags);

  if (DEBUG) {
    print_log(
      'post',
      'Intersection between stored and queried classes is <'.
        reverse(bitstring($intersect)) . '>'
      );
  };

  # Remove zero class
  return flags_to_classes($intersect & 0b0111_1111_1111_1111);
};
| 68 | |
| Akron | d8540bd | 2017-02-06 15:05:26 +0100 | [diff] [blame] | 69 | |
# Check if two postings are identical
#
# Two postings are considered identical if both their document
# identifiers and their flags are numerically equal.
# Returns 1 on identity; returns nothing (empty list / undef) if
# the postings differ or no true comparison posting was passed.
sub same_as {
  my $self  = shift;
  my $other = shift;

  # Nothing to compare with
  return unless $other;

  # Document identifiers are compared first, flags only on a match
  return 1 if $self->doc_id == $other->doc_id
    && $self->flags == $other->flags;

  return;
};
| 78 | |
| 79 | |
# Stringification
#
# Serializes the posting as '[' . doc_id . ']', with two optional
# parts in between:
#   '!' followed by the comma separated corpus classes, in case
#       any flag other than the one for class 0 is set, and
#   '::' followed by the comma separated ranks, in case ranks()
#       returns a true value (false rank values are written as '0').
sub to_string {
  my ($self) = @_;
  my $out = '[' . $self->{doc_id};

  # In case a class != 0 is set - serialize
  $out .= '!' . join(',', $self->corpus_classes)
    if ($self->flags & 0b0111_1111_1111_1111);

  # Append ranks, if there are any
  if ($self->ranks) {
    my @ranks = map { $_ || '0' } $self->ranks;
    $out .= '::' . join(',', @ranks);
  };

  return $out . ']';
};
| 96 | |
| Akron | d8540bd | 2017-02-06 15:05:26 +0100 | [diff] [blame] | 97 | |
| Akron | e1a8a1b | 2017-10-20 16:51:09 +0200 | [diff] [blame] | 98 | |
| Akron | 5f52153 | 2016-10-21 19:30:23 +0200 | [diff] [blame] | 99 | 1; |