blob: f18981fb0fa5dfa670d1c54921dce37e0de9d8fa [file] [log] [blame]
Akron5f9091c2017-03-24 20:37:35 +01001package Krawfish::Corpus::Or;
Akron5f9091c2017-03-24 20:37:35 +01002use strict;
3use warnings;
Akron71fc0ec2017-11-02 17:34:21 +01004use Role::Tiny::With;
5use Krawfish::Util::Bits;
6use Krawfish::Log;
7
8with 'Krawfish::Corpus';
Akron5f9091c2017-03-24 20:37:35 +01009
Akron2bc94da2017-10-27 15:20:36 +020010use constant DEBUG => 0;
Akron5f9091c2017-03-24 20:37:35 +010011
# Construct a disjunction over two operands.
# The operands are stored under 'first' and 'second';
# doc_id starts at -1 and the flags start with no bit set.
sub new {
  my ($class, $first, $second) = @_;

  my %self = (
    first  => $first,
    second => $second,
    doc_id => -1,
    flags  => 0b0000_0000_0000_0000
  );

  return bless \%self, $class;
};
21
Akrona588d072017-10-13 14:45:34 +020022
23# Clone query object
# Create a fresh query object from clones of both operands.
# State such as doc_id and flags is not copied, as the
# new object is built by the constructor.
sub clone {
  my ($self) = @_;

  # Clone the operands in order
  my @operands;
  for my $side ('first', 'second') {
    push @operands, $self->{$side}->clone;
  };

  return __PACKAGE__->new(@operands);
};
31
Akrona588d072017-10-13 14:45:34 +020032
33# Initialize query
# Move both operands to their first posting.
# Only the first invocation has an effect - the 'init'
# counter is incremented on every call and any later
# call returns immediately.
sub _init {
  my ($self) = @_;

  # Already initialized
  if ($self->{init}++) {
    return;
  };

  $self->{first}->next;
  return $self->{second}->next;
};
39
Akrona588d072017-10-13 14:45:34 +020040
41# Move to next posting
# Move to the next posting of the disjunction.
#
# The operand whose current posting has the lower doc_id is chosen
# (on a tie the first operand). Postings with the doc_id that was
# stored last are skipped, so every doc_id is only returned once.
# The doc_id and the flags of the chosen posting are stored on the
# object and the chosen operand is advanced. If both operands
# point to the same document, the flags of both postings are or-ed.
#
# Returns 1 if a posting was found. If both operands are exhausted,
# the flags are cleared, doc_id is set to undef and nothing is returned.
sub next {
  my $self = shift;
  $self->_init;

  my $first  = $self->{first}->current;
  my $second = $self->{second}->current;

  while ($first || $second) {

    # Whether both operands point to the same document has to be
    # decided for every candidate anew. A tie that is skipped as a
    # duplicate must not leak into a later posting, otherwise the
    # flags of an unrelated document would be merged in.
    my $both = 0;
    my $curr = 'first';

    # First span is no longer available
    if (!$first) {
      print_log('vc_or', 'Current is second operand (a)') if DEBUG;
      $curr = 'second';
    }

    # Second span is no longer available
    elsif (!$second) {
      print_log('vc_or', 'Current is first operand (b)') if DEBUG;
      $curr = 'first';
    }

    # First operand is in front
    elsif ($first->doc_id < $second->doc_id) {
      print_log('vc_or', 'Current is first operand (1)') if DEBUG;
      $curr = 'first';
    }

    # Second operand is in front
    elsif ($first->doc_id > $second->doc_id) {
      print_log('vc_or', 'Current is second operand (1)') if DEBUG;
      $curr = 'second';
    }

    # Both operands point to the same document
    else {
      print_log('vc_or', 'Current is first operand (4)') if DEBUG;
      $curr = 'first';
      $both = 1;
    };

    # Get the current posting of the respective operand
    my $curr_post = $self->{$curr}->current;

    # Only return unique identifier
    if ($self->{doc_id} == $curr_post->doc_id) {

      if (DEBUG) {
        print_log('vc_or', 'Document ID already returned: '. $self->{doc_id});
      };

      # Forward
      $self->{$curr}->next;

      # Set current docs
      $first  = $self->{first}->current;
      $second = $self->{second}->current;

      # Loop control - not a call to this method
      CORE::next;
    };

    $self->{doc_id} = $curr_post->doc_id;
    $self->{flags}  = $curr_post->flags;

    # Merge the flags of the second operand on a tie
    if ($both) {
      $self->{flags} |= $second->flags;
    };

    if (DEBUG) {
      print_log('vc_or', 'Current doc is ' . $self->current->to_string);
      print_log('vc_or', "Next on $curr operand");
    };

    $self->{$curr}->next;
    return 1;
  };

  # Both operands are exhausted
  $self->{flags}  = 0b0000_0000_0000_0000;
  $self->{doc_id} = undef;
  return;
};
124
125
Akrona588d072017-10-13 14:45:34 +0200126# Stringification
# Serialize the query as 'or(<first>,<second>)',
# based on the stringification of both operands.
sub to_string {
  my ($self) = @_;

  my $left  = $self->{first}->to_string;
  my $right = $self->{second}->to_string;

  return 'or(' . $left . ',' . $right . ')';
};
131
Akronfaf76852017-07-19 17:37:07 +0200132
Akrona588d072017-10-13 14:45:34 +0200133# Get maximum frequency
# Get the maximum frequency, which is the sum of the
# maximum frequencies of both operands.
sub max_freq {
  my ($self) = @_;

  my $first_freq  = $self->{first}->max_freq;
  my $second_freq = $self->{second}->max_freq;

  return $first_freq + $second_freq;
};
138
139
Akron5f9091c2017-03-24 20:37:35 +01001401;