| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 1 | package Krawfish::Query::Length; |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 2 | use strict; |
| 3 | use warnings; |
| Akron | 71fc0ec | 2017-11-02 17:34:21 +0100 | [diff] [blame] | 4 | use Role::Tiny::With; |
| 5 | use Krawfish::Log; |
| 6 | |
| 7 | with 'Krawfish::Query'; |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 8 | |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 9 | use constant DEBUG => 0; |
| 10 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 11 | # TODO: |
| 12 | # This should respect different tokenizations! |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 13 | |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 14 | # Constructor |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 15 | sub new { |
| 16 | my $class = shift; |
| 17 | bless { |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 18 | span => shift, |
| 19 | min => shift // 0, |
| 20 | max => shift, |
| 21 | tokens => shift, |
| 22 | current => undef |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 23 | }, $class; |
| 24 | }; |
| 25 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 26 | |
| 27 | # Clone query |
| Akron | b765367 | 2017-08-07 14:34:14 +0200 | [diff] [blame] | 28 | sub clone { |
| 29 | my $self = shift; |
| 30 | __PACKAGE__->new( |
| 31 | $self->{span}->clone, |
| 32 | $self->{min}, |
| 33 | $self->{max}, |
| 34 | $self->{tokens}, |
| 35 | ); |
| 36 | }; |
| 37 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 38 | |
| 39 | # Move to next posting |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 40 | sub next { |
| 41 | my $self = shift; |
| 42 | |
| 43 | my $span = $self->{span}; |
| 44 | |
| 45 | # Check if the length is between the given boundaries |
| 46 | while ($span->next) { |
| 47 | |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 48 | # Get current span |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 49 | my $current = $span->current; |
| 50 | |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 51 | my $length = $current->end - $current->start; |
| 52 | |
| 53 | print_log('length', "Check length $length") if DEBUG; |
| 54 | |
| 55 | # Max is given |
| 56 | if ($self->{max}) { |
| 57 | |
| 58 | # min and max are identical |
| 59 | if ($self->{min} == $self->{max} && $length == $self->{min}) { |
| 60 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 61 | if (DEBUG) { |
| 62 | print_log( |
| 63 | 'length', |
| 64 | "! Length $length has the length " . $self->{min} |
| 65 | ); |
| 66 | }; |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 67 | |
| 68 | $self->{current} = $current; |
| 69 | return 1; |
| 70 | } |
| 71 | |
| 72 | # in min and max |
| 73 | elsif ($length >= $self->{min} && $length <= $self->{max}) { |
| 74 | |
| 75 | if (DEBUG) { |
| 76 | print_log( |
| 77 | 'length', |
| 78 | "! Length $length is between " . $self->{min} . '-' . $self->{max} |
| 79 | ); |
| 80 | }; |
| 81 | |
| 82 | $self->{current} = $current; |
| 83 | return 1; |
| 84 | }; |
| 85 | } |
| 86 | |
| 87 | # length >= min |
| 88 | elsif ($length > $self->{min}) { |
| 89 | |
| 90 | print_log('length', '! Length is larger than ' . $self->{min}) if DEBUG; |
| 91 | |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 92 | $self->{current} = $current; |
| 93 | return 1; |
| 94 | }; |
| 95 | }; |
| 96 | |
| 97 | $self->{current} = undef; |
| 98 | return 0; |
| 99 | }; |
| 100 | |
| 101 | |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 102 | # Get current posting |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 103 | sub current { |
| 104 | return $_[0]->{current}; |
| 105 | }; |
| 106 | |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 107 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 108 | # Get maximum frequency |
| Akron | faf7685 | 2017-07-19 17:37:07 +0200 | [diff] [blame] | 109 | sub max_freq { |
| 110 | $_[0]->{span}->max_freq; |
| Akron | 0c998cc | 2017-07-19 03:29:37 +0200 | [diff] [blame] | 111 | }; |
| 112 | |
| Akron | 15fc197 | 2017-07-20 22:53:00 +0200 | [diff] [blame] | 113 | |
| Akron | a588d07 | 2017-10-13 14:45:34 +0200 | [diff] [blame] | 114 | # Filter query by VC |
| Akron | 15fc197 | 2017-07-20 22:53:00 +0200 | [diff] [blame] | 115 | sub filter_by { |
| 116 | my ($self, $corpus) = @_; |
| 117 | $self->{span} = $self->{span}->filter_by($corpus); |
| 118 | $self; |
| 119 | }; |
| 120 | |
| 121 | |
| Akron | 2bc94da | 2017-10-27 15:20:36 +0200 | [diff] [blame] | 122 | # Requires filter |
| 123 | sub requires_filter { |
| 124 | $_[0]->{span}->requires_filter; |
| 125 | }; |
| 126 | |
| 127 | |
| Akron | 0a29cd2 | 2017-02-06 10:58:02 +0100 | [diff] [blame] | 128 | # Stringification |
| Akron | bcbe268 | 2017-02-05 13:05:55 +0100 | [diff] [blame] | 129 | sub to_string { |
| 130 | my $self = shift; |
| 131 | my $str = 'length('; |
| 132 | $str .= $self->{min} . '-' . $self->{max}; |
| 133 | $str .= ';' . $self->{token} if $self->{token}; |
| 134 | $str .= ':' . $self->{span}->to_string; |
| 135 | return $str . ')'; |
| 136 | }; |
| 137 | |
| 138 | 1; |