| Akron | c1ed58c | 2017-08-04 17:26:30 +0200 | [diff] [blame] | 1 | package Krawfish::Koral::Meta::Node::Sort; |
| Akron | c5aef8b | 2017-09-25 19:20:21 +0200 | [diff] [blame^] | 2 | use Krawfish::Meta::Segment::Sort; |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 3 | use Krawfish::Query::Nowhere; |
| Akron | c1ed58c | 2017-08-04 17:26:30 +0200 | [diff] [blame] | 4 | use Krawfish::Log; |
| 5 | use strict; |
| 6 | use warnings; |
| 7 | |
# Compile-time constants of this package.
# The hash reference form is required to declare several constants at once:
# with a plain list, 'constant' defines a single list constant named after
# the first element (DEBUG => (1, 'UNIQUE', 'id')) and UNIQUE is never
# defined at all.
use constant {
  DEBUG  => 1,    # Emit a log message on node construction
  UNIQUE => 'id'  # NOTE(review): presumably the name of the unique field - confirm
};
| Akron | c1ed58c | 2017-08-04 17:26:30 +0200 | [diff] [blame] | 12 | |
# Construct a new sort node.
# The positional arguments following the class name are stored, in this
# order, as 'query', 'sort', 'top_k' and 'filter'. Arguments not passed
# are stored as undef.
sub new {
  my ($class, @args) = @_;

  # Log the passed arguments, with '?' standing in for every false value
  if (DEBUG) {
    my @printable = map { $_ ? $_ : '?' } @args;
    print_log(
      'kq_n_sort',
      'Create sort query with ' . join(', ', @printable)
    );
  };

  my ($query, $sort, $top_k, $filter) = @args;

  return bless {
    query  => $query,
    sort   => $sort,
    top_k  => $top_k,
    filter => $filter
  }, $class;
};
| 30 | |
| 31 | |
# Type name of this node, always the string 'sort'
sub type {
  return 'sort';
};
| 35 | |
| 36 | |
# Identify the sort criteria and the nested query using a dictionary.
# Calls identify($dict) on every criterion in 'sort' and keeps the true
# results only, then replaces 'query' by the result of its own
# identify($dict) call.
# Returns the node itself, with 'sort' reduced to the kept criteria.
# If no criterion is kept, a warning is issued and the identified query
# is returned instead of the node ('sort' is left as it was).
sub identify {
  my ($self, $dict) = @_;

  my @identifier;
  for my $criterion (@{$self->{sort}}) {

    # A criterion may not exist in the dictionary - skip it
    my $identified = $criterion->identify($dict);
    next unless $identified;

    push @identifier, $identified;
  };

  my $query = $self->{query} = $self->{query}->identify($dict);

  # Nothing left to sort by, hand back the plain query
  unless (@identifier) {
    warn 'There is currently no sorting defined';
    return $query;
  };

  $self->{sort} = \@identifier;
  return $self;
};
| 62 | |
| 63 | |
# Stringification
# Serializes the node as 'sort(<criteria>[;k=<top_k>][;sortFilter]:<query>)',
# where <criteria> are the comma separated to_string results of all entries
# in 'sort'. The 'k=' and 'sortFilter' parts are only added if 'top_k'
# respectively 'filter' are true.
sub to_string {
  my $self = shift;

  my @parts = join(',', map { $_->to_string } @{$self->{sort}});

  push @parts, 'k=' . $self->{top_k} if $self->{top_k};
  push @parts, 'sortFilter'          if $self->{filter};

  return 'sort(' . join(';', @parts) . ':' . $self->{query}->to_string . ')';
};
| 79 | |
| 80 | |
# Optimize query for postingslist
# Optimizes the nested query for the given segment. Returns a new
# Krawfish::Query::Nowhere object if the optimized query reports a
# max_freq of 0, and the node itself otherwise.
sub optimize {
  my ($self, $segment) = @_;

  my $optimized = $self->{query}->optimize($segment);

  # A maximum frequency of 0 - presumably nothing can match
  return Krawfish::Query::Nowhere->new if $optimized->max_freq == 0;

  # NOTE(review): the optimized query is not used beyond the frequency
  # check and 'query' is not replaced. This looks like a placeholder
  # until a segment sort is constructed as sketched below - confirm.
  #
  # Krawfish::Meta::Segment::Sort->new(
  #   query => $query,
  #   index => $segment,
  #   top_k => $top_k,
  #   ranks =>
  #   unique =>
  #   max_rank_ref =>
  # )

  return $self;
};
| 102 | |
| Akron | c1ed58c | 2017-08-04 17:26:30 +0200 | [diff] [blame] | 103 | 1; |