| Akron | c5aef8b | 2017-09-25 19:20:21 +0200 | [diff] [blame] | 1 | package Krawfish::Meta::Segment::Aggregate::TermFreq; |
| 2 | use parent 'Krawfish::Meta::Segment::Aggregate::Base'; |
| Akron | d246947 | 2017-06-28 13:28:10 +0200 | [diff] [blame] | 3 | use Krawfish::Util::String qw/squote/; |
| 4 | use Krawfish::Log; |
| 5 | use strict; |
| 6 | use warnings; |
| 7 | |
| Akron | 79204af | 2017-08-15 12:32:51 +0200 | [diff] [blame] | 8 | # Counts the frequency for each term in a TermFrequency |
| 9 | # query. This is necessary for co-occurrence search and the |
| 10 | # Glemm service. |
| 11 | |
| 12 | |
| Akron | 576ebfc | 2017-08-06 22:50:15 +0200 | [diff] [blame] | 13 | # TODO: |
| 14 | # This is rather a group query than an aggregation query. |
| 15 | |
| Akron | d246947 | 2017-06-28 13:28:10 +0200 | [diff] [blame] | 16 | use constant DEBUG => 0; |
| 17 | |
# Construct a new term frequency aggregator.
#
# Positional arguments (after the class name):
#   index      - stored unchanged under the 'index' key
#   term_query - query object for the term to count; it has to
#                provide next(), and the other methods of this
#                package call current(), skip_doc(), freq_in_doc()
#                and term() on it
#
# Returns the blessed object with the frequency counter set to 0.
# If the initial next() on the query is false, the stored query is
# replaced by undef, which makes each_doc() a no-op.
sub new {
  my ($class, $index, $term_query) = @_;

  my $self = bless {
    index      => $index,
    term_query => $term_query,
    freq       => 0
  }, $class;

  # The term never occurs.
  # The query is stored as 'term_query' - there is no 'term' slot,
  # so asking $self->{term} would always fail on an undefined value.
  unless ($self->{term_query}->next) {
    $self->{term_query} = undef;
  };

  return $self;
};
| 33 | |
# Called with the current match; adds the frequency of the term in
# the document of that match to the running total in $self->{freq}.
#
#   $current - object providing doc_id()
#
# Does nothing when no term query is stored.
sub each_doc {
  my ($self, $current) = @_;

  # Without a term query there is nothing to count
  my $query = $self->{term_query} or return;

  # Document the aggregation is currently looking at
  my $target = $current->doc_id;

  # Either the query already points to the document, or it is asked
  # to skip to it. skip_doc() is only called in the latter case.
  my $in_doc = $query->current->doc_id == $target;
  $in_doc ||= $query->skip_doc($target) == $target;

  # Add frequency in document to result
  $self->{freq} += $query->freq_in_doc if $in_doc;
};
| 51 | |
| 52 | |
# Finish the result.
#
# Stores the collected frequency under $result->{freq}{<term>},
# where <term> is what term() of the stored query returns. The
# 'freq' hash is created in $result if it does not exist yet.
#
#   $result - hash reference the frequencies are written to
sub on_finish {
  my ($self, $result) = @_;

  # new() replaces the query by undef when the term never occurs.
  # Without the query the term is not known here, so there is
  # nothing that can be written.
  my $query = $self->{term_query} or return;

  my $freq = ($result->{freq} //= {});
  $freq->{$query->term} = $self->{freq};
};
| 61 | |
# Stringification.
#
# Returns 'tfreq:' followed by the squote()d term of the stored query.
#
# NOTE(review): new() sets the stored query to undef when the term
# never occurs; calling this method on such an object dies on the
# term() call. Confirm which serialization is wanted for that case.
sub to_string {
  # The invocant has to be taken from @_ - without this declaration
  # $self is an undeclared variable and the file fails under strict.
  my $self = shift;

  return 'tfreq:' . squote($self->{term_query}->term);
};
| 66 | |
| 67 | 1; |