blob: 3ac987b7642241a1696cf1c3056b6e39e2fb9456 [file] [log] [blame]
Akronc5aef8b2017-09-25 19:20:21 +02001package Krawfish::Meta::Segment::Aggregate::TermFreq;
2use parent 'Krawfish::Meta::Segment::Aggregate::Base';
Akrond2469472017-06-28 13:28:10 +02003use Krawfish::Util::String qw/squote/;
4use Krawfish::Log;
5use strict;
6use warnings;
7
Akron79204af2017-08-15 12:32:51 +02008# Counts the frequency for each term in a TermFrequency
9# query. This is necessary for co-occurrence search and the
10# Glemm service.
11
12
Akron576ebfc2017-08-06 22:50:15 +020013# TODO:
14# This is rather a group query than an aggregation query.
15
Akrond2469472017-06-28 13:28:10 +020016use constant DEBUG => 0;
17
# Constructor.
#
# Arguments (positional):
#   $index      - stored as-is under the 'index' key
#   $term_query - query object for the term to count; it must provide
#                 a next() method
#
# Returns the blessed aggregation object with a frequency counter
# initialised to 0.
sub new {
  my ($class, $index, $term_query) = @_;

  my $self = bless {
    index      => $index,
    term_query => $term_query,
    freq       => 0
  }, $class;

  # next() is called once up front; a false return value is taken to
  # mean that the term never occurs. In that case the query is dropped,
  # so each_doc() can bail out early.
  # (The query lives under 'term_query' - there is no 'term' slot.)
  unless ($self->{term_query}->next) {
    $self->{term_query} = undef;
  };

  return $self;
};
33
# Called for a matching document of the virtual corpus.
#
# Arguments:
#   $current - posting of the current document; must provide doc_id()
#
# If the term query sits on (or can be skipped to) the same document,
# the term's in-document frequency is added to the running total.
sub each_doc {
  my ($self, $current) = @_;

  # Nothing to count if the query was dropped in the constructor
  my $query = $self->{term_query} or return;

  # Document id of the current match in the VC
  my $target = $current->doc_id;

  # Only skip forward if the query is not already on the target document
  my $in_doc = $query->current->doc_id == $target
    || $query->skip_doc($target) == $target;

  # Accumulate the frequency of the term in this document
  $self->{freq} += $query->freq_in_doc if $in_doc;
};
51
52
# Finish the result.
#
# Arguments:
#   $result - hash reference collecting the aggregation output
#
# Stores the accumulated frequency under $result->{freq}->{<term>},
# creating the 'freq' hash if it does not exist yet. Frequencies
# already recorded for other terms are left untouched.
sub on_finish {
  my ($self, $result) = @_;

  # The constructor clears the query when the term never occurs;
  # without a query there is no term to report the frequency for.
  my $query = $self->{term_query} or return;

  my $term = $query->term;
  my $freq = ($result->{freq} //= {});
  $freq->{$term} = $self->{freq};
};
61
# Stringification.
#
# Returns 'tfreq:' followed by the term of the wrapped term query,
# passed through squote().
#
# NOTE(review): the constructor sets term_query to undef when the term
# never occurs; in that state the term() call below dies. Decide what
# the serialization should look like for that case.
sub to_string {
  my $self = shift;
  return 'tfreq:' . squote($self->{term_query}->term);
};
66
671;