Fixed frequency reporting in queries
diff --git a/lib/Krawfish/Corpus/All.pm b/lib/Krawfish/Corpus/All.pm
index 6e14f56..9a05e36 100644
--- a/lib/Krawfish/Corpus/All.pm
+++ b/lib/Krawfish/Corpus/All.pm
@@ -34,7 +34,7 @@
return $self->{live}->next;
};
-sub freq {
+sub max_freq {
$_[0]->{live}->freq;
};
diff --git a/lib/Krawfish/Corpus/And.pm b/lib/Krawfish/Corpus/And.pm
index 705a0bd..c322960 100644
--- a/lib/Krawfish/Corpus/And.pm
+++ b/lib/Krawfish/Corpus/And.pm
@@ -1,5 +1,6 @@
package Krawfish::Corpus::And;
use parent 'Krawfish::Corpus';
+use List::Util qw/min/;
use Krawfish::Log;
use strict;
use warnings;
@@ -82,4 +83,12 @@
return 'and(' . $self->{first}->to_string . ',' . $self->{second}->to_string . ')';
};
+
+# The maximum frequency is the minimum of both operands' maximum frequencies
+sub max_freq {
+ my $self = shift;
+ min($self->{first}->max_freq, $self->{second}->max_freq);
+};
+
+
1;
diff --git a/lib/Krawfish/Corpus/Cache.pm b/lib/Krawfish/Corpus/Cache.pm
index 0e095c1..c9a1746 100644
--- a/lib/Krawfish/Corpus/Cache.pm
+++ b/lib/Krawfish/Corpus/Cache.pm
@@ -104,4 +104,8 @@
return 'cache(' . $self->{span}->to_string . ')';
};
+sub max_freq {
+ $_[0]->{span}->max_freq;
+};
+
1;
diff --git a/lib/Krawfish/Corpus/Class.pm b/lib/Krawfish/Corpus/Class.pm
index d6948e6..e6f0aa8 100644
--- a/lib/Krawfish/Corpus/Class.pm
+++ b/lib/Krawfish/Corpus/Class.pm
@@ -84,4 +84,8 @@
return $str;
};
+sub max_freq {
+ $_[0]->{corpus}->max_freq;
+};
+
1;
diff --git a/lib/Krawfish/Corpus/DocVector.pm b/lib/Krawfish/Corpus/DocVector.pm
index 2885fe9..dd00054 100644
--- a/lib/Krawfish/Corpus/DocVector.pm
+++ b/lib/Krawfish/Corpus/DocVector.pm
@@ -29,7 +29,7 @@
sub current;
-sub freq;
+sub max_freq;
sub to_string;
diff --git a/lib/Krawfish/Corpus/Field.pm b/lib/Krawfish/Corpus/Field.pm
index 2f98958..aa11601 100644
--- a/lib/Krawfish/Corpus/Field.pm
+++ b/lib/Krawfish/Corpus/Field.pm
@@ -48,7 +48,7 @@
);
}
-sub freq {
+sub max_freq {
$_[0]->{postings}->freq;
};
diff --git a/lib/Krawfish/Corpus/Negation.pm b/lib/Krawfish/Corpus/Negation.pm
index 605722e..afbcb70 100644
--- a/lib/Krawfish/Corpus/Negation.pm
+++ b/lib/Krawfish/Corpus/Negation.pm
@@ -10,7 +10,6 @@
# TODO: Remove in favor of WithOut!
-
use constant DEBUG => 0;
# TODO: Support deleted docs
@@ -96,9 +95,9 @@
);
}
-sub freq {
+sub max_freq {
my $self = shift;
- $self->{last_doc_id} - $self->{query}->freq;
+ $self->{last_doc_id} - $self->{query}->max_freq;
};
sub to_string {
diff --git a/lib/Krawfish/Corpus/Or.pm b/lib/Krawfish/Corpus/Or.pm
index 845f381..62bcbcc 100644
--- a/lib/Krawfish/Corpus/Or.pm
+++ b/lib/Krawfish/Corpus/Or.pm
@@ -98,4 +98,11 @@
return 'or(' . $self->{first}->to_string . ',' . $self->{second}->to_string . ')';
};
+
+sub max_freq {
+ my $self = shift;
+ $self->{first}->max_freq + $self->{second}->max_freq;
+};
+
+
1;
diff --git a/lib/Krawfish/Corpus/OrWithFlags.pm b/lib/Krawfish/Corpus/OrWithFlags.pm
index b143d4f..bf905aa 100644
--- a/lib/Krawfish/Corpus/OrWithFlags.pm
+++ b/lib/Krawfish/Corpus/OrWithFlags.pm
@@ -35,6 +35,8 @@
...;
};
+sub max_freq;
+
1;
__END__
diff --git a/lib/Krawfish/Corpus/Without.pm b/lib/Krawfish/Corpus/Without.pm
index 1630560..d685cae 100644
--- a/lib/Krawfish/Corpus/Without.pm
+++ b/lib/Krawfish/Corpus/Without.pm
@@ -90,8 +90,8 @@
return 0;
};
-sub freq {
- $_[0]->{first}->freq;
+sub max_freq {
+ $_[0]->{first}->max_freq;
};
@@ -100,4 +100,5 @@
return 'andNot(' . $self->{first}->to_string . ',' . $self->{second}->to_string . ')';
};
+
1;
diff --git a/lib/Krawfish/Koral/Corpus/AndNot.pm b/lib/Krawfish/Koral/Corpus/AndNot.pm
index 02cf7d1..0297e6d 100644
--- a/lib/Krawfish/Koral/Corpus/AndNot.pm
+++ b/lib/Krawfish/Koral/Corpus/AndNot.pm
@@ -48,14 +48,14 @@
# Get the positive query
my $pos_query = $pos->optimize($index);
- if ($pos_query->freq == 0) {
+ if ($pos_query->max_freq == 0) {
return Krawfish::Query::Nothing->new;
};
# Get the negative query
my $neg_query = $neg->optimize($index);
- if ($neg_query->freq == 0) {
+ if ($neg_query->max_freq == 0) {
return $pos_query;
};
diff --git a/lib/Krawfish/Koral/Corpus/Field.pm b/lib/Krawfish/Koral/Corpus/Field.pm
index 2d4b965..ee9b425 100644
--- a/lib/Krawfish/Koral/Corpus/Field.pm
+++ b/lib/Krawfish/Koral/Corpus/Field.pm
@@ -134,7 +134,7 @@
$self->to_term
);
- if ($query->freq == 0) {
+ if ($query->max_freq == 0) {
return Krawfish::Query::Nothing->new;
};
diff --git a/lib/Krawfish/Koral/Corpus/FieldGroup.pm b/lib/Krawfish/Koral/Corpus/FieldGroup.pm
index 00ed249..329c9d9 100644
--- a/lib/Krawfish/Koral/Corpus/FieldGroup.pm
+++ b/lib/Krawfish/Koral/Corpus/FieldGroup.pm
@@ -125,7 +125,7 @@
$i++;
# Check unless
- while ($query->freq == 0 && $i < @$ops) {
+ while ($query->max_freq == 0 && $i < @$ops) {
$first = $ops->[$i++];
$query = $first->optimize($index);
$i++;
@@ -141,7 +141,7 @@
# TODO: Check for negation!
my $next = $ops->[$i]->optimize($index);
- if ($next->freq != 0) {
+ if ($next->max_freq != 0) {
# TODO: Distinguish here between classes and non-classes!
$query = Krawfish::Corpus::Or->new(
@@ -160,7 +160,7 @@
# Get query operation for next operand
my $next = $ops->[$i]->optimize($index);
- if ($next->freq != 0) {
+ if ($next->max_freq != 0) {
# TODO: Distinguish here between classes and non-classes!
$query = Krawfish::Corpus::And->new(
@@ -179,7 +179,7 @@
warn 'Should never happen!';
};
- if ($query->freq == 0) {
+ if ($query->max_freq == 0) {
return Krawfish::Query::Nothing->new;
};
diff --git a/lib/Krawfish/Koral/Query/Class.pm b/lib/Krawfish/Koral/Query/Class.pm
index e814a50..7f1dc7d 100644
--- a/lib/Krawfish/Koral/Query/Class.pm
+++ b/lib/Krawfish/Koral/Query/Class.pm
@@ -93,7 +93,7 @@
my $span = $self->operand->optimize($index);
# Span has no match
- if ($span->freq == 0) {
+ if ($span->max_freq == 0) {
return $self->builder->nothing;
};
diff --git a/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm b/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm
index 5ccfcd6..00e1d17 100644
--- a/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm
+++ b/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm
@@ -39,7 +39,7 @@
my $query = $self->{query}->optimize($index);
# Span has no match
- return if $query->freq == 0;
+ return if $query->max_freq == 0;
return Krawfish::Query::Constraint::NotBetween->new($query);
};
diff --git a/lib/Krawfish/Koral/Query/Constraints.pm b/lib/Krawfish/Koral/Query/Constraints.pm
index 6e33fc5..9454797 100644
--- a/lib/Krawfish/Koral/Query/Constraints.pm
+++ b/lib/Krawfish/Koral/Query/Constraints.pm
@@ -157,12 +157,12 @@
# Optimize operands
my $first = $self->{operands}->[0]->optimize($index);
- if ($first->freq == 0) {
+ if ($first->max_freq == 0) {
return Krawfish::Query::Nothing->new;
};
my $second = $self->{operands}->[1]->optimize($index);
- if ($second->freq == 0) {
+ if ($second->max_freq == 0) {
return Krawfish::Query::Nothing->new;
};
diff --git a/lib/Krawfish/Koral/Query/Exclusion.pm b/lib/Krawfish/Koral/Query/Exclusion.pm
index 226ffe7..f1d7583 100644
--- a/lib/Krawfish/Koral/Query/Exclusion.pm
+++ b/lib/Krawfish/Koral/Query/Exclusion.pm
@@ -125,7 +125,7 @@
my $second = $self->operands->[1]->optimize($index);
# Second object does not occur
- if ($second->freq == 0) {
+ if ($second->max_freq == 0) {
return $first;
};
diff --git a/lib/Krawfish/Koral/Query/Focus.pm b/lib/Krawfish/Koral/Query/Focus.pm
index 8adf045..9eec971 100644
--- a/lib/Krawfish/Koral/Query/Focus.pm
+++ b/lib/Krawfish/Koral/Query/Focus.pm
@@ -4,6 +4,11 @@
use strict;
use warnings;
+# TODO:
+# If span is maybe_unsorted, use a sorted focus,
+# otherwise an unsorted focus.
+
+
sub new {
my $class = shift;
bless {
@@ -16,8 +21,8 @@
$_[0]->{nrs};
};
-# If span is maybe_unsorted, use a sorted focus, otherwise an unsorted focus.
+# Optimize query to potentially need sorting
sub optimize {
my ($self, $index) = @_;
@@ -30,7 +35,7 @@
};
# Span has no match
- if ($span->freq == 0) {
+ if ($span->max_freq == 0) {
return $self->builder->nothing;
};
diff --git a/lib/Krawfish/Koral/Query/Length.pm b/lib/Krawfish/Koral/Query/Length.pm
index fcb7385..1721e5c 100644
--- a/lib/Krawfish/Koral/Query/Length.pm
+++ b/lib/Krawfish/Koral/Query/Length.pm
@@ -127,7 +127,7 @@
my $span = $self->operand->optimize($index);
# Nothing set
- if ($span->freq == 0) {
+ if ($span->max_freq == 0) {
return Krawfish::Query::Nothing->new;
};
diff --git a/lib/Krawfish/Koral/Query/Or.pm b/lib/Krawfish/Koral/Query/Or.pm
index e6fee7c..4eac728 100644
--- a/lib/Krawfish/Koral/Query/Or.pm
+++ b/lib/Krawfish/Koral/Query/Or.pm
@@ -41,7 +41,7 @@
$i++;
# Check to get a valid first query
- while ($query->freq == 0 && $i < @$ops) {
+ while ($query->max_freq == 0 && $i < @$ops) {
$first = $ops->[$i++];
$query = $first->optimize($index);
$i++;
@@ -52,7 +52,7 @@
# TODO: Check for negation!
my $next = $ops->[$i]->optimize($index);
- if ($next->freq != 0) {
+ if ($next->max_freq != 0) {
$query = Krawfish::Query::Or->new(
$query,
$next
@@ -60,7 +60,7 @@
};
};
- if ($query->freq == 0) {
+ if ($query->max_freq == 0) {
return Krawfish::Query::Nothing->new;
};
diff --git a/lib/Krawfish/Koral/Query/Repetition.pm b/lib/Krawfish/Koral/Query/Repetition.pm
index 6ef2791..0c9fc91 100644
--- a/lib/Krawfish/Koral/Query/Repetition.pm
+++ b/lib/Krawfish/Koral/Query/Repetition.pm
@@ -220,7 +220,7 @@
my $span = $self->operand->optimize($index);
# Span matches nowhere
- return $span if $span->freq == 0;
+ return $span if $span->max_freq == 0;
# Create repetition span
return Krawfish::Query::Repetition->new(
diff --git a/lib/Krawfish/Koral/Query/Term.pm b/lib/Krawfish/Koral/Query/Term.pm
index b72ca18..5ad06b7 100644
--- a/lib/Krawfish/Koral/Query/Term.pm
+++ b/lib/Krawfish/Koral/Query/Term.pm
@@ -396,7 +396,7 @@
print_log('kq_term', 'Filter serialization is ' . $filter->to_string) if DEBUG;
# Filter is empty
- return $self->builder->nothing if $filter->freq == 0;
+ return $self->builder->nothing if $filter->max_freq == 0;
return Krawfish::Query::Filter->new(
Krawfish::Query::Term->new($index, $self->to_term),
diff --git a/lib/Krawfish/Koral/Query/TermGroup.pm b/lib/Krawfish/Koral/Query/TermGroup.pm
index 2cd71c2..e96618e 100644
--- a/lib/Krawfish/Koral/Query/TermGroup.pm
+++ b/lib/Krawfish/Koral/Query/TermGroup.pm
@@ -158,7 +158,7 @@
$i++;
# Check unless
- while ($query->freq == 0 && $i < @$ops) {
+ while ($query->max_freq == 0 && $i < @$ops) {
$first = $ops->[$i++];
$query = $first->optimize($index);
$i++;
@@ -174,7 +174,7 @@
# TODO: Check for negation!
my $next = $ops->[$i]->optimize($index);
- if ($next->freq != 0) {
+ if ($next->max_freq != 0) {
# TODO: Distinguish here between classes and non-classes!
$query = Krawfish::Query::Or->new(
@@ -194,7 +194,7 @@
# Get query operation for next operand
my $next = $ops->[$i]->optimize($index);
- if ($next->freq != 0) {
+ if ($next->max_freq != 0) {
# TODO: Distinguish here between classes and non-classes!
$query = Krawfish::Query::Constraints->new(
@@ -214,7 +214,7 @@
warn 'Should never happen!';
};
- if ($query->freq == 0) {
+ if ($query->max_freq == 0) {
return Krawfish::Query::Nothing->new;
};
diff --git a/lib/Krawfish/Koral/Query/Unique.pm b/lib/Krawfish/Koral/Query/Unique.pm
index dbbb51d..bb0d1b9 100644
--- a/lib/Krawfish/Koral/Query/Unique.pm
+++ b/lib/Krawfish/Koral/Query/Unique.pm
@@ -38,7 +38,7 @@
my $span = $self->operand->optimize($index) or return;
- if ($span->freq == 0) {
+ if ($span->max_freq == 0) {
return $self->builder->nothing;
};
diff --git a/lib/Krawfish/Koral/Util/Sequential.pm b/lib/Krawfish/Koral/Util/Sequential.pm
index 6df03d6..359f298 100644
--- a/lib/Krawfish/Koral/Util/Sequential.pm
+++ b/lib/Krawfish/Koral/Util/Sequential.pm
@@ -208,7 +208,7 @@
my $query = $ops->[$i]->optimize($index);
# Get frequency of operand
- my $freq = $query->freq;
+ my $freq = $query->max_freq;
if (DEBUG) {
print_log('kq_sequtil', 'Get frequencies for possible anchor ' . $query->to_string);
@@ -218,7 +218,7 @@
return Krawfish::Query::Nothing->new if $freq == 0;
# Current query is less common
- if (!defined $filterable_query || $freq < $queries[$filterable_query]->freq) {
+ if (!defined $filterable_query || $freq < $queries[$filterable_query]->max_freq) {
$filterable_query = $_;
};
$queries[$i] = [POS, $freq, $query, $ops->[$i]];
@@ -401,7 +401,7 @@
);
# Set new query
- $queries->[$index_a] = [POS, $new_query->freq, $new_query];
+ $queries->[$index_a] = [POS, $new_query->max_freq, $new_query];
# Remove old query
splice(@$queries, $surr_i, 1);
@@ -478,7 +478,7 @@
}
# Set new query
- $queries->[$index_a] = [POS, $new_query->freq, $new_query];
+ $queries->[$index_a] = [POS, $new_query->max_freq, $new_query];
# Remove old query
splice(@$queries, $index_b, 1);
@@ -545,7 +545,7 @@
# Set new query
$queries->[$index_a < $index_b ? $index_a : $index_b] =
- [POS, $new_query->freq, $new_query];
+ [POS, $new_query->max_freq, $new_query];
# Remove old query
splice(@$queries, $index_between, 2);
@@ -596,7 +596,7 @@
# Negative operand can't occur - rewrite to any query, but
# keep quantities intact (i.e. <!s> can have different length than [!a])
- if ($neg->freq == 0) {
+ if ($neg->max_freq == 0) {
if (DEBUG) {
print_log('kq_sequtil', 'Negative query ' . $query->to_string . ' never occurs');
};
@@ -628,7 +628,7 @@
# Set new query
$queries->[$index_a < $index_b ? $index_a : $index_b] =
- [POS, $new_query->freq, $new_query];
+ [POS, $new_query->max_freq, $new_query];
# Remove old query
splice(@$queries, $index_between, 2);
@@ -654,7 +654,7 @@
# Optimize
$opt = $opt->finalize->optimize($index);
$queries->[$index_between]->[QUERY] = $opt;
- $queries->[$index_between]->[FREQ] = $opt->freq;
+ $queries->[$index_between]->[FREQ] = $opt->max_freq;
my $constraint = {};
@@ -663,7 +663,7 @@
my $query_b = $queries->[$index_b];
# One element matches nowhere - the whole sequence matches nowhere
- if ($opt->freq == 0) {
+ if ($opt->max_freq == 0) {
if (DEBUG) {
print_log(
@@ -684,7 +684,7 @@
}
# Set new query
- $queries->[$index_a] = [POS, $new_query->freq, $new_query];
+ $queries->[$index_a] = [POS, $new_query->max_freq, $new_query];
# Remove old query
splice(@$queries, $index_between, 2);
@@ -741,7 +741,7 @@
# Set new query
$queries->[$index_a < $index_b ? $index_a : $index_b] =
- [POS, $new_query->freq, $new_query];
+ [POS, $new_query->max_freq, $new_query];
# Remove old query
splice(@$queries, $index_between, 2);
@@ -780,7 +780,7 @@
# Optimize both surroundings
unless ($surr_l_query->[QUERY]) {
$surr_l_query->[QUERY] = $surr_l_query->[KQUERY]->finalize->optimize($index);
- $surr_l_query->[FREQ] = $surr_l_query->[QUERY]->freq;
+ $surr_l_query->[FREQ] = $surr_l_query->[QUERY]->max_freq;
if (DEBUG) {
print_log('kq_sequtil', 'Optimize query ' . $surr_l_query->[KQUERY]->to_string);
};
@@ -791,7 +791,7 @@
unless ($surr_r_query->[QUERY]) {
$surr_r_query->[QUERY] = $surr_r_query->[KQUERY]->finalize->optimize($index);
- $surr_r_query->[FREQ] = $surr_r_query->[QUERY]->freq;
+ $surr_r_query->[FREQ] = $surr_r_query->[QUERY]->max_freq;
if (DEBUG) {
print_log('kq_sequtil', 'Optimize query ' . $surr_r_query->[KQUERY]->to_string);
};
@@ -823,7 +823,7 @@
# Optimize right surrounding
unless ($surr_r_query->[QUERY]) {
$surr_r_query->[QUERY] = $surr_r_query->[KQUERY]->finalize->optimize($index);
- $surr_r_query->[FREQ] = $surr_r_query->[QUERY]->freq;
+ $surr_r_query->[FREQ] = $surr_r_query->[QUERY]->max_freq;
if (DEBUG) {
print_log('kq_sequtil', 'Optimize query ' . $surr_r_query->[KQUERY]->to_string);
};
@@ -887,7 +887,7 @@
$new_query = _or($query_a->[KQUERY]->optimize($index), $new_query);
# Add new query
- $queries->[$index_a] = [POS, $new_query->freq, $new_query];
+ $queries->[$index_a] = [POS, $new_query->max_freq, $new_query];
# Remove old query
splice(@$queries, $index_ext, 1);
@@ -1133,7 +1133,7 @@
my $next_i = shift @consecutives;
# Create a precedes directly
- if ($query->freq <= $queries[$next_i]->freq) {
+ if ($query->max_freq <= $queries[$next_i]->max_freq) {
print_log(
'kq_sequtil',
diff --git a/lib/Krawfish/Query.pm b/lib/Krawfish/Query.pm
index 460c814..95fba7f 100644
--- a/lib/Krawfish/Query.pm
+++ b/lib/Krawfish/Query.pm
@@ -1,5 +1,6 @@
package Krawfish::Query;
use Krawfish::Log;
+use Scalar::Util qw/blessed/;
use strict;
use warnings;
@@ -128,8 +129,10 @@
# return doc;
# }
-sub freq {
- -1;
+
+# The maximum possible frequency of the query.
+# Subclasses are expected to override this. The base implementation
+# warns and returns -1, the "unknown" sentinel that the replaced freq()
+# returned and that Krawfish::Query::Or::max_freq compares against,
+# rather than leaking warn's own return value as a frequency.
+sub max_freq {
+  # Fall back to the invocant itself when it is not a blessed object
+  # (e.g. a class-method call), so the message never concatenates undef.
+  warn 'Not implemented for this query: ' . (blessed($_[0]) || $_[0]);
+  return -1;
};
diff --git a/lib/Krawfish/Query/Base/Single.pm b/lib/Krawfish/Query/Base/Single.pm
deleted file mode 100644
index e69de29..0000000
--- a/lib/Krawfish/Query/Base/Single.pm
+++ /dev/null
diff --git a/lib/Krawfish/Query/Cache.pm b/lib/Krawfish/Query/Cache.pm
index 31664ed..5f5c4b1 100644
--- a/lib/Krawfish/Query/Cache.pm
+++ b/lib/Krawfish/Query/Cache.pm
@@ -19,4 +19,6 @@
# so sorting with offstes is supported
sub next;
+sub max_freq;
+
1;
diff --git a/lib/Krawfish/Query/Class.pm b/lib/Krawfish/Query/Class.pm
index 9df6c66..6ac6652 100644
--- a/lib/Krawfish/Query/Class.pm
+++ b/lib/Krawfish/Query/Class.pm
@@ -43,8 +43,8 @@
};
-sub freq {
- $_[0]->{span}->freq;
+sub max_freq {
+ $_[0]->{span}->max_freq;
};
diff --git a/lib/Krawfish/Query/Constraints.pm b/lib/Krawfish/Query/Constraints.pm
index b96faec..b9f46db 100644
--- a/lib/Krawfish/Query/Constraints.pm
+++ b/lib/Krawfish/Query/Constraints.pm
@@ -86,11 +86,10 @@
};
-# The frequency is the minimum of both query frequencies
-# Maybe 'cost' is the better term
-sub freq {
+# The maximum frequency is the minimum of both operands' maximum frequencies
+sub max_freq {
my $self = shift;
- min($self->{first}->freq, $self->{second}->freq);
+ min($self->{first}->max_freq, $self->{second}->max_freq);
};
diff --git a/lib/Krawfish/Query/Exclusion.pm b/lib/Krawfish/Query/Exclusion.pm
index 07bb3b7..b3d3b9f 100644
--- a/lib/Krawfish/Query/Exclusion.pm
+++ b/lib/Krawfish/Query/Exclusion.pm
@@ -45,7 +45,7 @@
buffer => Krawfish::Util::Buffer->new,
}, $class;
- # TODO: Return 'first', if second->freq == 0
+ # TODO: Return 'first', if second->max_freq == 0
};
@@ -136,9 +136,8 @@
};
-sub freq {
- my $self = shift;
- $self->{first}->freq;
+sub max_freq {
+ $_[0]->{first}->max_freq;
};
1;
diff --git a/lib/Krawfish/Query/Extension.pm b/lib/Krawfish/Query/Extension.pm
index d9d5232..f2acc81 100644
--- a/lib/Krawfish/Query/Extension.pm
+++ b/lib/Krawfish/Query/Extension.pm
@@ -36,7 +36,7 @@
-sub freq {
+sub max_freq {
# TODO:
...
};
diff --git a/lib/Krawfish/Query/Filter.pm b/lib/Krawfish/Query/Filter.pm
index 43e3cc3..278bdbd 100644
--- a/lib/Krawfish/Query/Filter.pm
+++ b/lib/Krawfish/Query/Filter.pm
@@ -77,8 +77,8 @@
};
-# Get the frequency of the term in a corpus
-sub freq {
+# Get the maximum frequency of the term in a corpus
+sub max_freq {
my $self = shift;
my $freq = 0;
diff --git a/lib/Krawfish/Query/Length.pm b/lib/Krawfish/Query/Length.pm
index 8e88568..aa5cf28 100644
--- a/lib/Krawfish/Query/Length.pm
+++ b/lib/Krawfish/Query/Length.pm
@@ -83,8 +83,8 @@
};
-sub freq {
- $_[0]->{span}->freq;
+sub max_freq {
+ $_[0]->{span}->max_freq;
};
# Stringification
diff --git a/lib/Krawfish/Query/Nothing.pm b/lib/Krawfish/Query/Nothing.pm
index b59bc99..0a04d21 100644
--- a/lib/Krawfish/Query/Nothing.pm
+++ b/lib/Krawfish/Query/Nothing.pm
@@ -22,7 +22,7 @@
return;
};
-sub freq {
+sub max_freq {
0
};
diff --git a/lib/Krawfish/Query/Or.pm b/lib/Krawfish/Query/Or.pm
index 783cea9..2bcb1f8 100644
--- a/lib/Krawfish/Query/Or.pm
+++ b/lib/Krawfish/Query/Or.pm
@@ -112,14 +112,14 @@
};
-sub freq {
+sub max_freq {
my $self = shift;
- if ($self->{first}->freq == -1 || $self->{second}->freq == -1) {
+ if ($self->{first}->max_freq == -1 || $self->{second}->max_freq == -1) {
return -1;
}
else {
- return $self->{first}->freq + $self->{second}->freq;
+ return $self->{first}->max_freq + $self->{second}->max_freq;
};
};
diff --git a/lib/Krawfish/Query/Reference.pm b/lib/Krawfish/Query/Reference.pm
index 9b7bc85..d791e9a 100644
--- a/lib/Krawfish/Query/Reference.pm
+++ b/lib/Krawfish/Query/Reference.pm
@@ -44,4 +44,6 @@
my $self = shift;
};
+sub max_freq;
+
1;
diff --git a/lib/Krawfish/Query/Repetition.pm b/lib/Krawfish/Query/Repetition.pm
index 499a370..7502d51 100644
--- a/lib/Krawfish/Query/Repetition.pm
+++ b/lib/Krawfish/Query/Repetition.pm
@@ -175,8 +175,13 @@
};
-sub freq {
- ...
+# The maximum frequency is based on the occurrence of the span,
+# multiplied by the number of allowed repetition counts
+# (max - min + 1), so
+# max_freq([a]{3}) == max_freq([a])
+# max_freq([a]{1,2}) == max_freq([a])*2
+sub max_freq {
+ my $self = shift;
+ $self->{span}->max_freq * ($self->{max} - $self->{min} + 1)
};
1;
diff --git a/lib/Krawfish/Query/Span.pm b/lib/Krawfish/Query/Span.pm
index 49fdd30..c5683ea 100644
--- a/lib/Krawfish/Query/Span.pm
+++ b/lib/Krawfish/Query/Span.pm
@@ -21,5 +21,4 @@
);
};
-
1;
diff --git a/lib/Krawfish/Query/Term.pm b/lib/Krawfish/Query/Term.pm
index 86734ca..dac0b2d 100644
--- a/lib/Krawfish/Query/Term.pm
+++ b/lib/Krawfish/Query/Term.pm
@@ -54,7 +54,7 @@
);
};
-sub freq {
+sub max_freq {
$_[0]->{postings}->freq;
};
diff --git a/lib/Krawfish/Query/Unique.pm b/lib/Krawfish/Query/Unique.pm
index f7e4f29..113e73f 100644
--- a/lib/Krawfish/Query/Unique.pm
+++ b/lib/Krawfish/Query/Unique.pm
@@ -50,8 +50,8 @@
};
-sub freq {
- $_[0]->{span}->freq;
+sub max_freq {
+ $_[0]->{span}->max_freq;
};
1;