Added span() corpus query method
Change-Id: Ie63f7efad70128111c1c33e15effb425afbc220d
diff --git a/lib/Krawfish/Corpus/Span.pm b/lib/Krawfish/Corpus/Span.pm
new file mode 100644
index 0000000..10cc0d2
--- /dev/null
+++ b/lib/Krawfish/Corpus/Span.pm
@@ -0,0 +1,64 @@
+package Krawfish::Corpus::Span;
+use parent 'Krawfish::Corpus';
+use strict;
+use warnings;
+
+# Search for intratextual features
+
+sub new {
+  my ($class, $query) = @_;
+  return bless {
+    query => $query,   # wrapped span query
+    _init => undef     # turns true on the first call to next()
+  }, $class;
+};
+
+
+# Clone query: the wrapped span query is cloned as well
+# and the copy starts without the _init marker set
+sub clone {
+  my ($self) = @_;
+  my $wrapped = $self->{query};
+  return __PACKAGE__->new($wrapped->clone);
+};
+
+
+# Move to next document.
+# The very first call advances the wrapped query with next();
+# every later call asks the wrapped query for next_doc().
+# The value of the delegated call is returned unchanged.
+sub next {
+  my ($self) = @_;
+  my $query = $self->{query};
+  return $query->next_doc if $self->{_init};
+  $self->{_init}++;
+  return $query->next;
+};
+
+# Posting for the current document (doc_id only), or nothing
+sub current {
+  my ($self) = @_;
+  my $span = $self->{query}->current;
+  return unless $span;
+  return Krawfish::Posting->new(doc_id => $span->doc_id);
+};
+
+# Skip to target document by delegating to the wrapped span
+# query (calling skip_to on $self would recurse without end)
+sub skip_to {
+  return $_[0]->{query}->skip_to($_[1]);
+};
+
+# Stringification: wrapped query's string form inside 'span(...)'
+sub to_string {
+  my $inner = $_[0]->{query}->to_string;
+  return 'span(' . $inner . ')';
+};
+
+# Return maximum frequency, as reported by the wrapped query
+sub max_freq {
+  my ($self) = @_;
+  return $self->{query}->max_freq;
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Corpus/AndNot.pm b/lib/Krawfish/Koral/Corpus/AndNot.pm
index b7f5e8f..c109530 100644
--- a/lib/Krawfish/Koral/Corpus/AndNot.pm
+++ b/lib/Krawfish/Koral/Corpus/AndNot.pm
@@ -48,7 +48,7 @@
my ($pos, $neg) = @{$self->operands};
if (DEBUG) {
- print_log('kq_andnot', 'Plan andnot') if DEBUG;
+ print_log('kq_andnot', 'Plan andnot');
};
# Get the positive query
diff --git a/lib/Krawfish/Koral/Corpus/Builder.pm b/lib/Krawfish/Koral/Corpus/Builder.pm
index 103e2f9..4ad3f13 100644
--- a/lib/Krawfish/Koral/Corpus/Builder.pm
+++ b/lib/Krawfish/Koral/Corpus/Builder.pm
@@ -5,6 +5,7 @@
use Krawfish::Koral::Corpus::Nowhere;
use Krawfish::Koral::Corpus::Cache;
use Krawfish::Koral::Corpus::AndNot;
+use Krawfish::Koral::Corpus::Span;
use strict;
use warnings;
@@ -13,6 +14,7 @@
bless \(my $self = ''), $class;
};
+
# Create 'and' group
sub bool_and {
shift;
@@ -71,6 +73,12 @@
};
+# Create span corpus query from the passed arguments
+sub span {
+  my ($self, @operands) = @_;
+  return Krawfish::Koral::Corpus::Span->new(@operands);
+};
+
# Create 'date' field
# May be renamed to 'field_date'
sub date {
diff --git a/lib/Krawfish/Koral/Corpus/Span.pm b/lib/Krawfish/Koral/Corpus/Span.pm
new file mode 100644
index 0000000..065d806
--- /dev/null
+++ b/lib/Krawfish/Koral/Corpus/Span.pm
@@ -0,0 +1,154 @@
+package Krawfish::Koral::Corpus::Span;
+use parent 'Krawfish::Koral::Corpus';
+use Krawfish::Util::Constants ':PREFIX';
+use Krawfish::Query::Nowhere;
+use Krawfish::Corpus::Span;
+use strict;
+use warnings;
+
+use constant DEBUG => 0;
+
+sub new {
+  my ($class, $span) = @_;
+  return bless {
+    operands => [$span]   # the first argument is the only operand
+  }, $class;
+};
+
+
+# Query type identifier of this corpus query
+sub type {
+  return 'corpusSpan';
+};
+
+
+# Toggle negativity if required (unimplemented: '...' dies when called)
+sub toggle_negativity {
+ ...
+};
+
+
+# A span corpus query wraps an operand, so it is never a leaf
+sub is_leaf { return 0 };
+
+
+# Normalize query: strip classes from the operand, then normalize
+# and finalize it. Returns nothing if one of these steps fails,
+# the builder's anywhere query if the span is anywhere, optional
+# or null, and this query (with the finalized operand) otherwise.
+sub normalize {
+  my $self = shift;
+
+  if (DEBUG) {
+    print_log('kq_c_span', 'Normalize span query');
+  };
+
+  # Remove classes from operand (they can't be used)
+  my $stripped = $self->operand->remove_classes;
+
+  # Normalize the class-free operand, not the original one
+  my $span = $stripped->normalize;
+  unless ($span) {
+    $self->copy_info_from($stripped);
+    return;
+  };
+
+  # Deal with anywhere spans
+  if ($span->is_anywhere || $span->is_optional || $span->is_null) {
+    return $self->builder->anywhere;
+  };
+
+  # Finalize span query to ensure
+  # there is no invalid extension
+  my $final = $span->finalize;
+  unless ($final) {
+    $self->copy_info_from($span);
+    return;
+  };
+
+  # Store the finalized operand in the slot filled by new()
+  $self->{operands} = [$final];
+  return $self;
+};
+
+
+# Optimize query
+# Plans the operand against the given segment and wraps the
+# result in a Krawfish::Corpus::Span, unless the planned span
+# reports a maximum frequency of zero.
+sub optimize {
+  my ($self, $segment) = @_;
+
+  print_log('kq_c_span', 'Plan span corpus query') if DEBUG;
+
+  # Optimize span against segment
+  my $planned = $self->operand->optimize($segment);
+
+  # A maximum frequency of zero means
+  # the span can't match anywhere
+  if ($planned->max_freq == 0) {
+    return Krawfish::Query::Nowhere->new;
+  };
+
+  # Hand the planned span over to the corpus query
+  return Krawfish::Corpus::Span->new($planned);
+};
+
+
+# Always false: normalize() strips classes from the operand
+sub has_classes {
+  return 0;
+};
+
+# Check for negativity (decided by the operand)
+sub is_negative {
+  my ($self) = @_;
+  return $self->operand->is_negative;
+};
+
+
+# Toggle negativity (unimplemented: '...' dies when called)
+sub toggle_negative {
+ ...
+};
+
+# Check if the query matches anywhere (decided by the operand)
+sub is_anywhere {
+  my ($self) = @_;
+  return $self->operand->is_anywhere;
+};
+
+# Check if the query matches nowhere (decided by the operand)
+sub is_nowhere {
+  my ($self) = @_;
+  return $self->operand->is_nowhere;
+};
+
+# Check if the query is negligible (decided by the operand)
+sub is_null {
+  my ($self) = @_;
+  return $self->operand->is_null;
+};
+
+
+# Stringify as 'span(...)' around the operand's string form
+sub to_string {
+  my $inner = $_[0]->operand->to_string;
+  return 'span(' . $inner . ')';
+};
+
+# Serialize to KoralQuery: a hash reference that carries
+# the operand's own fragment under the 'span' key
+sub to_koral_fragment {
+  my ($self) = @_;
+  return +{
+    '@type' => 'koral:query',
+    'span'  => $self->operand->to_koral_fragment
+  };
+};
+
+
+1;
+
+__END__
+
diff --git a/lib/Krawfish/Koral/Result/Match.pm b/lib/Krawfish/Koral/Result/Match.pm
index 40860d7..9ee930b 100644
--- a/lib/Krawfish/Koral/Result/Match.pm
+++ b/lib/Krawfish/Koral/Result/Match.pm
@@ -20,7 +20,7 @@
# sorting_criteria
# segment_id
# match_id
-
+# corpus flags
# Add an enrichment
sub add {
diff --git a/lib/Krawfish/Query.pm b/lib/Krawfish/Query.pm
index aada326..91a88f2 100644
--- a/lib/Krawfish/Query.pm
+++ b/lib/Krawfish/Query.pm
@@ -8,6 +8,10 @@
# Krawfish::Query is the base class for all span queries.
+# TODO:
+# Use a boolean init value to indicate a
+# query needs a next first
+
use constant DEBUG => 0;
# Current span object
@@ -36,7 +40,13 @@
# This is only relevant for term posting lists
sub next_doc {
my $self = shift;
- my $current_doc_id = $self->current->doc_id;
+
+ # TODO:
+ # There may be the need to
+ # have an _init value
+
+ my $current = $self->current or return;
+ my $current_doc_id = $current->doc_id;
if (DEBUG) {
print_log('query', refaddr($self) . ": go to next doc following $current_doc_id");