Return NOMOREDOCS constant on skip_doc
Change-Id: I65a9511c29cbac342f00f0dda0cb2206215ddfee
diff --git a/lib/Krawfish/Corpus.pm b/lib/Krawfish/Corpus.pm
index 54f41e0..6b7d7f9 100644
--- a/lib/Krawfish/Corpus.pm
+++ b/lib/Krawfish/Corpus.pm
@@ -1,6 +1,7 @@
package Krawfish::Corpus;
use strict;
use warnings;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use Role::Tiny;
use Krawfish::Log;
@@ -44,12 +45,9 @@
print_log('corpus', refaddr($self) . ': skip to doc id ' . $target_doc_id) if DEBUG;
while (!$self->current || $self->current->doc_id < $target_doc_id) {
- $self->next_doc or return;
+ $self->next_doc or return NOMOREDOCS;
};
- # TODO:
- # Return NOMORE in case no more
- # documents exist
return $self->current->doc_id;
};
@@ -69,14 +67,18 @@
# Forward the first span to advance to the document of the second span
if ($first_c->doc_id < $second_c->doc_id) {
print_log('corpus', 'Forward first') if DEBUG;
- $self->skip_doc($second_c->doc_id) or return;
+ if ($self->skip_doc($second_c->doc_id) == NOMOREDOCS) {
+ return;
+ };
$first_c = $self->current;
}
# Forward the second span to advance to the document of the first span
else {
print_log('corpus', 'Forward second') if DEBUG;
- $second->skip_doc($first_c->doc_id) or return;
+ if ($second->skip_doc($first_c->doc_id) == NOMOREDOCS) {
+ return;
+ };
$second_c = $second->current;
};
};
diff --git a/lib/Krawfish/Corpus/And.pm b/lib/Krawfish/Corpus/And.pm
index bf5542d..726145d 100644
--- a/lib/Krawfish/Corpus/And.pm
+++ b/lib/Krawfish/Corpus/And.pm
@@ -2,6 +2,7 @@
use strict;
use warnings;
use Role::Tiny::With;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use List::Util qw/min/;
use Scalar::Util qw/refaddr/;
use Krawfish::Log;
@@ -105,7 +106,7 @@
);
};
- unless (defined $self->{first}->skip_doc($second->doc_id)) {
+ if ($self->{first}->skip_doc($second->doc_id) == NOMOREDOCS) {
$self->{doc_id} = undef;
return;
}
diff --git a/lib/Krawfish/Corpus/Cache.pm b/lib/Krawfish/Corpus/Cache.pm
index 556f43c..a1fa528 100644
--- a/lib/Krawfish/Corpus/Cache.pm
+++ b/lib/Krawfish/Corpus/Cache.pm
@@ -4,6 +4,7 @@
use Role::Tiny;
use Krawfish::Index::Stream;
use Krawfish::Cache;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
with 'Krawfish::Corpus';
@@ -30,6 +31,9 @@
# be indexed next time
# - the signature of the koralquery can be used for caching
+# TODO:
+# - support corpus classes
+
# A cache may not necessarily be invalidated.
# It may be filtered using the live document vector (so it is
# not necessary to invalidate all caches on an updated)
@@ -105,7 +109,7 @@
return $self->{doc_id} if $doc_id >= $self->{doc_id};
};
- return;
+ return NOMOREDOCS;
};
diff --git a/lib/Krawfish/Corpus/Class.pm b/lib/Krawfish/Corpus/Class.pm
index 2ef7363..265f18b 100644
--- a/lib/Krawfish/Corpus/Class.pm
+++ b/lib/Krawfish/Corpus/Class.pm
@@ -2,6 +2,7 @@
use strict;
use warnings;
use Role::Tiny;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use Krawfish::Util::Bits qw/classes_to_flags bitstring/;
use Krawfish::Log;
@@ -111,14 +112,14 @@
# Skip to target document
sub skip_doc {
my $self = shift;
- if ($self->{corpus}->skip_doc(shift)) {
+ if ($self->{corpus}->skip_doc(shift) != NOMOREDOCS) {
my $current = $self->{corpus}->current;
$self->{doc_id} = $current->doc_id;
$self->{flags} = $current->flags | $self->{flag};
return $self->{doc_id};
};
$self->{doc_id} = undef;
- return;
+ return NOMOREDOCS;
};
diff --git a/lib/Krawfish/Corpus/Distribution.pm b/lib/Krawfish/Corpus/Distribution.pm
index 6120a1a..4b84c32 100644
--- a/lib/Krawfish/Corpus/Distribution.pm
+++ b/lib/Krawfish/Corpus/Distribution.pm
@@ -25,7 +25,10 @@
#
# However - in case skips are not available,
# this may be slow ...
-#
+
+# TODO:
+# Support corpus classes
+
# TODO:
# Another distribution variant would be
# distr(doc[1:1]: 'author:Goethe', 'author:Schiller')
diff --git a/lib/Krawfish/Index/Dictionary.pm b/lib/Krawfish/Index/Dictionary.pm
index 88f5cba..756d8b8 100644
--- a/lib/Krawfish/Index/Dictionary.pm
+++ b/lib/Krawfish/Index/Dictionary.pm
@@ -469,6 +469,8 @@
sub term_ids {
my ($self, $re) = @_;
+ # In a golang port, this should use https://golang.org/pkg/regexp/syntax/
+
if ($re) {
my $hash = $self->{hash};
diff --git a/lib/Krawfish/Index/Fields/Pointer.pm b/lib/Krawfish/Index/Fields/Pointer.pm
index f01b53d..212bae4 100644
--- a/lib/Krawfish/Index/Fields/Pointer.pm
+++ b/lib/Krawfish/Index/Fields/Pointer.pm
@@ -2,6 +2,7 @@
use Krawfish::Koral::Document::FieldInt;
use Krawfish::Koral::Document::FieldStore;
use Krawfish::Koral::Document::FieldString;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use Krawfish::Log;
use warnings;
use strict;
@@ -87,7 +88,8 @@
$self->{pos} = 0;
return $doc_id;
};
- return -1;
+
+ return NOMOREDOCS;
};
diff --git a/lib/Krawfish/Index/Forward/Pointer.pm b/lib/Krawfish/Index/Forward/Pointer.pm
index 2e7cd22..8d77dae 100644
--- a/lib/Krawfish/Index/Forward/Pointer.pm
+++ b/lib/Krawfish/Index/Forward/Pointer.pm
@@ -1,6 +1,7 @@
package Krawfish::Index::Forward::Pointer;
use Krawfish::Posting::Forward;
use Krawfish::Log;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use warnings;
use strict;
@@ -10,8 +11,7 @@
# This currently is not combined with live documents per default
use constant {
- DEBUG => 0,
- NOMORE => -1
+ DEBUG => 0
};
# API:
@@ -123,7 +123,7 @@
return $target_doc_id;
};
- return NOMORE;
+ return NOMOREDOCS;
};
diff --git a/lib/Krawfish/Index/PostingLivePointer.pm b/lib/Krawfish/Index/PostingLivePointer.pm
index 196da53..908d658 100644
--- a/lib/Krawfish/Index/PostingLivePointer.pm
+++ b/lib/Krawfish/Index/PostingLivePointer.pm
@@ -1,5 +1,6 @@
package Krawfish::Index::PostingLivePointer;
use parent 'Krawfish::Query';
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use Krawfish::Log;
use strict;
use warnings;
@@ -119,9 +120,23 @@
sub skip_doc {
my ($self, $target_doc_id) = @_;
- if ($target_doc_id >= $self->{next_doc_id} || $target_doc_id < $self->{doc_id}) {
- $self->{doc_id} = $self->{next_doc_id};
- return;
+ if (DEBUG) {
+ print_log(
+ 'live_p',
+ "Skip live pointer to $target_doc_id vs " . $self->{next_doc_id}
+ );
+ };
+
+ # Target exceeds doc length
+ if ($target_doc_id >= $self->{next_doc_id}) {
+ $self->{doc_id} = undef;
+ return NOMOREDOCS;
+ };
+
+ # Target is before current position
+ if ($target_doc_id < $self->{doc_id}) {
+ # $self->{doc_id} = $self->{next_doc_id};
+ return NOMOREDOCS; # $self->{doc_id};
};
my $list = $self->{list_copy};
@@ -140,8 +155,10 @@
$self->{pos}++;
};
- # TODO: Can this happen?
- return if $target_doc_id >= $self->{next_doc_id};
+ # No more documents
+ if ($target_doc_id >= $self->{next_doc_id}) {
+ return NOMOREDOCS;
+ };
# Set document id
return $self->{doc_id} = $target_doc_id;
diff --git a/lib/Krawfish/Index/PostingPointer.pm b/lib/Krawfish/Index/PostingPointer.pm
index 7b5939e..94be223 100644
--- a/lib/Krawfish/Index/PostingPointer.pm
+++ b/lib/Krawfish/Index/PostingPointer.pm
@@ -2,6 +2,7 @@
use parent 'Krawfish::Query';
use Krawfish::Log;
use Krawfish::Posting::Data;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use Krawfish::Posting;
use Scalar::Util qw/refaddr/;
use strict;
@@ -125,13 +126,12 @@
sub skip_doc {
my ($self, $target_doc_id) = @_;
- # TODO:
- # Return NOMORE in case there are no more postings.
-
- print_log('ppointer', refaddr($self) . ': TEMP SLOW Skip to chosen document') if DEBUG;
+ if (DEBUG) {
+ print_log('ppointer', refaddr($self) . ': TEMP SLOW Skip to chosen document');
+ };
while (!$self->current || $self->current->doc_id < $target_doc_id) {
- $self->next or return;
+ $self->next or return NOMOREDOCS;
};
return $self->current->doc_id;
diff --git a/lib/Krawfish/Koral/Query/Term.pm b/lib/Krawfish/Koral/Query/Term.pm
index 4aeaf5f..2426335 100644
--- a/lib/Krawfish/Koral/Query/Term.pm
+++ b/lib/Krawfish/Koral/Query/Term.pm
@@ -358,6 +358,7 @@
sub identify {
my ($self, $dict) = @_;
+ # Term is no regular expression
unless ($self->is_regex) {
my $term = $self->to_term;
diff --git a/lib/Krawfish/Query/Nowhere.pm b/lib/Krawfish/Query/Nowhere.pm
index 5ab9c5f..89fb1f2 100644
--- a/lib/Krawfish/Query/Nowhere.pm
+++ b/lib/Krawfish/Query/Nowhere.pm
@@ -1,6 +1,7 @@
package Krawfish::Query::Nowhere;
use strict;
use warnings;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use Role::Tiny::With;
with 'Krawfish::Query';
@@ -33,7 +34,7 @@
# Skip to target document (invalid)
sub skip_doc {
- return;
+ return NOMOREDOCS;
};
diff --git a/lib/Krawfish/Util/Constants.pm b/lib/Krawfish/Util/Constants.pm
index 636fa0e..2714767 100644
--- a/lib/Krawfish/Util/Constants.pm
+++ b/lib/Krawfish/Util/Constants.pm
@@ -14,7 +14,8 @@
ATTR_PREF => '@',
REL_L_PREF => '>',
REL_R_PREF => '<',
- PTI_CLASS => 0 # Payload identifier for classes
+ PTI_CLASS => 0, # Payload identifier for classes
+ NOMOREDOCS => 4_294_967_295 # (maximum value for 32 bit)
};
our $ANNO_PREFIX_RE = qr/(?:\:|\-|\@|\>|\<)/;
@@ -30,7 +31,8 @@
REL_L_PREF
REL_R_PREF
PTI_CLASS
- $ANNO_PREFIX_RE/);
+ $ANNO_PREFIX_RE
+ NOMOREDOCS/);
our %EXPORT_TAGS = (
PREFIX => [qw/KEY_PREF
diff --git a/t/index/postings_live.t b/t/index/postings_live.t
index 7940124..54722b9 100644
--- a/t/index/postings_live.t
+++ b/t/index/postings_live.t
@@ -2,6 +2,7 @@
use warnings;
use Test::More;
use Test::Krawfish;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use Krawfish::Index::PostingsLive;
@@ -62,12 +63,13 @@
is($p2->skip_doc(9), 9, 'Skipped to 9');
is($p2->doc_id, 9, 'Get doc_id');
-ok(!$p2->skip_doc(10), 'Skipped to 9');
-is($p2->doc_id, 10, 'Get doc_id');
-ok(!$p2->skip_doc(11), 'Skipped to 9');
-is($p2->doc_id, 10, 'Get doc_id');
-ok(!$p2->skip_doc(12), 'Skipped to 9');
-is($p2->doc_id, 10, 'Get doc_id');
+is($p2->skip_doc(10), NOMOREDOCS, 'No more docs when skipping to 10');
+ok(!$p2->doc_id, 'No doc_id after failed skip to 10');
+is($p2->skip_doc(11), NOMOREDOCS, 'No more docs when skipping to 11');

+ok(!$p2->doc_id, 'No doc_id after failed skip to 11');
+is($p2->skip_doc(12), NOMOREDOCS, 'No more docs when skipping to 12');
+ok(!$p2->doc_id, 'No doc_id after failed skip to 12');
# Test with real index
diff --git a/t/query/nowhere.t b/t/query/nowhere.t
index 0a69f8a..2ec28dc 100644
--- a/t/query/nowhere.t
+++ b/t/query/nowhere.t
@@ -2,6 +2,7 @@
use strict;
use warnings;
use Data::Dumper;
+use Krawfish::Util::Constants qw/NOMOREDOCS/;
use File::Basename 'dirname';
use File::Spec::Functions 'catfile';
@@ -9,7 +10,7 @@
ok(my $q = Krawfish::Query::Nowhere->new, 'New nowhere');
ok(!$q->next, 'No nowhere');
-ok(!$q->skip_doc, 'No nowhere');
+is($q->skip_doc, NOMOREDOCS, 'No nowhere');
is($q->to_string, '[0]', 'Nada');
ok(!$q->current, 'No nowhere');