Enforce prefix on all dictionary entries
Change-Id: I0060de59503b2e4d3be95faa209c5e87a02dccb7
diff --git a/lib/Krawfish/Koral/Document.pm b/lib/Krawfish/Koral/Document.pm
index b3745e2..8fc886a 100644
--- a/lib/Krawfish/Koral/Document.pm
+++ b/lib/Krawfish/Koral/Document.pm
@@ -316,6 +316,9 @@
$term->value($item->{value});
};
+ # Make 'token' the default term type
+ $term->term_type('token');
+
return $term;
#my $key = '';
diff --git a/lib/Krawfish/Koral/Query/Builder.pm b/lib/Krawfish/Koral/Query/Builder.pm
index 107fdb5..722e986 100644
--- a/lib/Krawfish/Koral/Query/Builder.pm
+++ b/lib/Krawfish/Koral/Query/Builder.pm
@@ -1,4 +1,9 @@
package Krawfish::Koral::Query::Builder;
+use strict;
+use warnings;
+
+use Krawfish::Util::Constants qw/:PREFIX/;
+
use Krawfish::Koral::Query::Term;
use Krawfish::Koral::Query::Token;
use Krawfish::Koral::Query::Span;
@@ -62,7 +67,8 @@
sub term {
shift;
- Krawfish::Koral::Query::Term->new(@_);
+ my $term = shift;
+ return Krawfish::Koral::Query::Term->new(TOKEN_PREF . $term);
};
sub term_neg {
diff --git a/lib/Krawfish/Koral/Query/Term.pm b/lib/Krawfish/Koral/Query/Term.pm
index 68a1cb5..cd65a4a 100644
--- a/lib/Krawfish/Koral/Query/Term.pm
+++ b/lib/Krawfish/Koral/Query/Term.pm
@@ -18,6 +18,9 @@
# a utility class Krawfish::Util::Koral::Term or so
# TODO:
+# Field is probably useless
+
+# TODO:
# Rename to_term to to_neutral!
use constant DEBUG => 1;
@@ -113,7 +116,7 @@
$_[0]->{prefix} = $_[1];
return $_[0];
};
- $_[0]->{prefix};
+ $_[0]->{prefix} // TOKEN_PREF;
};
@@ -131,11 +134,17 @@
# Todo: This doesn't respect
# direction
$self->prefix(REL_L_PREF);
+ }
+ elsif ($_[0] eq 'token') {
+
+ # Todo (copied from the relation branch; may not apply
+ # to tokens): This doesn't respect direction
+ $self->prefix(TOKEN_PREF);
};
return $self;
}
else {
- return 'token' unless $self->prefix; # if $self->prefix eq TOKEN_PREF;
+ return 'token' if $self->prefix eq TOKEN_PREF;
return 'span' if $self->prefix eq SPAN_PREF;
return 'attribute' if $self->prefix eq ATTR_PREF;
return 'relation';
@@ -308,7 +317,7 @@
if ($str) {
$str .= ':';
};
- $str .= $self->prefix if $self->prefix;
+ $str .= $self->prefix;
my $term = $self->to_string;
if ($self->operator ne '=') {
$term =~ s/!?[=~]/=/;
@@ -325,7 +334,8 @@
sub to_term_escaped {
my $self = shift;
my $term = $self->to_term;
- if ($term =~ m!^((?:[^:]+?\:)?(?:[^/]+?\/)?(?:[^=]+?)\=)(.+?)$!) {
+ # Dropped from the pattern start (a single leading character is matched instead): (?:[^:]+?\:)?
+ if ($term =~ m!^(.(?:[^/]+?\/)?(?:[^=]+?)\=)(.+?)$!) {
return quotemeta($1). $2;
};
return $term;
diff --git a/lib/Krawfish/Koral/Query/Token.pm b/lib/Krawfish/Koral/Query/Token.pm
index b054117..1b6af61 100644
--- a/lib/Krawfish/Koral/Query/Token.pm
+++ b/lib/Krawfish/Koral/Query/Token.pm
@@ -1,6 +1,7 @@
package Krawfish::Koral::Query::Token;
use parent 'Krawfish::Koral::Query';
# use Krawfish::Koral::Query::Token;
+use Krawfish::Util::Constants qw/:PREFIX/;
use Krawfish::Koral::Query::Term;
# use Krawfish::Query::Term;
use Krawfish::Log;
@@ -28,7 +29,7 @@
# Token is a string
unless (blessed $token) {
return bless {
- operands => [Krawfish::Koral::Query::Term->new($token)]
+ operands => [Krawfish::Koral::Query::Term->new(TOKEN_PREF . $token)]
}, $class;
};
diff --git a/lib/Krawfish/Util/Constants.pm b/lib/Krawfish/Util/Constants.pm
index 05655e1..38a8476 100644
--- a/lib/Krawfish/Util/Constants.pm
+++ b/lib/Krawfish/Util/Constants.pm
@@ -4,19 +4,19 @@
use Exporter 'import';
use constant {
- KEY_PREF => '!', # ! Field keys
- FIELD_PREF => '+', # + Field values
- FOUNDRY_PREF => '^', # ^
- LAYER_PREF => '&', # &
- SUBTERM_PREF => '.', # *
- TOKEN_PREF => '', # TODO!
- SPAN_PREF => '-', # <> Spans
+ KEY_PREF => '!', # Field keys
+ FIELD_PREF => '+', # Field values
+ FOUNDRY_PREF => '^', # Foundry
+ LAYER_PREF => '&', # Layer
+ SUBTERM_PREF => '.', # * before
+ TOKEN_PREF => ':', # Tokens (prefix was empty before)
+ SPAN_PREF => '-', # <> Spans before
ATTR_PREF => '@',
REL_L_PREF => '>',
REL_R_PREF => '<'
};
-our $ANNO_PREFIX_RE = qr/(?:\-|\@|\>|\<)/;
+our $ANNO_PREFIX_RE = qr/(?:\:|\-|\@|\>|\<)/;
our @EXPORT_OK = (qw/KEY_PREF
FIELD_PREF
diff --git a/t/corpus/and.t b/t/corpus/and.t
index 02df51a..3a690e2 100644
--- a/t/corpus/and.t
+++ b/t/corpus/and.t
@@ -13,6 +13,10 @@
genre => 'novel',
integer_age => 4
} => [qw/aa bb/], 'Add complex document');
+
+
+done_testing;
+__END__
ok_index($index, {
integer_id => 3,
author => 'Peter',
@@ -42,6 +46,7 @@
is($query->to_string, 'age=4&author=Peter', 'Stringification');
ok(!$query->is_negative, 'Check negativity');
+
ok(my $plan = $query->normalize->identify($index->dict)->optimize($index->segment), 'Planning');
is($plan->to_string, "and(#2,#6)", 'Stringification');
diff --git a/t/index/forward_stream.t b/t/index/forward_stream.t
index ec12e8e..175131b 100644
--- a/t/index/forward_stream.t
+++ b/t/index/forward_stream.t
@@ -30,7 +30,6 @@
my $qb = $koral->query_builder;
my $query;
-
# Check data by query retrieval
# Search for <akron/c=NP>
$koral->query($qb->span('akron/c=NP'));
@@ -39,6 +38,7 @@
is($query->to_string, 'filter(#11,[1])', 'Stringification');
matches($query, [qw/[0:0-3] [0:4-8]/], 'Search');
+
# Search for akron=Bau-Leiter
$koral->query($qb->token('akron=Bau-Leiter'));
ok($query = $koral->to_query->identify($index->dict)->optimize($index->segment),
@@ -98,7 +98,8 @@
ok(my @anno = $fwd->current->annotations, 'Get annotations');
is($anno[0]->[0], 14, 'Annotation');
-is($index->dict->term_by_term_id($anno[0]->[0]), 'akron=Bau-Leiter', 'Annotation');
+is($index->dict->term_by_term_id($anno[0]->[0]),
+ TOKEN_PREF . 'akron=Bau-Leiter', 'Annotation');
ok($fwd = $index->segment->forward->pointer, 'Get pointer');
ok(defined $fwd->skip_doc(0), 'Skip to first document');
@@ -114,7 +115,7 @@
my $foundry_id = $dict->term_id_by_term(FOUNDRY_PREF . 'opennlp');
my $layer_id = $dict->term_id_by_term(LAYER_PREF . 'p');
-my $anno_id = $dict->term_id_by_term('opennlp/p=V');
+my $anno_id = $dict->term_id_by_term(TOKEN_PREF . 'opennlp/p=V');
is_deeply($fwd->current->annotation(
$foundry_id,
diff --git a/t/index/segment.t b/t/index/segment.t
index c3438a1..507d058 100644
--- a/t/index/segment.t
+++ b/t/index/segment.t
@@ -1,5 +1,6 @@
use Test::More;
use Test::Krawfish;
+use Krawfish::Util::Constants qw/:PREFIX/;
use strict;
use warnings;
@@ -11,13 +12,13 @@
sub _postings {
my $term = shift;
- my $term_id = $index->dict->term_id_by_term($term) or return;
+ my $term_id = $index->dict->term_id_by_term(TOKEN_PREF . $term) or return;
return $index->segment->postings($term_id)->pointer;
};
# Get terms from the term dictionary
my $term_id;
-ok($term_id = $index->dict->term_id_by_term('Der'), 'Get term id');
+ok($term_id = $index->dict->term_id_by_term(TOKEN_PREF . 'Der'), 'Get term id');
is($term_id, 10, 'Term id valid');
ok(!$index->dict->term_id_by_term('Haus'), 'Get term id');
diff --git a/t/koral/doc.t b/t/koral/doc.t
index b13fb9b..a43c14a 100644
--- a/t/koral/doc.t
+++ b/t/koral/doc.t
@@ -24,7 +24,7 @@
# );
is(substr($doc->stream->to_string, 0, 40),
- q!(0)<>['Der';'Der'$1](1)< >['alte';'alte'!,
+ q!(0)<>['Der';'! . TOKEN_PREF . q!Der'$1](1)< >['alte';'! . TOKEN_PREF . q!alt!,
'Get stream');
is($doc->fields->to_string,
@@ -43,7 +43,7 @@
# );
is(substr($doc->stream->to_string, 0, 100),
- q!(0)<>['Der';'akron=Der'$1;'! .SPAN_PREF. q!akron/c=NP'$3](1)< >['Bau';'akron=Bau-Leiter'$3](2)<->['Leiter'](3)< >['!,
+ q!(0)<>['Der';'! . TOKEN_PREF . q!akron=Der'$1;'! .SPAN_PREF. q!akron/c=NP'$3](1)< >['Bau';'! . TOKEN_PREF . q!akron=Bau-Leiter'$3](2)<->['Leiter'](3)< >!,
'Get stream');
diff --git a/t/koral/inflate.t b/t/koral/inflate.t
index 1f8c2f2..b7a30cc 100644
--- a/t/koral/inflate.t
+++ b/t/koral/inflate.t
@@ -13,6 +13,7 @@
ok(my $qb = Krawfish::Koral::Query::Builder->new, 'Create Builder');
ok(my $q = $qb->term_re('[ac].'), 'Regex');
ok($q = $q->normalize->finalize, 'Prepare query');
+
is($q->to_string, "/[ac]./", 'Stringification');
ok($q = $q->identify($index->dict), 'Prepare query');
is($q->to_string, '(#10)|(#12)|(#2)|(#8)', 'Stringification');
@@ -20,7 +21,6 @@
ok($q = $q->optimize($index->segment), 'Prepare query');
is($q->to_string, "or(or(or(#10,#12),#8),#2)", 'Stringification');
-
# Class
ok($q = $qb->class(
$qb->term_re('[ac].'),
@@ -50,9 +50,15 @@
$qb->term('aa'),
$qb->term('bb')
), 'Regex in constraint');
-ok($q = $q->normalize->finalize->identify($index->dict), 'Prepare query');
+
+ok($q = $q->normalize->finalize, 'Prepare query');
+is($q->to_string, "constr(pos=precedes,between=1-1,notBetween=/[ac]./:aa,bb)",
+ 'Stringification');
+
+ok($q = $q->identify($index->dict), 'Prepare query');
is($q->to_string, "constr(pos=precedes,between=1-1,notBetween=(#10)|(#12)|(#2)|(#8):#2,#4)",
'Stringification');
+
ok($q = $q->optimize($index->segment), 'Prepare query');
is($q->to_string, "constr(pos=1,between=1-1,notBetween=or(or(or(#10,#12),#8),#2):#2,#4)",
'Stringification');
diff --git a/t/plan/term_group.t b/t/plan/term_group.t
index 394abce..79d5bdf 100644
--- a/t/plan/term_group.t
+++ b/t/plan/term_group.t
@@ -98,8 +98,8 @@
"or(constr(pos=32:#4,#2),constr(pos=32:#8,#6))",
'Stringification');
-is($index->dict->term_by_term_id(6), 'third', 'Check mapping');
-is($index->dict->term_by_term_id(8), 'fourth', 'Check mapping');
+is($index->dict->term_by_term_id(6), ':third', 'Check mapping');
+is($index->dict->term_by_term_id(8), ':fourth', 'Check mapping');
$query = $qb->token(
$qb->bool_or(
diff --git a/t/term.t b/t/term.t
index 47e4176..630c6d8 100644
--- a/t/term.t
+++ b/t/term.t
@@ -8,7 +8,7 @@
my $term = Krawfish::Koral::Query::Term->new('baum');
ok(!$term->field, 'No field');
-ok(!$term->prefix, 'No prefix');
+is($term->prefix, TOKEN_PREF, 'Token prefix');
ok(!$term->foundry, 'No foundry');
ok(!$term->layer, 'No layer');
is($term->key, 'baum', 'Key');
@@ -24,7 +24,7 @@
$term = Krawfish::Koral::Query::Term->new('opennlp=baum');
ok(!$term->field, 'No field');
-ok(!$term->prefix, 'No prefix');
+is($term->prefix, TOKEN_PREF, 'Token prefix');
is($term->foundry, 'opennlp', 'Foundry');
ok(!$term->layer, 'No layer');
is($term->key, 'baum', 'Key');
@@ -32,7 +32,7 @@
$term = Krawfish::Koral::Query::Term->new('opennlp/c=baum');
ok(!$term->field, 'No field');
-ok(!$term->prefix, 'No prefix');
+is($term->prefix, TOKEN_PREF, 'Token prefix');
is($term->foundry, 'opennlp', 'Foundry');
is($term->layer, 'c', 'Layer');
is($term->key, 'baum', 'Key');
@@ -40,7 +40,7 @@
$term = Krawfish::Koral::Query::Term->new('opennlp/p=gender:m');
ok(!$term->field, 'No field');
-ok(!$term->prefix, 'No prefix');
+is($term->prefix, TOKEN_PREF, 'Token prefix');
is($term->foundry, 'opennlp', 'Foundry');
is($term->layer, 'p', 'Layer');
is($term->key, 'gender', 'Key');
@@ -48,7 +48,7 @@
$term = Krawfish::Koral::Query::Term->new('opennlp/p != gender:m');
ok(!$term->field, 'No field');
-ok(!$term->prefix, 'No prefix');
+is($term->prefix, TOKEN_PREF, 'Token prefix');
is($term->foundry, 'opennlp', 'Foundry');
is($term->match, '!=', 'Layer');
is($term->layer, 'p', 'Layer');