Fixed a sequence serialization issue with complex non-anchor operands
diff --git a/lib/Krawfish/Info.pm b/lib/Krawfish/Info.pm
index 423f23d..508f395 100644
--- a/lib/Krawfish/Info.pm
+++ b/lib/Krawfish/Info.pm
@@ -1,18 +1,27 @@
package Krawfish::Info;
+use Krawfish::Log;
use strict;
use warnings;
+use constant DEBUG => 0; # Keep tracing off by default; set to 1 locally to log every error/warning/message call
+
# Add error
sub error {
- return shift->_info('error', @_);
+ my $self = shift;
+ print_log('info', 'Error: ' . join(' ', @_)) if DEBUG; # Trace the space-joined arguments, only when DEBUG is true
+ return $self->_info('error', @_); # Pass 'error' plus the remaining arguments on to _info
};
sub warning {
- return shift->_info('warning', @_);
+ my $self = shift;
+ print_log('info', 'Warning: ' . join(' ', @_)) if DEBUG; # Trace the space-joined arguments, only when DEBUG is true
+ return $self->_info('warning', @_); # Pass 'warning' plus the remaining arguments on to _info
};
sub message {
- return shift->_info('message', @_);
+ my $self = shift;
+ print_log('info', 'Message: ' . join(' ', @_)) if DEBUG; # Trace the space-joined arguments, only when DEBUG is true
+ return $self->_info('message', @_); # Pass 'message' plus the remaining arguments on to _info
};
# Is there an error?
diff --git a/lib/Krawfish/Koral/Query.pm b/lib/Krawfish/Koral/Query.pm
index 9ea48d2..6bb0bd3 100644
--- a/lib/Krawfish/Koral/Query.pm
+++ b/lib/Krawfish/Koral/Query.pm
@@ -3,6 +3,7 @@
# TODO: Use the same parent as Koral::Corpus
use Krawfish::Koral::Query::Builder;
use Krawfish::Koral::Query::Importer;
+use Krawfish::Log;
use Mojo::Util qw/md5_sum/;
use warnings;
use strict;
@@ -18,7 +19,8 @@
# TODO:
# This is now double with Krawfish::Koral!
use constant {
- CONTEXT => 'http://korap.ids-mannheim.de/ns/koral/0.6/context.jsonld'
+ CONTEXT => 'http://korap.ids-mannheim.de/ns/koral/0.6/context.jsonld',
+ DEBUG => 0
};
sub new {
@@ -103,6 +105,10 @@
sub finalize {
my $self = shift;
+ if (DEBUG) {
+ print_log('kq_query', 'Finalize query ' . $self->to_string);
+ };
+
my $query = $self;
# The query matches everywhere
@@ -111,6 +117,7 @@
return;
};
+
# The query matches nowhere
if ($query->is_nothing) {
return $query->builder->nothing;
diff --git a/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm b/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm
index dde9b5e..5f340a9 100644
--- a/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm
+++ b/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm
@@ -9,9 +9,7 @@
# Check that a query between two operands is does nmot occur.
# In case this operand never occurs, it will at least set a relevant length.
-# TODO:
-# Check min_tokens and max_tokens
-
+# Constructor
sub new {
my $class = shift;
bless {
diff --git a/lib/Krawfish/Koral/Query/Sequence.pm b/lib/Krawfish/Koral/Query/Sequence.pm
index c0c1fe6..3a2b6da 100644
--- a/lib/Krawfish/Koral/Query/Sequence.pm
+++ b/lib/Krawfish/Koral/Query/Sequence.pm
@@ -54,13 +54,20 @@
return if $self->{_checked};
+ if (DEBUG) {
+ print_log('kq_seq', 'Check ' . $self->to_string . ' with ' . (@{$self->operands}) . ' operands');
+ };
+
# Check all operands
foreach (@{$self->operands}) {
+ if (DEBUG) {
+ print_log('kq_seq', 'Check operand ' . $_->to_string);
+ };
+
# If one operand is set - return null
unless ($_->is_null) {
$self->{null} = 0;
- $self->{any} = 0;
};
unless ($_->is_any) {
@@ -70,14 +77,20 @@
if ($_->maybe_unsorted) {
$self->{maybe_unsorted} = 1;
};
+
+ if (DEBUG) {
+ print_log('kq_seq', 'Operand ' . $_->to_string . ' is checked');
+ };
};
$self->{_checked} = 1;
};
+
sub is_any {
my $self = shift;
$self->_check;
+ print_log('kq_seq', 'Check for any: ' . $self->to_string . ' is ' . $self->{any}) if DEBUG; # Debug trace of the 'any' flag, emitted after _check has run
return $self->{any};
};
diff --git a/lib/Krawfish/Koral/Util/Sequential.pm b/lib/Krawfish/Koral/Util/Sequential.pm
index 0a88528..21599f0 100644
--- a/lib/Krawfish/Koral/Util/Sequential.pm
+++ b/lib/Krawfish/Koral/Util/Sequential.pm
@@ -30,8 +30,12 @@
sub normalize {
my $self = shift;
+ print_log('kq_sequtil', 'Normalize query ' . $self->to_string) if DEBUG;
+
my $ops = $self->operands;
+ print_log('kq_sequtil', '1st pass - flatten and mark anchors') if DEBUG;
+
# First pass - mark anchors
my $problems = 0;
for (my $i = 0; $i < @$ops; $i++) {
@@ -39,14 +43,19 @@
# Operand in question
my $op = $ops->[$i];
+ print_log('kq_sequtil', 'Check operand in sequence ' . $op->to_string) if DEBUG;
+
# Sequences are no constraints!
if ($op->type eq 'sequence') {
+ print_log('kq_sequtil', 'Flatten embedded sequence ' . $op->to_string) if DEBUG;
+
# TODO:
# This currently ignores negative sequences
# Replace operand with operand list
splice @$ops, $i, 1, @{$op->operands};
+ redo;
};
# Operand can be ignored
@@ -62,15 +71,73 @@
};
# Normalize operands
- $ops->[$i] = $ops->[$i]->normalize;
+ my $new_op = $op->normalize;
+
+ # New op can't be normalized, for example it is a
+ # classed sequence of any-operators
+ if (!$new_op) {
+
+ if (DEBUG) {
+ print_log('kq_sequtil', 'Operand ' . $op->to_string . ' is not normalizable');
+ print_log('kq_sequtil', 'Strip potential classes');
+ };
+
+ # First unpack classes
+ my @classes = ();
+ while ($op->type eq 'class') {
+ push @classes, $op->number;
+ $op = $op->operand;
+ };
+
+ # Operand somehow matches anywhere
+ # This can be the case with something like {1:[]{2:[]}}
+ if ($op->is_any) {
+
+ if (DEBUG) {
+ print_log('kq_sequtil', 'Query matches anywhere ' . $op->to_string);
+ };
+
+ my $qb = $self->builder;
+
+ # Create any span
+ $new_op = $qb->repeat(
+ $qb->any,
+ $op->min_span,
+ $op->max_span
+ );
+
+ # Re-add classes
+ foreach (@classes) {
+ $new_op = $qb->class($new_op, $_);
+ };
+
+ # A minor warning that we cheated
+ $self->warning(
+ 000,
+ 'Nested classes in empty token sequences are not yet supported',
+ $op->to_string
+ );
+ }
+
+ # I don't know when this could happen ...
+ else {
+ $self->error(000, 'Subsequence is not normalizable', $op->to_string);
+ return;
+ };
+
+ # Normalize newly built query
+ $new_op = $new_op->normalize;
+ };
+
+ $ops->[$i] = $new_op;
# Push to problem array
unless ($op->maybe_anchor) {
+ print_log('kq_sequtil', 'Operand is no anchor: ' . $op->to_string) if DEBUG;
$problems++;
};
};
-
# No operands left
unless (scalar @$ops) {
@@ -101,11 +168,10 @@
# Simplify repetitions
# $self = $self->_resolve_consecutive_repetitions;
- # There are no problems
- return $self unless $problems;
+ print_log('kq_sequtil', 'Sequence has ' . ($problems+0) . ' problems') if DEBUG;
# Remember problems
- $self->{_problems} = 1;
+ $self->{_problems} = 1 if $problems;
return $self;
};
@@ -494,6 +560,11 @@
sub _combine_any {
my ($queries, $index_a, $index_b, $index_between) = @_;
+ if (DEBUG) {
+ print_log('kq_sequtil', "Combine to ANY distance with positions " .
+ "$index_a:$index_between:$index_b");
+ };
+
my $new_query;
my $any = $queries->[$index_between]->[KQUERY];
my $constraint = {};
@@ -509,6 +580,8 @@
$constraint->{classes} //= [];
push @{$constraint->{classes}}, $any->number;
+ print_log('kq_sequtil', "Unpack classed query " . $any->to_string) if DEBUG;
+
# Return inner-query
$any = $any->operand;
};
@@ -570,6 +643,10 @@
my $query = $queries->[$index_between]->[KQUERY];
+ # TODO:
+ # Better use the builder and normalize - this will do all
+ # the optimizations on the fly (including min_span/max_span optimization)
+
# Negative element is optional
if ($query->is_optional) {
$constraint->{optional} = 1;