Introduced squote as an util method
diff --git a/lib/Krawfish/Controller/Corpus.pm b/lib/Krawfish/Controller/Corpus.pm
index 93201e1..caecd2c 100644
--- a/lib/Krawfish/Controller/Corpus.pm
+++ b/lib/Krawfish/Controller/Corpus.pm
@@ -48,4 +48,48 @@
$c->render(json => $koral->to_result($index));
};
+
+# Get information per text
+sub text {
+ my $self = shift;
+
+ my $koral = Krawfish::Koral->new;
+ my $meta = $koral->meta_builder;
+
+ my $v = $c->validation;
+ $v->optional('fields');
+
+
+ # Get the text sigle from the stash
+ my $corpus_id = $c->stash('corpus_id');
+ my $doc_id = $c->stash('doc_id');
+ my $text_id = $c->stash('text_id');
+
+ my $sigle = join('/', $corpus_id, $doc_id, $text_id);
+
+ # Set corpus
+ $koral->corpus(
+ $koral->corpus_builder->string('text_sigle' => $text_sigle)
+ );
+
+ # Get the field information
+ my $fields = b($v->param('fields'))->split(',')->uniq->to_array;
+ if ($fields->[0]) {
+ $meta->fields($fields);
+ };
+
+ # Limit to a single match
+ $meta->limit(1);
+
+ # Set meta
+ $koral->meta($meta);
+
+ # Get segment index
+ my $index = $c->index->segment;
+
+ # Prepare query on index
+ $c->render(json => $koral->to_result($index));
+};
+
+
1;
diff --git a/lib/Krawfish/Corpus/Class.pm b/lib/Krawfish/Corpus/Class.pm
index 1a270e7..d6948e6 100644
--- a/lib/Krawfish/Corpus/Class.pm
+++ b/lib/Krawfish/Corpus/Class.pm
@@ -15,6 +15,12 @@
# the classes position, meaning
# only 8 classes are supported.
+# Class queries may also be useful to
+# have insights into the distribution
+# of a virtual corpus, e.g.
+# {1:lang=de}|{2:lang!=de}
+# getting the stats.
+
# TODO:
# Alternatively there could be a Compare() query
diff --git a/lib/Krawfish/Koral/Corpus/Field.pm b/lib/Krawfish/Koral/Corpus/Field.pm
index 3401fbf..0d26046 100644
--- a/lib/Krawfish/Koral/Corpus/Field.pm
+++ b/lib/Krawfish/Koral/Corpus/Field.pm
@@ -5,7 +5,10 @@
use strict;
use warnings;
-# TODO: Check for valid parameters
+# TODO:
+# - Check for valid parameters
+# - Only support positive terms
+# - Wrap in negative field!
sub new {
my $class = shift;
@@ -21,6 +24,7 @@
sub is_leaf { 1 };
+# Equal
sub eq {
my $self = shift;
$self->{match} = 'eq';
@@ -28,6 +32,8 @@
return $self;
};
+
+# Not equal
sub ne {
my $self = shift;
$self->{match} = 'eq';
@@ -36,6 +42,8 @@
return $self;
};
+
+# Check for negativity
sub is_negative {
my $self = shift;
if (scalar @_ == 1) {
@@ -81,6 +89,8 @@
return $self;
};
+
+# Contains the value in multi-token field
sub contains {
my $self = shift;
$self->{match} = 'contains';
@@ -88,6 +98,8 @@
return $self;
};
+
+# Does not contain the value in multi-token field
sub excludes {
my $self = shift;
$self->{match} = 'excludes';
@@ -95,6 +107,9 @@
return $self;
};
+sub can_toggle_negativity {
+};
+
sub plan_for {
my ($self, $index) = @_;
diff --git a/lib/Krawfish/Koral/Corpus/Negation.pm b/lib/Krawfish/Koral/Corpus/Negation.pm
new file mode 100644
index 0000000..9421b0c
--- /dev/null
+++ b/lib/Krawfish/Koral/Corpus/Negation.pm
@@ -0,0 +1,35 @@
+package Krawfish::Koral::Corpus::Negation;
+use parent 'Krawfish::Koral::Corpus';
+use strict;
+use warnings;
+use constant DEBUG => 0;
+
+sub new {
+ my $class = shift;
+ bless {
+ operand => shift
+ }, $class;
+};
+
+sub type {
+ 'neg';
+};
+
+sub operand {
+ $_[0]->{operand};
+};
+
+sub is_negative {
+ 1;
+};
+
+sub has_classes {
+ $_[0]->{operand}->has_classes;
+};
+
+sub to_koral_fragment {
+ my $self = shift;
+};
+
+
+1;
diff --git a/lib/Krawfish/Koral/Meta/Builder.pm b/lib/Krawfish/Koral/Meta/Builder.pm
index 785517c..a7098b9 100644
--- a/lib/Krawfish/Koral/Meta/Builder.pm
+++ b/lib/Krawfish/Koral/Meta/Builder.pm
@@ -34,4 +34,17 @@
$self;
};
+sub limit {
+ my $self = shift;
+ if (@_ == 2) {
+ $self->start_index(shift());
+ $self->items_per_page(shift());
+ }
+ else {
+ $self->start_index(0);
+ $self->items_per_page(shift());
+ };
+ $self;
+};
+
1;
diff --git a/lib/Krawfish/Koral/Query/Builder.pm b/lib/Krawfish/Koral/Query/Builder.pm
index c4471b0..4ae34bc 100644
--- a/lib/Krawfish/Koral/Query/Builder.pm
+++ b/lib/Krawfish/Koral/Query/Builder.pm
@@ -17,6 +17,7 @@
# TODO: Not all constraints need to be wrapped
use Krawfish::Koral::Query::Constraint::Position;
use Krawfish::Koral::Query::Constraint::ClassDistance;
+use Krawfish::Koral::Query::Constraint::NotBetween;
sub new {
my $class = shift;
@@ -118,6 +119,11 @@
Krawfish::Koral::Query::Constraint::ClassDistance->new(@_);
};
+sub c_not_between {
+ shift;
+ Krawfish::Koral::Query::Constraint::NotBetween->new(@_);
+};
+
sub length {
shift;
Krawfish::Koral::Query::Length->new(@_);
diff --git a/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm b/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm
new file mode 100644
index 0000000..72c5eda
--- /dev/null
+++ b/lib/Krawfish/Koral/Query/Constraint/NotBetween.pm
@@ -0,0 +1,25 @@
+package Krawfish::Koral::Query::Constraint::NotBetween;
+use Krawfish::Query::Constraint::NotBetween;
+use Krawfish::Util::String qw/squote/;
+use strict;
+use warnings;
+
+sub new {
+ my $class = shift;
+ bless {
+ query => shift
+ }, $class;
+};
+
+sub to_string {
+ my $self = shift;
+ return 'notBetween=' . squote($self->{query}->to_string);
+};
+
+sub plan_for {
+ my ($self, $index) = @_;
+ my $query = $self->{query}->plan_for($index);
+ Krawfish::Query::Constraint::NotBetween->new($query);
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Query/Sequence.pm b/lib/Krawfish/Koral/Query/Sequence.pm
index b2d2682..053e2c7 100644
--- a/lib/Krawfish/Koral/Query/Sequence.pm
+++ b/lib/Krawfish/Koral/Query/Sequence.pm
@@ -7,6 +7,8 @@
# TODO: Optimize if there is an identical subquery
# in a direct sequence - make this a repetition!!!
+# Todo: Check for queries like "Der {[pos!=ADJ]*} Mann"
+
use constant DEBUG => 0;
sub new {
diff --git a/lib/Krawfish/Koral/Util/BooleanTree.pm b/lib/Krawfish/Koral/Util/BooleanTree.pm
index d034e68..9063e89 100644
--- a/lib/Krawfish/Koral/Util/BooleanTree.pm
+++ b/lib/Krawfish/Koral/Util/BooleanTree.pm
@@ -8,10 +8,14 @@
use constant DEBUG => 1;
+# TODO:
+# To simplify this, it may be useful to use Negation instead of is_negative().
+# This means, fields with "ne" won't be "ne"-fields, but become not(term).
+# It's also easier to detect double negation.
+
# To disjunctive normal form / DNF
sub _normalize {
-
};
# TODO:
@@ -113,8 +117,11 @@
# - function: TF_Idempotent -> DONE
+# TODO:
+# This should return a cloned query instead of in-place creation
sub planned_tree {
my $self = shift;
+
foreach my $op (@{$self->operands}) {
if ($op && $op->type eq $self->type) {
$op->planned_tree
diff --git a/lib/Krawfish/Posting/Match.pm b/lib/Krawfish/Posting/Match.pm
index 7d697f9..06f35f4 100644
--- a/lib/Krawfish/Posting/Match.pm
+++ b/lib/Krawfish/Posting/Match.pm
@@ -1,5 +1,6 @@
package Krawfish::Posting::Match;
use parent 'Krawfish::Posting';
+use Krawfish::Util::String qw/squote/;
use JSON::XS;
use warnings;
use strict;
@@ -35,19 +36,12 @@
if ($self->{fields}) {
$str .= '|';
$str .= join ';', map {
- $_ . '=' . _squote($self->{fields}->{$_})
+ $_ . '=' . squote($self->{fields}->{$_})
} sort keys %{$self->{fields}};
};
return $str . ']';
};
-# From Mojo::Util
-sub _squote {
- my $str = shift;
- $str =~ s/(['\\])/\\$1/g;
- return qq{'$str'};
-};
-
1;
diff --git a/lib/Krawfish/Query.pm b/lib/Krawfish/Query.pm
index 3759b4e..6a2db97 100644
--- a/lib/Krawfish/Query.pm
+++ b/lib/Krawfish/Query.pm
@@ -35,10 +35,6 @@
return 1;
};
-
-# Forward to next start position
-sub next_greater_start;
-
sub freq_in_doc {
warn 'freq_in_doc only supported for term queries (see PostingPointer)';
};
@@ -58,6 +54,26 @@
};
+# Skip to (or beyond) a certain position
+# Returns true, if the new current is positioned
+# in the same document beyond the given pos.
+# Otherwise returns false.
+sub skip_pos {
+ my ($self, $pos) = @_;
+ my $current = $self->current or return;
+ my $doc_id = $current->doc_id;
+
+ while ($current->doc_id == $doc_id) {
+ if ($current->start < $pos) {
+ $self->next;
+ next;
+ };
+ return 1;
+ };
+ return;
+};
+
+
# Move both spans to the same document
sub same_doc {
my ($self, $second) = @_;
diff --git a/lib/Krawfish/Query/Constraint/NotBetween.pm b/lib/Krawfish/Query/Constraint/NotBetween.pm
index a08b0a4..fa7f689 100644
--- a/lib/Krawfish/Query/Constraint/NotBetween.pm
+++ b/lib/Krawfish/Query/Constraint/NotBetween.pm
@@ -1,8 +1,13 @@
package Krawfish::Query::Constraint::NotBetween;
-use Krawfish::Query::Constraint::Position; # Export constants
use strict;
use warnings;
+use constant {
+ NEXTA => 1,
+ NEXTB => 2,
+ MATCH => 4
+};
+
# Check, if a negative token is in between.
# Like [orth=Der][orth!=alte][orth=Mann].
@@ -10,20 +15,24 @@
# Ensure, when this constraint is used,
# that the constraint precedes(first,second) is true.
-use constant ALL_MATCH => NEXTA | NEXTB | MATCH;
+
+
+use constant ALL_MATCH => (NEXTA | NEXTB | MATCH);
sub new {
my $class = shift;
bless {
- query => shift
+ query => shift,
+ buffer => Krawfish::Util::Buffer->new
}, $class;
};
-sub _init {
+sub init {
my $self = shift;
- return if $self->{_init}++;
+ return if $self->{init}++;
$self->{query}->next;
+# $self->{buffer}->remember($self->{query}->current);
};
@@ -31,7 +40,10 @@
my $self = shift;
my ($payload, $first, $second) = @_;
- $self->_init;
+ $self->init;
+
+ # TODO:
+ # Use buffer API here
my $query = $self->{query};
@@ -52,7 +64,7 @@
};
# [NEG]..[FIRST] | [NEG][FIRST] | [FIRST[NEG]..]
- if ($negativ->start < $first->end) {
+ if ($negative->start < $first->end) {
# Move negative query to at least the end of the next position
$query->next_pos($first->end);
diff --git a/lib/Krawfish/Result/Segment/Aggregate/Facets.pm b/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
index 106ddfd..af58c74 100644
--- a/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
+++ b/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
@@ -1,5 +1,6 @@
package Krawfish::Result::Segment::Aggregate::Facets;
use parent 'Krawfish::Result::Segment::Aggregate::Base';
+use Krawfish::Util::String qw/squote/;
use Krawfish::Log;
use strict;
use warnings;
@@ -119,15 +120,7 @@
};
sub to_string {
- return 'facet:' . _squote($_[0]->{field});
-};
-
-
-# From Mojo::Util
-sub _squote {
- my $str = shift;
- $str =~ s/(['\\])/\\$1/g;
- return qq{'$str'};
+ return 'facet:' . squote($_[0]->{field});
};
diff --git a/lib/Krawfish/Result/Snippet.pm b/lib/Krawfish/Result/Snippet.pm
index ed0d3d2..a87ffb5 100644
--- a/lib/Krawfish/Result/Snippet.pm
+++ b/lib/Krawfish/Result/Snippet.pm
@@ -79,12 +79,4 @@
};
-# From Mojo::Util
-sub _squote {
- my $str = shift;
- $str =~ s/(['\\])/\\$1/g;
- return qq{'$str'};
-};
-
-
1;
diff --git a/lib/Krawfish/Result/Sort.pm b/lib/Krawfish/Result/Sort.pm
index 8aeee7e..0d7b6ec 100644
--- a/lib/Krawfish/Result/Sort.pm
+++ b/lib/Krawfish/Result/Sort.pm
@@ -1,6 +1,7 @@
package Krawfish::Result::Sort;
# use Krawfish::Result::Sort::InitRank;
# use Krawfish::Result::Sort::Rank;
+use Krawfish::Util::String qw/squote/;
use Krawfish::Log;
use strict;
use warnings;
@@ -198,20 +199,12 @@
sub to_string {
my $self = shift;
my $str = 'resultSorted(';
- $str .= '[' . join(',', map { _squote($_) } @{$self->{fields}}) . ']:';
+ $str .= '[' . join(',', map { squote($_) } @{$self->{fields}}) . ']:';
$str .= $self->{query}->to_string;
return $str . ')';
};
-# From Mojo::Util
-sub _squote {
- my $str = shift;
- $str =~ s/(['\\])/\\$1/g;
- return qq{'$str'};
-};
-
-
1;
diff --git a/lib/Krawfish/Result/Sort/PriorityCascade.pm b/lib/Krawfish/Result/Sort/PriorityCascade.pm
index 04cc68e..e0d17cd 100644
--- a/lib/Krawfish/Result/Sort/PriorityCascade.pm
+++ b/lib/Krawfish/Result/Sort/PriorityCascade.pm
@@ -1,5 +1,6 @@
package Krawfish::Result::Sort::PriorityCascade;
use parent 'Krawfish::Result';
+use Krawfish::Util::String qw/squote/;
use Krawfish::Util::PriorityQueue::PerDoc;
use Krawfish::Posting::Bundle;
use Krawfish::Log;
@@ -451,21 +452,13 @@
sub to_string {
my $self = shift;
my $str = 'resultSorted([';
- $str .= join(',', map { _squote($_->[0]) . ($_->[1] ? '>' : '<') } @{$self->{fields}});
+ $str .= join(',', map { squote($_->[0]) . ($_->[1] ? '>' : '<') } @{$self->{fields}});
$str .= ']';
$str .= ',0-' . $self->{top_k} if $self->{top_k};
$str .= ':' . $self->{query}->to_string;
return $str . ')';
};
-# From Mojo::Util
-sub _squote {
- my $str = shift;
- $str =~ s/(['\\])/\\$1/g;
- return qq{'$str'};
-};
-
-
1;
diff --git a/lib/Krawfish/Util/String.pm b/lib/Krawfish/Util/String.pm
index a6756cd..cdf0303 100644
--- a/lib/Krawfish/Util/String.pm
+++ b/lib/Krawfish/Util/String.pm
@@ -7,7 +7,7 @@
use parent 'Exporter';
use utf8;
-our @EXPORT = qw/fold_case remove_diacritics normalize_nfkc/;
+our @EXPORT = qw/fold_case remove_diacritics normalize_nfkc squote/;
# Helper package for unicode handling
@@ -45,4 +45,11 @@
return normalize('KC',$_[0]);
};
+# From Mojo::Util
+sub squote {
+ my $str = shift;
+ $str =~ s/(['\\])/\\$1/g;
+ return qq{'$str'};
+};
+
1;