Fix identification of snippet contexts and add a snippet match object
Change-Id: I64611f7dde84d2cf24e78d4c1963ce563f333a77
diff --git a/lib/Krawfish/Index/Dictionary.pm b/lib/Krawfish/Index/Dictionary.pm
index 341ebe8..c6e7d4e 100644
--- a/lib/Krawfish/Index/Dictionary.pm
+++ b/lib/Krawfish/Index/Dictionary.pm
@@ -5,6 +5,9 @@
use warnings;
use Krawfish::Log;
+# TODO:
+# Create a central class for the term prefix constants (e.g. '^' for foundries, '&' for layers)!
+
# This class is the basic dictionary class. It provides a
# homogeneous interface to K::I::Dictionary::Dynamic and
# K::I::Dictionary::Static (versioned).
diff --git a/lib/Krawfish/Koral/Meta/Enrich/Snippet.pm b/lib/Krawfish/Koral/Meta/Enrich/Snippet.pm
index f1a8cb6..9e10131 100644
--- a/lib/Krawfish/Koral/Meta/Enrich/Snippet.pm
+++ b/lib/Krawfish/Koral/Meta/Enrich/Snippet.pm
@@ -1,4 +1,5 @@
package Krawfish::Koral::Meta::Enrich::Snippet;
+use Krawfish::Koral::Meta::Enrich::Snippet::Match;
use Krawfish::Koral::Meta::Node::Enrich::Snippet;
use strict;
use warnings;
@@ -25,6 +26,8 @@
'snippet'
};
+
+# Get left context object
sub left_context {
my $self = shift;
if (ref $self->{context} eq 'ARRAY') {
@@ -34,6 +37,7 @@
};
+# Get right context object
sub right_context {
my $self = shift;
if (ref $self->{context} eq 'ARRAY') {
@@ -42,11 +46,28 @@
return $self->{context};
};
+
+# Get match object
+sub match {
+ my $self = shift;
+ if ($self->{match}) {
+ return $self->{match};
+ };
+
+ # Create empty match object
+ $self->{match} = Krawfish::Koral::Meta::Enrich::Snippet::Match->new;
+ return $self->{match};
+};
+
+
+# TODO:
+# Normalize contexts here!
sub normalize {
$_[0];
};
+# Stringification
sub to_string {
my $self = shift;
my $str = 'snippet=[';
@@ -57,7 +78,7 @@
if ($self->right_context) {
$str .= 'right:' . $self->right_context->to_string . ',';
};
- $str .= 'match';
+ $str .= $self->match->to_string;
return $str . ']';
};
@@ -68,9 +89,11 @@
my ($self, $query) = @_;
return Krawfish::Koral::Meta::Node::Enrich::Snippet->new(
query => $query,
- left => $self->left_context,
- right => $self->right_context
+ left => $self->left_context,
+ right => $self->right_context,
+ match => $self->match
);
};
+
1;
diff --git a/lib/Krawfish/Koral/Meta/Enrich/Snippet/Context/Separator.pm b/lib/Krawfish/Koral/Meta/Enrich/Snippet/Context/Separator.pm
new file mode 100644
index 0000000..49c1dd1
--- /dev/null
+++ b/lib/Krawfish/Koral/Meta/Enrich/Snippet/Context/Separator.pm
@@ -0,0 +1,7 @@
+# Some sources may have different requirements regarding the expansion size.
+# To make post-filtering easier, this context modifier
+# can add separators to the context, e.g. to restrict the visibility
+# of a match to 3 sentences, even if the user asked for a whole paragraph.
+#
+# This, however, may be part of the different context systems
+# (and the matching tool, of course).
diff --git a/lib/Krawfish/Koral/Meta/Enrich/Snippet/Context/Span.pm b/lib/Krawfish/Koral/Meta/Enrich/Snippet/Context/Span.pm
index 8ee3b93..439f94a 100644
--- a/lib/Krawfish/Koral/Meta/Enrich/Snippet/Context/Span.pm
+++ b/lib/Krawfish/Koral/Meta/Enrich/Snippet/Context/Span.pm
@@ -1,8 +1,11 @@
package Krawfish::Koral::Meta::Enrich::Snippet::Context::Span;
use Krawfish::Koral::Query::Term;
+use Krawfish::Log;
use strict;
use warnings;
+use constant DEBUG => 1;
+
sub new {
my ($class, $term_str, $count) = @_;
@@ -10,7 +13,12 @@
my $term = Krawfish::Koral::Query::Term->new($term_str);
if ($term->term_type ne 'span') {
- warn qq!Term "$term_str" is no span, but a ! . $term->term_type . '!';
+ if (DEBUG) {
+ print_log(
+ 'k_context_span',
+ qq!Term "$term_str" is no span, but a ! . $term->term_type . '!'
+ );
+ };
return;
};
@@ -24,7 +32,9 @@
'context_span'
};
-sub operations;
+sub operations {
+
+};
sub normalize {
$_[0];
@@ -35,14 +45,29 @@
my ($self, $dict) = @_;
my $term = $self->{term};
+
$self->{anno_id} = $dict->term_id_by_term($term->to_term);
+ if (DEBUG) {
+ print_log('k_context_span', 'Identify annotation for ' . $term->to_term);
+ };
+
# Term not found
return unless $self->{anno_id};
# Translate all other elements
- $self->{layer_id} = $dict->term_id_by_term('&' . $term->layer);
$self->{foundry_id} = $dict->term_id_by_term('^' . $term->foundry);
+
+ if (DEBUG) {
+ print_log('k_context_span', 'Identify foundry for ^' . $term->foundry);
+ };
+
+ $self->{layer_id} = $dict->term_id_by_term('&' . $term->layer);
+
+ if (DEBUG) {
+ print_log('k_context_span', 'Identify layer for &' . $term->layer);
+ };
+
return $self;
};
@@ -56,7 +81,14 @@
sub to_string {
my $self = shift;
- return 'span(' . $self->term->to_string . ',' . $self->count . ')';
+ my $str = 'span(';
+ if ($self->{anno_id}) {
+ $str .= '#' . $self->{foundry_id} . '/#' . $self->{layer_id} . '=#' . $self->{anno_id}
+ }
+ else {
+ $str .= $self->term->to_string
+ };
+ return $str . ',' . $self->count . ')';
};
diff --git a/lib/Krawfish/Koral/Meta/Enrich/Snippet/Match.pm b/lib/Krawfish/Koral/Meta/Enrich/Snippet/Match.pm
new file mode 100644
index 0000000..8abc326
--- /dev/null
+++ b/lib/Krawfish/Koral/Meta/Enrich/Snippet/Match.pm
@@ -0,0 +1,21 @@
+package Krawfish::Koral::Meta::Enrich::Snippet::Match;
+use strict;
+use warnings;
+
+# Define the match object
+# (e.g. which annotations should occur)
+
+sub new {
+ my $class = shift;
+ bless { @_ }, $class;
+};
+
+sub identify {
+ $_[0];
+};
+
+sub to_string {
+ return 'match';
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Meta/Node/Enrich/Snippet.pm b/lib/Krawfish/Koral/Meta/Node/Enrich/Snippet.pm
index 1c36175..bcfef54 100644
--- a/lib/Krawfish/Koral/Meta/Node/Enrich/Snippet.pm
+++ b/lib/Krawfish/Koral/Meta/Node/Enrich/Snippet.pm
@@ -26,7 +26,7 @@
if ($self->{right}) {
$str .= 'right=' . $self->{right}->to_string . ',';
};
- $str .= '?';
+ $str .= $self->{match}->to_string;
$str .= ':' . $self->{query}->to_string . ')';
};
@@ -37,6 +37,8 @@
my ($self, $dict) = @_;
# Identify contexts
+ # This may result in undef (no context) in case
+ # the requested span or token foundry does not exist
if ($self->{left}) {
$self->{left} = $self->{left}->identify($dict);
};
@@ -44,7 +46,14 @@
$self->{right} = $self->{right}->identify($dict);
};
+ # Identify match
+ # This will at least define a "surface only" match object,
+ # even if requested annotations do not exist
+ $self->{match} = $self->{match}->identify($dict);
+
+ # Identify query
$self->{query} = $self->{query}->identify($dict);
+
return $self;
};
diff --git a/t/plan/meta.t b/t/plan/meta.t
index ca5e6a4..bf45c1f 100644
--- a/t/plan/meta.t
+++ b/t/plan/meta.t
@@ -105,7 +105,7 @@
);
-# Introduce redundant operations and new sorts
+# Introduce snippet
$koral->meta(
$mb->enrich(
$mb->e_snippet(
@@ -124,7 +124,37 @@
'stringification');
$query = $koral->to_query;
-is($query->to_string, 'snippet(left=span(opennlp/s=s,0),right=span(opennlp/s=s,0),?:filter(a,[1]))', 'Stringification');
+is($query->to_string, 'snippet(left=span(opennlp/s=s,0),right=span(opennlp/s=s,0),match:filter(a,[1]))', 'Stringification');
+
+# The element doesn't exist, so the context is ignored
+$query = $query->identify($index->dict);
+is($query->to_string, 'snippet(match:[0])', 'Stringification');
+
+
+# Add new document
+ok_index($index, {
+ id => 7,
+ author => 'Stefan',
+ genre => 'novel',
+ age => 19
+} => '<1:xy>[aa]<2:opennlp/s=s>[aa]</1>[corenlp/c=cc|dd][aa]</2>', 'Add complex document');
+
+
+# Introduce snippet again, now with a span context that exists in the index
+$koral->meta(
+ $mb->enrich(
+ $mb->e_snippet(
+ context => $mb->e_span_context('<>opennlp/s=s', 0)
+ )
+ )
+);
+
+ok($query = $koral->to_query->identify($index->dict), 'Create query');
+
+# TODO: Better not check the concrete term ids in the following assertion ...
+is($query->to_string, 'snippet(left=span(#27/#28=#26,0),right=span(#27/#28=#26,0),match:[0])', 'Stringification');
+
+
done_testing;
__END__
diff --git a/t/result/segment/enrich_snippet.t b/t/result/segment/enrich_snippet.t
index e88408e..5789a3d 100644
--- a/t/result/segment/enrich_snippet.t
+++ b/t/result/segment/enrich_snippet.t
@@ -33,7 +33,7 @@
# This is a query that is fine to be send to nodes
is($koral_query->to_string,
- "snippet(?:filter(aa|bb,[1]))",
+ "snippet(match:filter(aa|bb,[1]))",
'Stringification');
# This is a query that is fine to be send to segments:
@@ -41,7 +41,7 @@
# This is a query that is fine to be send to nodes
is($koral_query->to_string,
- "snippet(?:filter(#10|#8,[1]))",
+ "snippet(match:filter(#10|#8,[1]))",
'Stringification');
ok(my $query = $koral_query->optimize($index->segment), 'Optimize');