Improve snippet generation with different markup classes
Change-Id: I96cc269b49c2cadcd692ae9030b077b4a8a25b70
diff --git a/lib/Krawfish/Compile/Remote/Sort.pm b/lib/Krawfish/Compile/Remote/Sort.pm
index 8292a67..489c912 100644
--- a/lib/Krawfish/Compile/Remote/Sort.pm
+++ b/lib/Krawfish/Compile/Remote/Sort.pm
@@ -21,6 +21,9 @@
# as well. This will override rank sorting on terms and fields
# (and is probably slow).
+# TODO:
+# see https://www.elastic.co/guide/en/elasticsearch/reference/6.0/modules-cross-cluster-search.html
+
sub new {
my $class = shift;
bless {
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
index 2bde737..d7f166a 100644
--- a/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
+++ b/lib/Krawfish/Compile/Segment/Enrich/Snippet.pm
@@ -3,6 +3,10 @@
use warnings;
use Krawfish::Koral::Result::Enrich::Snippet;
# use Krawfish::Compile::Segment::Enrich::Snippet::Highlights;
+
+use Krawfish::Koral::Result::Enrich::Snippet::Hit;
+use Krawfish::Koral::Result::Enrich::Snippet::Highlight;
+
use Krawfish::Koral::Document::Stream;
use Krawfish::Koral::Document::Subtoken;
use Krawfish::Log;
@@ -12,6 +16,7 @@
use constant DEBUG => 1;
+
# TODO:
# It may be more efficient to first collect all required
# annotations (for decoration, context, hit etc.) and
@@ -47,11 +52,14 @@
sub new {
my $class = shift;
- # query
- # fwd_obj
- # left
- # right
- # hit
+ # - query
+ # - fwd_obj
+ # - left
+ # - right
+ # - hit
+ #
+ # TODO:
+ # Pass highlight class list
return bless { @_ }, $class;
};
@@ -65,25 +73,7 @@
};
-# Get extension element
-sub extension {
- $_[0]->{extension}
-};
-
-
-# Get left context object
-sub left_context {
- $_[0]->{left};
-};
-
-
-# Get right object context
-sub right_context {
- $_[0]->{right};
-};
-
-
-# Iterated through the ordered matches
+# Iterate through the ordered matches
sub next {
my $self = shift;
$self->_init;
@@ -104,22 +94,42 @@
# Get current match from query
my $match = $self->match_from_query;
- print_log('c_snippet', 'match is ' . $match->to_string) if DEBUG;
+ if (DEBUG) {
+ print_log('c_snippet', 'match is ' . $match->to_string);
+ };
- # Create new snippet object
+ # Create hit object
+ my $hit = Krawfish::Koral::Result::Enrich::Snippet::Hit->new(
+ start => $match->start,
+ end => $match->end
+ );
+
+ # Create new snippet result object
my $new_snippet = Krawfish::Koral::Result::Enrich::Snippet->new(
- hit_start => $match->start,
- hit_end => $match->end,
doc_id => $match->doc_id
);
+ # Add hit object
+ $new_snippet->add($hit);
- # TODO:
- # Check for classes with supported highlights!
+ # Retrieve classes from match
foreach my $highlight ($match->get_classes) {
- # Add highlight
- $new_snippet->add_highlight($highlight);
+    # TODO:
+    #   Check for classes with supported highlights!
+
+    # Classes are read as (number, start, end): compare positions, not the number
+    my ($nr, $start, $end) = @$highlight;
+    if ($start >= $new_snippet->hit_start && $end <= $new_snippet->hit_end) {
+      my $e = Krawfish::Koral::Result::Enrich::Snippet::Highlight->new(
+        number => $nr,
+        start  => $start,
+        end    => $end,
+      );
+
+      # Add highlight
+      $new_snippet->add($e);
+    };
};
@@ -137,6 +147,25 @@
};
+# Get the possible extension element that will extend the scope
+# of the hit to the match
+sub extension {
+  return shift->{extension};
+};
+
+
+# Get the object stored as left context
+sub left_context {
+  return shift->{left};
+};
+
+
+# Get the object stored as right context
+sub right_context {
+  return shift->{right};
+};
+
+
# Add all relevant annotations from the forward stream
# from the start to the end - including extensions and context!
@@ -192,7 +221,7 @@
else {
# set optional extension end to same value as hit end
- $snippet->extension_end($snippet->hit_end);
+ $snippet->focus_end($snippet->hit_end);
};
# Get context, if left context is defined
@@ -252,8 +281,10 @@
last unless $forward->next;
# TODO:
- # $subtoken->add_annotation(
- # Krawfish::Koral::Document::Annotation->()
+ # $snippet->add(
+ # Krawfish::Koral::Document::Annotation->()
+ # or
+ # Krawfish::Koral::Result::Enrich::Snippet::Span etc.
# );
# TODO:
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Highlights.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Highlights.pm
deleted file mode 100644
index 0f8bfb3..0000000
--- a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Highlights.pm
+++ /dev/null
@@ -1,564 +0,0 @@
-package Krawfish::Compile::Segment::Enrich::Snippet::Highlights;
-use Krawfish::Log;
-use strict;
-use warnings;
-
-use constant DEBUG => 0;
-
-# -1 is match highlight
-# $annotation_nr_counter = 256;
-# $relation_number_counter = 2048;
-# $identifier_number_counter = -2;
-
-# private HashMap<Integer, String> annotationNumber = new HashMap<>(16);
-# private HashMap<Integer, Relation> relationNumber = new HashMap<>(16);
-# private HashMap<Integer, Integer> identifierNumber = new HashMap<>(16);
-
-sub new {
- my $class = shift;
- bless {
- highlights => shift,
- segments => shift,
- list => [], # Combined array
- stack => [] # Stack for balancing the elements
- }, $class;
-};
-
-
-sub clear {
- ...
-};
-
-
-sub add_open {
- my $self = shift;
-};
-
-sub process {
- ...
-};
-
-1;
-
-__END__
-
-
-sub parse_simple {
- my $self = shift;
-
- my $segments = $self->{segments};
-
- # TODO:
- # In Krill, offsets are collected in advance,
- # but I guess it's cleaner to do on the fly
-
- print_log('c_highl', 'Process highlight stack') if DEBUG;
-
- my @highlights = ();
-
- # my $start_seg = $segments->get($match->doc_id, $match->start);
- # my $end_seg = $segments->get($match->doc_id, $match->end - 1);
-
- # Add match as highlight
- push @highlights, _highlight($match->start, $match->end, -1);
-
- # TODO:
- # Check that highlights are between these values
- # my $start_pos = $match->start;
- # my $end_pos = $match->end;
-
- # TODO:
- # Filter multiple identifiers, that may be introduced and would
- # result in invalid xml
- # this._filterMultipleIdentifiers();
-
- my @open_list = sort _open_sort @highlights;
- my @close_list = sort _close_sort @highlights;
-
- # Final highlight stack
- my @stack = ();
-
- # Create sorted stack unless both lists are empty
- while (scalar @open_list || scalar @close_list) {
-
- # Shortcut for list ending
- if (!scalar @open_list) {
- push @stack, map { $_->[4] = } @close_list;
- last;
- }
-
- # Not sure about this - but may happen
- elsif (!scalar @close_list) {
- last;
- };
-
- # Type 0: Textual data
- # Type 1: Opening
- # Type 2: Closing
-
- # Check if the opening tag starts before the closing tag ends
- if ($open_list[0]->[0] < $close_list[0]->[1]) {
-
- # Clone highlight
- my $element = [@{(shift(@open_list))}];
-
- # Set element to be terminal
- $element->[3] = 1; # terminal
- $element->[4] = 1; # Opening
- push @stack, $element;
- }
-
- # No - then close
- else {
- my $element = shift(@close_list);
- $element->[4] = 2; # closing
- push @stack, $element;
- };
- };
-
- $self->{stack} = \@stack; # is a position stack!
-
- # TODO:
- # Problem to solve is now discontinuing elements!
-
- return $self;
-};
-
-
-sub _highlight {
- my ($start, $end, $class, $terminal) = @_;
- return [$start, $end, $class, $terminal // 0];
-};
-
-# Sort opening tags by start, end and class number
-sub _open_sort {
-
- # Compare start position
- if ($a->start > $b->start) {
- return 1;
- }
- elsif ($a->start == $b->start) {
- # Compare end position
- if ($a->end > $b->end) {
- return -1;
- }
- elsif ($a->end == $b->end) {
- # Compare class number
- if ($a->[2] > $b->[2]) {
- return 1;
- }
- elsif ($a->[2] < $b->[2]) {
- return -1;
- };
- return 0;
- };
- }
- return -1;
-};
-
-
-# Sort closing tags by end and start
-sub _close_sort {
-
- # Compare end positions
- if ($a->[1] > $b->[1]) {
- return 1;
- }
- elsif ($a->[1] == $b->[1]) {
-
- # Compare start position
- if ($a->[0] < $b->[0]) {
- return 1;
- }
- elsif ($a->[0] == $b->[0]) {
- return 0;
- };
- return -1;
- };
- return -1;
-};
-
-
-
-
-
-1;
-
-
-
-
-__END__
-
-sub parse {
- my ($self, $match) = @_;
-
- my $segments = $self->{segments};
-
- # Collect offsets for match
- # TODO: In Krill, offsets are collected in advance,
- # but I guess it's cleaner to do on the fly
- my $start_seg = $segments->get($match->doc_id, $match->start);
- my $end_seg = $segments->get($match->doc_id, $match->end - 1);
-
- # TODO: Collect offsets for inner match
-
- # match number
-
- # TODO: Parse identifier string
-
- # $self->add_open(0, $start_seg->[0]);
- # $self->add_close(0, $end_seg->[1]);
-
- # foreach (@highlights) {
- # if ($_->start >= $match->start && $_->end <= $self->end) {
- #
- # };
- # }
-
- my $stack = $self->_process_highlight_stack;
- return $self->{list};
-};
-
-
-sub _process_highlight_spans {
- my $self = shift;
-
- # TODO:
- # Check potential start and end characters here
-
- my $identifier = undef;
-
- # my $array = $self->_process_offset_chars($match->doc_id);
-
- # foreach my $highlight (@{$self->highlights}) {
- # my $start = $self->{segments}->get($match->doc_id, $highlight->start);
- # my $end = $self->{segments}->get($match->doc_id, $highlight->end);
-
- # return if $start < 0 || $end < 0;
- # $self->{span}->add($start, $end, $highlight->nr);
- # };
-};
-
-
-# TODO: Process context, primary data
-sub _process_offset_chars {
-# my $self = shift;
- # if ($context) {}
-};
-
-sub _process_highlight_stack {
- my $self = shift;
-
- print_log('c_highl', 'Process highlight stack') if DEBUG;
-
- my @open_list = ();
- my @close_list = ();
-
- # TODO:
- # Filter multiple identifiers, that may be introduced and would
- # result in invalid xml
- # this._filterMultipleIdentifiers();
-
- my @highlights = @_;
-
- push @open_list, @highlights;
- push @close_list, @highlights;
-
- @open_list = sort _open_sort @open_list;
- @close_list = sort _close_sort @open_list;
-
- my @stack = ();
-
- # Create sorted stack unless both lists are empty
- while (scalar @open_list || scalar @close_list) {
- if (!scalar @open_list) {
- push @stack, @close_list;
- last;
- }
-
- # Not sure about this - but may happen
- elsif (!scalar @close_list) {
- last;
- };
-
- if ($open_list[0]->start < $close_list[0]->end) {
-
- my $e = (shift(@open_list))->clone;
- $e->[3] = 1;
- push @stack, $e;
- }
- else {
- push @stack, shift(@close_list)
- };
- };
- return \@stack;
-};
-
-sub add_close {
- my $self = shift;
- my ($nr, $end) = @_;
-
- $self->{temp_stack} = [];
-
- # Check if there is an opening tag
- unless ($self->{stack}->[0]) {
- warn 'Nothing to close on stack';
- return;
- };
-
- if (DEBUG) {
- print_log(
- 'c_highl',
- "Stack for checkinmg with class $nr is " .
- join('|', @{$self->{stack}})
- );
- };
-
-
- # Class number of the last element
- my $eold = pop @{$self->{stack}};
-
- my $last_combinator;
-
- # the closing element is not balanced, i.e. the last element differs
- while ($eold != $nr) {
-
- # Get last element
- $last_combinator = $self->{list}->[-1];
-
- if (DEBUG) {
- print_log(
- 'c_highl',
- 'Closing element is unbalanced - ' .
- $eold . ' != ' . $nr . ' with last combinator ' .
- join('|',
- $last_combinator->{type},
- $last_combinator->{nr},
- $last_combinator->{chars}
- )
- );
- };
-
- # combinator is opening and the number is not equal to the last
- # element on the balanceStack
- if ($last_combinator->{type} == 1 && $last_combinator->{nr} == $eold) {
-
- # Remove the last element - it's empty and uninteresting!
- pop @{$self->{list}};
- }
-
- # combinator is either closing (??) or another opener
- else {
-
- print_log('c_highl', "Close element a) $eold") if DEBUG;
-
- # Add close element of the unclosed element
- # This will be continued
- push @{$self->{list}}, Krawfish::Collection::Snippet::Highlights::Combinator->new_node(
- 2, $eold, 0
- );
- };
-
- # add this element number temporarily on the stack
- push @{$self->{temp_stack}}, $eold;
-
- # Check next element
- $eold = pop @{$self->{stack}};
- };
-
- # Get last combinator on the stack
- $last_combinator = $self->{list}->[-1];
-
- if (DEBUG) {
- print_log(
- 'c_highl',
- "LastComb: " .
- join('|',
- $last_combinator->{type},
- $last_combinator->{nr},
- $last_combinator->{chars}
- ) .
- " for $nr"
- );
- };
-
- if ($last_combinator->{type} == 1 && $last_combinator->{nr} == $nr) {
-
- while ($last_combinator->{type} == 1 && $last_combinator->{nr} == $nr) {
- # Remove the damn thing - It's empty and uninteresting!
- pop @{$self->{list}};
- $last_combinator = $self->{list}->[-1];
- };
- }
-
- else {
- print_log('c_highl', "Close element b) $nr") if DEBUG;
-
- # Add closer
- push @{$self->{list}}, Krawfish::Collection::Snippet::Highlights::Combinator->new_node(
- 2, $eold, 1
- );
- };
-
- for my $e (@{$self->{temp_stack}}) {
- print_log('c_highl', "Reopen element $e") if DEBUG;
- push @{$self->{list}}, Krawfish::Collection::Snippet::Highlights::Combinator->new_node(
- 1, $e
- );
-
- push @{$self->{stack}}, $e;
- };
-};
-
-
-sub get_first {
- $_[0]->{list}->[0];
-};
-
-
-sub get_last {
- $_[0]->{list}->[-1];
-};
-
-
-sub get {
- $_[0]->{list}->[$_[1]];
-};
-
-
-sub size {
- scalar @{$_[0]->{list}}
-};
-
-
-# Add textual element
-sub add_string {
- my ($self, $string) = @_;
- my $element = Krawfish::Collection::Snippet::Highlights::Combinator->new_text(
- $string
- );
- push @{$self->{list}}, $element;
-};
-
-
-# Open element
-sub add_open {
- my ($self, $number) = @_;
-
- my $element = Krawfish::Collection::Snippet::Highlights::Combinator->new_node(
- 1 => $number
- );
- push @{$self->{list}}, $element;
- push @{$self->{stack}}, $number;
-};
-
-
-sub to_string {
- my $self = shift;
- my $str = '';
- foreach (@{$self->{list}}) {
- $str .= $_->to_string . "\n";
- };
- return $str;
-};
-
-
-package Krawfish::Collection::Snippet::Highlights::Combinator;
-use strict;
-use warnings;
-
-# Type 0: Textual data
-# Type 1: Opening
-# Type 2: Closing
-
-# Constructor for nodes
-sub new_node {
- my $class = shift;
- my $self = bless {
- type => shift, # byte
- nr => shift, # integer
- terminal => shift // 1, # boolean
- chars => ''
- }, $class;
-
- # Terminal elements are closed and won't be reopened
-
- return $self;
-};
-
-
-# Constructor for textual data
-sub new_text {
- my $class = shift;
- bless {
- type => 0,
- chars => shift,
- nr => 0,
- terminal => 1
- }, $class;
-};
-
-
-# TODO: This may not be set here
-sub to_bracket {
- my $self = shift;
- my $match = shift;
-
- my $str = '';
-
- # Closing bracket
- if ($self->{type} == 2) {
-
- # Close matching element
- if ($self->{nr} == -1) {
- return ']';
- };
-
- # Close matching highlight, relation, span ...
- return '}';
- }
-
- elsif ($self->{type} == 1) {
- if ($self->{nr} == -1) {
- $str .= '[';
- }
-
- # Is identifier
- elsif ($self->{nr} < -1) {
- $str .= '{#' . $match->class_id($self->{nr}) . ':';
- }
-
- # Highlight, relation, Span
- else {
- $str .= '{';
-
- # Todo: Use highlight directive
-
- if ($self->{nr} >= 256) {
-
- # Is an annotation?
- if ($self->{nr} < 2048) {
- $str .= $match->annotation_id($self->{nr});
- }
-
- # Relation
- else {
- my $rel = $match->relation_id($self->{nr});
- $str .= $rel->annotation;
- $str .= '>';
- }
- }
-
- # Highlight
- elsif ($self->{nr} != 0) {
- $str .= $self->{nr} . ':';
- }
-
- return $str;
- };
-
- return $self->{chars};
- };
-};
-
-1;
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Hit.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Hit.pm
deleted file mode 100644
index 8528a34..0000000
--- a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Hit.pm
+++ /dev/null
@@ -1,63 +0,0 @@
-package Krawfish::Compile::Segment::Enrich::Snippet::Hit;
-use Krawfish::Koral::Document::Subtoken;
-use Krawfish::Log;
-use strict;
-use warnings;
-
-use constant DEBUG => 0;
-
-
-# Constructor
-sub new {
- my $class = shift;
- warn 'DEPRECATED!';
-
- bless {
- @_
- }, $class;
-};
-
-
-# This will read the hit content
-sub content {
- my ($self, $match, $forward) = @_;
-
- if ($match->start != $forward->pos) {
- warn 'The current position is not at the start position of the match';
- return;
- };
-
- # Get all surface tokens of the match and store in data stream
- # TODO:
- # Data may be an abstract snippet object!
- my @data;
- my $length = $match->end - $match->start;
- while ($length > 0) {
-
- # Get the current token
- my $current = $forward->current;
-
- # Add token to text
- push @data, Krawfish::Koral::Document::Subtoken->new_by_term_id(
- $current->preceding_data,
- $current->term_id
- );
-
- # Get the surface data
- $length--;
- $forward->next or last;
- };
-
- if (DEBUG) {
- print_log('c_snippet', 'Add snippet match data: ' . join(',', @data));
- };
-
- return \@data;
-};
-
-sub to_string {
- 'hit';
-};
-
-
-1;
diff --git a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Spans.pm b/lib/Krawfish/Compile/Segment/Enrich/Snippet/Spans.pm
deleted file mode 100644
index f7ad380..0000000
--- a/lib/Krawfish/Compile/Segment/Enrich/Snippet/Spans.pm
+++ /dev/null
@@ -1,32 +0,0 @@
-package Krawfish::Compile::Segment::Enrich::Snippet::Spans;
-use Krawfish::Log;
-use strict;
-use warnings;
-
-use constant DEBUG => 0;
-
-sub new {
- my $class = shift;
- bless {
- elements => [],
- text => ''
- }, $class;
-};
-
-sub add_element {
- my ($self, $element) = @_;
- push @{$self->{elements}}, $element;
-};
-
-sub add_text {
- my ($self, $text) = @_;
- $self->{text} .= $text;
- return $self;
-};
-
-sub to_html {
- my $self = shift;
- return $self->{text};
-};
-
-1;
diff --git a/lib/Krawfish/Koral/Document.pm b/lib/Krawfish/Koral/Document.pm
index bfc465a..74297f7 100644
--- a/lib/Krawfish/Koral/Document.pm
+++ b/lib/Krawfish/Koral/Document.pm
@@ -26,6 +26,13 @@
# TODO:
# Don't forget to deal with TUIs!
+# TODO:
+# Add character extensions to the forward index only
+
+# TODO:
+# Fields need - depending on the type -
+# a prefix AND a postfix!
+
use constant DEBUG => 0;
# Parse the document and create an inverted index file
diff --git a/lib/Krawfish/Koral/Document/Annotation.pm b/lib/Krawfish/Koral/Document/Annotation.pm
index 4108dde..a4c5573 100644
--- a/lib/Krawfish/Koral/Document/Annotation.pm
+++ b/lib/Krawfish/Koral/Document/Annotation.pm
@@ -5,6 +5,11 @@
use strict;
use Krawfish::Koral::Query::Term;
+# TODO:
+# Have common methods with
+# Krawfish::Koral::Result::Enrich::Snippet::Markup
+
+
# Accepts a Krawfish::Koral::Query::Term object
sub new {
my $class = shift;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet.pm
index a03ac06..af7935a 100644
--- a/lib/Krawfish/Koral/Result/Enrich/Snippet.pm
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet.pm
@@ -5,6 +5,22 @@
with 'Krawfish::Koral::Result::Inflatable';
+# The structure of a match is as follows:
+#
+# <context>
+# <more />
+# ... # Pure text and decorations
+# <focus> # Possible extension to elements
+# ... # Pure text, decorations and annotations
+# <hit> # The concrete hit
+# ... # Pure text, decorations, annotations and highlights
+# </hit>
+# ... # Pure text, decorations and annotations
+# </focus>
+# ... # Pure text and decorations
+# <more />
+# </context>
+
# TODO:
# Make sure this works for right-to-left (RTL) language scripts as well!
@@ -14,10 +30,18 @@
sub new {
my $class = shift;
+ # stream
+ # stream_offset
+ # doc_id
+
# match_ids
- bless {
+ my $self = bless {
@_
}, $class;
+
+
+ $self->{annotations} //= [];
+ return $self;
};
@@ -36,50 +60,6 @@
};
-# Set context end position
-sub context_end {
- my $self = shift;
- if (@_) {
- $self->{context_end} = shift;
- return $self;
- };
- return $self->{context_end};
-};
-
-
-# Set extension end position
-sub extension_end {
- my $self = shift;
- if (@_) {
- $self->{extension_end} = shift;
- return $self;
- };
- return $self->{extension_end};
-};
-
-
-# Set context start position
-sub hit_start {
- my $self = shift;
- if (@_) {
- $self->{hit_start} = shift;
- return $self;
- };
- return $self->{hit_start};
-};
-
-
-# Set context end position
-sub hit_end {
- my $self = shift;
- if (@_) {
- $self->{hit_end} = shift;
- return $self;
- };
- return $self->{hit_end};
-};
-
-
# Set doc id
sub doc_id {
my $self = shift;
@@ -91,28 +71,6 @@
};
-# Add highlight to snippet
-sub add_highlight {
- my ($self, $highlight) = @_;
- my $hls = ($self->{highlights} //= []);
- push @$hls, $highlight;
-};
-
-
-# Add annotations to be retrieved in hit
-sub add_annotation {
- ...
-};
-
-
-# All annotations to be retrieved in hit
-sub annotations_sorted {
- # TODO:
- # Sort all requested annotations numerically by
- # foundry_id > layer_id > anno_id!
- return ();
-};
-
# This stores a Krawfish::Koral::Document::Stream
# with the stream_offset subtoken at 0
sub stream {
@@ -142,6 +100,7 @@
my $str = $self->key . ':' . $self->stream->to_string($id);
};
+
# Key for KQ serialization
sub key {
'snippet'
@@ -156,4 +115,163 @@
};
+sub _order_markup {
+  my ($self, $stream) = @_;
+  # Merge opening and closing markup tags into one ordered list.
+  # This is based on processHighlightStack() in Krill.
+  # Returns the merged list (empty as long as steps 1-3 are not implemented).
+  # 1. Take all markup and split into opening and closing tags
+  #    - Milestones are only added as starts
+  my (@open, @close);
+  # 2. Sort the open tags:
+  #    - by start position
+  #    - by start character extension
+  #    - by end position
+  #    - by class number
+  # 3. Sort the closing tags
+  #    - by end position
+  #    - by end character extension
+  #    - by start position
+  #    - by class number
+  # 4. Create a stack or a list of the doubled length of
+  #    the opening list
+  my @stack;
+  while (@open || @close) {
+    # No more open tags - flush the remaining closers in sorted order
+    if (!@open) {
+      push @stack, @close;
+      last;
+    }
+
+    # No more end tags
+    elsif (!@close) {
+      last;
+    };
+
+    # The opener starts before the closer ends
+    if ($open[0]->start < $close[0]->end) {
+      push @stack, shift @open;
+    }
+
+    # First let the closer end
+    else {
+      push @stack, shift @close;
+    };
+  };
+
+  return @stack;
+
+  # 5. Iterate over the stream and add all annotations.
+  #    Stream is:
+  #      Krawfish::Koral::Document::Stream
+  #    with surface annotations only
+  #    (not reached yet: the sub returns above)
+  my $length = $self->stream->length;
+  while ($length > 0) {
+    ...
+  };
+};
+
+# Add a markup object (hit, context, focus, highlight, ...) to the snippet.
+# Hit, context and focus objects also set the matching boundaries.
+# Returns the new number of annotations, or 0 for non-markup objects.
+sub add {
+  my ($self, $e) = @_;
+  my $ns = 'Krawfish::Koral::Result::Enrich::Snippet::';
+  # Only markup objects are accepted
+  return 0 unless Role::Tiny::does_role($e, $ns . 'Markup');
+
+  # Add the hit boundaries
+  if (Role::Tiny::does_role($e, $ns . 'Hit')) {
+    $self->hit_start($e->start);
+    $self->hit_end($e->end);
+  }
+
+  # Context information
+  elsif (Role::Tiny::does_role($e, $ns . 'Context')) {
+    $self->context_start($e->start);
+    $self->context_end($e->end);
+  }
+
+  # Scope extended by, e.g., spans
+  elsif (Role::Tiny::does_role($e, $ns . 'Focus')) {
+    $self->focus_start($e->start);
+    $self->focus_end($e->end);
+  };
+
+  # Store the object itself in the annotation list
+  return push @{$self->{annotations}}, $e;
+};
+
+
+
+# Get the context start position, or set it (returns the object)
+sub context_start {
+  my ($self, @value) = @_;
+  return $self->{context_start} unless @value;
+
+  # Setter is chainable
+  $self->{context_start} = $value[0];
+  return $self;
+};
+
+
+# Get the context end position, or set it (returns the object)
+sub context_end {
+  my ($self, @value) = @_;
+  return $self->{context_end} unless @value;
+
+  # Setter is chainable
+  $self->{context_end} = $value[0];
+  return $self;
+};
+
+
+
+# Get the focus (extension) start position, or set it (returns the object)
+sub focus_start {
+  my ($self, @value) = @_;
+  return $self->{focus_start} unless @value;
+
+  # Setter is chainable
+  $self->{focus_start} = $value[0];
+  return $self;
+};
+
+
+# Get the focus (extension) end position, or set it (returns the object)
+sub focus_end {
+  my ($self, @value) = @_;
+  return $self->{focus_end} unless @value;
+
+  # Setter is chainable
+  $self->{focus_end} = $value[0];
+  return $self;
+};
+
+
+# Get the hit start position, or set it (returns the object)
+sub hit_start {
+  my ($self, @value) = @_;
+  return $self->{hit_start} unless @value;
+
+  # Setter is chainable
+  $self->{hit_start} = $value[0];
+  return $self;
+};
+
+
+# Get the hit end position, or set it (returns the object)
+sub hit_end {
+  my ($self, @value) = @_;
+  return $self->{hit_end} unless @value;
+
+  # Setter is chainable
+  $self->{hit_end} = $value[0];
+  return $self;
+};
+
+
+
+
1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Annotation.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Annotation.pm
new file mode 100644
index 0000000..1150bb6
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Annotation.pm
@@ -0,0 +1,28 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Annotation;
+use strict;
+use warnings;
+use Role::Tiny;
+
+# TODO:
+#   This role needs the term identifier role!
+
+# Placeholders without a body (they return nothing yet)
+sub foundry {};
+
+sub layer {};
+
+sub key {};
+
+sub value {};
+
+# Get the certainty of the annotation, or set it (returns the object)
+sub certainty {
+  my ($self, @value) = @_;
+  return $self->{certainty} unless @value;
+
+  # Setter is chainable
+  $self->{certainty} = $value[0];
+  return $self;
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Attribute.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Attribute.pm
new file mode 100644
index 0000000..de7cc7a
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Attribute.pm
@@ -0,0 +1,23 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Attribute;
+use strict;
+use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+with 'Krawfish::Koral::Result::Enrich::Snippet::Annotation';
+
+
+# Accessor ref_tui: get the stored value, or set it (returns the object).
+# NOTE(review): The value is kept under the 'target_start' key,
+#   which looks like a leftover from the relation role - confirm.
+sub ref_tui {
+  my ($self, @value) = @_;
+  return $self->{target_start} unless @value;
+
+  # Setter is chainable
+  $self->{target_start} = $value[0];
+  return $self;
+};
+
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Focus.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Focus.pm
new file mode 100644
index 0000000..265bd97
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Focus.pm
@@ -0,0 +1,11 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Focus;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+# Role for focus markup; it only composes the Markup role and adds no methods
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+
+use constant DEBUG => 0;
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Highlight.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Highlight.pm
new file mode 100644
index 0000000..b70df94
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Highlight.pm
@@ -0,0 +1,23 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Highlight;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+
+use constant DEBUG => 0;
+
+
+# Get the class number of the highlight, or set it (returns the object)
+sub number {
+  my ($self, @value) = @_;
+  return $self->{number} unless @value;
+
+  # Setter is chainable
+  $self->{number} = $value[0];
+  return $self;
+};
+
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Hit.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Hit.pm
new file mode 100644
index 0000000..8819d6f
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Hit.pm
@@ -0,0 +1,19 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Hit;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+
+use constant DEBUG => 0;
+
+
+# Stringify to brackets: '[' for an opening tag, ']' otherwise
+sub to_brackets {
+  my ($self) = @_;
+  return '[' if $self->is_opening;
+  return ']';
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm
new file mode 100644
index 0000000..403dce1
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm
@@ -0,0 +1,87 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Markup;
+use strict;
+use warnings;
+use Role::Tiny;
+
+requires qw/start
+            end
+            start_char
+            end_char/;
+
+# TODO:
+#   Have common methods with
+#   Krawfish::Koral::Document::Annotation
+
+# TODO:
+#   This is the base class for
+#   - hit
+#   - highlight
+#   - relation
+#   - anchor
+#   - Annotation
+
+# TODO:
+#   All these roles may very well
+#   be under Koral - as index data types.
+
+# Constructor: bless the passed key/value pairs into the class
+sub new {
+  my ($class, @param) = @_;
+  return bless { @param }, $class;
+};
+
+# Get the start position, or set it (returns the object)
+sub start {
+  my ($self, @value) = @_;
+  return $self->{start} unless @value;
+
+  # Setter is chainable
+  $self->{start} = $value[0];
+  return $self;
+};
+
+
+# Get the end position, or set it (returns the object)
+sub end {
+  my ($self, @value) = @_;
+  return $self->{end} unless @value;
+
+  # Setter is chainable
+  $self->{end} = $value[0];
+  return $self;
+};
+
+
+# Get the start char, or set it (returns the object)
+sub start_char {
+  my ($self, @value) = @_;
+  return $self->{start_char} unless @value;
+
+  # Setter is chainable
+  $self->{start_char} = $value[0];
+  return $self;
+};
+
+
+# Get the end char, or set it (returns the object)
+sub end_char {
+  my ($self, @value) = @_;
+  return $self->{end_char} unless @value;
+
+  # Setter is chainable
+  $self->{end_char} = $value[0];
+  return $self;
+};
+
+
+# Get or set whether the element occurs as an opening tag
+sub is_opening {
+  my ($self, @value) = @_;
+  return $self->{opening} unless @value;
+
+  # Setter is chainable
+  $self->{opening} = $value[0];
+  return $self;
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Milestone.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Milestone.pm
new file mode 100644
index 0000000..7d997dc
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Milestone.pm
@@ -0,0 +1,20 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Milestone;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+with 'Krawfish::Koral::Result::Enrich::Snippet::Annotation';
+
+use constant DEBUG => 0;
+
+# The milestone element is always embedded before
+# the actual position
+
+# Milestones have identical start and end positions
+sub end {
+  return shift->start;
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Relation.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Relation.pm
new file mode 100644
index 0000000..10a78b7
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Relation.pm
@@ -0,0 +1,61 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Relation;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+with 'Krawfish::Koral::Result::Enrich::Snippet::TUI';
+with 'Krawfish::Koral::Result::Enrich::Snippet::Annotation';
+
+use constant DEBUG => 0;
+
+# Get the left_to_right flag (there is no setter)
+sub left_to_right {
+  return $_[0]->{left_to_right};
+};
+
+# Get the start position of the right part, or set it (returns the object)
+sub right_start {
+  my $self = shift;
+  if (@_) {
+    $self->{target_start} = shift;
+    return $self;
+  };
+  return $self->{target_start};
+};
+
+
+# Get the end position of the right part, or set it (returns the object)
+sub right_end {
+  my $self = shift;
+  if (@_) {
+    $self->{target_end} = shift;
+    return $self;
+  };
+  return $self->{target_end};
+};
+
+
+# Get the TUI of the source, or set it (returns the object)
+sub source_tui {
+  my $self = shift;
+  if (@_) {
+    $self->{source_tui} = shift;
+    return $self;
+  };
+  return $self->{source_tui};
+};
+
+
+# Get the TUI of the target, or set it (returns the object)
+sub target_tui {
+  my $self = shift;
+  if (@_) {
+    $self->{target_tui} = shift;
+    return $self;
+  };
+  return $self->{target_tui};
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Span.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Span.pm
new file mode 100644
index 0000000..374ff81
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Span.pm
@@ -0,0 +1,26 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Span;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+with 'Krawfish::Koral::Result::Enrich::Snippet::TUI';
+# NOTE(review): Confirm the Certainty role exists - 'certainty' is defined in the Annotation role
+with 'Krawfish::Koral::Result::Enrich::Snippet::Certainty';
+
+# Spans are used for token as well as span annotations,
+# therefore even tokens can have depth information
+
+use constant DEBUG => 0;
+
+# Get the depth, or set it (returns the object)
+sub depth {
+  my ($self, @value) = @_;
+  return $self->{depth} unless @value;
+
+  $self->{depth} = $value[0];
+  return $self;
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/TUI.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/TUI.pm
new file mode 100644
index 0000000..34218ad
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/TUI.pm
@@ -0,0 +1,16 @@
+package Krawfish::Koral::Result::Enrich::Snippet::TUI;
+use strict;
+use warnings;
+use Role::Tiny;
+
+# Get the token unique identifier, or set it (returns the object)
+sub tui {
+  my ($self, @value) = @_;
+  return $self->{tui} unless @value;
+
+  # Setter is chainable
+  $self->{tui} = $value[0];
+  return $self;
+};
+
+1;
diff --git a/t/compile/segment/enrich_snippet_elements.t b/t/compile/segment/enrich_snippet_elements.t
deleted file mode 100644
index 7c9f896..0000000
--- a/t/compile/segment/enrich_snippet_elements.t
+++ /dev/null
@@ -1,17 +0,0 @@
-use Test::More;
-use Test::Krawfish;
-use strict;
-use warnings;
-
-use_ok('Krawfish::Compile::Segment::Enrich::Snippet::Spans');
-
-my $spans = Krawfish::Compile::Segment::Enrich::Snippet::Spans->new;
-
-$spans->add_text('Der')->add_text(' ')->add_text('alte');
-
-is($spans->to_html, 'Der alte', 'HTML');
-
-
-done_testing;
-
-1;
diff --git a/t/koral/result/snippet.t b/t/koral/result/snippet.t
new file mode 100644
index 0000000..d37fb91
--- /dev/null
+++ b/t/koral/result/snippet.t
@@ -0,0 +1,35 @@
+use Test::More;
+use strict;
+use warnings;
+
+use_ok('Krawfish::Koral::Result::Enrich::Snippet');
+use_ok('Krawfish::Koral::Result::Enrich::Snippet::Hit');
+use_ok('Krawfish::Koral::Result::Enrich::Snippet::Highlight');
+
+# Create snippet object for document 5
+my $snippet = Krawfish::Koral::Result::Enrich::Snippet->new(
+  doc_id => 5
+);
+
+# Create hit object spanning positions 1 to 4
+my $hit = Krawfish::Koral::Result::Enrich::Snippet::Hit->new(
+  start => 1,
+  end => 4
+);
+
+ok($snippet->add($hit), 'Add hit');
+# Adding the hit is expected to set the hit boundaries of the snippet
+is($snippet->hit_start, 1, 'Hit start');
+is($snippet->hit_end, 4, 'Hit end');
+# Create highlight object with class number 4 at positions 2 to 3
+my $highlight = Krawfish::Koral::Result::Enrich::Snippet::Highlight->new(
+  start => 2,
+  end => 3,
+  number => 4
+);
+
+ok($snippet->add($highlight), 'Add highlight');
+
+done_testing;
+
+__END__
diff --git a/t/query/clone.t b/t/query/clone.t
index 4d4c7a3..adf9473 100644
--- a/t/query/clone.t
+++ b/t/query/clone.t
@@ -194,6 +194,7 @@
# Test cloning (and running)
diag 'Check compile queries';
+diag 'Check that sort never mixes with enrich!';
done_testing;
__END__