Improve snippet generation with different markup classes

Change-Id: I96cc269b49c2cadcd692ae9030b077b4a8a25b70
diff --git a/lib/Krawfish/Koral/Document.pm b/lib/Krawfish/Koral/Document.pm
index bfc465a..74297f7 100644
--- a/lib/Krawfish/Koral/Document.pm
+++ b/lib/Krawfish/Koral/Document.pm
@@ -26,6 +26,13 @@
 # TODO:
 #   Don't forget to deal with TUIs!
 
+# TODO:
+#   Add character extensions to the forward index only
+
+# TODO:
+#   Fields need - depending on the type -
+#   a prefix AND a postfix!
+
 use constant DEBUG => 0;
 
 # Parse the document and create an inverted index file
diff --git a/lib/Krawfish/Koral/Document/Annotation.pm b/lib/Krawfish/Koral/Document/Annotation.pm
index 4108dde..a4c5573 100644
--- a/lib/Krawfish/Koral/Document/Annotation.pm
+++ b/lib/Krawfish/Koral/Document/Annotation.pm
@@ -5,6 +5,11 @@
 use strict;
 use Krawfish::Koral::Query::Term;
 
+# TODO:
+#   Have common methods with
+#   Krawfish::Koral::Result::Enrich::Snippet::Markup
+
+
 # Accepts a Krawfish::Koral::Query::Term object
 sub new {
   my $class = shift;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet.pm
index a03ac06..af7935a 100644
--- a/lib/Krawfish/Koral/Result/Enrich/Snippet.pm
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet.pm
@@ -5,6 +5,22 @@
 
 with 'Krawfish::Koral::Result::Inflatable';
 
+# The structure of a match is as follows:
+#
+# <context>
+#   <more />
+#   ...             # Pure text and decorations
+#   <focus>         # Possible extension to elements
+#     ...           # Pure text, decorations and annotations
+#     <hit>         # The concrete hit
+#       ...         # Pure text, decorations, annotations and highlights
+#     </hit>
+#     ...           # Pure text, decorations and annotations
+#   </focus>
+#   ...             # Pure text and decorations
+#   <more />
+# </context>
+
 
 # TODO:
 #   Make sure this works for right-to-left (RTL) language scripts as well!
@@ -14,10 +30,18 @@
 sub new {
   my $class = shift;
 
+  # stream
+  # stream_offset
+  # doc_id
+
   # match_ids
-  bless {
+  my $self = bless {
     @_
   }, $class;
+
+
+  $self->{annotations} //= [];
+  return $self;
 };
 
 
@@ -36,50 +60,6 @@
 };
 
 
-# Set context end position
-sub context_end {
-  my $self = shift;
-  if (@_) {
-    $self->{context_end} = shift;
-    return $self;
-  };
-  return $self->{context_end};
-};
-
-
-# Set extension end position
-sub extension_end {
-  my $self = shift;
-  if (@_) {
-    $self->{extension_end} = shift;
-    return $self;
-  };
-  return $self->{extension_end};
-};
-
-
-# Set context start position
-sub hit_start {
-  my $self = shift;
-  if (@_) {
-    $self->{hit_start} = shift;
-    return $self;
-  };
-  return $self->{hit_start};
-};
-
-
-# Set context end position
-sub hit_end {
-  my $self = shift;
-  if (@_) {
-    $self->{hit_end} = shift;
-    return $self;
-  };
-  return $self->{hit_end};
-};
-
-
 # Set doc id
 sub doc_id {
   my $self = shift;
@@ -91,28 +71,6 @@
 };
 
 
-# Add highlight to snippet
-sub add_highlight {
-  my ($self, $highlight) = @_;
-  my $hls = ($self->{highlights} //= []);
-  push @$hls, $highlight;
-};
-
-
-# Add annotations to be retrieved in hit
-sub add_annotation {
-  ...
-};
-
-
-# All annotations to be retrieved in hit
-sub annotations_sorted {
-  # TODO:
-  #   Sort all requested annotations numerically by
-  #   foundry_id > layer_id > anno_id!
-  return ();
-};
-
 # This stores a Krawfish::Koral::Document::Stream
 # with the stream_offset subtoken at 0
 sub stream {
@@ -142,6 +100,7 @@
   my $str = $self->key . ':' . $self->stream->to_string($id);
 };
 
+
 # Key for KQ serialization
 sub key {
   'snippet'
@@ -156,4 +115,163 @@
 };
 
 
+sub _order_markup {
+  my ($self, $stream) = @_;
+  # Merge opening and closing markup tags into one ordered list of tags.
+  # This is based on processHighlightStack() in Krill
+  # 1. Take all markup and split into opening and closing tags
+  #    - Milestones are only added as starts
+  my (@open, @close);
+  # 2. Sort the open tags:
+  #    - by start position
+  #    - by start character extension
+  #    - by end position
+  #    - by class number
+  # 3. Sort the closing tags
+  #    - by end position
+  #    - by end character extension
+  #    - by start position
+  #    - by class number
+  # 4. Create a stack or a list of the doubled length of
+  #    the opening list
+  my @stack;
+
+  while (@open || @close) {
+
+    # No more open tags - take the remaining closers from the front,
+    if (!@open) {
+      push @stack, shift @close;
+      next;
+    }
+
+    # No more end tags
+    elsif (!@close) {
+      last;
+    };
+
+    # The opener starts before the closer ends (NOTE(review): compares the raw list elements)
+    if ($open[0] < $close[0]) {
+      push @stack, shift @open;
+    }
+
+    # First let the closer end
+    else {
+      push(@stack, shift(@close));
+    };
+  };
+
+  return @stack;
+  # NOTE: everything below is not reached yet (work in progress)
+  # 5. Iterate over the stream and add all annotations.
+  #    Stream is:
+  #    Krawfish::Koral::Document::Stream
+  #    with surface annotations only
+  my $length = $self->stream->length;
+  while ($length > 0) {
+    ...
+  };
+};
+
+# Add a markup object to the snippet; hit, context and focus
+# markup additionally set the matching boundaries of the snippet
+sub add {
+  my ($self, $e) = @_;
+
+  # Only markup objects are accepted, everything else is ignored
+  if (Role::Tiny::does_role($e, 'Krawfish::Koral::Result::Enrich::Snippet::Markup')) {
+
+    # Add the hit boundaries
+    if (Role::Tiny::does_role($e, 'Krawfish::Koral::Result::Enrich::Snippet::Hit')) {
+      $self->hit_start($e->start);
+      $self->hit_end($e->end);
+    }
+
+    # Context information
+    elsif (Role::Tiny::does_role($e, 'Krawfish::Koral::Result::Enrich::Snippet::Context')) {
+      $self->context_start($e->start);
+      $self->context_end($e->end);
+    }
+
+    # Scope extended by, e.g., spans
+    elsif (Role::Tiny::does_role($e, 'Krawfish::Koral::Result::Enrich::Snippet::Focus')) {
+      $self->focus_start($e->start);
+      $self->focus_end($e->end);
+    };
+
+    # Push the element itself (not a further argument) to the annotation list
+    push @{$self->{annotations}}, $e;
+  };
+};
+
+
+
+# Get the context start position, or set it and return the snippet
+sub context_start {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{context_start} unless @value;
+  $self->{context_start} = $value[0];
+  return $self;
+};
+
+
+# Get the context end position, or set it and return the snippet
+sub context_end {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{context_end} unless @value;
+  $self->{context_end} = $value[0];
+  return $self;
+};
+
+
+
+# Get the focus start position, or set it and return the snippet
+sub focus_start {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{focus_start} unless @value;
+  $self->{focus_start} = $value[0];
+  return $self;
+};
+
+
+# Get the focus end position, or set it and return the snippet
+sub focus_end {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{focus_end} unless @value;
+  $self->{focus_end} = $value[0];
+  return $self;
+};
+
+
+# Get the hit start position, or set it and return the snippet
+sub hit_start {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{hit_start} unless @value;
+  $self->{hit_start} = $value[0];
+  return $self;
+};
+
+
+# Get the hit end position, or set it and return the snippet
+sub hit_end {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{hit_end} unless @value;
+  $self->{hit_end} = $value[0];
+  return $self;
+};
+
+
+
+
 1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Annotation.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Annotation.pm
new file mode 100644
index 0000000..1150bb6
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Annotation.pm
@@ -0,0 +1,28 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Annotation;
+use strict;
+use warnings;
+use Role::Tiny;
+
+# TODO:
+#   This role needs the term identifier
+#   role!
+
+# Placeholder methods that do not return a value yet
+sub foundry { return };
+sub layer   { return };
+sub key     { return };
+sub value   { return };
+
+
+# Get the certainty of the annotation,
+# or set it and return the annotation
+sub certainty {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{certainty} unless @value;
+  $self->{certainty} = $value[0];
+  return $self;
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Attribute.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Attribute.pm
new file mode 100644
index 0000000..de7cc7a
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Attribute.pm
@@ -0,0 +1,23 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Attribute;
+use strict;
+use warnings;
+use Role::Tiny;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+with 'Krawfish::Koral::Result::Enrich::Snippet::Annotation';
+
+
+# Get the referenced TUI (ref_tui) of the attribute,
+# or set it and return the attribute
+sub ref_tui {
+  my $self = shift;
+
+  # Stored under the accessor's own key instead of 'target_start'
+  if (@_) {
+    $self->{ref_tui} = shift;
+    return $self;
+  };
+  return $self->{ref_tui};
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Focus.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Focus.pm
new file mode 100644
index 0000000..265bd97
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Focus.pm
@@ -0,0 +1,11 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Focus;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+# Focus markup: composes the Markup role and adds no methods of its own
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+
+use constant DEBUG => 0;
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Highlight.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Highlight.pm
new file mode 100644
index 0000000..b70df94
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Highlight.pm
@@ -0,0 +1,23 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Highlight;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+
+use constant DEBUG => 0;
+
+# Get the class number of the highlight,
+# or set it and return the highlight
+sub number {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{number} unless @value;
+  $self->{number} = $value[0];
+  return $self;
+};
+
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Hit.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Hit.pm
new file mode 100644
index 0000000..8819d6f
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Hit.pm
@@ -0,0 +1,19 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Hit;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+
+use constant DEBUG => 0;
+
+# Stringify the hit boundary to a bracket: '[' when the
+# element is an opening tag and ']' otherwise
+sub to_brackets {
+  my ($self) = @_;
+  return '[' if $self->is_opening;
+  return ']';
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm
new file mode 100644
index 0000000..403dce1
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Markup.pm
@@ -0,0 +1,87 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Markup;
+use strict;
+use warnings;
+use Role::Tiny;
+
+requires qw/start
+            end
+            start_char
+            end_char/;
+
+# TODO:
+#   Have common methods with
+#   Krawfish::Koral::Document::Annotation
+
+# TODO:
+#   This is the base class for
+#   - hit
+#   - highlight
+#   - relation
+#   - anchor
+#   - Annotation
+
+# TODO:
+#   All these roles may very well
+#   be under Koral - as index data types.
+
+# Construct a new object from the passed key/value pairs
+sub new {
+  my ($class, @param) = @_;
+  return bless({ @param }, $class);
+};
+
+
+# Get the start position, or set it and return the object
+sub start {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{start} unless @value;
+  $self->{start} = $value[0];
+  return $self;
+};
+
+
+# Get the end position, or set it and return the object
+sub end {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{end} unless @value;
+  $self->{end} = $value[0];
+  return $self;
+};
+
+
+# Get the start char, or set it and return the object
+sub start_char {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{start_char} unless @value;
+  $self->{start_char} = $value[0];
+  return $self;
+};
+
+
+# Get the end char, or set it and return the object
+sub end_char {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{end_char} unless @value;
+  $self->{end_char} = $value[0];
+  return $self;
+};
+
+
+# Get or set whether the element occurs as an opening tag
+sub is_opening {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{opening} unless @value;
+  $self->{opening} = $value[0];
+  return $self;
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Milestone.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Milestone.pm
new file mode 100644
index 0000000..7d997dc
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Milestone.pm
@@ -0,0 +1,20 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Milestone;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+with 'Krawfish::Koral::Result::Enrich::Snippet::Annotation';
+
+use constant DEBUG => 0;
+
+# The milestone element always is embedded before
+# the actual position
+# The end of a milestone is its start position (arguments are ignored)
+sub end {
+  my ($self) = @_;
+  return $self->start;
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Relation.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Relation.pm
new file mode 100644
index 0000000..10a78b7
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Relation.pm
@@ -0,0 +1,61 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Relation;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+with 'Krawfish::Koral::Result::Enrich::Snippet::TUI';
+with 'Krawfish::Koral::Result::Enrich::Snippet::Annotation';
+
+use constant DEBUG => 0;
+
+# Get the left_to_right value of the relation (read-only)
+sub left_to_right {
+  my $self = shift;
+  return $self->{left_to_right};
+};
+
+# Get or set the start position of the right part
+# (stored as target_start); the setter returns the relation
+sub right_start {
+  my $self = shift;
+  if (@_) {
+    $self->{target_start} = shift;
+    return $self;
+  };
+  return $self->{target_start};
+};
+
+# Get or set the end position of the right part
+# (stored as target_end); the setter returns the relation
+sub right_end {
+  my $self = shift;
+  if (@_) {
+    $self->{target_end} = shift;
+    return $self;
+  };
+  return $self->{target_end};
+};
+
+# Get or set the TUI of the source
+sub source_tui {
+  my $self = shift;
+  if (@_) {
+    $self->{source_tui} = shift;
+    return $self;
+  };
+  return $self->{source_tui};
+};
+
+# Get or set the TUI of the target
+sub target_tui {
+  my $self = shift;
+  if (@_) {
+    $self->{target_tui} = shift;
+    return $self;
+  };
+  return $self->{target_tui};
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/Span.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/Span.pm
new file mode 100644
index 0000000..374ff81
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/Span.pm
@@ -0,0 +1,26 @@
+package Krawfish::Koral::Result::Enrich::Snippet::Span;
+use strict;
+use warnings;
+use Role::Tiny;
+use Krawfish::Log;
+
+with 'Krawfish::Koral::Result::Enrich::Snippet::Markup';
+with 'Krawfish::Koral::Result::Enrich::Snippet::TUI';
+with 'Krawfish::Koral::Result::Enrich::Snippet::Certainty';
+
+# Spans are used for token as well as span annotations,
+# so even tokens can carry depth information
+
+use constant DEBUG => 0;
+
+# Get the depth, or set it and return the span
+sub depth {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{depth} unless @value;
+  $self->{depth} = $value[0];
+  return $self;
+};
+
+1;
diff --git a/lib/Krawfish/Koral/Result/Enrich/Snippet/TUI.pm b/lib/Krawfish/Koral/Result/Enrich/Snippet/TUI.pm
new file mode 100644
index 0000000..34218ad
--- /dev/null
+++ b/lib/Krawfish/Koral/Result/Enrich/Snippet/TUI.pm
@@ -0,0 +1,16 @@
+package Krawfish::Koral::Result::Enrich::Snippet::TUI;
+use strict;
+use warnings;
+use Role::Tiny;
+
+# Get the token unique identifier, or set it and return the object
+sub tui {
+  my ($self, @value) = @_;
+
+  # Getter mode
+  return $self->{tui} unless @value;
+  $self->{tui} = $value[0];
+  return $self;
+};
+
+1;