Started example forward index deserialization

commit: 14ff0c4e3db6fdd132519250dd7607d8bf8a0e37 [log] [tgz]
author: Akron <nils@diewald-online.de> Wed Aug 09 20:49:52 2017 +0200
committer: Akron <nils@diewald-online.de> Wed Aug 09 20:49:52 2017 +0200
tree: 3693f902451d7584b9f13be85447d4c1a6201a0b
parent: dd10fb91819cf3ac77a5032c145e22a8322b79f4 [diff]
diff --git a/lib/Krawfish/Index/Dictionary.pm b/lib/Krawfish/Index/Dictionary.pm
index e51452e..eda1626 100644
--- a/lib/Krawfish/Index/Dictionary.pm
+++ b/lib/Krawfish/Index/Dictionary.pm

@@ -18,9 +18,13 @@
 #   terms:       *
 #   (casefolded) '
 #   subterms:    ~
+#   foundry:     ^
+#   layer:       °
 #   annotations
-#     token      #
+#     token      #   (not yet supported)
 #     span       <>
+#     relations  <, >
+#     attributes @
 #   fields:      +
 #   fieldkeys:   !
 #
@@ -124,7 +128,7 @@
 #   requested, for example, by the term_id API for co-occurrence search.
 #   That's why all subterms need to be stored as well.
 
-use constant DEBUG => 0;
+use constant DEBUG => 1;
 
 sub new {
   my $class = shift;

diff --git a/lib/Krawfish/Index/FieldValue.pm b/lib/Krawfish/Index/FieldValue.pm
index b77e475..80793d2 100644
--- a/lib/Krawfish/Index/FieldValue.pm
+++ b/lib/Krawfish/Index/FieldValue.pm

@@ -4,6 +4,9 @@
 use strict;
 use warnings;
 
+# TODO:
+#   This is deprecated in favor of Forward::*
+
 # All values are stored as varints in a skiplist
 # augmented postingslist
 

diff --git a/lib/Krawfish/Index/Fields.pm b/lib/Krawfish/Index/Fields.pm
index ade74f2..4a7dffa 100644
--- a/lib/Krawfish/Index/Fields.pm
+++ b/lib/Krawfish/Index/Fields.pm

@@ -7,6 +7,11 @@
 use constant DEBUG => 0;
 
 # TODO:
+#   This is deprecated in favor of Forward::*, though ranking is
+#   a separate issue.
+
+
+# TODO:
 #   Currently ranking is not collation based. It should be possible
 #   to define a collation per field and
 #   use one collation for prefix and suffix sorting.

diff --git a/lib/Krawfish/Index/Forward.pm b/lib/Krawfish/Index/Forward.pm
new file mode 100644
index 0000000..ea5a278
--- /dev/null
+++ b/lib/Krawfish/Index/Forward.pm

@@ -0,0 +1,82 @@
+package Krawfish::Index::Forward;
+use Krawfish::Index::Forward::Stream;
+use Krawfish::Index::Forward::Doc;
+# use Krawfish::Index::Store::V1::ForwardIndex;
+use warnings;
+use strict;
+
+# TODO:
+#   This API needs to be backed up by a store version.
+
+# API:
+# ->next_doc
+# ->to_doc($doc_id)
+# ->skip_pos($pos)
+# ->next_subtoken (fails, when the document ends)
+# ->prev_subtoken
+#
+# ->doc_id                # The current doc_id
+# ->pos                   # The current subtoken position
+#
+# ->current               # The current subtoken object
+#   ->preceding_data      # The whitespace data before the subtoken
+#   ->subterm_id          # The current subterm identifier
+#   ->annotations         # Get all annotations as terms
+#   ->annotations(foundry_id)
+#   ->annotations(foundry_id, layer_id)
+#
+# ->fields                # All fields as terms
+# ->fields(field_key_id*) # All fields with the key_ids
+
+
+sub new {
+  my ($class) = @_;
+
+  # Start without any documents; the doc_id counter begins at zero
+  my %self = (docs => [], last_doc_id => 0);
+
+  return bless \%self, $class;
+};
+
+
+# Get the doc_id counter (aka max_doc_id), which add() increments
+sub last_doc_id {
+  return shift->{last_doc_id};
+};
+
+
+# Accept a Krawfish::Koral::Document object and return its new doc_id
+sub add {
+  my ($self, $doc) = @_;
+  my $doc_id = $self->{last_doc_id}++;
+
+  # Store under the returned doc_id (not the incremented counter),
+  # so get($doc_id) retrieves exactly this document
+  $self->{docs}->[$doc_id] = $self->to_forward_index($doc);
+  return $doc_id;
+};
+
+
+# Get a specific forward indexed document by doc_id (nothing if unknown)
+sub get {
+  my ($self, $doc_id) = @_;
+
+  # Negative ids would silently index from the end of the list
+  return if !defined $doc_id || $doc_id < 0;
+  return if $doc_id > $self->last_doc_id;
+
+  return $self->{docs}->[$doc_id];
+};
+
+
+# Convert a document into its forward index representation
+sub to_forward_index {
+  my (undef, $doc) = @_;
+
+  # Build a structure
+  # (Krawfish::Index::Store::V1::ForwardIndex->new is not used yet)
+  return Krawfish::Index::Forward::Doc->new($doc);
+};
+
+
+1;

diff --git a/lib/Krawfish/Index/Forward/Annotation.pm b/lib/Krawfish/Index/Forward/Annotation.pm
index a2f9b15..fda029e 100644
--- a/lib/Krawfish/Index/Forward/Annotation.pm
+++ b/lib/Krawfish/Index/Forward/Annotation.pm

@@ -1,11 +1,11 @@
 package Krawfish::Index::Forward::Annotation;
+use Krawfish::Koral::Query::Term;
 use Krawfish::Util::String qw/squote/;
 use warnings;
 use strict;
 
-# TODO:
-#   This should contain type, foundry, layer, key, value ... etc.
 
+# Accepts a Krawfish::Koral::Query::Term object
 sub new {
   my $class = shift;
   bless {
@@ -25,15 +25,55 @@
 };
 
 
+sub foundry_id {
+  return shift->{foundry_id} // 0;   # 0 while no foundry id is set
+};
+
+
+sub layer_id {
+  return shift->{layer_id} // 0;     # 0 while no layer id is set
+};
+
+
+sub term_id {
+  return shift->{term_id};           # Set by identify()
+};
+
+
 sub identify {
   my ($self, $dict) = @_;
-  my $term_id = $dict->term_id_by_term($self->{term});
 
-  if (defined $term_id) {
+  my $term_id;
+  my $term = $self->{term};
+  my $term_str = $term->to_term;
+
+  $term_id = $dict->term_id_by_term($term_str);
+
+  # Term id is already known!
+  if ($term_id) {
     $self->{term_id} = $term_id;
+    $self->{foundry_id} = $dict->term_id_by_term('^' . $term->foundry);
+    $self->{layer_id} = $dict->term_id_by_term('°' . $term->layer);
+    return $self;
   }
+
+  # Term id is not yet given
   else {
-    $self->{term_id} = $dict->add_term($self->{term});;
+    $self->{term_id} = $dict->add_term($term_str);
+  };
+
+  # Get term_id for foundry
+  if ($term->foundry) {
+    $term_id = $dict->term_id_by_term('^' . $term->foundry);
+    $self->{foundry_id} = $term_id ? $term_id :
+      $dict->add_term('^' . $term->foundry);
+  };
+
+  # Get term_id for layer
+  if ($term->layer) {
+    $term_id = $dict->term_id_by_term('°' . $term->layer);
+    $self->{layer_id} = $term_id ? $term_id :
+      $dict->add_term('°' . $term->layer);
   };
 
   return $self;
@@ -49,9 +89,9 @@
   }
 
   else {
-    $str .= squote($self->{term});
+    $str .= squote($self->{term}->to_term);
   };
-  return $str . '$' . join(',', @{$self->{data}});
+  return $str . '$' . join(',',  @{$self->{data}});
 };
 
 1;

diff --git a/lib/Krawfish/Index/Forward/Doc.pm b/lib/Krawfish/Index/Forward/Doc.pm
new file mode 100644
index 0000000..08c7c66
--- /dev/null
+++ b/lib/Krawfish/Index/Forward/Doc.pm

@@ -0,0 +1,101 @@
+package Krawfish::Index::Forward::Doc;
+use warnings;
+use strict;
+# Flatten a document (fields and subtoken stream) into one list of slots
+sub new {
+  my $class = shift;
+  my $doc = shift;
+
+  # Get the fields of the document
+  my $fields = $doc->fields;
+
+  # Sort fields by key identifier, then by term identifier
+  # Should probably be part of the document
+  my @sorted_fields = sort {
+    if ($a->key_id < $b->key_id) {
+      return -1;
+    }
+    elsif ($a->key_id > $b->key_id) {
+      return 1;
+    }
+    elsif ($a->term_id < $b->term_id) {
+      return -1;
+    }
+    elsif ($a->term_id > $b->term_id) {
+      return 1;
+    }
+    else {
+      warn 'Multiple fields given!';   # Same key_id and term_id twice
+      return 0;
+    };
+  } @$fields;
+
+
+  # Add field data as (key_id, type, value-or-term_id) triples
+  my @data = ();
+  foreach (@sorted_fields) {
+    push @data, $_->key_id;     # Key data
+    push @data, $_->type;       # Key type marker
+                                # 'int' fields store the value, others the term_id
+    push @data, ($_->type eq 'int' ? $_->value : $_->term_id);
+  };
+  push @data, 'EOF';       # Marks the end of the field data
+  push @data, 0;           # Point to previous subtoken (should be xor)
+
+  my $start_marker;        # Index of the current subtoken's first slot
+
+  # Add annotation data
+  my $stream = $doc->stream;
+  foreach my $subtoken (@$stream) {
+
+    push @data, 0;           # Point to next subtoken (should be xor)
+    $start_marker = $#data;  # Remember the slot, it is patched below
+
+    push @data, $subtoken->term_id;
+    push @data, $subtoken->preceding;
+    # Sort annotations by foundry_id, then layer_id, then term_id
+    my @sorted_annotations = sort {
+      if ($a->foundry_id < $b->foundry_id) {
+        -1;
+      }
+      elsif ($a->foundry_id > $b->foundry_id) {
+        1;
+      }
+      elsif ($a->layer_id < $b->layer_id) {
+        -1;
+      }
+      elsif ($a->layer_id > $b->layer_id) {
+        1;
+      }
+      elsif ($a->term_id < $b->term_id) {
+        -1;
+      }
+      elsif ($a->term_id > $b->term_id) {
+        1;
+      }
+      else {
+        0;
+      };
+    } @{$subtoken->annotations};
+
+    # Add all annotations to the stream
+    foreach (@sorted_annotations) {
+      push @data, $_->foundry_id;
+      push @data, $_->layer_id;
+      push @data, $_->term_id;
+      push @data, [@{$_->data}];     # Copy of the annotation's data list
+    };
+
+    push @data, $start_marker;         # Back pointer: index of this subtoken's first slot
+    $data[$start_marker] = $#data;     # Forward pointer: index of this subtoken's last slot
+  };
+
+  bless {
+    stream => \@data
+  }, $class;
+};
+
+
+
+
+1;

diff --git a/lib/Krawfish/Index/Forward/FieldInt.pm b/lib/Krawfish/Index/Forward/FieldInt.pm
index 12b5787..0af392b 100644
--- a/lib/Krawfish/Index/Forward/FieldInt.pm
+++ b/lib/Krawfish/Index/Forward/FieldInt.pm

@@ -11,6 +11,24 @@
   }, $class;
 };
 
+sub type {
+  return 'int';   # Type marker of integer fields
+};
+
+# Get the term id of the key/value combination
+sub term_id {
+  return shift->{key_value_id};
+};
+
+
+# Get the term id of the field key
+sub key_id {
+  return shift->{key_id};
+};
+
+sub value {
+  return shift->{value};   # The stored field value
+};
 
 
 sub identify {

diff --git a/lib/Krawfish/Index/Forward/FieldString.pm b/lib/Krawfish/Index/Forward/FieldString.pm
index 01fedde..aaec9b3 100644
--- a/lib/Krawfish/Index/Forward/FieldString.pm
+++ b/lib/Krawfish/Index/Forward/FieldString.pm

@@ -12,6 +12,22 @@
 };
 
 
+sub type {
+  return 'string';   # Type marker of string fields
+};
+
+# Get the term id of the key/value combination
+sub term_id {
+  return shift->{key_value_id};
+};
+
+
+# Get the term id of the field key
+sub key_id {
+  return shift->{key_id};
+};
+
+
 sub identify {
   my ($self, $dict) = @_;
 

diff --git a/lib/Krawfish/Index/Forward/Fields.pm b/lib/Krawfish/Index/Forward/Fields.pm
index 347a91b..4681834 100644
--- a/lib/Krawfish/Index/Forward/Fields.pm
+++ b/lib/Krawfish/Index/Forward/Fields.pm

@@ -40,4 +40,6 @@
   return $self;
 };
 
+
+
 1;

diff --git a/lib/Krawfish/Index/Forward/Stream.pm b/lib/Krawfish/Index/Forward/Stream.pm
index 01bb531..a677483 100644
--- a/lib/Krawfish/Index/Forward/Stream.pm
+++ b/lib/Krawfish/Index/Forward/Stream.pm

@@ -4,6 +4,8 @@
 use strict;
 
 # This is one single stream of the forward index;
+# TODO:
+#   This should probably be part of Koral::Document::*
 
 sub new {
   my $class = shift;
@@ -27,6 +29,10 @@
   return join '', map { '(' . ($i++) . ')' .  $_->to_string } @{$_[0]}
 };
 
+sub length {
+  return scalar @{$_[0]};   # Number of stream entries, also in list context
+};
+
 
 sub identify {
   my ($self, $dict) = @_;

diff --git a/lib/Krawfish/Index/Forward/Subtoken.pm b/lib/Krawfish/Index/Forward/Subtoken.pm
index 4a00e7b..0e47335 100644
--- a/lib/Krawfish/Index/Forward/Subtoken.pm
+++ b/lib/Krawfish/Index/Forward/Subtoken.pm

@@ -27,6 +27,14 @@
   $_[0]->{subterm};
 };
 
+sub term_id {
+  return shift->{subterm_id};   # Term id of the subterm
+};
+
+
+sub annotations {
+  return shift->{anno};   # The stored annotation list
+};
 
 # Add annotations
 sub add_annotation {
@@ -38,6 +46,9 @@
 sub identify {
   my ($self, $dict) = @_;
 
+  # This is the final subtoken that's only required for preceding bytes
+  return $self unless $self->{subterm};
+
   my $term = '*' . $self->{subterm};
   my $term_id = $dict->term_id_by_term($term);
 

diff --git a/lib/Krawfish/Index/PostingsList.pm b/lib/Krawfish/Index/PostingsList.pm
index b2c4d48..0e0f0d2 100644
--- a/lib/Krawfish/Index/PostingsList.pm
+++ b/lib/Krawfish/Index/PostingsList.pm

@@ -4,7 +4,7 @@
 use strict;
 use warnings;
 
-use constant DEBUG => 1;
+use constant DEBUG => 0;
 
 # TODO:
 #   Use different PostingsList (or rather different PostingPointer)

diff --git a/lib/Krawfish/Index/Segment.pm b/lib/Krawfish/Index/Segment.pm
index cf65d2e..c8eaee2 100644
--- a/lib/Krawfish/Index/Segment.pm
+++ b/lib/Krawfish/Index/Segment.pm

@@ -1,9 +1,10 @@
 package Krawfish::Index::Segment;
 use Krawfish::Index::Subtokens;
-use Krawfish::Index::PrimaryData;
-use Krawfish::Index::Fields;
+use Krawfish::Index::PrimaryData;  # Maybe irrelevant
+use Krawfish::Index::Fields;       # Maybe irrelevant
 use Krawfish::Index::PostingsLive;
 use Krawfish::Index::PostingsList;
+use Krawfish::Index::Forward;
 use Krawfish::Cache;
 use Krawfish::Log;
 use Scalar::Util qw!blessed!;
@@ -40,7 +41,7 @@
     file => $file
   }, $class;
 
-  print_log('segment', 'Instantiate new segment') if DEBUG;
+  print_log('seg', 'Instantiate new segment') if DEBUG;
 
   # Load offsets
   $self->{subtokens} = Krawfish::Index::Subtokens->new(
@@ -79,6 +80,9 @@
   # Add cache
   $self->{cache} = Krawfish::Cache->new;
 
+  # Add forward index
+  $self->{forward} = Krawfish::Index::Forward->new;
+
   return $self;
 };
 
@@ -143,4 +147,75 @@
   return $self->{$term_id};
 };
 
+
+sub forward {
+  return shift->{forward};   # The segment's forward index
+};
+
+
+# Index a document in this segment; will make add() in Krawfish::Index obsolete
+sub add {
+  my ($self, $doc) = @_;
+
+  # Get a new doc_id for the segment
+  # TODO:
+  #   Alternatively get this from the forward index
+  my $doc_id = $self->live->incr;
+
+  # TODO:
+  #   The document should already have a field with __1:1 and id!
+
+  # TODO:
+  #   Index forward index
+  #   Alternatively, this could be done in the same method here!
+  my $forward_doc_id = $self->forward->add($doc);
+
+  # TODO:
+  #   Rank fields!
+
+  # TODO:
+  #   Deal with sortables!
+
+  # $self->invert->add()
+
+  # Create term index for fields
+  for my $field (@{$doc->fields}) {
+    my $field_id = $field->term_id;
+
+    print_log('seg', 'Added field #' . $field_id . ' for doc_id=' . $doc_id)
+      if DEBUG;
+    $self->postings($field_id)->append($doc_id);
+  };
+
+  # TODO:
+  #   This should probably collect all [term_id => data] in advance,
+  #   so skiplist info, freq_in_doc etc. can be adjusted beforehand
+  my $stream = $doc->stream;
+  # Walk the stream by subtoken position
+  for my $pos (0 .. ($stream->length - 1)) {
+    my $subtoken = $stream->subtoken($pos);
+    my $subterm_id = $subtoken->term_id;
+
+    # The last subtoken only exists for preceding bytes and has no term
+    next unless $subterm_id;
+
+    # Add subtoken to postingslist
+    $self->postings($subterm_id)->append($doc_id, $pos, $pos + 1);
+    print_log('seg', 'Added subterm #' . $subterm_id . ' for doc_id=' . $doc_id)
+      if DEBUG;
+
+    # Add all annotations
+    for my $anno (@{$subtoken->annotations}) {
+      my $anno_id = $anno->term_id;
+
+      $self->postings($anno_id)->append($doc_id, $pos, @{$anno->data});
+      print_log('seg', 'Added anno term #' . $anno_id . ' for doc_id=' . $doc_id)
+        if DEBUG;
+    };
+  };
+
+  return $doc_id;
+};
+
+
 1;

diff --git a/lib/Krawfish/Index/Store/V1/ForwardIndex.pm b/lib/Krawfish/Index/Store/V1/ForwardIndex.pm
index db9b2ca..fef6207 100644
--- a/lib/Krawfish/Index/Store/V1/ForwardIndex.pm
+++ b/lib/Krawfish/Index/Store/V1/ForwardIndex.pm

@@ -7,24 +7,37 @@
 use warnings;
 use Data::BitStream;
 
-# TODO:
-#   ForwardIndex and Fields should be stored in one file!
+
+# To be stored as
+#   [field-data-length]          # Necessary for skipping to annotations
+#   (                            # These are sorted in term_id order
+#     [field-key-termid-varint]
+#     [field-type-bit]
+#     [field-value-termid-varint|field-value-int]
+#   )*
+#   [annotation-data-length]     # Necessary for skipping to next doc
+#   (
+#     [next-subtoken-xor-int]    # xor-double-linked-list for next and prev
+#     [subterm-termid-varint]    # Necessary for primary data retrieval,
+#                                # co-occurrence search ...
+#     [subterm-length-varint]    # Necessary for character offsets for snippet contexts
+#                                # and potentially character-length sorting
+#     [preceding-data-string]    # Necessary for primary data retrieval,
+#                                # may need preceding length information
+#     (                          # These are sorted in term_id order
+#       [foundry-id-varint]
+#       [layer-id-varint]
+#       [term-id|term-string]    # Value is optional for hapax-legomena dealing
+#       [payload-length]
+#       [payload]                # Redundancy of payload is unfortunate
+#     )*
+#   )*
+#
+#   The positions are augmented with SkipList marker
+
 
 # TODO:
-#   This should probably be renamed to ForwardStream,
-#   while the index needs to contain an index pointing to the
-#   offsets for the documents in question!
-#
-# TODO:
-#   This should store all document data using
-#   term-IDs (where possible).
-#   Structure like
-#   [length][subtoken-surface-token-ID][foundry-layer-ID][term] ...
-#   [length][plain-text]
-#   [length][subtoken-surface-token-ID][foundry-layer-ID][term] ...
-#
-#   The plain text contains blanks, commata, etc.
-#   The subtokens point to byte offsets in the highly compressed forward index.
+#   This should probably be renamed to ForwardStream
 
 use constant {
   SUBTOKEN_MARKER    => 0b0000_0000,

diff --git a/lib/Krawfish/Koral/Document.pm b/lib/Krawfish/Koral/Document.pm
index 0a70d2e..96f7378 100644
--- a/lib/Krawfish/Koral/Document.pm
+++ b/lib/Krawfish/Koral/Document.pm

@@ -1,6 +1,7 @@
 package Krawfish::Koral::Document;
 use Krawfish::Index::Forward::Stream;
 use Krawfish::Index::Forward::Fields;
+use Krawfish::Koral::Query::Term;
 use Krawfish::Log;
 use Mojo::File;
 use Mojo::JSON qw/encode_json decode_json/;
@@ -19,6 +20,12 @@
 #   can be added with all freq_in_doc information
 
 
+# TODO:
+#   Don't forget to deal with TUIs!
+
+# Foundry and layer may need separate term_ids, so they are exceptionally small.
+
+
 use constant DEBUG => 1;
 
 # Parse the document and create an inverted index file
@@ -51,11 +58,13 @@
 };
 
 
+# Get the stream object
 sub stream {
   $_[0]->{stream};
 };
 
 
+# Get the fields object
 sub fields {
   $_[0]->{fields};
 };
@@ -65,6 +74,9 @@
   $_[0]->{sortable};
 };
 
+
+# Translate all terms into term_ids and
+# add unknown terms to the dictionary
 sub identify {
   my ($self, $dict) = @_;
   $self->{fields} = $self->{fields}->identify($dict);
@@ -73,6 +85,7 @@
 };
 
 
+# Stringification
 sub to_string {
   my $self = shift;
   return '[' . $self->fields->to_string . ']' . $self->stream->to_string;
@@ -238,8 +251,10 @@
       foreach (@keys) {
 
         # Add token annotation
-        my $length = $subtoken_offset[1] ? ($subtoken_offset[1]-$subtoken_offset[0]-1) : 0;
-        $stream->subtoken($subtoken_offset[0])->add_annotation('#' . $_, $length);
+        # my $length = $subtoken_offset[1] ? ($subtoken_offset[1]-$subtoken_offset[0]-1) : 0;
+        $stream->subtoken(
+          $subtoken_offset[0]
+        )->add_annotation($_, $subtoken_offset[1] ? $subtoken_offset[1] : $subtoken_offset[0] + 1);
       };
     }
 
@@ -247,14 +262,17 @@
     elsif ($item->{'@type'} eq 'koral:span') {
 
       # Create key string
-      my $key = '<>' . _term($item->{wrap});
-
+      my $term = _term($item->{wrap});
+      $term->term_type('span');
 
       # Add span to forward stream
-      my $length = $item->{subtokens}->[1] ? (
-        $item->{subtokens}->[-1] - $item->{subtokens}->[0]
-      ) : 0;
-      $stream->subtoken($item->{subtokens}->[0])->add_annotation($key, $length);
+      #my $length = $item->{subtokens}->[1] ? (
+      #  $item->{subtokens}->[-1] - $item->{subtokens}->[0]
+      #) : 0;
+      $stream->subtoken($item->{subtokens}->[0])->add_annotation(
+        $term,
+        $item->{subtokens}->[-1] + 1
+      );
     };
   };
 
@@ -273,17 +291,36 @@
 # Potentially with a prefix
 sub _term {
   my $item = shift;
+  my $term = Krawfish::Koral::Query::Term->new;
 
-  my $key = '';
-  # Create term for term dictionary
   if ($item->{foundry}) {
-    $key .= $item->{foundry};
-    if ($item->{layer}) {
-      $key .= '/' . $item->{layer};
-    }
-    $key .= '=';
+    $term->foundry($item->{foundry});
   };
-  return $key . ($item->{key} // '');
+
+  if ($item->{layer}) {
+    $term->layer($item->{layer});
+  };
+
+  if ($item->{key}) {
+    $term->key($item->{key});
+  };
+
+  if ($item->{value}) {
+    $term->value($item->{value});
+  };
+
+  return $term;
+
+  #my $key = '';
+  ## Create term for term dictionary
+  #if ($item->{foundry}) {
+  #  $key .= $item->{foundry};
+  #  if ($item->{layer}) {
+  #    $key .= '/' . $item->{layer};
+  #  }
+  #  $key .= '=';
+  #};
+  #return $key . ($item->{key} // '');
 }
 
 

diff --git a/lib/Krawfish/Koral/Query/Constraints.pm b/lib/Krawfish/Koral/Query/Constraints.pm
index 8cadd43..ba81984 100644
--- a/lib/Krawfish/Koral/Query/Constraints.pm
+++ b/lib/Krawfish/Koral/Query/Constraints.pm

@@ -272,6 +272,10 @@
 sub optimize {
   my ($self, $segment) = @_;
 
+  if (DEBUG) {
+    print_log('kq_constr', 'Optimize constraint for ' . $self->to_string);
+  };
+
   # Optimize operands
   my $first = $self->{operands}->[0]->optimize($segment);
   if ($first->max_freq == 0) {

diff --git a/lib/Krawfish/Koral/Query/Filter.pm b/lib/Krawfish/Koral/Query/Filter.pm
index 8ac27d9..a7f82df 100644
--- a/lib/Krawfish/Koral/Query/Filter.pm
+++ b/lib/Krawfish/Koral/Query/Filter.pm

@@ -1,5 +1,6 @@
 package Krawfish::Koral::Query::Filter;
 use parent 'Krawfish::Koral::Query';
+use Krawfish::Log;
 use Krawfish::Query::Nothing;
 use strict;
 use warnings;
@@ -106,6 +107,10 @@
 
   # Filter would rule out everything
   if ($corpus->max_freq == 0) {
+
+    if (DEBUG) {
+      print_log('kq_filter', 'Corpus ' . $self->corpus->to_string . ' is empty');
+    };
     return Krawfish::Query::Nothing->new;
   };
 

diff --git a/lib/Krawfish/Koral/Query/Term.pm b/lib/Krawfish/Koral/Query/Term.pm
index ff33c6d..74eb863 100644
--- a/lib/Krawfish/Koral/Query/Term.pm
+++ b/lib/Krawfish/Koral/Query/Term.pm

@@ -6,11 +6,10 @@
 use strict;
 use warnings;
 
-# TODO:
-#   Inflate may be renamed to ->identify(),
-#   Because while regexes are inflated, terms
-#   should probably already be rewritten to term_ids
 
+# TODO:
+#  Probably introduce '#' as a prefix for
+#  token annotations!
 
 # TODO: Support escaping! Especially for regex!
commit	14ff0c4e3db6fdd132519250dd7607d8bf8a0e37	[log] [tgz]
author	Akron <nils@diewald-online.de>	Wed Aug 09 20:49:52 2017 +0200
committer	Akron <nils@diewald-online.de>	Wed Aug 09 20:49:52 2017 +0200
tree	3693f902451d7584b9f13be85447d4c1a6201a0b
parent	dd10fb91819cf3ac77a5032c145e22a8322b79f4 [diff]