Started example forward index deserialization
diff --git a/lib/Krawfish/Index/Dictionary.pm b/lib/Krawfish/Index/Dictionary.pm
index e51452e..eda1626 100644
--- a/lib/Krawfish/Index/Dictionary.pm
+++ b/lib/Krawfish/Index/Dictionary.pm
@@ -18,9 +18,13 @@
# terms: *
# (casefolded) '
# subterms: ~
+# foundry: ^
+# layer: °
# annotations
-# token #
+# token # (not yet supported)
# span <>
+# relations <, >
+# attributes @
# fields: +
# fieldkeys: !
#
@@ -124,7 +128,7 @@
# requested, for example, by the term_id API for co-occurrence search.
# That's why all subterms need to be stored as well.
-use constant DEBUG => 0;
+use constant DEBUG => 1;
sub new {
my $class = shift;
diff --git a/lib/Krawfish/Index/FieldValue.pm b/lib/Krawfish/Index/FieldValue.pm
index b77e475..80793d2 100644
--- a/lib/Krawfish/Index/FieldValue.pm
+++ b/lib/Krawfish/Index/FieldValue.pm
@@ -4,6 +4,9 @@
use strict;
use warnings;
+# TODO:
+# This is deprecated in favor of Forward::*
+
# All values are stored as varints in a skiplist
# augmented postingslist
diff --git a/lib/Krawfish/Index/Fields.pm b/lib/Krawfish/Index/Fields.pm
index ade74f2..4a7dffa 100644
--- a/lib/Krawfish/Index/Fields.pm
+++ b/lib/Krawfish/Index/Fields.pm
@@ -7,6 +7,11 @@
use constant DEBUG => 0;
# TODO:
+# This is deprecated in favor of Forward::*, though ranking is
+# a separate issue.
+
+
+# TODO:
# Currently ranking is not collation based. It should be possible
# to define a collation per field and
# use one collation for prefix and suffix sorting.
diff --git a/lib/Krawfish/Index/Forward.pm b/lib/Krawfish/Index/Forward.pm
new file mode 100644
index 0000000..ea5a278
--- /dev/null
+++ b/lib/Krawfish/Index/Forward.pm
@@ -0,0 +1,82 @@
+package Krawfish::Index::Forward;
+use Krawfish::Index::Forward::Stream;
+use Krawfish::Index::Forward::Doc;
+# use Krawfish::Index::Store::V1::ForwardIndex;
+use warnings;
+use strict;
+
+# TODO:
+# This API needs to be backed up by a store version.
+
+# API:
+# ->next_doc
+# ->to_doc($doc_id)
+# ->skip_pos($pos)
+# ->next_subtoken (fails, when the document ends)
+# ->prev_subtoken
+#
+# ->doc_id # The current doc_id
+# ->pos # The current subtoken position
+#
+# ->current # The current subtoken object
+# ->preceding_data # The whitespace data before the subtoken
+# ->subterm_id # The current subterm identifier
+# ->annotations # Get all annotations as terms
+# ->annotations(foundry_id)
+# ->annotations(foundry_id, layer_id)
+#
+# ->fields # All fields as terms
+# ->fields(field_key_id*) # All fields with the key_ids
+
+
+sub new {
+ my $class = shift;
+ # Constructor: start with an empty forward index
+ bless {
+ docs => [], # Forward indexed documents, addressed by doc_id
+ last_doc_id => 0 # Counter that add() increments per document
+ }, $class;
+};
+
+
+# Get the last_doc_id counter (meant as last document identifier aka max_doc_id)
+sub last_doc_id {
+ $_[0]->{last_doc_id}; # Read-only accessor, the counter is advanced by add()
+};
+
+
+# Store a Krawfish::Koral::Document object and return its doc_id
+sub add {
+ my ($self, $doc) = @_;
+ my $doc_id = $self->{last_doc_id}++; # Claim the next free identifier
+
+ # Store under the returned doc_id, so that get($doc_id) finds the document
+ $self->{docs}->[$doc_id] = $self->to_forward_index($doc);
+
+ return $doc_id;
+};
+
+
+# Get a specific forward indexed document by doc_id
+sub get {
+ my ($self, $doc_id) = @_;
+ # Only identifiers up to the current counter are looked up
+ if ($doc_id <= $self->last_doc_id) {
+ return $self->{docs}->[$doc_id];
+ };
+ # Beyond the counter: undef in scalar context, empty list in list context
+ return;
+};
+
+
+# Convert a document to its forward index representation (does not store it)
+sub to_forward_index {
+ my ($self, $doc) = @_;
+
+ # Build the structure as a Krawfish::Index::Forward::Doc object
+ return Krawfish::Index::Forward::Doc->new($doc);
+ # Krawfish::Index::Store::V1::ForwardIndex->new;
+};
+
+
+1;
diff --git a/lib/Krawfish/Index/Forward/Annotation.pm b/lib/Krawfish/Index/Forward/Annotation.pm
index a2f9b15..fda029e 100644
--- a/lib/Krawfish/Index/Forward/Annotation.pm
+++ b/lib/Krawfish/Index/Forward/Annotation.pm
@@ -1,11 +1,11 @@
package Krawfish::Index::Forward::Annotation;
+use Krawfish::Koral::Query::Term;
use Krawfish::Util::String qw/squote/;
use warnings;
use strict;
-# TODO:
-# This should contain type, foundry, layer, key, value ... etc.
+# Accepts a Krawfish::Koral::Query::Term object
sub new {
my $class = shift;
bless {
@@ -25,15 +25,55 @@
};
+sub foundry_id {
+ $_[0]->{foundry_id} // 0; # Foundry term_id, 0 as long as none is set
+};
+
+
+sub layer_id {
+ $_[0]->{layer_id} // 0; # Layer term_id, 0 as long as none is set
+};
+
+
+sub term_id {
+ $_[0]->{term_id}; # Term identifier, assigned by identify()
+};
+
+
sub identify {
my ($self, $dict) = @_;
- my $term_id = $dict->term_id_by_term($self->{term});
- if (defined $term_id) {
+ my $term_id;
+ my $term = $self->{term};
+ my $term_str = $term->to_term;
+
+ $term_id = $dict->term_id_by_term($term_str);
+
+ # Term id is already known!
+ if ($term_id) {
$self->{term_id} = $term_id;
+ $self->{foundry_id} = $dict->term_id_by_term('^' . $term->foundry);
+ $self->{layer_id} = $dict->term_id_by_term('°' . $term->layer);
+ return $self;
}
+
+ # Term id is not yet given
else {
- $self->{term_id} = $dict->add_term($self->{term});;
+ $self->{term_id} = $dict->add_term($term_str);
+ };
+
+ # Get term_id for foundry
+ if ($term->foundry) {
+ $term_id = $dict->term_id_by_term('^' . $term->foundry);
+ $self->{foundry_id} = $term_id ? $term_id :
+ $dict->add_term('^' . $term->foundry);
+ };
+
+ # Get term_id for layer
+ if ($term->layer) {
+ $term_id = $dict->term_id_by_term('°' . $term->layer);
+ $self->{layer_id} = $term_id ? $term_id :
+ $dict->add_term('°' . $term->layer);
};
return $self;
@@ -49,9 +89,9 @@
}
else {
- $str .= squote($self->{term});
+ $str .= squote($self->{term}->to_term);
};
- return $str . '$' . join(',', @{$self->{data}});
+ return $str . '$' . join(',', @{$self->{data}});
};
1;
diff --git a/lib/Krawfish/Index/Forward/Doc.pm b/lib/Krawfish/Index/Forward/Doc.pm
new file mode 100644
index 0000000..08c7c66
--- /dev/null
+++ b/lib/Krawfish/Index/Forward/Doc.pm
@@ -0,0 +1,101 @@
+package Krawfish::Index::Forward::Doc;
+use warnings;
+use strict;
+
+sub new {
+ my $class = shift;
+ my $doc = shift;
+ # Flatten the fields and the subtoken stream of $doc into one data array.
+ # Fetch the fields (each is asked for key_id, term_id and type below)
+ my $fields = $doc->fields;
+
+ # Sort fields by key identifier first, then by term identifier
+ # Should probably be part of the document
+ my @sorted_fields = sort {
+ if ($a->key_id < $b->key_id) {
+ return -1;
+ }
+ elsif ($a->key_id > $b->key_id) {
+ return 1;
+ }
+ elsif ($a->term_id < $b->term_id) {
+ return -1;
+ }
+ elsif ($a->term_id > $b->term_id) {
+ return 1;
+ }
+ else {
+ warn 'Multiple fields given!';
+ return 0;
+ };
+ } @$fields;
+
+
+ # Add field data as (key_id, type, value-or-term_id) triples
+ my @data = ();
+ foreach (@sorted_fields) {
+ push @data, $_->key_id; # Key data
+ push @data, $_->type; # Key type marker
+ # Fields of type 'int' store their value, all others their term_id
+ push @data, ($_->type eq 'int' ? $_->value : $_->term_id);
+ };
+ push @data, 'EOF'; # Pushed once after the last field triple
+ push @data, 0; # Point to previous subtoken (should be xor)
+
+ my $start_marker;
+
+ # Add annotation data, one record per subtoken of the stream
+ my $stream = $doc->stream;
+ foreach my $subtoken (@$stream) {
+
+ push @data, 0; # Point to next subtoken (should be xor)
+ $start_marker = $#data; # Remember the index of that placeholder
+
+ push @data, $subtoken->term_id;
+ push @data, $subtoken->preceding;
+
+ my @sorted_annotations = sort {
+ if ($a->foundry_id < $b->foundry_id) {
+ -1;
+ }
+ elsif ($a->foundry_id > $b->foundry_id) {
+ 1;
+ }
+ elsif ($a->layer_id < $b->layer_id) {
+ -1;
+ }
+ elsif ($a->layer_id > $b->layer_id) {
+ 1;
+ }
+ elsif ($a->term_id < $b->term_id) {
+ -1;
+ }
+ elsif ($a->term_id > $b->term_id) {
+ 1;
+ }
+ else {
+ 0;
+ };
+ } @{$subtoken->annotations};
+
+ # Add all annotations, ordered by foundry_id, layer_id and term_id
+ foreach (@sorted_annotations) {
+ push @data, $_->foundry_id;
+ push @data, $_->layer_id;
+ push @data, $_->term_id;
+ push @data, [@{$_->data}];
+ };
+
+ push @data, $start_marker; # Trailing slot holds the index of this record's start
+ $data[$start_marker] = $#data; # Backpatch the placeholder with the index of that trailing slot
+ };
+
+ bless {
+ stream => \@data
+ }, $class;
+};
+
+
+
+
+1;
diff --git a/lib/Krawfish/Index/Forward/FieldInt.pm b/lib/Krawfish/Index/Forward/FieldInt.pm
index 12b5787..0af392b 100644
--- a/lib/Krawfish/Index/Forward/FieldInt.pm
+++ b/lib/Krawfish/Index/Forward/FieldInt.pm
@@ -11,6 +11,24 @@
}, $class;
};
+sub type {
+ 'int'; # Constant type marker of integer fields
+};
+
+# Get the identifier of the key_value combination
+sub term_id {
+ $_[0]->{key_value_id}; # Read from the key_value_id slot
+};
+
+
+# Get the identifier of the field key
+sub key_id {
+ $_[0]->{key_id}; # Read-only accessor
+};
+
+sub value {
+ $_[0]->{value}; # The field's stored value
+};
sub identify {
diff --git a/lib/Krawfish/Index/Forward/FieldString.pm b/lib/Krawfish/Index/Forward/FieldString.pm
index 01fedde..aaec9b3 100644
--- a/lib/Krawfish/Index/Forward/FieldString.pm
+++ b/lib/Krawfish/Index/Forward/FieldString.pm
@@ -12,6 +12,22 @@
};
+sub type {
+ 'string'; # Constant type marker of string fields
+};
+
+# Get the identifier of the key_value combination
+sub term_id {
+ $_[0]->{key_value_id}; # Read from the key_value_id slot
+};
+
+
+# Get the identifier of the field key
+sub key_id {
+ $_[0]->{key_id}; # Read-only accessor
+};
+
+
sub identify {
my ($self, $dict) = @_;
diff --git a/lib/Krawfish/Index/Forward/Fields.pm b/lib/Krawfish/Index/Forward/Fields.pm
index 347a91b..4681834 100644
--- a/lib/Krawfish/Index/Forward/Fields.pm
+++ b/lib/Krawfish/Index/Forward/Fields.pm
@@ -40,4 +40,6 @@
return $self;
};
+
+
1;
diff --git a/lib/Krawfish/Index/Forward/Stream.pm b/lib/Krawfish/Index/Forward/Stream.pm
index 01bb531..a677483 100644
--- a/lib/Krawfish/Index/Forward/Stream.pm
+++ b/lib/Krawfish/Index/Forward/Stream.pm
@@ -4,6 +4,8 @@
use strict;
# This is one single stream of the forward index;
+# TODO:
+# This should probably be part of Koral::Document::*
sub new {
my $class = shift;
@@ -27,6 +29,10 @@
return join '', map { '(' . ($i++) . ')' . $_->to_string } @{$_[0]}
};
+sub length {
+ scalar @{$_[0]}; # Number of subtokens, also when called in list context
+};
+
sub identify {
my ($self, $dict) = @_;
diff --git a/lib/Krawfish/Index/Forward/Subtoken.pm b/lib/Krawfish/Index/Forward/Subtoken.pm
index 4a00e7b..0e47335 100644
--- a/lib/Krawfish/Index/Forward/Subtoken.pm
+++ b/lib/Krawfish/Index/Forward/Subtoken.pm
@@ -27,6 +27,14 @@
$_[0]->{subterm};
};
+sub term_id {
+ $_[0]->{subterm_id}; # Identifier of the subterm (subterm_id slot)
+};
+
+
+sub annotations {
+ $_[0]->{anno}; # Content of the anno slot; callers dereference it as an array
+};
# Add annotations
sub add_annotation {
@@ -38,6 +46,9 @@
sub identify {
my ($self, $dict) = @_;
+ # This is the final subtoken that's only required for preceding bytes
+ return $self unless $self->{subterm};
+
my $term = '*' . $self->{subterm};
my $term_id = $dict->term_id_by_term($term);
diff --git a/lib/Krawfish/Index/PostingsList.pm b/lib/Krawfish/Index/PostingsList.pm
index b2c4d48..0e0f0d2 100644
--- a/lib/Krawfish/Index/PostingsList.pm
+++ b/lib/Krawfish/Index/PostingsList.pm
@@ -4,7 +4,7 @@
use strict;
use warnings;
-use constant DEBUG => 1;
+use constant DEBUG => 0;
# TODO:
# Use different PostingsList (or rather different PostingPointer)
diff --git a/lib/Krawfish/Index/Segment.pm b/lib/Krawfish/Index/Segment.pm
index cf65d2e..c8eaee2 100644
--- a/lib/Krawfish/Index/Segment.pm
+++ b/lib/Krawfish/Index/Segment.pm
@@ -1,9 +1,10 @@
package Krawfish::Index::Segment;
use Krawfish::Index::Subtokens;
-use Krawfish::Index::PrimaryData;
-use Krawfish::Index::Fields;
+use Krawfish::Index::PrimaryData; # Maybe irrelevant
+use Krawfish::Index::Fields; # Maybe irrelevant
use Krawfish::Index::PostingsLive;
use Krawfish::Index::PostingsList;
+use Krawfish::Index::Forward;
use Krawfish::Cache;
use Krawfish::Log;
use Scalar::Util qw!blessed!;
@@ -40,7 +41,7 @@
file => $file
}, $class;
- print_log('segment', 'Instantiate new segment') if DEBUG;
+ print_log('seg', 'Instantiate new segment') if DEBUG;
# Load offsets
$self->{subtokens} = Krawfish::Index::Subtokens->new(
@@ -79,6 +80,9 @@
# Add cache
$self->{cache} = Krawfish::Cache->new;
+ # Add forward index
+ $self->{forward} = Krawfish::Index::Forward->new;
+
return $self;
};
@@ -143,4 +147,75 @@
return $self->{$term_id};
};
+
+sub forward {
+ $_[0]->{forward}; # The segment's forward index object
+};
+
+
+# This will make add() in Krawfish::Index obsolete
+sub add {
+ my ($self, $doc) = @_;
+
+ # TODO:
+ # Alternatively get this from the forward index
+ # Get new doc_id for the segment
+ my $doc_id = $self->live->incr;
+
+ # TODO:
+ # The document should already have a field with __1:1 and id!
+
+ # TODO:
+ # Index forward index
+ # Alternatively, this could be done in the same method here!
+ my $doc_id_2 = $self->forward->add($doc);
+ # NOTE(review): $doc_id_2 is never used - confirm it always equals $doc_id
+ # TODO:
+ # Rank fields!
+
+ # TODO:
+ # Deal with sortables!
+
+ # $self->invert->add()
+
+ # Create term index for fields: one posting per field term_id
+ my $fields = $doc->fields;
+ foreach (@$fields) {
+ if (DEBUG) {
+ print_log('seg', 'Added field #' . $_->term_id . ' for doc_id=' . $doc_id);
+ };
+ $self->postings($_->term_id)->append($doc_id);
+ };
+
+ # TODO:
+ # This should probably collect all [term_id => data] in advance,
+ # so skiplist info, freq_in_doc etc. can be adjusted beforehand
+ my $stream = $doc->stream;
+ for (my $start = 0; $start < $stream->length; $start++) {
+ my $subtoken = $stream->subtoken($start);
+
+ # Skip subtokens without a term_id, e.g. the last one that only exists for preceding bytes
+ next unless $subtoken->term_id;
+
+ # Add subtoken to postingslist, spanning the positions $start to $start + 1
+ $self->postings($subtoken->term_id)->append($doc_id, $start, $start + 1);
+
+ if (DEBUG) {
+ print_log('seg', 'Added subterm #' . $subtoken->term_id . ' for doc_id=' . $doc_id);
+ };
+
+ # Add all annotations, starting at $start and followed by their own data
+ foreach (@{$subtoken->annotations}) {
+ $self->postings($_->term_id)->append($doc_id, $start, @{$_->data});
+
+ if (DEBUG) {
+ print_log('seg', 'Added anno term #' . $_->term_id . ' for doc_id=' . $doc_id);
+ };
+ };
+ };
+
+ return $doc_id;
+};
+
+
1;
diff --git a/lib/Krawfish/Index/Store/V1/ForwardIndex.pm b/lib/Krawfish/Index/Store/V1/ForwardIndex.pm
index db9b2ca..fef6207 100644
--- a/lib/Krawfish/Index/Store/V1/ForwardIndex.pm
+++ b/lib/Krawfish/Index/Store/V1/ForwardIndex.pm
@@ -7,24 +7,37 @@
use warnings;
use Data::BitStream;
-# TODO:
-# ForwardIndex and Fields should be stored in one file!
+
+# To be stored as
+# [field-data-length] # Necessary for skipping to annotations
+# ( # These are sorted in term_id order
+# [field-key-termid-varint]
+# [field-type-bit]
+# [field-value-termid-varint|field-value-int]
+# )*
+# [annotation-data-length] # Necessary for skipping to next doc
+# (
+# [next-subtoken-xor-int] # xor-double-linked-list for next and prev
+# [subterm-termid-varint] # Necessary for primary data retrieval,
+# # co-occurrence search ...
+# [subterm-length-varint] # Necessary for character offsets for snippet contexts
+# # and potentially character-length sorting
+# [preceding-data-string] # Necessary for primary data retrieval,
+# # may need preceding length information
+# ( # These are sorted in term_id order
+# [foundry-id-varint]
+# [layer-id-varint]
+# [term-id|term-string] # Value is optional for hapax-legomena dealing
+# [payload-length]
+# [payload] # Redundancy of payload is unfortunate
+# )*
+# )*
+#
+# The positions are augmented with SkipList marker
+
# TODO:
-# This should probably be renamed to ForwardStream,
-# while the index needs to contain an index pointing to the
-# offsets for the documents in question!
-#
-# TODO:
-# This should store all document data using
-# term-IDs (where possible).
-# Structure like
-# [length][subtoken-surface-token-ID][foundry-layer-ID][term] ...
-# [length][plain-text]
-# [length][subtoken-surface-token-ID][foundry-layer-ID][term] ...
-#
-# The plain text contains blanks, commata, etc.
-# The subtokens point to byte offsets in the highly compressed forward index.
+# This should probably be renamed to ForwardStream
use constant {
SUBTOKEN_MARKER => 0b0000_0000,
diff --git a/lib/Krawfish/Koral/Document.pm b/lib/Krawfish/Koral/Document.pm
index 0a70d2e..96f7378 100644
--- a/lib/Krawfish/Koral/Document.pm
+++ b/lib/Krawfish/Koral/Document.pm
@@ -1,6 +1,7 @@
package Krawfish::Koral::Document;
use Krawfish::Index::Forward::Stream;
use Krawfish::Index::Forward::Fields;
+use Krawfish::Koral::Query::Term;
use Krawfish::Log;
use Mojo::File;
use Mojo::JSON qw/encode_json decode_json/;
@@ -19,6 +20,12 @@
# can be added with all freq_in_doc information
+# TODO:
+# Don't forget to deal with TUIs!
+
+# Foundry and layer may need separate term_ids, so that they are exceptionally small.
+
+
use constant DEBUG => 1;
# Parse the document and create an inverted index file
@@ -51,11 +58,13 @@
};
+# Get the stream object
sub stream {
$_[0]->{stream};
};
+# Get the fields object
sub fields {
$_[0]->{fields};
};
@@ -65,6 +74,9 @@
$_[0]->{sortable};
};
+
+# Translate all terms into term_ids and
+# add unknown terms to the dictionary
sub identify {
my ($self, $dict) = @_;
$self->{fields} = $self->{fields}->identify($dict);
@@ -73,6 +85,7 @@
};
+# Stringification
sub to_string {
my $self = shift;
return '[' . $self->fields->to_string . ']' . $self->stream->to_string;
@@ -238,8 +251,10 @@
foreach (@keys) {
# Add token annotation
- my $length = $subtoken_offset[1] ? ($subtoken_offset[1]-$subtoken_offset[0]-1) : 0;
- $stream->subtoken($subtoken_offset[0])->add_annotation('#' . $_, $length);
+ # my $length = $subtoken_offset[1] ? ($subtoken_offset[1]-$subtoken_offset[0]-1) : 0;
+ $stream->subtoken(
+ $subtoken_offset[0]
+ )->add_annotation($_, $subtoken_offset[1] ? $subtoken_offset[1] : $subtoken_offset[0] + 1);
};
}
@@ -247,14 +262,17 @@
elsif ($item->{'@type'} eq 'koral:span') {
# Create key string
- my $key = '<>' . _term($item->{wrap});
-
+ my $term = _term($item->{wrap});
+ $term->term_type('span');
# Add span to forward stream
- my $length = $item->{subtokens}->[1] ? (
- $item->{subtokens}->[-1] - $item->{subtokens}->[0]
- ) : 0;
- $stream->subtoken($item->{subtokens}->[0])->add_annotation($key, $length);
+ #my $length = $item->{subtokens}->[1] ? (
+ # $item->{subtokens}->[-1] - $item->{subtokens}->[0]
+ #) : 0;
+ $stream->subtoken($item->{subtokens}->[0])->add_annotation(
+ $term,
+ $item->{subtokens}->[-1] + 1
+ );
};
};
@@ -273,17 +291,36 @@
# Potentially with a prefix
sub _term {
my $item = shift;
+ my $term = Krawfish::Koral::Query::Term->new;
- my $key = '';
- # Create term for term dictionary
if ($item->{foundry}) {
- $key .= $item->{foundry};
- if ($item->{layer}) {
- $key .= '/' . $item->{layer};
- }
- $key .= '=';
+ $term->foundry($item->{foundry});
};
- return $key . ($item->{key} // '');
+
+ if ($item->{layer}) {
+ $term->layer($item->{layer});
+ };
+
+ if ($item->{key}) {
+ $term->key($item->{key});
+ };
+
+ if ($item->{value}) {
+ $term->value($item->{value});
+ };
+
+ return $term;
+
+ #my $key = '';
+ ## Create term for term dictionary
+ #if ($item->{foundry}) {
+ # $key .= $item->{foundry};
+ # if ($item->{layer}) {
+ # $key .= '/' . $item->{layer};
+ # }
+ # $key .= '=';
+ #};
+ #return $key . ($item->{key} // '');
}
diff --git a/lib/Krawfish/Koral/Query/Constraints.pm b/lib/Krawfish/Koral/Query/Constraints.pm
index 8cadd43..ba81984 100644
--- a/lib/Krawfish/Koral/Query/Constraints.pm
+++ b/lib/Krawfish/Koral/Query/Constraints.pm
@@ -272,6 +272,10 @@
sub optimize {
my ($self, $segment) = @_;
+ if (DEBUG) {
+ print_log('kq_constr', 'Optimize constraint for ' . $self->to_string);
+ };
+
# Optimize operands
my $first = $self->{operands}->[0]->optimize($segment);
if ($first->max_freq == 0) {
diff --git a/lib/Krawfish/Koral/Query/Filter.pm b/lib/Krawfish/Koral/Query/Filter.pm
index 8ac27d9..a7f82df 100644
--- a/lib/Krawfish/Koral/Query/Filter.pm
+++ b/lib/Krawfish/Koral/Query/Filter.pm
@@ -1,5 +1,6 @@
package Krawfish::Koral::Query::Filter;
use parent 'Krawfish::Koral::Query';
+use Krawfish::Log;
use Krawfish::Query::Nothing;
use strict;
use warnings;
@@ -106,6 +107,10 @@
# Filter would rule out everything
if ($corpus->max_freq == 0) {
+
+ if (DEBUG) {
+ print_log('kq_filter', 'Corpus ' . $self->corpus->to_string . ' is empty');
+ };
return Krawfish::Query::Nothing->new;
};
diff --git a/lib/Krawfish/Koral/Query/Term.pm b/lib/Krawfish/Koral/Query/Term.pm
index ff33c6d..74eb863 100644
--- a/lib/Krawfish/Koral/Query/Term.pm
+++ b/lib/Krawfish/Koral/Query/Term.pm
@@ -6,11 +6,10 @@
use strict;
use warnings;
-# TODO:
-# Inflate may be renamed to ->identify(),
-# Because while regexes are inflated, terms
-# should probably already be rewritten to term_ids
+# TODO:
+# Probably introduce '#' as a prefix for
+# token annotations!
# TODO: Support escaping! Especially for regex!