Introduced stored fields
diff --git a/lib/Krawfish/Index/Fields/Doc.pm b/lib/Krawfish/Index/Fields/Doc.pm
index 1ae47a9..e6d0d37 100644
--- a/lib/Krawfish/Index/Fields/Doc.pm
+++ b/lib/Krawfish/Index/Fields/Doc.pm
@@ -45,8 +45,8 @@
push @data, $_->key_id; # Key data
push @data, $_->type; # Key type marker
# Store term or value!
- push @data, $_->term_id;
- push @data, $_->value if $_->type eq 'int';;
+ push @data, $_->term_id unless $_->type eq 'store';
+ push @data, $_->value if $_->type eq 'int' || $_->type eq 'store';
};
push @data, 'EOF';
diff --git a/lib/Krawfish/Index/Fields/Pointer.pm b/lib/Krawfish/Index/Fields/Pointer.pm
index 7527858..5ac97b1 100644
--- a/lib/Krawfish/Index/Fields/Pointer.pm
+++ b/lib/Krawfish/Index/Fields/Pointer.pm
@@ -1,4 +1,7 @@
package Krawfish::Index::Fields::Pointer;
+use Krawfish::Koral::Document::FieldInt;
+use Krawfish::Koral::Document::FieldStore;
+use Krawfish::Koral::Document::FieldString;
use Krawfish::Log;
use warnings;
use strict;
@@ -72,6 +75,7 @@
};
+# This returns only int-values - so it may need to be renamed
sub values {
my $self = shift;
my @key_ids = @_; # Need to be sorted in order!
@@ -99,7 +103,7 @@
$key_id = $doc->[$self->{pos}++];
$type = $doc->[$self->{pos}++];
- # Skip key term
+ # Skip key term or value (in case of store)
$self->{pos}++;
# There is a value to aggregate
@@ -107,7 +111,10 @@
if (DEBUG) {
print_log('f_point', "Found value for " . $key_ids[$key_pos] . ' at ' . $key_pos);
};
- push @values, [$key_id, $doc->[$self->{pos}++]];
+ push @values, Krawfish::Koral::Document::FieldInt->new(
+ key_id => $key_id,
+ value => $doc->[$self->{pos}++]
+ );
};
$key_pos++;
@@ -125,7 +132,7 @@
$self->{pos}++;
$type = $doc->[$self->{pos}++];
$self->{pos}++;
- $self->{pos}++ if $type eq 'int'
+ $self->{pos}++ if $type eq 'int' # store is key_id,type,value: its value slot was skipped above
};
# Remember the current field
@@ -135,6 +142,7 @@
return @values;
};
+
# Get all field term ids.
# If key ids are passed, they need to be in numerical order!
sub fields {
@@ -149,15 +157,7 @@
unless (@_ > 0) {
while ($current && $current ne 'EOF') {
- # The structure [key_id, key] is necessary for multivalued fields!
- $key_id = $self->{pos}++;
-
- $type = $doc->[$self->{pos}++];
-
- push @fields, [$key_id, $doc->[$self->{pos}++]];
-
- # Skip value
- $self->{pos}++ if $type eq 'int';
+ push @fields, $self->_get_by_type($doc);
$current = $doc->[$self->{pos}];
};
}
@@ -186,14 +186,12 @@
# The key id matches the first id
if ($current == $key_ids[$key_pos]) {
- # The structure [key_id, key] is necessary for multivalued fields!
- $self->{pos}++;
- $type = $doc->[$self->{pos}++];
- my $field = $doc->[$self->{pos}++];
- push @fields, [$current, $field];
+ push @fields, $self->_get_by_type($doc);
if (DEBUG) {
- print_log('f_point', "Found field_id $field for " . $key_ids[$key_pos] . ' at ' . $key_pos);
+ print_log('f_point', 'Found field ' .
+ $fields[-1]->to_string .
+ ' for ' . $key_ids[$key_pos]);
};
$key_pos++;
@@ -210,11 +208,10 @@
else {
$self->{pos}++;
$type = $doc->[$self->{pos}++];
- $self->{pos}++;
+ $self->{pos}++ if $type ne 'store';
+ $self->{pos}++ if $type eq 'int' || $type eq 'store';
};
- # Skip value
- $self->{pos}++ if $type eq 'int';
# Remember the current field
$current = $doc->[$self->{pos}];
@@ -225,9 +222,62 @@
};
+# Read the field record at the current position and advance past it.
+#
+# Record layouts, matching what Krawfish::Index::Fields::Doc pushes:
+#   int:    key_id, type, term_id, value
+#   string: key_id, type, term_id
+#   store:  key_id, type, value
+#
+# Returns the matching Krawfish::Koral::Document::Field* object.
+# Dies on an unknown type marker: the record length is unknown then,
+# so the position can no longer be trusted.
+sub _get_by_type {
+  my ($self, $doc) = @_;
+
+  my $key_id = $doc->[$self->{pos}++];
+  my $type   = $doc->[$self->{pos}++];
+
+  # Guard against reading past the end of the stream
+  $type = '' unless defined $type;
+
+  # Read integer
+  if ($type eq 'int') {
+    return Krawfish::Koral::Document::FieldInt->new(
+      key_id       => $key_id,
+      key_value_id => $doc->[$self->{pos}++],
+      value        => $doc->[$self->{pos}++]
+    );
+  }
+
+  # Read string
+  elsif ($type eq 'string') {
+    return Krawfish::Koral::Document::FieldString->new(
+      key_id       => $key_id,
+      key_value_id => $doc->[$self->{pos}++]
+    );
+  }
+
+  # Read store
+  elsif ($type eq 'store') {
+    return Krawfish::Koral::Document::FieldStore->new(
+      key_id => $key_id,
+      value  => $doc->[$self->{pos}++]
+    );
+  };
+
+  # Unknown type marker - never fall off the end silently
+  die "Unknown field type '$type' in fields stream";
+};
+
+
+# Get the term ids of the requested fields.
+# NOTE(review): FieldStore::term_id returns undef, so store fields
+# yield undef entries here - confirm whether they should be skipped.
sub field_terms {
my $self = shift;
- return map { $_->[1] } $self->fields(@_);
+ warn 'probably wrong!';
+ return map { $_->term_id } $self->fields(@_);
};
diff --git a/lib/Krawfish/Index/Merge.pm b/lib/Krawfish/Index/Merge.pm
index 4ef971b..f6881de 100644
--- a/lib/Krawfish/Index/Merge.pm
+++ b/lib/Krawfish/Index/Merge.pm
@@ -17,10 +17,10 @@
sub new {
- my ($class, $index_a, $index_b) = @_;
+ my ($class, $segment_a, $segment_b) = @_;
bless {
- index_a => $index_a,
- index_b => $index_b
+ segment_a => $segment_a,
+ segment_b => $segment_b
}, $class;
};
@@ -64,6 +64,7 @@
$self->_launch;
};
+
sub _launch {
# TODO:
# - If the dictionary is new
diff --git a/lib/Krawfish/Index/Segment.pm b/lib/Krawfish/Index/Segment.pm
index e23dcaf..0a0a2dd 100644
--- a/lib/Krawfish/Index/Segment.pm
+++ b/lib/Krawfish/Index/Segment.pm
@@ -165,6 +165,7 @@
# Create term index for fields
my $fields = $doc->fields;
foreach (@$fields) {
+ next if $_->type eq 'store';
if (DEBUG) {
print_log('seg', 'Added field #' . $_->term_id . ' for doc_id=' . $doc_id);
};
diff --git a/lib/Krawfish/Koral/Document.pm b/lib/Krawfish/Koral/Document.pm
index a2d81ec..aa96468 100644
--- a/lib/Krawfish/Koral/Document.pm
+++ b/lib/Krawfish/Koral/Document.pm
@@ -33,7 +33,6 @@
my $class = shift;
my $self = bless {
- primary => '',
sortable => {},
stream => Krawfish::Koral::Document::Stream->new,
fields => Krawfish::Koral::Document::Fields->new
@@ -52,12 +51,6 @@
};
-# Get the primary data
-sub primary_data {
- $_[0]->{primary};
-};
-
-
# Get the stream object
sub stream {
$_[0]->{stream};
@@ -110,8 +103,6 @@
$primary = $doc->{primaryData};
};
- $self->{primary} = $primary;
-
# Add metadata fields
my $pos = 0;
my %sortable;
@@ -132,11 +123,17 @@
# Prepare for summarization
- if ($field->{type} && $field->{type} eq 'type:integer') {
+ if (!$field->{type} || $field->{type} eq 'type:string') {
+ $fields->add_string($field->{key}, $field->{value});
+ }
+ elsif ($field->{type} eq 'type:integer') {
$fields->add_int($field->{key}, $field->{value});
}
+ elsif ($field->{type} eq 'type:store') {
+ $fields->add_store($field->{key}, $field->{value});
+ }
else {
- $fields->add_string($field->{key}, $field->{value});
+ warn 'unknown field type: ' . $field->{type};
};
# This will later be indexed for search as well as retrieval in
diff --git a/lib/Krawfish/Koral/Document/FieldInt.pm b/lib/Krawfish/Koral/Document/FieldInt.pm
index c7ebe6f..71f4967 100644
--- a/lib/Krawfish/Koral/Document/FieldInt.pm
+++ b/lib/Krawfish/Koral/Document/FieldInt.pm
@@ -1,14 +1,15 @@
package Krawfish::Koral::Document::FieldInt;
use Krawfish::Util::String qw/squote/;
+# TODO:
+# Probably use Krawfish::Koral::Meta::Type::KeyID and
+# Krawfish::Koral::Meta::Type::Key.
use warnings;
use strict;
sub new {
my $class = shift;
- bless {
- key => shift,
- value => shift
- }, $class;
+ # key, value, key_id, key_value_id
+ bless { @_ }, $class;
};
sub type {
@@ -35,6 +36,8 @@
sub identify {
my ($self, $dict) = @_;
+ return if $self->{key_id};
+
my $key = '!' . $self->{key};
my $term = '+' . $self->{key} . ':' . $self->{value};
@@ -67,12 +70,16 @@
};
+sub inflate {
+ ...
+};
+
sub to_string {
my $self = shift;
unless ($self->{key_id}) {
return squote($self->{key}) . '=' . $self->{value};
};
- return $self->{key_id} . '=' . $self->{key_value_id} . '(' . $self->{value} . ')';
+ return '#' . $self->{key_id} . '=' . '#' . $self->{key_value_id} . '(' . $self->{value} . ')';
};
diff --git a/lib/Krawfish/Koral/Document/FieldStore.pm b/lib/Krawfish/Koral/Document/FieldStore.pm
new file mode 100644
index 0000000..76c8c97
--- /dev/null
+++ b/lib/Krawfish/Koral/Document/FieldStore.pm
@@ -0,0 +1,67 @@
+package Krawfish::Koral::Document::FieldStore;
+use Krawfish::Util::String qw/squote/;
+use warnings;
+use strict;
+
+# Class for store-only fields
+# (not indexed in the dictionary)
+
+sub new {
+ my $class = shift;
+ # key, value, key_id
+ bless { @_ }, $class;
+};
+
+sub type {
+ 'store'
+};
+
+
+sub key {
+ $_[0]->{key};
+};
+
+sub key_id {
+ $_[0]->{key_id};
+};
+
+sub term_id {
+ # There is no term_id, but it's required for sorting
+ undef;
+};
+
+sub value {
+ $_[0]->{value};
+};
+
+sub identify {
+ my ($self, $dict) = @_;
+
+ my $key = '!' . $self->{key};
+
+ # Get key term_id
+ # TODO:
+ # Add new method that introduces the term
+ # in case it doesn't exist
+ my $key_id = $dict->term_id_by_term($key);
+
+ # Key is already known to the dictionary
+ if (defined $key_id) {
+ $self->{key_id} = $key_id;
+ }
+
+ else {
+ $self->{key_id} = $dict->add_term($key);
+ };
+ return $self;
+};
+
+
+sub to_string {
+ my $self = shift;
+ my $str = $self->{key_id} ? '#' . $self->{key_id} : squote($self->{key});
+ return $str . '=' . squote($self->{value});
+};
+
+
+1;
diff --git a/lib/Krawfish/Koral/Document/FieldString.pm b/lib/Krawfish/Koral/Document/FieldString.pm
index a93035a..1a1c296 100644
--- a/lib/Krawfish/Koral/Document/FieldString.pm
+++ b/lib/Krawfish/Koral/Document/FieldString.pm
@@ -3,12 +3,12 @@
use warnings;
use strict;
+# Class for string fields
+
sub new {
my $class = shift;
- bless {
- key => shift,
- value => shift
- }, $class;
+ # key, value, key_id, key_value_id
+ bless { @_ }, $class;
};
@@ -68,7 +68,7 @@
unless ($self->{key_id}) {
return squote($self->{key}) . '=' . squote($self->{value});
};
- return $self->{key_id} . '=' . $self->{key_value_id};
+ return '#' . $self->{key_id} . '=' . '#' . $self->{key_value_id};
};
1;
diff --git a/lib/Krawfish/Koral/Document/Fields.pm b/lib/Krawfish/Koral/Document/Fields.pm
index 0cdb964..4c13c57 100644
--- a/lib/Krawfish/Koral/Document/Fields.pm
+++ b/lib/Krawfish/Koral/Document/Fields.pm
@@ -1,6 +1,7 @@
package Krawfish::Koral::Document::Fields;
use Krawfish::Koral::Document::FieldString;
use Krawfish::Koral::Document::FieldInt;
+use Krawfish::Koral::Document::FieldStore;
use warnings;
use strict;
@@ -14,7 +15,10 @@
my ($key, $value) = @_;
# This may be an integer value
- push @$self, Krawfish::Koral::Document::FieldString->new($key, $value);
+ push @$self, Krawfish::Koral::Document::FieldString->new(
+ key => $key,
+ value => $value
+ );
};
@@ -23,7 +27,22 @@
my ($key, $value) = @_;
# This may be an integer value
- push @$self, Krawfish::Koral::Document::FieldInt->new($key, $value);
+ push @$self, Krawfish::Koral::Document::FieldInt->new(
+ key => $key,
+ value => $value
+ );
+};
+
+
+sub add_store {
+ my $self = shift;
+ my ($key, $value) = @_;
+
+ # The value is only stored, not indexed in the dictionary
+ push @$self, Krawfish::Koral::Document::FieldStore->new(
+ key => $key,
+ value => $value
+ );
};
diff --git a/lib/Krawfish/Posting/Match/Fields.pm b/lib/Krawfish/Posting/Match/Fields.pm
index 4293eed..64f69c8 100644
--- a/lib/Krawfish/Posting/Match/Fields.pm
+++ b/lib/Krawfish/Posting/Match/Fields.pm
@@ -2,23 +2,18 @@
use strict;
use warnings;
+# The fields are represented as Krawfish::Koral::Document::Field* objects!
+
sub new {
my $class = shift;
bless {
- field_ids => [@_],
- fields => undef,
+ fields => [@_]
}, $class;
};
sub to_string {
my $self = shift;
-
- if ($self->{fields}) {
- return 'fields:' . join(',', @{$self->{fields}});
- }
- else {
- return 'fields:' . join(',', map { '#' . $_ } @{$self->{field_ids}});
- }
+ return 'fields:' . join(',', map { $_->to_string } @{$self->{fields}});
};
1;
diff --git a/lib/Krawfish/Result/Segment/Aggregate/Facets.pm b/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
index 7d1607a..da44242 100644
--- a/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
+++ b/lib/Krawfish/Result/Segment/Aggregate/Facets.pm
@@ -13,6 +13,11 @@
# Simplify the counting by mapping the requested fields to
# an array, that points to a map.
+# TODO:
+# Look for fast int => int hash maps
+# http://java-performance.info/implementing-world-fastest-java-int-to-int-hash-map/
+# http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
+# https://gist.github.com/badboy/6267743
# TODO:
# Support corpus classes!
@@ -102,11 +107,14 @@
# Iterate over all fields
foreach my $field ($pointer->fields(@{$self->{field_keys}})) {
+ # This should probably be a method in the fields pointer!
+ next if $field->type eq 'store';
+
# Increment occurrence
- $aggr->incr_doc($field->[0], $field->[1]);
+ $aggr->incr_doc($field->key_id, $field->term_id);
if (DEBUG) {
- print_log('aggr_facets', '#' . $field->[0] . ' has frequencies');
+ print_log('aggr_facets', '#' . $field->key_id . ' has frequencies');
};
};
}
diff --git a/lib/Krawfish/Result/Segment/Aggregate/Values.pm b/lib/Krawfish/Result/Segment/Aggregate/Values.pm
index e18c59c..8cd9a79 100644
--- a/lib/Krawfish/Result/Segment/Aggregate/Values.pm
+++ b/lib/Krawfish/Result/Segment/Aggregate/Values.pm
@@ -69,7 +69,7 @@
foreach my $field (@values) {
# Aggregate value
- $aggr->add($field->[0], $field->[1]);
+ $aggr->add($field->key_id, $field->value);
};
};
};
diff --git a/lib/Krawfish/Result/Segment/Enrich/Fields.pm b/lib/Krawfish/Result/Segment/Enrich/Fields.pm
index bd7c829..d41b992 100644
--- a/lib/Krawfish/Result/Segment/Enrich/Fields.pm
+++ b/lib/Krawfish/Result/Segment/Enrich/Fields.pm
@@ -98,7 +98,7 @@
};
# Get the fields from the fields stream
- my @fields = $fields->field_terms(@{$self->{fields}});
+ my @fields = $fields->fields(@{$self->{fields}});
$self->{last_doc_id} = $match->doc_id;
$self->{last_fields} = [@fields];
diff --git a/lib/Test/Krawfish.pm b/lib/Test/Krawfish.pm
index e3a8409..d453c40 100644
--- a/lib/Test/Krawfish.pm
+++ b/lib/Test/Krawfish.pm
@@ -276,7 +276,7 @@
foreach my $key (sort keys %$hash) {
my $type = 'string';
my $long_key = $key;
- if ($key =~ s/^(string|integer)_//) {
+ if ($key =~ s/^(string|integer|store)_//) {
$type = $1;
};