blob: f56aae9dfebb68f7784f518c9f602e55154da66d [file] [log] [blame]
Akron71fc14c2016-10-31 23:44:43 +01001package Krawfish::Index::Fields;
Akrond6a87ff2017-08-11 00:17:30 +02002use Krawfish::Index::Fields::Doc;
Akronf703f6f2017-08-25 21:20:52 +02003use Krawfish::Index::Fields::Ranks;
Akrond6a87ff2017-08-11 00:17:30 +02004use Krawfish::Index::Fields::Pointer;
Akron8781e6b2016-12-09 02:04:17 +01005use Krawfish::Log;
Akron71fc14c2016-10-31 23:44:43 +01006use warnings;
Akrond6a87ff2017-08-11 00:17:30 +02007use strict;
Akron71fc14c2016-10-31 23:44:43 +01008
Akrondc8dceb2017-08-22 20:25:39 +02009use constant DEBUG => 0;
Akron8781e6b2016-12-09 02:04:17 +010010
Akronc1ed58c2017-08-04 17:26:30 +020011
12# TODO:
13# Reranking a field is not necessary, if the field value is already given.
14# In that case, look up the dictionary if the value is already given,
15# take the example doc of that field value and add the rank of that
16# doc for the new doc.
17# If the field is not yet given, take the next or previous value in dictionary
18# order and use the rank to rerank the field (see K::I::Dictionary).
19# BUT: This only works if the field has the same collation as the
20# dictionary!
21
Akron4a46e6e2017-08-16 17:49:16 +020022
23# Merging the fields index is pretty simple, as it only needs to be indexed
24# on the document level and then simply be appended.
25
Akron2ee89f12016-12-07 18:33:52 +010026# Sort documents by a field and attach a numerical rank.
Akron7db79e22016-12-08 23:02:32 +010027# Returns the maximum rank and a vector of ranks at doc id position.
# Ranks can be set multiple times
Akron7db79e22016-12-08 23:02:32 +010029#
30# TODO:
31# These ranks may also be used for facet search, because
32# remembering the ranks and increment their values will
33# return the most common k facets of the field quickly.
34# Returning the fields per rank, however, may become
35# a linear search for the first rank in the ranked fields,
36# which may be slow.
37# But nonetheless, the max_rank field may also give a hint,
38# if the field is good for faceting! (unique ranks per field
39# are bad, for example!)
Akrond6a87ff2017-08-11 00:17:30 +020040
41
# Constructor: create an empty fields index.
#
# The object is a blessed hash with three slots:
#   docs        - list of stored documents, addressed by doc id
#   last_doc_id - last assigned doc id (-1 as long as nothing was added)
#   ranks       - hash, empty on construction
sub new {
  my ($class) = @_;

  my %self = (
    docs        => [],
    last_doc_id => -1,
    ranks       => {},
  );

  return bless \%self, $class;
}
51
Akronb00c2be2017-08-16 14:45:07 +020052
# Get the last document identifier (aka max_doc_id),
# i.e. the current value of the object's last_doc_id slot.
sub last_doc_id {
  my ($self) = @_;
  return $self->{last_doc_id};
}
57
58
# Add a document to the fields index.
#
# Accepts a Krawfish::Koral::Document, wraps it in a
# Krawfish::Index::Fields::Doc and stores it at the next free doc id.
#
# Returns the doc id under which the document was stored
# (0 for the first document added to a fresh index). After the call,
# the returned id is identical to the value of last_doc_id.
sub add {
  my ($self, $doc) = @_;

  # Pre-increment on purpose: a post-increment would hand back the id
  # *before* the new document (-1 for the first one), which does not
  # match the slot the document is stored in.
  my $doc_id = ++$self->{last_doc_id};

  # TODO:
  #   use Krawfish::Index::Store::V1::Fields->new;
  $self->{docs}->[$doc_id] = Krawfish::Index::Fields::Doc->new($doc);

  return $doc_id;
}
70
Akronb00c2be2017-08-16 14:45:07 +020071
# Fetch the entry stored for a doc id from the docs list
# (works as long as the list provides random access to docs).
#
# Returns whatever is stored at position $doc_id of the docs list.
sub doc {
  my $self   = shift;
  my $doc_id = shift;

  # Only log when debugging is switched on
  if (DEBUG) {
    print_log('fields', 'Get document for id ' . $doc_id);
  }

  return $self->{docs}->[$doc_id];
}
78
79
# Create a pointer on this fields index.
#
# Returns the result of constructing a Krawfish::Index::Fields::Pointer
# with the index object as its only argument.
sub pointer {
  my ($self) = @_;
  return Krawfish::Index::Fields::Pointer->new($self);
}
85
86
Akron71fc14c2016-10-31 23:44:43 +0100871;
Akrond6a87ff2017-08-11 00:17:30 +020088
89
90__END__