blob: 89438351a02057523fbc21da01cdce9a15f2dd6e [file] [log] [blame]
Akronc8e9ad12017-07-28 15:17:14 +02001package Krawfish::Result::Segment::Enrich::Snippet;
Akron6e629862017-01-01 17:08:27 +01002use parent 'Krawfish::Result';
Akronbdedfc72017-08-14 16:20:05 +02003use Krawfish::Posting::Match::Snippet;
4# use Krawfish::Result::Segment::Enrich::Snippet::Highlights;
Akrona1fbdeb2016-12-12 02:06:08 +01005use Krawfish::Log;
6use strict;
7use warnings;
8
Akronbdedfc72017-08-14 16:20:05 +02009use constant DEBUG => 1;
Akrona1fbdeb2016-12-12 02:06:08 +010010
Akronc001d362016-12-12 19:07:52 +010011# TODO:
Akronfb31ea42017-09-22 14:25:05 +020012# It may be more efficient to first collect all required
13# annotations (for decoration, context, hit etc.) and
14# then iterate over left context, hit, right context
15# and get all annotations per token at a time
16
Akronc001d362016-12-12 19:07:52 +010017
# Construct a new snippet enrichment operation.
#
# Takes a flat list of key/value pairs that become the fields of
# the object. Fields read by this class:
#   query   - wrapped query, advanced by next()
#   fwd_obj - forward index object, has to provide pointer()
#   left    - optional, only listed in to_string()
#   right   - optional, only listed in to_string()
#   hit     - has to provide content() and to_string()
sub new {
  my ($class, @fields) = @_;
  my $self = { @fields };
  return bless $self, $class;
};
27
28
# Lazily fetch the pointer of the forward index.
#
# The {_init} field counts the invocations; only the very first
# call asks {fwd_obj} for a pointer and stores it in {fwd_pointer},
# every later call returns immediately.
sub _init {
  my ($self) = @_;

  # Post-increment: false on the first call only
  my $seen_before = $self->{_init}++;
  return if $seen_before;

  $self->{fwd_pointer} = $self->{fwd_obj}->pointer;
};
36
37
# Iterate through the ordered matches.
#
# Makes sure the forward pointer is initialized, drops the match
# cached by current_match() and advances the wrapped query.
# Returns whatever the next() method of the query returns.
sub next {
  my ($self) = @_;

  $self->_init;

  # The cached match belongs to the previous position
  undef $self->{match};

  return $self->{query}->next;
};
45
46
# Return the current match, enriched with a snippet posting.
#
# The enriched match is cached in {match}, so repeated calls at the
# same position return the same object (next() resets the cache).
#
# Returns nothing if
#   - match_from_query() yields no match,
#   - the forward pointer can't be moved to the document of the match,
#   - the forward pointer can't be moved to the start of the match.
#
# NOTE(review): match_from_query() is not defined in this file -
# presumably it is inherited from the parent class; confirm.
sub current_match {
  my $self = shift;

  print_log('c_snippet', 'Get current match') if DEBUG;

  # Match is already set
  return $self->{match} if $self->{match};

  # Get current match from query
  my $match = $self->match_from_query;

  # Without a match there is nothing to enrich. Leave in the same
  # way as the other failure branches instead of warning in the
  # debug concatenation and dying on an undefined invocant below.
  return unless defined $match;

  print_log('c_snippet', 'match is ' . $match) if DEBUG;

  # Get forward pointer (set up by _init)
  my $forward = $self->{fwd_pointer};

  # TODO:
  #   Fetch preceding context

  # Move pointer to the document of the match
  my $doc_id = $match->doc_id;
  unless ($forward->skip_doc($doc_id) == $doc_id) {

    # TODO: This should never happen!
    return;
  };

  if (DEBUG) {
    print_log('c_snippet', 'Move to match doc position');
  };

  # Move pointer to start position of match
  unless ($forward->skip_pos($match->start)) {

    # This should never happen!
    return;
  };

  # Get data from hit
  my $hit_data = $self->{hit}->content($match, $forward);

  if (DEBUG) {
    print_log('c_snippet', 'Move to match position');
  };

  # Create snippet posting
  my $snippet = Krawfish::Posting::Match::Snippet->new(
    hit_ids => $hit_data
  );

  # Add snippet to match
  $match->add($snippet);

  # Deal with left
  # Deal with hit
  # Deal with right

  # Cache the enriched match for repeated calls
  $self->{match} = $match;
  return $match;
};
108
Akroneed53912017-02-17 03:09:45 +0100109
# Stringification
#
# Serializes the operation as
#   snippet(<left>,<right>,<hit>:<query>)
# where <left> and <right> (each followed by a comma) are only
# listed if the respective field is set.
sub to_string {
  my ($self) = @_;

  # Optional parts first, in the order left, right
  my @optional =
    map  { $_->to_string . ',' }
    grep { $_ }
    map  { $self->{$_} } qw/left right/;

  my $hit_str   = $self->{hit}->to_string;
  my $query_str = $self->{query}->to_string;

  return 'snippet(' . join('', @optional) . $hit_str . ':' . $query_str . ')';
};
124
Akroneed53912017-02-17 03:09:45 +0100125
Akrona1fbdeb2016-12-12 02:06:08 +01001261;