| Akron | c8e9ad1 | 2017-07-28 15:17:14 +0200 | [diff] [blame] | 1 | package Krawfish::Result::Segment::Enrich::Snippet; |
| Akron | 6e62986 | 2017-01-01 17:08:27 +0100 | [diff] [blame] | 2 | use parent 'Krawfish::Result'; |
| Akron | bdedfc7 | 2017-08-14 16:20:05 +0200 | [diff] [blame] | 3 | use Krawfish::Posting::Match::Snippet; |
| 4 | # use Krawfish::Result::Segment::Enrich::Snippet::Highlights; |
| Akron | a1fbdeb | 2016-12-12 02:06:08 +0100 | [diff] [blame] | 5 | use Krawfish::Log; |
| 6 | use strict; |
| 7 | use warnings; |
| 8 | |
| Akron | bdedfc7 | 2017-08-14 16:20:05 +0200 | [diff] [blame] | 9 | use constant DEBUG => 1; |
| Akron | a1fbdeb | 2016-12-12 02:06:08 +0100 | [diff] [blame] | 10 | |
| Akron | c001d36 | 2016-12-12 19:07:52 +0100 | [diff] [blame] | 11 | # TODO: |
| Akron | fb31ea4 | 2017-09-22 14:25:05 +0200 | [diff] [blame] | 12 | # It may be more efficient to first collect all required |
| 13 | # annotations (for decoration, context, hit etc.) and |
| 14 | # then iterate over left context, hit, right context |
| 15 | # and get all annotations per token at a time |
| 16 | |
| Akron | c001d36 | 2016-12-12 19:07:52 +0100 | [diff] [blame] | 17 | |
# Constructor
#
# Takes a flat list of key/value pairs that become the object's
# fields. Keys read elsewhere in this package are:
#   query   - the wrapped query that is iterated over
#   fwd_obj - object asked for a forward pointer during initialization
#   left    - optional left context (only stringified here)
#   right   - optional right context (only stringified here)
#   hit     - object asked for the content of the hit
sub new {
  my ($class, %param) = @_;
  return bless \%param, $class;
}
| 27 | |
| 28 | |
# Initialize the forward index pointer
#
# The call counter in $self->{_init} is incremented on every
# invocation; only the first invocation requests a pointer from
# the forward object and stores it in $self->{fwd_pointer}.
# Later invocations return without doing anything else.
sub _init {
  my ($self) = @_;

  # Already initialized (the counter is still bumped)
  return if $self->{_init}++;

  my $pointer = $self->{fwd_obj}->pointer;
  return $self->{fwd_pointer} = $pointer;
}
| 36 | |
| 37 | |
# Iterate through the ordered matches
#
# Makes sure the object is initialized, drops the match cached
# for the previous position and advances the wrapped query.
# Returns whatever the wrapped query's next() returns.
sub next {
  my ($self) = @_;

  $self->_init;

  # The cached match belongs to the previous position
  undef $self->{match};

  return $self->{query}->next;
}
| 45 | |
| 46 | |
# Return the current match, enriched with a snippet
#
# The enriched match is cached in $self->{match}, so repeated calls
# for the same position return the same object without touching the
# forward pointer again.
#
# Steps for an uncached match:
#   1. Fetch the match via match_from_query (not defined in this
#      file - presumably inherited from the parent class).
#   2. Move the forward pointer to the document of the match
#      and then to the start position of the match.
#   3. Let the hit object collect its content and attach it to the
#      match as a Krawfish::Posting::Match::Snippet (field hit_ids).
#
# Returns the match object on success. Returns nothing (undef in
# scalar context, the empty list in list context) if there is no
# current match or if the forward pointer can't be positioned.
sub current_match {
  my $self = shift;

  print_log('c_snippet', 'Get current match') if DEBUG;

  # Match is already set
  return $self->{match} if $self->{match};

  # Get current match from query
  my $match = $self->match_from_query;

  # Without a match there is nothing to enrich. Bail out the same
  # way the positioning failures below do, instead of dying on a
  # method call on an undefined value.
  return unless defined $match;

  print_log('c_snippet', 'match is ' . $match) if DEBUG;

  # Get forward pointer (set up by _init)
  my $forward = $self->{fwd_pointer};

  # TODO:
  #   Fetch preceding context

  # Move pointer to the document of the match. The skip is only
  # accepted if it lands exactly on the requested document.
  my $doc_id = $match->doc_id;
  unless ($forward->skip_doc($doc_id) == $doc_id) {

    # TODO: This should never happen!
    return;
  };

  if (DEBUG) {
    print_log('c_snippet', 'Move to match doc position');
  };

  # Move pointer to start position of match
  unless ($forward->skip_pos($match->start)) {

    # This should never happen!
    return;
  };

  # Logged directly after the positioning it reports on
  if (DEBUG) {
    print_log('c_snippet', 'Move to match position');
  };

  # Get data from hit
  my $hit_data = $self->{hit}->content($match, $forward);

  # Create snippet posting
  my $snippet = Krawfish::Posting::Match::Snippet->new(
    hit_ids => $hit_data
  );

  # Add snippet to match
  $match->add($snippet);

  # Deal with left
  # Deal with hit
  # Deal with right

  # Cache the enriched match for subsequent calls
  $self->{match} = $match;
  return $match;
}
| 108 | |
| Akron | eed5391 | 2017-02-17 03:09:45 +0100 | [diff] [blame] | 109 | |
# Stringification
#
# Produces 'snippet(' followed by the stringified left and right
# contexts (each followed by a comma, and only if set), the
# stringified hit, a colon, the stringified query and ')'.
sub to_string {
  my ($self) = @_;

  # Optional contexts, in fixed order
  my @parts;
  for my $side (qw/left right/) {
    my $context = $self->{$side} or next;
    push @parts, $context->to_string . ',';
  }

  # Hit and query are always expected to be present
  push @parts, $self->{hit}->to_string . ':';
  push @parts, $self->{query}->to_string;

  return 'snippet(' . join('', @parts) . ')';
}
| 124 | |
| Akron | eed5391 | 2017-02-17 03:09:45 +0100 | [diff] [blame] | 125 | |
| Akron | a1fbdeb | 2016-12-12 02:06:08 +0100 | [diff] [blame] | 126 | 1; |