| Akron | 5cf5fca | 2017-10-09 19:01:47 +0200 | [diff] [blame] | 1 | package Krawfish::Compile::Segment::Group::Character; |
| Akron | 18ff592 | 2017-01-13 10:09:45 +0100 | [diff] [blame] | 2 | use Krawfish::Log; |
| 3 | use strict; |
| 4 | use warnings; |
| 5 | |
| Akron | 31e088b | 2017-09-29 14:48:49 +0200 | [diff] [blame] | 6 | |
| 7 | # This groups on prefix or suffixes of subterms. |
| 8 | # Necessary to support "Ansicht nach Wortendungen" for example. |
| 9 | # It's possible to first group on terms and then - per term, |
| 10 | # request the term surface in the dictionary and group by |
| 11 | # the result. |
| 12 | |
| 13 | |
| Akron | 18ff592 | 2017-01-13 10:09:45 +0100 | [diff] [blame] | 14 | use constant DEBUG => 0; |
| 15 | |
| 16 | sub new { |
| 17 | my $class = shift; |
| 18 | bless { |
| 19 | segments => shift, # Krawfish::Index::Segments object |
| Akron | 97a7cba | 2017-05-26 13:39:06 +0200 | [diff] [blame] | 20 | # TODO: May as well be a subtoken object |
| Akron | 18ff592 | 2017-01-13 10:09:45 +0100 | [diff] [blame] | 21 | from_start => shift, # boolean - otherwise from end |
| 22 | char_count => shift |
| 23 | nrs => [@_] |
| 24 | }, $class; |
| 25 | }; |
| 26 | |
| 27 | |
| 28 | sub get_group { |
| 29 | my ($self, $match) = @_; |
| 30 | |
| 31 | # Get all classes from the match |
| Akron | 97a7cba | 2017-05-26 13:39:06 +0200 | [diff] [blame] | 32 | my @classes = $match->get_classes($self->{nrs}); |
| Akron | 18ff592 | 2017-01-13 10:09:45 +0100 | [diff] [blame] | 33 | |
| 34 | my $segments = $self->{segments}; |
| 35 | |
| 36 | my %group; |
| 37 | |
| 38 | # Classes have nr, start, end |
| 39 | foreach my $class (sort { $a->start <=> $b->start } @classes) { |
| 40 | |
| 41 | if ($self->{from_start}) { |
| 42 | |
| 43 | # This will retrieve the segment from the segments stream |
| 44 | my $segment = $stream->get($match->doc_id, $class->start); |
| 45 | |
| 46 | if ($segment->) |
| 47 | |
| 48 | # The character count can be satisfied by the |
| 49 | my $first_chars = $segment->first_chars; |
| 50 | |
| 51 | if (length($first_chars) <= $self->{char_count} { |
| 52 | substr($first_chars); |
| 53 | } |
| 54 | |
| 55 | # Check, if the segment only spans one segment |
| 56 | if ($class->end != $class->start+1) { |
| 57 | |
| 58 | }; |
| 59 | } |
| 60 | else { |
| 61 | ... |
| 62 | }; |
| 63 | }; |
| 64 | }; |
| 65 | |
| 66 | 1; |