blob: e5b1da7ba4f81c973f1e168bd4dd34ac9deb4bd3 [file] [log] [blame]
Akron5cf5fca2017-10-09 19:01:47 +02001package Krawfish::Compile::Segment::Group::Character;
Akron18ff5922017-01-13 10:09:45 +01002use Krawfish::Log;
3use strict;
4use warnings;
5
Akron31e088b2017-09-29 14:48:49 +02006
# This groups on prefixes or suffixes of subterms.
# Necessary to support "Ansicht nach Wortendungen"
# (a view grouped by word endings), for example.
# It's possible to first group on terms and then - per term -
# request the term surface in the dictionary and group by
# the result.
12
13
Akron18ff5922017-01-13 10:09:45 +010014use constant DEBUG => 0;
15
# Constructor.
#
# Positional parameters (after the class name):
#   segments   - Krawfish::Index::Segments object
#                TODO: May as well be a subtoken object
#   from_start - boolean; if true, characters are taken from the start
#                of the term, otherwise from the end
#   char_count - number of characters to group on
#   nrs        - all remaining arguments; they are later handed to
#                $match->get_classes (presumably class numbers -
#                TODO confirm)
#
# Returns the blessed object.
sub new {
  my ($class, $segments, $from_start, $char_count, @nrs) = @_;

  return bless {
    segments   => $segments,
    from_start => $from_start,
    char_count => $char_count,
    nrs        => [@nrs]
  }, $class;
};
26
27
# Compute the character group of a match.
#
# For every class of the match (ordered by start position) the segment
# at the class start is looked up and at most char_count leading
# characters of its surface are taken.
#
# Parameters:
#   $match - match object; must provide get_classes() and doc_id()
#
# Returns a hash reference mapping the class number to the extracted
# characters.
# NOTE(review): the original draft declared %group but never filled or
# returned it; keying by class number and returning a hash reference are
# assumptions - confirm against the callers.
#
# Grouping from the end of the term (from_start is false) is not
# implemented yet and dies with "Unimplemented".
sub get_group {
  my ($self, $match) = @_;

  # Get all classes from the match
  my @classes = $match->get_classes($self->{nrs});

  my $segments   = $self->{segments};
  my $char_count = $self->{char_count};

  my %group;

  # Classes have nr, start, end
  foreach my $class (sort { $a->start <=> $b->start } @classes) {

    if ($self->{from_start}) {

      # This will retrieve the segment from the segments stream
      my $segment = $segments->get($match->doc_id, $class->start);

      # NOTE(review): the original had an unfinished condition on the
      # segment here; skipping unknown segments is an assumption
      next unless defined $segment;

      # Take at most char_count characters from the start. substr()
      # returns the whole string if it is shorter than requested.
      my $first_chars = substr($segment->first_chars, 0, $char_count);

      # Check, if the class only spans one segment
      if ($class->end != $class->start + 1) {

        # TODO: The class spans multiple segments - if the first
        #   segment is shorter than char_count, the following
        #   segments may need to be consulted as well
      };

      $group{$class->nr} = $first_chars;
    }
    else {

      # TODO: Group on the last characters of the term
      ...
    };
  };

  return \%group;
};
65
661;