Blame - lib/Krawfish/Compile/Segment/Group/Characters.pm - KorAP/Krawfish-prototype

blob: e5b1da7ba4f81c973f1e168bd4dd34ac9deb4bd3 [file] [log] [blame]

Akron	5cf5fca	2017-10-09 19:01:47 +0200	[diff] [blame]	1	package Krawfish::Compile::Segment::Group::Character;
Akron	18ff592	2017-01-13 10:09:45 +0100	[diff] [blame]	2	use Krawfish::Log;
				3	use strict;
				4	use warnings;
				5
Akron	31e088b	2017-09-29 14:48:49 +0200	[diff] [blame]	6
				7	# This groups on prefix or suffixes of subterms.
				8	# Necessary to support "Ansicht nach Wortendungen" for example.
				9	# It's possible to first group on terms and then - per term,
				10	# request the term surface in the dictionary and group by
				11	# the result.
				12
				13
Akron	18ff592	2017-01-13 10:09:45 +0100	[diff] [blame]	14	use constant DEBUG => 0;
				15
				16	sub new {
				17	my $class = shift;
				18	bless {
				19	segments => shift, # Krawfish::Index::Segments object
Akron	97a7cba	2017-05-26 13:39:06 +0200	[diff] [blame]	20	# TODO: May as well be a subtoken object
Akron	18ff592	2017-01-13 10:09:45 +0100	[diff] [blame]	21	from_start => shift, # boolean - otherwise from end
				22	char_count => shift
				23	nrs => [@_]
				24	}, $class;
				25	};
				26
				27
				28	sub get_group {
				29	my ($self, $match) = @_;
				30
				31	# Get all classes from the match
Akron	97a7cba	2017-05-26 13:39:06 +0200	[diff] [blame]	32	my @classes = $match->get_classes($self->{nrs});
Akron	18ff592	2017-01-13 10:09:45 +0100	[diff] [blame]	33
				34	my $segments = $self->{segments};
				35
				36	my %group;
				37
				38	# Classes have nr, start, end
				39	foreach my $class (sort { $a->start <=> $b->start } @classes) {
				40
				41	if ($self->{from_start}) {
				42
				43	# This will retrieve the segment from the segments stream
				44	my $segment = $stream->get($match->doc_id, $class->start);
				45
				46	if ($segment->)
				47
				48	# The character count can be satisfied by the
				49	my $first_chars = $segment->first_chars;
				50
				51	if (length($first_chars) <= $self->{char_count} {
				52	substr($first_chars);
				53	}
				54
				55	# Check, if the segment only spans one segment
				56	if ($class->end != $class->start+1) {
				57
				58	};
				59	}
				60	else {
				61	...
				62	};
				63	};
				64	};
				65
				66	1;