Blame - lib/Krawfish/Compile/Segment/Enrich/SortCriterion.pm - KorAP/Krawfish-prototype

blob: 7f2e08dde0ebcb0bfd0440cfd72a6b87b6c757ca [file] [log] [blame]

Akron	5cf5fca	2017-10-09 19:01:47 +0200	[diff] [blame]	1	package Krawfish::Compile::Segment::Enrich::SortCriterion;
Akron	5238ce5	2017-10-09 15:30:55 +0200	[diff] [blame]	2	use warnings;
				3	use strict;
Akron	b907cf3	2017-11-08 16:22:08 +0100	[diff] [blame]	4	use Role::Tiny;
				5
				6	with 'Krawfish::Compile';
Akron	5238ce5	2017-10-09 15:30:55 +0200	[diff] [blame]	7
Akron	a894fe1	2017-10-02 22:30:52 +0200	[diff] [blame]	8	# Enrich an item with sort criteria.
				9	# This is necessary to sort items beyond the segment.
				10	# The problem with this enrichment is,
				11	# that it needs to augment the sorted items after sorting,
				12	# so they are not in a proper order to go through
				13	# the fields lists (for example) to collect the field values
				14	# or through the forward index to collect term_ids (though
				15	# this may be a different API).
				16	#
				17	# A proper way to do this would be to go through the sorted
				18	# lists and create a new sorted list in doc order (or to somehow
				19	# keep match order) to make it possible to enrich with all
				20	# sorting criteria.
				21	#
				22	# 1. For Fields: Create a list of all docs to enrich in doc_id order
				23	# (Ignore duplicates)
				24	# 2. Prepare all requested fields in field order
				25	# 3. Go through all fields and collect values or term_ids
				26	# 4. Create criterion vectors per match based on this information
				27	#
Akron	5238ce5	2017-10-09 15:30:55 +0200	[diff] [blame]	28	# But:
				29	# It may very well be possible to only enrich if required
				30	# on the node level.
				31	#
Akron	a894fe1	2017-10-02 22:30:52 +0200	[diff] [blame]	32	# On the node level, the relevant criteria (top_k) will be inflated,
Akron	5238ce5	2017-10-09 15:30:55 +0200	[diff] [blame]	33	# taking the ordering into account (which means following matches may
Akron	a894fe1	2017-10-02 22:30:52 +0200	[diff] [blame]	34	# have a lot of criteria in common).
Akron	5238ce5	2017-10-09 15:30:55 +0200	[diff] [blame]	35
				36
				# Constructor.
				#
				# Positional arguments after the class name:
				#   1. value stored under the 'query' key
				#   2. value stored under the 'criteria' key
				#
				# Missing arguments are stored as undef; extra arguments are ignored.
				# Returns the new hash-based object blessed into the given class.
				sub new {
				  my ($class, $query, $criteria) = @_;

				  my %self = (
				    query => $query,

				    # All criteria, kept in sorted order. They may include
				    # terms and fields, and are meant to carry the direction
				    # and possibly the collation as well.
				    criteria => $criteria,
				  );

				  return bless \%self, $class;
				}
				49
				# Run-once initialization hook.
				#
				# Every call increments the 'init' counter on the object. Only a call
				# that finds the counter unset or zero gets past the guard; all later
				# calls return immediately. The actual setup is not implemented yet.
				sub _init {
				  my ($self) = @_;

				  # Post-increment yields the previous counter value, so this is
				  # false exactly once (on the first call) and true afterwards.
				  if ($self->{init}++) {
				    return;
				  }

				  # TODO:
				  #   - Walk through all criteria and collect the required field IDs.
				  #   - Bring the required field IDs in order.
				  #   - Build an array mapping field_id => criterion_position, so the
				  #     surface term can be mapped to its criterion after fetching.
				  #   - Remember the criterion position for optional term sorting.
				}
				62
				63
				# Placeholder for enriching the current match with its sort criteria.
				# Not implemented yet: performs no work and returns nothing.
				sub current_match {
				  # TODO:
				  #   Start with an empty list of sorting criteria, then:
				  #   a) If there are fields to retrieve, fetch all relevant fields
				  #      for the document id and add each value at its position
				  #      in the criteria list.
				  #   b) The surface term is already retrieved and enriched;
				  #      add it at its position in the criteria list.
				  return;
				}
				73
				74
				75	1;