| Akron | 5cf5fca | 2017-10-09 19:01:47 +0200 | [diff] [blame] | 1 | package Krawfish::Compile::Segment::Enrich::SortCriterion; |
| Akron | 5238ce5 | 2017-10-09 15:30:55 +0200 | [diff] [blame] | 2 | use warnings; |
| 3 | use strict; |
| Akron | b907cf3 | 2017-11-08 16:22:08 +0100 | [diff] [blame] | 4 | use Role::Tiny; |
| 5 | |
| 6 | with 'Krawfish::Compile'; |
| Akron | 5238ce5 | 2017-10-09 15:30:55 +0200 | [diff] [blame] | 7 | |
| Akron | a894fe1 | 2017-10-02 22:30:52 +0200 | [diff] [blame] | 8 | # Enrich an item with sort criteria. |
| 9 | # This is necessary to sort items beyond the segment. |
| 10 | # The problem with this enrichment is, |
| 11 | # that it needs to augment the sorted items after sorting, |
| 12 | # so they are not in a proper order to go through |
| 13 | # the fields lists (for example) to collect the field values |
| 14 | # or through the forward index to collect term_ids (though |
| 15 | # this may be a different API). |
| 16 | # |
| 17 | # A proper way to do this would be to go through the sorted |
| 18 | # lists and create a new sorted list in doc order (or to somehow |
| 19 | # keep match order) to make it possible to enrich with all |
| 20 | # sorting criteria. |
| 21 | # |
| 22 | # 1. For Fields: Create a list of all docs to enrich in doc_id order |
| 23 | # (Ignore duplicates) |
| 24 | # 2. Prepare all requested fields in field order |
| 25 | # 3. Go through all fields and collect values or term_ids |
| 26 | # 4. Create criterion vectors per match based on these information |
| 27 | # |
| Akron | 5238ce5 | 2017-10-09 15:30:55 +0200 | [diff] [blame] | 28 | # But: |
| 29 | # It may very well be possible to only enrich if required |
| 30 | # on the node level. |
| 31 | # |
# On the node level, the relevant criteria (top_k) will be inflated,
# taking the ordering into account (which means that consecutive
# matches may have a lot of criteria in common).
| Akron | 5238ce5 | 2017-10-09 15:30:55 +0200 | [diff] [blame] | 35 | |
| 36 | |
# Constructor.
#
# Positional arguments:
#   $query    - the query object to be enriched (stored as-is).
#   $criteria - all criteria in sorted order; these may include
#               terms and fields, and also carry the direction
#               and possibly the collation.
#
# Returns the new object blessed into $class.
sub new {
  my ($class, $query, $criteria) = @_;

  my %self = (
    query    => $query,
    criteria => $criteria,
  );

  return bless \%self, $class;
}
| 49 | |
# Lazy one-time initialisation guard.
#
# Every call increments $self->{init}; only the very first call
# (when the counter was previously unset/zero) proceeds past the guard.
sub _init {
  my ($self) = @_;

  # Post-increment: yields the value *before* this call was counted.
  my $already_run = $self->{init}++;
  return if $already_run;

  # TODO:
  #   - Walk all criteria and collect the required field IDs.
  #   - Bring the required field IDs in order.
  #   - Build an array mapping field_id => criterion_position, so the
  #     surface term can be mapped to its criterion after fetching.
  #   - Remember the criterion position for optional term sorting.
}
| 62 | |
| 63 | |
# Placeholder for enriching the current match with its sort criteria.
# Not implemented yet: currently returns nothing.
sub current_match {
  # TODO:
  #   Start with an empty list of sorting criteria, then:
  #   a) If there are fields to retrieve, fetch all relevant fields
  #      for the document id and add each one at its position in
  #      the criteria list.
  #   b) The surface term is already retrieved and enriched;
  #      add it at its position in the criteria list.
  return;
}
| 73 | |
| 74 | |
| 75 | 1; |