| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 1 | package Krawfish::Koral::Query::Span; |
| Akron | ee06a13 | 2017-12-08 16:59:27 +0100 | [diff] [blame] | 2 | use Role::Tiny::With; |
| Akron | 2814aa9 | 2017-09-24 22:00:35 +0200 | [diff] [blame] | 3 | use Krawfish::Util::Constants ':PREFIX'; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 4 | use Krawfish::Koral::Query::Term; |
| Akron | c048b18 | 2017-06-13 01:29:03 +0200 | [diff] [blame] | 5 | use Krawfish::Log; |
| Akron | 06eb4d3 | 2016-11-11 14:05:52 +0100 | [diff] [blame] | 6 | use Scalar::Util qw/blessed/; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 7 | use strict; |
| 8 | use warnings; | ||||
| 9 | |||||
| Akron | ee06a13 | 2017-12-08 16:59:27 +0100 | [diff] [blame] | 10 | with 'Krawfish::Koral::Query'; |
| 11 | |||||
| Akron | bc7dd43 | 2017-07-18 14:21:51 +0200 | [diff] [blame] | 12 | use constant DEBUG => 0; |
| Akron | c048b18 | 2017-06-13 01:29:03 +0200 | [diff] [blame] | 13 | |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 14 | sub new { |
| Akron | 02203f1 | 2017-12-09 13:55:34 +0100 | [diff] [blame] | 15 | my ($class, $span) = @_; |
| 16 | |||||
| 17 | unless ($span) { | ||||
| 18 | } | ||||
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 19 | |
| Akron | 06eb4d3 | 2016-11-11 14:05:52 +0100 | [diff] [blame] | 20 | # Span is a string |
| 21 | unless (blessed $span) { | ||||
| 22 | return bless { | ||||
| Akron | 2814aa9 | 2017-09-24 22:00:35 +0200 | [diff] [blame] | 23 | operands => [Krawfish::Koral::Query::Term->new(SPAN_PREF . $span)] |
| Akron | 06eb4d3 | 2016-11-11 14:05:52 +0100 | [diff] [blame] | 24 | }, $class; |
| 25 | }; | ||||
| 26 | |||||
| 27 | bless { | ||||
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 28 | operands => [$span] |
| Akron | 06eb4d3 | 2016-11-11 14:05:52 +0100 | [diff] [blame] | 29 | }, $class; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 30 | }; |
| 31 | |||||
| Akron | b00c2be | 2017-08-16 14:45:07 +0200 | [diff] [blame] | 32 | |
| Akron | 440799d | 2017-12-26 14:55:03 +0100 | [diff] [blame] | 33 | # Type |
| 34 | sub type { | ||||
| 35 | 'span'; | ||||
| 36 | }; | ||||
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 37 | |
| Akron | b00c2be | 2017-08-16 14:45:07 +0200 | [diff] [blame] | 38 | |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 39 | # There are no classes allowed in spans |
| Akron | 5ddc38f | 2017-07-18 00:16:22 +0200 | [diff] [blame] | 40 | sub remove_classes { |
| 41 | $_[0]; | ||||
| 42 | }; | ||||
| Akron | 06eb4d3 | 2016-11-11 14:05:52 +0100 | [diff] [blame] | 43 | |
| Akron | b00c2be | 2017-08-16 14:45:07 +0200 | [diff] [blame] | 44 | |
| Akron | 440799d | 2017-12-26 14:55:03 +0100 | [diff] [blame] | 45 | # Serialize to KQ |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 46 | sub to_koral_fragment { |
| 47 | my $self = shift; | ||||
| Akron | 06eb4d3 | 2016-11-11 14:05:52 +0100 | [diff] [blame] | 48 | my $span = { |
| 49 | '@type' => 'koral:span' | ||||
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 50 | }; |
| Akron | 5b6264f | 2017-07-19 01:14:01 +0200 | [diff] [blame] | 51 | if ($self->operand) { |
| 52 | $span->{wrap} = $self->operand->to_koral_fragment | ||||
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 53 | }; |
| Akron | 06eb4d3 | 2016-11-11 14:05:52 +0100 | [diff] [blame] | 54 | |
| 55 | return $span; | ||||
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 56 | }; |
| 57 | |||||
| Akron | 17c0a3d | 2017-06-11 23:19:16 +0200 | [diff] [blame] | 58 | |
| 59 | # TODO: Some error handling | ||||
| 60 | sub normalize { | ||||
| 61 | return $_[0]; | ||||
| 62 | }; | ||||
| 63 | |||||
| Akron | 3ab2e97 | 2017-08-02 19:10:10 +0200 | [diff] [blame] | 64 | |
| Akron | 440799d | 2017-12-26 14:55:03 +0100 | [diff] [blame] | 65 | # Turn terms into ids |
| Akron | 3ab2e97 | 2017-08-02 19:10:10 +0200 | [diff] [blame] | 66 | sub identify { |
| Akron | c048b18 | 2017-06-13 01:29:03 +0200 | [diff] [blame] | 67 | my ($self, $dict) = @_; |
| 68 | |||||
| Akron | 3ab2e97 | 2017-08-02 19:10:10 +0200 | [diff] [blame] | 69 | # This is currently not supported |
| 70 | unless ($self->is_regex) { | ||||
| Akron | c048b18 | 2017-06-13 01:29:03 +0200 | [diff] [blame] | 71 | |
| Akron | 3ab2e97 | 2017-08-02 19:10:10 +0200 | [diff] [blame] | 72 | my $term = $self->to_term; |
| 73 | |||||
| 74 | print_log('kq_span', "Translate span $term to term_id") if DEBUG; | ||||
| 75 | |||||
| Akron | 2814aa9 | 2017-09-24 22:00:35 +0200 | [diff] [blame] | 76 | my $term_id = $dict->term_id_by_term(SPAN_PREF . $term); |
| Akron | 5864cf0 | 2017-08-02 19:38:41 +0200 | [diff] [blame] | 77 | |
| Akron | 5a5595b | 2017-09-10 13:00:57 +0200 | [diff] [blame] | 78 | return $self->builder->nowhere unless defined $term_id; |
| Akron | 5864cf0 | 2017-08-02 19:38:41 +0200 | [diff] [blame] | 79 | |
| Akron | 3d1df33 | 2017-12-23 16:21:21 +0100 | [diff] [blame] | 80 | return Krawfish::Koral::Query::Term->new($term_id); |
| Akron | 3ab2e97 | 2017-08-02 19:10:10 +0200 | [diff] [blame] | 81 | }; |
| 82 | |||||
| 83 | warn 'Regexes are currently not supported'; | ||||
| Akron | c048b18 | 2017-06-13 01:29:03 +0200 | [diff] [blame] | 84 | }; |
| Akron | 17c0a3d | 2017-06-11 23:19:16 +0200 | [diff] [blame] | 85 | |
| Akron | 55fb308 | 2017-07-18 13:24:53 +0200 | [diff] [blame] | 86 | |
| Akron | 5864cf0 | 2017-08-02 19:38:41 +0200 | [diff] [blame] | 87 | # TODO: |
| 88 | # Currently not supported | ||||
| 89 | sub is_regex { | ||||
| 90 | 0; | ||||
| 91 | }; | ||||
| 92 | |||||
| 93 | |||||
| 94 | sub to_term { | ||||
| 95 | $_[0]->operand->to_string; | ||||
| 96 | }; | ||||
| 97 | |||||
| Akron | 48fabe5 | 2017-08-07 16:48:12 +0200 | [diff] [blame] | 98 | |
| Akron | 17c0a3d | 2017-06-11 23:19:16 +0200 | [diff] [blame] | 99 | # Todo: May be more complicated |
| 100 | sub optimize { | ||||
| Akron | 48fabe5 | 2017-08-07 16:48:12 +0200 | [diff] [blame] | 101 | warn 'Span queries need to be identified before'; |
| Akron | 17c0a3d | 2017-06-11 23:19:16 +0200 | [diff] [blame] | 102 | }; |
| 103 | |||||
| Akron | 48fabe5 | 2017-08-07 16:48:12 +0200 | [diff] [blame] | 104 | |
| Akron | 704ec06 | 2017-07-24 15:46:21 +0200 | [diff] [blame] | 105 | # A span may have length 0 in case it is an empty annotation |
| 106 | # like a page break | ||||
| 107 | sub min_span { | ||||
| 108 | 0; | ||||
| 109 | }; | ||||
| 110 | |||||
| 111 | |||||
| 112 | # A termGroup always spans exactly one token | ||||
| 113 | sub max_span { | ||||
| 114 | return 0 if $_[0]->is_null; | ||||
| 115 | -1; | ||||
| 116 | }; | ||||
| Akron | 17c0a3d | 2017-06-11 23:19:16 +0200 | [diff] [blame] | 117 | |
| Akron | c3657bf | 2016-10-31 00:15:43 +0100 | [diff] [blame] | 118 | |
| Akron | 440799d | 2017-12-26 14:55:03 +0100 | [diff] [blame] | 119 | # Not unsorted |
| 120 | sub maybe_unsorted { | ||||
| 121 | 0; | ||||
| 122 | }; | ||||
| Akron | c3657bf | 2016-10-31 00:15:43 +0100 | [diff] [blame] | 123 | |
| Akron | 5864cf0 | 2017-08-02 19:38:41 +0200 | [diff] [blame] | 124 | |
| Akron | 944091b | 2016-11-24 16:40:58 +0100 | [diff] [blame] | 125 | # Todo: Change the term_type! |
| Akron | 02203f1 | 2017-12-09 13:55:34 +0100 | [diff] [blame] | 126 | sub from_koral { |
| 127 | my ($class, $kq) = @_; | ||||
| Akron | 3feb4d8 | 2017-12-12 19:33:46 +0100 | [diff] [blame] | 128 | my $qb = $class->builder; |
| Akron | 02203f1 | 2017-12-09 13:55:34 +0100 | [diff] [blame] | 129 | |
| 130 | # No wrap | ||||
| 131 | unless ($kq->{'wrap'}) { | ||||
| 132 | |||||
| 133 | # TODO: | ||||
| 134 | # This should return an error! | ||||
| 135 | warn 'Wrap not supported!' | ||||
| 136 | } | ||||
| 137 | |||||
| 138 | # Wrap is a term | ||||
| 139 | else { | ||||
| 140 | my $wrap = $kq->{wrap}; | ||||
| 141 | if ($wrap->{'@type'} eq 'koral:term') { | ||||
| Akron | 3feb4d8 | 2017-12-12 19:33:46 +0100 | [diff] [blame] | 142 | return $class->new($qb->from_koral_term($wrap)->term_type('span')); |
| Akron | 02203f1 | 2017-12-09 13:55:34 +0100 | [diff] [blame] | 143 | } |
| 144 | else { | ||||
| 145 | warn 'Wrap type not supported!' | ||||
| 146 | }; | ||||
| 147 | } | ||||
| 148 | }; | ||||
| Akron | 944091b | 2016-11-24 16:40:58 +0100 | [diff] [blame] | 149 | |
| Akron | 5864cf0 | 2017-08-02 19:38:41 +0200 | [diff] [blame] | 150 | |
| Akron | 440799d | 2017-12-26 14:55:03 +0100 | [diff] [blame] | 151 | # Stringification |
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 152 | sub to_string { |
| Akron | 3d1df33 | 2017-12-23 16:21:21 +0100 | [diff] [blame] | 153 | my ($self, $id) = @_; |
| 154 | return '<' . $self->operand->to_string($id) . '>'; | ||||
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 155 | }; |
| 156 | |||||
| Akron | 440799d | 2017-12-26 14:55:03 +0100 | [diff] [blame] | 157 | |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 158 | 1; |