| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 1 | package Krawfish::Koral::Query::Sequence; |
| 2 | use parent 'Krawfish::Koral::Query'; |
| Akron | 27fb743 | 2016-12-11 18:07:32 +0100 | [diff] [blame] | 3 | use Krawfish::Log; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 4 | use strict; |
| 5 | use warnings; |
| 6 | |
| Akron | 18ff592 | 2017-01-13 10:09:45 +0100 | [diff] [blame] | 7 | # TODO: Optimize if there is an identical subquery |
| Akron | 84ae657 | 2017-02-03 19:26:36 +0100 | [diff] [blame] | 8 | # in a direct sequence - make this a repetition!!! |
| Akron | 18ff592 | 2017-01-13 10:09:45 +0100 | [diff] [blame] | 9 | |
| Akron | 27fb743 | 2016-12-11 18:07:32 +0100 | [diff] [blame] | 10 | use constant DEBUG => 0; |
| 11 | |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 12 | sub new { |
| 13 | my $class = shift; |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 14 | my $self = $class->SUPER::new; |
| 15 | $self->{array} = [@_]; |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 16 | $self->{planned} = 0; |
| Akron | dc9f116 | 2016-11-05 15:31:40 +0100 | [diff] [blame] | 17 | $self->{info} = undef; |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 18 | return $self; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 19 | }; |
| 20 | |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 21 | |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 22 | # Get number of operands |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 23 | sub size { |
| 24 | scalar @{$_[0]->{array}}; |
| 25 | }; |
| 26 | |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 27 | |
| Akron | 8aee4a6 | 2016-11-14 21:33:12 +0100 | [diff] [blame] | 28 | sub type { 'sequence' }; |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 29 | |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 30 | |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 31 | # TODO: Order by frequency, so the most common occurrence is at the outside |
| 32 | sub plan_for { |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 33 | my $self = shift; |
| Akron | 573e7ec | 2016-11-05 19:03:01 +0100 | [diff] [blame] | 34 | my $index = shift; |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 35 | |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 36 | # Only one element available |
| 37 | if ($self->size == 1) { |
| 38 | |
| 39 | # Return this element |
| Akron | 573e7ec | 2016-11-05 19:03:01 +0100 | [diff] [blame] | 40 | return $self->{array}->[0]->plan_for( |
| 41 | $index |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 42 | ); |
| 43 | }; |
| 44 | |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 45 | # From a sequence, create a binary tree |
| 46 | my $tree = $self->planned_tree; |
| 47 | |
| 48 | return unless $tree; |
| 49 | |
| 50 | return $tree->plan_for($index); |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 51 | }; |
| 52 | |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 53 | |
| Akron | 7d1dc8e | 2016-11-13 15:54:11 +0100 | [diff] [blame] | 54 | # Left extensions are always prefered! |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 55 | sub _solve_problems { |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 56 | my $self = shift; |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 57 | |
| 58 | return 1 if $self->{planned_array}; |
| 59 | |
| 60 | # Cloned for planning |
| 61 | my @elements = @{$self->{array}}; |
| 62 | |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 63 | # First pass - mark anchors |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 64 | my @problems = (); |
| 65 | for (my $i = 0; $i < @elements; $i++) { |
| 66 | |
| 67 | # Element in question |
| 68 | my $element = $elements[$i]; |
| 69 | |
| 70 | if ($element->type eq 'sequence') { |
| 71 | # has_constraints ... |
| 72 | }; |
| 73 | |
| 74 | # Push to problem array |
| 75 | unless ($element->maybe_anchor) { |
| 76 | push @problems, $i; |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 77 | }; |
| 78 | }; |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 79 | |
| 80 | # Second pass |
| 81 | # TODO: Order by frequency |
| 82 | my $problems = 0; |
| 83 | foreach my $p (reverse @problems) { |
| 84 | |
| 85 | # Remove element |
| 86 | if ($elements[$p]->is_null) { |
| 87 | splice @elements, $p, 1; |
| 88 | next; |
| 89 | }; |
| 90 | |
| Akron | 27fb743 | 2016-12-11 18:07:32 +0100 | [diff] [blame] | 91 | print_log('kq_seq', $elements[$p]->to_string . " is problematic") if DEBUG; |
| Akron | 7d1dc8e | 2016-11-13 15:54:11 +0100 | [diff] [blame] | 92 | |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 93 | # Problem has a following anchor |
| 94 | if ($elements[$p+1] && $elements[$p+1]->maybe_anchor) { |
| 95 | my $next = $elements[$p+1]; |
| Akron | 27fb743 | 2016-12-11 18:07:32 +0100 | [diff] [blame] | 96 | |
| 97 | print_log('kq_seq', 'Extend left with ' . $next->to_string) if DEBUG; |
| 98 | |
| Akron | 7d1dc8e | 2016-11-13 15:54:11 +0100 | [diff] [blame] | 99 | splice @elements, $p, 2, $self->builder->ext_left( |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 100 | $next, |
| 101 | $elements[$p] |
| 102 | ); |
| 103 | } |
| 104 | |
| 105 | # Problem has a preceeding anchor |
| 106 | elsif ($elements[$p-1] && $elements[$p-1]->maybe_anchor) { |
| 107 | my $previous = $elements[$p-1]; |
| Akron | 27fb743 | 2016-12-11 18:07:32 +0100 | [diff] [blame] | 108 | |
| 109 | print_log('kq_seq', 'Extend right with ' . $previous->to_string) if DEBUG; |
| 110 | |
| Akron | 7d1dc8e | 2016-11-13 15:54:11 +0100 | [diff] [blame] | 111 | splice @elements, $p-1, 2, $self->builder->ext_right( |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 112 | $previous, |
| 113 | $elements[$p] |
| 114 | ); |
| 115 | } |
| Akron | 7d1dc8e | 2016-11-13 15:54:11 +0100 | [diff] [blame] | 116 | |
| 117 | # Problem remains |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 118 | else { |
| 119 | $problems = 1; |
| 120 | }; |
| 121 | }; |
| 122 | |
| 123 | # Store as a separate array |
| 124 | $self->{planned_array} = \@elements; |
| 125 | |
| 126 | # set variables etc. |
| 127 | return if $problems; |
| 128 | return 1; |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 129 | }; |
| 130 | |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 131 | |
| 132 | sub planned_tree { |
| 133 | my $self = shift; |
| 134 | |
| Akron | 8aee4a6 | 2016-11-14 21:33:12 +0100 | [diff] [blame] | 135 | # Return tree |
| Akron | 774c5db | 2016-11-09 16:11:38 +0100 | [diff] [blame] | 136 | if ($self->{planned_tree}) { |
| 137 | return $self->{planned_tree}; |
| 138 | }; |
| 139 | |
| 140 | return unless $self->_solve_problems; |
| 141 | |
| 142 | my @elements = @{$self->{planned_array}}; |
| 143 | |
| 144 | my $tree = shift @elements; |
| 145 | |
| 146 | my $builder = $self->builder; |
| 147 | |
| 148 | # TODO: Sort this by frequency |
| 149 | foreach (@elements) { |
| 150 | $tree = $builder->position( |
| 151 | ['precedesDirectly'], |
| 152 | $tree, |
| 153 | $_ |
| 154 | ) |
| 155 | }; |
| 156 | |
| 157 | $self->{planned_tree} = $tree; |
| 158 | return $tree; |
| 159 | }; |
| 160 | |
| 161 | |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 162 | sub is_any { |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 163 | my $self = shift; |
| Akron | 8aee4a6 | 2016-11-14 21:33:12 +0100 | [diff] [blame] | 164 | my $tree = $self->planned_tree; |
| 165 | return $tree->is_any; |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 166 | }; |
| 167 | |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 168 | |
| 169 | sub is_null { |
| Akron | 8aee4a6 | 2016-11-14 21:33:12 +0100 | [diff] [blame] | 170 | my $self = shift; |
| 171 | my $tree = $self->planned_tree; |
| 172 | return $tree->is_null; |
| Akron | 554e357 | 2016-11-05 12:02:00 +0100 | [diff] [blame] | 173 | }; |
| 174 | |
| Akron | 1b09c5b | 2016-11-20 15:59:34 +0100 | [diff] [blame] | 175 | |
| 176 | sub maybe_unsorted { |
| 177 | my $self = shift; |
| 178 | my $tree = $self->planned_tree; |
| 179 | return $tree->maybe_unsorted; |
| 180 | }; |
| 181 | |
| 182 | |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 183 | sub to_koral_fragment { |
| 184 | my $self = shift; |
| 185 | return { |
| 186 | '@type' => 'koral:group', |
| 187 | 'operation' => 'operation:sequence', |
| 188 | 'operands' => [ |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 189 | map { $_->to_koral_fragment } @{$self->{array}} |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 190 | ] |
| 191 | }; |
| 192 | }; |
| 193 | |
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 194 | sub to_string { |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 195 | return join '', map { $_->to_string } @{$_[0]->{array}}; |
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 196 | }; |
| 197 | |
| Akron | 944091b | 2016-11-24 16:40:58 +0100 | [diff] [blame] | 198 | sub from_koral { |
| 199 | my $class = shift; |
| 200 | my $kq = shift; |
| 201 | |
| 202 | my $importer = $class->importer; |
| 203 | |
| 204 | return $class->new( |
| 205 | map { $importer->all($_) } @{$kq->{operands}} |
| 206 | ); |
| 207 | }; |
| Akron | a211bf5 | 2016-10-29 18:03:29 +0200 | [diff] [blame] | 208 | |
| Akron | 33f1dcb | 2016-10-29 17:27:23 +0200 | [diff] [blame] | 209 | 1; |
| Akron | 4763ea6 | 2016-11-02 19:36:18 +0100 | [diff] [blame] | 210 | |
| 211 | |
| 212 | __END__ |
| 213 | |
| 214 | Rewrite rules: |
| 215 | - [Der][alte][Mann]? -> |
| 216 | [Der]optExt([alte],[Mann]) |
| 217 | |