# Tests for token queries that wrap boolean term groups (and / or / negation /
# null): checks the query flags, the span length, and the stringification
# after each processing step (normalize, finalize, identify, optimize).
use strict;
use warnings;

use Test::More;
use Test::Krawfish;

use_ok('Krawfish::Koral');
use_ok('Krawfish::Index');

# Index a single document containing the six terms used by the queries below.
# NOTE(review): ok_index is presumably exported by Test::Krawfish - confirm.
my $index = Krawfish::Index->new;

ok_index($index, [qw/first second third fourth fifth sixth/], 'Add new document');

my $koral = Krawfish::Koral->new;

my $qb = $koral->query_builder;

# Simple conjunction: [first&second]
my $query = $qb->token(
  $qb->bool_and('first', 'second')
);
is($query->min_span, 1, 'Minimum span length');
is($query->max_span, 1, 'Maximum span length');
ok(!$query->is_anywhere, 'Isn\'t anywhere');
ok(!$query->is_optional, 'Isn\'t optional');
ok(!$query->is_null, 'Isn\'t null');
ok(!$query->is_negative, 'Isn\'t negative');
ok(!$query->is_extended, 'Isn\'t extended');
is($query->to_string, '[first&second]', 'Stringification');
is($query->min_span, 1, 'Minimum span length');
is($query->max_span, 1, 'Maximum span length');
ok($query = $query->normalize, 'Normalization');
is($query->to_string, 'first&second', 'Stringification');
ok($query = $query->finalize, 'Finalization');
is($query->to_string, 'first&second', 'Stringification');


# Conjunction with a duplicate operand: the duplicate 'first' is still
# present in the raw stringification and is gone after normalization.
$query = $qb->token(
  $qb->bool_and('first', 'second','first', 'third')
);
is($query->min_span, 1, 'Minimum span length');
is($query->max_span, 1, 'Maximum span length');
is($query->to_string, '[first&first&second&third]', 'Stringification');
ok($query = $query->normalize, 'Normalization');
is($query->to_string, 'first&second&third', 'Stringification');
ok($query = $query->finalize, 'Finalization');
is($query->to_string, 'first&second&third', 'Stringification');
is($query->min_span, 1, 'Minimum span length');
is($query->max_span, 1, 'Maximum span length');

$query = $qb->token(
  $qb->bool_and('first', 'second')
);
# The operands are ordered alphabetically. The first operand in that order
# is treated as the least common one, which in a constraint query means it
# is placed second.
# NOTE(review): the (fourth&third) case further down expects
# constr(pos=32:#8,#6), with 'fourth' (#8) placed first - so the ordering
# may follow term ids rather than the alphabet. Confirm.
is($query->normalize->finalize->identify($index->dict)->optimize($index->segment)->to_string,
   "constr(pos=32:#4,#2)", 'Planned Stringification');


# Disjunction of two terms that were never added to the index.
$query = $qb->token(
  $qb->bool_or('opennlp/c=NP', 'tt/p=NN')
);
is($query->min_span, 1, 'Minimum span length');
is($query->max_span, 1, 'Maximum span length');
ok(!$query->is_anywhere, 'Isn\'t anywhere');
ok(!$query->is_optional, 'Isn\'t optional');
ok(!$query->is_null, 'Isn\'t null');
ok(!$query->is_negative, 'Isn\'t negative');
ok(!$query->is_extended, 'Isn\'t extended');
is($query->to_string, '[opennlp/c=NP|tt/p=NN]', 'Stringification');
ok($query = $query->normalize->finalize, 'Normalize and finalize');
is($query->to_string,
   'opennlp/c=NP|tt/p=NN', 'Stringification');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
# NOTE(review): '[0]' is presumably the stringification of a query that can
# never match, since neither term is in the index - confirm.
is($query->to_string,
   '[0]', 'Stringification');


# Disjunction of two conjunctions: [(first&second)|(third&fourth)]
# The stringification lists the operands sorted ('fourth' before 'third').
$query = $qb->token(
  $qb->bool_or(
    $qb->bool_and('first', 'second'),
    $qb->bool_and('third', 'fourth'),
  )
);

ok(!$query->is_anywhere, 'Isn\'t anywhere');
ok(!$query->is_optional, 'Isn\'t optional');
ok(!$query->is_null, 'Isn\'t null');
ok(!$query->is_negative, 'Isn\'t negative');
ok(!$query->is_extended, 'Isn\'t extended');
is($query->to_string, '[(first&second)|(fourth&third)]', 'Stringification');
ok($query = $query->normalize, 'Normalize');
is($query->to_string, '(first&second)|(fourth&third)', 'Stringification');
ok($query = $query->finalize->identify($index->dict)->optimize($index->segment), 'Finalize and optimize');
is($query->to_string,
   "or(constr(pos=32:#4,#2),constr(pos=32:#8,#6))",
   'Stringification');

# Make the term ids used in the expected string above readable.
is($index->dict->term_by_term_id(6), ':third', 'Check mapping');
is($index->dict->term_by_term_id(8), ':fourth', 'Check mapping');

# Nested groups: [(first&second)|(third&(fourth|fifth))|sixth]
$query = $qb->token(
  $qb->bool_or(
    $qb->bool_and('first', 'second'),
    $qb->bool_and(
      'third',
      $qb->bool_or('fourth', 'fifth')
    ),
    'sixth'
  )
);


ok(!$query->is_anywhere, 'Isn\'t anywhere');
ok(!$query->is_optional, 'Isn\'t optional');
ok(!$query->is_null, 'Isn\'t null');
ok(!$query->is_negative, 'Isn\'t negative');
ok(!$query->is_extended, 'Isn\'t extended');
is($query->to_string, '[((fifth|fourth)&third)|(first&second)|sixth]', 'Stringification');

ok($query = $query->normalize, 'Normalize');
is($query->to_string, '((fifth|fourth)&third)|(first&second)|sixth', 'Stringification');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string,
   "or(or(#12,constr(pos=32:#4,#2)),constr(pos=32:or(#10,#8),#6))",
   'Stringification');

# Group with null
# The null operand is stringified as '-' and is gone after normalization.
$query = $qb->token(
  $qb->bool_and('first', $qb->null)
);
is($query->to_string, '[-&first]', 'Stringifications');
ok($query = $query->normalize, 'Normalize');
is($query->to_string, 'first', 'Stringifications');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "#2", 'Stringifications');


# Group with negation
# [first&!second]
# Normalization rewrites the negated operand into an exclusion.
$query = $qb->token(
  $qb->bool_and('first', $qb->term_neg('second'))
);
is($query->to_string, '[!second&first]', 'Stringifications');
ok($query = $query->normalize, 'Normalize');
is($query->to_string, 'excl(matches:first,second)', 'Stringifications');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "excl(32:#2,#4)", 'Stringifications');


# Group with negation and zero freq
# [first&opennlp/c!=NN]
# The negated term is not in the index; the optimized query is just 'first'.
$query = $qb->token(
  $qb->bool_and('first', 'opennlp/c!=NN')
);
is($query->to_string, '[first&opennlp/c!=NN]', 'Stringifications');
ok($query = $query->normalize, 'Normalize');
is($query->to_string, 'excl(matches:first,opennlp/c=NN)', 'Stringifications');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "#2", 'Stringifications');



# [first&!third&second&!fourth]
# Both negations end up in one exclusion whose negative side is a disjunction.
$query = $qb->token(
  $qb->bool_and(
    $qb->bool_and('first', $qb->term_neg('third')),
    $qb->bool_and('second', $qb->term_neg('fourth'))
  )
);
is($query->to_string, '[(!fourth&second)&(!third&first)]', 'Stringifications');
ok($query = $query->normalize, 'Normalize');
is($query->to_string, 'excl(matches:first&second,fourth|third)', 'Stringifications');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "excl(32:constr(pos=32:#4,#2),or(#6,#8))", 'Stringifications');

# And-group whose negated terms are not found in the index
# [first&opennlp/c!=NN&second&tt/p!=ADJA]
# Only the positive conjunction remains after optimization.
$query = $qb->token(
  $qb->bool_and(
    $qb->bool_and('first', 'opennlp/c!=NN'),
    $qb->bool_and('second', 'tt/p!=ADJA')
  )
);
is($query->to_string, '[(first&opennlp/c!=NN)&(second&tt/p!=ADJA)]', 'Stringifications');
ok($query = $query->normalize->finalize, 'Normalize and finalize');
is($query->to_string, 'excl(matches:first&second,opennlp/c=NN|tt/p=ADJA)', 'Stringifications');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "constr(pos=32:#4,#2)", 'Stringifications');

done_testing;
__END__