# Tests for boolean term groups (and / or / negation / null) inside a
# token query: stringification, normalization, finalization and the
# optimized form against a small index.
use Test::More;
use Test::Krawfish;
use strict;
use warnings;

use_ok('Krawfish::Koral');
use_ok('Krawfish::Index');

# Index that all optimize() calls below run against
my $index = Krawfish::Index->new;

# Add a document built from the six terms used by the queries below
ok_index($index, [qw/first second third fourth fifth sixth/], 'Add new document');

my $koral = Krawfish::Koral->new;

# Builder used to construct every query in this file
my $qb = $koral->query_builder;

# Token with a simple AND group: [first&second]
my $query = $qb->token(
  $qb->bool_and('first', 'second')
);
is($query->min_span, 1, 'Span length');
is($query->max_span, 1, 'Span length');
ok(!$query->is_anywhere, 'Isn\'t anywhere');
ok(!$query->is_optional, 'Isn\'t optional');
ok(!$query->is_null, 'Isn\'t null');
ok(!$query->is_negative, 'Isn\'t negative');
ok(!$query->is_extended, 'Isn\'t extended');
is($query->to_string, '[first&second]', 'Stringification');
is($query->min_span, 1, 'Span length');
is($query->max_span, 1, 'Span length');

# Normalization drops the token brackets from the stringified form;
# finalization leaves the string unchanged
ok($query = $query->normalize, 'Normalization');
is($query->to_string, 'first&second', 'Stringification');
ok($query = $query->finalize, 'Finalization');
is($query->to_string, 'first&second', 'Stringification');

# AND group with a repeated operand: 'first' is given twice
$query = $qb->token(
  $qb->bool_and('first', 'second','first', 'third')
);
is($query->min_span, 1, 'Span length');
is($query->max_span, 1, 'Span length');

# The raw string still lists the duplicate (operands appear sorted)
is($query->to_string, '[first&first&second&third]', 'Stringification');

# After normalization the duplicate operand is gone
ok($query = $query->normalize, 'Normalization');
is($query->to_string, 'first&second&third', 'Stringification');
ok($query = $query->finalize, 'Finalization');
is($query->to_string, 'first&second&third', 'Stringification');
is($query->min_span, 1, 'Span length');
is($query->max_span, 1, 'Span length');

# Full pipeline for [first&second]:
# normalize -> finalize -> identify -> optimize
$query = $qb->token(
  $qb->bool_and('first', 'second')
);
# Operands are ordered alphabetically; the first in that order is
# treated as the least common operand, which in a constraint query
# means it is placed second.
is($query->normalize->finalize->identify($index->dict)->optimize($index->segment)->to_string,
   "constr(pos=32:#4,#2)", 'Planned Stringification');

# OR group over two annotation terms: [opennlp/c=NP|tt/p=NN]
# NOTE(review): neither term is among the six terms added to the index
# above, which presumably is why the optimized query stringifies as
# '[0]' - confirm against the optimizer.
$query = $qb->token(
  $qb->bool_or('opennlp/c=NP', 'tt/p=NN')
);
is($query->min_span, 1, 'Span length');
is($query->max_span, 1, 'Span length');
ok(!$query->is_anywhere, 'Isn\'t anywhere');
ok(!$query->is_optional, 'Isn\'t optional');
ok(!$query->is_null, 'Isn\'t null');
ok(!$query->is_negative, 'Isn\'t negative');
ok(!$query->is_extended, 'Isn\'t extended');
is($query->to_string, '[opennlp/c=NP|tt/p=NN]', 'Stringification');
ok($query = $query->normalize->finalize, 'finalize');
is($query->to_string,
   'opennlp/c=NP|tt/p=NN', 'Stringification');

# This step identifies and optimizes, so it is labelled 'Optimize'
# like the equivalent steps elsewhere in this file
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string,
   '[0]', 'Stringification');

# OR of two AND groups: [(first&second)|(third&fourth)]
$query = $qb->token(
  $qb->bool_or(
    $qb->bool_and('first', 'second'),
    $qb->bool_and('third', 'fourth'),
  )
);

ok(!$query->is_anywhere, 'Isn\'t anywhere');
ok(!$query->is_optional, 'Isn\'t optional');
ok(!$query->is_null, 'Isn\'t null');
ok(!$query->is_negative, 'Isn\'t negative');
ok(!$query->is_extended, 'Isn\'t extended');

# Operands inside each group are stringified in sorted order
is($query->to_string, '[(first&second)|(fourth&third)]', 'Stringification');
ok($query = $query->normalize, 'Normalize');
is($query->to_string, '(first&second)|(fourth&third)', 'Stringification');

# This step finalizes, identifies and optimizes, so it is labelled
# 'Optimize' like the equivalent steps elsewhere in this file
ok($query = $query->finalize->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string,
   "or(constr(pos=32:#4,#2),constr(pos=32:#8,#6))",
   'Stringification');

# Verify which terms the ids #6 and #8 in the optimized string refer to
is($index->dict->term_by_term_id(6), ':third', 'Check mapping');
is($index->dict->term_by_term_id(8), ':fourth', 'Check mapping');

# Nested groups: [(first&second)|(third&(fourth|fifth))|sixth]
$query = $qb->token(
  $qb->bool_or(
    $qb->bool_and('first', 'second'),
    $qb->bool_and(
      'third',
      $qb->bool_or('fourth', 'fifth')
    ),
    'sixth'
  )
);


ok(!$query->is_anywhere, 'Isn\'t anywhere');
ok(!$query->is_optional, 'Isn\'t optional');
ok(!$query->is_null, 'Isn\'t null');
ok(!$query->is_negative, 'Isn\'t negative');
ok(!$query->is_extended, 'Isn\'t extended');
is($query->to_string, '[((fifth|fourth)&third)|(first&second)|sixth]', 'Stringification');

ok($query = $query->normalize, 'Normalize');
is($query->to_string, '((fifth|fourth)&third)|(first&second)|sixth', 'Stringification');

# The optimized form nests binary or() operations around the
# constraint queries built from the AND groups
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string,
   "or(or(#12,constr(pos=32:#4,#2)),constr(pos=32:or(#10,#8),#6))",
   'Stringification');

# Group with null
$query = $qb->token(
  $qb->bool_and('first', $qb->null)
);
is($query->to_string, '[-&first]', 'Stringifications');

# Normalization removes the null operand, leaving the single term
ok($query = $query->normalize, 'Normalize');
is($query->to_string, 'first', 'Stringifications');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "#2", 'Stringifications');

# Group with negation
# [first&!second]
$query = $qb->token(
  $qb->bool_and('first', $qb->term_neg('second'))
);
is($query->to_string, '[!second&first]', 'Stringifications');

# Normalization rewrites the negated operand into an exclusion
ok($query = $query->normalize, 'Normalize');
is($query->to_string, 'excl(matches:first,second)', 'Stringifications');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "excl(32:#2,#4)", 'Stringifications');

# Group with negation and zero freq
# [first&opennlp/c!=NN]
$query = $qb->token(
  $qb->bool_and('first', 'opennlp/c!=NN')
);
is($query->to_string, '[first&opennlp/c!=NN]', 'Stringifications');
ok($query = $query->normalize, 'Normalize');
is($query->to_string, 'excl(matches:first,opennlp/c=NN)', 'Stringifications');

# After optimization only the positive operand remains; the exclusion
# of the zero-frequency term is gone from the string
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "#2", 'Stringifications');

# [first&!third&second&!fourth]
$query = $qb->token(
  $qb->bool_and(
    $qb->bool_and('first', $qb->term_neg('third')),
    $qb->bool_and('second', $qb->term_neg('fourth'))
  )
);
is($query->to_string, '[(!fourth&second)&(!third&first)]', 'Stringifications');

# Normalization gathers the positive operands into one AND group and
# the negated operands into one OR group that is excluded
ok($query = $query->normalize, 'Normalize');
is($query->to_string, 'excl(matches:first&second,fourth|third)', 'Stringifications');
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "excl(32:constr(pos=32:#4,#2),or(#6,#8))", 'Stringifications');

# And group with not-founds
# [first&opennlp/c!=NN&second&tt/p!=ADJA]
$query = $qb->token(
  $qb->bool_and(
    $qb->bool_and('first', 'opennlp/c!=NN'),
    $qb->bool_and('second', 'tt/p!=ADJA')
  )
);
is($query->to_string, '[(first&opennlp/c!=NN)&(second&tt/p!=ADJA)]', 'Stringifications');
ok($query = $query->normalize->finalize, 'Normalize');
is($query->to_string, 'excl(matches:first&second,opennlp/c=NN|tt/p=ADJA)', 'Stringifications');

# After optimization only the constraint over the two positive
# operands remains in the string
ok($query = $query->identify($index->dict)->optimize($index->segment), 'Optimize');
is($query->to_string, "constr(pos=32:#4,#2)", 'Stringifications');

# No fixed plan: the number of tests is declared once all have run
done_testing;
__END__