blob: 7890827ff1f1bf6382dc2eaf47b34370ff132174 [file] [log] [blame]
Akrone1cc43e2017-12-08 19:00:04 +01001use Test::More;
2use strict;
3use warnings;
4use Mojo::JSON qw/encode_json decode_json/;
5use Mojo::File;
6use Data::Dumper;
7
use_ok('Krawfish::Koral::Query::Builder');

# Shared builder: every KoralQuery structure in this file is imported
# through this one object.
my $qb = Krawfish::Koral::Query::Builder->new;
ok($qb, 'New importer');
Akrone1cc43e2017-12-08 19:00:04 +010011
Akron02203f12017-12-09 13:55:34 +010012
# Check that a query survives a KoralQuery round trip.
#
# The query is stringified with to_string, turned into a fragment with
# to_koral_fragment, imported again through the file-level builder
# ($qb->from_koral), and the string form of the re-imported query is
# compared with the string form of the original.
#
# Parameters:
#   $query - object providing to_string and to_koral_fragment
#
# Returns the result of the final is() comparison, or the (false) result
# of fail() as soon as one of the intermediate steps yields nothing.
sub serialize_deserialize_ok {
  my $query = shift;

  # Report failures at the caller's line instead of inside this helper
  local $Test::Builder::Level = $Test::Builder::Level + 1;

  my $serialized = $query->to_string;
  return fail('Query not serializable') unless $serialized;

  my $fragment = $query->to_koral_fragment;
  return fail('Fragment not generated') unless $fragment;

  # Stop here on failure: calling to_string on a false value below
  # would die and abort the whole test script.
  my $deserialized = $qb->from_koral($fragment);
  return fail('Fragment not deserializable') unless $deserialized;

  return is($deserialized->to_string, $serialized, 'Serialization is equal');
}
30
31
# group:sequence, token, term, group:class(nr)

# First operand: a token wrapping the term tt/p=NN
my $seq_token = {
  '@type' => 'koral:token',
  wrap    => {
    '@type' => 'koral:term',
    foundry => 'tt',
    key     => 'NN',
    layer   => 'p',
    match   => 'match:eq'
  }
};

# Second operand: class number 2 around a negated token (match:ne)
my $seq_class = {
  '@type'   => 'koral:group',
  classOut  => 2,
  operation => 'operation:class',
  operands  => [
    {
      '@type' => 'koral:token',
      wrap    => {
        '@type' => 'koral:term',
        foundry => 'tt',
        key     => 'NN',
        layer   => 'p',
        match   => 'match:ne'
      }
    }
  ]
};

ok(
  my $query = $qb->from_koral({
    '@type'   => 'koral:group',
    operation => 'operation:sequence',
    operands  => [$seq_token, $seq_class]
  }),
  'Import Sequence, Token, Term, Class'
);

is($query->to_string, '[tt/p=NN]{2:[tt/p!=NN]}', 'Stringification');
serialize_deserialize_ok($query);
Akrone1cc43e2017-12-08 19:00:04 +010070
# group:repetition, span
# A span (cnx/c=NP) repeated between 2 and 3 times.
my $repetition = {
  '@type'   => 'koral:group',
  operation => 'operation:repetition',
  boundary  => {
    '@type' => 'koral:boundary',
    min     => 2,
    max     => 3
  },
  operands => [
    {
      '@type' => 'koral:span',
      wrap    => {
        '@type' => 'koral:term',
        foundry => 'cnx',
        key     => 'NP',
        layer   => 'c'
      }
    }
  ]
};

ok($query = $qb->from_koral($repetition), 'Import Repetition, Span, Term');

is($query->to_string, '<cnx/c=NP>{2,3}', 'Stringification');
serialize_deserialize_ok($query);
Akrone1cc43e2017-12-08 19:00:04 +010095
Akron3feb4d82017-12-12 19:33:46 +010096
# group:length, termgroup, group:class(no nr)
ok($query = $qb->from_koral({
  '@type' => 'koral:group',
  'operation' => 'operation:length',
  'boundary' => {
    '@type' => 'koral:boundary',
    min => 2,
    max => 3
  },
  'operands' => [
    {
      # Class without classOut; the expected string below shows it as class 1
      '@type' => 'koral:group',
      'operation' => 'operation:class',
      'operands' => [
        {
          '@type' => 'koral:token',
          'wrap' => {
            '@type' => 'koral:termGroup',
            'operation' => 'operation:and',
            'operands' => [
              {
                '@type' => 'koral:term',
                'foundry' => 'cnx',
                'key' => 'NP',
                'layer' => 'c'
              },
              {
                '@type' => 'koral:term',
                'foundry' => 'cnx',
                'key' => 'VP',
                'layer' => 'c'
              },
              {
                # Nested or-group; the expected string lists opennlp before tt
                '@type' => 'koral:termGroup',
                'operation' => 'operation:or',
                'operands' => [
                  {
                    '@type' => 'koral:term',
                    'foundry' => 'tt',
                    'key' => 'NN',
                    'layer' => 'p'
                  },
                  {
                    '@type' => 'koral:term',
                    'foundry' => 'opennlp',
                    'key' => 'NN',
                    'layer' => 'p'
                  }
                ]
              }
            ]
          }
        }
      ]
    }
  ]
}), 'Import Length, Class, Token, TermGroup');


is($query->to_string,
   'length(2-3:{1:[cnx/c=NP&cnx/c=VP&(opennlp/p=NN|tt/p=NN)]})',
   'Stringification');

serialize_deserialize_ok($query);
161
Akron1a75a522017-12-11 09:13:30 +0100162
# group:exclusion
ok($query = $qb->from_koral({
  '@type' => 'koral:group',
  'operation' => 'operation:exclusion',
  # NOTE(review): this key is 'frame' (singular) while the position test
  # uses 'frames'; the expected string below lists only four frames.
  # Confirm against the importer which key is intended.
  'frame' => [
    'frames:overlapsRight',
    'frames:endsWith',
    'frames:isAround',
    'frames:overlapsLeft',
    'frames:startsWith',
    'frames:matches'
  ],
  'operands' => [
    {
      '@type' => 'koral:span',
      wrap => {
        '@type' => 'koral:term',
        'foundry' => 'cnx',
        'key' => 'VP',
        'layer' => 'c'
      },
    },
    {
      '@type' => 'koral:token',
      'wrap' => {
        '@type' => 'koral:term',
        'foundry' => 'tt',
        'key' => 'V',
        'layer' => 'p'
      },
    }
  ]
}), 'Import Exclusion, Span, Token');


is($query->to_string,
   'excl(endsWith;isAround;matches;startsWith:<cnx/c=VP>,[tt/p=V])',
   'Stringification');

serialize_deserialize_ok($query);
Akron142b7342017-12-10 17:40:30 +0100203
204
Akrone57ddc52017-12-11 11:33:03 +0100205
# group:position, group:disjunction/or
ok($query = $qb->from_koral({
  '@type' => 'koral:group',
  'operation' => 'operation:position',
  'frames' => [
    'frames:overlapsRight',
    'frames:endsWith',
    'frames:isAround',
    'frames:overlapsLeft',
    'frames:startsWith',
    'frames:matches'
  ],
  'operands' => [
    {
      '@type' => 'koral:span',
      wrap => {
        '@type' => 'koral:term',
        'foundry' => 'cnx',
        'key' => 'VP',
        'layer' => 'c'
      },
    },
    {
      # Second operand is a disjunction of two tokens
      '@type' => 'koral:group',
      'operation' => 'operation:disjunction',
      operands => [
        {
          '@type' => 'koral:token',
          'wrap' => {
            '@type' => 'koral:term',
            'foundry' => 'tt',
            'key' => 'V',
            'layer' => 'p'
          },
        },
        {
          '@type' => 'koral:token',
          'wrap' => {
            '@type' => 'koral:term',
            'foundry' => 'opennlp',
            'key' => 'V',
            'layer' => 'p'
          },
        }
      ]
    }
  ]
}), 'Import Position, Span, Disjunction');

is($query->to_string,
   'constr(pos=endsWith;isAround;matches;overlapsLeft;overlapsRight;startsWith:'.
     '<cnx/c=VP>,([opennlp/p=V])|([tt/p=V]))',
   'Stringification');

serialize_deserialize_ok($query);
261
262
Akron581e8932017-12-11 14:25:03 +0100263
# group:constraint (position, classBetween, notBetween, inBetween)
ok($query = $qb->from_koral({
  '@type' => 'koral:group',
  'operation' => 'operation:constraint',
  'constraints' => [
    {
      '@type' => 'constraint:position',
      frames => ['frames:matches']
    },
    {
      '@type' => 'constraint:classBetween',
      'classOut' => 5
    },
    {
      '@type' => 'constraint:notBetween',
      'wrap' => {
        '@type' => 'koral:span',
        wrap => {
          '@type' => 'koral:term',
          foundry => 'corenlp',
          layer => 'p',
          key => 'V'
        }
      }
    },
    {
      '@type' => 'constraint:inBetween',
      'boundary' => {
        '@type' => 'koral:boundary',
        'min' => 3,
        'max' => 7
      }
    }

  ],
  'operands' => [{
    '@type' => 'koral:token',
    'wrap' => {
      '@type' => 'koral:term',
      'foundry' => 'tt',
      'key' => 'V',
      'layer' => 'p'
    },
  },{
    '@type' => 'koral:token',
    'wrap' => {
      '@type' => 'koral:term',
      'foundry' => 'opennlp',
      'key' => 'V',
      'layer' => 'p'
    },
  }
  ]
}), 'Import Constraint, Token, Term');


is($query->to_string,
   'constr(pos=matches,class=5,notBetween=<corenlp/p=V>,between=3-7:[tt/p=V],[opennlp/p=V])',
   'Stringification');

serialize_deserialize_ok($query);
325
326
Akron2947aeb2017-12-11 16:34:25 +0100327
# nowhere
# A bare koral:nowhere object; its expected string form is '[0]'.
my $nowhere = { '@type' => 'koral:nowhere' };
ok($query = $qb->from_koral($nowhere), 'Nowhere');

is($query->to_string, '[0]', 'Stringification');

serialize_deserialize_ok($query);
338
# Term ID
# The wrapped term carries only an '@id' instead of foundry/layer/key.
my $term_by_id = {
  '@type' => 'koral:term',
  '@id'   => 'term:15'
};

ok(
  $query = $qb->from_koral({
    '@type' => 'koral:token',
    wrap    => $term_by_id
  }),
  'Term identifier'
);

is($query->to_string, '[#15]', 'Stringification');

serialize_deserialize_ok($query);
353
354
355
# Unique
ok($query = $qb->from_koral({
  '@type' => 'koral:group',
  'operation' => 'operation:unique',
  'operands' => [
    {
      '@type' => 'koral:token',
      'wrap' => {
        '@type' => 'koral:term',
        'foundry' => 'opennlp',
        'key' => 'V',
        'layer' => 'p'
      },
    }
  ]
}), 'Import Unique, Token, Term');

is($query->to_string,
   'unique([opennlp/p=V])',
   'Stringification');

serialize_deserialize_ok($query);
378
379
380
# match
# The whole match is encoded in the '@id' string.
ok($query = $qb->from_koral({
  '@type' => 'koral:match',
  '@id' => 'match:doc-1/p0-1_h(1)1-2_h(2)1-2_c5_c8'
}), 'Import Match');

is($query->to_string,
   '[[id=doc-1:0-1!5,8$0,1,1,2|0,2,1,2]]',
   'Stringification');

serialize_deserialize_ok($query);
392
Akron2947aeb2017-12-11 16:34:25 +0100393
Akron3feb4d82017-12-12 19:33:46 +0100394
diag('Test deserialization failures');
# TODO: no failure case is exercised below this point yet. E.g.
# - span without wrap
# - termGroups
# - without operation
# - without operation but relation
# - without operands
# - with a single operand
# - operation:position & operation:exclusion
# - frames are not lists
# - operands != 2

done_testing();
408
409__END__