| Akron | e1cc43e | 2017-12-08 19:00:04 +0100 | [diff] [blame] | 1 | use Test::More; |
| 2 | use strict; |
| 3 | use warnings; |
| 4 | use Mojo::JSON qw/encode_json decode_json/; |
| 5 | use Mojo::File; |
| 6 | use Data::Dumper; |
| 7 | |
# The builder class has to be loadable before any import can be checked
use_ok('Krawfish::Koral::Query::Builder');

# A single builder instance is shared by every import check in this file
my $qb = Krawfish::Koral::Query::Builder->new;
ok($qb, 'New importer');
| Akron | e1cc43e | 2017-12-08 19:00:04 +0100 | [diff] [blame] | 11 | |
| Akron | 02203f1 | 2017-12-09 13:55:34 +0100 | [diff] [blame] | 12 | |
# Check that serialization and deserialization match.
#
# Takes a query object, remembers its string form, generates its KoralQuery
# fragment, re-imports that fragment through the shared builder ($qb) and
# compares the string form of the re-imported query with the original one.
#
# Every stage that yields a false value is reported as one failed test and
# ends the check right there, so no method is ever called on an undefined
# value (which would abort the whole test script instead of failing a test).
#
# Returns the result of the final comparison, or the (false) result of
# fail() when an earlier stage did not produce a value.
sub serialize_deserialize_ok {
  my $query = shift;

  # Report failures at the line of the caller, not inside this helper
  local $Test::Builder::Level = $Test::Builder::Level + 1;

  my $serialized = $query->to_string
    or return fail('Query not serializable');

  my $fragment = $query->to_koral_fragment
    or return fail('Fragment not generated');

  my $deserialized = $qb->from_koral($fragment)
    or return fail('Fragment not deserializable');

  return is($deserialized->to_string, $serialized, 'Serialization is equal');
}
| 30 | |
| 31 | |
# group:sequence, token, term, group:class(nr)
# A sequence of two tokens; the second one uses match:ne and is wrapped
# in a class group carrying the class number 2.
my $sequence_koral = {
  '@type'     => 'koral:group',
  'operation' => 'operation:sequence',
  'operands'  => [
    {
      '@type' => 'koral:token',
      'wrap'  => {
        '@type'   => 'koral:term',
        'foundry' => 'tt',
        'key'     => 'NN',
        'layer'   => 'p',
        'match'   => 'match:eq'
      }
    },
    {
      '@type'     => 'koral:group',
      'classOut'  => 2,
      'operation' => 'operation:class',
      'operands'  => [
        {
          '@type' => 'koral:token',
          'wrap'  => {
            '@type'   => 'koral:term',
            'foundry' => 'tt',
            'key'     => 'NN',
            'layer'   => 'p',
            'match'   => 'match:ne'
          }
        }
      ]
    }
  ]
};

# $query is declared here and reused by all following import checks
my $query = $qb->from_koral($sequence_koral);
ok($query, 'Import Sequence, Token, Term, Class');

# The imported query has to render in the expected string form
# and survive a round trip through its own KoralQuery fragment
is($query->to_string, '[tt/p=NN]{2:[tt/p!=NN]}', 'Stringification');
serialize_deserialize_ok($query);
| Akron | e1cc43e | 2017-12-08 19:00:04 +0100 | [diff] [blame] | 70 | |
# group:repetition, span
# A span that is repeated between two and three times.
my $repetition_koral = {
  '@type'     => 'koral:group',
  'operation' => 'operation:repetition',
  'boundary'  => {
    '@type' => 'koral:boundary',
    'min'   => 2,
    'max'   => 3
  },
  'operands' => [
    {
      '@type' => 'koral:span',
      'wrap'  => {
        '@type'   => 'koral:term',
        'foundry' => 'cnx',
        'key'     => 'NP',
        'layer'   => 'c'
      }
    }
  ]
};

$query = $qb->from_koral($repetition_koral);
ok($query, 'Import Repetition, Span, Term');

# Check the string form first, then the fragment round trip
is($query->to_string, '<cnx/c=NP>{2,3}', 'Stringification');
serialize_deserialize_ok($query);
| Akron | e1cc43e | 2017-12-08 19:00:04 +0100 | [diff] [blame] | 95 | |
| Akron | 3feb4d8 | 2017-12-12 19:33:46 +0100 | [diff] [blame] | 96 | |
# group:length, termgroup, group:class(no nr)
# A length restriction (2 to 3) around a class group that carries no class
# number; the expected string below shows it rendered as class 1.
# The wrapped token holds an and-termGroup with a nested or-termGroup.
ok($query = $qb->from_koral({
  '@type'     => 'koral:group',
  'operation' => 'operation:length',
  'boundary'  => {
    '@type' => 'koral:boundary',
    min     => 2,
    max     => 3
  },
  'operands' => [
    {
      '@type'     => 'koral:group',
      'operation' => 'operation:class',
      'operands'  => [
        {
          '@type' => 'koral:token',
          'wrap'  => {
            '@type'     => 'koral:termGroup',
            'operation' => 'operation:and',
            'operands'  => [
              {
                '@type'   => 'koral:term',
                'foundry' => 'cnx',
                'key'     => 'NP',
                'layer'   => 'c'
              },
              {
                '@type'   => 'koral:term',
                'foundry' => 'cnx',
                'key'     => 'VP',
                'layer'   => 'c'
              },
              {
                '@type'     => 'koral:termGroup',
                'operation' => 'operation:or',
                'operands'  => [
                  {
                    '@type'   => 'koral:term',
                    'foundry' => 'tt',
                    'key'     => 'NN',
                    'layer'   => 'p'
                  },
                  {
                    '@type'   => 'koral:term',
                    'foundry' => 'opennlp',
                    'key'     => 'NN',
                    'layer'   => 'p'
                  }
                ]
              }
            ]
          }
        }
      ]
    }
  ]
}), 'Import Length, Class, Token, TermGroup');


# The expected string lists the or-operands as opennlp before tt,
# although the fragment above gives tt first
is($query->to_string,
   'length(2-3:{1:[cnx/c=NP&cnx/c=VP&(opennlp/p=NN|tt/p=NN)]})',
   'Stringification');

serialize_deserialize_ok($query);
| 161 | |
| Akron | 1a75a52 | 2017-12-11 09:13:30 +0100 | [diff] [blame] | 162 | |
# group:exclusion
# A span from which a token is excluded.
#
# NOTE(review): the frame list is passed under the key 'frame', while the
# position check further down uses 'frames', and the expected string below
# names only four of the six frames given here. Confirm whether 'frame' is
# a typo before changing it - the expected string may depend on it.
ok($query = $qb->from_koral({
  '@type'     => 'koral:group',
  'operation' => 'operation:exclusion',
  'frame'     => [
    'frames:overlapsRight',
    'frames:endsWith',
    'frames:isAround',
    'frames:overlapsLeft',
    'frames:startsWith',
    'frames:matches'
  ],
  'operands' => [
    {
      '@type' => 'koral:span',
      wrap    => {
        '@type'   => 'koral:term',
        'foundry' => 'cnx',
        'key'     => 'VP',
        'layer'   => 'c'
      },
    },
    {
      '@type' => 'koral:token',
      'wrap'  => {
        '@type'   => 'koral:term',
        'foundry' => 'tt',
        'key'     => 'V',
        'layer'   => 'p'
      },
    }
  ]
}), 'Import Exclusion, Span, Token, Term');


# Check the string form first, then the fragment round trip
is($query->to_string,
   'excl(endsWith;isAround;matches;startsWith:<cnx/c=VP>,[tt/p=V])',
   'Stringification');

serialize_deserialize_ok($query);
| Akron | 142b734 | 2017-12-10 17:40:30 +0100 | [diff] [blame] | 203 | |
| 204 | |
| Akron | e57ddc5 | 2017-12-11 11:33:03 +0100 | [diff] [blame] | 205 | |
# group:position, group:disjunction/or
# A span in a positional relation to a disjunction of two tokens.
ok($query = $qb->from_koral({
  '@type'     => 'koral:group',
  'operation' => 'operation:position',
  'frames'    => [
    'frames:overlapsRight',
    'frames:endsWith',
    'frames:isAround',
    'frames:overlapsLeft',
    'frames:startsWith',
    'frames:matches'
  ],
  'operands' => [
    {
      '@type' => 'koral:span',
      wrap    => {
        '@type'   => 'koral:term',
        'foundry' => 'cnx',
        'key'     => 'VP',
        'layer'   => 'c'
      },
    },
    {
      '@type'     => 'koral:group',
      'operation' => 'operation:disjunction',
      operands    => [
        {
          '@type' => 'koral:token',
          'wrap'  => {
            '@type'   => 'koral:term',
            'foundry' => 'tt',
            'key'     => 'V',
            'layer'   => 'p'
          },
        },
        {
          '@type' => 'koral:token',
          'wrap'  => {
            '@type'   => 'koral:term',
            'foundry' => 'opennlp',
            'key'     => 'V',
            'layer'   => 'p'
          },
        }
      ]
    }
  ]
}), 'Import Position, Span, Disjunction, Token, Term');

# The expected string names all six frames in alphabetical order and the
# disjunction operands as opennlp before tt, i.e. not in the input order
is($query->to_string,
   'constr(pos=endsWith;isAround;matches;overlapsLeft;overlapsRight;startsWith:'.
     '<cnx/c=VP>,([opennlp/p=V])|([tt/p=V]))',
   'Stringification');

serialize_deserialize_ok($query);
| 261 | |
| 262 | |
| Akron | 581e893 | 2017-12-11 14:25:03 +0100 | [diff] [blame] | 263 | |
# group:constraint
# Two tokens combined by four constraints: position, classBetween,
# notBetween (wrapping a span) and inBetween (with a boundary of 3 to 7).
ok($query = $qb->from_koral({
  '@type'       => 'koral:group',
  'operation'   => 'operation:constraint',
  'constraints' => [
    {
      '@type' => 'constraint:position',
      frames  => ['frames:matches']
    },
    {
      '@type'    => 'constraint:classBetween',
      'classOut' => 5
    },
    {
      '@type' => 'constraint:notBetween',
      'wrap'  => {
        '@type' => 'koral:span',
        wrap    => {
          '@type' => 'koral:term',
          foundry => 'corenlp',
          layer   => 'p',
          key     => 'V'
        }
      }
    },
    {
      '@type'    => 'constraint:inBetween',
      'boundary' => {
        '@type' => 'koral:boundary',
        'min'   => 3,
        'max'   => 7
      }
    }

  ],
  'operands' => [{
    '@type' => 'koral:token',
    'wrap'  => {
      '@type'   => 'koral:term',
      'foundry' => 'tt',
      'key'     => 'V',
      'layer'   => 'p'
    },
  },{
    '@type' => 'koral:token',
    'wrap'  => {
      '@type'   => 'koral:term',
      'foundry' => 'opennlp',
      'key'     => 'V',
      'layer'   => 'p'
    },
  }
  ]
}), 'Import Constraint, Span, Token, Term');


# The expected string lists the constraints in the order given above
is($query->to_string,
   'constr(pos=matches,class=5,notBetween=<corenlp/p=V>,between=3-7:[tt/p=V],[opennlp/p=V])',
   'Stringification');

serialize_deserialize_ok($query);
| 325 | |
| 326 | |
| Akron | 2947aeb | 2017-12-11 16:34:25 +0100 | [diff] [blame] | 327 | |
# nowhere
# A fragment that consists of nothing but the koral:nowhere type.
my $nowhere_koral = { '@type' => 'koral:nowhere' };

$query = $qb->from_koral($nowhere_koral);
ok($query, 'Nowhere');

# Check the string form first, then the fragment round trip
is($query->to_string, '[0]', 'Stringification');
serialize_deserialize_ok($query);
| 338 | |
# Term ID
# A token whose term carries only an '@id' instead of foundry, layer and key.
my $term_id_koral = {
  '@type' => 'koral:token',
  'wrap'  => {
    '@type' => 'koral:term',
    '@id'   => 'term:15'
  }
};

$query = $qb->from_koral($term_id_koral);
ok($query, 'Term identifier');

# Check the string form first, then the fragment round trip
is($query->to_string, '[#15]', 'Stringification');
serialize_deserialize_ok($query);
| 353 | |
| 354 | |
| 355 | |
# Unique
# A unique group wrapping a single token.
ok($query = $qb->from_koral({
  '@type'     => 'koral:group',
  'operation' => 'operation:unique',
  'operands'  => [
    {
      '@type' => 'koral:token',
      'wrap'  => {
        '@type'   => 'koral:term',
        'foundry' => 'opennlp',
        'key'     => 'V',
        'layer'   => 'p'
      },
    }
  ]
}), 'Import Unique, Token, Term');

# Check the string form first, then the fragment round trip
is($query->to_string,
   'unique([opennlp/p=V])',
   'Stringification');

serialize_deserialize_ok($query);
| 378 | |
| 379 | |
| 380 | |
# match
# A match that is given by its '@id' string only.
ok($query = $qb->from_koral({
  '@type' => 'koral:match',
  '@id'   => 'match:doc-1/p0-1_h(1)1-2_h(2)1-2_c5_c8'
}), 'Import Match');

# The expected string is single-quoted on purpose: it contains a literal '$0'
is($query->to_string,
   '[[id=doc-1:0-1!5,8$0,1,1,2|0,2,1,2]]',
   'Stringification');

serialize_deserialize_ok($query);
| 392 | |
| Akron | 2947aeb | 2017-12-11 16:34:25 +0100 | [diff] [blame] | 393 | |
| Akron | 3feb4d8 | 2017-12-12 19:33:46 +0100 | [diff] [blame] | 394 | |
# TODO: No deserialization failure is checked yet; the diagnostic message
# below only marks the place where such checks belong.
diag('Test deserialization failures');

# Cases that should be covered, e.g.
#   - span without wrap
#   - termGroups
#       - without operation
#       - without operation but relation
#       - without operands
#       - with a single operand
#   - operation:position & operation:exclusion
#       - frames are not lists
#       - operands != 2

done_testing();

__END__