Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 1 | #!/usr/bin/env perl |
| 2 | use strict; |
| 3 | use warnings; |
| 4 | use Test::More; |
| 5 | use File::Basename; |
| 6 | use File::Spec::Functions; |
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 7 | use Data::Dumper; |
Akron | 99d2d08 | 2024-07-18 16:17:08 +0200 | [diff] [blame] | 8 | use utf8; |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 9 | |
| 10 | use Test::Output; |
| 11 | use Mojo::JSON 'decode_json'; |
Akron | 99d2d08 | 2024-07-18 16:17:08 +0200 | [diff] [blame] | 12 | use Mojo::Util qw'decode encode'; |
Akron | 340a9cb | 2020-05-20 12:55:22 +0200 | [diff] [blame] | 13 | |
# Resolve the converter script and the first fixture relative to this
# test file, so the test does not depend on the working directory.
my $test_dir = dirname(__FILE__);
my $script   = catfile($test_dir, '..', 'script', 'cosmasvc2koralquery');
my $list1    = catfile($test_dir, 'data', 'list2.def');

# Check STDOUT: the captured output has to start with {"@context" and
# end with a closing brace.
stdout_like(
  sub { system($script, 'def', $list1) },
  qr!^\{\"\@context\".+?\}$!,
  "check stdout"
);
| 25 | |
# Check JSON
# The converted list2.def fixture is expected to be a docGroup that
# combines two value vectors with "or".
my $json = decode_json(join('', `$script def $list1`));

is($json->{'collection'}->{'@type'}, 'koral:docGroup', 'type');
is($json->{'collection'}->{'operation'}, 'operation:or', 'operation');

# First operand: vector of document sigles
my $op1 = $json->{'collection'}->{'operands'}->[0];
is($op1->{'@type'}, 'koral:doc', 'type');
is($op1->{'key'}, 'docSigle', 'key');
is($op1->{'match'}, 'match:eq', 'match');
is($op1->{'value'}->[0], "BRZ05/SEP", 'value');
is($op1->{'value'}->[1], "BRZ05/OKT", 'value');
is($op1->{'value'}->[-1], "BRZ08/FEB", 'value');

# Second operand: vector of text sigles
my $op2 = $json->{'collection'}->{'operands'}->[1];
is($op2->{'@type'}, 'koral:doc', 'type');
is($op2->{'key'}, 'textSigle', 'key');
is($op2->{'match'}, 'match:eq', 'match');
is($op2->{'value'}->[0], "B19/AUG/01665", 'value');
is($op2->{'value'}->[1], "B19/AUG/01666", 'value');
| 46 | |
my $list3 = catfile(dirname(__FILE__), 'data', 'list3.def');

# Check JSON
# Only return extended area
$json = decode_json(join('', `$script def $list3`));

# The collection is expected to be a single koral:doc, not a group
is($json->{'collection'}->{'@type'}, 'koral:doc', 'type');

# The expected string is single-quoted, so the backslashes in front of
# the inner double quotes are literal characters of the comment.
is($json->{'collection'}->{'comment'}, 'name:"VAS-N91 (Stand \"2013\", korr. 2017)"', 'comment');

# The collection node itself carries the text sigle vector
$op1 = $json->{'collection'};
is($op1->{'@type'}, 'koral:doc', 'type');
is($op1->{'key'}, 'textSigle', 'key');
is($op1->{'match'}, 'match:eq', 'match');
is($op1->{'value'}->[0], "A00/APR/23232", 'value');
is($op1->{'value'}->[1], "A00/APR/23233", 'value');
Akron | 323881c | 2020-05-20 17:15:42 +0200 | [diff] [blame] | 65 | |
my $list4 = catfile(dirname(__FILE__), 'data', 'list4.def');

# Only contains intended area
$json = decode_json(join('', `$script def $list4`));

# Top-level node: a named document group
my $collection = $json->{collection};
is($collection->{'@type'}, 'koral:docGroup', 'type');
is($collection->{comment}, 'name:"VAS N91"', 'name');
like($collection->{comment}, qr!^name:"VAS N91"!, 'name');

# Operands of the first nested group
my $nested = $collection->{operands}->[0]->{operands};

# First nested operand: an intersection with three operands
my $bz = $nested->[0];
is($bz->{operation}, 'operation:and', 'Intersection');
is(scalar @{$bz->{operands}}, 3, 'Flatten operands');

# Second nested operand: a plain value vector
my $faz = $nested->[1];
is($faz->{'@type'}, 'koral:doc', 'DocVec');
is($faz->{value}->[0], 'F97', 'Value');
is($faz->{value}->[1], 'F99', 'Value');
Akron | 1c07045 | 2020-05-25 11:28:30 +0200 | [diff] [blame] | 84 | |
# corp-w-short.def: the collection is expected to be one text sigle vector
my $list_long = catfile(dirname(__FILE__), 'data', 'corp-w-short.def');
my $short_output = join('', `$script def $list_long`);
$json = decode_json($short_output)->{collection};

is($json->{'@type'}, 'koral:doc', 'DocVec');
is($json->{key}, 'textSigle', 'Key');

# Spot-check both ends of the value vector
my $sigles = $json->{value};
is($sigles->[0], 'A97/APR/00001', 'Value');
is($sigles->[1], 'A97/APR/00002', 'Value');
is($sigles->[-1], 'A97/APR/01001', 'Value');
| 92 | |
# corp-a.def: a single document constraint with a non-ASCII name comment
my $corpa = catfile(dirname(__FILE__), 'data', 'corp-a.def');

# NOTE(review): the script output is passed through encode('utf-8', ...)
# before decode_json() - presumably because the script does not emit
# UTF-8 itself; confirm against the script's output encoding.
$json = decode_json(encode('utf-8',join('', `$script def $corpa`)));

is($json->{'collection'}->{'@type'}, 'koral:doc', 'type');
is($json->{'collection'}->{'key'}, 'pubPlaceKey', 'key');
is($json->{'collection'}->{'value'}, 'A', 'value');
is($json->{'collection'}->{'comment'}, 'name:"Korpora aus Österreich"', 'comment');
| 100 | |
# NOTE(review): unlike the other fixtures, this file name has no ".def"
# extension - confirm that this matches the file in t/data.
my $corpw = catfile(dirname(__FILE__), 'data', 'corp-w-vas-n91-kor17');

# NOTE(review): output is re-encoded before decode_json(), presumably
# because the script does not emit UTF-8 itself; confirm.
$json = decode_json(encode('utf-8',join('', `$script def $corpw`)));

is($json->{'collection'}->{'@type'}, 'koral:doc', 'type');
is($json->{'collection'}->{'key'}, 'textSigle', 'key');
is($json->{'collection'}->{'value'}->[0], 'A00/APR/23232', 'value');
is($json->{'collection'}->{'comment'}, 'name:"VAS-N91 (Stand 2013, korr. 2017)"', 'comment');


done_testing;
__END__