#!/usr/bin/env perl
# Test script: adds the DeReKo "Structure" annotation to the token stream of
# fixture '0001' and checks selected entries of the serialized stream
# (element spans, their attributes, and page-break entries).
use strict;
use warnings;
use utf8;
use Test::More;
use KorAP::XML::Annotation::DeReKo::Structure;
use lib 't/annotation';
use TestInit;
# NOTE(review): weaken and Data::Dumper are not referenced in this script as
# shown; kept because they are file-level imports.
use Scalar::Util qw/weaken/;
use Data::Dumper;

# Build the token object for fixture '0001' through the shared test helper.
ok(my $tokens = TestInit::tokens('0001'), 'Parse tokens');

# Third argument 'pagebreaks' is handed to add(); presumably it switches on
# page-break handling in the Structure annotator -- confirm against the module.
ok($tokens->add('DeReKo', 'Structure', 'pagebreaks'), 'Add Structure');

my $data = $tokens->to_data->{data};

# The annotation must be announced in the foundry and layer listings.
like($data->{foundries}, qr!dereko/structure!, 'data');
like($data->{layerInfos}, qr!dereko/s=spans!, 'data');

# Empty element (from 0 to 0) on level 1, with TUI 2
is($data->{stream}->[0]->[1],
    '<>:dereko/s:idsHeader$<b>65<i>0<i>0<i>0<b>1<s>2',
    'Empty element');


is($data->{stream}->[0]->[5], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');

# Attributes:
# The four entries below all end in <s>2, the same value that closes the
# idsHeader element entry checked above.
is($data->{stream}->[0]->[11],
    '@:dereko/s:type:text$<b>17<s>2',
    'Attribute of idsHeader');

is($data->{stream}->[0]->[12],
    '@:dereko/s:status:new$<b>17<s>2',
    'Attribute of idsHeader');

is($data->{stream}->[0]->[13],
    '@:dereko/s:version:1.1$<b>17<s>2',
    'Attribute of idsHeader');

is($data->{stream}->[0]->[14],
    '@:dereko/s:pattern:text$<b>17<s>2',
    'Attribute of idsHeader');

# Sentence span at stream position 5, followed by its attribute entry.
is($data->{stream}->[5]->[1],
    '<>:dereko/s:s$<b>64<i>32<i>42<i>6<b>6<s>1',
    'Sentence span');

is($data->{stream}->[5]->[2],
    '@:dereko/s:broken:no$<b>17<s>1<i>6',
    'Attribute of sentence span');

# Page break at stream position 7: the first entry is the dereko/s:pb
# element, the last entry is the base/s:pb entry.
is($data->{stream}->[7]->[0],
    '<>:dereko/s:pb$<b>65<i>42<i>42<i>7<b>6<s>1',
    'Pagebreak element');

is($data->{stream}->[7]->[-1],
    '~:base/s:pb$<i>2<i>42',
    'Pagebreak element');

done_testing;

__END__