#!/usr/bin/env perl
# Tests for KorAP::Document::Primary: reading the primary text by character
# offsets (data), by byte offsets (data_bytes), and converting byte offsets
# to character offsets (bytes2chars, xip2chars).
use strict;
use warnings;
use Test::More;
use Mojo::ByteStream 'b'; # NOTE(review): 'b' is not called below - confirm before removing
use utf8;
use lib 'lib', '../lib';

# The compared strings contain non-ASCII characters. Without an encoding
# layer on the TAP handles, a failing is() prints mis-encoded diagnostics
# and warns "Wide character in print".
my $builder = Test::More->builder;
for my $handle (qw(output failure_output todo_output)) {
    binmode($builder->$handle, ':encoding(UTF-8)');
}

use_ok('KorAP::Document::Primary');

# ---------------------------------------------------------------------------
# Sample 1: the only multi-byte characters are two-byte umlauts.
# ---------------------------------------------------------------------------
my $t = "Der März ging vorüber und demnächst würde es Herbstblätter regnen.";

ok(my $p = KorAP::Document::Primary->new($t), 'Constructor');

# 66 is the character count; the UTF-8 byte count of this text would be 71.
is($p->data_length, 66, 'Text has correct length');

# data() without arguments is expected to return the whole text; with two
# arguments it is expected to slice by character offsets, end-exclusive.
is($p->data, $t, 'Text is identical');
is($p->data(0,3), 'Der', 'Text is identical');
is($p->data(4,8), 'März', 'Text is identical');
is($p->data(26,35), 'demnächst', 'Text is identical');

# data_bytes() is expected to slice by UTF-8 byte offsets: each umlaut takes
# two bytes, so 'März' ends at byte 9 (not 8) and 'demnächst' spans 28..38.
is($p->data_bytes(0,3), 'Der', 'Text is identical');
is($p->data_bytes(4,9), 'März', 'Text is identical');
is($p->data_bytes(28,38), 'demnächst', 'Text is identical');

# bytes2chars() must map the byte offsets used above back to the character
# offsets used with data().
is($p->bytes2chars(4), 4, 'Byte offset matches');
is($p->bytes2chars(9), 8, 'Byte offset matches');
is($p->bytes2chars(28), 26, 'Byte offset matches');
is($p->bytes2chars(38), 35, 'Byte offset matches');

# Round trip: slicing by converted character offsets must yield the same
# text as slicing by the original byte offsets.
is(
    $p->data(
        $p->bytes2chars(17),
        $p->bytes2chars(45)
    ),
    $p->data_bytes(17,45),
    'Text is identical'
);

# ---------------------------------------------------------------------------
# Sample 2: adds three-byte quotation marks to the two-byte ä, ß and ü.
# ---------------------------------------------------------------------------
$t = 'Er dächte, daß dies „für alle Elemente gilt“.';

ok($p = KorAP::Document::Primary->new($t), 'Constructor');

# 45 characters; the UTF-8 byte count of this text would be 52.
is($p->data_length, 45, 'Text has correct length');

# Character-offset slices, including the single-character quotation marks.
is($p->data, $t, 'Text is identical');
is($p->data(0,2), 'Er', 'Text is identical');
is($p->data(3,9), 'dächte', 'Text is identical');
is($p->data(21,24), 'für', 'Text is identical');
is($p->data(20,21), '„', 'Text is identical');
is($p->data(43,44), '“', 'Text is identical');
is($p->data(44,45), '.', 'Text is identical');

# Each byte-offset slice is followed by checks that both of its boundaries
# convert to the character offsets used in the data() calls above.
is($p->data_bytes(0,2), 'Er', 'Text is identical');
is($p->bytes2chars(0),0, 'b2c correct');
is($p->bytes2chars(2),2, 'b2c correct');
is($p->data_bytes(3,10), 'dächte', 'Text is identical');
is($p->bytes2chars(3),3, 'b2c correct');
is($p->bytes2chars(10),9, 'b2c correct');
is($p->data_bytes(25,29), 'für', 'Text is identical');
is($p->bytes2chars(25),21, 'b2c correct');
is($p->bytes2chars(29),24, 'b2c correct');
# Byte 25 is checked a second time below: it is both the start of 'für'
# and the end of the three-byte opening quotation mark.
is($p->data_bytes(22,25), '„', 'Text is identical');
is($p->bytes2chars(22),20, 'b2c correct');
is($p->bytes2chars(25),21, 'b2c correct');
is($p->data_bytes(48,51), '“', 'Text is identical');
is($p->bytes2chars(48),43, 'b2c correct');
is($p->bytes2chars(51),44, 'b2c correct');
is($p->data_bytes(51,52), '.', 'Text is identical');
is($p->bytes2chars(52),45, 'b2c correct');

# Same round trip as for sample 1.
is(
    $p->data(
        $p->bytes2chars(17),
        $p->bytes2chars(45)
    ),
    $p->data_bytes(17,45),
    'Text is identical'
);

# ---------------------------------------------------------------------------
# xip2chars: runs against the sample 2 object, because the constructor call
# below is commented out.
# NOTE(review): presumably converts offsets reported by the XIP tool into
# character offsets - confirm against KorAP::Document::Primary.
# ---------------------------------------------------------------------------
#ok($p = KorAP::Document::Primary->new($t), 'Constructor');
is($p->xip2chars(0), 0, 'Fine');
is($p->xip2chars(7), 6, 'Fine');
#diag $p->data($p->latinbytes2chars(3),$p->latinbytes2chars(9));


done_testing;