| use strict; | 
 | use warnings; | 
 | use Test::More; | 
 | use File::Basename 'dirname'; | 
 | use File::Spec::Functions qw/catfile/; | 
 | use Test::XML::Loy; | 
 |  | 
 | use FindBin; | 
 | use utf8; | 
 |  | 
 | BEGIN { | 
 |   unshift @INC, "$FindBin::Bin/../lib"; | 
 | }; | 
 |  | 
 | require_ok('KorAP::XML::TEI::Tokenizer::External'); | 
 |  | 
 | my $f = dirname(__FILE__); | 
 | my $cmd = catfile($f, 'cmd', 'tokenizer.pl'); | 
 |  | 
 | # Test aggressive | 
 | my $ext = KorAP::XML::TEI::Tokenizer::External->new( | 
 |   'perl ' . $cmd | 
 |   #  'java -cp Ingestion/target/KorAP-Ingestion-pipeline.jar de.ids_mannheim.korap.tokenizer.KorAPTokenizerImpl' | 
 | ); | 
 |  | 
 | $ext->tokenize("Der alte Mann"); | 
 | my $str = $ext->to_string('unknown'); | 
 | my $t = Test::XML::Loy->new($str); | 
 | $t->attr_is('layer spanList span:nth-child(1)', 'to', 3); | 
 | $t->attr_is('layer spanList span:nth-child(2)', 'from', 4); | 
 | $t->attr_is('layer spanList span:nth-child(2)', 'to', 8); | 
 | $t->attr_is('layer spanList span:nth-child(3)', 'from', 9); | 
 | $t->attr_is('layer spanList span:nth-child(3)', 'to', 13); | 
 | $t->element_count_is('layer spanList span', 3); | 
 |  | 
 | $ext->tokenize("ging über die Straße"); | 
 | $str = $ext->to_string('unknown'); | 
 | $t = Test::XML::Loy->new($str); | 
 | $t->attr_is('layer spanList span:nth-child(1)', 'to', 4); | 
 | $t->attr_is('layer spanList span:nth-child(2)', 'from', 5); | 
 | $t->attr_is('layer spanList span:nth-child(2)', 'to', 9); | 
 | $t->attr_is('layer spanList span:nth-child(3)', 'from', 10); | 
 | $t->attr_is('layer spanList span:nth-child(3)', 'to', 13); | 
 | $t->attr_is('layer spanList span:nth-child(4)', 'from', 14); | 
 | $t->attr_is('layer spanList span:nth-child(4)', 'to', 20); | 
 | $t->element_count_is('layer spanList span', 4); | 
 |  | 
 | $ext->reset; | 
 | $ext->tokenize("Hu aha\x{04}\ndas ist cool"); | 
 |  | 
 | $str = $ext->to_string('unknown'); | 
 | $t = Test::XML::Loy->new($str); | 
 | $t->attr_is('layer spanList span:nth-child(1)', 'to', 2); | 
 | $t->attr_is('layer spanList span:nth-child(2)', 'from', 3); | 
 | $t->attr_is('layer spanList span:nth-child(2)', 'to', 6); | 
 | $t->element_count_is('layer spanList span', 2); | 
 |  | 
 |  | 
 | done_testing; |