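Update KorAP-Tokenizer to v2.2.2 (single quote bug fix)

Replace the bundled KorAP-Tokenizer 2.2.0 standalone jar with 2.2.2,
point the KorAP tokenizer wrapper at the new jar, and add a test for
input containing tokens that start with a single quote.

Change-Id: I28f58470e7615db62a5a9c5dc18fd29aa09875f5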
diff --git a/Changes b/Changes
index ffc191f..b996114 100644
--- a/Changes
+++ b/Changes
@@ -3,6 +3,7 @@
- Check for valid sigles to avoid broken directories
- Introduce exclusivity for inline tokens handling.
- Use single dash for STDIN.
+ - Update KorAP-Tokenizer to v2.2.2 (single quote bug fix)
2.2.0 2021-08-26 Release
- Remove unnecessary branch in recursive call
diff --git a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
index b0fadcd..acc30af 100644
--- a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
+++ b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
@@ -20,7 +20,7 @@
my $tokenizer_jar = dist_file(
'tei2korapxml',
- 'KorAP-Tokenizer-2.2.0-standalone.jar'
+ 'KorAP-Tokenizer-2.2.2-standalone.jar'
);
diff --git a/share/KorAP-Tokenizer-2.2.0-standalone.jar b/share/KorAP-Tokenizer-2.2.0-standalone.jar
deleted file mode 100644
index 9e45fa7..0000000
--- a/share/KorAP-Tokenizer-2.2.0-standalone.jar
+++ /dev/null
Binary files differ
diff --git a/share/KorAP-Tokenizer-2.2.2-standalone.jar b/share/KorAP-Tokenizer-2.2.2-standalone.jar
new file mode 100644
index 0000000..c24afbc
--- /dev/null
+++ b/share/KorAP-Tokenizer-2.2.2-standalone.jar
Binary files differ
diff --git a/t/tokenization-korap.t b/t/tokenization-korap.t
index 43583f3..d852b4e 100644
--- a/t/tokenization-korap.t
+++ b/t/tokenization-korap.t
@@ -1,6 +1,6 @@
use strict;
use warnings;
-use Test::More tests => 32;
+use Test::More tests => 33;
use File::Basename 'dirname';
use File::Spec::Functions qw/catfile/;
use Test::XML::Loy;
@@ -102,4 +102,11 @@
$t = Test::XML::Loy->new($str);
$t->element_count_is('layer spanList span', 3);
+$string = "'Luhafen 'Wschaft";
+$ext->reset;
+$ext->tokenize($string);
+$str = $ext->to_string('unknown');
+$t = Test::XML::Loy->new($str);
+$t->element_count_is('layer spanList span', 4);
+
done_testing;