Bump tokenizer to v2.4.1 and our version to 2.7.3
Change-Id: Id8780e465e522d65091d0203304007e69c483d43
diff --git a/Changes b/Changes
index 45d24ca..fd52f16 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,10 @@
+2.7.3 2026-04-03
+ - Upgrade KorAP-Tokenizer to v2.4.1.
+ - KorAP-Tokenizer now fixes Unicode surrogate-pair handling in
+ German gender-sensitive forms to avoid crashes on unmatched
+ characters.
+ - Restart tokenizer on failure.
+
2.7.2 2026-03-05
- Fix XML parser error caused by elements (e.g. <ref>) whose
attributes span multiple lines.
diff --git a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
index ab91d57..8c94950 100644
--- a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
+++ b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
@@ -4,7 +4,7 @@
use warnings;
use File::Share ':all';
-our $VERSION = '2.7.2';
+our $VERSION = '2.7.3';
my $MIN_JAVA_VERSION = 21;
use constant {
@@ -27,7 +27,7 @@
my $tokenizer_jar = dist_file(
'tei2korapxml',
- 'KorAP-Tokenizer-2.4.0-standalone.jar'
+ 'KorAP-Tokenizer-2.4.1-standalone.jar'
);
unless (-f $tokenizer_jar) {
diff --git a/script/tei2korapxml b/script/tei2korapxml
index f8a26c2..8b3cf96 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -25,7 +25,7 @@
use KorAP::XML::TEI::Header;
use KorAP::XML::TEI::Inline;
-our $VERSION = '2.7.2';
+our $VERSION = '2.7.3';
our $VERSION_MSG = "\ntei2korapxml - v$VERSION\n";
diff --git a/share/KorAP-Tokenizer-2.4.0-standalone.jar b/share/KorAP-Tokenizer-2.4.0-standalone.jar
deleted file mode 100644
index 0ea5606..0000000
--- a/share/KorAP-Tokenizer-2.4.0-standalone.jar
+++ /dev/null
Binary files differ
diff --git a/share/KorAP-Tokenizer-2.4.1-standalone.jar b/share/KorAP-Tokenizer-2.4.1-standalone.jar
new file mode 100644
index 0000000..6710fa4
--- /dev/null
+++ b/share/KorAP-Tokenizer-2.4.1-standalone.jar
Binary files differ