Bump KorAP-Tokenizer to v2.3.0 and our version to 2.6.2

Change-Id: I7345e72cd67326797ca574bbf5f63bc3fb34de44
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 7334fa7..0ffd43a 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -20,6 +20,11 @@
         with:
           perl-version: ${{ matrix.perl }}
       - run: perl -V
+      - name: Set up JDK 21
+        uses: actions/setup-java@v3
+        with:
+          java-version: '21'
+          distribution: 'temurin'
       - name: Install dependencies
         run: |
           cpanm File::ShareDir::Install
diff --git a/Changes b/Changes
index 6b6d671..7added2 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,8 @@
+2.6.2 2025-12-10
+        - Upgrade KorAP-Tokenizer to v2.3.0 (resolves issues with
+          Gendersternchen after hyphens, emoji clusters, and Wikipedia templates).
+        - Upgrade Java dependency to 21.
+
 2.6.1 2025-04-16
         - Fix ASCII entity resolution.
         - Make KorAP-Tokenizer heap size configurable via environment
diff --git a/Readme.pod b/Readme.pod
index c097fd2..8cb2375 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -83,7 +83,7 @@
 In case everything went well, the C<tei2korapxml> tool will
 be available on your command line immediately.
 
-Minimum requirement for L<KorAP::XML::TEI> is Perl 5.16.
+Minimum requirement for L<KorAP::XML::TEI> is Perl 5.38.
 
 =head1 OPTIONS
 
diff --git a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
index ac5aae8..1aadc4c 100644
--- a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
+++ b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
@@ -4,8 +4,8 @@
 use warnings;
 use File::Share ':all';
 
-our $VERSION = '2.6.1';
-my $MIN_JAVA_VERSION = 17;
+our $VERSION = '2.6.2';
+my $MIN_JAVA_VERSION = 21;
 
 use constant {
   WAIT_SECS => 30
@@ -27,7 +27,7 @@
 
 my $tokenizer_jar = dist_file(
   'tei2korapxml',
-  'KorAP-Tokenizer-2.2.5-standalone.jar'
+  'KorAP-Tokenizer-2.3.0-standalone.jar'
 );
 
 unless (-f $tokenizer_jar) {
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 3e5b335..089b5da 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -25,7 +25,7 @@
 use KorAP::XML::TEI::Header;
 use KorAP::XML::TEI::Inline;
 
-our $VERSION = '2.6.1';
+our $VERSION = '2.6.2';
 
 our $VERSION_MSG = "\ntei2korapxml - v$VERSION\n";
 
diff --git a/share/KorAP-Tokenizer-2.2.5-standalone.jar b/share/KorAP-Tokenizer-2.3.0-standalone.jar
similarity index 64%
rename from share/KorAP-Tokenizer-2.2.5-standalone.jar
rename to share/KorAP-Tokenizer-2.3.0-standalone.jar
index 5b5a8ea..fc4c135 100644
--- a/share/KorAP-Tokenizer-2.2.5-standalone.jar
+++ b/share/KorAP-Tokenizer-2.3.0-standalone.jar
Binary files differ
diff --git a/t/tokenization-korap.t b/t/tokenization-korap.t
index ae21638..be90c4a 100644
--- a/t/tokenization-korap.t
+++ b/t/tokenization-korap.t
@@ -109,4 +109,55 @@
 $t = Test::XML::Loy->new($str);
 $t->element_count_is('layer spanList span', 4);
 
+
+# Tests for issue #115
+$string = "Die Serb*innen wie die Kosovo-Albaner*innen";
+$ext->reset;
+$ext->tokenize($string);
+$str = $ext->to_string('issue-115');
+$t = Test::XML::Loy->new($str);
+$t->element_count_is('layer spanList span', 5, 'Issue #115 - token count');
+$t->attr_is('layer spanList span:nth-child(2)', 'from', 4, 'Issue #115 - Serb*innen from');
+$t->attr_is('layer spanList span:nth-child(2)', 'to', 14, 'Issue #115 - Serb*innen to');
+$t->attr_is('layer spanList span:nth-child(5)', 'from', 23, 'Issue #115 - Kosovo-Albaner*innen from');
+$t->attr_is('layer spanList span:nth-child(5)', 'to', 43, 'Issue #115 - Kosovo-Albaner*innen to');
+
+# Tests for issue #114
+$string = "[_EMOJI:{{S|;)}}_]";
+$ext->reset;
+$ext->tokenize($string);
+$str = $ext->to_string('issue-114');
+$t = Test::XML::Loy->new($str);
+$t->element_count_is('layer spanList span', 1, 'Issue #114 - token count');
+$t->element_exists('layer spanList span:nth-child(1)[from="0"]', 'Issue #114 - EMOJI from');
+$t->attr_is('layer spanList span:nth-child(1)', 'to', 18, 'Issue #114 - EMOJI to');
+
+# Tests for issue #113
+$string = "✊🏿";
+$ext->reset;
+$ext->tokenize($string);
+$str = $ext->to_string('issue-113-1');
+$t = Test::XML::Loy->new($str);
+$t->element_count_is('layer spanList span', 1, 'Issue #113 - emoji modifier count');
+$t->element_exists('layer spanList span:nth-child(1)[from="0"]', 'Issue #113 - emoji modifier from');
+$t->attr_is('layer spanList span:nth-child(1)', 'to', 2, 'Issue #113 - emoji modifier to');
+
+$string = "👨‍👨‍👦"; # U+1F468 U+200D U+1F468 U+200D U+1F466
+$ext->reset;
+$ext->tokenize($string);
+$str = $ext->to_string('issue-113-2');
+$t = Test::XML::Loy->new($str);
+$t->element_count_is('layer spanList span', 1, 'Issue #113 - emoji ZWJ family 1 count');
+$t->element_exists('layer spanList span:nth-child(1)[from="0"]', 'Issue #113 - emoji ZWJ family 1 from');
+$t->attr_is('layer spanList span:nth-child(1)', 'to', 5, 'Issue #113 - emoji ZWJ family 1 to');
+
+$string = "👨‍👦‍👦"; # U+1F468 U+200D U+1F466 U+200D U+1F466
+$ext->reset;
+$ext->tokenize($string);
+$str = $ext->to_string('issue-113-3');
+$t = Test::XML::Loy->new($str);
+$t->element_count_is('layer spanList span', 1, 'Issue #113 - emoji ZWJ family 2 count');
+$t->element_exists('layer spanList span:nth-child(1)[from="0"]', 'Issue #113 - emoji ZWJ family 2 from');
+$t->attr_is('layer spanList span:nth-child(1)', 'to', 5, 'Issue #113 - emoji ZWJ family 2 to');
+
 done_testing;