Update KorAP-Tokenizer to v2.0.0-SNAPSHOT
Change-Id: Ifc08d660e81cdae9144c2a0b863b9a8abb790d20
diff --git a/Changes b/Changes
index 49ccd01..b47c446 100644
--- a/Changes
+++ b/Changes
@@ -1,2 +1,5 @@
+0.02 2020-10-14
+ - Update KorAP-Tokenizer to v2.0.0-SNAPSHOT.
+
0.01 2020-09-28
- Initial release to GitHub.
diff --git a/Makefile.PL b/Makefile.PL
index 25eae0a..4fbf708 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -21,6 +21,7 @@
'Pod::Usage' => 0,
'Dumbbench' => '0.111',
'DateTime' => '1.51',
+ 'File::Share' => '0.25',
'Capture::Tiny' => '0.48'
},
PREREQ_PM => {
diff --git a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
index b0ad51e..4e2f522 100644
--- a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
+++ b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
@@ -20,7 +20,7 @@
my $tokenizer_jar = dist_file(
'tei2korapxml',
- 'KorAP-Tokenizer-1.3-SNAPSHOT-6cc760f-standalone.jar'
+ 'KorAP-Tokenizer-2.0.0-SNAPSHOT-standalone.jar'
);
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 09f2a81..8c35e90 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -34,7 +34,7 @@
1;
};
-our $VERSION = '0.01';
+our $VERSION = '0.02';
our $VERSION_MSG = "\ntei2korapxml - v$VERSION\n";
diff --git a/share/KorAP-Tokenizer-1.3-SNAPSHOT-6cc760f-standalone.jar b/share/KorAP-Tokenizer-1.3-SNAPSHOT-6cc760f-standalone.jar
deleted file mode 100644
index 748b341..0000000
--- a/share/KorAP-Tokenizer-1.3-SNAPSHOT-6cc760f-standalone.jar
+++ /dev/null
Binary files differ
diff --git a/share/KorAP-Tokenizer-2.0.0-SNAPSHOT-standalone.jar b/share/KorAP-Tokenizer-2.0.0-SNAPSHOT-standalone.jar
new file mode 100644
index 0000000..60cbb4e
--- /dev/null
+++ b/share/KorAP-Tokenizer-2.0.0-SNAPSHOT-standalone.jar
Binary files differ
diff --git a/t/tokenization-korap.t b/t/tokenization-korap.t
index 809dd45..a4c547e 100644
--- a/t/tokenization-korap.t
+++ b/t/tokenization-korap.t
@@ -63,4 +63,13 @@
$t->attr_is('layer spanList span:nth-child(14)', 'from', 80);
$t->attr_is('layer spanList span:nth-child(14)', 'to', 92);
$t->element_count_is('layer spanList span', 14);
+
+$string = "Gefunden auf www.wikipedia.de";
+$ext->reset;
+$ext->tokenize($string);
+$str = $ext->to_string('unknown');
+$t = Test::XML::Loy->new($str);
+$t->attr_is('layer spanList span:nth-child(3)', 'from', 13);
+$t->attr_is('layer spanList span:nth-child(3)', 'to', 29);
+$t->element_count_is('layer spanList span', 3);
done_testing;