Fixed case-insensitive terms to use Unicode case folding (fc) instead of lc
Change-Id: Ibad88cb8158cc9f4884330250a5464b750f00a1c
diff --git a/Changes b/Changes
index 0da993f..2e6ea97 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.25 2017-02-20
+0.25 2017-03-14
- Updated to Mojolicious 7.20
- Fixed meta treatment in case analytic and monogr
are available
@@ -9,6 +9,7 @@
- Renamed "pages" to "srcPages".
- Fixed handling of prefixes for text sigles.
- Support for MarMoT.
+ - Fixed case insensitivity (uses Unicode case folding).
0.24 2016-12-21
- Added --base-sentences and --base-paragraphs options
diff --git a/Makefile.PL b/Makefile.PL
index 56ab8ee..966bda5 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -28,6 +28,7 @@
'File::Temp' => 0,
'Directory::Iterator' => 0,
'Benchmark' => 0,
+ 'Unicode::CaseFold' => 1.00,
'Carp' => 0,
'strict' => 0,
'warnings' => 0,
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index b89f92c..4889417 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -11,6 +11,7 @@
use KorAP::XML::Tokenizer::Spans;
use KorAP::XML::Tokenizer::Tokens;
use KorAP::XML::Index::MultiTermTokenStream;
+use Unicode::CaseFold;
use List::MoreUtils 'uniq';
use JSON::XS;
use Log::Log4perl;
@@ -156,7 +157,7 @@
$mtt->add('s:' . $token);
# Add case insensitive term
- $mtt->add('i:' . lc $token);
+ $mtt->add('i:' . fc $token);
# Add offset information
$mtt->o_start($from);