Fix casefolding for case-insensitive queries
Change-Id: I23db7454c7ab0a54fee4c9c450665b294ccc1324
diff --git a/Changes b/Changes
index 185fd0f..38072b5 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,7 @@
-0.61.3 2023-07-11
+0.61.3 2023-07-17
- Add totalResources to results (diewald)
+ - [bugfix] Fix casefolding for case-insensitive queries
+ (diewald)
0.61.2 2023-04-05
- [bugfix] Fix pagebreak retrieval (margaretha, diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
index c10d93c..6729521 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
@@ -1309,7 +1309,22 @@
value.setLength(offset);
// Add key to value
- value.append(isCaseInsensitive ? key.toLowerCase() : key);
+
+ if (isCaseInsensitive) {
+
+ // This supports both legacy search (plain lower-casing) and locale-dependent case-folding (upper-casing, then lower-casing, e.g. "ß" -> "ss").
+ // It mimics the Perl fc behaviour probably better than icu4j.
+ if (key.toLowerCase().equals(key.toUpperCase().toLowerCase())) {
+ value.append(key.toLowerCase());
+ } else {
+ value.append(key.toLowerCase());
+ values.push(value.toString());
+ value.setLength(offset);
+ value.append(key.toUpperCase().toLowerCase());
+ };
+ } else {
+ value.append(key);
+ };
// TODO:
// This should iterate over all values as well
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index f36cc28..168b83d 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -154,6 +154,34 @@
assertTrue(res.at("/matches/0/snippet").isMissingNode());
assertEquals("dem", res.at("/matches/0/tokens/left/0").asText());
assertEquals("Buchstaben", res.at("/matches/0/tokens/match/0").asText());
+
+ // The test data is old and therefore predates the correct casefolding.
+ // However, we can check the correct behaviour nonetheless.
+ String json = "{\"query\":{\"@type\":\"koral:token\",\"wrap\":{\"@type\":\"koral:term\",\"flags\": [\"flags:caseInsensitive\"],\"key\": \"Grösstenteils\",\"layer\":\"orth\",\"match\": \"match:eq\"}}}";
+
+ ObjectMapper mapper = new ObjectMapper();
+
+ ks = new Krill(json);
+ kr = ks.apply(ki);
+ assertEquals(kr.getTotalResults(), 0);
+ assertEquals(kr.getItemsPerPage(), 25);
+ assertEquals(kr.getMatches().size(), 0);
+
+ res = mapper.readTree(kr.toJsonString());
+ assertEquals(res.at("/meta/serialQuery").asText(),"tokens:i:grösstenteils");
+
+ json = "{\"query\":{\"@type\":\"koral:token\",\"wrap\":{\"@type\":\"koral:term\",\"flags\": [\"flags:caseInsensitive\"],\"key\": \"Größtenteils\",\"layer\":\"orth\",\"match\": \"match:eq\"}}}";
+
+ ks = new Krill(json);
+ kr = ks.apply(ki);
+
+ assertEquals(kr.getTotalResults(), 2);
+ assertEquals(kr.getItemsPerPage(), 25);
+ assertEquals(kr.getMatches().size(), 2);
+
+ res = mapper.readTree(kr.toJsonString());
+ assertEquals(res.at("/meta/serialQuery").asText(),
+ "spanOr([tokens:i:grösstenteils, tokens:i:größtenteils])");
};