Fix escaping of $ and #
Change-Id: I7a509e11c378f303516c8327cb3b654d4d18fd5a
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
index 58306c3..966736b 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXmlTool.kt
@@ -4015,9 +4015,9 @@
// The # character is used for offset notation in Krill format
- // Both must be percent-encoded when they appear in actual annotation values
+ // Both must be backslash-escaped when they appear in actual annotation values
fun String.escapeKrillValue(): String {
- // Use URL/percent encoding like escapeKrillAttribute
- return this.replace("#", "%23")
- .replace("$", "%24")
+ // Match legacy korapxml2krill escaping that uses backslashes
+ return this.replace("#", "\\#")
+ .replace("$", "\\$")
}
fun jsonString(value: String): String = "\"${value.escapeJson()}\""
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlToolTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlToolTest.kt
index c4ad2c1..6eddd09 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlToolTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXmlToolTest.kt
@@ -803,7 +803,15 @@
assertEquals(referenceJsons.keys, kotlinJsons.keys, "Kotlin and reference JSON sets differ (nwt)")
- val tokensToCheck = listOf("\"s:,\"", "\"s:.\"", "\"s:!\"")
+ val tokensToCheck = listOf(
+ "\"s:,\"",
+ "\"s:.\"",
+ "\"s:!\"",
+ "\"marmot/p:\\$,\"",
+ "\"spacy/p:\\$,\"",
+ "\"opennlp/p:\\$,\"",
+ "\"tt/p:\\$,\""
+ )
referenceJsons.forEach { (doc, referenceJson) ->
val kotlinJson = kotlinJsons.getValue(doc)
tokensToCheck.forEach { token ->