commit | e3af15beb2bcea9004bd1eed42301488ec3365d8 | [log] [tgz] |
---|---|---|
author | Marc Kupietz <kupietz@ids-mannheim.de> | Thu Mar 28 10:16:13 2024 +0100 |
committer | Marc Kupietz <kupietz@ids-mannheim.de> | Thu Mar 28 10:16:13 2024 +0100 |
tree | a91877488c5e82190f682163ec7a55e2ea14fb43 | |
parent | 323e93e7323f3a62794850656955328c396d90ca [diff] |
Catch sentence indices out of bounds like in v0

Change-Id: I27ee856890af1bc6604c0f32e3172fa2404a90e4
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index 01ebd58..763ecaf 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -504,7 +504,10 @@
                 }
                 if (morpho[docId]?.containsKey("${span.from}-${span.to}") == true) {
                     val mfs = morpho[docId]!!["${span.from}-${span.to}"]
-
+                    if (span.to > texts[docId]!!.length) {
+                        span.to = texts[docId]!!.length
+                        LOGGER.warning("Offset error: could not retrieve token at ${span.from}-${span.to} – ending with: ${texts[docId]!!.substring(span.from, span.to)}")
+                    }
                     output.append(
                         printConlluToken(
                             token_index,