Auto-infer the base zip name if only an annotation zip is given
Change-Id: Ie58c311b40bf0a38f16200887a0a4d862b4465d6
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index c6fb696..6950d02 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -13,6 +13,7 @@
import org.w3c.dom.Element
import org.w3c.dom.NodeList
import org.xml.sax.InputSource
+import java.io.File
import java.io.InputStreamReader
import java.util.logging.Logger
@@ -27,16 +28,28 @@
val morpho: ConcurrentHashMap<String, MutableMap<String, MorphoSpan>> = ConcurrentHashMap()
val fnames: ConcurrentHashMap<String, String> = ConcurrentHashMap()
- Arrays.stream(args).forEach { zipFilePath ->
+ if (args == null || args.isEmpty() || args[0] == null) {
+ LOGGER.severe("Usage: KorapXml2Conllu <zipfile1> [<zipfile2> ...]")
+ return
+ }
+ var zips:Array<String?> = args
+ if (args.size == 1 && args[0]!!.matches(Regex(".*\\.([^/.]+)\\.zip$")) == true) {
+ val baseZip = args[0]!!.replace(Regex("\\.([^/.]+)\\.zip$"), ".zip")
+ if (File(baseZip).exists()) {
+ zips = arrayOf(baseZip, zips[0])
+ LOGGER.info("Processing base zip file: $baseZip")
+ }
+ }
+ Arrays.stream(zips).forEach { zipFilePath ->
executor.submit {
processZipFile(
- zipFilePath ?: "",
+ (zipFilePath ?: "").toString(),
texts,
sentences,
tokens,
fnames,
morpho,
- args!!.size > 1
+ zips.size > 1
)
}
}
@@ -205,7 +218,7 @@
var i = token_index
var start_offsets_string = ""
var end_offsets_string = ""
- while (i < tokens[docId]!!.size && tokens[docId]!![i].to <= sentenceEndOffset) {
+ while (tokens[docId]!=null && i < tokens[docId]!!.size && tokens[docId]!![i].to <= sentenceEndOffset) {
start_offsets_string += " " + tokens[docId]!![i].from
end_offsets_string += " " + tokens[docId]!![i].to
i++
@@ -236,7 +249,7 @@
.mapToObj(fsSpans::item)
.forEach { node ->
val features = (node as Element).getElementsByTagName("f")
- var fs = MorphoSpan()
+ val fs = MorphoSpan()
val fromTo = node.getAttribute("from") + "-" + node.getAttribute("to")
IntStream.range(0, features.length).mapToObj(features::item)
.forEach { feature ->
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
index f8ef4cb..5930903 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
@@ -53,6 +53,16 @@
"9\tentzücke\tentzücken\t_\tVVFIN\t_\t_\t_\t_\t1.000000"
)
}
+    @Test
+    fun canInferBaseName() {
+        // Only the annotation zip is passed; main() derives the base zip path
+        // by stripping the ".tree_tagger" infix and adds it if that file exists.
+        val converter = KorapXml2Conllu()
+        val annotationOnly = arrayOf(loadResource("goe.tree_tagger.zip").path)
+        converter.main(annotationOnly)
+        val expectedRow = "9\tentzücke\tentzücken\t_\tVVFIN\t_\t_\t_\t_\t1.000000"
+        assertContains(outContent.toString(), expectedRow)
+    }
@Test
fun canConvertWfdWithMorphoAnnotations() {