Auto-infer base zip name if only an annotation zip is given

Change-Id: Ie58c311b40bf0a38f16200887a0a4d862b4465d6
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index c6fb696..6950d02 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -13,6 +13,7 @@
 import org.w3c.dom.Element
 import org.w3c.dom.NodeList
 import org.xml.sax.InputSource
+import java.io.File
 import java.io.InputStreamReader
 import java.util.logging.Logger
 
@@ -27,16 +28,28 @@
         val morpho: ConcurrentHashMap<String, MutableMap<String, MorphoSpan>> = ConcurrentHashMap()
         val fnames: ConcurrentHashMap<String, String> = ConcurrentHashMap()
 
-        Arrays.stream(args).forEach { zipFilePath ->
+        if (args == null || args.isEmpty() || args[0] == null) {
+                LOGGER.severe("Usage: KorapXml2Conllu <zipfile1> [<zipfile2> ...]")
+                return
+        }
+        var zips:Array<String?> = args
+        if (args.size == 1 && args[0]!!.matches(Regex(".*\\.([^/.]+)\\.zip$")) == true) {
+            val baseZip = args[0]!!.replace(Regex("\\.([^/.]+)\\.zip$"), ".zip")
+            if (File(baseZip).exists()) {
+                zips = arrayOf(baseZip, zips[0])
+                LOGGER.info("Processing base zip file: $baseZip")
+            }
+        }
+        Arrays.stream(zips).forEach { zipFilePath ->
             executor.submit {
                 processZipFile(
-                    zipFilePath ?: "",
+                    (zipFilePath ?: "").toString(),
                     texts,
                     sentences,
                     tokens,
                     fnames,
                     morpho,
-                    args!!.size > 1
+                    zips.size > 1
                 )
             }
         }
@@ -205,7 +218,7 @@
         var i = token_index
         var start_offsets_string = ""
         var end_offsets_string = ""
-        while (i < tokens[docId]!!.size && tokens[docId]!![i].to <= sentenceEndOffset) {
+        while (tokens[docId]!=null && i < tokens[docId]!!.size && tokens[docId]!![i].to <= sentenceEndOffset) {
             start_offsets_string += " " + tokens[docId]!![i].from
             end_offsets_string += " " + tokens[docId]!![i].to
             i++
@@ -236,7 +249,7 @@
             .mapToObj(fsSpans::item)
             .forEach { node ->
                 val features = (node as Element).getElementsByTagName("f")
-                var fs = MorphoSpan()
+                val fs = MorphoSpan()
                 val fromTo = node.getAttribute("from") + "-" + node.getAttribute("to")
                 IntStream.range(0, features.length).mapToObj(features::item)
                     .forEach { feature ->
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
index f8ef4cb..5930903 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
@@ -53,6 +53,16 @@
             "9\tentzücke\tentzücken\t_\tVVFIN\t_\t_\t_\t_\t1.000000"
         )
     }
+    @Test
+    fun canInferBaseName() {
+        // Only the annotation zip is passed on the command line; the converter
+        // is expected to locate the matching base zip on its own.
+        val converter = KorapXml2Conllu()
+        converter.main(arrayOf(loadResource("goe.tree_tagger.zip").path))
+        val expectedRow = "9\tentzücke\tentzücken\t_\tVVFIN\t_\t_\t_\t_\t1.000000"
+        val output = outContent.toString()
+        assertContains(output, expectedRow)
+    }
 
     @Test
     fun canConvertWfdWithMorphoAnnotations() {