Import command line options from Perl implementation

The options are declared and listed in the help output, but most of them are not implemented yet.

Change-Id: I32e88ddeb06aeb2062d98b65cbd80e80a5131003
diff --git a/app/build.gradle b/app/build.gradle
index 489dee5..dbce1b5 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -14,6 +14,7 @@
     // Apply the application plugin to add support for building a CLI application in Java.
     id 'application'
     id 'com.github.johnrengelman.shadow' version '8.1.1'
+    id 'org.jetbrains.kotlin.kapt' version '2.0.0-Beta4'
 }
 
 
@@ -32,6 +33,10 @@
     // This dependency is used by the application.
     implementation 'com.google.guava:guava:33.0.0-jre'
 
+
+    kapt 'info.picocli:picocli-codegen:4.7.5'
+    implementation 'info.picocli:picocli:4.7.5'
+
     // Use the Kotlin test library.
     testImplementation 'org.jetbrains.kotlin:kotlin-test'
 
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index b5b47b7..32bc9c0 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -13,15 +13,74 @@
 import org.w3c.dom.Element
 import org.w3c.dom.NodeList
 import org.xml.sax.InputSource
+import picocli.CommandLine
+import picocli.CommandLine.Parameters
+import picocli.CommandLine.Option
+import picocli.CommandLine.Command
 import java.io.File
 import java.io.InputStreamReader
 import java.util.HashMap
+import java.util.concurrent.Callable
 import java.util.logging.Logger
+import kotlin.system.exitProcess
 
-class KorapXml2Conllu {
+@Command(
+    name = "KorapXml2Conllu",
+    mixinStandardHelpOptions = true,
+    version = ["KorapXml2Conllu 2.0-alpha-01"],
+    description = ["Converts KorAP XML files to CoNLL-U format"]
+)
+
+class KorapXml2Conllu : Callable<Int> {
+
+    @Parameters(arity = "1..*", description = ["At least one zip file name"])
+    var zipFileNames: Array<String>? = null
+
+    @Option(names = ["--sigle-pattern", "-p"], paramLabel = "PATTERN",
+        description = ["Not yet implemented: sigle pattern"])
+    var siglePattern: String = ""
+
+    @Option(names = ["--extract-attributes-regex", "-e"], paramLabel = "REGEX",
+        description = ["Not yet implemented: extract attributes regex"])
+    var extractAttributesRegex: String = ""
+
+    @Option(names = ["--s-bounds-from-morpho"],
+        description = ["Not yet implemented: s bounds from morpho"])
+    var sBoundsFromMorpho: Boolean = false
+
+    @Option(names = ["--log", "-l"], paramLabel = "LEVEL",
+        description = ["Not yet implemented: log level"])
+    var logLevel: String = "warn"
+
+    @Option(names = ["--columns", "-c"], paramLabel = "NUMBER",
+        description = ["Not yet implemented: columns"])
+    var columns: Int = 10
+
+    @Option(names = ["--word2vec", "-w"], description = ["Not yet implemented: word2vec"])
+    var lmTrainingData: Boolean = false
+
+    @Option(names = ["--token-separator", "-s"], paramLabel = "SEPARATOR",
+        description = ["Not yet implemented: token separator"])
+    var tokenSeparator: String = "\n"
+
+    @Option(names = ["--offsets"], description = ["Not yet implemented: offsets"])
+    var offsets: Boolean = false
+
+    @Option(names = ["--comments"], description = ["Not yet implemented: comments"])
+    var comments: Boolean = false
+
+    @Option(names = ["--extract-metadata-regex", "-m"], paramLabel = "REGEX",
+        description = ["Not yet implemented: extract metadata regex"])
+    var extractMetadataRegex: MutableList<String> = mutableListOf()
+
+    override fun call(): Int {
+        LOGGER.info("Processing zip files: ${zipFileNames!!.joinToString(", ")}")
+        korapxml2conllu(zipFileNames!!) // run the conversion on the zip files given as positional parameters
+        return 0 // constant exit code
+    }
     private val LOGGER: Logger = Logger.getLogger(KorapXml2Conllu::class.java.name)
 
-    fun main(args: Array<String?>?) {
+    fun korapxml2conllu(args: Array<String>) {
         val executor: ExecutorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors())
         val texts: ConcurrentHashMap<String, String> = ConcurrentHashMap()
         val sentences: ConcurrentHashMap<String, Array<Span>> = ConcurrentHashMap()
@@ -33,7 +92,7 @@
                 LOGGER.severe("Usage: KorapXml2Conllu <zipfile1> [<zipfile2> ...]")
                 return
         }
-        var zips:Array<String?> = args
-        if (args.size == 1 && args[0]!!.matches(Regex(".*\\.([^/.]+)\\.zip$")) == true) {
-            val baseZip = args[0]!!.replace(Regex("\\.([^/.]+)\\.zip$"), ".zip")
+        var zips: Array<String> = args
+        if (args.size == 1 && args[0].matches(Regex(".*\\.([^/.]+)\\.zip$"))) {
+            val baseZip = args[0].replace(Regex("\\.([^/.]+)\\.zip$"), ".zip") // "x.<foundry>.zip" -> "x.zip"
             if (File(baseZip).exists()) {
@@ -71,7 +130,7 @@
         return zipFileName.replace(Regex(".*\\.([^/.]+)\\.zip$"), "$1")
     }
 
-    private fun getFoundryFromZipFileNames(zipFileNames: Array<String?>): String {
+    private fun getFoundryFromZipFileNames(zipFileNames: Array<String>): String {
         for (zipFileName in zipFileNames) {
-            val foundry = getFoundryFromZipFileName(zipFileName!!)
+            val foundry = getFoundryFromZipFileName(zipFileName)
             if (foundry != "base") {
@@ -316,9 +375,19 @@
 
 }
 
+/**
+ * Command-line entry point: passes [args] to picocli's `CommandLine.execute` and
+ * exits the JVM with the code it returns.
+ */
+fun main(args: Array<String>): Unit = exitProcess(CommandLine(KorapXml2Conllu()).execute(*args))
 
-fun main(args: Array<String?>?) {
-    System.setProperty("file.encoding", "UTF-8")
-    KorapXml2Conllu().main(args)
+/**
+ * Same as [main], but returns the exit code instead of terminating the JVM,
+ * which lets the tests call it in-process.
+ *
+ * @param args command-line arguments, handed to picocli unchanged
+ * @return the exit code returned by `CommandLine.execute`
+ */
+fun debug(args: Array<String>): Int {
+    return CommandLine(KorapXml2Conllu()).execute(*args)
 }
-
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
index 8142f67..9ec3d5b 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
@@ -39,9 +39,8 @@
 
     @Test
     fun canConvertGOE() {
-        val classUnderTest = KorapXml2Conllu()
         val args = arrayOf(loadResource("goe.zip").path)
-        classUnderTest.main(args)
+        debug(args)
         assertContains(
             outContent.toString(),
             "# foundry = base"
@@ -53,9 +52,8 @@
     }
     @Test
     fun canConvertWithMorphoAnnotations() {
-        val classUnderTest = KorapXml2Conllu()
         val args = arrayOf(loadResource("goe.zip").path, loadResource("goe.tree_tagger.zip").path)
-        classUnderTest.main(args)
+        debug(args)
         assertContains(
             outContent.toString(),
             "# foundry = tree_tagger"
@@ -67,9 +65,8 @@
     }
     @Test
     fun canInferBaseName() {
-        val classUnderTest = KorapXml2Conllu()
         val args = arrayOf(goeTreeTagger)
-        classUnderTest.main(args)
+        debug(args)
         assertContains(
             outContent.toString(),
             "# foundry = tree_tagger"
@@ -82,10 +79,8 @@
 
     @Test
     fun canConvertWfdWithMorphoAnnotations() {
-        val classUnderTest = KorapXml2Conllu()
         val args = arrayOf(loadResource("wdf19.zip").path, loadResource("wdf19.tree_tagger.zip").path)
-        System.setOut(PrintStream(outContent))
-        classUnderTest.main(args)
+        debug(args)
         assertContains(
             outContent.toString(),
             "# foundry = tree_tagger"
@@ -96,11 +91,19 @@
         )
     }
 
+    @Test
+    fun canPrintHelp() {
+        debug(arrayOf("-h")) // usage help is expected on the captured stdout (outContent)
+        assertContains(
+            outContent.toString(),
+            "--s-bounds-from-morpho" // one of the newly declared long options
+        )
+    }
+
     @Ignore("for some reason not working")
     fun canConvertMorphoFeatureAnnotations() {
-        val classUnderTest = KorapXml2Conllu()
         val args = arrayOf(goe, goeMarmot)
-        classUnderTest.main(args)
+        debug(args)
         assertContains(
             outContent.toString(),
             "9\tentzücke\tentzücken\t_\tVVFIN\tnumber=sg|person=3|tense=pres|mood=subj\t_\t_\t_\t1.000000"