Import command line options from Perl implementation
The imported options are mostly unused for now.
Change-Id: I32e88ddeb06aeb2062d98b65cbd80e80a5131003
diff --git a/app/build.gradle b/app/build.gradle
index 489dee5..dbce1b5 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -14,6 +14,7 @@
// Apply the application plugin to add support for building a CLI application in Java.
id 'application'
id 'com.github.johnrengelman.shadow' version '8.1.1'
+ id ("org.jetbrains.kotlin.kapt") version "2.0.0-Beta4"
}
@@ -32,6 +33,10 @@
// This dependency is used by the application.
implementation 'com.google.guava:guava:33.0.0-jre'
+
+ kapt("info.picocli:picocli-codegen:4.7.5")
+ implementation ("info.picocli:picocli:4.7.5")
+
// Use the Kotlin test library.
testImplementation 'org.jetbrains.kotlin:kotlin-test'
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index b5b47b7..32bc9c0 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -13,15 +13,74 @@
import org.w3c.dom.Element
import org.w3c.dom.NodeList
import org.xml.sax.InputSource
+import picocli.CommandLine
+import picocli.CommandLine.Parameters
+import picocli.CommandLine.Option
+import picocli.CommandLine.Command
import java.io.File
import java.io.InputStreamReader
import java.util.HashMap
+import java.util.concurrent.Callable
import java.util.logging.Logger
+import kotlin.system.exitProcess
-class KorapXml2Conllu {
+@Command(
+ name = "KorapXml2Conllu",
+ mixinStandardHelpOptions = true,
+ version = ["KorapXml2Conllu 2.0-alpha-01"],
+ description = ["Converts KorAP XML files to CoNLL-U format"]
+)
+
+class KorapXml2Conllu : Callable<Int> {
+
+ @Parameters(arity = "1..*", description = ["At least one zip file name"])
+ var zipFileNames: Array<String>? = null
+
+ @Option(names = ["--sigle-pattern", "-p"], paramLabel = "PATTERN",
+ description = ["Not yet implemented: sigle pattern"])
+ var siglePattern: String = ""
+
+ @Option(names = ["--extract-attributes-regex", "-e"], paramLabel = "REGEX",
+ description = ["Not yet implemented: extract attributes regex"])
+ var extractAttributesRegex: String = ""
+
+ @Option(names = ["--s-bounds-from-morpho"],
+ description = ["Not yet implemented: s bounds from morpho"])
+ var sBoundsFromMorpho: Boolean = false
+
+ @Option(names = ["--log", "-l"], paramLabel = "LEVEL",
+ description = ["Not yet implemented: log level"])
+ var logLevel: String = "warn"
+
+ @Option(names = ["--columns", "-c"], paramLabel = "NUMBER",
+ description = ["Not yet implemented: columns"])
+ var columns: Int = 10
+
+ @Option(names = ["--word2vec", "-w"], description = ["Not yet implemented: word2vec"])
+ var lmTrainingData: Boolean = false
+
+ @Option(names = ["--token-separator", "-s"], paramLabel = "SEPARATOR",
+ description = ["Not yet implemented: token separator"])
+ var tokenSeparator: String = "\n"
+
+ @Option(names = ["--offsets"], description = ["Not yet implemented: offsets"])
+ var offsets: Boolean = false
+
+ @Option(names = ["--comments"], description = ["Not yet implemented: comments"])
+ var comments: Boolean = false
+
+ @Option(names = ["--extract-metadata-regex", "-m"], paramLabel = "REGEX",
+ description = ["Not yet implemented: extract metadata regex"])
+ var extractMetadataRegex: MutableList<String> = mutableListOf()
+
+ override fun call(): Int { // Callable entry point run via CommandLine.execute()
+ LOGGER.info("Processing zip files: " + zipFileNames!!.joinToString(", "))
+ korapxml2conllu(zipFileNames!!) // run the conversion; zipFileNames is declared with arity "1..*", hence the !!
+ return 0 // becomes the value returned by execute()
+ }
private val LOGGER: Logger = Logger.getLogger(KorapXml2Conllu::class.java.name)
- fun main(args: Array<String?>?) {
+ fun korapxml2conllu(args: Array<String>) {
val executor: ExecutorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors())
val texts: ConcurrentHashMap<String, String> = ConcurrentHashMap()
val sentences: ConcurrentHashMap<String, Array<Span>> = ConcurrentHashMap()
@@ -33,7 +92,7 @@
LOGGER.severe("Usage: KorapXml2Conllu <zipfile1> [<zipfile2> ...]")
return
}
- var zips:Array<String?> = args
+ var zips:Array<String> = args
if (args.size == 1 && args[0]!!.matches(Regex(".*\\.([^/.]+)\\.zip$")) == true) {
val baseZip = args[0]!!.replace(Regex("\\.([^/.]+)\\.zip$"), ".zip")
if (File(baseZip).exists()) {
@@ -71,7 +130,7 @@
return zipFileName.replace(Regex(".*\\.([^/.]+)\\.zip$"), "$1")
}
- private fun getFoundryFromZipFileNames(zipFileNames: Array<String?>): String {
+ private fun getFoundryFromZipFileNames(zipFileNames: Array<String>): String {
for (zipFileName in zipFileNames) {
val foundry = getFoundryFromZipFileName(zipFileName!!)
if (foundry != "base") {
@@ -316,9 +375,8 @@
}
+fun main(args: Array<String>) : Unit = exitProcess(CommandLine(KorapXml2Conllu()).execute(*args)) // exit status = result of execute()
-fun main(args: Array<String?>?) {
- System.setProperty("file.encoding", "UTF-8")
- KorapXml2Conllu().main(args)
+fun debug(args: Array<String>): Int { // in-process entry point called by the tests: same execute() call as main(), but without exitProcess
+ return(CommandLine(KorapXml2Conllu()).execute(*args)) // hands the execute() result back to the caller
}
-
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
index 8142f67..9ec3d5b 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
@@ -39,9 +39,8 @@
@Test
fun canConvertGOE() {
- val classUnderTest = KorapXml2Conllu()
val args = arrayOf(loadResource("goe.zip").path)
- classUnderTest.main(args)
+ debug(args)
assertContains(
outContent.toString(),
"# foundry = base"
@@ -53,9 +52,8 @@
}
@Test
fun canConvertWithMorphoAnnotations() {
- val classUnderTest = KorapXml2Conllu()
val args = arrayOf(loadResource("goe.zip").path, loadResource("goe.tree_tagger.zip").path)
- classUnderTest.main(args)
+ debug(args)
assertContains(
outContent.toString(),
"# foundry = tree_tagger"
@@ -67,9 +65,8 @@
}
@Test
fun canInferBaseName() {
- val classUnderTest = KorapXml2Conllu()
val args = arrayOf(goeTreeTagger)
- classUnderTest.main(args)
+ debug(args)
assertContains(
outContent.toString(),
"# foundry = tree_tagger"
@@ -82,10 +79,8 @@
@Test
fun canConvertWfdWithMorphoAnnotations() {
- val classUnderTest = KorapXml2Conllu()
val args = arrayOf(loadResource("wdf19.zip").path, loadResource("wdf19.tree_tagger.zip").path)
- System.setOut(PrintStream(outContent))
- classUnderTest.main(args)
+ debug(args)
assertContains(
outContent.toString(),
"# foundry = tree_tagger"
@@ -96,11 +91,19 @@
)
}
+ @Test
+ fun canPrintHelp() {
+ debug(arrayOf("-h")) // -h is provided by mixinStandardHelpOptions = true on the @Command
+ assertContains(
+ outContent.toString(), // presumably the captured stdout set up elsewhere in this test class - confirm
+ "--s-bounds-from-morpho" // a declared long option name that should appear in the usage text
+ )
+ }
+
@Ignore("for some reason not working")
fun canConvertMorphoFeatureAnnotations() {
- val classUnderTest = KorapXml2Conllu()
val args = arrayOf(goe, goeMarmot)
- classUnderTest.main(args)
+ debug(args)
assertContains(
outContent.toString(),
"9\tentzücke\tentzücken\t_\tVVFIN\tnumber=sg|person=3|tense=pres|mood=subj\t_\t_\t_\t1.000000"