Implement -c/--columns option to set the number of output columns (1-10)
Change-Id: I6600ebcaa723734699180f35e782034d808d3e93
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
index cc36537..bdaf25b 100644
--- a/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxmltools/KorapXml2Conllu.kt
@@ -20,6 +20,7 @@
import java.util.zip.ZipFile
import javax.xml.parsers.DocumentBuilder
import javax.xml.parsers.DocumentBuilderFactory
+import kotlin.math.min
import kotlin.system.exitProcess
@Command(
@@ -61,7 +62,9 @@
var logLevel: String = "WARNING"
@Option(
- names = ["--columns", "-c"], paramLabel = "NUMBER", description = ["Not yet implemented: columns"]
+ names = ["--columns", "-c"],
+ paramLabel = "NUMBER",
+ description = ["Number of columns. 1 means just the token. Default: ${"$"}{DEFAULT-VALUE}", "Possible values: 1-10"]
)
var columns: Int = 10
@@ -81,7 +84,7 @@
@Option(names = ["--offsets"], description = ["Not yet implemented: offsets"])
var offsets: Boolean = false
- @Option(names = ["--comments"], description = ["Not yet implemented: comments"])
+ @Option(names = ["--comments", "-C"], description = ["Not yet implemented: comments"])
var comments: Boolean = false
@Option(
@@ -267,13 +270,14 @@
mfs.head!!,
mfs.deprel!!,
mfs.deps!!,
- mfs.misc!!
+ mfs.misc!!,
+ columns
)
)
} else {
output.append(
printConlluToken(
- token_index, texts[docId]!!.substring(span.from, span.to)
+ token_index, texts[docId]!!.substring(span.from, span.to), columns = columns
)
)
}
@@ -311,9 +315,15 @@
head: String = "_",
deprel: String = "_",
deps: String = "_",
- misc: String = "_"
+ misc: String = "_",
+ columns: Int = 10
): String {
- return ("$token_index\t$token\t$lemma\t$upos\t$xpos\t$feats\t$head\t$deprel\t$deps\t$misc\n")
+ when (columns) {
+ 1 -> return ("$token\n")
+ 10 -> return ("$token_index\t$token\t$lemma\t$upos\t$xpos\t$feats\t$head\t$deprel\t$deps\t$misc\n")
+ else -> return arrayOf(token_index, token, lemma, upos, xpos, feats, head, deprel, deps, misc).slice(0..min(columns, 10) - 1)
+ .joinToString("\t") + "\n"
+ }
}
private fun tokenOffsetsInSentence(
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
index 3efbb6a..fe47dab 100644
--- a/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxmltools/KorapXml2ConlluTest.kt
@@ -113,6 +113,26 @@
}
@Test
+ fun respectsColumnsParam() {
+ val args = arrayOf("-c","5", loadResource("wdf19.zip").path)
+ debug(args)
+ assertContains(
+ outContent.toString(),
+ "42\tparfaitement\t_\t_\t_\n"
+ )
+ }
+
+ @Test
+ fun respectsSpecial1ColumnsParam() {
+ val args = arrayOf("-c","1", loadResource("wdf19.zip").path)
+ debug(args)
+ assertContains(
+ outContent.toString(),
+ "\nparfaitement\n"
+ )
+ }
+
+ @Test
fun w2vOptionWorks() {
val args = arrayOf("-w", loadResource("wdf19.zip").path)
debug(args)