Initial import

Change-Id: Ifd0a927bc5c9fea0e675ccd730e11d501632eebb
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..00a51af
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,6 @@
+#
+# https://help.github.com/articles/dealing-with-line-endings/
+#
+# These are explicitly windows files and should use crlf
+*.bat           text eol=crlf
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1b6985c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+# Ignore Gradle project-specific cache directory
+.gradle
+
+# Ignore Gradle build output directory
+build
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/compiler.xml b/.idea/compiler.xml
new file mode 100644
index 0000000..b589d56
--- /dev/null
+++ b/.idea/compiler.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CompilerConfiguration">
+    <bytecodeTargetLevel target="17" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/gradle.xml b/.idea/gradle.xml
new file mode 100644
index 0000000..efd7cbf
--- /dev/null
+++ b/.idea/gradle.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="GradleSettings">
+    <option name="linkedExternalProjectsSettings">
+      <GradleProjectSettings>
+        <option name="externalProjectPath" value="$PROJECT_DIR$" />
+        <option name="modules">
+          <set>
+            <option value="$PROJECT_DIR$" />
+            <option value="$PROJECT_DIR$/app" />
+          </set>
+        </option>
+      </GradleProjectSettings>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml
new file mode 100644
index 0000000..fdc392f
--- /dev/null
+++ b/.idea/jarRepositories.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="RemoteRepositoriesConfiguration">
+    <remote-repository>
+      <option name="id" value="central" />
+      <option name="name" value="Maven Central repository" />
+      <option name="url" value="https://repo1.maven.org/maven2" />
+    </remote-repository>
+    <remote-repository>
+      <option name="id" value="jboss.community" />
+      <option name="name" value="JBoss Community repository" />
+      <option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
+    </remote-repository>
+    <remote-repository>
+      <option name="id" value="MavenRepo" />
+      <option name="name" value="MavenRepo" />
+      <option name="url" value="https://repo.maven.apache.org/maven2/" />
+    </remote-repository>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/kotlinc.xml b/.idea/kotlinc.xml
new file mode 100644
index 0000000..8d81632
--- /dev/null
+++ b/.idea/kotlinc.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="KotlinJpsPluginSettings">
+    <option name="version" value="1.9.22" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..49504ef
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ExternalStorageConfigurationManager" enabled="true" />
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_17" default="true" project-jdk-name="17" project-jdk-type="JavaSDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..288b36b
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/app/.idea/.gitignore b/app/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/app/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/app/.idea/gradle.xml b/app/.idea/gradle.xml
new file mode 100644
index 0000000..038e045
--- /dev/null
+++ b/app/.idea/gradle.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="GradleSettings">
+    <option name="linkedExternalProjectsSettings">
+      <GradleProjectSettings>
+        <option name="externalProjectPath" value="$PROJECT_DIR$" />
+        <option name="gradleJvm" value="17" />
+        <option name="modules">
+          <set>
+            <option value="$PROJECT_DIR$" />
+          </set>
+        </option>
+      </GradleProjectSettings>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/app/.idea/misc.xml b/app/.idea/misc.xml
new file mode 100644
index 0000000..6ed36dd
--- /dev/null
+++ b/app/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ExternalStorageConfigurationManager" enabled="true" />
+</project>
\ No newline at end of file
diff --git a/app/.idea/vcs.xml b/app/.idea/vcs.xml
new file mode 100644
index 0000000..b2bdec2
--- /dev/null
+++ b/app/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/app/build.gradle b/app/build.gradle
new file mode 100644
index 0000000..5d9953d
--- /dev/null
+++ b/app/build.gradle
@@ -0,0 +1,72 @@
+import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
+
+/*
+ * This file was generated by the Gradle 'init' task.
+ *
+ * This generated file contains a sample Kotlin application project to get you started.
+ * For more details take a look at the 'Building Java & JVM projects' chapter in the Gradle
+ * User Manual available at https://docs.gradle.org/7.4.2/userguide/building_java_projects.html
+ */
+
+
+plugins {
+    // Apply the org.jetbrains.kotlin.jvm Plugin to add support for Kotlin.
+    id 'org.jetbrains.kotlin.jvm' version '1.9.22'
+
+    // Apply the application plugin to add support for building a CLI application in Java.
+    id 'application'
+    id 'com.github.johnrengelman.shadow' version '7.1.2'
+}
+
+
+repositories {
+    // Use Maven Central for resolving dependencies.
+    mavenCentral()
+}
+
+dependencies {
+    // Align versions of all Kotlin components
+    implementation platform('org.jetbrains.kotlin:kotlin-bom')
+
+    // Use the Kotlin standard library.
+    implementation 'org.jetbrains.kotlin:kotlin-stdlib'
+
+    // This dependency is used by the application.
+    implementation 'com.google.guava:guava:33.0.0-jre'
+
+    // Use the Kotlin test library.
+    testImplementation 'org.jetbrains.kotlin:kotlin-test'
+
+    // Use the Kotlin JUnit integration.
+    testImplementation 'org.jetbrains.kotlin:kotlin-test-junit'
+    testImplementation "org.jetbrains.kotlin:kotlin-test:1.9.22"
+}
+
+
+application {
+    // Define the main class for the application.
+    mainClass = 'de.ids_mannheim.korapxml2conllu.AppKt'
+}
+
+jar {
+    // Lists the runtime dependencies in the manifest Class-Path; it does not bundle them into this jar
+    // (the shadow plugin applied above is what can build a self-contained jar).
+    // The entries are bare file names, so the dependency jars must sit next to the application jar.
+
+    manifest.attributes(
+            'Class-Path': configurations.runtimeClasspath.collect { it.getName() }.join(' '),
+            'Main-Class': "de.ids_mannheim.korapxml2conllu.AppKt"
+    )
+
+}
+
+
+configurations {
+    runtimeLib.extendsFrom implementation
+}
+
+tasks.withType(KotlinCompile).configureEach {
+    kotlinOptions {
+        jvmTarget = '17'
+    }
+}
\ No newline at end of file
diff --git a/app/src/main/kotlin/de/ids_mannheim/korapxml2conllu/App.kt b/app/src/main/kotlin/de/ids_mannheim/korapxml2conllu/App.kt
new file mode 100644
index 0000000..e49b01a
--- /dev/null
+++ b/app/src/main/kotlin/de/ids_mannheim/korapxml2conllu/App.kt
@@ -0,0 +1,184 @@
+package de.ids_mannheim.korapxml2conllu
+
+import javax.xml.parsers.DocumentBuilder
+import javax.xml.parsers.DocumentBuilderFactory
+import java.io.InputStream
+import java.util.Arrays
+import java.util.concurrent.ConcurrentHashMap
+import java.util.concurrent.ExecutorService
+import java.util.concurrent.Executors
+import java.util.stream.IntStream
+import java.util.zip.ZipFile
+import org.w3c.dom.Document
+import org.w3c.dom.Element
+import org.w3c.dom.NodeList
+import org.xml.sax.InputSource
+import java.io.InputStreamReader
+import java.util.logging.Logger
+
+class App {
+    private val LOGGER: Logger = Logger.getLogger(App::class.java.name)
+
+    /**
+     * Processes every zip archive named in [args]; the result is printed to standard output.
+     *
+     * Each archive is handed to a fixed thread pool sized to the available processors.
+     * A null [args] array and null entries are skipped.
+     */
+    fun main(args: Array<String?>?) {
+        val executor: ExecutorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors())
+        val texts: ConcurrentHashMap<String, String> = ConcurrentHashMap()
+        val sentences: ConcurrentHashMap<String, Array<Span>> = ConcurrentHashMap()
+        val tokens: ConcurrentHashMap<String, Array<Span>> = ConcurrentHashMap()
+
+        args?.filterNotNull()?.forEach { zipFilePath ->
+            executor.submit { processZipFile(zipFilePath, texts, sentences, tokens) }
+        }
+
+        executor.shutdown()
+        // Sleep until the pool has drained; polling isTerminated in an empty loop kept one core busy.
+        while (!executor.awaitTermination(1, java.util.concurrent.TimeUnit.SECONDS)) {
+            // Wait for all tasks to finish
+        }
+
+        // Further processing as needed
+    }
+
+    // Shared by the parallel workers: the path of each document's tokens.xml, and the documents already
+    // printed, so that whichever worker completes a document prints it exactly once with the right filename.
+    private val tokenFileNames: ConcurrentHashMap<String, String> = ConcurrentHashMap()
+    private val printedDocs: MutableSet<String> = ConcurrentHashMap.newKeySet()
+
+    /**
+     * Reads the data.xml, structure.xml and tokens.xml entries of one zip archive and prints a document
+     * as soon as its text, sentence spans and token spans have all been collected.
+     * Entries are parsed in parallel; a failing entry or archive is reported with printStackTrace only.
+     */
+    private fun processZipFile(
+        zipFilePath: String,
+        texts: ConcurrentHashMap<String, String>,
+        sentences: ConcurrentHashMap<String, Array<Span>>,
+        tokens: ConcurrentHashMap<String, Array<Span>>
+    ) {
+        try {
+            ZipFile(zipFilePath).use { zipFile ->
+                zipFile.stream().parallel().forEach { zipEntry ->
+                    try {
+                        if (zipEntry.name.matches(Regex(".*(data|tokens|structure)\\.xml$"))) {
+                            val inputStream: InputStream = zipFile.getInputStream(zipEntry)
+                            val dbFactory: DocumentBuilderFactory = DocumentBuilderFactory.newInstance()
+                            val dBuilder: DocumentBuilder = dbFactory.newDocumentBuilder()
+                            val doc: Document = dBuilder.parse( InputSource( InputStreamReader(inputStream, "UTF-8")))
+                            doc.documentElement.normalize()
+                            val docId: String = doc.documentElement.getAttribute("docid")
+                            val fileName =
+                                zipEntry.name.replace(Regex(".*?/((data|tokens|structure)\\.xml)$"), "$1")
+                            when (fileName) {
+                                "data.xml" -> {
+                                    val textsList: NodeList = doc.getElementsByTagName("text")
+                                    if (textsList.length > 0) {
+                                        texts[docId] = textsList.item(0).textContent
+                                    }
+                                }
+                                "structure.xml" -> { sentences[docId] = extractSentenceSpans(doc.getElementsByTagName("span")) }
+                                "tokens.xml" -> {
+                                    // Store the name before publishing the spans, so the printing worker sees it.
+                                    tokenFileNames[docId] = zipEntry.name
+                                    tokens[docId] = extractSpans(doc.getElementsByTagName("span"))
+                                }
+                            }
+                            val text = texts[docId]
+                            val sentenceSpans = sentences[docId]
+                            val tokenSpans = tokens[docId]
+                            // add() returns true for a single worker only, which rules out duplicate output.
+                            if (text != null && sentenceSpans != null && tokenSpans != null && printedDocs.add(docId)) {
+                                var token_index = 0
+                                var real_token_index = 0
+                                var sentence_index = 0
+                                synchronized(System.out) {
+                                    println("# foundry = base")
+                                    println("# filename = ${tokenFileNames[docId] ?: ""}")
+                                    printTokenOffsetsInSentence(sentences, docId, sentence_index, real_token_index, tokens)
+                                    tokenSpans.forEach { span ->
+                                        token_index++
+                                        // Tokens after the last sentence stay in it rather than indexing past the array.
+                                        if (sentence_index + 1 < sentenceSpans.size && span.from >= sentenceSpans[sentence_index].to) {
+                                            println()
+                                            sentence_index++
+                                            token_index = 1
+                                            printTokenOffsetsInSentence(sentences, docId, sentence_index, real_token_index, tokens)
+                                        }
+                                        println("$token_index\t${span.from}\t${span.to}\t${sentenceSpans[sentence_index].to}\t" + text.substring(span.from, span.to))
+                                        real_token_index++
+                                    }
+                                }
+                            }
+                        }
+                    } catch (e: Exception) {
+                        e.printStackTrace()
+                    }
+                }
+            }
+        } catch (e: Exception) {
+            e.printStackTrace()
+        }
+    }
+
+    /**
+     * Prints the "# start_offsets" and "# end_offsets" comment lines of one sentence. Each line starts with
+     * the sentence boundary (start of the token at [token_index] / end of the sentence), followed by the
+     * offsets of the sentence's tokens. Nothing is printed if the sentence or that first token is missing.
+     */
+    private fun printTokenOffsetsInSentence(
+        sentences: ConcurrentHashMap<String, Array<Span>>, docId: String, sentence_index: Int,
+        token_index: Int, tokens: ConcurrentHashMap<String, Array<Span>>
+    ) {
+        val sentenceEndOffset = sentences[docId]?.getOrNull(sentence_index)?.to ?: return
+        val docTokens = tokens[docId] ?: return
+        if (token_index !in docTokens.indices) return
+
+        // A token belongs to the sentence as long as it starts before the sentence end. The former test
+        // `to < end` left out the last token, whose end offset typically equals the sentence end.
+        val inSentence = docTokens.asList().subList(token_index, docTokens.size).takeWhile { it.from < sentenceEndOffset }
+        println("# start_offsets = " + docTokens[token_index].from + inSentence.joinToString("") { " " + it.from })
+        println("# end_offsets = " + sentenceEndOffset + inSentence.joinToString("") { " " + it.to })
+    }
+
+    /**
+     * Converts every element node of [spans] into a [Span] built from its "from" and "to" attributes.
+     *
+     * @throws NumberFormatException if one of the two attributes is not a valid integer
+     */
+    private fun extractSpans(spans: NodeList): Array<Span> {
+        return (0 until spans.length)
+            .map { spans.item(it) }
+            .filterIsInstance<Element>()
+            .map { Span(it.getAttribute("from").toInt(), it.getAttribute("to").toInt()) }
+            .toTypedArray()
+    }
+
+    /**
+     * Returns a [Span] ("from"/"to" attributes) for each element of [spans] whose first `f` descendant has
+     * the text "s". Elements without any `f` descendant are skipped instead of raising a NullPointerException.
+     */
+    private fun extractSentenceSpans(spans: NodeList): Array<Span> {
+        return (0 until spans.length)
+            .map { spans.item(it) }
+            .filterIsInstance<Element>()
+            .filter { it.getElementsByTagName("f").item(0)?.textContent == "s" }
+            .map { Span(it.getAttribute("from").toInt(), it.getAttribute("to").toInt()) }
+            .toTypedArray()
+    }
+
+
+    internal class Span(var from: Int, var to: Int)
+
+
+}
+
+
+fun main(args: Array<String?>?) {
+    System.setProperty("file.encoding", "UTF-8")
+    App().main(args) // App.main returns Unit; wrapping it in println() appended "kotlin.Unit" to the output
+}
+
diff --git a/app/src/test/kotlin/de/ids_mannheim/korapxml2conllu/AppTest.kt b/app/src/test/kotlin/de/ids_mannheim/korapxml2conllu/AppTest.kt
new file mode 100644
index 0000000..2fe90b3
--- /dev/null
+++ b/app/src/test/kotlin/de/ids_mannheim/korapxml2conllu/AppTest.kt
@@ -0,0 +1,19 @@
+package de.ids_mannheim.korapxml2conllu
+
+import java.net.URL
+import kotlin.test.Test
+import kotlin.test.assertNotNull
+
+class AppTest {
+    fun loadResource(path: String): URL { // looks a resource up on the context class loader; throws if it is missing
+        val resource = Thread.currentThread().contextClassLoader.getResource(path)
+        requireNotNull(resource) { "Resource $path not found" }
+        return resource
+    }
+
+    @Test fun appHasAGreeting() { // smoke test: runs App.main on the bundled goe.zip
+        val classUnderTest = App()
+        val args = arrayOf(loadResource("goe.zip").path)
+        assertNotNull(classUnderTest.main(args), "app should have a greeting") // NOTE(review): main() returns Unit, so this only fails if main() throws
+    }
+}
diff --git a/app/src/test/resources/goe.zip b/app/src/test/resources/goe.zip
new file mode 100644
index 0000000..db44e94
--- /dev/null
+++ b/app/src/test/resources/goe.zip
Binary files differ
diff --git a/build.gradle b/build.gradle
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/build.gradle
diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000..41d9927
--- /dev/null
+++ b/gradle/wrapper/gradle-wrapper.jar
Binary files differ
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000..aa991fc
--- /dev/null
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,5 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.4.2-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/gradlew b/gradlew
new file mode 100755
index 0000000..1b6c787
--- /dev/null
+++ b/gradlew
@@ -0,0 +1,234 @@
+#!/bin/sh
+
+#
+# Copyright © 2015-2021 the original authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+#
+#   Gradle start up script for POSIX generated by Gradle.
+#
+#   Important for running:
+#
+#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
+#       noncompliant, but you have some other compliant shell such as ksh or
+#       bash, then to run this script, type that shell name before the whole
+#       command line, like:
+#
+#           ksh Gradle
+#
+#       Busybox and similar reduced shells will NOT work, because this script
+#       requires all of these POSIX shell features:
+#         * functions;
+#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
+#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
+#         * compound commands having a testable exit status, especially «case»;
+#         * various built-in commands including «command», «set», and «ulimit».
+#
+#   Important for patching:
+#
+#   (2) This script targets any POSIX shell, so it avoids extensions provided
+#       by Bash, Ksh, etc; in particular arrays are avoided.
+#
+#       The "traditional" practice of packing multiple parameters into a
+#       space-separated string is a well documented source of bugs and security
+#       problems, so this is (mostly) avoided, by progressively accumulating
+#       options in "$@", and eventually passing that to Java.
+#
+#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
+#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
+#       see the in-line comments for details.
+#
+#       There are tweaks for specific operating systems such as AIX, CygWin,
+#       Darwin, MinGW, and NonStop.
+#
+#   (3) This script is generated from the Groovy template
+#       https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+#       within the Gradle project.
+#
+#       You can find Gradle at https://github.com/gradle/gradle/.
+#
+##############################################################################
+
+# Attempt to set APP_HOME
+
+# Resolve links: $0 may be a link
+app_path=$0
+
+# Need this for daisy-chained symlinks.
+while
+    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
+    [ -h "$app_path" ]
+do
+    ls=$( ls -ld "$app_path" )
+    link=${ls#*' -> '}
+    case $link in             #(
+      /*)   app_path=$link ;; #(
+      *)    app_path=$APP_HOME$link ;;
+    esac
+done
+
+APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
+
+APP_NAME="Gradle"
+APP_BASE_NAME=${0##*/}
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD=maximum
+
+warn () {
+    echo "$*"
+} >&2
+
+die () {
+    echo
+    echo "$*"
+    echo
+    exit 1
+} >&2
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "$( uname )" in                #(
+  CYGWIN* )         cygwin=true  ;; #(
+  Darwin* )         darwin=true  ;; #(
+  MSYS* | MINGW* )  msys=true    ;; #(
+  NONSTOP* )        nonstop=true ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD=$JAVA_HOME/jre/sh/java
+    else
+        JAVACMD=$JAVA_HOME/bin/java
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD=java
+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
+    case $MAX_FD in #(
+      max*)
+        MAX_FD=$( ulimit -H -n ) ||
+            warn "Could not query maximum file descriptor limit"
+    esac
+    case $MAX_FD in  #(
+      '' | soft) :;; #(
+      *)
+        ulimit -n "$MAX_FD" ||
+            warn "Could not set maximum file descriptor limit to $MAX_FD"
+    esac
+fi
+
+# Collect all arguments for the java command, stacking in reverse order:
+#   * args from the command line
+#   * the main class name
+#   * -classpath
+#   * -D...appname settings
+#   * --module-path (only if needed)
+#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if "$cygwin" || "$msys" ; then
+    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
+    CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
+
+    JAVACMD=$( cygpath --unix "$JAVACMD" )
+
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    for arg do
+        if
+            case $arg in                                #(
+              -*)   false ;;                            # don't mess with options #(
+              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
+                    [ -e "$t" ] ;;                      #(
+              *)    false ;;
+            esac
+        then
+            arg=$( cygpath --path --ignore --mixed "$arg" )
+        fi
+        # Roll the args list around exactly as many times as the number of
+        # args, so each arg winds up back in the position where it started, but
+        # possibly modified.
+        #
+        # NB: a `for` loop captures its iteration list before it begins, so
+        # changing the positional parameters here affects neither the number of
+        # iterations, nor the values presented in `arg`.
+        shift                   # remove old arg
+        set -- "$@" "$arg"      # push replacement arg
+    done
+fi
+
+# Collect all arguments for the java command;
+#   * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
+#     shell script including quotes and variable substitutions, so put them in
+#     double quotes to make sure that they get re-expanded; and
+#   * put everything else in single quotes, so that it's not re-expanded.
+
+set -- \
+        "-Dorg.gradle.appname=$APP_BASE_NAME" \
+        -classpath "$CLASSPATH" \
+        org.gradle.wrapper.GradleWrapperMain \
+        "$@"
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+#   set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor command substitution, so instead we
+# post-process each arg (as a line of input to sed) to backslash-escape any
+# character that might be a shell metacharacter, then use eval to reverse
+# that process (while maintaining the separation between arguments), and wrap
+# the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
+
+eval "set -- $(
+        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+        xargs -n1 |
+        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+        tr '\n' ' '
+    )" '"$@"'
+
+exec "$JAVACMD" "$@"
diff --git a/gradlew.bat b/gradlew.bat
new file mode 100644
index 0000000..107acd3
--- /dev/null
+++ b/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem      https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/settings.gradle b/settings.gradle
new file mode 100644
index 0000000..98db4da
--- /dev/null
+++ b/settings.gradle
@@ -0,0 +1,11 @@
+/*
+ * This file was generated by the Gradle 'init' task.
+ *
+ * The settings file is used to specify which projects to include in your build.
+ *
+ * Detailed information about configuring a multi-project build in Gradle can be found
+ * in the user manual at https://docs.gradle.org/7.4.2/userguide/multi_project_builds.html
+ */
+
+rootProject.name = 'korapxml2conllu'
+include('app')