Update to modern korapxmltool

Change-Id: I1fbc2135ac1eac212e076b2da6e2aa581623e379
diff --git a/Makefile b/Makefile
index 996b372..8ce1cab 100644
--- a/Makefile
+++ b/Makefile
@@ -16,9 +16,12 @@
 MAKE ?= make -j $(shell nproc)
 SLACK ?= slack
 KORAPXMLTOOL_HEAP ?= $(shell echo "$$(($(MAX_THREADS) * 2500))")
-KORAPXMLTOOL ?= java -Xmx$(KORAPXMLTOOL_HEAP)m -jar lib/korapxmltool.jar
-MARMOTMALTOOL ?= java -Xmx96000m -jar lib/korapxmltool.jar
-SPACYXMLTOOL ?= java -Xmx250g -jar lib/korapxmltool.jar
+KORAPXMLTOOL ?= bin/korapxmltool
+MARMOTMALTOOL ?= bin/korapxmltool
+SPACYXMLTOOL ?= bin/korapxmltool
+KORAPXMLTOOL_MODELS_PATH ?= models
+export KORAPXMLTOOL_MODELS_PATH
+
 SAXON ?= java -Djava.util.logging.config.file=/logging.properties -cp lib/saxon-ee-12.5.jar:lib/xmlresolver-5.2.2.jar:lib/textclassifier.jar:lib/xmlresolver-5.2.2-data.jar net.sf.saxon.Transform -expand:off -catalog:"lib/dtds/xhtml11/xhtmlcatalog.xml;lib/dtds/xhtml/dtd/xhtmlcatalog.xml"
 
 .DELETE_ON_ERROR:
@@ -26,7 +29,7 @@
 .PHONY: all clean test i5 i5valid krill malt index deploy show-server-log show-server-status
 
 
-.PRECIOUS: $(TARGET_DIR)/%.i5.xml $(TARGET_DIR)/dnb%.pre.i5.xml %.zip %.tree_tagger.zip %.ud.zip %.marmot-malt.zip %.spacy.zip %.i5.xml %.tar
+.PRECIOUS: $(TARGET_DIR)/%.i5.xml $(TARGET_DIR)/dnb%.pre.i5.xml %.zip %.tree_tagger.zip %.ud.zip %.marmot-malt.zip %.spacy.zip %.corenlp.zip %.i5.xml %.tar
 
 all: index
 
@@ -84,11 +87,11 @@
 
 
 %.tree_tagger.zip: %.zip
-	$(KORAPXMLTOOL) -T 1 -A "docker run --rm -i korap/conllu2treetagger -l german" -f zip --overwrite $<
+	$(KORAPXMLTOOL) -A "docker run -v ./models/:/local/models --rm -i korap/conllu-treetagger -l german -p" -t zip -f -D $(TARGET_DIR) $<
 #	 $(KORAPXMLTOOL) $< | pv | docker run --rm -i korap/conllu2treetagger -l german | conllu2korapxml > $@
 
 %.spacy.zip: %.zip
-	$(SPACYXMLTOOL) -T 8 -A "docker run -e SPACY_USE_DEPENDENCIES=True --rm -i korap/conllu2spacy:latest" -f zip --overwrite $<
+	$(SPACYXMLTOOL) -A "docker run -e SPACY_USE_DEPENDENCIES=True --rm -i korap/conllu2spacy:latest" -t zip -f -D $(TARGET_DIR) $<
 
 models/de.marmot:
 	mkdir -p models
@@ -102,18 +105,30 @@
 	mkdir -p models
 	curl -sL -o $@ https://corpora.ids-mannheim.de/tools/$@
 
+models/german-fast.tagger:
+	mkdir -p models
+	curl -sL -o $@ https://corpora.ids-mannheim.de/tools/$@
+
+models/germanSR.ser.gz:
+	mkdir -p models
+	curl -sL -o $@ https://corpora.ids-mannheim.de/tools/$@
+
 %.marmot-malt.zip: %.zip models/de.marmot models/german.mco
-	$(MARMOTMALTOOL) -T $(MAX_THREADS) -t marmot:models/de.marmot -P malt:models/german.mco -f zip --overwrite $<
+	$(MARMOTMALTOOL) -T marmot -P malt -t zip -f -D $(TARGET_DIR) $<
+
+%.corenlp.zip: %.zip models/german-fast.tagger models/germanSR.ser.gz
+	$(MARMOTMALTOOL) -T corenlp -P corenlp -t zip -f -D $(TARGET_DIR) $<
 
 malt: $(foreach year,$(YEARS),$(TARGET_DIR)/dnb$(year).marmot-malt.zip)
 
 %.ud.zip: %.zip
 	$(KORAPXMLTOOL) $< | pv | ./scripts/udpipe2 | conllu2korapxml > $@
 
-%.krill.tar: %.zip %.marmot-malt.zip %.tree_tagger.zip
+%.krill.tar: %.zip %.marmot-malt.zip %.tree_tagger.zip %.spacy.zip %.corenlp.zip
 	mkdir -p ${BUILD_DIR}/krill/$(basename $@)
 	mkdir -p $(basename $@)
-	K2K_PUBLISHER_STRING=1 K2K_TRANSLATOR_TEXT=1 korapxml2krill archive --quiet -w -z -cfg krill-korap4dnb.cfg -c ${BUILD_DIR}/krill/$(basename $@)/korapxml2krill.cache -j 30 -te ${BUILD_DIR}/krill/$(basename $@) --non-word-tokens --meta I5 -i $< -i $(word 2,$^) -i $(word 3,$^) -o $(basename $@)
+	#K2K_PUBLISHER_STRING=1 K2K_TRANSLATOR_TEXT=1 korapxml2krill archive -w -z -cfg krill-korap4dnb.cfg -c ${BUILD_DIR}/krill/$(basename $@)/korapxml2krill.cache -j 30 -te ${BUILD_DIR}/krill/$(basename $@) --non-word-tokens --meta I5 -i $< -i $(word 2,$^) -i $(word 3,$^) -i $(word 4,$^) -o $(basename $@)
+	K2K_PUBLISHER_STRING=1 K2K_TRANSLATOR_TEXT=1 $(KORAPXMLTOOL) --non-word-tokens -linfo -f -t krill -D $(TARGET_DIR) $(basename $<)*.zip
 	$(SLACK) "$(basename $@) krill archive created"
 
 %.json: %.krill.tar
@@ -144,4 +159,3 @@
 $(TARGET_DIR)/dnb.index: $(foreach year,$(YEARS),$(TARGET_DIR)/dnb$(year).krill.tar)
 	rm -rf $@
 	java -jar lib/Krill-Indexer.jar --progress -c lib/krill.conf -i $(subst " ",;,$^) -o $@
-
diff --git a/bin/korapxmltool b/bin/korapxmltool
new file mode 100755
index 0000000..3d168fa
--- /dev/null
+++ b/bin/korapxmltool
Binary files differ
diff --git a/lib/korapxmltool-2.20.jar b/lib/korapxmltool-2.20.jar
deleted file mode 100644
index 51e39a7..0000000
--- a/lib/korapxmltool-2.20.jar
+++ /dev/null
Binary files differ
diff --git a/lib/korapxmltool.jar b/lib/korapxmltool.jar
deleted file mode 120000
index b3d2709..0000000
--- a/lib/korapxmltool.jar
+++ /dev/null
@@ -1 +0,0 @@
-korapxmltool-2.20.jar
\ No newline at end of file