Drop slow udpipe2 and use maltparser and marmot instead
diff --git a/Makefile b/Makefile
index 9198048..eb0aca7 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@
.PHONY: all clean test krill index deploy server-log server-status
-.PRECIOUS: %.zip %.tree_tagger.zip %.ud.zip %.spacy.zip %.i5.xml %.tar
+.PRECIOUS: %.zip %.tree_tagger.zip %.ud.zip %.marmot-malt.zip %.spacy.zip %.i5.xml %.tar
.DELETE_ON_ERROR:
@@ -45,12 +45,23 @@
%.spacy.zip: %.zip
$(KORAPXML2CONLLU) $< | pv | docker run --rm -i korap/conllu2spacy | conllu2korapxml > $@
+# Download the de.marmot model file that the %.marmot-malt.zip rule passes to the tagger.
+models/de.marmot:
+	mkdir -p $(@D)
+	wget --output-document=$@ https://cistern.cis.lmu.de/marmot/models/CURRENT/spmrl/de.marmot
+
+# Download the german.mco model; the remote path under /tools/ is the target path itself ($@).
+models/german.mco:
+	mkdir -p $(@D)
+	wget --output-document=$@ https://corpora.ids-mannheim.de/tools/$@
+
+# Annotate %.zip with the MarMoT (-t) and MaltParser (-P) models and repack the result as KorAP-XML.
+# A copy of the intermediate CoNLL-U stream is kept in $(TARGET_DIR); it is named after the
+# stem's file part ($(*F)) so that different corpora do not overwrite each other's copy.
+%.marmot-malt.zip: %.zip models/de.marmot models/german.mco
+	$(KORAPXML2CONLLU) -t marmot:models/de.marmot -P malt:models/german.mco $< | tee $(TARGET_DIR)/$(*F).marmot-malt.conllu | conllu2korapxml > $@
+
%.ud.zip: %.zip
$(KORAPXML2CONLLU) $< | pv | ./scripts/udpipe2 | conllu2korapxml > $@
-%.krill.tar: %.zip %.ud.zip %.tree_tagger.zip %.spacy.zip
+# Run korapxml2krill over the base corpus zip and every annotation zip listed as a prerequisite;
+# each prerequisite becomes one -i argument and the output goes to $(basename $@).
+%.krill.tar: %.zip %.marmot-malt.zip %.tree_tagger.zip %.spacy.zip
mkdir -p $(basename $@)
- korapxml2krill archive --quiet -w -z -cfg krill-korap4dnb.cfg --non-word-tokens --meta I5 -i $< -i $(word 2,$^) -i $(word 3,$^) -o $(basename $@)
+	korapxml2krill archive --quiet -w -z -cfg krill-korap4dnb.cfg --non-word-tokens --meta I5 $(addprefix -i ,$^) -o $(basename $@)
%.json: %.krill.tar
rm -rf $@
diff --git a/lib/korapxml2conllu.jar b/lib/korapxml2conllu.jar
index e8f666a..9a4f2d3 100644
--- a/lib/korapxml2conllu.jar
+++ b/lib/korapxml2conllu.jar
Binary files differ