Add gender annotations to pipeline
Change-Id: Ib3544a83915038e8a917a366663701729f84f21b
diff --git a/Makefile b/Makefile
index bb4ec13..02da6d4 100644
--- a/Makefile
+++ b/Makefile
@@ -29,7 +29,7 @@
.PHONY: all clean test i5 i5valid krill malt index deploy show-server-log show-server-status
-.PRECIOUS: $(TARGET_DIR)/%.i5.xml $(TARGET_DIR)/dnb%.pre.i5.xml %.zip %.tree_tagger.zip %.ud.zip %.marmot-malt.zip %.spacy.zip %.corenlp.zip %.i5.xml %.tar
+.PRECIOUS: $(TARGET_DIR)/%.i5.xml $(TARGET_DIR)/dnb%.pre.i5.xml %.zip %.tree_tagger.zip %.ud.zip %.marmot-malt.zip %.spacy.zip %.corenlp.zip %.gender.zip %.i5.xml %.tar
all: index
@@ -86,7 +86,7 @@
%.tree_tagger.zip: %.zip
$(KORAPXMLTOOL) -T treetagger -t zip -f -D $(TARGET_DIR) $<
-%.gender.zip: %.zip bin/conllu-gender
+%.gender.zip: %.zip | bin/conllu-gender
$(KORAPXMLTOOL) -j 1 -A "bin/conllu-gender -s" -l WARNING -F gender -t zip --force -D $(TARGET_DIR) $<
%.spacy.zip: %.zip | bin/korapxmltool
@@ -112,6 +112,11 @@
mkdir -p models
curl -sL -o $@ https://corpora.ids-mannheim.de/tools/$@
+bin/conllu-gender: # no prerequisites: fetched only when missing, so never publish a failed or partial download
+	mkdir -p $(@D)
+	curl -fsSL -o $@.part https://corpora.ids-mannheim.de/tools/$@
+	chmod +x $@.part && mv -f $@.part $@
+
%.marmot-malt.zip: %.zip models/de.marmot models/german.mco
$(MARMOTMALTOOL) -T marmot -P malt -t zip -f -D $(TARGET_DIR) $<
@@ -123,11 +128,11 @@
%.ud.zip: %.zip
$(KORAPXMLTOOL) $< | pv | ./scripts/udpipe2 | conllu2korapxml > $@
-%.krill.tar: %.zip %.marmot-malt.zip %.tree_tagger.zip %.spacy.zip %.corenlp.zip
- mkdir -p ${BUILD_DIR}/krill/$(basename $@)
- mkdir -p $(basename $@)
- #K2K_PUBLISHER_STRING=1 K2K_TRANSLATOR_TEXT=1 korapxml2krill archive -w -z -cfg krill-korap4dnb.cfg -c ${BUILD_DIR}/krill/$(basename $@)/korapxml2krill.cache -j 30 -te ${BUILD_DIR}/krill/$(basename $@) --non-word-tokens --meta I5 -i $< -i $(word 2,$^) -i $(word 3,$^) -i $(word 4,$^) -o $(basename $@)
- K2K_PUBLISHER_STRING=1 K2K_TRANSLATOR_TEXT=1 $(KORAPXMLTOOL) --non-word-tokens -linfo -f -t krill -D $(TARGET_DIR) $(basename $<)*.zip
+%.krill.tar: %.zip %.marmot-malt.zip %.tree_tagger.zip %.spacy.zip %.corenlp.zip %.gender.zip
+ # mkdir -p ${BUILD_DIR}/krill/$(basename $@)
+ # mkdir -p $(basename $@)
+ # K2K_PUBLISHER_STRING=1 K2K_TRANSLATOR_TEXT=1 korapxml2krill archive -w -z -cfg krill-korap4dnb.cfg -c ${BUILD_DIR}/krill/$(basename $@)/korapxml2krill.cache -j 30 -te ${BUILD_DIR}/krill/$(basename $@) --non-word-tokens --meta I5 -i $< -i $(word 2,$^) -i $(word 3,$^) -i $(word 4,$^) -o $(basename $@)
+ K2K_PUBLISHER_STRING=1 K2K_TRANSLATOR_TEXT=1 $(KORAPXMLTOOL) --non-word-tokens -linfo -f -t krill -D $(TARGET_DIR) $^
$(SLACK) "$(basename $@) krill archive created"
%.json: %.krill.tar