Bump korapxmltool to v2.10 and update spaCy pipeline
diff --git a/Buchpreis/LongList b/Buchpreis/LongList
index 8e6a65f..0f38056 120000
--- a/Buchpreis/LongList
+++ b/Buchpreis/LongList
@@ -1 +1 @@
-/mnt/data/LongList/E-Pub
\ No newline at end of file
+/mnt/data/KorAP@DNB/LongList/E-Pub
\ No newline at end of file
diff --git a/Makefile b/Makefile
index d6ffca4..a851556 100644
--- a/Makefile
+++ b/Makefile
@@ -12,11 +12,12 @@
DEPLOY_HOST ?= compute.ids-mannheim.de
DEPLOY_USER ?= korap
DEPLOY_PATH ?= /export/netapp/korap4dnb
-MAX_THREADS ?= 4 # $(shell nproc)
+MAX_THREADS ?= 8 # $(shell nproc)
MAKE ?= make -j $(shell nproc)
KORAPXMLTOOL_HEAP ?= $(shell echo "$$(($(MAX_THREADS) * 2500))")
KORAPXMLTOOL ?= java -Xmx$(KORAPXMLTOOL_HEAP)m -jar lib/korapxmltool.jar
MARMOTMALTOOL ?= java -Xmx96000m -jar lib/korapxmltool.jar
+SPACYXMLTOOL ?= java -Xmx250g -jar lib/korapxmltool.jar
SAXON ?= java -Djava.util.logging.config.file=/logging.properties -cp lib/saxon-ee-12.5.jar:lib/xmlresolver-5.2.2.jar:lib/textclassifier.jar:lib/xmlresolver-5.2.2-data.jar net.sf.saxon.Transform -expand:off -catalog:"lib/dtds/xhtml11/xhtmlcatalog.xml;lib/dtds/xhtml/dtd/xhtmlcatalog.xml"
.DELETE_ON_ERROR:
@@ -85,7 +86,7 @@
$(KORAPXMLTOOL) $< | pv | docker run --rm -i korap/conllu2treetagger -l german | conllu2korapxml > $@
%.spacy.zip: %.zip
- $(KORAPXMLTOOL) $< | pv | docker run --rm -i korap/conllu2spacy | conllu2korapxml > $@
+ $(SPACYXMLTOOL) -T 8 -A "docker run -e SPACY_USE_DEPENDENCIES=False --rm -i korap/conllu2spacy:latest" -f zip --overwrite $<
models/de.marmot:
mkdir -p models
diff --git a/lib/korapxmltool-2.10.jar b/lib/korapxmltool-2.10.jar
new file mode 100644
index 0000000..b361aea
--- /dev/null
+++ b/lib/korapxmltool-2.10.jar
Binary files differ
diff --git a/lib/korapxmltool.jar b/lib/korapxmltool.jar
deleted file mode 100644
index 0b8bad1..0000000
--- a/lib/korapxmltool.jar
+++ /dev/null
Binary files differ
diff --git a/lib/korapxmltool.jar b/lib/korapxmltool.jar
new file mode 120000
index 0000000..342a5e5
--- /dev/null
+++ b/lib/korapxmltool.jar
@@ -0,0 +1 @@
+korapxmltool-2.10.jar
\ No newline at end of file