# blob: 0b24f0964491477555b33602c534825d50df7541 [file] [log] [blame]  (Gitiles page header, not part of the Makefile)
# Run every recipe line under bash with `-e -o pipefail`, so that a failure in
# ANY stage of a pipeline fails the recipe. Several rules in this file pipe
# tools together; with the default /bin/sh only the last command's exit status
# counts, so a crash in an upstream stage would leave a truncated target that
# looks up to date. (.SHELLFLAGS is honoured by GNU Make 3.82 and newer.)
SHELL := bash
.SHELLFLAGS := -e -o pipefail -c

# Delete a target file when its recipe exits non-zero, so a half-written
# output is never mistaken for a finished one on the next run.
.DELETE_ON_ERROR:

# `all` names a command, not a file: never let a stray file called `all`
# make it appear up to date.
.PHONY: all

# Make `all` the default goal regardless of what precedes it in the file.
.DEFAULT_GOAL := all

# Default goal: build everything up to and including the `json` target.
all: json
# Run the eng2tei.py converter over every *.xml file found below the ICC-EN
# source tree. The rule re-runs whenever eng2tei.py itself changes.
# NOTE(review): nothing in this recipe redirects into $@, so eng2tei.py
# presumably writes output/corpus.p5.xml on its own -- confirm.
# NOTE(review): the XML inputs located by `find` are not listed as
# prerequisites, so a changed corpus alone does not trigger a rebuild.
./output/corpus.p5.xml: eng2tei.py
	mkdir -p $(@D)
	python $< $$(find /export/coin/ICC/ICC-EN_written_XML_April2023 -name "*.xml")
# Feed the generated corpus file to xmllint on stdin (`-`) and store what
# xmllint prints as icc-eng.p5.xml.
icc-eng.p5.xml: ./output/corpus.p5.xml
	xmllint - < $< > $@
# Stream the TEI P5 file through pv into tei2korapxml, which reads it from
# stdin (`-`); its stdout becomes icc-eng.zip.
icc-eng.zip: icc-eng.p5.xml
	pv $< \
	| tei2korapxml -s -tk - > $@
# Build the UDPipe annotation layer as a chain of filters:
#   korapxml2conllu  reads the base zip and writes to the pipe
#   pv               sits in the middle of the stream
#   udpipe2          runs with model english-partut-ud-2.10-220711
#   conllu2korapxml  writes the final output, captured as icc-eng.ud.zip
icc-eng.ud.zip: icc-eng.zip
	korapxml2conllu $< \
	| pv \
	| /usr/local/kl/bin/udpipe2 -r -m english-partut-ud-2.10-220711 \
	| conllu2korapxml > $@
# Run `korapxml2krill archive` over the base zip and the UDPipe zip. Every
# prerequisite is passed as its own `-i` argument, in the order listed.
# NOTE(review): the output is given as icc-eng.krill (the target name without
# its .tar suffix); presumably korapxml2krill or the cfg file turns that into
# icc-eng.krill.tar -- confirm against the tool's documentation.
icc-eng.krill.tar: icc-eng.zip icc-eng.ud.zip
	korapxml2krill archive -w -cfg /vol/corpora/ICC/icc-eng.cfg -j 0 --meta ICC $(foreach z,$^,-i $(z)) -o $(basename $@)
# Unpack the Krill tar into a fresh local `json` directory, mirror that
# directory to korap-worker-07, then rebuild the index there and restart the
# icc-eng docker-compose project.
json: icc-eng.krill.tar
	rm -rf $@
	mkdir -p $@
	tar -C $@ -xf $<
	rsync -avz --delete $@ korap@korap-worker-07:/opt/korap/icc/eng/
	ssh korap@korap-worker-07 "cd /opt/korap/icc/eng && \
	rm -rf index && mkdir -p index && \
	docker run -u root --rm -v /opt/korap/icc/eng:/data:z korap/kustvakt Krill-Indexer.jar -c /kustvakt/kustvakt-lite.conf -i /data/json -o /data/index/ && \
	INDEX=./index docker-compose --profile=full -p icc-eng restart"