blob: 99fc907569e30ae6b9d39a7ebcd26eeb06a5222e [file] [log] [blame]
# Run every recipe under bash with errexit/nounset/pipefail. Several recipes
# below are pipelines of the form `producer | tool > target`; with the default
# `/bin/sh -c` only the exit status of the LAST stage counts, so a failing
# upstream stage would go unnoticed and leave a truncated target behind.
# (.SHELLFLAGS is honoured by GNU Make 3.82 and later.)
SHELL := bash
.SHELLFLAGS := -eu -o pipefail -c

# Remove a target file whose recipe failed, so a partially written output is
# never mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:
2
# Default goal: bring the `json` target (and everything it depends on) up to
# date. Declared phony so that a stray file named `all` in this directory can
# never make the goal look already built.
.PHONY: all
all: json
4
# Stream the ICC-German source XML through ./map-ger-icc-genres.pl and store
# the result as an intermediate file (pv only displays progress).
#
# The filter script is listed as a prerequisite so that editing it triggers a
# rebuild; the source XML stays first so that $< still names the input file.
icc-ger.disposable.xml: /export/netapp/fi2/luengen/ICC/ICC-German.p5.xml map-ger-icc-genres.pl
	pv $< | ./map-ger-icc-genres.pl > $@
Marc Kupietzc4ab77a2023-05-26 10:51:07 +02007
# Feed the intermediate XML to tei2korapxml (reading stdin via `-`) and write
# its output to the zip target.
#
# --xmlid-to-textsigle takes a `regex@replacement` style argument (presumably:
# ids matching the left-hand side are rewritten to GER/<group 1>/<group 2> --
# confirm against the tei2korapxml documentation). `$$` is Make's escape for a
# literal `$`, so the tool receives `$1` and `$2`.
icc-ger.zip: icc-ger.disposable.xml
	pv $< \
	  | tei2korapxml \
	      --xmlid-to-textsigle 'ICC.German\.([^.]+\.?[^.]+)\.(.+)@GER/$$1/$$2' \
	      -s -tk - \
	  > $@
10
# Annotation layer produced by udpipe2: export the base zip as CoNLL-U, pipe
# it through udpipe2 (pv shows throughput in between), and convert the
# annotated CoNLL-U stream back into a zip.
icc-ger.ud.zip: icc-ger.zip
	korapxml2conllu $< \
	  | pv \
	  | /usr/local/kl/bin/udpipe2 \
	  | conllu2korapxml \
	  > $@
13
# Annotation layer produced by korap2tt.pl, run on the base zip with `-D .`.
# NOTE(review): the recipe never names $@; presumably korap2tt.pl derives the
# output name icc-ger.tree_tagger.zip from its input and writes it into the
# -D directory -- confirm against the script.
icc-ger.tree_tagger.zip: icc-ger.zip
	/usr/local/kl/korap/Ingestion/treetagger/korap2tt.pl -D . $<
16
# Combine the base zip and both annotation zips with `korapxml2krill archive`.
# Every prerequisite is passed as one `-i <zip>` argument, in the order listed
# on the rule line; the output name handed to -o is the target without its
# final `.tar` suffix (icc-ger.krill).
# NOTE(review): presumably the tool (or icc-ger.cfg) appends `.tar` itself --
# confirm.
icc-ger.krill.tar: icc-ger.zip icc-ger.tree_tagger.zip icc-ger.ud.zip
	korapxml2krill archive -w -cfg /vol/corpora/ICC/icc-ger.cfg -j 0 --meta ICC \
	  $(foreach zip,$^,-i $(zip)) \
	  -o $(basename $@)
19
# Unpack the Krill archive into a fresh ./json directory, mirror that directory
# to korap-worker-07, then (on the worker) recreate the index directory, run
# Krill-Indexer.jar from the korap/kustvakt image over the uploaded JSON, and
# restart the icc-ger docker-compose project.
#
# The target is a directory, and .DELETE_ON_ERROR only removes regular files.
# Without extra care a failed unpack, rsync or ssh would leave ./json newer
# than the tar, and the next `make` would consider the deployment done. The
# directory is therefore removed explicitly when any step fails, so that the
# next run retries the whole deployment.
json: icc-ger.krill.tar
	rm -rf $@
	mkdir -p $@
	{ tar -C $@ -xf $< && \
	  rsync -avz --delete $@ korap@korap-worker-07:/opt/korap/icc/ger/ && \
	  ssh korap@korap-worker-07 "cd /opt/korap/icc/ger && rm -rf index && mkdir -p index && docker run -u root --rm -v /opt/korap/icc/ger:/data:z korap/kustvakt Krill-Indexer.jar -c /kustvakt/kustvakt-lite.conf -i /data/json -o /data/index/ && INDEX=./index docker-compose --profile=full -p icc-ger restart"; \
	} || { rm -rf $@; exit 1; }