# Build and deployment pipeline for the German ICC corpus: the TEI P5 source
# is converted to KorAP-XML, annotated, packed into a Krill archive and
# finally indexed on the remote worker.

# Delete a target whose recipe exits with an error, so a half-written archive
# is never mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# Several recipes in this file are shell pipelines that redirect into $@.
# With the default `/bin/sh -c` only the exit status of the LAST pipeline
# stage is seen by make, so a failing upstream stage would leave a truncated
# target behind and .DELETE_ON_ERROR would never fire. Running recipes under
# bash with `pipefail` makes any failing stage fail the whole recipe.
# (.SHELLFLAGS requires GNU make >= 3.82.)
SHELL := /bin/bash
.SHELLFLAGS := -o pipefail -c

# Default goal: run the whole pipeline up to and including the `json` target.
# `all` never produces a file of that name, so it is declared phony; otherwise
# a stray file called `all` in this directory would make `make` do nothing.
.PHONY: all
all: json

# Fetch a local working copy of the TEI P5 source file.
# `cp -p` keeps the source's mode and timestamps on the copy.
icc-ger.p5.xml: /export/netapp/fi2/luengen/ICC/ICC-German.p5.xml
	cp -p $(<) $(@)

# Convert the TEI P5 file into the base archive with tei2korapxml.
# The input is streamed through `pv` (progress display) into the converter,
# whose output is redirected into the target.
# `$$1` / `$$2` are make-escaped, so the tool receives the literal `$1` / `$2`
# back-references of the --xmlid-to-textsigle mapping.
icc-ger.zip: icc-ger.p5.xml
	pv $< | \
	tei2korapxml --xmlid-to-textsigle 'ICC.German\.([^.]+\.?[^.]+)\.(.+)@GER/$$1/$$2' -s -tk - > $@

# Produce the UDPipe annotation archive from the base archive:
# KorAP-XML -> CoNLL-U -> udpipe2 -> back to KorAP-XML, with `pv` in the
# middle of the pipeline to show throughput.
icc-ger.ud.zip: icc-ger.zip
	korapxml2conllu $< | \
	pv | \
	/usr/local/kl/bin/udpipe2 | \
	conllu2korapxml > $@

# Produce the TreeTagger annotation archive from the base archive.
# NOTE(review): the recipe never names $@; it presumably relies on korap2tt.pl
# writing icc-ger.tree_tagger.zip into the directory given with -D — confirm.
icc-ger.tree_tagger.zip: icc-ger.zip
	/usr/local/kl/korap/Ingestion/treetagger/korap2tt.pl -D . $<

# Combine the base archive and both annotation archives into the Krill
# archive using korapxml2krill.
# Every prerequisite is passed as its own `-i` argument, in prerequisite
# order; the output name is the target without its `.tar` suffix.
# NOTE(review): presumably korapxml2krill (or the cfg file) adds the `.tar`
# suffix, so that icc-ger.krill.tar is what actually gets written — confirm.
icc-ger.krill.tar: icc-ger.zip icc-ger.tree_tagger.zip icc-ger.ud.zip
	korapxml2krill archive -w -cfg /vol/corpora/ICC/icc-ger.cfg -j 0 --meta ICC \
		$(foreach archive,$^,-i $(archive)) \
		-o $(basename $@)

# Unpack the Krill archive into a fresh ./json directory, mirror that
# directory to the worker host and rebuild the index there.
json: icc-ger.krill.tar
	# Start from an empty directory so files from earlier runs cannot survive.
	rm -rf $@ && mkdir -p $@
	tar -C $@ -xf $<
	# No trailing slash on the source: the directory itself is mirrored to
	# /opt/korap/icc/ger/json, and --delete removes remote files gone locally.
	rsync -avz --delete $@ korap@korap-worker-07:/opt/korap/icc/ger/
	# On the worker: recreate the index directory, run the Krill indexer from
	# the korap/kustvakt image over /data/json, then restart the compose project.
	ssh korap@korap-worker-07 "cd /opt/korap/icc/ger && rm -rf index && mkdir -p index && docker run -u root --rm -v /opt/korap/icc/ger:/data:z korap/kustvakt Krill-Indexer.jar -c /kustvakt/kustvakt-lite.conf -i /data/json -o /data/index/ && INDEX=./index docker-compose --profile=full -p icc-ger restart"