# blob: 29bb1af32796f336c779f84c466c49d7874b3abd [file] [log] [blame]
# Have make delete a target file when its recipe exits with an error, so a
# partially written output is not treated as up to date on the next run.
# Note: for a pipeline, make only sees the exit status the shell reports
# (normally that of the last command in the pipe).
.DELETE_ON_ERROR:

# `all` does not name a file; declaring it phony keeps a stray file called
# "all" in this directory from making the default goal look up to date.
.PHONY: all

# Default goal: build everything up to and including the `json` target.
all: json

# Run nor2tei.py over every *.xml file found below the original-data directory.
# Rebuilt whenever nor2tei.py changes; the input XML files themselves are not
# tracked as prerequisites.
# NOTE(review): the recipe never references $@, so nor2tei.py is presumed to
# write output/corpus.p5.xml on its own — confirm against the script.
./output/corpus.p5.xml: nor2tei.py
	mkdir -p output
	python nor2tei.py `find /export/coin/ICC/NO/Originaldaten -name "*.xml"`

# Clean up the converter output line by line with perl, then pass it through
# xmllint and store the result as the target.
#
# The perl substitutions, in order:
#   1. a line that does not end in '>' has its newline replaced by a space,
#      joining it with the following line;
#   2. leading whitespace in front of a character other than '<' is removed;
#   3. "&gt " (entity missing its semicolon) is rewritten to "&gt; ".
#
# The capture-group references are written $$1: make expands a single '$'
# itself, so a bare $1 would reach perl as an empty string and the first two
# substitutions would delete the captured character instead of keeping it.
icc-nor.p5.xml: ./output/corpus.p5.xml
	perl -C255 -pe 's/([^>])\n/$$1 /g; s/^\s*([^<])/$$1/; s/\&gt /\&gt; /g;' $< | xmllint - > $@

# Stream the cleaned XML through pv (progress display) into tei2korapxml,
# which reads it from stdin ("-") and writes the zip to stdout.
icc-nor.zip: icc-nor.p5.xml
	pv $< | tei2korapxml -s -tk - > $@

# Annotation layer: export the base zip as CoNLL-U, run it through udpipe2
# with the norwegian-bokmaal-ud-2.10-220711 model, and convert the annotated
# CoNLL-U back into a KorAP-XML zip. pv only reports throughput.
icc-nor.ud.zip: icc-nor.zip
	korapxml2conllu $< | pv | /usr/local/kl/bin/udpipe2 -r -m norwegian-bokmaal-ud-2.10-220711 | conllu2korapxml > $@


# Combine the base zip and the udpipe annotation zip with korapxml2krill,
# using the settings in /vol/corpora/ICC/icc-nor.cfg.
# NOTE(review): the command is given "-o icc-nor.krill", not $@; presumably
# the archive settings make it produce icc-nor.krill.tar — confirm.
icc-nor.krill.tar: icc-nor.zip icc-nor.ud.zip
	korapxml2krill archive -w -cfg /vol/corpora/ICC/icc-nor.cfg -j 0 --meta ICC -i icc-nor.zip -i icc-nor.ud.zip -o icc-nor.krill

# Unpack the Krill archive locally, mirror it to the worker host and rebuild
# the index there:
#   1. recreate an empty local json/ directory and extract the tar into it;
#   2. rsync json/ to korap@korap-worker-07:/opt/korap/icc/nor/ (--delete
#      removes remote files that no longer exist locally);
#   3. over ssh: recreate index/, run Krill-Indexer.jar inside the
#      korap/kustvakt container on /data/json, writing to /data/index/, then
#      restart the docker-compose project "icc-nor".
# `json` is the directory this recipe creates, so it is deliberately not
# declared phony: the deployment only re-runs when icc-nor.krill.tar is newer.
json: icc-nor.krill.tar
	rm -rf json
	mkdir -p json
	tar -C json -xf icc-nor.krill.tar
	rsync -avz --delete json korap@korap-worker-07:/opt/korap/icc/nor/
	ssh korap@korap-worker-07 "cd /opt/korap/icc/nor && rm -rf index && mkdir -p index && docker run -u root --rm -v /opt/korap/icc/nor:/data:z korap/kustvakt Krill-Indexer.jar -c /kustvakt/kustvakt-lite.conf -i /data/json -o /data/index/ && INDEX=./index docker-compose --profile=full -p icc-nor restart"