Adapt to ICC-ENG

First Version 2023-04-27

Change-Id: I7b05bdadcf23f3416c2488ad3a86283676d855d9
diff --git a/Makefile b/Makefile
index 1b96617..7b5877b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,26 +2,44 @@
 
 all: json
 
-./output/corpus.p5.xml: nor2tei.py Makefile
+# Build ./output/corpus.p5.xml: run eng2tei.py on every *.xml file found below
+# /export/coin/ICC/ICC-EN_written_XML_April2023.
+# NOTE(review): the recipe never references $@, so eng2tei.py presumably
+# writes ./output/corpus.p5.xml itself -- confirm.
+./output/corpus.p5.xml: eng2tei.py Makefile
 	mkdir -p output
-	python nor2tei.py `find /export/coin/ICC/NO/Originaldaten -name "*.xml"`
+	python eng2tei.py `find /export/coin/ICC/ICC-EN_written_XML_April2023 -name "*.xml"`
 
-icc-nor.p5.xml: ./output/corpus.p5.xml Makefile
-	perl -C255 -pe 's/([^>])\n/$1 /g; s/^\s*([^<])/$1/; s/\&gt /\&gt; /g;' $< | xmllint - > $@
+# Run ./output/corpus.p5.xml through xmllint and store the result.
+icc-eng.p5.xml: ./output/corpus.p5.xml Makefile
+	xmllint $< > $@
 
-icc-nor.zip: icc-nor.p5.xml
+# Feed icc-eng.p5.xml through pv (progress display) into tei2korapxml, which
+# reads it from stdin ("-") and writes icc-eng.zip to stdout.
+icc-eng.zip: icc-eng.p5.xml
 	pv $< | tei2korapxml -s -tk - > $@
 
-icc-nor.ud.zip: icc-nor.zip
-	korapxml2conllu $< | pv | /usr/local/kl/bin/udpipe2 -r -m norwegian-bokmaal-ud-2.10-220711 | conllu2korapxml > $@
+# Annotate the corpus: korapxml2conllu output is piped through udpipe2 with
+# the english-partut-ud-2.10-220711 model, and conllu2korapxml turns the
+# result into icc-eng.ud.zip.
+icc-eng.ud.zip: icc-eng.zip
+	korapxml2conllu $< | pv | /usr/local/kl/bin/udpipe2 -r -m english-partut-ud-2.10-220711 | conllu2korapxml > $@
 
 
-icc-nor.krill.tar: icc-nor.zip icc-nor.ud.zip
-	korapxml2krill archive -w -cfg /vol/corpora/ICC/icc-nor.cfg -i icc-nor.zip -i icc-nor.ud.zip -o icc-nor.krill
+# Run korapxml2krill archive on the base zip and the UD annotation zip.
+# NOTE(review): the recipe writes to -o icc-eng.krill while the target is
+# icc-eng.krill.tar; presumably -w produces the tarball -- confirm.
+icc-eng.krill.tar: icc-eng.zip icc-eng.ud.zip
+	korapxml2krill archive -w -cfg /vol/corpora/ICC/icc-eng.cfg -i icc-eng.zip -i icc-eng.ud.zip -o icc-eng.krill
 
-json: icc-nor.krill.tar
+# Unpack the tarball into a fresh json/ directory, rsync it to
+# korap-worker-07 and rebuild the index there with Krill-Indexer.jar in Docker.
+# The remote command is quoted as a whole: unquoted, the local shell splits at
+# the first && and runs rm, mkdir and docker run on the build host instead of
+# on korap-worker-07.
+json: icc-eng.krill.tar
 	rm -rf json
 	mkdir -p json
-	tar -C json -xf icc-nor.krill.tar
-	rsync -avz --delete json korap@korap-worker-07:/opt/korap/icc/nor/KorAP-Docker/
-	ssh korap@korap-worker-07 cd /opt/korap/icc/nor/KorAP-Docker && rm -rf index && mkdir -p index && docker run  -u root --rm -v /opt/korap/icc/nor/KorAP-Docker:/data:z korap/kustvakt Krill-Indexer.jar -c /kustvakt/kustvakt-lite.conf -i /data/json -o /data/index/
+	tar -C json -xf icc-eng.krill.tar
+	rsync -avz --delete json korap@korap-worker-07:/opt/korap/icc/eng/KorAP-Docker/
+	ssh korap@korap-worker-07 'cd /opt/korap/icc/eng/KorAP-Docker && rm -rf index && mkdir -p index && docker run  -u root --rm -v /opt/korap/icc/eng/KorAP-Docker:/data:z korap/kustvakt Krill-Indexer.jar -c /kustvakt/kustvakt-lite.conf -i /data/json -o /data/index/'