Hack iccGenre mapping from ger
using a disposable xml file
Change-Id: Id43a0f7ef1c555b1f73163fc186520d159a97462
diff --git a/Makefile b/Makefile
index a0c9857..99fc907 100644
--- a/Makefile
+++ b/Makefile
@@ -2,10 +2,10 @@
all: json
-icc-ger.p5.xml: /export/netapp/fi2/luengen/ICC/ICC-German.p5.xml
- cp -p $< $@
+icc-ger.disposable.xml: /export/netapp/fi2/luengen/ICC/ICC-German.p5.xml map-ger-icc-genres.pl # script is a prerequisite so mapping changes trigger a rebuild; $< is still the P5 source
+ pv $< | ./map-ger-icc-genres.pl > $@
-icc-ger.zip: icc-ger.p5.xml
+icc-ger.zip: icc-ger.disposable.xml # converted from the genre-remapped disposable copy instead of a plain copy of the P5 source
pv $< | tei2korapxml --xmlid-to-textsigle 'ICC.German\.([^.]+\.?[^.]+)\.(.+)@GER/$$1/$$2' -s -tk - > $@
icc-ger.ud.zip: icc-ger.zip
diff --git a/map-ger-icc-genres.pl b/map-ger-icc-genres.pl
new file mode 100755
index 0000000..216cdd9
--- /dev/null
+++ b/map-ger-icc-genres.pl
@@ -0,0 +1,39 @@
+#!/usr/bin/env perl
+#
+# Filter: copies STDIN to STDOUT line by line, rewriting the genre labels
+# that appear directly in front of a closing </classCode> tag to the labels
+# listed in %icc_genre. All other text is passed through unchanged.
+# Usage: map-ger-icc-genres.pl < input.xml > output.xml
+
+use strict;
+use warnings;
+
+# Old classCode label => replacement label.
+my %icc_genre = (
+    'Administrative/regulatory prose' => 'Instructional:AdministrativeRegulatoryProse',
+    'Blogs'                           => 'Blog',
+    'Creative'                        => 'Creative:Novels_ShortStories',
+    'Learned_Humanities'              => 'Informational:Learned:Humanities',
+    'Learned_Natural_Sciences'        => 'Informational:Learned:NaturalSciences',
+    'Learned_Social_Sciences'         => 'Informational:Learned:SocialSciences',
+    'Learned_Technology'              => 'Informational:Learned:Technology',
+    'Popular_Humanities'              => 'Informational:Popular:Humanities',
+    'Popular_Natural_Sciences'        => 'Informational:Popular:NaturalSciences',
+    'Popular_Social_Sciences'         => 'Informational:Popular:SocialSciences',
+    'Popular_Technology'              => 'Informational:Popular:Technology',
+    'Press_comments'                  => 'Persuasive',
+    'Press_news_reports'              => 'Informational:Reportage',
+    'SkillsHobbies'                   => 'Instructional:Skills_Hobbies',
+);
+
+# One alternation over all old labels; quotemeta keeps "/" and spaces literal.
+# No key is a suffix of another key, so at most one alternative can match in
+# front of any given </classCode>.
+my $alternatives = join '|', map { quotemeta } sort keys %icc_genre;
+my $old_label    = qr/($alternatives)/;
+
+while (my $line = <STDIN>) {
+    # /g maps every matching classCode on the line, not only the first one.
+    $line =~ s{$old_label</classCode>}{$icc_genre{$1}</classCode>}g;
+    print $line;
+}