conllu2korapxml: escape &, <, >
Change-Id: Ieb1d5fc36b27783bc652b3345bb24e52fead3220
diff --git a/script/conllu2korapxml b/script/conllu2korapxml
index 3e97fa7..53cdeca 100755
--- a/script/conllu2korapxml
+++ b/script/conllu2korapxml
@@ -137,7 +137,13 @@
next MAIN if m!^\s*$!s;
}
};
- my @parsed=split('\t');
+ my @parsed = map {
+ my $s = $_;
+ $s =~ s/&/&amp;/g;
+ $s =~ s/</&lt;/g;
+ $s =~ s/>/&gt;/g;
+ $s;
+ } split('\t');
chomp $parsed[9];
if (@parsed != 10) {
$log->warn("WARNING: skipping strange parser output line in $docid");