Support scrambling of header files in scramble_korapxml tool
Change-Id: Ia87ef3f1164f16c2dd8bd2163a0dac25ca44aeec
diff --git a/t/tools/example_rules.json b/t/tools/example_rules.json
index a2117ea..dc3fc6d 100644
--- a/t/tools/example_rules.json
+++ b/t/tools/example_rules.json
@@ -7,5 +7,13 @@
["f[name=pos]", "~"]
]
],
- ["struct/structure.xml"]
+ ["struct/structure.xml"],
+ [
+ "header.xml",
+ [
+ ["forename"]
+ ]
+ ],
+ ["../header.xml"],
+ ["../../header.xml"]
]
diff --git a/tools/scramble_korapxml.pl b/tools/scramble_korapxml.pl
index e92095a..95e727e 100644
--- a/tools/scramble_korapxml.pl
+++ b/tools/scramble_korapxml.pl
@@ -134,7 +134,11 @@
my $dom = Mojo::DOM->new->xml(1)->parse(b($data)->decode);
foreach (@$rules) {
- transform($dom, $_->[0], $_->[1]);
+ if ($input =~ /header\.xml$/) {
+ transform_header($dom, $_->[0]);
+ } else {
+ transform($dom, $_->[0], $_->[1]);
+ };
};
$data = b($dom->to_string)->encode;
@@ -193,6 +197,23 @@
};
+# Transform header file: randomize the text of every element in $dom matching the CSS $selector
+sub transform_header {
+ my ($dom, $selector) = @_;
+
+ $dom->find($selector)->each(
+ sub {
+ my $word = $_->text;
+
+ # Replace the element's content with a random word that has
+ # the same characteristics as the original (see get_rnd_word).
+ $_->content(get_rnd_word($word));
+ }
+ )
+};
+
+
+
__END__
=pod
@@ -255,7 +276,8 @@
CSS selector rules followed by a transformation type marker
are used to transform elements of the file.
-All CSS selectors are nested in C<spanList > span>.
+All CSS selectors for annotation files
+are nested in C<spanList > span>.
The following markers are supported:
@@ -279,4 +301,7 @@
=back
+For header files, the rules are not nested and matched elements are
+always randomized (as with the marker C<~>); no marker is required.
+
=back