Do not split metadata extraction regexes at commas
Instead, allow commas inside the metadata extraction regexes and
pass multiple -m <regex> arguments to specify several patterns.
Change-Id: Ia890565cf405478c3383e3a1109190cea8f070da
diff --git a/script/korapxml2conllu b/script/korapxml2conllu
index 8fd898b..61d48eb 100755
--- a/script/korapxml2conllu
+++ b/script/korapxml2conllu
@@ -65,7 +65,6 @@
if (@extract_metadata_regex) {
$extract_metadata = 1;
- @extract_metadata_regex = split(/,/,join(',',@extract_metadata_regex));
}
# Establish logger
diff --git a/t/test.t b/t/test.t
index 99f6503..3a34be9 100644
--- a/t/test.t
+++ b/t/test.t
@@ -107,7 +107,7 @@
fail("cannot open file $w2v_fname");
next;
}
- script_runs([ 'script/korapxml2conllu', '-m', '<textSigle>([^<.]+)', '-m', '<creatDate>([^<]{7})', '--word2vec', $base_fname ], "Runs korapxml2conllu with base input and w2v and metadata output");
+ script_runs([ 'script/korapxml2conllu', '-m', '<textSigle>([^<.]+)', '-m', '<creatDate>([^<]{4,7})', '--word2vec', $base_fname ], "Runs korapxml2conllu with base input and w2v and metadata output");
script_stdout_is $expected, "Converts $base_fname correctly to word2vec input format together with some metadata";
}