Trim filenames to fix double space after filename metadata
Change-Id: I578914ad892373e2ad09232d4f5dceb3bb75c740
diff --git a/script/korapxml2conllu b/script/korapxml2conllu
index 7d9b452..b06136c 100755
--- a/script/korapxml2conllu
+++ b/script/korapxml2conllu
@@ -156,12 +156,16 @@
while (<MORPHO_OR_TOKENPIPE>) {
if (/^ inflating: (.*)/) {
$filename=$1;
+ $filename =~ s/^\s+|\s+$//g;
while($processedFilenames{$filename} && !eof(MORPHO_OR_TOKENPIPE)) {
$log->warn("$filename already processed");
while (<MORPHO_OR_TOKENPIPE>) {
last if(/\s+inflating:\s+(.*)/);
}
- $filename=$1 if(!eof(MORPHO_OR_TOKENPIPE) && /\s+inflating:\s+(.*)/);
+ if(!eof(MORPHO_OR_TOKENPIPE) && /\s+inflating:\s+(.*)/) {
+ $filename=$1;
+ $filename =~ s/^\s+|\s+$//g;
+ }
}
} elsif(m@(?:^|\s)docid="([^"]+)"@) {
last if($test && $text_no++ > 3);