2korapxml: Trim docid
Strip trailing whitespace from the text_id value so that the docid carries no spurious trailing spaces.
Change-Id: I2b7d80a6add6c6b8867f785813c16dbd0f5c43cd
diff --git a/script/conllu2korapxml b/script/conllu2korapxml
index eaa06aa..e285abf 100755
--- a/script/conllu2korapxml
+++ b/script/conllu2korapxml
@@ -115,6 +115,7 @@
}
} elsif(/^(?:#|0\.2)\s+text_id\s*[:=]\s*(.*)/) {
$docid=$1;
+ $docid =~ s/\s+$//;
my $docSigle = $docid;
$docSigle =~ s/\..*//;
if($docSigle ne $lastDocSigle) {
diff --git a/t/data/goe.ud.conllu b/t/data/goe.ud.conllu
index bfb4456..8170a6e 100644
--- a/t/data/goe.ud.conllu
+++ b/t/data/goe.ud.conllu
@@ -4,7 +4,7 @@
# udpipe_model_licence = CC BY-NC-SA
# foundry = base
# filename = GOE/AGA/00000/base/tokens.xml
-# text_id = GOE_AGA.00000
+# text_id = GOE_AGA.00000
# start_offsets = 0 0 9 12
# end_offsets = 22 8 11 22
1 Campagne Campagne NOUN NN Gender=Fem|Number=Sing|Person=3 0 root _ _
diff --git a/t/data/wdf19.morpho.conllu b/t/data/wdf19.morpho.conllu
index e19d483..2f4354e 100644
--- a/t/data/wdf19.morpho.conllu
+++ b/t/data/wdf19.morpho.conllu
@@ -1,6 +1,6 @@
# foundry = tree_tagger
# filename = WDF19/A0000/13072/tree_tagger/morpho.xml
-# text_id = WDF19_A0000.13072
+# text_id = WDF19_A0000.13072
# start_offsets = 0 0 14 17 25 30 35 42 44 52 60 73 76 79 82 85 92 96 99 103 111 122 123 135 140 143 147 150 163 168 170 178 179 189 192 198 203 205 207 208 211 212 216 218 222 227 229 234 236 242 244 253 256 264 269 271 277 278 282
# end_offsets = 283 12 16 24 29 34 41 43 51 59 72 74 78 81 84 91 95 98 102 110 122 123 134 139 142 146 149 162 167 170 178 179 188 191 197 202 204 207 208 210 212 216 217 221 226 228 233 236 242 243 252 255 263 268 270 276 278 282 283
1 Australasien -- NAM NAM _ _ _ _ 0.966912
diff --git a/t/test.t b/t/test.t
index bff3ccf..d095392 100644
--- a/t/test.t
+++ b/t/test.t
@@ -25,6 +25,7 @@
if (open(my $fh, '<', $conllu_fname)) {
local $/;
$expected = <$fh>;
+ $expected =~ s/^(# text_id\s*=\s*\S+)\s*$/$1/mg;
close($fh);
} else {
fail("cannot open file $conllu_fname");