Improve offset error identification (also catch empty tokens)
Change-Id: Id5c9dadb043a3adff9ec2c70f3fd957c38fa9ee8
diff --git a/script/korapxml2conllu b/script/korapxml2conllu
index b771474..9f19987 100755
--- a/script/korapxml2conllu
+++ b/script/korapxml2conllu
@@ -215,7 +215,7 @@
}
# $log->debug("found span: $current_id $current_from $current_to");
$token = substr($plain_texts{$docid}, $current_from, $current_to - $current_from);
- if (!defined $token) {
+ if (!defined $token || length($token) == 0) {
$log->warn("$docid: could not retrieve token at $current_from-$current_to/", length($plain_texts{$docid}), " - ending with: ", substr($plain_texts{$docid},length($plain_texts{$docid})-10));
$token = "_";
}