Bugfix: internal tokenization
retr_info(): $_GEN_TOK_INT: text nodes starting with 2 blanks were ignored (neither tokenized nor counted in $offset); they are now always tokenized
Change-Id: I06e8a157bc566fca4b44737230f9b3dc236d0a98
diff --git a/script/tei2korapxml b/script/tei2korapxml
index dde0146..c94e3cb 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -1073,15 +1073,11 @@
$txt = $e->[1];
- if ( substr( $txt, 0, 1 ) ne ' ' || substr( $txt, 1, 1) ne ' ' ){ # $txt has at least 2 chars, if it's not empty or equal to ' '
+ # TODO: implement outside retr_info() (like $ext_tok) on whole $data, instead on every text-node (more efficient and $offset not needed anymore)
+ $cons_tok->tokenize($txt, $offset);
+ $aggr_tok->tokenize($txt, $offset);
- # TODO: implement outside retr_info() (like $ext_tok) on whole $data, instead on every text-node (more efficient and $offset not needed anymore)
- $cons_tok->tokenize($txt, $offset);
- $aggr_tok->tokenize($txt, $offset);
-
- $offset = $dl;
-
- }
+ $offset = $dl;
#~~~~~