Rename token_symbol to token_bound

Change-Id: I3e4f35c1fc65b542e948659d94437e818fc1f136

commit: b15acb9406974883d9c9f931cf40c5e008406871 [log] [tgz]
author: Akron <nils@diewald-online.de> Sat Apr 16 11:01:46 2022 +0200
committer: Akron <nils@diewald-online.de> Sat Apr 16 11:01:46 2022 +0200
tree: 8d10b18b284013a63e542594bc7ecfc4bbcd2145
parent: d47c67e5ba3fc8c3f815fca196ddf37020920797 [diff]
diff --git a/Changes b/Changes
index 5fca5a1..b9359ff 100644
--- a/Changes
+++ b/Changes

@@ -1,3 +1,6 @@
+0.1.6 2022-04-14
+    - Rename TOKEN_SYMBOL to TOKEN_BOUND.
+
 0.1.5 2022-03-28
     - Improve Emoticon-List.
 

diff --git a/Readme.md b/Readme.md
index f02e452..353bc36 100644
--- a/Readme.md
+++ b/Readme.md

@@ -59,14 +59,14 @@
 - Character accepting arcs need to be translated
   *only* to themselves or to ε (the empty symbol).
 - Multi-character symbols are not allowed,
-  except for the `@_TOKEN_SYMBOL_@`,
+  except for the `@_TOKEN_BOUND_@`,
   that denotes the end of a token.
 - ε accepting arcs (transitions not consuming
   any character) need to be translated to
-  the `@_TOKEN_SYMBOL_@`.
-- Two consecutive `@_TOKEN_SYMBOL_@`s mark a sentence end.
+  the `@_TOKEN_BOUND_@`.
+- Two consecutive `@_TOKEN_BOUND_@`s mark a sentence end.
 - Flag diacritics are not supported.
-- Final states are ignored. The `@_TOKEN_SYMBOL_@` marks
+- Final states are ignored. The `@_TOKEN_BOUND_@` marks
   the end of a token instead.
 
 A minimal usable tokenizer written in XFST and following
@@ -74,7 +74,7 @@
 and Beesley (2004) would look like this:
 
 ```xfst
-define TE "@_TOKEN_SYMBOL_@";
+define TB "@_TOKEN_BOUND_@";
 
 define WS [" "|"\u000a"|"\u0009"];
 
@@ -84,17 +84,17 @@
 
 define Word Char+;
 
-! Compose token ends
-define Tokenizer [[Word|PUNCT] @-> ... TE] .o.
+! Compose token bounds
+define Tokenizer [[Word|PUNCT] @-> ... TB] .o.
 ! Compose Whitespace ignorance
        [WS+ @-> 0] .o.
 ! Compose sentence ends
-       [[PUNCT+] @-> ... TE \/ TE _ ];
+       [[PUNCT+] @-> ... TB \/ TB _ ];
 
 read regex Tokenizer;
 ```
 
-> *Hint*: For development it's easier to replace `@_TOKEN_SYMBOL_@`
+> *Hint*: For development it's easier to replace `@_TOKEN_BOUND_@`
 with a newline.
 
 ## Building

diff --git a/fomafile.go b/fomafile.go
index 8f8f046..9d7106c 100644
--- a/fomafile.go
+++ b/fomafile.go

@@ -409,10 +409,16 @@
 							auto.identity = number
 						}
 
+						// Deprecated
 					case "@_TOKEN_SYMBOL_@":
 						{
 							auto.tokenend = number
 						}
+
+					case "@_TOKEN_BOUND_@":
+						{
+							auto.tokenend = number
+						}
 					default:
 						{
 							// MCS not supported

diff --git a/src/all/allpref.xfst b/src/all/allpref.xfst
index 7175d0b..6d8f3bb 100644
--- a/src/all/allpref.xfst
+++ b/src/all/allpref.xfst

@@ -8,7 +8,7 @@
 !  - https://github.com/coltekin/TRmorph/tokenize.xfst by Çağrı Çöltekin (2011-2015)
 !    under the MIT License
 
-define NLout "@_TOKEN_SYMBOL_@";
+define NLout "@_TOKEN_BOUND_@";
 ! define NLout "\u000a";
 
 define Digit [%0|1|2|3|4|5|6|7|8|9];
commit	b15acb9406974883d9c9f931cf40c5e008406871	[log] [tgz]
author	Akron <nils@diewald-online.de>	Sat Apr 16 11:01:46 2022 +0200
committer	Akron <nils@diewald-online.de>	Sat Apr 16 11:01:46 2022 +0200
tree	8d10b18b284013a63e542594bc7ecfc4bbcd2145
parent	d47c67e5ba3fc8c3f815fca196ddf37020920797 [diff]