Tokenize arrows such as "->" and "<-" as single tokens
Change-Id: I250a30cafacb5ddfd7c87d5c11ab7c0ead923dd0
diff --git a/matrix_test.go b/matrix_test.go
index 6681b2e..adce916 100644
--- a/matrix_test.go
+++ b/matrix_test.go
@@ -1116,6 +1116,14 @@
assert.Equal(tokens[7], "-_-;;;")
assert.Equal(tokens[8], "-_-^")
assert.Equal(len(tokens), 9)
+
+ tokens = ttokenize(mat, w, "das -> Lustig<-!")
+ assert.Equal("das", tokens[0])
+ assert.Equal("->", tokens[1])
+ assert.Equal("Lustig", tokens[2])
+ assert.Equal("<-", tokens[3])
+ assert.Equal("!", tokens[4])
+ assert.Equal(5, len(tokens))
}
func TestMatrixFullTokenizerXML(t *testing.T) {
diff --git a/src/all/allpost.xfst b/src/all/allpost.xfst
index 6ea6dcf..32e7a90 100644
--- a/src/all/allpost.xfst
+++ b/src/all/allpost.xfst
@@ -16,6 +16,9 @@
! XML entities
source all/entities.xfst
+! Arrows
+define Arrows [Alldash ">" | "<" Alldash];
+
! Technical protocols
source all/protocols.xfst
diff --git a/src/all/allpref.xfst b/src/all/allpref.xfst
index bb5183f..7175d0b 100644
--- a/src/all/allpref.xfst
+++ b/src/all/allpref.xfst
@@ -58,6 +58,7 @@
define Emdash [%- %- (%-)+ | ["\u2014"|"\u2015"|"\u2e3a"|"\u2e3b"|"\ufe58"]+];
define Dash ["-"|"\u2011"|"\u2012"|"\u2013"|"\u2e1a"|"\ufe63"|"\uff0d"];
+define Alldash ["-"|"\u2011"|"\u2012"|"\u2013"|"\u2e1a"|"\ufe63"|"\uff0d"|"\u2014"|"\u2015"|"\u2e3a"|"\u2e3b"|"\ufe58"];
define Slash ["⁄"|"∕"|"/"|"/"];
define Asterisk ["*"];
diff --git a/src/de/tokenizer.xfst b/src/de/tokenizer.xfst
index 407c482..9670f9a 100644
--- a/src/de/tokenizer.xfst
+++ b/src/de/tokenizer.xfst
@@ -73,7 +73,7 @@
Email @-> ... NLout,
File @-> ... NLout,
Domain @-> ... NLout,
- Emoticons @-> ... NLout
+ [Emoticons|Arrows] @-> ... NLout
];
source all/allsentencesplit.xfst