Add XML entities

Change-Id: Ib22e51ec8427e0af23a9dcf83c01e6e16837c91e
diff --git a/matrix_test.go b/matrix_test.go
index fe96c62..a762f1d 100644
--- a/matrix_test.go
+++ b/matrix_test.go
@@ -753,6 +753,22 @@
 	assert.Equal(tokens[4], ".")
 	assert.Equal(5, len(tokens))
 
+	// XML entities: "&nbsp;" and "&gt;" must stay single tokens, while the bare "&" in "Stunden&20" (no closing ";") is split off as a symbol
+	tokens = ttokenize(mat, w, "Das ist&nbsp;1:30 Stunden&20 Minuten zu spät &gt;.")
+	assert.Equal(tokens[0], "Das")
+	assert.Equal(tokens[1], "ist")
+	assert.Equal(tokens[2], "&nbsp;")
+	assert.Equal(tokens[3], "1:30")
+	assert.Equal(tokens[4], "Stunden")
+	assert.Equal(tokens[5], "&")
+	assert.Equal(tokens[6], "20")
+	assert.Equal(tokens[7], "Minuten")
+	assert.Equal(tokens[8], "zu")
+	assert.Equal(tokens[9], "spät")
+	assert.Equal(tokens[10], "&gt;")
+	assert.Equal(tokens[11], ".")
+	assert.Equal(12, len(tokens))
+
 	/*
 		@Test
 		public void englishTokenizerSeparatesEnglishContractionsAndClitics () {
diff --git a/src/entities.xfst b/src/entities.xfst
new file mode 100644
index 0000000..92430ef
--- /dev/null
+++ b/src/entities.xfst
@@ -0,0 +1,5 @@
+define XMLentitiesDec ["#" Digit Digit+ ];
+define XMLentitiesHex ["#" ["x"|"X"] HexLetter HexLetter+ ];
+define XMLentitiesStr [[[AsciiLetter .o. Caseinsensitive].l | Digit | "_" | "-" ]+ ];
+! Entity = "&" body ";" where body is decimal ("#" + 2 or more Digit), hex ("#x"/"#X" + 2 or more HexLetter) or a name (1 or more of ASCII letter - presumably either case -, Digit, "_", "-"). NOTE(review): single-digit references such as "&#9;" do not match - confirm this is intended.
+read regex ["&" [XMLentitiesDec|XMLentitiesHex|XMLentitiesStr] ";"];
\ No newline at end of file
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 742f147..dca20c4 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -54,7 +54,7 @@
               ! from book
               [%‘ %‘]|[%’ %’]];
 
-define Sym ["-"|"+"|"<"|">"|"*"|"/"|%=|%@];
+define Sym ["-"|"+"|"<"|">"|"*"|"/"|%=|%@|%&];
 define Apos %'|%’|%`;
 define Punct [LP|RP|Sym];
 !define nonSym \[WS|LP|RP|Sym];
@@ -163,7 +163,12 @@
     ] WS* ">"
 ].u;
 
-!define Email [Alpha [URLChar-At]* At Alpha URLChar* [Dot [[Alpha URLChar+]-Dot-At]]+];
+! XML entities: entities.xfst ends in a "read regex"; the regex-less define below presumably binds that network (top of the stack) to XMLEntities - verify against the foma version in use
+source entities.xfst
+define XMLEntities;
+
+
+! Email addresses
 define Email URLChar+ At URLChar+ [Dot URLChar+]+;
 
 ! Twitter user, hashtag, Google+
@@ -225,6 +230,7 @@
 echo - Introduce Token splitter
 
 define Token [
+  XMLEntities @-> ... NLout,
   Abbr @-> ... NLout,
   RealToken @-> ... NLout,
   Plusampersand @-> ... NLout,