Introduce XML tests
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 6d21d8d..f7a089f 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -11,7 +11,6 @@
 ! The abbreviation list is part of the sentence splitter tool
 ! of the IDS.
 
-! define NLout "\u000a";
 define NLout "@_TOKEN_SYMBOL_@";
 ! define NLout "\u000a";
 
@@ -55,7 +54,7 @@
 
 define Sym ["-"|"+"|"<"|">"|"*"|"/"|%=|%@];
 define Apos %'|%’|%`;
-define Punct LP|RP|Sym;
+define Punct [LP|RP|Sym];
 !define nonSym \[WS|LP|RP|Sym];
 !!!!!!!!!!!!!!!!!!
 ! </from tmorph> !
@@ -136,8 +135,24 @@
 
 define Domain Char+ [Dash Char+]* Dot TldEnd;
 
-!define XML "<" Alpha URLChar* (">");
-define XML "<" URLChar+ (">");
+! XML tags: opening tags with optional attributes (optionally self-closing,
+! e.g. <br/> or <br />) and closing tags. XMLns is an element or attribute
+! name with an optional namespace prefix separated by a colon.
+! NOTE(review): Caseinsensitive is defined outside this hunk - TODO confirm.
+define XMLns [AsciiLetter [AsciiLetter|Digit|%-]* (%: AsciiLetter [AsciiLetter|Digit|%-]*)] .o. Caseinsensitive;
+! Attribute values are quoted, may be empty and must not contain >.
+define XML [
+  "<" [
+      [
+        XMLns
+        [WS+ XMLns WS*
+          (%= WS* [[%" [? - %" - %>]* %"] | [%' [? - %' - %>]* %']])
+        ]* (WS* "/")
+      ]
+      |
+      ["/" XMLns]
+    ] WS* ">"
+].u;
 
 !define Email [Alpha [URLChar-At]* At Alpha URLChar* [Dot [[Alpha URLChar+]-Dot-At]]+];
 define Email URLChar+ At URLChar+ [Dot URLChar+]+;
@@ -193,17 +208,24 @@
 ! |============= Core =============|
 
 
+echo - Compile Real Token
 
-define RealToken [XML|Email|URL|SNS|Abbr|Plusampersand|Streetname|Omission|Domain|AcronymDep|File|Emdash|Punct|Ord|Num|Years|Emoji|Word];
+define RealToken [Punct|Word|XML|Email|URL|SNS|Domain|AcronymDep|File|Ord|Num|Years];
 
 echo - Introduce Token splitter
-define Token [RealToken @-> ... NLout]
-! .o. [NL -> 0]
-.o. [WS+ @-> 0]
+! Append NLout after each left-to-right longest-match token, then delete whitespace directly following an NLout.
+define Token [
+  RealToken @-> ... NLout,
+  Abbr @-> ... NLout,
+  Plusampersand @-> ... NLout,
+  Emoji @-> ... NLout,
+  [Streetname|Omission|Emdash] @-> ... NLout
+  ]
+.o. [WS+ @-> 0 || NLout _ ]
 ;
 
 echo - Introduce Sentence splitter
-read regex Token .o. [[["."|"!"|"?"]+|"…"] @-> ... NLout \/ NLout _];
+read regex Token .o. [[["."|"!"|"?"]+|"…"] @-> ... NLout \/ NLout _ ];
 
 ! foma -e "source tokenizer.xfst" -q -s && cat text.txt | flookup tokenizer.fst -x -b