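Restructure XFST sources

Move the URL, domain, e-mail, SNS and file-name definitions out of
tokenizer.xfst into a new protocols.xfst. The sourced files
(emoticons.xfst, entities.xfst, xml.xfst) now define their networks by
name instead of using "read regex" followed by a bare "define NAME;"
in tokenizer.xfst.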

Change-Id: I92c899a124caf724cc782cd168a96252e81ce832
diff --git a/src/emoticons.xfst b/src/emoticons.xfst
index 74bffff..9664848 100644
--- a/src/emoticons.xfst
+++ b/src/emoticons.xfst
@@ -15,7 +15,7 @@
 [ ["o"|"O"] ["_"|"."|"-"]+ ["o"|"O"] ] 
 ];
 
-read regex [
+define Emoticons [
 ["<" ("/") "3"+] |
 verticalemoticon (";"+|"^") |
 ["(" verticalemoticon ")"] |
diff --git a/src/entities.xfst b/src/entities.xfst
index f185e31..a5b3149 100644
--- a/src/entities.xfst
+++ b/src/entities.xfst
@@ -2,4 +2,4 @@
 define XMLentitiesHex ["#" ["x"|"X"] HexLetter HexLetter+ ];
 define XMLentitiesStr [[[AsciiLetter .o. Caseinsensitive].l | Digit | "_" | "-" ] ];
 
-read regex ["&" [XMLentitiesDec|XMLentitiesHex|XMLentitiesStr XMLentitiesStr+] ";"];
\ No newline at end of file
+define XMLEntities ["&" [XMLentitiesDec|XMLentitiesHex|XMLentitiesStr XMLentitiesStr+] ";"];
\ No newline at end of file
diff --git a/src/protocols.xfst b/src/protocols.xfst
new file mode 100644
index 0000000..34b7b41
--- /dev/null
+++ b/src/protocols.xfst
@@ -0,0 +1,60 @@
+! Protocol-like tokens: URLs, domains, e-mail addresses, social-network
+! handles and file names.
+!
+! This file only defines networks and is read with "source" from
+! tokenizer.xfst. It uses Char, Sym, SP, Dash, AsciiLetter and
+! Caseinsensitive without defining them, so they must already be defined
+! at the point where this file is sourced.
+
+! Characters allowed in URLs and e-mail addresses: any Char, or any Sym
+! except "<", ">" and the double quote.
+define URLChar [Char|[Sym - ["<"|">"|%"]]];
+
+! Separators, either as the plain character or spelled out in round or
+! square brackets, e.g. "(dot)" or "[at]". The opening and closing bracket
+! are chosen independently, so "(dot]" is accepted as well.
+define Dot "."|[["["|"("] "d" "o" "t" [")"|"]"]] .o. Caseinsensitive;
+define At "@"|[["["|"("] "a" "t" [")"|"]"]] .o. Caseinsensitive;
+
+! Top-level domains accepted at the end of a Domain.
+define TldEnd [{org}|{de}|{com}] .o. Caseinsensitive;
+
+! Very relaxed URL scheme, not based on the strict Lucene implementation:
+! a scheme ("http://", "https://", "ftp://", "file://") or "www" plus Dot,
+! then at least two URLChar with any mix of URLChar and SP in between.
+define URL [ [ [{http} (s) | {ftp} | {file}] ":" "/" "/"] | [{www} Dot] ]
+URLChar [URLChar|SP]* URLChar
+.o. Caseinsensitive;
+
+! Host name: Char+ groups joined by Dash, then Dot and a TldEnd.
+define Domain Char+ [Dash Char+]* Dot TldEnd;
+
+! Email addresses
+define Email URLChar+ At URLChar+ [Dot URLChar+]+;
+
+! Twitter user, hashtag, Google+
+define SNS ["@"|"#"|"+"] Char+;
+
+! File name extensions recognised by File (without the leading ".").
+define FileEnd [
+                [{htm} ("l")]|
+                [{doc} ("x")]|
+                {pdf}|
+                ["j" "p" ("e") "g"]|
+                ["m" "p" ["3"|"4"]]|
+                {ogg}|
+                {png}|
+                [{ppt} ("x")]|
+                {avi}|
+                {txt}|
+                {xls}|
+                {xml}|
+                {aac}|
+                {gif}|
+                {exe}
+                ] .o. Caseinsensitive;
+
+! File name with an optional path: an optional root (drive letter plus
+! ":\", or "/"), optional path characters, a base name made of Char, "-"
+! or "_", a literal "." and a FileEnd.
+define File (( AsciiLetter ":" %\ | "/" ) [ Char | "_" | "-" | Char [ %\ | "/" ] ]*) [Char | "-" | "_" ]+ "." FileEnd;
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index b75ece5..f20d7e2 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -69,8 +69,6 @@
 define Plusampersand @txt"txt/plusampersand.txt";
 define Word [Plusampersand | Word] (Dash [Plusampersand | Word])*;
 
-
-define URLChar [Char|[Sym - ["<"|">"|%"]]];
 !define Alpha ["a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"|"l"|"m"|"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"|"w"|"x"|"y"|"z"|"_"];
 
 define Caseinsensitive [
@@ -119,59 +117,21 @@
 ! 20:00 Uhr, 00:12:25,34 Minuten
 define Times [ ( [%0|1|2|3|4|5] ) Digit [ ":" [%0|1|2|3|4|5] Digit ]^{1,2} ( "," [ Digit ]^{1,3} ) ];
 
+! Emoticons (emoticons.xfst defines Emoticons)
 source emoticons.xfst
-define Emoticons;
 
 ! acronyms: U.S.A., I.B.M., etc.
 ! use a post-filter to remove dots
 define AcronymDep Letter %. Letter %. [Letter %.]+;
 
-define Dot "."|[["["|"("] "d" "o" "t" [")"|"]"]] .o. Caseinsensitive;
-define At "@"|[["["|"("] "a" "t" [")"|"]"]] .o. Caseinsensitive;
-
-define TldEnd [{org}|{de}|{com}] .o. Caseinsensitive;
-
-! Very relaxed URL scheme, not based on the strict Lucene implementation
-define URL [ [ [{http} (s) | {ftp} | {file}] ":" "/" "/"] | [{www} Dot] ]
-URLChar [URLChar|SP]* URLChar
-.o. Caseinsensitive;
-
-define Domain Char+ [Dash Char+]* Dot TldEnd;
-
 ! XML sources
 source xml.xfst
-define XML;
 
 ! XML entities
 source entities.xfst
-define XMLEntities;
 
-
-! Email addresses
-define Email URLChar+ At URLChar+ [Dot URLChar+]+;
-
-! Twitter user, hashtag, Google+
-define SNS ["@"|"#"|"+"] Char+;
-
-define FileEnd [
-                [{htm} ("l")]|
-                [{doc} ("x")]|
-                {pdf}|
-                ["j" "p" ("e") "g"]|
-                ["m" "p" ["3"|"4"]]|
-                {ogg}|
-                {png}|
-                [{ppt} ("x")]|
-                {avi}|
-                {txt}|
-                {xls}|
-                {xml}|
-                {aac}|
-                {gif}|
-                {exe}
-                ] .o. Caseinsensitive;
-
-define File (( AsciiLetter ":" %\ | "/" ) [ Char | "_" | "-" | Char [ %\ | "/" ] ]*) [Char | "-" | "_" ]+ "." FileEnd;
+! URLs, domains, e-mail addresses, SNS handles and file names (defined in protocols.xfst)
+source protocols.xfst
 
 define Streetname Word {str} %.;
 
diff --git a/src/xml.xfst b/src/xml.xfst
index 1c7421f..06e247d 100644
--- a/src/xml.xfst
+++ b/src/xml.xfst
@@ -1,6 +1,6 @@
 ! XML rule
 define XMLns [AsciiLetter [AsciiLetter|Digit|%-]* (%: AsciiLetter [AsciiLetter|Digit|%-]*)] .o. Caseinsensitive;
-read regex [
+define XML [
   "<" [
       [
         XMLns