Improve Emoticons

Change-Id: I0d72781b41381aa2c86e41287b8f824af4af95d1
diff --git a/src/emoji.xfst b/src/emoji.xfst
deleted file mode 100644
index d36ed38..0000000
--- a/src/emoji.xfst
+++ /dev/null
@@ -1,28 +0,0 @@
-read regex [
-["<" ("/") "3"+]|
-["ಠ" "_" "ಠ"]|
-["(" "T" ["_"|"."] "T" ")"]|
-["(" "♥" ["_"|"."] "♥" ")"]|
-["(" "-" ["_"|"."] "-" ")"]|
-
-! May be end of brackets as well, like
-!   Author (2018):
-[[")"|"("] ("'"|"-"|"o") [":"|"="|"x"]]|
-! May be end of xml, like
-!   <b class="emp">=</b>
-[["<"*|">"*] ["B"|"X"|"8"|":"|";"|"="|"x"] ("'"|"-"|"o") ["/"|"<"|"C"|"["|")"|"("|"D"|"P"|"d"|"p"|"3"|">"|"o"|"O"|"*"]]|
-[["D"|">"] ("'") ":"]|
-
-! May be end of square bracket
-!   Author [2018]:
-["]" ":"]|
-["x" "("]|
-["^" (".") "^"]|
-["o" (".") "O"]|
-[%\ ["{" "o" "}"|"o"|"m"] "/"]|
-["*" "_" "*"]|
-["." "_" "."]|
-[":" ["a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"|"l"|"m"|"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"|"w"|"x"|"y"|"z"|"_"|"-"]+ ":"]|
-[">" "_" "<"]|
-["*" "<" ":" "-" ")"]
-];
diff --git a/src/emoticons.xfst b/src/emoticons.xfst
new file mode 100644
index 0000000..74bffff
--- /dev/null
+++ b/src/emoticons.xfst
@@ -0,0 +1,42 @@
+! Partially based on Park, Barash, Fink & Cha (2013)
+
+! Vertical emoticons - two eye symbols around a mouth of underscores, dots or hyphens
+define verticalemoticon [
+! Eyes with at least one mouth character in between, like T_T or o.O
+  ["ಠ" "_" "ಠ"] |
+  ["T" ["-" | "." | "_"]+ "T"] | ["♥" ["-" | "." | "_"]+ "♥"] |
+  ["x" ["-" | "." | "_"]+ "x"] | ["X" ["-" | "." | "_"]+ "X"] |
+  ["*" ["-" | "." | "_"]+ "*"] |
+  [["O" | "o"] ["-" | "." | "_"]+ ["O" | "o"]] |
+! Hyphen eyes and dot eyes only accept a reduced set of mouth characters
+  ["-" ["." | "_"]+ "-"] | ["." "_"+ "."] |
+! The mouth is optional for these eyes, so ^^ is matched as well
+  ["@" ["-" | "." | "_"]* "@"] | ["^" ["-" | "." | "_"]* "^"] |
+  [">" ["-" | "." | "_"]* "<"]
+];
+
+read regex [
+! Hearts and broken hearts, like <3 and </3
+["<" ("/") "3"+] |
+! Vertical emoticons (including ^^ and >_<), bare, parenthesized, or followed by semicolons or a caret
+[verticalemoticon (";"+|"^")] | ["(" verticalemoticon ")"] |
+
+! Reversed smileys (mouth, optional nose, eyes). May be end of brackets as well, like
+!   Author (2018):
+[ [")"|"("] ["'"|"-"|"o"]* [":"|"="|"x"] ] |
+! Smileys (optional angle brackets, eyes, optional nose, mouth). May be end of xml as well, like
+!   <b class="emp">=</b>
+[ ["<"*|">"*] ["B"|"X"|"8"|":"|";"|"="|"x"] ["'"|"-"|"o"]* ["/"|"<"|"C"|"["|")"|"("|"D"|"P"|"d"|"p"|"3"|">"|"o"|"O"|"*"]] |
+! The alternative above also matches an x followed by a parenthesis and forms like >:-P or :o
+[ ["D"|">"] ("'") ":"] |
+
+! May be end of square bracket as well, like
+!   Author [2018]:
+["]" ":"] |
+! Smileys with repeated mouths, whose nose may contain asterisks
+[(">") [";"|":"] ["-"|"*"]* [ ")" | "(" | %] | %[ ]+ ] |
+[%\ ["{" "o" "}"|"o"|"m"] "/"] |
+! Shortcodes in colons, like :smile:
+[":" ["a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"|"l"|"m"|"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"|"w"|"x"|"y"|"z"|"_"|"-"]+ ":"] |
+["*" "<" ":" "-" ")"]
+];
diff --git a/src/tokenizer.xfst b/src/tokenizer.xfst
index 5fe1aec..4372d4a 100644
--- a/src/tokenizer.xfst
+++ b/src/tokenizer.xfst
@@ -119,8 +119,8 @@
 ! 20:00 Uhr, 00:12:25,34 Minuten
 define Times [ ( [%0|1|2|3|4|5] ) Digit [ ":" [%0|1|2|3|4|5] Digit ]^{1,2} ( "," [ Digit ]^{1,3} ) ];
 
-source emoji.xfst
-define Emoji;
+source emoticons.xfst
+define Emoticons;
 
 ! acronyms: U.S.A., I.B.M., etc.
 ! use a post-filter to remove dots
@@ -215,7 +215,7 @@
   Email @-> ... NLout,
   File @-> ... NLout,
   Domain @-> ... NLout,
-  Emoji @-> ... NLout
+  Emoticons @-> ... NLout
 ];
 
 echo - Introduce Sentence splitter