Added context rule for I
Change-Id: I5f3449c3994960503b616fa735de9d2ab9951ff7
diff --git a/src/en/tokenizer.xfst b/src/en/tokenizer.xfst
index 6b3cc78..adcdac8 100644
--- a/src/en/tokenizer.xfst
+++ b/src/en/tokenizer.xfst
@@ -107,6 +107,10 @@
define RealToken [Punct|Emdash|Abbr|Word|SNS|AcronymDep|Ord|Num|Years|Times|XMLEntities|Omission];
+! Treatment of "I" as a word (not an abbreviation), as in "M. I. Baxter was killed in World War I. So was I."
+define NonAbbrI [ {Part} | {part} | {War} | {am} | {and} | {did} | {do} | {had} | {have} | {not} | {than} | {was} | {will} | {would} ];
+
+
echo - Introduce Token splitter
define Token [
@@ -117,7 +121,7 @@
File @-> ... NLout,
Domain @-> ... NLout,
[Emoticons|Arrows] @-> ... NLout
-];
+] .o. ["I" @-> ... NLout \/ NonAbbrI [WS | NLout ]+ _ ];
source all/allsentencesplit.xfst