Fix emoticon matching before letters (e.g., Wikipedia:Diskussionen)

Add a trailing-context lookahead to the emoticon rule so that an emoticon
such as :D only matches when it is NOT followed by a letter. This prevents
false emoticon matches in strings like Wikipedia:Diskussionen, where the
colon is a namespace separator.

Before: Wikipedia:Diskussionen → Wikipedia :D iskussionen
After:  Wikipedia:Diskussionen → Wikipedia : Diskussionen

Resolves #134

Change-Id: Ia9d6659e604eb514172e2182c94a206b5b45023f
diff --git a/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex b/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex
index 5582821..1d93e6f 100644
--- a/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex
+++ b/src/main/jpc/jflex/de/ids_mannheim/korap/tokenizer/DerekoDfaTokenizer.jflex
@@ -910,7 +910,7 @@
 ([.][.]+|…+)                                                 {return currentToken("...");}
 {LONG_END_PUNCT}                                        { return currentToken();}
 {PUNCT}                                               { return currentToken();}
-{EMOTICON}                                          { return currentToken();}
+{EMOTICON} / [^[:letter:]]                         { /* Trailing context: accept the emoticon only when the next character is not a letter. NOTE(review): the lookahead needs one following character, so an emoticon at the very end of input may no longer match here; confirm against the tokenizer's end-of-input handling. */ return currentToken();}
 {DASH}{DoubleLiteral}                               { return currentToken();}
 {EMOJI_COMPLEX}                                    { return currentToken();}
 <<EOF>>                                             { fileEnd(); return null;}
diff --git a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
index e50e2e1..b0ee8c3 100644
--- a/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
+++ b/src/test/java/de/ids_mannheim/korap/tokenizer/TokenizerTest.java
@@ -1195,6 +1195,24 @@
         assertEquals(".", tokens[6]);
         assertEquals(7, tokens.length);
     }
+
+    // Regression test for #134: an emoticon must not match when a letter directly follows it.
+    // "Wikipedia:Diskussionen" must yield ":" as its own token, not the emoticon ":D".
+    @Test
+    public void testEmoticonNotMatchBeforeLetter() {
+        DerekoDfaTokenizer_de tok = new DerekoDfaTokenizer_de();
+        
+        String[] tokens = tok.tokenize("Wikipedia:Diskussionen");
+        assertEquals("Wikipedia", tokens[0]);
+        assertEquals(":", tokens[1]);
+        assertEquals("Diskussionen", tokens[2]);
+        assertEquals(3, tokens.length);
+        
+        // An emoticon followed by a non-letter (here "!") must still come out as a single token.
+        tokens = tok.tokenize("Great :D!");
+        assertEquals("Great", tokens[0]);
+        assertEquals(":D", tokens[1]);
+        assertEquals("!", tokens[2]);
+        assertEquals(3, tokens.length);
+    }
 }
-
-