Add Unicode emoji metadata features to EMOIMG tokens
e.g.
5 😂 😂 EMOIMG EMOIMG g=smileys_&_emotion|s=face_smiling|q=fully_qualified|v=E0.6|n=face_with_tears_of_joy _ _ _ _
Change-Id: I96abc0b1933e86ab9a9ae015da919a4dc10a22b4
diff --git a/src/index.js b/src/index.js
index 7f2b2ce..3bd2d5d 100755
--- a/src/index.js
+++ b/src/index.js
@@ -8,6 +8,14 @@
const actionWordRegex = /^:[^:]+:$/;
const wikiEmojiRegex = /^\[_EMOJI:[^\]]+\]$/;
+// Load emoji data
+let emojiData = {};
+try {
+ emojiData = require('./emoji_data.json');
+} catch (e) {
+ // Silent fallback: any load failure (e.g. file not yet generated during initial setup, or unparsable JSON) leaves emojiData empty
+}
+
// Function to strip emoji modifiers and zero-width joiners to get base emoji
function getBaseEmoji(emoji) {
const stripped = emoji
@@ -146,8 +154,17 @@
columns[5] = '_';
// For EMOIMG tokens, set lemma to the base emoji (without modifiers)
if (new_tag === 'EMOIMG') {
- columns[2] = getBaseEmoji(word);
+ const base = getBaseEmoji(word);
+ columns[2] = base;
columns[3] = 'EMOIMG';
+
+ // Look up emoji metadata
+ // Try exact match first, then base emoji
+ const data = emojiData[word] || emojiData[base];
+ if (data) {
+ // g=group|s=subgroup|q=qualified|v=version|n=name
+ columns[5] = `g=${data.g}|s=${data.s}|q=${data.q}|v=${data.v}|n=${data.n}`;
+ }
}
if (global.standalone) {
process.stdout.write(fileheader);