Add Unicode emoji features
e.g.
5 😂 😂 EMOIMG EMOIMG
g=smileys_&_emotion|s=face_smiling|q=fully_qualified|v=E0.6|n=face_with_tears_of_joy
_ _ _ _
Change-Id: I96abc0b1933e86ab9a9ae015da919a4dc10a22b4
diff --git a/scripts/update_emoji_db.js b/scripts/update_emoji_db.js
new file mode 100644
index 0000000..1a82339
--- /dev/null
+++ b/scripts/update_emoji_db.js
@@ -0,0 +1,104 @@
+const https = require('https');
+const fs = require('fs');
+const path = require('path');
+
+const url = 'https://www.unicode.org/Public/UCD/latest/emoji/emoji-test.txt';
+const outputPath = path.join(__dirname, '../src/emoji_data.json');
+
+// Fetch, parse and write the emoji table; a non-200 reply aborts before any write.
+console.log(`Downloading ${url}...`);
+https.get(url, (res) => {
+  if (res.statusCode !== 200) {
+    console.error(`Error downloading file: HTTP ${res.statusCode}`);
+    process.exit(1);
+  }
+  res.setEncoding('utf8'); // keeps multi-byte characters split across chunks intact
+  let data = '';
+  res.on('data', (chunk) => { data += chunk; });
+  res.on('end', () => {
+    console.log('Download complete. Parsing data...');
+    const emojiData = parseEmojiData(data);
+    fs.writeFileSync(outputPath, JSON.stringify(emojiData, null, 2));
+    console.log(`Wrote ${Object.keys(emojiData).length} emojis to ${outputPath}`);
+  });
+}).on('error', (err) => {
+  console.error('Error downloading file: ' + err.message);
+  process.exit(1);
+});
+
+/**
+ * Parse the text of Unicode's emoji-test.txt into a table keyed by emoji.
+ * Data lines look like:
+ *   1F607 ; fully-qualified # 😇 E1.0 smiling face with halo
+ * "# group:" / "# subgroup:" lines set the group and subgroup recorded on the
+ * data lines that follow; every other line starting with '#' is ignored.
+ * @param {string} text - Full contents of emoji-test.txt.
+ * @returns {Object} Map from emoji string to { g, s, q, v, n }: normalized
+ *   group, subgroup and status, version (e.g. "E1.0"), and normalized name.
+ */
+function parseEmojiData(text) {
+  const GROUP_PREFIX = '# group:';
+  const SUBGROUP_PREFIX = '# subgroup:';
+  // Comment layout: [emoji] E[major.minor] [name]
+  const versionPattern = /E(\d+\.\d+)/;
+  const result = {};
+  let currentGroup = '';
+  let currentSubgroup = '';
+
+  for (const line of text.split('\n')) {
+    if (!line.trim()) continue;
+
+    if (line.startsWith(GROUP_PREFIX)) {
+      currentGroup = normalize(line.substring(GROUP_PREFIX.length).trim());
+      continue;
+    }
+    if (line.startsWith(SUBGROUP_PREFIX)) {
+      currentSubgroup = normalize(line.substring(SUBGROUP_PREFIX.length).trim());
+      continue;
+    }
+    // Any other comment line carries no data.
+    if (line.startsWith('#')) continue;
+
+    const semicolonIndex = line.indexOf(';');
+    if (semicolonIndex === -1) continue;
+
+    // Cut at the FIRST '#' only: the keycap emoji and its name ("keycap: #")
+    // contain '#' themselves, so splitting on every '#' loses their data.
+    const hashIndex = line.indexOf('#', semicolonIndex);
+    const dataEnd = hashIndex === -1 ? line.length : hashIndex;
+    const status = normalize(line.substring(semicolonIndex + 1, dataEnd).trim());
+    const commentPart = line.substring(dataEnd + 1).trim();
+
+    // Build the key from the code points, not the rendered emoji in the
+    // comment; split on whitespace runs so column padding cannot yield NaN.
+    const codePoints = line.substring(0, semicolonIndex).trim().split(/\s+/);
+    const emojiKey = String.fromCodePoint(...codePoints.map((cp) => parseInt(cp, 16)));
+
+    const versionMatch = commentPart.match(versionPattern);
+    let version = '';
+    let name = '';
+    if (versionMatch) {
+      version = 'E' + versionMatch[1];
+      // Name is everything after the version token.
+      const nameStart = versionMatch.index + versionMatch[0].length;
+      name = normalize(commentPart.substring(nameStart).trim());
+    } else {
+      // Should not happen with the standard file; keep the entry but warn.
+      console.warn(`Could not parse version for line: ${line}`);
+    }
+
+    result[emojiKey] = {
+      g: currentGroup,
+      s: currentSubgroup,
+      q: status,
+      v: version,
+      n: name
+    };
+  }
+  return result;
+}
+
+function normalize(str) {
+  const lowered = str.toLowerCase(); // case-fold first
+  return lowered.replace(/[ \-]+/g, '_'); // each run of spaces/hyphens -> one '_'
+}