| const { execSync } = require('child_process'); |
| const exp = require('constants'); |
| |
/**
 * End-to-end tests for the conllu2cmc command line script.
 *
 * Every test spawns `node src/index.js` (a path relative to the directory the
 * test runner is started from; adjust CLI_COMMAND if the script lives
 * elsewhere) and asserts on the text the script prints to stdout.
 *
 * All checks are synchronous (`execSync` + `expect`), so the tests do not take
 * a `done` callback: a failing expectation simply throws and fails the test.
 */
describe('conllu2cmc', () => {
  const CLI_COMMAND = 'node src/index.js';
  const SAMPLE_FILE = 'test/data/ndy.conllu';

  // Zero-width joiner. Spelled as an escape because the character is invisible
  // and is easily lost when the file is copied or reformatted.
  const ZWJ = '\u200D';

  /**
   * Runs the script with SAMPLE_FILE redirected to its stdin by the shell.
   *
   * @param {...string} flags - Command line flags placed before the redirect.
   * @returns {string} Everything the script wrote to stdout.
   */
  const convertSampleFile = (...flags) =>
    execSync([CLI_COMMAND, ...flags, '<', SAMPLE_FILE].join(' ')).toString();

  /**
   * Runs the script without flags, feeding `input` to its stdin.
   *
   * @param {string} input - CoNLL-U text to convert.
   * @returns {string} Everything the script wrote to stdout.
   */
  const convertText = (input) => execSync(CLI_COMMAND, { input }).toString();

  /**
   * Counts the non-overlapping matches of `pattern` in `text`.
   *
   * @param {string} text - Text to search.
   * @param {RegExp} pattern - Must carry the `g` flag; without it
   *   `String.prototype.match` returns capture groups instead of all matches.
   * @returns {number} Number of matches, 0 when there are none.
   */
  const countMatches = (text, pattern) => (text.match(pattern) || []).length;

  /**
   * Builds one token line with the ten CoNLL-U columns joined by tabs.
   * Only ID, FORM and (optionally) UPOS are filled in; the rest are `_`.
   *
   * @param {number} id - Value of the ID column.
   * @param {string} form - Value of the FORM column.
   * @param {string} [upos='_'] - Value of the UPOS column.
   * @returns {string} The token line, without a trailing newline.
   */
  const conlluToken = (id, form, upos = '_') =>
    [id, form, '_', upos, '_', '_', '_', '_', '_', '_'].join('\t');

  /**
   * Builds a one-sentence CoNLL-U document: three comment lines, one token
   * line per entry (IDs counted from 1) and a terminating blank line.
   *
   * @param {string} textId - Value for the `# text_id` comment.
   * @param {string} text - Value for the `# text` comment.
   * @param {Array<[string, string?]>} tokens - `[form, upos]` pairs; `upos`
   *   may be omitted.
   * @returns {string} The document, ending in two newline characters.
   */
  const conlluSentence = (textId, text, tokens) =>
    [
      '# foundry = base',
      `# text_id = ${textId}`,
      `# text = ${text}`,
      ...tokens.map(([form, upos], index) => conlluToken(index + 1, form, upos)),
      '',
      '',
    ].join('\n');

  /**
   * Assertions that the sparse and the full conversion of SAMPLE_FILE share.
   *
   * @param {string} stdout - Output of a conversion of SAMPLE_FILE.
   */
  const expectSampleAnnotations = (stdout) => {
    expect(stdout).toContain('😂\t😂\t_\tEMOIMG');
    expect(countMatches(stdout, /EMOIMG/g)).toBe(191);
    expect(countMatches(stdout, /EMOASC/g)).toBe(30);
    expect(countMatches(stdout, /\tURL\t/g)).toBe(4);
    expect(countMatches(stdout, /\tADR\t/g)).toBe(1);
  };

  test('Test sparse mode', () => {
    const stdout = convertSampleFile('-s');

    expectSampleAnnotations(stdout);

    // The sparse output is expected to carry exactly one "# eot" and one
    // "# eof" marker line.
    expect(countMatches(stdout, /\n# eot/g)).toBe(1);
    expect(countMatches(stdout, /\n# eof/g)).toBe(1);
    expect(stdout.split('\n').length).toBe(746);
  });

  test('Test full mode', () => {
    const stdout = convertSampleFile();

    expectSampleAnnotations(stdout);
    expect(stdout.split('\n').length).toBe(6202);
  });

  test('Regression test for issue #113: emoji modifiers and ZWJ', () => {
    // Compound emojis (base + skin tone modifier, ZWJ sequence) must be
    // recognized as single EMOIMG tokens whose lemma is the base emoji.
    const fistWithSkinTone = '✊🏿';
    const family = ['👨', '👨', '👦'].join(ZWJ);

    const stdout = convertText(
      conlluSentence('test-113', `${fistWithSkinTone} and ${family}`, [
        [fistWithSkinTone],
        ['and', 'CCONJ'],
        [family],
      ]),
    );

    expect(stdout).toContain(`${fistWithSkinTone}\t✊\t_\tEMOIMG`);
    expect(stdout).toContain(`${family}\t👨\t_\tEMOIMG`);

    // EMOIMG appears once per emoji (only in the XPOS column): 2 emojis -> 2.
    expect(countMatches(stdout, /EMOIMG/g)).toBe(2);
  });

  test('Regression test for issue #114: Wikipedia emoji templates', () => {
    // Wikipedia emoji templates must be recognized as EMOWIKI tokens.
    const winkTemplate = '[_EMOJI:{{S|;)}}_]';
    const coolTemplate = '[_EMOJI:{{cool}}_]';

    const stdout = convertText(
      conlluSentence('test-114', `${winkTemplate} and ${coolTemplate}`, [
        [winkTemplate],
        ['and', 'CCONJ'],
        [coolTemplate],
      ]),
    );

    // The templates are tagged as EMOWIKI in the XPOS column only.
    expect(stdout).toContain(`${winkTemplate}\t_\t_\tEMOWIKI`);
    expect(stdout).toContain(`${coolTemplate}\t_\t_\tEMOWIKI`);

    // EMOWIKI appears once per template: 2 templates -> 2.
    expect(countMatches(stdout, /EMOWIKI/g)).toBe(2);
  });

  test('Test emoji metadata in FEATS column', () => {
    // EMOIMG tokens must come with a populated FEATS column.
    const stdout = convertText(conlluSentence('test-feats', '😇', [['😇']]));

    // Expected metadata for 😇. Spaces in the emoji data are replaced by "_"
    // by the script while "&" is kept, hence "smileys_&_emotion".
    expect(stdout).toContain(
      'g=smileys_&_emotion|s=face_smiling|q=fully_qualified|v=E1.0|n=smiling_face_with_halo',
    );

    // The base emoji is used as lemma and the token is tagged EMOIMG.
    expect(stdout).toContain('😇\t😇\t_\tEMOIMG');
  });
});