const { execSync } = require('child_process');
// NOTE(review): `exp` is not referenced anywhere in this file; it looks like an
// accidental editor auto-import. Kept so the module's imports stay unchanged.
const exp = require('constants');

// Emoji fixtures are spelled with explicit escapes because their joiner and
// modifier code points are invisible (or easily stripped) in source text.
// Raised fist (U+270A) followed by the dark skin-tone modifier (U+1F3FF).
const FIST_WITH_MODIFIER = '\u270A\u{1F3FF}';
// Man + ZWJ + man + ZWJ + boy: a single ZWJ-joined "family" sequence.
const FAMILY_ZWJ_SEQUENCE = '\u{1F468}\u200D\u{1F468}\u200D\u{1F466}';

/**
 * Count the matches of a global regular expression in a string.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} pattern - Regular expression; must carry the `g` flag so
 *   that `String.prototype.match` returns one entry per match.
 * @returns {number} Number of matches, or 0 when nothing matches.
 */
function countMatches(text, pattern) {
  return (text.match(pattern) || []).length;
}

/**
 * Run a command synchronously and return everything it wrote to stdout.
 *
 * @param {string} command - Command line handed to `execSync`.
 * @param {object} [options] - Options forwarded to `execSync` (for example
 *   `{ input }` to feed the child's stdin).
 * @returns {string} The child's stdout decoded as a string.
 * @throws {Error} Whatever `execSync` throws (e.g. on a non-zero exit code).
 */
function run(command, options) {
  return execSync(command, options).toString();
}

/**
 * Build a small CoNLL-U document with TAB-separated columns.
 *
 * Every token row has ten columns: the 1-based index, the form, and the given
 * UPOS value in the fourth column; all remaining columns are `_`. The result
 * ends with one blank line after the last token row.
 *
 * @param {string} textId - Value for the `# text_id` comment line.
 * @param {string} text - Value for the `# text` comment line.
 * @param {Array<[string, string?]>} tokens - `[form, upos]` pairs; `upos`
 *   defaults to `_`.
 * @returns {string} The document, ready to be piped to the CLI's stdin.
 */
function buildConllu(textId, text, tokens) {
  const comments = [
    '# foundry = base',
    `# text_id = ${textId}`,
    `# text = ${text}`,
  ];
  const rows = tokens.map(([form, upos = '_'], index) =>
    [index + 1, form, '_', upos, '_', '_', '_', '_', '_', '_'].join('\t'),
  );
  // Two trailing empty entries yield "<last row>\n\n": the row terminator
  // plus one blank line.
  return [...comments, ...rows, '', ''].join('\n');
}

describe('conllu2cmc', () => {
  test('Test sparse mode', () => {
    // Feeds the ndy.conllu fixture to the CLI on stdin with the `-s` flag.
    const stdout = run('node src/index.js -s < test/data/ndy.conllu');

    expect(stdout).toContain('😂\t_\tEMOIMG\tEMOIMG');
    expect(countMatches(stdout, /EMOIMG/g)).toBe(382);
    expect(countMatches(stdout, /EMOASC/g)).toBe(60);
    expect(countMatches(stdout, /URL/g)).toBe(8);
    expect(countMatches(stdout, /ADR/g)).toBe(2);
    // The end-of-text and end-of-file markers must each appear exactly once.
    expect(countMatches(stdout, /\n# eot/g)).toBe(1);
    expect(countMatches(stdout, /\n# eof/g)).toBe(1);
    expect(stdout.split('\n').length).toBe(746);
  });

  test('Test full mode', () => {
    // Same fixture as the sparse test, but without the `-s` flag.
    const stdout = run('node src/index.js < test/data/ndy.conllu');

    expect(stdout).toContain('😂\t_\tEMOIMG\tEMOIMG');
    expect(countMatches(stdout, /EMOIMG/g)).toBe(382);
    expect(countMatches(stdout, /EMOASC/g)).toBe(60);
    expect(countMatches(stdout, /URL/g)).toBe(8);
    expect(countMatches(stdout, /ADR/g)).toBe(2);
    expect(stdout.split('\n').length).toBe(6202);
  });

  test('Regression test for issue #113: emoji modifiers and ZWJ', () => {
    // Compound emojis (skin-tone modifier, ZWJ sequence) must each be
    // recognized as a single EMOIMG token.
    const testInput = buildConllu(
      'test-113',
      `${FIST_WITH_MODIFIER} and ${FAMILY_ZWJ_SEQUENCE}`,
      [[FIST_WITH_MODIFIER], ['and', 'CCONJ'], [FAMILY_ZWJ_SEQUENCE]],
    );
    const stdout = run('node src/index.js', { input: testInput });

    expect(stdout).toContain(`${FIST_WITH_MODIFIER}\t_\tEMOIMG\tEMOIMG`);
    expect(stdout).toContain(`${FAMILY_ZWJ_SEQUENCE}\t_\tEMOIMG\tEMOIMG`);

    // 2 emojis × 2 columns (columns 3 and 4) = 4 occurrences.
    expect(countMatches(stdout, /EMOIMG/g)).toBe(4);
  });

  test('Regression test for issue #114: Wikipedia emoji templates', () => {
    // Wikipedia emoji templates must be recognized as EMOWIKI tokens.
    const winkTemplate = '[_EMOJI:{{S|;)}}_]';
    const coolTemplate = '[_EMOJI:{{cool}}_]';
    const testInput = buildConllu(
      'test-114',
      `${winkTemplate} and ${coolTemplate}`,
      [[winkTemplate], ['and', 'CCONJ'], [coolTemplate]],
    );
    const stdout = run('node src/index.js', { input: testInput });

    expect(stdout).toContain(`${winkTemplate}\t_\tEMOWIKI\tEMOWIKI`);
    expect(stdout).toContain(`${coolTemplate}\t_\tEMOWIKI\tEMOWIKI`);

    // 2 templates × 2 columns (columns 3 and 4) = 4 occurrences.
    expect(countMatches(stdout, /EMOWIKI/g)).toBe(4);
  });
});