blob: a6be3496cfbd221094441d563ee5b5aa13bf0a9b [file] [log] [blame]
Marc Kupietzb43a5182024-02-03 18:09:10 +01001const { execSync } = require('child_process');
2const exp = require('constants');
3
4describe('conllu2cmc', () => {
5 test('Test sparse mode', (done) => {
6 // Modify the command based on your script's location and options
7 const command = 'node src/index.js -s < test/data/ndy.conllu';
8 const stdout = execSync(command).toString();
9 expect(stdout).toContain('😂\t_\tEMOIMG\tEMOIMG');
10 var emoimg_count = (stdout.match(/EMOIMG/g) || []).length;
11 expect(emoimg_count).toBe(382);
12 var ascimg_count = (stdout.match(/EMOASC/g) || []).length;
13 expect(ascimg_count).toBe(60);
14 var ascimg_count = (stdout.match(/URL/g) || []).length;
15 expect(ascimg_count).toBe(8);
16 var adr_count = (stdout.match(/ADR/g) || []).length;
17 expect(adr_count).toBe(2);
Marc Kupietzfd92b1d2024-03-13 10:51:29 +010018 var eot_count = (stdout.match(/\n# eot/g) || []).length;
19 expect(eot_count).toBe(1);
20 var eof_count = (stdout.match(/\n# eof/g) || []).length;
21 expect(eof_count).toBe(1);
Marc Kupietzb43a5182024-02-03 18:09:10 +010022 var lines_count = (stdout.split("\n")).length;
Marc Kupietzfd92b1d2024-03-13 10:51:29 +010023 expect(lines_count).toBe(746);
Marc Kupietzb43a5182024-02-03 18:09:10 +010024 done();
25 });
26
27 test('Test full mode', (done) => {
28 const command = 'node src/index.js < test/data/ndy.conllu';
29 const stdout = execSync(command).toString();
30 expect(stdout).toContain('😂\t_\tEMOIMG\tEMOIMG');
31 var emoimg_count = (stdout.match(/EMOIMG/g) || []).length;
32 expect(emoimg_count).toBe(382);
33 var ascimg_count = (stdout.match(/EMOASC/g) || []).length;
34 expect(ascimg_count).toBe(60);
35 var ascimg_count = (stdout.match(/URL/g) || []).length;
36 expect(ascimg_count).toBe(8);
37 var adr_count = (stdout.match(/ADR/g) || []).length;
38 expect(adr_count).toBe(2);
39 var lines_count = (stdout.split("\n")).length;
Marc Kupietzfd92b1d2024-03-13 10:51:29 +010040 expect(lines_count).toBe(6202);
Marc Kupietzb43a5182024-02-03 18:09:10 +010041 done();
42 });
Marc Kupietz7497fc42025-12-11 15:47:34 +010043 test('Regression test for issue #114: Wikipedia emoji templates', (done) => {
44 // Test that Wikipedia emoji templates are recognized as EMOWIKI tokens
45 const testInput = `# foundry = base
46# text_id = test-114
47# text = [_EMOJI:{{S|;)}}_] and [_EMOJI:{{cool}}_]
481 [_EMOJI:{{S|;)}}_] _ _ _ _ _ _ _ _
492 and _ CCONJ _ _ _ _ _ _
503 [_EMOJI:{{cool}}_] _ _ _ _ _ _ _ _
51
52`;
53 const { execSync } = require('child_process');
54 const stdout = execSync('node src/index.js', { input: testInput }).toString();
55
56 // Check that Wikipedia emoji templates are tagged as EMOWIKI
57 expect(stdout).toContain('[_EMOJI:{{S|;)}}_]\t_\tEMOWIKI\tEMOWIKI');
58 expect(stdout).toContain('[_EMOJI:{{cool}}_]\t_\tEMOWIKI\tEMOWIKI');
59
60 // Count EMOWIKI occurrences (should be 2 for each template - columns 3 and 4)
61 var emowiki_count = (stdout.match(/EMOWIKI/g) || []).length;
62 expect(emowiki_count).toBe(4); // 2 templates × 2 columns = 4
63 done();
64 });
Marc Kupietzb43a5182024-02-03 18:09:10 +010065});