// blob: ee1d4980c1eb173726f32b284c79ee89063e0b74 [file] [log] [blame]
const { execSync } = require('child_process');
const exp = require('constants');
describe('conllu2cmc', () => {
// Runs the converter with the `-s` flag (the "sparse mode" of the test name)
// over the ndy.conllu fixture and pins how often each annotation tag and
// structural marker appears in the output.
// execSync blocks until the child exits, so the test body is fully synchronous
// and needs no `done` callback.
test('Test sparse mode', () => {
  // Both paths are relative, i.e. resolved against the directory Jest is run from.
  const command = 'node src/index.js -s < test/data/ndy.conllu';
  const stdout = execSync(command).toString();

  // Number of non-overlapping matches of a global regex in the converter output
  // (String.prototype.match returns null when nothing matches, hence the fallback).
  const countMatches = (pattern) => (stdout.match(pattern) || []).length;

  expect(stdout).toContain('😂\t_\t_\tEMOIMG');
  expect(countMatches(/EMOIMG/g)).toBe(191);
  expect(countMatches(/EMOASC/g)).toBe(30);
  expect(countMatches(/\tURL\t/g)).toBe(4);
  expect(countMatches(/\tADR\t/g)).toBe(1);
  expect(countMatches(/\n# eot/g)).toBe(1);
  expect(countMatches(/\n# eof/g)).toBe(1);

  // split('\n') yields one element more than there are newline characters.
  const lineCount = stdout.split('\n').length;
  expect(lineCount).toBe(746);
});
// Runs the converter without extra flags (the "full mode" of the test name)
// over the ndy.conllu fixture and pins how often each annotation tag appears
// in the output, plus the overall output size.
// execSync blocks until the child exits, so the test body is fully synchronous
// and needs no `done` callback.
test('Test full mode', () => {
  // Both paths are relative, i.e. resolved against the directory Jest is run from.
  const command = 'node src/index.js < test/data/ndy.conllu';
  const stdout = execSync(command).toString();

  // Number of non-overlapping matches of a global regex in the converter output
  // (String.prototype.match returns null when nothing matches, hence the fallback).
  const countMatches = (pattern) => (stdout.match(pattern) || []).length;

  expect(stdout).toContain('😂\t_\t_\tEMOIMG');
  expect(countMatches(/EMOIMG/g)).toBe(191);
  expect(countMatches(/EMOASC/g)).toBe(30);
  expect(countMatches(/\tURL\t/g)).toBe(4);
  expect(countMatches(/\tADR\t/g)).toBe(1);

  // split('\n') yields one element more than there are newline characters.
  const lineCount = stdout.split('\n').length;
  expect(lineCount).toBe(6202);
});
// Regression test for issue #113: an emoji carrying a skin-tone modifier and a
// ZWJ-joined emoji sequence must each be tagged as one EMOIMG token.
// The converter is fed through stdin synchronously, so no `done` callback is needed.
test('Regression test for issue #113: emoji modifiers and ZWJ', () => {
  // CoNLL-U token rows are TAB-separated. Joining the columns explicitly keeps
  // the separators intact even if an editor or viewer normalizes whitespace.
  const row = (...columns) => columns.join('\t');
  const testInput = [
    '# foundry = base',
    '# text_id = test-113',
    '# text = ✊🏿 and 👨‍👨‍👦',
    row('1', '✊🏿', '_', '_', '_', '_', '_', '_', '_', '_'),
    row('2', 'and', '_', 'CCONJ', '_', '_', '_', '_', '_', '_'),
    row('3', '👨‍👨‍👦', '_', '_', '_', '_', '_', '_', '_', '_'),
    '', // yields the single trailing newline after the last token row
  ].join('\n');

  // Uses the file-level execSync import; `input` is piped to the child's stdin.
  const stdout = execSync('node src/index.js', { input: testInput }).toString();

  // Each compound emoji must come back as FORM, two untouched columns, then EMOIMG.
  expect(stdout).toContain('✊🏿\t_\t_\tEMOIMG');
  expect(stdout).toContain('👨‍👨‍👦\t_\t_\tEMOIMG');

  // Exactly one EMOIMG per emoji: the tag must appear in a single column only.
  const emoimgCount = (stdout.match(/EMOIMG/g) || []).length;
  expect(emoimgCount).toBe(2); // 2 emojis × 1 column = 2
});
// Regression test for issue #114: Wikipedia emoji templates of the form
// [_EMOJI:{{...}}_] must each be tagged as one EMOWIKI token.
// The converter is fed through stdin synchronously, so no `done` callback is needed.
test('Regression test for issue #114: Wikipedia emoji templates', () => {
  // CoNLL-U token rows are TAB-separated. Joining the columns explicitly keeps
  // the separators intact even if an editor or viewer normalizes whitespace.
  const row = (...columns) => columns.join('\t');
  const testInput = [
    '# foundry = base',
    '# text_id = test-114',
    '# text = [_EMOJI:{{S|;)}}_] and [_EMOJI:{{cool}}_]',
    row('1', '[_EMOJI:{{S|;)}}_]', '_', '_', '_', '_', '_', '_', '_', '_'),
    row('2', 'and', '_', 'CCONJ', '_', '_', '_', '_', '_', '_'),
    row('3', '[_EMOJI:{{cool}}_]', '_', '_', '_', '_', '_', '_', '_', '_'),
    '', // yields the single trailing newline after the last token row
  ].join('\n');

  // Uses the file-level execSync import; `input` is piped to the child's stdin.
  const stdout = execSync('node src/index.js', { input: testInput }).toString();

  // Each template must come back as FORM, two untouched columns, then EMOWIKI.
  expect(stdout).toContain('[_EMOJI:{{S|;)}}_]\t_\t_\tEMOWIKI');
  expect(stdout).toContain('[_EMOJI:{{cool}}_]\t_\t_\tEMOWIKI');

  // Exactly one EMOWIKI per template: the tag must appear in a single column only.
  const emowikiCount = (stdout.match(/EMOWIKI/g) || []).length;
  expect(emowikiCount).toBe(2); // 2 templates × 1 column = 2
});
});