Add regression test for HST (hashtag) tagging
Change-Id: I0091f5b7523c695ff841bd93630dcf7a7f5efb90
diff --git a/test/test.js b/test/test.js
index 4a6be49..0f670e7 100644
--- a/test/test.js
+++ b/test/test.js
@@ -11,6 +11,8 @@
expect(emoimg_count).toBe(191);
var ascimg_count = (stdout.match(/EMOASC/g) || []).length;
expect(ascimg_count).toBe(30);
+ var hst_count = (stdout.match(/\tHST\t/g) || []).length;
+ expect(hst_count).toBe(14);
var url_count = (stdout.match(/\tURL\t/g) || []).length;
expect(url_count).toBe(4);
var adr_count = (stdout.match(/\tADR\t/g) || []).length;
@@ -20,25 +22,46 @@
var eof_count = (stdout.match(/\n# eof/g) || []).length;
expect(eof_count).toBe(1);
var lines_count = (stdout.split("\n")).length;
- expect(lines_count).toBe(746);
+ expect(lines_count).toBe(810);
done();
});
test('Test full mode', (done) => {
const command = 'node src/index.js < test/data/ndy.conllu';
const stdout = execSync(command).toString();
- expect(stdout).toContain('😂\t😂\t_\tEMOIMG');
- var emoimg_count = (stdout.match(/EMOIMG/g) || []).length;
- expect(emoimg_count).toBe(191);
- var ascimg_count = (stdout.match(/EMOASC/g) || []).length;
- expect(ascimg_count).toBe(30);
- var url_count = (stdout.match(/\tURL\t/g) || []).length;
- expect(url_count).toBe(4);
- var adr_count = (stdout.match(/\tADR\t/g) || []).length;
- expect(adr_count).toBe(1);
- var lines_count = (stdout.split("\n")).length;
- expect(lines_count).toBe(6202);
- done();
+ expect(stdout).toContain('😂\t😂\t_\tEMOIMG');
+ var emoimg_count = (stdout.match(/EMOIMG/g) || []).length;
+ expect(emoimg_count).toBe(191);
+ var ascimg_count = (stdout.match(/EMOASC/g) || []).length;
+ expect(ascimg_count).toBe(30);
+ var hst_count = (stdout.match(/\tHST\t/g) || []).length;
+ expect(hst_count).toBe(14);
+ var url_count = (stdout.match(/\tURL\t/g) || []).length;
+ expect(url_count).toBe(4);
+ var adr_count = (stdout.match(/\tADR\t/g) || []).length;
+ expect(adr_count).toBe(1);
+ var lines_count = (stdout.split("\n")).length;
+ expect(lines_count).toBe(6202);
+ done();
+ });
+
+ test('Regression test for hashtags: emit HST', (done) => {
+ const testInput = [
+ '# foundry = base',
+ '# text_id = test-hashtag',
+ '# text = #KorAP #10',
+ ['1', '#KorAP', '_', '_', '_', '_', '_', '_', '_', '_'].join('\t'),
+ ['2', '#10', '_', '_', '_', '_', '_', '_', '_', '_'].join('\t'),
+ ''
+ ].join('\n');
+ const stdout = execSync('node src/index.js', { input: testInput }).toString();
+
+ expect(stdout).toContain('#KorAP\t_\t_\tHST');
+ expect(stdout).toContain('#10\t_\t_\tHST');
+
+ var hst_count = (stdout.match(/\tHST\t/g) || []).length;
+ expect(hst_count).toBe(2);
+ done();
});
test('Regression test for issue #113: emoji modifiers and ZWJ', (done) => {
@@ -53,11 +76,11 @@
`;
const { execSync } = require('child_process');
const stdout = execSync('node src/index.js', { input: testInput }).toString();
-
+
// Check that compound emojis are tagged as EMOIMG and lemma has base emoji
expect(stdout).toContain('✊🏿\t✊\t_\tEMOIMG');
expect(stdout).toContain('👨👨👦\t👨\t_\tEMOIMG');
-
+
// Count EMOIMG occurrences (should be 1 per emoji - only in XPOS column)
var emoimg_count = (stdout.match(/EMOIMG/g) || []).length;
expect(emoimg_count).toBe(2); // 2 emojis × 1 column = 2
@@ -76,11 +99,11 @@
`;
const { execSync } = require('child_process');
const stdout = execSync('node src/index.js', { input: testInput }).toString();
-
+
// Check that Wikipedia emoji templates are tagged as EMOWIKI in XPOS column only
expect(stdout).toContain('[_EMOJI:{{S|;)}}_]\t_\t_\tEMOWIKI');
expect(stdout).toContain('[_EMOJI:{{cool}}_]\t_\t_\tEMOWIKI');
-
+
// Count EMOWIKI occurrences (should be 1 per template - only in XPOS column)
var emowiki_count = (stdout.match(/EMOWIKI/g) || []).length;
expect(emowiki_count).toBe(2); // 2 templates × 1 column = 2
@@ -97,15 +120,15 @@
`;
const { execSync } = require('child_process');
const stdout = execSync('node src/index.js', { input: testInput }).toString();
-
+
// Check that 😇 has correct metadata
// g=smileys_and_emotion|s=face_smiling|q=fully_qualified|v=E1.0|n=smiling_face_with_halo
// Note: spaces in data are replaced by _ in our script
expect(stdout).toContain('g=smileys_&_emotion|s=face_smiling|q=fully_qualified|v=E1.0|n=smiling_face_with_halo');
-
+
// Also check for the base emoji lemma and tags
expect(stdout).toContain('😇\t😇\t_\tEMOIMG');
-
+
done();
});
});