Fix more false positives (FP)
Change-Id: I1f8045b4f93f64a7c8597dfbc35a1744af3aaf20
diff --git a/test/data/gender.conllu b/test/data/gender.conllu
index e764a9e..87ab0e6 100644
--- a/test/data/gender.conllu
+++ b/test/data/gender.conllu
@@ -212,3 +212,27 @@
2 traf _ VERB VVFIN _ _ _ _ _
3 xier _ _ _ _ _ _ _ _
+# foundry = base
+# filename = TEST/gender/000019/base/tokens.xml
+# text_id = GENDER_TEST.000019
+# text = Müller et al. berichten über neue Befunde
+1 Müller _ NOUN NN _ _ _ _ _
+2 et _ PART PTKA _ _ _ _ _
+3 al. _ NOUN NN _ _ _ _ _
+4 berichten _ VERB VVFIN _ _ _ _ _
+5 über _ ADP APPR _ _ _ _ _
+6 neue _ ADJ ADJA _ _ _ _ _
+7 Befunde _ NOUN NN _ _ _ _ _
+
+# foundry = base
+# filename = TEST/gender/000020/base/tokens.xml
+# text_id = GENDER_TEST.000020
+# text = "their results confirmed the hypothesis"
+1 " _ PUNCT $( _ _ _ _ _
+2 their _ PRON PPER _ _ _ _ _
+3 results _ NOUN NN _ _ _ _ _
+4 confirmed _ VERB VVFIN _ _ _ _ _
+5 the _ DET ART _ _ _ _ _
+6 hypothesis _ NOUN NN _ _ _ _ _
+7 " _ PUNCT $( _ _ _ _ _
+
diff --git a/test/test.js b/test/test.js
index b954e06..66cf37c 100644
--- a/test/test.js
+++ b/test/test.js
@@ -110,7 +110,7 @@
const command = 'node src/index.js < test/data/gender.conllu';
const stdout = execSync(command).toString();
const foundry_count = (stdout.match(/# foundry = gender/g) || []).length;
- expect(foundry_count).toBe(18);
+ expect(foundry_count).toBe(20);
});
test('Full mode: non-gender tokens pass through unchanged', () => {
@@ -232,6 +232,15 @@
expect(stdout).toContain('6\tsin\t_\tNOUN');
});
+ test('No false positives from gender.conllu: et (in et al.) and their (English) pass through unchanged', () => {
+ const command = 'node src/index.js < test/data/gender.conllu';
+ const stdout = execSync(command).toString();
+ // et in "Müller et al." must not be tagged
+ expect(stdout).toContain('2\tet\t_\tPART');
+ // their in English quotation must not be tagged
+ expect(stdout).toContain('2\ttheir\t_\tPRON');
+ });
+
test('No false positive: Mensch mid-sentence must not be tagged as neo-pronoun', () => {
const testInput = `# foundry = base
# text_id = fp-003