Add neo-pronoun support
diff --git a/test/data/gender.conllu b/test/data/gender.conllu
index bc3c76c..9319d7c 100644
--- a/test/data/gender.conllu
+++ b/test/data/gender.conllu
@@ -115,3 +115,26 @@
3 Spieler/-innen _ _ _ _ _ _ _ _
4 lasen _ VERB VVFIN _ _ _ _ _
+# foundry = base
+# filename = TEST/gender/000011/base/tokens.xml
+# text_id = GENDER_TEST.000011
+# text = sier trifft xier jeden Tag
+1 sier _ _ _ _ _ _ _ _
+2 trifft _ VERB VVFIN _ _ _ _ _
+3 xier _ _ _ _ _ _ _ _
+4 jeden _ DET PIAT _ _ _ _ _
+5 Tag _ NOUN NN _ _ _ _ _
+
+# foundry = base
+# filename = TEST/gender/000012/base/tokens.xml
+# text_id = GENDER_TEST.000012
+# text = oj dankte el und hen für die Hilfe
+1 oj _ _ _ _ _ _ _ _
+2 dankte _ VERB VVFIN _ _ _ _ _
+3 el _ _ _ _ _ _ _ _
+4 und _ CCONJ KON _ _ _ _ _
+5 hen _ _ _ _ _ _ _ _
+6 für _ ADP APPR _ _ _ _ _
+7 die _ DET ART _ _ _ _ _
+8 Hilfe _ NOUN NN _ _ _ _ _
+
diff --git a/test/test.js b/test/test.js
index 6367049..94391a3 100644
--- a/test/test.js
+++ b/test/test.js
@@ -91,11 +91,26 @@
expect(stdout).toContain('sie*er\tsie*er\tPRON\tPPER\tGender=NonBin');
});
+ test('Full mode: neo-pronoun lexicon forms (PRON PPER Gender=NonBin|PronType=Prs)', () => {
+ const command = 'node src/index.js < test/data/gender.conllu';
+ const stdout = execSync(command).toString();
+ // sier: NOM of sier-paradigm
+ expect(stdout).toContain('sier\tsier\tPRON\tPPER\tGender=NonBin|PronType=Prs');
+ // xier: NOM of xier-paradigm
+ expect(stdout).toContain('xier\txier\tPRON\tPPER\tGender=NonBin|PronType=Prs');
+ // oj: NOM of oj-paradigm
+ expect(stdout).toContain('oj\toj\tPRON\tPPER\tGender=NonBin|PronType=Prs');
+ // el: NOM of el-paradigm
+ expect(stdout).toContain('el\tel\tPRON\tPPER\tGender=NonBin|PronType=Prs');
+ // hen: NOM of hen-paradigm
+ expect(stdout).toContain('hen\then\tPRON\tPPER\tGender=NonBin|PronType=Prs');
+ });
+
test('Full mode: foundry comment changed to gender', () => {
const command = 'node src/index.js < test/data/gender.conllu';
const stdout = execSync(command).toString();
const foundry_count = (stdout.match(/# foundry = gender/g) || []).length;
- expect(foundry_count).toBe(10);
+ expect(foundry_count).toBe(12);
});
test('Full mode: non-gender tokens pass through unchanged', () => {
@@ -120,16 +135,16 @@
const featsAnnotated = cols[5] !== '_';
expect(lemmaAnnotated || featsAnnotated).toBe(true);
});
- // Count: 18 NOUN + 5 DET + 1 PRON = 24 annotated tokens
- expect(tokenLines.length).toBe(24);
+ // Count: 18 NOUN + 5 DET + 1 PRON (sie*er) + 5 neo-pronouns (sier,xier,oj,el,hen) = 29
+ expect(tokenLines.length).toBe(29);
});
test('Sparse mode: sentence headers are emitted for sentences with matches', () => {
const command = 'node src/index.js -s < test/data/gender.conllu';
const stdout = execSync(command).toString();
- // All 10 test sentences have at least one gender form
+ // All 12 test sentences have at least one gender form
const text_id_count = (stdout.match(/# text_id = /g) || []).length;
- expect(text_id_count).toBe(10);
+ expect(text_id_count).toBe(12);
});
test('Inline input: basic Genderstern annotation', () => {