Fix false-positive neo-pronoun matches (*, Y, per, EL, EM, Ey, sin, mid-sentence Mensch)

Change-Id: Ifaec3023569865919c524dc4cd72a4a418949711
diff --git a/test/test.js b/test/test.js
index 31a39e2..b954e06 100644
--- a/test/test.js
+++ b/test/test.js
@@ -110,7 +110,7 @@
     const command = 'node src/index.js < test/data/gender.conllu';
     const stdout = execSync(command).toString();
     const foundry_count = (stdout.match(/# foundry = gender/g) || []).length;
-    expect(foundry_count).toBe(12);
+    expect(foundry_count).toBe(18);
   });
 
   test('Full mode: non-gender tokens pass through unchanged', () => {
@@ -135,16 +135,17 @@
       const featsAnnotated  = cols[5] !== '_';
       expect(lemmaAnnotated || featsAnnotated).toBe(true);
     });
-    // Count: 18 NOUN + 5 DET + 1 PRON (sie*er) + 5 neo-pronouns (sier,xier,oj,el,hen) = 29
-    expect(tokenLines.length).toBe(29);
+    // Count: 18 NOUN + 5 DET + 1 PRON (sie*er) + 5 neo-pronoun tokens (sier,xier,oj,el,hen)
+    //       + 7 further neo-pronoun tokens in sentences 16–18 (el,em,ey,y,mensch,Mensch,xier) = 36
+    expect(tokenLines.length).toBe(36);
   });
 
   test('Sparse mode: sentence headers are emitted for sentences with matches', () => {
     const command = 'node src/index.js -s < test/data/gender.conllu';
     const stdout = execSync(command).toString();
-    // All 12 test sentences have at least one gender form
+    // 12 original + 3 new sentences (16–18) have at least one gender form
     const text_id_count = (stdout.match(/# text_id = /g) || []).length;
-    expect(text_id_count).toBe(12);
+    expect(text_id_count).toBe(15);
   });
 
   test('Inline input: basic Genderstern annotation', () => {
@@ -185,4 +186,112 @@
     // jede (without gender marker): unchanged
     expect(stdout).toContain('jede\t_\tDET\tPIAT\tGender=Fem|Number=Sing');
   });
+
+  // ---------------------------------------------------------------------------
+  // Regression tests: false-positive tokens that must NOT be tagged
+  // ---------------------------------------------------------------------------
+
+  test('No false positives: *, Y, per, EL, EM, Ey, sin mid-sentence pass through unchanged', () => {
+    // Each of these tokens was spuriously matched as a neo-pronoun before this fix.
+    // They must not receive a neo-pronoun annotation.
+    const testInput = `# foundry = base
+# text_id = fp-001
+# text = Hinweis auf * und Y sowie per Einschreiben
+1\tHinweis\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+2\tauf\t_\tADP\tAPPR\t_\t_\t_\t_\t_
+3\t*\t_\tPUNCT\t$(\t_\t_\t_\t_\t_
+4\tund\t_\tCCONJ\tKON\t_\t_\t_\t_\t_
+5\tY\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+6\tsowie\t_\tCCONJ\tKON\t_\t_\t_\t_\t_
+7\tper\t_\tADP\tAPPR\t_\t_\t_\t_\t_
+8\tEinschreiben\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+
+# foundry = base
+# text_id = fp-002
+# text = Verweise auf EL EM Ey sin im Text
+1\tVerweise\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+2\tauf\t_\tADP\tAPPR\t_\t_\t_\t_\t_
+3\tEL\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+4\tEM\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+5\tEy\t_\tITJ\tITJ\t_\t_\t_\t_\t_
+6\tsin\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+7\tim\t_\tADP\tAPPRART\t_\t_\t_\t_\t_
+8\tText\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+
+`;
+    const stdout = execSync('node src/index.js', { input: testInput }).toString();
+    // None of the false-positive tokens should receive a neo-pronoun annotation.
+    // Each check matches the ID, form, lemma and UPOS columns: an untouched token
+    // keeps lemma '_' and its input UPOS instead of being rewritten to PRON/PPER.
+    expect(stdout).toContain('3\t*\t_\tPUNCT');
+    expect(stdout).toContain('5\tY\t_\tNOUN');
+    expect(stdout).toContain('7\tper\t_\tADP');
+    expect(stdout).toContain('3\tEL\t_\tNOUN');
+    expect(stdout).toContain('4\tEM\t_\tNOUN');
+    expect(stdout).toContain('5\tEy\t_\tITJ');
+    expect(stdout).toContain('6\tsin\t_\tNOUN');
+  });
+
+  test('No false positive: Mensch mid-sentence must not be tagged as neo-pronoun', () => {
+    const testInput = `# foundry = base
+# text_id = fp-003
+# text = Jeder Mensch hat Würde
+1\tJeder\t_\tDET\tPIAT\t_\t_\t_\t_\t_
+2\tMensch\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+3\that\t_\tAUX\tVAFIN\t_\t_\t_\t_\t_
+4\tWürde\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+
+`;
+    const stdout = execSync('node src/index.js', { input: testInput }).toString();
+    // 'Mensch' at position 2 (not sentence-initial) must not be tagged.
+    expect(stdout).toContain('2\tMensch\t_\tNOUN\tNN\t_');
+  });
+
+  test('Neo-pronoun: lowercase el, em, ey, y mid-sentence are still tagged', () => {
+    const testInput = `# foundry = base
+# text_id = neo-lc-001
+# text = dankte el und em für ey und y
+1\tdankte\t_\tVERB\tVVFIN\t_\t_\t_\t_\t_
+2\tel\t_\t_\t_\t_\t_\t_\t_\t_
+3\tund\t_\tCCONJ\tKON\t_\t_\t_\t_\t_
+4\tem\t_\t_\t_\t_\t_\t_\t_\t_
+5\tfür\t_\tADP\tAPPR\t_\t_\t_\t_\t_
+6\tey\t_\t_\t_\t_\t_\t_\t_\t_
+7\tund\t_\tCCONJ\tKON\t_\t_\t_\t_\t_
+8\ty\t_\t_\t_\t_\t_\t_\t_\t_
+
+`;
+    const stdout = execSync('node src/index.js', { input: testInput }).toString();
+    expect(stdout).toContain('el\tel\tPRON\tPPER\tGender=Fem,Masc,NonBin|PronType=Prs');
+    expect(stdout).toContain('em\tem\tPRON\tPPER\tGender=Fem,Masc,NonBin|PronType=Prs');
+    expect(stdout).toContain('ey\tey\tPRON\tPPER\tGender=Fem,Masc,NonBin|PronType=Prs');
+    expect(stdout).toContain('y\tY\tPRON\tPPER\tGender=Fem,Masc,NonBin|PronType=Prs');
+  });
+
+  test('Neo-pronoun: mensch lowercase and sentence-initial Mensch are tagged', () => {
+    const testInput = `# foundry = base
+# text_id = neo-mensch-001
+# text = mensch fragte und Mensch antwortete
+1\tmensch\t_\t_\t_\t_\t_\t_\t_\t_
+2\tfragte\t_\tVERB\tVVFIN\t_\t_\t_\t_\t_
+3\tund\t_\tCCONJ\tKON\t_\t_\t_\t_\t_
+4\tMensch\t_\tNOUN\tNN\t_\t_\t_\t_\t_
+5\tantwortete\t_\tVERB\tVVFIN\t_\t_\t_\t_\t_
+
+# foundry = base
+# text_id = neo-mensch-002
+# text = Mensch traf xier
+1\tMensch\t_\t_\t_\t_\t_\t_\t_\t_
+2\ttraf\t_\tVERB\tVVFIN\t_\t_\t_\t_\t_
+3\txier\t_\t_\t_\t_\t_\t_\t_\t_
+
+`;
+    const stdout = execSync('node src/index.js', { input: testInput }).toString();
+    // lowercase 'mensch' → neo-pronoun
+    expect(stdout).toContain('1\tmensch\tmensch\tPRON\tPPER\tGender=Fem,Masc,NonBin|PronType=Prs');
+    // 'Mensch' mid-sentence (position 4) → unchanged common noun
+    expect(stdout).toContain('4\tMensch\t_\tNOUN\tNN\t_');
+    // sentence-initial 'Mensch' (position 1) → neo-pronoun
+    expect(stdout).toContain('1\tMensch\tmensch\tPRON\tPPER\tGender=Fem,Masc,NonBin|PronType=Prs');
+  });
 });