Add neo pronoun support
diff --git a/src/index.js b/src/index.js
index e2f22aa..38f21b5 100644
--- a/src/index.js
+++ b/src/index.js
@@ -68,6 +68,142 @@
const neopronGenderStarPairRegex = /^(sie|er|ihr|ihn?|ihm?|dich|sich|mich|mir|uns|euch|ihnen|seinen?|ihrem?|deren?|denen)([*:_])(sie|er|ihr|ihn?|ihm?|dich|sich|mich|mir|uns|euch|ihnen|seinen?|ihrem?|deren?|denen)$/i;
// ---------------------------------------------------------------------------
+// Neo-pronoun lexicon (source: pronomen.net/beliebige:neopronomen)
+// Maps lowercased surface form → { lemma, upos, xpos, feats }.
+//
+// Lemma: nominative form as listed on pronomen.net.
+// UPOS: PRON | XPOS: PPER | FEATS: Gender=NonBin|PronType=Prs
+//
+// Excluded (too ambiguous with standard German words):
+// 'dem' – dative definite article / demonstrative pronoun
+// 'deren' – relative/demonstrative genitive pronoun
+// 'denen' – relative/demonstrative dative pronoun
+// 'per' – common German preposition
+// 'pers' – excluded together with 'per'
+//
+// Shared/ambiguous oblique forms:
+// 'sier','siem','sien' – NOM/DAT/ACC of sier-paradigm; also GEN/DAT/ACC of
+// et/siem-paradigm (both annotated with lemma 'sier')
+// 'em' – NOM of em/em-paradigm; also DAT of el/em and en/em
+// 'ems' – GEN of both el/em and em/em (annotated as lemma 'em')
+// 'en' – NOM/ACC/DAT of en/en; NOM/ACC of en/em; ACC of el/em (lemma 'en')
+// 'ens' – GEN of en/em; also all forms of ens/ens (lemma 'ens')
+// ---------------------------------------------------------------------------
+
+function neoPron(lemma) { // Tag record for one neo-pronoun form; `lemma` is the paradigm's nominative form.
+ return { lemma, upos: 'PRON', xpos: 'PPER', feats: 'Gender=NonBin|PronType=Prs' }; // fresh object per call
+}
+
+const NEO_PRONOUN_FORMS = new Map([ // key: lowercased surface form → value: tag record built by neoPron()
+ // ---- Verschmelzung (blend pronouns) ------------------------------------
+ // sier/siem (NOM=sier, GEN=sies, DAT=siem, ACC=sien)
+ ['sier', neoPron('sier')],
+ ['sies', neoPron('sier')],
+ ['siem', neoPron('sier')],
+ ['sien', neoPron('sier')],
+ // xier/xiem (NOM=xier, GEN=xies, DAT=xiem, ACC=xien)
+ ['xier', neoPron('xier')],
+ ['xies', neoPron('xier')],
+ ['xiem', neoPron('xier')],
+ ['xien', neoPron('xier')],
+ // ersie/ihmihr (NOM=ersie, GEN=seinihr, DAT=ihmihr, ACC=ihnsie)
+ ['ersie', neoPron('ersie')],
+ ['seinihr', neoPron('ersie')],
+ ['ihmihr', neoPron('ersie')],
+ ['ihnsie', neoPron('ersie')],
+
+ // ---- They-ähnlich (they-like pronouns) ---------------------------------
+ // dej/denen/dej (NOM=dej, GEN=deren, DAT=denen, ACC=dej)
+ // 'deren' and 'denen' omitted (overlap with standard German pronouns)
+ ['dej', neoPron('dej')],
+ // dey/denen/dem and dey/denen/demm (NOM=dey; 'dem' excluded)
+ ['dey', neoPron('dey')],
+ ['demm', neoPron('dey')], // ACC of dey/denen/demm
+ // ey/emm (NOM=ey, GEN=eys, DAT=emm, ACC=emm)
+ ['ey', neoPron('ey')],
+ ['eys', neoPron('ey')],
+ ['emm', neoPron('ey')],
+ // they/them (NOM=they, GEN=their, DAT=them, ACC=them)
+ ['they', neoPron('they')],
+ ['their', neoPron('they')],
+ ['them', neoPron('they')],
+
+ // ---- Neuer Stamm (new-stem pronouns) -----------------------------------
+ // el/em (NOM=el, GEN=ems, DAT=em, ACC=en)
+ // 'ems' mapped to 'em'-paradigm below; 'em'/'en' mapped to their own NOM paradigms
+ ['el', neoPron('el')],
+ // em/em (NOM=em, GEN=ems, DAT=em, ACC=em)
+ ['em', neoPron('em')],
+ ['ems', neoPron('em')], // GEN shared with el/em paradigm
+ // en/en (NOM=en, GEN=enses, DAT=en, ACC=en)
+ // en/em (NOM=en, GEN=ens, DAT=em, ACC=en) — DAT 'em' mapped to em-paradigm
+ ['en', neoPron('en')],
+ ['enses', neoPron('en')],
+ // ens/ens (NOM=ens, GEN=ens, DAT=ens, ACC=ens)
+ // 'ens' takes priority as NOM of ens-paradigm (also GEN of en/em)
+ ['ens', neoPron('ens')],
+ // et/siem (NOM=et, GEN=sier, DAT=siem, ACC=sien)
+ // oblique forms 'sier'/'siem'/'sien' already mapped to sier-paradigm above
+ ['et', neoPron('et')],
+ // ex/ex (all forms = ex)
+ ['ex', neoPron('ex')],
+ // hän/sim (NOM=hän, GEN=sir, DAT=sim, ACC=sin)
+ ['hän', neoPron('hän')],
+ ['sir', neoPron('hän')],
+ ['sim', neoPron('hän')],
+ ['sin', neoPron('hän')],
+ // hen/hem (NOM=hen, GEN=hens, DAT=hem, ACC=hen)
+ ['hen', neoPron('hen')],
+ ['hens', neoPron('hen')],
+ ['hem', neoPron('hen')],
+ // hie/hiem (NOM=hie, GEN=hein, DAT=hiem, ACC=hie)
+ ['hie', neoPron('hie')],
+ ['hein', neoPron('hie')],
+ ['hiem', neoPron('hie')],
+ // iks/iks (NOM=iks, GEN=ikses, DAT=iks, ACC=iks)
+ ['iks', neoPron('iks')],
+ ['ikses', neoPron('iks')],
+ // ind/inde (NOM=ind, GEN=inds, DAT=inde, ACC=ind)
+ ['ind', neoPron('ind')],
+ ['inds', neoPron('ind')],
+ ['inde', neoPron('ind')],
+ // mensch/mensch (NOM=mensch, GEN=menschs, DAT=mensch, ACC=mensch)
+ // Note: the lookup is case-insensitive and German nouns are always capitalized,
+ // so the common noun 'Mensch' is tagged too; accepted in a gender-language tagger.
+ ['mensch', neoPron('mensch')],
+ ['menschs', neoPron('mensch')],
+ // nin/nim (NOM=nin, GEN=nims, DAT=nim, ACC=nin)
+ ['nin', neoPron('nin')],
+ ['nims', neoPron('nin')],
+ ['nim', neoPron('nin')],
+ // oj/ojm (NOM=oj, GEN=juj, DAT=ojm, ACC=ojn)
+ ['oj', neoPron('oj')],
+ ['juj', neoPron('oj')],
+ ['ojm', neoPron('oj')],
+ ['ojn', neoPron('oj')],
+ // per/per (all forms = per; GEN = pers) — deliberately NOT registered:
+ // 'per' is a common German preposition (e.g. 'per E-Mail'), so an exact-form
+ // entry would tag every such preposition as a pronoun; 'pers' is left out
+ // together with it, matching the exclusion list in the lexicon header.
+ // ser/sem (NOM=ser, GEN=ses, DAT=sem, ACC=sen)
+ ['ser', neoPron('ser')],
+ ['ses', neoPron('ser')],
+ ['sem', neoPron('ser')],
+ ['sen', neoPron('ser')],
+ // Y/Y (all forms = Y; GEN = Ys) — stored lowercase; lemma retains uppercase 'Y'
+ ['y', neoPron('Y')],
+ ['ys', neoPron('Y')],
+ // zet/zerm (NOM=zet, GEN=zets, DAT=zerm, ACC=zern)
+ ['zet', neoPron('zet')],
+ ['zets', neoPron('zet')],
+ ['zerm', neoPron('zet')],
+ ['zern', neoPron('zet')],
+ // */* (Stern; all forms = *; GEN = *s)
+ ['*', neoPron('*')],
+ ['*s', neoPron('*')],
+]);
+
+// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
@@ -205,6 +341,12 @@
let m;
// ------------------------------------------------------------------
+ // 0. Neo-pronoun lexicon lookup (case-insensitive, exact form match)
+ // ------------------------------------------------------------------
+ const entry = NEO_PRONOUN_FORMS.get(word.toLowerCase());
+ if (entry) return entry;
+
+ // ------------------------------------------------------------------
// 1. Gender-sensitive NOUNS
// ------------------------------------------------------------------