blob: 8adc35c963b383cbdd555b7f44e974aeefe4e015 [file] [log] [blame]
! Gender-sensitive endings (German)
! By M. Kupietz (KorAP-Tokenizer)
! Case-insensitive -in / -innen ending.
! Every letter may independently be upper or lower case, so besides
! in, innen, In, Innen, IN, INNEN any other case mix is accepted too.
define genderEndingsIn [ [I | i] [N | n] [ 0 | [N | n] [E | e] [N | n] ] ];
! Lowercase-only -in / -innen ending, used for the slash forms.
! Keeping the slash forms lowercase avoids false positives in
! compounds such as "Nutzer/Innenarchitekt".
define genderEndingsInLower [ {in} | {innen} ];
! Gender-sensitive -frau / -frauen ending.
! Lowercase only: capitalized "Frau" is a standalone word.
! This definition itself matches just "frau" and "frauen"; it is
! referenced by the Kaufmann/frau and Kaufmann(frau) patterns, which
! prepend the "mann" stem ending.
define genderEndingsFrau [ {frau} | {frauen} ];
! -in / -innen endings attached by colon or slash
! (the parenthetical variant is a separate definition).
! Slash forms, lowercase ending, optional hyphen:
!   Nutzer/in, Nutzer/innen, Nutzer/-in, Kosovo-Albaner/innen
! Colon forms, ending in any letter case:
!   Nutzer:in, Nutzer:In, Nutzer:innen
define genderIn [
    [ Slash [ 0 | %- ] genderEndingsInLower ]
  | [ %: genderEndingsIn ]
];
! Slash forms for -frau / -frauen with an optional hyphen:
!   Kaufmann/frau, Kaufmann/-frau, Geschäftsmann/frau
! The match starts at the literal "mann" that ends the stem.
! NOTE(review): the requirement of a non-empty prefix before "mann"
! is not expressed here; presumably the rule that uses this
! definition enforces it - verify.
define genderFrau [ m a n n Slash [ 0 | %- ] genderEndingsFrau ];
! Parenthetical -in / -innen forms with an optional leading hyphen:
!   Nutzer(in), Nutzer(innen), Nutzer(-in)
! The ending inside the parentheses is accepted in any letter case.
define genderParenIn [ "(" [ 0 | %- ] genderEndingsIn ")" ];
! Parenthetical -frau / -frauen forms with an optional leading hyphen:
!   Kaufmann(frau), Kaufmann(-frau)
! The match starts at the literal "mann" that ends the stem.
! NOTE(review): the requirement of a non-empty prefix before "mann"
! is not expressed here; presumably the rule that uses this
! definition enforces it - verify.
define genderParenFrau [ m a n n "(" [ 0 | %- ] genderEndingsFrau ")" ];
! Union of all gender-sensitive suffix patterns defined above:
! the -in/-innen family (colon, slash, parenthetical) and the
! mann/-frau family (slash, parenthetical).
define GenderEndings [
    genderIn
  | genderParenIn
  | genderFrau
  | genderParenFrau
];