blob: dd8c7de0d4b738810dc9432a49278aa074d1957f [file] [log] [blame]
! Gender-sensitive endings (German)
! By M. Kupietz (KorAP-Tokenizer)
! Feminine endings -in / -innen, case-insensitive per letter.
! Every letter may independently be upper or lower case, so this accepts
! in, In, IN, iN as well as innen, Innen, INNEN and any other mixed casing.
define genderEndingsIn [
  [ I | i ] [ N | n ]
  |
  [ I | i ] [ N | n ] [ N | n ] [ E | e ] [ N | n ]
];
! Lowercase-only variant of the -in / -innen endings.
! Used after a slash, where a capitalised "Innen..." is more likely the start
! of a separate word (e.g. "Nutzer/Innenarchitekt") than a gender ending.
define genderEndingsInLower [ {in} | {innen} ];
! Short inflectional endings for determiners, adjectives and pronouns.
! Accepts exactly: e, m, n, r, s, em, en, er, es (lowercase only).
define genderShortSuffix [ e ( m | n | r | s ) | m | n | r | s ];
! Separators that may introduce a short gender suffix:
! colon, asterisk, underscore, or a slash optionally followed by a hyphen.
! Slash and Asterisk are defined elsewhere.
define genderSeparator [ Asterisk | "_" | ":" | Slash | Slash %- ];
! Feminine counterpart endings -frau / -frauen.
! Lowercase only: capitalised "Frau" is a standalone word.
! This pattern matches just "frau" or "frauen"; the definitions that use it
! (genderFrau, genderParenFrau) prepend "mann", so it serves forms such as
! Kaufmann/frau or Geschäftsmann(frau).
! NOTE(review): the requirement of a non-empty prefix before "mann"
! (Kaufmann, but not bare "mann") is not expressed by these patterns;
! presumably the rule that consumes GenderEndings enforces it - confirm.
define genderEndingsFrau [ {frau} | {frauen} ];
! Word-final -in / -innen gender endings (nothing follows the ending).
! After ":", "*" or "_" every upper/lower-case spelling of the ending is
! accepted. After a slash (optionally followed by a hyphen) only lowercase
! in/innen is accepted, to avoid false positives such as
! "Nutzer/Innenarchitekt".
define genderIn [
  ":"      genderEndingsIn |
  Asterisk genderEndingsIn |
  "_"      genderEndingsIn |
  Slash    genderEndingsInLower |
  Slash %- genderEndingsInLower
];
! Short gendered forms for determiners, adjectives and pronouns:
! a separator (colon, slash with optional hyphen, asterisk, underscore)
! followed by one short suffix.
! Examples: gute:r, ihm/r, ein*e, ein_e, diese:r
define genderShort genderSeparator genderShortSuffix;
! Short suffix wrapped in parentheses, with an optional leading hyphen
! inside the parentheses.
! Examples: eine(n), ein(e)
define genderParenShort [
  %(    genderShortSuffix %) |
  %( %- genderShortSuffix %)
];
! Slash forms pairing -mann with -frau / -frauen:
! Kaufmann/frau, Kaufmann/-frau, Geschäftsmann/frau.
! The match starts at the literal "mann", so it applies only where the
! preceding word ends in "mann".
! NOTE(review): the non-empty prefix before "mann" is not checked here;
! presumably the consuming rule guarantees it - confirm.
define genderFrau [ {mann} [ Slash | Slash %- ] genderEndingsFrau ];
! -in / -innen in parentheses, with an optional hyphen after the opening
! parenthesis. All upper/lower-case spellings of the ending are accepted.
! Examples: Nutzer(in), Nutzer(innen), Nutzer(-in)
define genderParenIn [
  %( [ genderEndingsIn | %- genderEndingsIn ] %)
];
! Parenthesised -frau / -frauen after "mann", with an optional hyphen after
! the opening parenthesis: Kaufmann(frau), Kaufmann(-frau).
! The match starts at the literal "mann", so it applies only where the
! preceding word ends in "mann".
! NOTE(review): the non-empty prefix before "mann" is not checked here;
! presumably the consuming rule guarantees it - confirm.
define genderParenFrau [
  {mann} %( [ genderEndingsFrau | %- genderEndingsFrau ] %)
];
! Gender ending followed by a compound continuation, as in
! Lehrer:innenfortbildung: separator, -in/-innen, then at least one more
! Char (defined elsewhere).
! The separator rules are the same as for the word-final forms: any casing
! of the ending after ":", "*" or "_", but lowercase in/innen only after a
! slash, so that /Innen... keeps splitting
! (e.g. Nutzer/Innenarchitekt, Innenminister/Innenministerinnen).
define genderInComp [
  [
    [ ":" | Asterisk | "_" ] genderEndingsIn
    |
    Slash ( %- ) genderEndingsInLower
  ]
  Char+
];
! Union of all gender-ending patterns defined in this file:
! -in/-innen (word-final and with compound continuation), short suffixes,
! -mann/-frau slash forms, and the three parenthetical variants.
define GenderEndings [
  genderIn
  | genderInComp
  | genderShort
  | genderFrau
  | genderParenIn
  | genderParenShort
  | genderParenFrau
];