| ! Gender-sensitive endings (German) |
| ! By M. Kupietz (KorAP-Tokenizer) |
| |
| ! Feminine suffix "in" with the optional plural extension "nen". |
| ! Every letter is accepted in either case, so in, innen, In, Innen, |
| ! IN, INNEN and mixed spellings such as iN are all matched. |
| define genderEndingsIn [ [ I | i ] [ N | n ] ( [ N | n ] [ E | e ] [ N | n ] ) ]; |
| |
| ! Lowercase-only variant (in, innen) used for slash forms, to avoid |
| ! false positives in compounds like "Nutzer/Innenarchitekt". |
| define genderEndingsInLower [ {in} ( {nen} ) ]; |
| |
| ! Short endings for determiners, adjectives and pronouns: |
| ! e, em, en, er, es, m, n, r, s |
| define genderShortSuffix [ [ e ( [ m | n | r | s ] ) ] | m | n | r | s ]; |
| |
| ! Separators that may introduce a short gender ending: colon, |
| ! underscore, Asterisk, or Slash optionally followed by a hyphen. |
| define genderSeparator [ ":" | "_" | Asterisk | [ Slash ( %- ) ] ]; |
| |
| ! Feminine counterpart ending "frau" with the optional plural "en" |
| ! (frau, frauen). Lowercase only - capitalized Frau is a standalone word. |
| ! Note: within this section it is only used after a stem ending in |
| ! "mann" (see genderFrau and genderParenFrau), e.g. Kaufmann/frau. |
| ! The "mann" part itself is not matched by this definition. |
| define genderEndingsFrau [ f r a u ( e n ) ]; |
| |
| ! General -in/-innen gender endings without a compound continuation. |
| ! After ":", "*" or "_" all upper-/lowercase I/i variants are allowed; |
| ! after a slash (optionally followed by a hyphen) only lowercase |
| ! in/innen, to avoid false positives like Nutzer/Innenarchitekt. |
| define genderIn [ |
| [ Slash ( %- ) genderEndingsInLower ] | |
| [ [ "_" | ":" | Asterisk ] genderEndingsIn ] |
| ]; |
| |
| ! Short gendered forms for DET/ADJ/PRON: a separator |
| ! (colon/slash/star/underscore) followed by a short suffix, |
| ! e.g. the ":r" in gute:r, "/r" in ihm/r, "*e" in ein*e, "_e" in ein_e. |
| define genderShort genderSeparator genderShortSuffix; |
| |
| ! Parenthetical short forms: a short suffix in parentheses, with an |
| ! optional hyphen after the opening parenthesis, |
| ! e.g. eine(n), ein(e), ein(-e) |
| define genderParenShort [ %( ( %- ) genderShortSuffix %) ]; |
| |
| ! Slash forms for -frau: Kaufmann/frau, Kaufmann/-frau, Geschäftsmann/frau |
| ! The pattern starts at the literal "mann"; it is intended for words that |
| ! end in "mann" with a non-empty prefix before it. |
| ! NOTE(review): the prefix requirement is not expressed here - confirm it |
| ! is enforced by the rule that attaches GenderEndings to a word. |
| define genderFrau [ m a n n Slash ( %- ) genderEndingsFrau ]; |
| |
| ! Parenthetical forms for -in/-innen, with an optional hyphen after the |
| ! opening parenthesis: Nutzer(in), Nutzer(innen), Nutzer(-in). |
| ! Uses genderEndingsIn, so every upper-/lowercase spelling is accepted. |
| define genderParenIn [ %( ( %- ) genderEndingsIn %) ]; |
| |
| ! Parenthetical forms for -frau: Kaufmann(frau), Kaufmann(-frau) |
| ! The pattern starts at the literal "mann"; it is intended for words that |
| ! end in "mann" with a non-empty prefix before it. |
| ! NOTE(review): the prefix requirement is not expressed here - confirm it |
| ! is enforced by the rule that attaches GenderEndings to a word. |
| define genderParenFrau [ m a n n %( ( %- ) genderEndingsFrau %) ]; |
| |
| ! -in/-innen endings that continue into a compound: the ending must be |
| ! followed by at least one further Char, e.g. Lehrer:innenfortbildung. |
| ! After a slash only lowercase in/innen is accepted, so /Innen... keeps |
| ! splitting (e.g. Nutzer/Innenarchitekt, Innenminister/Innenministerinnen). |
| define genderInComp [ |
| [ Slash ( %- ) genderEndingsInLower Char+ ] | |
| [ [ "_" | ":" | Asterisk ] genderEndingsIn Char+ ] |
| ]; |
| |
| ! Union of all gender-ending patterns: -in/-innen (plain, compound and |
| ! parenthetical), short suffixes (separator and parenthetical) and |
| ! -frau forms (slash and parenthetical). |
| define GenderEndings [ |
| genderIn | genderInComp | genderParenIn | |
| genderShort | genderParenShort | |
| genderFrau | genderParenFrau |
| ]; |