! Gender-sensitive endings (German)
! By M. Kupietz (KorAP-Tokenizer)
| 3 | |
! Ending -in / -innen with every letter accepted in either case,
! e.g. in, innen, In, Innen, IN, INNEN. Each letter is matched
! independently, so mixed-case spellings such as "iN" are accepted too.
define genderEndingsIn [
    [I | i] [N | n]
  | [I | i] [N | n] [N | n] [E | e] [N | n]
];
| 6 | |
! Lowercase-only -in / -innen ending. Slash forms use this variant so that
! compounds such as "Nutzer/Innenarchitekt" do not produce a false positive.
define genderEndingsInLower [ {in} ( {nen} ) ];
| 10 | |
! Ending -frau / -frauen, lowercase only (capitalized "Frau" is a
! standalone word).
! Referenced by genderFrau and genderParenFrau, both of which require a
! literal "mann" directly before the separator (Kaufmann/frau,
! Geschäftsmann/frau).
! NOTE(review): the intended targets are words with a non-empty prefix
! before "mann", not bare "mann"; that restriction is not expressed in
! this definition - confirm where it is enforced.
define genderEndingsFrau [ {frau} | {frauen} ];
| 17 | |
! -in / -innen endings attached with a colon or a slash.
! Colon forms (any letter case): Nutzer:in, Nutzer:In, Nutzer:innen
! Slash forms (lowercase only, optional hyphen after the slash):
!   Nutzer/in, Nutzer/innen, Nutzer/-in, Kosovo-Albaner/innen
! Parenthetical forms are handled separately by genderParenIn.
! Slash is not defined in this section and must already be defined.
define genderIn [
    [ ":" genderEndingsIn ]
  | [ Slash ( %- ) genderEndingsInLower ]
];
| 22 | |
! Slash forms for -frau following "mann", with an optional hyphen after
! the slash: Kaufmann/frau, Kaufmann/-frau, Geschäftsmann/frau.
! The pattern starts at the literal "mann", so "mann" is part of the match.
! NOTE(review): the requirement of a non-empty prefix before "mann" is not
! expressed here - presumably enforced where this is applied; confirm.
! Slash is not defined in this section and must already be defined.
define genderFrau [ {mann} Slash [ genderEndingsFrau | %- genderEndingsFrau ] ];
| 26 | |
! Parenthesized -in / -innen ending with an optional leading hyphen:
! Nutzer(in), Nutzer(innen), Nutzer(-in).
! Uses genderEndingsIn, so the ending is accepted in any letter case.
define genderParenIn [ %( [ genderEndingsIn | %- genderEndingsIn ] %) ];
| 29 | |
! Parenthesized -frau / -frauen ending following "mann", with an optional
! leading hyphen: Kaufmann(frau), Kaufmann(-frau).
! The pattern starts at the literal "mann", so "mann" is part of the match.
! NOTE(review): the requirement of a non-empty prefix before "mann" is not
! expressed here - presumably enforced where this is applied; confirm.
define genderParenFrau [ {mann} %( [ genderEndingsFrau | %- genderEndingsFrau ] %) ];
| 33 | |
! Union of all gender-sensitive ending patterns defined above:
! the -in/-innen family first, then the mann/-frau family.
define GenderEndings [
    genderIn   | genderParenIn
  | genderFrau | genderParenFrau
];