| Akron | 3dd560e | 2026-02-04 11:23:08 +0100 | [diff] [blame] | 1 | ! Gender-sensitive endings (German) |
| 2 | ! By M. Kupietz (KorAP-Tokenizer) |
| 3 | |
| 4 | ! -in / -innen in any mix of upper and lower case: in, In, IN, innen, Innen, INNEN, ... |
| 5 | define genderEndingsIn [ [I | i] [N | n] ( [N | n] [E | e] [N | n] ) ]; |
| 6 | |
| 7 | ! Lowercase-only variant (in, innen) used for slash forms, so that |
| 8 | ! compounds such as "Nutzer/Innenarchitekt" are not misread as gendered forms. |
| 9 | define genderEndingsInLower [ {in} ( {nen} ) ]; |
| 10 | |
| Akron | 2f7f6f3 | 2026-02-11 15:12:48 +0100 | [diff] [blame^] | 11 | ! One- or two-letter inflection endings of determiners, adjectives |
| 12 | ! and pronouns: e, m, n, r, s, em, en, er, es |
| 13 | define genderShortSuffix [ e ( [m|n|r|s] ) | m | n | r | s ]; |
| 14 | ! Separator in front of a short ending: slash (optionally "/-"), asterisk, underscore or colon. |
| 15 | define genderSeparator [ Slash ( %- ) | Asterisk | "_" | ":" ]; |
| 16 | |
| Akron | 3dd560e | 2026-02-04 11:23:08 +0100 | [diff] [blame] | 17 | ! Feminine counterpart of -mann: frau, frauen. |
| 18 | ! Lowercase only, because capitalized "Frau" is a standalone word. |
| 19 | ! The definition itself matches just the ending; it is meant to be |
| 20 | ! attached to stems ending in "mann" (Kaufmann/frau, Geschäftsmann/frau). |
| 21 | ! Per the original note, a bare "mann" without a prefix should not qualify. |
| 22 | define genderEndingsFrau [ f r a u ( e n ) ]; |
| 23 | |
| Akron | 2f7f6f3 | 2026-02-11 15:12:48 +0100 | [diff] [blame^] | 24 | ! -in/-innen after a separator, with no compound continuation behind it. |
| 25 | ! Slash (optionally "/-") accepts the lowercase ending only, so that |
| 26 | ! Nutzer/Innenarchitekt is not affected; ":", "*" and "_" accept any case. |
| 27 | define genderIn [ |
| 28 | Slash ( %- ) genderEndingsInLower | |
| 29 | [ Asterisk | "_" | ":" ] genderEndingsIn |
| 30 | ]; |
| 31 | |
| 32 | ! Separator plus short ending, for determiners, adjectives and pronouns: |
| 33 | ! gute:r, ihm/r, ein*e, ein_e, diese:r |
| 34 | define genderShort genderSeparator genderShortSuffix; |
| 35 | |
| 36 | ! Short ending enclosed in parentheses, optionally preceded by a hyphen: |
| 37 | ! eine(n), ein(e) |
| 38 | define genderParenShort [ "(" ( "-" ) genderShortSuffix ")" ]; |
| Akron | 3dd560e | 2026-02-04 11:23:08 +0100 | [diff] [blame] | 39 | |
| 40 | ! -mann plus slash and -frau/-frauen: Kaufmann/frau, Kaufmann/-frau, Geschäftsmann/frau |
| 41 | ! Starts at "mann"; the stem before it (required per the original note) is not part of this pattern. |
| 42 | define genderFrau [ m a n n Slash ( "-" ) genderEndingsFrau ]; |
| 43 | |
| 44 | ! -in/-innen in parentheses, hyphen optional: Nutzer(in), Nutzer(innen), Nutzer(-in) |
| 45 | define genderParenIn [ "(" ( "-" ) genderEndingsIn ")" ]; |
| 46 | |
| 47 | ! -mann plus parenthesized -frau/-frauen: Kaufmann(frau), Kaufmann(-frau) |
| 48 | ! Starts at "mann"; the stem before it (required per the original note) is not part of this pattern. |
| 49 | define genderParenFrau [ m a n n "(" ( "-" ) genderEndingsFrau ")" ]; |
| 50 | |
| Akron | 2f7f6f3 | 2026-02-11 15:12:48 +0100 | [diff] [blame^] | 51 | ! -in/-innen followed by further characters, as in Lehrer:innenfortbildung. |
| 52 | ! After a slash only lowercase in/innen counts, so /Innen... is still split |
| 53 | ! (e.g. Nutzer/Innenarchitekt, Innenminister/Innenministerinnen). |
| 54 | define genderInComp [ |
| 55 | [ Slash ( %- ) genderEndingsInLower | |
| 56 | [ Asterisk | "_" | ":" ] genderEndingsIn ] Char+ |
| 57 | ]; |
| 58 | ! Union of the seven gender-ending patterns: parenthetical, -frau, short, and -in/-innen forms. |
| 59 | define GenderEndings [ genderParenIn | genderParenShort | genderParenFrau | genderFrau | genderShort | genderIn | genderInComp ]; |