| Marc Kupietz | 2dca2ed | 2025-12-24 13:01:00 +0100 | [diff] [blame^] | 1 | { |
| 2 | "title": "KorAP/KorAP-Tokenizer", |
| 3 | "description": "DFA tokenizer with character offset output, large abbreviation tables and support for computer-mediated communication (CMC).", |
| 4 | "license": "Apache-2.0", |
| 5 | "upload_type": "software", |
| 6 | "access_right": "open", |
| 7 | "creators": [ |
| 8 | { |
| 9 | "name": "Kupietz, Marc", |
| 10 | "affiliation": "Leibniz-Institut für Deutsche Sprache" |
| 11 | }, |
| 12 | { |
| 13 | "name": "Diewald, Nils", |
| 14 | "affiliation": "Leibniz-Institut für Deutsche Sprache" |
| 15 | } |
| 16 | ], |
| 17 | "communities": [ |
| 18 | { |
| 19 | "identifier": "natural-language-processing" |
| 20 | } |
| 21 | ], |
| 22 | "keywords": [ |
| 23 | "tokenizer", |
| 24 | "NLP", |
| 25 | "natural language processing", |
| 26 | "DFA", |
| 27 | "German", |
| 28 | "English", |
| 29 | "French", |
| 30 | "KorAP" |
| 31 | ] |
| 32 | } |