blob: 78f877c215545eebed5074d8d9a3f3782f6404d1 [file] [log] [blame]
Marc Kupietz2dca2ed2025-12-24 13:01:00 +01001{
2 "title": "KorAP/KorAP-Tokenizer",
3 "description": "DFA tokenizer with character offset output, large abbreviation tables and CMC support.",
4 "license": "Apache-2.0",
5 "upload_type": "software",
6 "access_right": "open",
7 "creators": [
8 {
9 "name": "Kupietz, Marc",
10 "affiliation": "Leibniz-Institut für Deutsche Sprache"
11 },
12 {
13 "name": "Diewald, Nils",
14 "affiliation": "Leibniz-Institut für Deutsche Sprache"
15 }
16 ],
17 "communities": [
18 {
19 "identifier": "natural-language-processing"
20 }
21 ],
22 "keywords": [
23 "tokenizer",
24 "NLP",
25 "natural language processing",
26 "DFA",
27 "German",
28 "English",
29 "French",
30 "KorAP"
31 ]
32}