import sys

from spacy.lang.de import German

nlp = German()

# Create a Tokenizer with the default settings for German
# including punctuation rules and exceptions
tokenizer = nlp.tokenizer

# Read the input file passed as the first command-line argument
# (the explicit UTF-8 encoding is an assumption about the input)
with open(sys.argv[1], 'r', encoding='utf-8') as f:
    contents = f.read()

tokens = tokenizer(contents)

# Print one token per line
for t in tokens:
    print(t)