import sys

from spacy.lang.de import German

nlp = German()

# Create a tokenizer with the default settings for German,
# including punctuation rules and exceptions
tokenizer = nlp.tokenizer

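# Read the file passed as the first command-line argument, tokenize its
# contents, and print one token per line.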
with open(sys.argv[1], 'r') as f:
    contents = f.read()

    tokens = tokenizer(contents)

    for t in tokens:
        print(t)
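
# Example invocation (script and input file names here are placeholders,
# not part of the repository):
#   python tokenize.py input.txt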