blob: 62f2bff5ed730a8803b4e69532ded12aa5830d10 [file] [log] [blame]
import sys
from spacy.lang.de import German
nlp = German()
# Create a Tokenizer with the default settings for English
# including punctuation rules and exceptions
tokenizer = nlp.tokenizer
with open(sys.argv[1], 'r') as f:
contents = f.read()
tokens = tokenizer(contents)
for t in tokens:
print(t)