blob: df294c032263b4ae81bb5389656e4e99e9d0d3cf [file] [log] [blame]
Akronb6efc732022-03-17 15:03:56 +01001import spacy
2import sys
3from spacy.lang.de import German
4
5# slower and more accurate: ("de_dep_news_trf")
6
# Names accepted as the first command-line argument.
SUPPORTED_MODELS = ("dep", "stat", "sentencizer")


def load_pipeline(model):
    """Build the spaCy pipeline used for sentence splitting.

    Args:
        model: Which sentence splitter to set up. ``'dep'`` loads the
            complete ``de_core_news_sm`` pipeline; ``'stat'`` loads
            ``de_core_news_sm`` with the ``parser`` excluded and the
            ``senter`` component enabled; ``'sentencizer'`` starts from a
            blank ``German`` pipeline and adds only the ``sentencizer``
            component.

    Returns:
        The configured pipeline object.

    Raises:
        ValueError: If ``model`` is not one of ``SUPPORTED_MODELS``.
    """
    if model == "dep":
        return spacy.load("de_core_news_sm")
    if model == "stat":
        nlp = spacy.load("de_core_news_sm", exclude=["parser"])
        nlp.enable_pipe("senter")
        return nlp
    if model == "sentencizer":
        nlp = German()
        nlp.add_pipe("sentencizer")
        return nlp
    # Fail with a clear message instead of leaving the pipeline unbound and
    # crashing later with an unrelated-looking error.
    raise ValueError(
        f"unknown model {model!r}; expected one of {', '.join(SUPPORTED_MODELS)}"
    )


def main(argv):
    """Split a text file into sentences and print them.

    Every sentence's text is printed on its own, followed by a line
    containing ``</eos>``.

    Args:
        argv: Command-line vector; ``argv[1]`` is the model name (see
            ``load_pipeline``) and ``argv[2]`` is the path of the text file.

    Returns:
        Exit status: 0 on success, 2 when the arguments are missing or the
        model name is not supported.
    """
    choices = "|".join(SUPPORTED_MODELS)
    if len(argv) < 3:
        prog = argv[0] if argv else "script"
        print(f"usage: {prog} ({choices}) <input-file>", file=sys.stderr)
        return 2

    model, path = argv[1], argv[2]
    # Validate before loading anything so a typo is reported as a usage
    # error rather than as a traceback.
    if model not in SUPPORTED_MODELS:
        print(f"unknown model {model!r}; expected ({choices})", file=sys.stderr)
        return 2

    nlp = load_pipeline(model)

    # Decode explicitly instead of relying on the locale default encoding.
    # NOTE(review): assumes the input file is UTF-8 -- confirm for the corpus.
    with open(path, "r", encoding="utf-8") as f:
        contents = f.read()

    # The file is already closed here; only the text is needed from now on.
    doc = nlp(contents)

    for sent in doc.sents:
        print(sent.text)
        print("</eos>")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))