"""Split a German text file into sentences with spaCy.

Usage: <script> MODEL FILE

MODEL selects how the pipeline is built:

    dep          load "de_core_news_sm" unchanged
    stat         load "de_core_news_sm" without its "parser" component and
                 enable the "senter" component instead
    sentencizer  start from a blank German pipeline and add only the
                 "sentencizer" component

FILE is read completely and processed in one call. Every sentence is printed
on its own line, followed by a line holding the " </eos> " marker.
"""
import sys

import spacy
from spacy.lang.de import German

# A slower but more accurate alternative model: "de_dep_news_trf".

USAGE = f"usage: {sys.argv[0]} (dep|stat|sentencizer) FILE"

# Fail with a readable message instead of an IndexError traceback.
if len(sys.argv) < 3:
    sys.exit(USAGE)

model = sys.argv[1]

if model == "dep":
    nlp = spacy.load("de_core_news_sm")
elif model == "stat":
    nlp = spacy.load("de_core_news_sm", exclude=["parser"])
    nlp.enable_pipe("senter")
elif model == "sentencizer":
    nlp = German()
    nlp.add_pipe("sentencizer")
else:
    # Without this branch `nlp` would stay unbound and the script would only
    # fail later with a NameError.
    sys.exit(f"unknown model {model!r}\n{USAGE}")

# Decode explicitly as UTF-8 rather than relying on the platform's default
# encoding, and close the file before the (potentially long) processing step.
with open(sys.argv[2], "r", encoding="utf-8") as f:
    contents = f.read()

# Set the length limit just above the input size so the complete text is
# accepted in a single call.
nlp.max_length = len(contents) + 100

doc = nlp(contents, disable=["ner"])

for sent in doc.sents:
    print(sent.text)
    print(" </eos> ")