Add cutter, more spaCy splitters, and the Stanford Tokenizer only
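
The spaCy splitter spacy/spacy_sent.py picks one of three pipelines via
its first argument and reads the text to split from the second; the
input file name below is illustrative:

    python spacy/spacy_sent.py dep corpus.txt          # dependency parser
    python spacy/spacy_sent.py stat corpus.txt         # statistical senter
    python spacy/spacy_sent.py sentencizer corpus.txt  # rule-based splitter

Each detected sentence is printed, followed by an "</eos>" marker line.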
Change-Id: I6ae4c014298d9c07e62850d39408b77cb145a9cd
diff --git a/spacy/spacy_sent.py b/spacy/spacy_sent.py
new file mode 100644
index 0000000..df294c0
--- /dev/null
+++ b/spacy/spacy_sent.py
@@ -0,0 +1,33 @@
+import spacy
+import sys
+from spacy.lang.de import German
+
+# A slower but more accurate alternative model: "de_dep_news_trf".
+
+model = sys.argv[1]
+
+if model == 'dep':
+    # Sentence boundaries from the full dependency parser.
+    nlp = spacy.load("de_core_news_sm")
+elif model == 'stat':
+    # Statistical sentence segmenter ("senter") without the parser.
+    nlp = spacy.load("de_core_news_sm", exclude=["parser"])
+    nlp.enable_pipe("senter")
+elif model == 'sentencizer':
+    # Rule-based sentencizer on a blank German pipeline.
+    nlp = German()
+    nlp.add_pipe("sentencizer")
+else:
+    sys.exit("unknown model '%s' (expected 'dep', 'stat' or 'sentencizer')" % model)
+
+# Read the input file and split it into sentences with the selected pipeline.
+
+with open(sys.argv[2], 'r') as f:
+    contents = f.read()
+
+    doc = nlp(contents)
+
+    # Print each detected sentence, followed by an end-of-sentence marker.
+    for sent in doc.sents:
+        print(sent.text)
+        print("</eos>")