Use a placeholder token instead of an empty string for empty tokenizer input
Change-Id: Ic202c5e7e0ef2682060aa77ac9821f87a960bdb0
diff --git a/systems/parse_spacy_pipe.py b/systems/parse_spacy_pipe.py
index f6dd7fa..0abfb33 100644
--- a/systems/parse_spacy_pipe.py
+++ b/systems/parse_spacy_pipe.py
@@ -128,9 +128,9 @@
words = text.split(' ')
# Filter out empty strings to avoid spaCy errors
words = [w for w in words if w]
- # Handle edge case of empty input
+ # Handle edge case of empty input - use a placeholder token
if not words:
- words = ['']
+ words = ['_EMPTY_']
# All tokens 'own' a subsequent space character in this tokenizer
spaces = [True] * len(words)
return Doc(self.vocab, words=words, spaces=spaces)