Improve empty word handling

Change-Id: Ic202c5e7e0ef2682060aa77ac9821f87a960bdb0

commit: dac2944bed11a047545a9c7fd506a94b95a8a7f8 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Sun Nov 02 07:42:46 2025 +0100
committer: Marc Kupietz <kupietz@ids-mannheim.de> Sun Nov 02 07:42:46 2025 +0100
tree: ed7b50ce0622bcba781b0fb7d02284ea445fc14f
parent: 732b3f4b3eb7096258f612b7e774209f679cbb47 [diff]
diff --git a/systems/parse_spacy_pipe.py b/systems/parse_spacy_pipe.py
index f6dd7fa..0abfb33 100644
--- a/systems/parse_spacy_pipe.py
+++ b/systems/parse_spacy_pipe.py

@@ -128,9 +128,9 @@
 		words = text.split(' ')
 		# Filter out empty strings to avoid spaCy errors
 		words = [w for w in words if w]
-		# Handle edge case of empty input
+		# Handle edge case of empty input - use a placeholder token
 		if not words:
-			words = ['']
+			words = ['_EMPTY_']
 		# All tokens 'own' a subsequent space character in this tokenizer
 		spaces = [True] * len(words)
 		return Doc(self.vocab, words=words, spaces=spaces)
commit	dac2944bed11a047545a9c7fd506a94b95a8a7f8	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Sun Nov 02 07:42:46 2025 +0100
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Sun Nov 02 07:42:46 2025 +0100
tree	ed7b50ce0622bcba781b0fb7d02284ea445fc14f
parent	732b3f4b3eb7096258f612b7e774209f679cbb47 [diff]