Add safety limits for dependency parsing

Change-Id: If901f9d7f03bc7926a812dfa55bf18a58149e583
diff --git a/systems/parse_spacy.py b/systems/parse_spacy.py
index d2bf1b1..ac27058 100644
--- a/systems/parse_spacy.py
+++ b/systems/parse_spacy.py
@@ -1,11 +1,65 @@
-import argparse
+import argparse, os
 import spacy
 from spacy.tokens import Doc
-import logging, sys, time
+import logging, sys, time, signal
 from lib.CoNLL_Annotation import get_token_type
 import my_utils.file_utils as fu
 from germalemma import GermaLemma
 
+# Dependency parsing safety limits (used as the defaults of safe_dependency_parse)
+DEFAULT_PARSE_TIMEOUT = 30  # seconds allowed for one full-pipeline call before falling back
+DEFAULT_MAX_SENTENCE_LENGTH = 500  # tokens, counted by whitespace splitting
+
+class TimeoutException(Exception):
+	"""Raised by timeout_handler when the parsing alarm (SIGALRM) goes off."""
+
+def timeout_handler(signum, frame):  # installed for SIGALRM by safe_dependency_parse; both arguments are unused
+	raise TimeoutException("Dependency parsing timeout")  # turns the alarm signal into a catchable exception
+
+def safe_dependency_parse(spacy_model, text, timeout=DEFAULT_PARSE_TIMEOUT, max_length=DEFAULT_MAX_SENTENCE_LENGTH):
+	"""
+	Run the spaCy pipeline on text, degrading to a parser-less run when needed.
+
+	The full pipeline is skipped for over-long input and abandoned when it exceeds
+	the timeout or raises; in each case the text is re-run with "ner" and "parser"
+	disabled. The timeout uses SIGALRM, so it needs a platform that provides it
+	and (per the Python signal docs) a call from the main thread.
+
+	Args:
+		spacy_model: Loaded spaCy model
+		text: Text to parse
+		timeout: Seconds to allow for the full pipeline (passed to signal.alarm)
+		max_length: Maximum number of whitespace-separated tokens for a full parse
+
+	Returns:
+		tuple: (spacy_doc, success, warning_message); warning_message is None on success
+	"""
+	fallback_disabled = ["ner", "parser"]
+	# Length is estimated by whitespace splitting, not by the spaCy tokenizer
+	token_count = len(text.split())
+	if token_count > max_length:
+		doc = spacy_model(text, disable=fallback_disabled)
+		return doc, False, f"Sentence too long ({token_count} tokens > {max_length}), dependency parsing skipped"
+
+	old_handler = signal.signal(signal.SIGALRM, timeout_handler)
+	signal.alarm(timeout)
+	try:
+		doc = spacy_model(text)
+		return doc, True, None
+	except TimeoutException:
+		warning = f"Dependency parsing timeout after {timeout}s, processed without dependencies"
+	except Exception as e:
+		warning = f"Dependency parsing error: {str(e)}, processed without dependencies"
+	finally:
+		# Always disarm the alarm and restore the previous handler, even if an
+		# exception that is not handled here (e.g. KeyboardInterrupt) propagates
+		signal.alarm(0)
+		signal.signal(signal.SIGALRM, old_handler)
+
+	# Retry without dependency parsing
+	doc = spacy_model(text, disable=fallback_disabled)
+	return doc, False, warning
+
 def format_morphological_features(token):
 	"""
 	Extract and format morphological features from a spaCy token for CoNLL-U output.