Set shorter default parse timeout (30s -> 0.5s) and support fractional-second timeouts
Change-Id: I8f14f13df9863ed62a9cb9e372857b6224e885be
diff --git a/systems/parse_spacy_pipe.py b/systems/parse_spacy_pipe.py
index 13e4bd4..b532456 100644
--- a/systems/parse_spacy_pipe.py
+++ b/systems/parse_spacy_pipe.py
@@ -8,7 +8,7 @@
from germalemma import GermaLemma
# Dependency parsing safety limits
-DEFAULT_PARSE_TIMEOUT = 30 # seconds per sentence
+DEFAULT_PARSE_TIMEOUT = 0.5 # seconds per sentence
DEFAULT_MAX_SENTENCE_LENGTH = 500 # tokens
class TimeoutException(Exception):
@@ -39,22 +39,22 @@
# Set up timeout
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
- signal.alarm(timeout)
+ signal.setitimer(signal.ITIMER_REAL, timeout)
try:
doc = spacy_model(text)
- signal.alarm(0) # Cancel alarm
+ signal.setitimer(signal.ITIMER_REAL, 0) # Cancel alarm
signal.signal(signal.SIGALRM, old_handler)
return doc, True, None
except TimeoutException:
- signal.alarm(0) # Cancel alarm
+ signal.setitimer(signal.ITIMER_REAL, 0) # Cancel alarm
signal.signal(signal.SIGALRM, old_handler)
# Retry without dependency parsing
disabled_components = ["ner", "parser"]
doc = spacy_model(text, disable=disabled_components)
return doc, False, f"Dependency parsing timeout after {timeout}s, processed without dependencies"
except Exception as e:
- signal.alarm(0) # Cancel alarm
+ signal.setitimer(signal.ITIMER_REAL, 0) # Cancel alarm
signal.signal(signal.SIGALRM, old_handler)
# Retry without dependency parsing
disabled_components = ["ner", "parser"]
@@ -244,8 +244,8 @@
logger.info("GermaLemma version: unknown (no __version__ attribute)")
# Parse timeout and sentence length limits from environment variables
- parse_timeout = int(os.getenv("SPACY_PARSE_TIMEOUT", DEFAULT_PARSE_TIMEOUT))
- max_sentence_length = int(os.getenv("SPACY_MAX_SENTENCE_LENGTH", DEFAULT_MAX_SENTENCE_LENGTH))
+ parse_timeout = float(os.getenv("SPACY_PARSE_TIMEOUT", str(DEFAULT_PARSE_TIMEOUT)))
+ max_sentence_length = int(os.getenv("SPACY_MAX_SENTENCE_LENGTH", str(DEFAULT_MAX_SENTENCE_LENGTH)))
logger.info(f"Dependency parsing limits: timeout={parse_timeout}s, max_length={max_sentence_length} tokens")