Make spaCy chunk size, batch size and process count env-configurable
Change-Id: Ic511e217e8c6cfd3e83a162c1dfb7da07ebb4bf4
diff --git a/systems/parse_spacy_pipe.py b/systems/parse_spacy_pipe.py
index cd7d6c5..402d2d6 100644
--- a/systems/parse_spacy_pipe.py
+++ b/systems/parse_spacy_pipe.py
@@ -187,9 +187,9 @@
args = parser.parse_args()
file_has_next, chunk_ix = True, 0
- CHUNK_SIZE = 20000
- SPACY_BATCH = 2000
- SPACY_PROC = 10
+ CHUNK_SIZE = int(os.getenv("SPACY_CHUNK_SIZE", "20000"))
+ SPACY_BATCH = int(os.getenv("SPACY_BATCH_SIZE", "2000"))
+ SPACY_PROC = int(os.getenv("SPACY_N_PROCESS", "10"))
# =====================================================================================
# LOGGING INFO ...
@@ -210,6 +210,7 @@
logger.info(f"Using SPACY_USE_GERMALEMMA environment variable: {args.use_germalemma}")
logger.info(f"Chunking {args.corpus_name} Corpus in chunks of {CHUNK_SIZE} Sentences")
+ logger.info(f"Processing configuration: batch_size={SPACY_BATCH}, n_process={SPACY_PROC}")
# =====================================================================================
# POS TAG DOCUMENTS