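Make spaCy process count, batch and chunk size env-configurable

Read SPACY_N_PROCESS, SPACY_BATCH_SIZE and SPACY_CHUNK_SIZE from the
environment instead of hard-coding them in parse_spacy_pipe.py; the
defaults (10, 2000, 20000) are unchanged and are also set in the
Dockerfile. Log the effective batch size and process count at startup.

Also create /app/tmp in the image and point TMPDIR, TEMP and TMP at it
so temporary files are written there instead of the system /tmp.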

Change-Id: Ic511e217e8c6cfd3e83a162c1dfb7da07ebb4bf4
diff --git a/Dockerfile b/Dockerfile
index 9499173..ceeae9c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -49,9 +49,17 @@
 ENV SPACY_USE_GERMALEMMA="True"
 ENV SPACY_PARSE_TIMEOUT="30"
 ENV SPACY_MAX_SENTENCE_LENGTH="500"
+ENV SPACY_N_PROCESS="10"
+ENV SPACY_BATCH_SIZE="2000"
+ENV SPACY_CHUNK_SIZE="20000"
 
 WORKDIR /app
-RUN mkdir -p "/app/logs"
+RUN mkdir -p "/app/logs" "/app/tmp"
+
+# Set temp directories to use app directory instead of system /tmp
+ENV TMPDIR="/app/tmp"
+ENV TEMP="/app/tmp" 
+ENV TMP="/app/tmp"
 
 # Define the entry point
 CMD ["python", "/app/systems/parse_spacy_pipe.py"]
\ No newline at end of file
diff --git a/systems/parse_spacy_pipe.py b/systems/parse_spacy_pipe.py
index cd7d6c5..402d2d6 100644
--- a/systems/parse_spacy_pipe.py
+++ b/systems/parse_spacy_pipe.py
@@ -187,9 +187,9 @@
 	args = parser.parse_args()
 	
 	file_has_next, chunk_ix = True, 0
-	CHUNK_SIZE = 20000
-	SPACY_BATCH = 2000
-	SPACY_PROC = 10
+	CHUNK_SIZE = int(os.getenv("SPACY_CHUNK_SIZE", "20000"))
+	SPACY_BATCH = int(os.getenv("SPACY_BATCH_SIZE", "2000"))
+	SPACY_PROC = int(os.getenv("SPACY_N_PROCESS", "10"))
 	
 	# =====================================================================================
 	#                    LOGGING INFO ...
@@ -210,6 +210,7 @@
 		logger.info(f"Using SPACY_USE_GERMALEMMA environment variable: {args.use_germalemma}")
 	
 	logger.info(f"Chunking {args.corpus_name} Corpus in chunks of {CHUNK_SIZE} Sentences")
+	logger.info(f"Processing configuration: batch_size={SPACY_BATCH}, n_process={SPACY_PROC}")
 	
 	# =====================================================================================
 	#                    POS TAG DOCUMENTS