Improve ETA logging: log elapsed time, speed, and a final summary
Change-Id: I0fc72f468ea9428cd521615e927614dea9269846
diff --git a/systems/parse_spacy_pipe.py b/systems/parse_spacy_pipe.py
index 402d2d6..13e4bd4 100644
--- a/systems/parse_spacy_pipe.py
+++ b/systems/parse_spacy_pipe.py
@@ -197,6 +197,12 @@
logger = logging.getLogger(__name__)
console_hdlr = logging.StreamHandler(sys.stderr)
file_hdlr = logging.FileHandler(filename=f"logs/Parse_{args.corpus_name}.SpaCy.log")
+
+ # Custom format without module name
+ formatter = logging.Formatter('%(levelname)s: %(message)s')
+ console_hdlr.setFormatter(formatter)
+ file_hdlr.setFormatter(formatter)
+
logging.basicConfig(level=logging.INFO, handlers=[console_hdlr, file_hdlr])
# Override with environment variables if set (useful for Docker)
@@ -251,7 +257,14 @@
annos, file_has_next = fu.get_file_annos_chunk(stdin, chunk_size=CHUNK_SIZE, token_class=get_token_type(args.gld_token_type), comment_str=args.comment_str, our_foundry="spacy")
if len(annos) == 0: break
total_processed_sents += len(annos)
- logger.info(f"Already processed {total_processed_sents} sentences...")
+
+ # Calculate progress statistics
+ elapsed_time = time.time() - start
+ sents_per_sec = total_processed_sents / elapsed_time if elapsed_time > 0 else 0
+ current_time = time.strftime("%Y-%m-%d %H:%M:%S")
+
+ logger.info(f"{current_time} | Processed: {total_processed_sents} sentences | Elapsed: {elapsed_time:.1f}s | Speed: {sents_per_sec:.1f} sents/sec")
+
sents = [a.get_sentence() for a in annos]
# Process sentences individually when dependency parsing is enabled for timeout protection
@@ -275,7 +288,14 @@
print(conll_str+ "\n")
end = time.time()
- logger.info(f"Processing {args.corpus_name} took {(end - start)} seconds!")
+ total_time = end - start
+ final_sents_per_sec = total_processed_sents / total_time if total_time > 0 else 0
+
+ logger.info(f"=== Processing Complete ===")
+ logger.info(f"Total sentences: {total_processed_sents}")
+ logger.info(f"Total time: {total_time:.2f}s")
+ logger.info(f"Average speed: {final_sents_per_sec:.1f} sents/sec")
+
if dependency_warnings > 0:
logger.info(f"Dependency parsing warnings: {dependency_warnings} sentences processed without dependencies")
\ No newline at end of file