Use morpho.xml if present when run on base zips
Change-Id: I32bf82fee7fb909e4cfd70123423bc2e1c1e8f81
diff --git a/Changes b/Changes
index f32e704..d5acfb1 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,5 @@
+ - korapxml2conllu: use morpho.xml, if present, instead of tokens.xml when run on base zips
+
0.4.1 2021-07-31
- korapxml2conllu: fix patterns not extracted for last texts in archive
diff --git a/script/korapxml2conllu b/script/korapxml2conllu
index 366b0b1..17191a1 100755
--- a/script/korapxml2conllu
+++ b/script/korapxml2conllu
@@ -104,7 +104,14 @@
}
} else {
$foundry = "base";
- $morphoOrTokenCommand = "$UNZIP -c $morpho_zip '*/${sigle_pattern}*/*/*/tokens.xml' $zipsiglepattern |";
+ $morphoOrTokenCommand = "$UNZIP -l $morpho_zip '*/${sigle_pattern}*/*/*/morpho.xml' $zipsiglepattern";
+ if (`$morphoOrTokenCommand` !~ /morpho\.xml/) {
+ $morphoOrTokenCommand =~ s/morpho\.xml/tokens.xml/;
+ } else {
+ $baseOnly = 0;
+ }
+ $morphoOrTokenCommand =~ s/-l/-c/;
+ $morphoOrTokenCommand .= ' |';
$plaintextAndStructureCommand = "$UNZIP -c $data_zip '*/${sigle_pattern}*/*/[sd][ta]*.xml' $zipsiglepattern |";
}