commit | 866b20f3ca40fab432c70cb4eebc5ee94dea1e96 | [log] [tgz] |
---|---|---|
author | Harald Lüngen <luengen@ids-mannheim.de> | Tue Sep 17 09:08:34 2024 +0300 |
committer | Harald Lüngen <luengen@ids-mannheim.de> | Tue Sep 17 09:08:34 2024 +0300 |
tree | caadb10d1fe3d4d2d44ae513ddb502a1e477e6dd | |
parent | 82409bf31d2437fe1c5c86af728900095ea4faae [diff] [blame] |
added
diff --git a/rearrange-idsDoc.awk b/rearrange-idsDoc.awk new file mode 100644 index 0000000..e267553 --- /dev/null +++ b/rearrange-idsDoc.awk
# Put the end tag </idsDoc> in the right place.
#
# Line-based filter over the input:
#   - A " </idsDoc>" line is emitted immediately before every line that
#     contains <idsDoc>, except the first such line.
#   - A line containing </idsCorpus> is replaced by a "</idsCorpus>" line,
#     preceded by " </idsDoc>" if at least one <idsDoc> line was seen.
#   - Lines containing </idsDoc> are not passed through by the final rule;
#     the end tags are re-generated by the two rules above instead.
#   - Every other line is printed unchanged.

BEGIN { DOCCOUNT = 0 }    # number of <idsDoc> lines seen so far

# Start of a document: close the previous document first, if there was one.
/<idsDoc>/ {
    if (DOCCOUNT++ > 0) {
        print " </idsDoc>"
    }
    print $0
}

# End of the corpus: close the last open document before closing the corpus.
# The guard avoids emitting an unmatched end tag when no <idsDoc> was seen.
/<\/idsCorpus>/ {
    if (DOCCOUNT > 0) {
        print " </idsDoc>"
    }
    print "</idsCorpus>"
}

# Pass through every line that none of the tag rules is responsible for.
!/<\/idsDoc>/ && !/<\/idsCorpus>/ && !/<idsDoc>/ { print }