Diff - eb7d06abdfbaec7b2f51f6a12ec6ab2dc379072a^! - KorAP/KorAP-XML-CoNLL-U

commit	eb7d06abdfbaec7b2f51f6a12ec6ab2dc379072a	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Fri Mar 19 16:29:16 2021 +0100
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Jul 29 17:42:57 2021 +0200
tree	69a414706b5f36f716ec661e266f722262659a23
parent	6a79cadabbd92f75b996500b2be7d396f855fba1 [diff] [blame]

Add korapxml2conllu option -e <regex> to extract element/attribute values as comments

./script/korapxml2conllu -e '(posting/id|div/id)' -p "A0000" t/data/wdf19.zip  | head -12

 # foundry = base
 # filename = WDF19/A0000/10894/base/tokens.xml
 # text_id = WDF19_A0000.10894
 # start_offsets = 0 0 5 14 23 32 40 48 51 54 60
 # end_offsets = 61 4 12 22 31 39 47 50 53 59 61
 1	Arts	_	_	_	_	_	_	_	_
 2	visuels	_	_	_	_	_	_	_	_
 # div/id = i.10894_1
 # posting/id = i.10894_1_1
 3	Pourquoi	_	_	_	_	_	_	_	_
 4	toujours	_	_	_	_	_	_	_	_
 5	vouloir	_	_	_	_	_	_	_	_

Change-Id: I2cedc6580699fab0db6794d0f3225ea4da72b30f

diff --git a/Changes b/Changes
index 2b993fb..3d045db 100644
--- a/Changes
+++ b/Changes

@@ -1,5 +1,5 @@
 0.3.900 unreleased
-
+        - korapxml2conllu option -e <regex> added to extract matching element/attribute values as comment lines
 
 0.3 2021-02-15
         - Provide conllu2korapxml to convert from ConLL-U to KorAP-XML zip