Add korapxml2conllu option -e <regex> to extract element/attribute values

./script/korapxml2conllu -e '(posting/id|div/id)' -p "A0000" t/data/wdf19.zip  | head -12

 # foundry = base
 # filename = WDF19/A0000/10894/base/tokens.xml
 # text_id = WDF19_A0000.10894
 # start_offsets = 0 0 5 14 23 32 40 48 51 54 60
 # end_offsets = 61 4 12 22 31 39 47 50 53 59 61
 1	Arts	_	_	_	_	_	_	_	_
 2	visuels	_	_	_	_	_	_	_	_
 # div/id = i.10894_1
 # posting/id = i.10894_1_1
 3	Pourquoi	_	_	_	_	_	_	_	_
 4	toujours	_	_	_	_	_	_	_	_
 5	vouloir	_	_	_	_	_	_	_	_

Change-Id: I2cedc6580699fab0db6794d0f3225ea4da72b30f
diff --git a/t/data/wdf19.zip b/t/data/wdf19.zip
new file mode 100644
index 0000000..61a8bdf
--- /dev/null
+++ b/t/data/wdf19.zip
Binary files differ
diff --git a/t/test.t b/t/test.t
index 22d261a..48bd2b1 100644
--- a/t/test.t
+++ b/t/test.t
@@ -1,6 +1,6 @@
 use strict;
 use warnings;
-use Test::More tests => 10;
+use Test::More tests => 19;
 use Test::Script;
 use Test::TempDir::Tiny;
 use File::Copy;
@@ -75,4 +75,15 @@
 script_runs([ 'script/korapxml2conllu', "$test_tempdir/goe.tree_tagger.zip" ],
     "Converts $test_tempdir/goe.tree_tagger.zip to CoNLL-U");
 script_stdout_is $expected, "Full round trip: Converts goe.morpho.conllu to KorAP-XML and back to CoNLL-U correctly";
+
+script_runs([ 'script/korapxml2conllu', '-e', 'div/type', "t/data/goe.tree_tagger.zip" ], "Runs korapxml2conllu with morpho input and attribute extraction");
+script_stdout_like qr{\n\Q# div/type = Autobiographie\E\n}, "Extracts attributes from morpho zips (1)";
+script_stdout_like qr{\n\Q# div/type = section\E\n}, "Extracts attributes from morpho zips (2)";
+
+script_runs([ 'script/korapxml2conllu', '-e', '(posting/id|div/id)', "t/data/wdf19.zip" ], "Runs korapxml2conllu with base input and regex attribute extraction");
+script_stdout_like qr{\n\Q# posting/id = i.13075_11_45\E}, "Extracts multiple attributes from base zips (1)";
+script_stdout_like qr{\n\Q# div/id = i.13075_14\E}, "Extracts multiple attributes from base zips (2)";
+script_stdout_like qr{\n\Q# posting/id = i.14548_9_1\E\n3\tbonjour}, "Extracts attributes in the right place";
+script_stdout_like qr{\n\Q# posting/id = i.12610_4_4\E}, "Extracts directly adjacent postings from base zips (1)";
+script_stdout_like qr{\n\Q# posting/id = i.12610_4_5\E}, "Extracts directly adjacent postings from base zips (2)";
 done_testing;