Add support for inline dependency structures (fixes #7)
Change-Id: I25781e1a285a6bd6345ceb5e5487b410e9bd5353
diff --git a/t/script.t b/t/script.t
index e87f1b7..2989e72 100644
--- a/t/script.t
+++ b/t/script.t
@@ -862,4 +862,82 @@
unlink $temp_out;
};
+subtest 'Handling of dependency data (1)' => sub {
+  # No --inline-dependencies option is passed in this run; the checks below
+  # expect deprel/head/n to show up as features in csc/morpho.xml.
+  my $input = catfile($f, 'data', 'SKU21.head.i5.xml');
+  my $run = test_tei2korapxml(
+    file => $input, tmp => 'script_out',
+    param => '-s --no-tokenizer --inline-tokens=csc#morpho',
+  )->stderr_like(qr!tei2korapxml:.*? text_id=SKU21_JAN\.00001!);
+
+  $run->unzip_xml('SKU21/JAN/00001/data.xml')
+    ->content_like(qr/cgICpWb AQNFU/)
+    ->content_like(qr/LhyS OLHV/)
+    ->content_like(qr/kdQVs hunIRQIN/);
+
+  $run->unzip_xml('SKU21/JAN/00001/csc/morpho.xml')
+    ->attr_is('spanList span:nth-child(2)', 'id', 's1')
+    ->attr_is('#s1', 'from', '5')
+    ->attr_is('#s1', 'to', '9')
+    ->text_is('#s1 fs f fs f[name="deprel"]', 'name')
+    ->text_is('#s1 fs f fs f[name="head"]', '3')
+    ->text_is('#s1 fs f fs f[name="lemma"]', 'kCXD')
+    ->text_is('#s1 fs f fs f[name="msd"]', 'SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK')
+    ->text_is('#s1 fs f fs f[name="n"]', '2')
+    ->text_is('#s1 fs f fs f[name="pos"]', 'N');
+};
+
+subtest 'Handling of dependency data (2)' => sub {
+  # Run with --inline-dependencies=!csc: the checks below expect n/deprel/head
+  # to be absent from csc/morpho.xml and relations in csc/dependency.xml.
+  my $input = catfile($f, 'data', 'SKU21.head.i5.xml');
+  my $options = join ' ', (
+    '-s --no-tokenizer',
+    '--inline-tokens=csc#morpho',
+    '--inline-dependencies=!csc',
+    '--no-skip-inline-token-annotations',
+  );
+  my $run = test_tei2korapxml(file => $input, tmp => 'script_out', param => $options)
+    ->stderr_like(qr!tei2korapxml:.*? text_id=SKU21_JAN\.00001!)
+    ->stderr_like(qr!tei2korapxml:.*? text_id=SKU21_JAN\.00002!)
+    ->stderr_like(qr!tei2korapxml:.*? text_id=SKU21_JAN\.00003!);
+
+  $run->unzip_xml('SKU21/JAN/00001/data.xml')
+    ->content_like(qr/cgICpWb AQNFU/)
+    ->content_like(qr/LhyS OLHV/)
+    ->content_like(qr/kdQVs hunIRQIN/);
+
+  # lemma/msd/pos are still expected on #s1; n/deprel/head must be gone.
+  $run->unzip_xml('SKU21/JAN/00001/csc/morpho.xml')
+    ->attr_is('spanList span:nth-child(2)', 'id', 's1')
+    ->attr_is('#s1', 'from', '5')
+    ->attr_is('#s1', 'to', '9')
+    ->text_is('#s1 fs f fs f[name="lemma"]', 'kCXD')
+    ->text_is('#s1 fs f fs f[name="msd"]', 'SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK')
+    ->text_is('#s1 fs f fs f[name="pos"]', 'N')
+    ->element_exists_not('#s1 fs f fs f[name="n"]')
+    ->element_exists_not('#s1 fs f fs f[name="deprel"]')
+    ->element_exists_not('#s1 fs f fs f[name="head"]');
+
+  # Text 00001: #s1_n2 with a rel labelled "name" and its nested span.
+  $run->unzip_xml('SKU21/JAN/00001/csc/dependency.xml')
+    ->attr_is('spanList span:nth-child(2)', 'id', 's1_n2')
+    ->attr_is('#s1_n2', 'from', '5')
+    ->attr_is('#s1_n2', 'to', '9')
+    ->attr_is('#s1_n2 rel', 'label', 'name')
+    ->attr_is('#s1_n2 rel span', 'from', '10')
+    ->attr_is('#s1_n2 rel span', 'to', '15');
+
+  # Text 00002: #s1_n2 with a rel labelled "poss" and its nested span.
+  $run->unzip_xml('SKU21/JAN/00002/csc/dependency.xml')
+    ->attr_is('spanList span:nth-child(2)', 'id', 's1_n2')
+    ->attr_is('#s1_n2', 'from', '4')
+    ->attr_is('#s1_n2', 'to', '5')
+    ->attr_is('#s1_n2 rel', 'label', 'poss')
+    ->attr_is('#s1_n2 rel span', 'from', '6')
+    ->attr_is('#s1_n2 rel span', 'to', '12');
+};
+
+
done_testing;