| use strict; |
| use warnings; |
| |
| use FindBin; |
| BEGIN { |
| unshift @INC, "$FindBin::Bin/../lib"; |
| }; |
| |
| use Test::More; |
| use Test::XML::Loy; |
| use_ok('KorAP::XML::TEI::Inline'); |
| |
| |
| my $inline = KorAP::XML::TEI::Inline->new; |
| |
| ok($inline->parse('aaa', \'Der <b>alte</b> Mann'), 'Parsed'); |
| |
| is($inline->data->data, 'Der alte Mann'); |
| |
| Test::XML::Loy->new($inline->structures->to_string('aaa', 2)) |
| ->attr_is('#s0', 'l', "1") |
| ->attr_is('#s0', 'to', 13) |
| ->text_is('#s0 fs f[name=name]', 'text') |
| ->attr_is('#s1', 'l', "2") |
| ->attr_is('#s1', 'from', 4) |
| ->attr_is('#s1', 'to', 8) |
| ->text_is('#s1 fs f[name=name]', 'b') |
| ; |
| |
| Test::XML::Loy->new($inline->tokens->to_string('aaa', 0)) |
| ->element_exists_not('fs') |
| ; |
| |
| |
| ok($inline->parse('aaa', \'<w>Die</w> <w>alte</w> <w>Frau</w>'), 'Parsed'); |
| |
| is($inline->data->data, 'Die alte Frau'); |
| |
| Test::XML::Loy->new($inline->structures->to_string('aaa', 2)) |
| ->attr_is('#s0', 'l', "1") |
| ->attr_is('#s0', 'to', 13) |
| ->text_is('#s0 fs f[name=name]', 'text') |
| |
| ->attr_is('#s1', 'l', "2") |
| ->attr_is('#s1', 'to', 3) |
| ->text_is('#s1 fs f[name=name]', 'w') |
| |
| ->attr_is('#s2', 'l', "2") |
| ->attr_is('#s2', 'from', 4) |
| ->attr_is('#s2', 'to', 8) |
| ->text_is('#s2 fs f[name=name]', 'w') |
| |
| ->attr_is('#s3', 'l', "2") |
| ->attr_is('#s3', 'from', 9) |
| ->attr_is('#s3', 'to', 13) |
| ->text_is('#s3 fs f[name=name]', 'w') |
| ; |
| |
| Test::XML::Loy->new($inline->tokens->to_string('aaa', 0)) |
| ->attr_is('#s0', 'l', "2") |
| ->attr_is('#s0', 'to', 3) |
| |
| ->attr_is('#s1', 'l', "2") |
| ->attr_is('#s1', 'from', 4) |
| ->attr_is('#s1', 'to', 8) |
| |
| ->attr_is('#s2', 'l', "2") |
| ->attr_is('#s2', 'from', 9) |
| ->attr_is('#s2', 'to', 13) |
| ; |
| |
| ok($inline->parse('aaa', \'<w lemma="die" type="det">Die</w> <w |
| lemma="alt" type="ADJ">alte</w> <w lemma="frau" type="NN">Frau</w>'), 'Parsed'); |
| |
| is($inline->data->data, 'Die alte Frau'); |
| |
| Test::XML::Loy->new($inline->tokens->to_string('aaa', 1)) |
| ->attr_is('#s0', 'l', "2") |
| ->attr_is('#s0', 'to', 3) |
| ->text_is('#s0 fs f[name="lemma"]', 'die') |
| ->text_is('#s0 fs f[name="type"]', 'det') |
| |
| ->attr_is('#s1', 'l', "2") |
| ->attr_is('#s1', 'from', 4) |
| ->attr_is('#s1', 'to', 8) |
| ->text_is('#s1 fs f[name="lemma"]', 'alt') |
| ->text_is('#s1 fs f[name="type"]', 'ADJ') |
| |
| ->attr_is('#s2', 'l', "2") |
| ->attr_is('#s2', 'from', 9) |
| ->attr_is('#s2', 'to', 13) |
| ->text_is('#s2 fs f[name="lemma"]', 'frau') |
| ->text_is('#s2 fs f[name="type"]', 'NN') |
| ; |
| |
| subtest 'Support dependency parsing' => sub { |
| $inline = KorAP::XML::TEI::Inline->new(0,{},0,1); |
| ok($inline->parse('Fake News Media', |
| \'<s><w n="1" lemma="Fake" pos="N" head="2" deprel="name" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK">Fake</w> <w n="2" lemma="News" pos="N" head="3" deprel="name" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK">News</w> <w n="3" lemma="media" pos="N" head="0" deprel="ROOT" msd="NUM_Sg|CASE_Nom|CASECHANGE_Up">Media</w></s> ' |
| ), 'Parsed'); |
| |
| is($inline->data->data, 'Fake News Media '); |
| |
| Test::XML::Loy->new($inline->tokens->to_string('aaa', 1)) |
| ->attr_is('#s0', 'l', "3") |
| ->attr_is('#s0', 'to', 4) |
| ->text_is('#s0 fs f[name="lemma"]', 'Fake') |
| ->text_is('#s0 fs f[name="pos"]', 'N') |
| ->text_is('#s0 fs f[name="n"]','1') |
| |
| ->attr_is('#s1', 'l', "3") |
| ->attr_is('#s1', 'from', 5) |
| ->attr_is('#s1', 'to', 9) |
| ->text_is('#s1 fs f[name="lemma"]', 'News') |
| ->text_is('#s1 fs f[name="pos"]', 'N') |
| ->text_is('#s1 fs f[name="n"]','2') |
| |
| ->attr_is('#s2', 'l', "3") |
| ->attr_is('#s2', 'from', 10) |
| ->attr_is('#s2', 'to', 15) |
| ->text_is('#s2 fs f[name="lemma"]', 'media') |
| ->text_is('#s2 fs f[name="pos"]', 'N') |
| ->text_is('#s2 fs f[name="n"]','3') |
| ; |
| |
| Test::XML::Loy->new($inline->tokens->to_string('aaa', 4)) |
| ->attr_is('#s0', 'l', "3") |
| ->attr_is('#s0', 'to', 4) |
| ->text_is('#s0 fs f[name="lemma"]', 'Fake') |
| ->text_is('#s0 fs f[name="pos"]', 'N') |
| ->element_exists_not('#s0 fs f[name="n"]') |
| |
| ->attr_is('#s1', 'l', "3") |
| ->attr_is('#s1', 'from', 5) |
| ->attr_is('#s1', 'to', 9) |
| ->text_is('#s1 fs f[name="lemma"]', 'News') |
| ->text_is('#s1 fs f[name="pos"]', 'N') |
| |
| ->attr_is('#s2', 'l', "3") |
| ->attr_is('#s2', 'from', 10) |
| ->attr_is('#s2', 'to', 15) |
| ->text_is('#s2 fs f[name="lemma"]', 'media') |
| ->text_is('#s2 fs f[name="pos"]', 'N') |
| ; |
| |
| Test::XML::Loy->new($inline->dependencies->to_string('aaa', 3)) |
| ->attr_is('#s1_n1', 'l', "3") |
| ->element_exists('#s1_n1[from="0"]') |
| ->attr_is('#s1_n1', 'to', 4) |
| ->attr_is('#s1_n1 rel', 'label', 'name') |
| ->attr_is('#s1_n1 rel span', 'from', 5) |
| ->attr_is('#s1_n1 rel span', 'to', 9) |
| ->element_exists_not('#s1_n1 fs') |
| |
| ->attr_is('#s1_n2', 'l', "3") |
| ->attr_is('#s1_n2', 'from', 5) |
| ->attr_is('#s1_n2', 'to', 9) |
| ->attr_is('#s1_n2 rel', 'label', 'name') |
| ->attr_is('#s1_n2 rel span', 'from', 10) |
| ->attr_is('#s1_n2 rel span', 'to', 15) |
| |
| ->attr_is('#s1_n3', 'l', "3") |
| ->attr_is('#s1_n3', 'from', 10) |
| ->attr_is('#s1_n3', 'to', 15) |
| ->attr_is('#s1_n3 rel', 'label', 'ROOT') |
| ->element_exists('#s1_n3 rel span[from="0"]') |
| ->attr_is('#s1_n3 rel span', 'to', 15) |
| ; |
| |
| $inline = KorAP::XML::TEI::Inline->new(0,{},0,1); |
| ok($inline->parse('Fake News Media', |
| \('<p xml:lang="x-|fin:2|"><s xml:lang="fin">'. |
| '<w deprel="nn" head="2" lemma="lJgkPOGUBSFSRQlx" msd="NUM_Sg|CASE_Nom|CASECHANGE_Up" n="1" pos="N">lJgkPOGUBSFSRQlx</w> '. |
| '<w deprel="nsubj" head="3" lemma="rYuqciR" msd="SUBCAT_Prop|NUM_Sg|CASE_Nom|CASECHANGE_Up|OTHER_UNK" n="2" pos="N">rYuqciR</w> '. |
| '<w deprel="ROOT" head="0" lemma="RcidTBqv" msd="PRS_Sg3|VOICE_Act|TENSE_Prt|MOOD_Ind" n="3" pos="V">RcidTBqv</w> '. |
| '<w deprel="poss" head="5" lemma="cHIf" msd="SUBCAT_Acro|NUM_Sg|CASE_Nom|CASECHANGE_Up" n="4" pos="N">cHIf</w> '. |
| '<w deprel="nommod" head="3" lemma="reuvyWZtUhN" msd="NUM_Sg|CASE_Ela" n="5" pos="N">reuvyWZtUhN</w> '. |
| '<w deprel="nsubj" head="7" lemma="KsaXYaFo" msd="NUM_Sg|CASE_Gen" n="6" pos="N">KsaXYaFo</w> '. |
| '<w deprel="iccomp" head="3" lemma="qJhgSDNOYpWg" msd="NUM_Sg|CASE_Ill|VOICE_Act|INF_Inf3" n="7" pos="V">qJhgSDNOYpWg</w> '. |
| '<w deprel="name" head="9" lemma="xtRyGN" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK" n="8" pos="N">xtRyGN</w> '. |
| '<w deprel="poss" head="10" lemma="XCVuQwU" msd="SUBCAT_Prop|NUM_Sg|CASE_Gen|CASECHANGE_Up|OTHER_UNK" n="9" pos="N">XCVuQwU</w> '. |
| '<w deprel="poss" head="11" lemma="hYwEsYDUbYHmJ" msd="NUM_Sg|CASE_Gen|CASECHANGE_Up|OTHER_UNK" n="10" pos="N">hYwEsYDUbYHmJ</w> '. |
| '<w deprel="dobj" head="7" lemma="yYXOYOqX" msd="NUM_Sg|CASE_Gen" n="11" pos="N">yYXOYOqX</w> '. |
| '<w deprel="nommod" head="7" lemma="LkrLYiYgRSC" msd="NUM_Sg|CASE_Ade" n="12" pos="N">LkrLYiYgRSC</w> '. |
| '<w deprel="num" head="12" lemma="erRenLjillGtDCaRLIx" msd="_" n="13" pos="Num">erRenLjillGtDCaRLIx</w> '. |
| '<w deprel="punct" head="3" lemma="c" msd="_" n="14" pos="Punct">c</w> '. |
| '</s>'."\n". |
| '<s xml:lang="fin">'. |
| '<w deprel="nommod" head="3" lemma="LSymCdojKTj" msd="SUBCAT_Prop|NUM_Sg|CASE_Ine|CASECHANGE_Up|OTHER_UNK" n="1" pos="N">LSymCdojKTj</w> '. |
| '<w deprel="auxpass" head="3" lemma="vQ" msd="PRS_Sg3|VOICE_Act|TENSE_Prs|MOOD_Ind" n="2" pos="V">vQ</w> '. |
| '<w deprel="ROOT" head="0" lemma="nHfBTtne" msd="NUM_Sg|CASE_Nom|VOICE_Pass|PCP_PrfPrc|CMP_Pos" n="3" pos="V">nHfBTtne</w> '. |
| '<w deprel="preconj" head="6" lemma="fmcz" msd="SUBCAT_CC" n="4" pos="C">fmcz</w> '. |
| '<w deprel="poss" head="6" lemma="lHlPTQv" msd="SUBCAT_Prop|NUM_Sg|CASE_Gen|CASECHANGE_Up|OTHER_UNK" n="5" pos="N">lHlPTQv</w> '. |
| '<w deprel="dobj" head="3" lemma="IXxgORnMc" msd="NUM_Pl|CASE_Par|OTHER_UNK" n="6" pos="N">IXxgORnMc</w> '. |
| '<w deprel="cc" head="6" lemma="QdjQ" msd="SUBCAT_CC" n="7" pos="C">QdjQ</w> '. |
| '<w deprel="conj" head="6" lemma="luYMmwBGSUbXCMxqFzeZv" msd="NUM_Pl|CASE_Par|OTHER_UNK" n="8" pos="N">luYMmwBGSUbXCMxqFzeZv</w> '. |
| '<w deprel="punct" head="3" lemma="E" msd="_" n="9" pos="Punct">E</w>'. |
| '</s>'. |
| '</p>') |
| ), 'Parsed'); |
| |
| is($inline->data->data, 'lJgkPOGUBSFSRQlx rYuqciR RcidTBqv cHIf reuvyWZtUhN KsaXYaFo qJhgSDNOYpWg xtRyGN XCVuQwU hYwEsYDUbYHmJ yYXOYOqX LkrLYiYgRSC erRenLjillGtDCaRLIx c LSymCdojKTj vQ nHfBTtne fmcz lHlPTQv IXxgORnMc QdjQ luYMmwBGSUbXCMxqFzeZv E'); |
| |
| Test::XML::Loy->new($inline->dependencies->to_string('aaa', 3)) |
| ->attr_is('#s1_n3', 'l', "4") |
| ->attr_is('#s1_n3', 'from', 25) |
| ->attr_is('#s1_n3', 'to', 33) |
| ->attr_is('#s1_n3 rel', 'label', 'ROOT') |
| ->element_exists('#s1_n3 rel span[from=0]') |
| ->attr_is('#s1_n3 rel span', 'to', 144) |
| ->element_exists_not('#s1_n3 fs') |
| |
| ->attr_is('#s1_n14', 'l', "4") |
| ->attr_is('#s1_n14', 'from', 143) |
| ->attr_is('#s1_n14', 'to', 144) |
| ->attr_is('#s1_n14 rel', 'label', 'punct') |
| ->attr_is('#s1_n14 rel span', 'from', 25) |
| ->attr_is('#s1_n14 rel span', 'to', 33) |
| |
| ->attr_is('#s2_n1', 'l', "4") |
| ->attr_is('#s2_n1', 'from', 146) |
| ->attr_is('#s2_n1', 'to', 157) |
| ->attr_is('#s2_n1 rel', 'label', 'nommod') |
| ->attr_is('#s2_n1 rel span', 'from', 161) |
| ->attr_is('#s2_n1 rel span', 'to', 169) |
| |
| ->attr_is('#s2_n9', 'l', "4") |
| ->attr_is('#s2_n9', 'from', 220) |
| ->attr_is('#s2_n9', 'to', 221) |
| ->attr_is('#s2_n9 rel', 'label', 'punct') |
| ->attr_is('#s2_n9 rel span', 'from', 161) |
| ->attr_is('#s2_n9 rel span', 'to', 169) |
| |
| ->attr_is('#s2_n3', 'l', "4") |
| ->attr_is('#s2_n3', 'from', 161) |
| ->attr_is('#s2_n3', 'to', 169) |
| ->attr_is('#s2_n3 rel', 'label', 'ROOT') |
| ->attr_is('#s2_n3 rel span', 'from', 146) |
| ->attr_is('#s2_n3 rel span', 'to', 221) |
| ; |
| |
| Test::XML::Loy->new($inline->tokens->to_string('aaa', 1)) |
| ->attr_is('#s2', 'l', "4") |
| ->attr_is('#s2', 'from', 25) |
| ->attr_is('#s2', 'to', 33) |
| ->text_is('#s2 fs f[name="lemma"]', 'RcidTBqv') |
| ->text_is('#s2 fs f[name="pos"]', 'V') |
| ->text_is('#s2 fs f[name="msd"]', 'PRS_Sg3|VOICE_Act|TENSE_Prt|MOOD_Ind') |
| |
| ->attr_is('#s22', 'l', "4") |
| ->attr_is('#s22', 'from', 220) |
| ->attr_is('#s22', 'to', 221) |
| ->text_is('#s22 fs f[name="lemma"]', 'E') |
| ->text_is('#s22 fs f[name="pos"]', 'Punct') |
| ->text_is('#s22 fs f[name="msd"]', '_') |
| ; |
| }; |
| |
| subtest 'Parse msd from inline' => sub { |
| ok($inline->parse('aaa', \'<w lemma="die" pos="det" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK">Die</w> <w |
| lemma="alt" pos="ADJ" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK">alte</w> <w lemma="frau" pos="NN" msd="NUM_Sg|CASE_Nom|CASECHANGE_Up">Frau</w>'), 'Parsed'); |
| |
| is($inline->data->data, 'Die alte Frau'); |
| |
| Test::XML::Loy->new($inline->tokens->to_string('aaa', 1)) |
| ->attr_is('#s0', 'l', "2") |
| ->attr_is('#s0', 'to', 3) |
| ->text_is('#s0 fs f[name="lemma"]', 'die') |
| ->text_is('#s0 fs f[name="pos"]', 'det') |
| ->text_is('#s2 fs f[name="msd"]', 'NUM_Sg|CASE_Nom|CASECHANGE_Up') |
| |
| ->attr_is('#s1', 'l', "2") |
| ->attr_is('#s1', 'from', 4) |
| ->attr_is('#s1', 'to', 8) |
| ->text_is('#s1 fs f[name="lemma"]', 'alt') |
| ->text_is('#s1 fs f[name="pos"]', 'ADJ') |
| ->text_is('#s2 fs f[name="msd"]', 'NUM_Sg|CASE_Nom|CASECHANGE_Up') |
| |
| ->attr_is('#s2', 'l', "2") |
| ->attr_is('#s2', 'from', 9) |
| ->attr_is('#s2', 'to', 13) |
| ->text_is('#s2 fs f[name="lemma"]', 'frau') |
| ->text_is('#s2 fs f[name="pos"]', 'NN') |
| ->text_is('#s2 fs f[name="msd"]', 'NUM_Sg|CASE_Nom|CASECHANGE_Up') |
| ; |
| }; |
| |
| subtest 'Examples from documentation' => sub { |
| plan skip_all => 'Expected behaviour not finalized'; |
| |
| # From the documentation: |
| # |
| # Example: |
| # '... <head type="main"><s>Campagne in Frankreich</s></head><head type="sub"> <s>1792</s> ...' |
| |
| # Two text-nodes should normally be separated by a blank. |
| # In the above example, that would be the 2 text-nodes |
| # 'Campagne in Frankreich' and '1792', which are separated |
| # by the whitespace-node ' ' (see [2]). |
| # |
| # The text-node 'Campagne in Frankreich' leads to the setting |
| # of '$add_one' to 1, so that when opening the 2nd 'head'-tag, |
| # it's from-index gets set to the correct start-index of '1792' |
| # (and not to the start-index of the whitespace-node ' '). |
| # |
| # The assumption here is, that in most cases there _is_ a |
| # whitespace node between 2 text-nodes. The below code fragment |
| # enables a way, to check, if this really _was_ the case for |
| # the last 2 'non-tag'-nodes, when closing a tag: |
| # |
| # When a whitespace-node is read, its from-index is stored |
| # as a hash-key (in %ws), to state that it belongs to a ws-node. |
| # So when closing a tag, it can be checked, if the previous |
| # 'non-tag'-node (text or whitespace), which is the one before |
| # the last read 'non-tag'-node, was a actually _not_ a ws-node, |
| # but instead a text-node. In that case, the from-value of |
| # the last read 'non-tag'-node has to be corrected (see [1]), |
| # |
| # For whitespace-nodes $add_one is set to 0, so when opening |
| # the next tag (in the above example the 2nd 's'-tag), no |
| # additional 1 is added (because this was already done by the |
| # whitespace-node itself when incrementing the variable $pos). |
| # |
| # [1] |
| # Now, what happens, when 2 text-nodes are _not_ seperated by a |
| # whitespace-node (e.g.: <w>Augen<c>,</c></w>)? |
| # In this case, the falsely increased from-value has to be |
| # decreased again by 1 when closing the enclosing tag |
| # (see above code fragment '... not exists $ws{ $from - 1 } ...'). |
| # |
| # [2] |
| # Comparing the 2 examples '<w>fu</w> <w>bar</w>' and |
| # '<w>fu</w><w> </w><w>bar</w>', is ' ' in both cases handled as a |
| # whitespace-node (XML_READER_TYPE_SIGNIFICANT_WHITESPACE). |
| # |
| # The from-index of the 2nd w-tag in the second example refers to |
| # 'bar', which may not have been the intention |
| # (even though '<w> </w>' doesn't make a lot of sense). |
| # TODO: could this be a bug? |
| # |
| # Empty tags also cling to the next text-token - e.g. in |
| # '<w>tok1</w> <w>tok2</w><a><b/></a> <w>tok3</w>' are the from- |
| # and to-indizes for the tags 'a' and 'b' both 12, |
| # which is the start-index of the token 'tok3'. |
| |
| ok($inline->parse( |
| 'bbb', |
| \'<head type="main"><s>Campagne in Frankreich</s></head><head type="sub"> <s>1792</s></head>'),'Parsed'); |
| is($inline->data->data, 'Campagne in Frankreich 1792'); |
| |
| Test::XML::Loy->new($inline->structures->to_string('aaa', 2)) |
| ->attr_is('#s0', 'l', "1") |
| ->attr_is('#s0', 'to', 27) |
| ->text_is('#s0 fs f[name="name"]', 'text') |
| |
| ->attr_is('#s1', 'l', "2") |
| ->attr_is('#s1', 'to', 22) |
| ->text_is('#s1 fs f[name="name"]', 'head') |
| ->text_is('#s1 fs f[name="attr"] fs f[name=type]', 'main') |
| |
| ->attr_is('#s2', 'l', "3") |
| ->attr_is('#s2', 'to', 22) |
| ->text_is('#s2 fs f[name="name"]', 's') |
| |
| ->attr_is('#s3', 'l', "2") |
| ->attr_is('#s3', 'from', 23) |
| ->attr_is('#s3', 'to', 27) |
| ->text_is('#s3 fs f[name="name"]', 'head') |
| ->text_is('#s3 fs f[name="attr"] fs f[name=type]', 'sub') |
| |
| ->attr_is('#s4', 'l', "3") |
| ->attr_is('#s4', 'from', 23) |
| ->attr_is('#s4', 'to', 27) |
| ->text_is('#s4 fs f[name="name"]', 's') |
| ; |
| |
| ok($inline->parse( |
| 'ccc', |
| \'<w>tok1</w> <w>tok2</w><a><b/></a> <w>tok3</w>' |
| ), 'Parsed'); |
| is($inline->data->data, 'tok1 tok2 tok3'); |
| |
| Test::XML::Loy->new($inline->structures->to_string('ccc', 2)) |
| ->attr_is('#s0', 'l', "1") |
| ->attr_is('#s0', 'to', 14) |
| ->text_is('#s0 fs f[name="name"]', 'text') |
| |
| ->attr_is('#s1', 'l', "2") |
| ->attr_is('#s1', 'to', 4) |
| ->text_is('#s1 fs f[name="name"]', 'w') |
| |
| ->attr_is('#s2', 'l', "2") |
| ->attr_is('#s2', 'from', 5) |
| ->attr_is('#s2', 'to', 9) |
| ->text_is('#s2 fs f[name="name"]', 'w') |
| |
| ->attr_is('#s2', 'l', "2") |
| ->attr_is('#s2', 'from', 5) |
| ->attr_is('#s2', 'to', 9) |
| ->text_is('#s2 fs f[name="name"]', 'w') |
| |
| ->attr_is('#s3', 'l', "2") |
| ->attr_is('#s3', 'from', 10) |
| ->attr_is('#s3', 'to', 10) |
| ->text_is('#s3 fs f[name="name"]', 'a') |
| |
| ->attr_is('#s4', 'l', "3") |
| ->attr_is('#s4', 'from', 10) |
| ->attr_is('#s4', 'to', 10) |
| ->text_is('#s4 fs f[name="name"]', 'b') |
| |
| ->attr_is('#s5', 'l', "2") |
| ->attr_is('#s5', 'from', 10) |
| ->attr_is('#s5', 'to', 14) |
| ->text_is('#s5 fs f[name="name"]', 'w') |
| ; |
| |
| ok($inline->parse( |
| 'ccc', |
| \'<w>Augen<c>,</c></w> <w>die</w>' |
| ), 'Parsed'); |
| is($inline->data->data, 'Augen, die'); |
| |
| Test::XML::Loy->new($inline->structures->to_string('ddd', 2)) |
| ->attr_is('#s0', 'l', "1") |
| ->attr_is('#s0', 'to', 10) |
| ->text_is('#s0 fs f[name="name"]', 'text') |
| |
| ->attr_is('#s1', 'l', "2") |
| ->attr_is('#s1', 'to', 6) |
| ->text_is('#s1 fs f[name="name"]', 'w') |
| |
| ->attr_is('#s2', 'l', "3") |
| ->attr_is('#s2', 'from', 5) |
| ->attr_is('#s2', 'to', 6) |
| ->text_is('#s2 fs f[name="name"]', 'c') |
| |
| ->attr_is('#s3', 'l', "2") |
| ->attr_is('#s3', 'from', 7) |
| ->attr_is('#s3', 'to', 10) |
| ->text_is('#s3 fs f[name="name"]', 'w') |
| ; |
| }; |
| |
| |
| subtest 'Treatment of tokens' => sub { |
| my $inline = KorAP::XML::TEI::Inline->new(0, {b => 1}, 1); |
| |
| ok($inline->parse('aaa', \'<a>Der</a> <b>alte</b> <w pos="NN">Baum</w>'), 'Parsed'); |
| is($inline->data->data, 'Der alte Baum'); |
| |
| # Only contains '<a>' |
| Test::XML::Loy->new($inline->structures->to_string('aaa', 1)) |
| ->attr_is('#s1', 'to', 3) |
| ->element_exists_not('#s2') |
| ; |
| |
| # Only contains 'w' |
| Test::XML::Loy->new($inline->tokens->to_string('aaa', 1)) |
| ->attr_is('#s0', 'from', 9) |
| ->attr_is('#s0', 'to', 13) |
| ->attr_is('#s0 > fs > f > fs > f', 'name', 'pos') |
| ->text_is('#s0 > fs > f > fs > f[name=pos]', 'NN') |
| ->element_exists_not('#s1') |
| ; |
| }; |
| |
| done_testing; |