Blame - t/inline.t - KorAP/KorAP-XML-TEI

blob: 14dfcabff85fbc4578328b144c72bf1ff2b4cbcd [file] [log] [blame]

Akron	eb12e23	2021-02-25 13:49:50 +0100	[diff] [blame]	1	use strict;
				2	use warnings;
				3
				4	use FindBin;
				5	BEGIN {
				6	unshift @INC, "$FindBin::Bin/../lib";
				7	};
				8
				9	use Test::More;
				10	use Test::XML::Loy;
				11	use_ok('KorAP::XML::TEI::Inline');
				12
				13
				14	my $inline = KorAP::XML::TEI::Inline->new;
				15
				16	ok($inline->parse('aaa', \'Der <b>alte</b> Mann'), 'Parsed');
				17
				18	is($inline->data->data, 'Der alte Mann');
				19
				20	Test::XML::Loy->new($inline->structures->to_string('aaa', 2))
				21	->attr_is('#s0', 'l', "1")
				22	->attr_is('#s0', 'to', 13)
				23	->text_is('#s0 fs f[name=name]', 'text')
				24	->attr_is('#s1', 'l', "2")
				25	->attr_is('#s1', 'from', 4)
				26	->attr_is('#s1', 'to', 8)
				27	->text_is('#s1 fs f[name=name]', 'b')
				28	;
				29
				30	Test::XML::Loy->new($inline->tokens->to_string('aaa', 0))
				31	->element_exists_not('fs')
				32	;
				33
				34
				35	ok($inline->parse('aaa', \'<w>Die</w> <w>alte</w> <w>Frau</w>'), 'Parsed');
				36
				37	is($inline->data->data, 'Die alte Frau');
				38
				39	Test::XML::Loy->new($inline->structures->to_string('aaa', 2))
				40	->attr_is('#s0', 'l', "1")
				41	->attr_is('#s0', 'to', 13)
				42	->text_is('#s0 fs f[name=name]', 'text')
				43
				44	->attr_is('#s1', 'l', "2")
				45	->attr_is('#s1', 'to', 3)
				46	->text_is('#s1 fs f[name=name]', 'w')
				47
				48	->attr_is('#s2', 'l', "2")
				49	->attr_is('#s2', 'from', 4)
				50	->attr_is('#s2', 'to', 8)
				51	->text_is('#s2 fs f[name=name]', 'w')
				52
				53	->attr_is('#s3', 'l', "2")
				54	->attr_is('#s3', 'from', 9)
				55	->attr_is('#s3', 'to', 13)
				56	->text_is('#s3 fs f[name=name]', 'w')
				57	;
				58
				59	Test::XML::Loy->new($inline->tokens->to_string('aaa', 0))
				60	->attr_is('#s0', 'l', "2")
				61	->attr_is('#s0', 'to', 3)
				62
				63	->attr_is('#s1', 'l', "2")
				64	->attr_is('#s1', 'from', 4)
				65	->attr_is('#s1', 'to', 8)
				66
				67	->attr_is('#s2', 'l', "2")
				68	->attr_is('#s2', 'from', 9)
				69	->attr_is('#s2', 'to', 13)
				70	;
				71
				72	ok($inline->parse('aaa', \'<w lemma="die" type="det">Die</w> <w
				73	lemma="alt" type="ADJ">alte</w> <w lemma="frau" type="NN">Frau</w>'), 'Parsed');
				74
				75	is($inline->data->data, 'Die alte Frau');
				76
				77	Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
				78	->attr_is('#s0', 'l', "2")
				79	->attr_is('#s0', 'to', 3)
				80	->text_is('#s0 fs f[name="lemma"]', 'die')
				81	->text_is('#s0 fs f[name="type"]', 'det')
				82
				83	->attr_is('#s1', 'l', "2")
				84	->attr_is('#s1', 'from', 4)
				85	->attr_is('#s1', 'to', 8)
				86	->text_is('#s1 fs f[name="lemma"]', 'alt')
				87	->text_is('#s1 fs f[name="type"]', 'ADJ')
				88
				89	->attr_is('#s2', 'l', "2")
				90	->attr_is('#s2', 'from', 9)
				91	->attr_is('#s2', 'to', 13)
				92	->text_is('#s2 fs f[name="lemma"]', 'frau')
				93	->text_is('#s2 fs f[name="type"]', 'NN')
				94	;
				95
Akron	6b1f26b	2024-09-19 11:35:32 +0200	[diff] [blame]	96	subtest 'Support dependency parsing' => sub {
				97	$inline = KorAP::XML::TEI::Inline->new(0,{},0,1);
				98	ok($inline->parse('Fake News Media',
				99	\'<s><w n="1" lemma="Fake" pos="N" head="2" deprel="name" msd="SUBCAT_Prop\|CASECHANGE_Up\|OTHER_UNK">Fake</w> <w n="2" lemma="News" pos="N" head="3" deprel="name" msd="SUBCAT_Prop\|CASECHANGE_Up\|OTHER_UNK">News</w> <w n="3" lemma="media" pos="N" head="0" deprel="ROOT" msd="NUM_Sg\|CASE_Nom\|CASECHANGE_Up">Media</w></s> '
				100	), 'Parsed');
				101
				102	is($inline->data->data, 'Fake News Media ');
				103
				104	Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
				105	->attr_is('#s0', 'l', "3")
				106	->attr_is('#s0', 'to', 4)
				107	->text_is('#s0 fs f[name="lemma"]', 'Fake')
				108	->text_is('#s0 fs f[name="pos"]', 'N')
				109	->text_is('#s0 fs f[name="n"]','1')
				110
				111	->attr_is('#s1', 'l', "3")
				112	->attr_is('#s1', 'from', 5)
				113	->attr_is('#s1', 'to', 9)
				114	->text_is('#s1 fs f[name="lemma"]', 'News')
				115	->text_is('#s1 fs f[name="pos"]', 'N')
				116	->text_is('#s1 fs f[name="n"]','2')
				117
				118	->attr_is('#s2', 'l', "3")
				119	->attr_is('#s2', 'from', 10)
				120	->attr_is('#s2', 'to', 15)
				121	->text_is('#s2 fs f[name="lemma"]', 'media')
				122	->text_is('#s2 fs f[name="pos"]', 'N')
				123	->text_is('#s2 fs f[name="n"]','3')
				124	;
				125
				126	Test::XML::Loy->new($inline->tokens->to_string('aaa', 4))
				127	->attr_is('#s0', 'l', "3")
				128	->attr_is('#s0', 'to', 4)
				129	->text_is('#s0 fs f[name="lemma"]', 'Fake')
				130	->text_is('#s0 fs f[name="pos"]', 'N')
				131	->element_exists_not('#s0 fs f[name="n"]')
				132
				133	->attr_is('#s1', 'l', "3")
				134	->attr_is('#s1', 'from', 5)
				135	->attr_is('#s1', 'to', 9)
				136	->text_is('#s1 fs f[name="lemma"]', 'News')
				137	->text_is('#s1 fs f[name="pos"]', 'N')
				138
				139	->attr_is('#s2', 'l', "3")
				140	->attr_is('#s2', 'from', 10)
				141	->attr_is('#s2', 'to', 15)
				142	->text_is('#s2 fs f[name="lemma"]', 'media')
				143	->text_is('#s2 fs f[name="pos"]', 'N')
				144	;
				145
				146	Test::XML::Loy->new($inline->dependencies->to_string('aaa', 3))
				147	->attr_is('#s1_n1', 'l', "3")
				148	->element_exists('#s1_n1[from="0"]')
				149	->attr_is('#s1_n1', 'to', 4)
				150	->attr_is('#s1_n1 rel', 'label', 'name')
				151	->attr_is('#s1_n1 rel span', 'from', 5)
				152	->attr_is('#s1_n1 rel span', 'to', 9)
				153	->element_exists_not('#s1_n1 fs')
				154
				155	->attr_is('#s1_n2', 'l', "3")
				156	->attr_is('#s1_n2', 'from', 5)
				157	->attr_is('#s1_n2', 'to', 9)
				158	->attr_is('#s1_n2 rel', 'label', 'name')
				159	->attr_is('#s1_n2 rel span', 'from', 10)
				160	->attr_is('#s1_n2 rel span', 'to', 15)
				161
				162	->attr_is('#s1_n3', 'l', "3")
				163	->attr_is('#s1_n3', 'from', 10)
				164	->attr_is('#s1_n3', 'to', 15)
				165	->attr_is('#s1_n3 rel', 'label', 'ROOT')
				166	->element_exists('#s1_n3 rel span[from="0"]')
				167	->attr_is('#s1_n3 rel span', 'to', 15)
				168	;
				169
				170	$inline = KorAP::XML::TEI::Inline->new(0,{},0,1);
				171	ok($inline->parse('Fake News Media',
				172	\('<p xml:lang="x-\|fin:2\|"><s xml:lang="fin">'.
				173	'<w deprel="nn" head="2" lemma="lJgkPOGUBSFSRQlx" msd="NUM_Sg\|CASE_Nom\|CASECHANGE_Up" n="1" pos="N">lJgkPOGUBSFSRQlx</w> '.
				174	'<w deprel="nsubj" head="3" lemma="rYuqciR" msd="SUBCAT_Prop\|NUM_Sg\|CASE_Nom\|CASECHANGE_Up\|OTHER_UNK" n="2" pos="N">rYuqciR</w> '.
				175	'<w deprel="ROOT" head="0" lemma="RcidTBqv" msd="PRS_Sg3\|VOICE_Act\|TENSE_Prt\|MOOD_Ind" n="3" pos="V">RcidTBqv</w> '.
				176	'<w deprel="poss" head="5" lemma="cHIf" msd="SUBCAT_Acro\|NUM_Sg\|CASE_Nom\|CASECHANGE_Up" n="4" pos="N">cHIf</w> '.
				177	'<w deprel="nommod" head="3" lemma="reuvyWZtUhN" msd="NUM_Sg\|CASE_Ela" n="5" pos="N">reuvyWZtUhN</w> '.
				178	'<w deprel="nsubj" head="7" lemma="KsaXYaFo" msd="NUM_Sg\|CASE_Gen" n="6" pos="N">KsaXYaFo</w> '.
				179	'<w deprel="iccomp" head="3" lemma="qJhgSDNOYpWg" msd="NUM_Sg\|CASE_Ill\|VOICE_Act\|INF_Inf3" n="7" pos="V">qJhgSDNOYpWg</w> '.
				180	'<w deprel="name" head="9" lemma="xtRyGN" msd="SUBCAT_Prop\|CASECHANGE_Up\|OTHER_UNK" n="8" pos="N">xtRyGN</w> '.
				181	'<w deprel="poss" head="10" lemma="XCVuQwU" msd="SUBCAT_Prop\|NUM_Sg\|CASE_Gen\|CASECHANGE_Up\|OTHER_UNK" n="9" pos="N">XCVuQwU</w> '.
				182	'<w deprel="poss" head="11" lemma="hYwEsYDUbYHmJ" msd="NUM_Sg\|CASE_Gen\|CASECHANGE_Up\|OTHER_UNK" n="10" pos="N">hYwEsYDUbYHmJ</w> '.
				183	'<w deprel="dobj" head="7" lemma="yYXOYOqX" msd="NUM_Sg\|CASE_Gen" n="11" pos="N">yYXOYOqX</w> '.
				184	'<w deprel="nommod" head="7" lemma="LkrLYiYgRSC" msd="NUM_Sg\|CASE_Ade" n="12" pos="N">LkrLYiYgRSC</w> '.
				185	'<w deprel="num" head="12" lemma="erRenLjillGtDCaRLIx" msd="_" n="13" pos="Num">erRenLjillGtDCaRLIx</w> '.
				186	'<w deprel="punct" head="3" lemma="c" msd="_" n="14" pos="Punct">c</w> '.
				187	'</s>'."\n".
				188	'<s xml:lang="fin">'.
				189	'<w deprel="nommod" head="3" lemma="LSymCdojKTj" msd="SUBCAT_Prop\|NUM_Sg\|CASE_Ine\|CASECHANGE_Up\|OTHER_UNK" n="1" pos="N">LSymCdojKTj</w> '.
				190	'<w deprel="auxpass" head="3" lemma="vQ" msd="PRS_Sg3\|VOICE_Act\|TENSE_Prs\|MOOD_Ind" n="2" pos="V">vQ</w> '.
				191	'<w deprel="ROOT" head="0" lemma="nHfBTtne" msd="NUM_Sg\|CASE_Nom\|VOICE_Pass\|PCP_PrfPrc\|CMP_Pos" n="3" pos="V">nHfBTtne</w> '.
				192	'<w deprel="preconj" head="6" lemma="fmcz" msd="SUBCAT_CC" n="4" pos="C">fmcz</w> '.
				193	'<w deprel="poss" head="6" lemma="lHlPTQv" msd="SUBCAT_Prop\|NUM_Sg\|CASE_Gen\|CASECHANGE_Up\|OTHER_UNK" n="5" pos="N">lHlPTQv</w> '.
				194	'<w deprel="dobj" head="3" lemma="IXxgORnMc" msd="NUM_Pl\|CASE_Par\|OTHER_UNK" n="6" pos="N">IXxgORnMc</w> '.
				195	'<w deprel="cc" head="6" lemma="QdjQ" msd="SUBCAT_CC" n="7" pos="C">QdjQ</w> '.
				196	'<w deprel="conj" head="6" lemma="luYMmwBGSUbXCMxqFzeZv" msd="NUM_Pl\|CASE_Par\|OTHER_UNK" n="8" pos="N">luYMmwBGSUbXCMxqFzeZv</w> '.
				197	'<w deprel="punct" head="3" lemma="E" msd="_" n="9" pos="Punct">E</w>'.
				198	'</s>'.
				199	'</p>')
				200	), 'Parsed');
				201
				202	is($inline->data->data, 'lJgkPOGUBSFSRQlx rYuqciR RcidTBqv cHIf reuvyWZtUhN KsaXYaFo qJhgSDNOYpWg xtRyGN XCVuQwU hYwEsYDUbYHmJ yYXOYOqX LkrLYiYgRSC erRenLjillGtDCaRLIx c LSymCdojKTj vQ nHfBTtne fmcz lHlPTQv IXxgORnMc QdjQ luYMmwBGSUbXCMxqFzeZv E');
				203
				204	Test::XML::Loy->new($inline->dependencies->to_string('aaa', 3))
				205	->attr_is('#s1_n3', 'l', "4")
				206	->attr_is('#s1_n3', 'from', 25)
				207	->attr_is('#s1_n3', 'to', 33)
				208	->attr_is('#s1_n3 rel', 'label', 'ROOT')
				209	->element_exists('#s1_n3 rel span[from=0]')
				210	->attr_is('#s1_n3 rel span', 'to', 144)
				211	->element_exists_not('#s1_n3 fs')
				212
				213	->attr_is('#s1_n14', 'l', "4")
				214	->attr_is('#s1_n14', 'from', 143)
				215	->attr_is('#s1_n14', 'to', 144)
				216	->attr_is('#s1_n14 rel', 'label', 'punct')
				217	->attr_is('#s1_n14 rel span', 'from', 25)
				218	->attr_is('#s1_n14 rel span', 'to', 33)
				219
				220	->attr_is('#s2_n1', 'l', "4")
				221	->attr_is('#s2_n1', 'from', 146)
				222	->attr_is('#s2_n1', 'to', 157)
				223	->attr_is('#s2_n1 rel', 'label', 'nommod')
				224	->attr_is('#s2_n1 rel span', 'from', 161)
				225	->attr_is('#s2_n1 rel span', 'to', 169)
				226
				227	->attr_is('#s2_n9', 'l', "4")
				228	->attr_is('#s2_n9', 'from', 220)
				229	->attr_is('#s2_n9', 'to', 221)
				230	->attr_is('#s2_n9 rel', 'label', 'punct')
				231	->attr_is('#s2_n9 rel span', 'from', 161)
				232	->attr_is('#s2_n9 rel span', 'to', 169)
				233
				234	->attr_is('#s2_n3', 'l', "4")
				235	->attr_is('#s2_n3', 'from', 161)
				236	->attr_is('#s2_n3', 'to', 169)
				237	->attr_is('#s2_n3 rel', 'label', 'ROOT')
				238	->attr_is('#s2_n3 rel span', 'from', 146)
				239	->attr_is('#s2_n3 rel span', 'to', 221)
				240	;
				241
				242	Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
				243	->attr_is('#s2', 'l', "4")
				244	->attr_is('#s2', 'from', 25)
				245	->attr_is('#s2', 'to', 33)
				246	->text_is('#s2 fs f[name="lemma"]', 'RcidTBqv')
				247	->text_is('#s2 fs f[name="pos"]', 'V')
				248	->text_is('#s2 fs f[name="msd"]', 'PRS_Sg3\|VOICE_Act\|TENSE_Prt\|MOOD_Ind')
				249
				250	->attr_is('#s22', 'l', "4")
				251	->attr_is('#s22', 'from', 220)
				252	->attr_is('#s22', 'to', 221)
				253	->text_is('#s22 fs f[name="lemma"]', 'E')
				254	->text_is('#s22 fs f[name="pos"]', 'Punct')
				255	->text_is('#s22 fs f[name="msd"]', '_')
				256	;
Akron	93dbc2c	2024-09-18 12:16:25 +0200	[diff] [blame^]	257	};
Akron	6b1f26b	2024-09-19 11:35:32 +0200	[diff] [blame]	258
Akron	93dbc2c	2024-09-18 12:16:25 +0200	[diff] [blame^]	259	subtest 'Parse msd from inline' => sub { # every <w> must expose its own lemma/pos/msd features; uses the file-level $inline
				260	ok($inline->parse('aaa', \'<w lemma="die" pos="det" msd="SUBCAT_Prop\|CASECHANGE_Up\|OTHER_UNK">Die</w> <w
				261	lemma="alt" pos="ADJ" msd="SUBCAT_Prop\|CASECHANGE_Up\|OTHER_UNK">alte</w> <w lemma="frau" pos="NN" msd="NUM_Sg\|CASE_Nom\|CASECHANGE_Up">Frau</w>'), 'Parsed');
				262
				263	is($inline->data->data, 'Die alte Frau'); # markup is stripped, only the primary text remains
				264
				265	Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
				266	->attr_is('#s0', 'l', "2") # first token: "Die"
				267	->attr_is('#s0', 'to', 3)
				268	->text_is('#s0 fs f[name="lemma"]', 'die')
				269	->text_is('#s0 fs f[name="pos"]', 'det')
				270	->text_is('#s0 fs f[name="msd"]', 'SUBCAT_Prop\|CASECHANGE_Up\|OTHER_UNK') # check the msd of this token, not of #s2
				271
				272	->attr_is('#s1', 'l', "2") # second token: "alte"
				273	->attr_is('#s1', 'from', 4)
				274	->attr_is('#s1', 'to', 8)
				275	->text_is('#s1 fs f[name="lemma"]', 'alt')
				276	->text_is('#s1 fs f[name="pos"]', 'ADJ')
				277	->text_is('#s1 fs f[name="msd"]', 'SUBCAT_Prop\|CASECHANGE_Up\|OTHER_UNK') # check the msd of this token, not of #s2
				278
				279	->attr_is('#s2', 'l', "2") # third token: "Frau"
				280	->attr_is('#s2', 'from', 9)
				281	->attr_is('#s2', 'to', 13)
				282	->text_is('#s2 fs f[name="lemma"]', 'frau')
				283	->text_is('#s2 fs f[name="pos"]', 'NN')
				284	->text_is('#s2 fs f[name="msd"]', 'NUM_Sg\|CASE_Nom\|CASECHANGE_Up')
				285	;
Akron	6b1f26b	2024-09-19 11:35:32 +0200	[diff] [blame]	286	};
				287
Akron	56b8dbd	2021-02-26 11:23:48 +0100	[diff] [blame]	288	subtest 'Examples from documentation' => sub {
				289	plan skip_all => 'Expected behaviour not finalized';
				290
				291	# From the documentation:
				292	#
				293	# Example:
				294	# '... <head type="main"><s>Campagne in Frankreich</s></head><head type="sub"> <s>1792</s> ...'
				295
				296	# Two text-nodes should normally be separated by a blank.
				297	# In the above example, that would be the 2 text-nodes
				298	# 'Campagne in Frankreich' and '1792', which are separated
				299	# by the whitespace-node ' ' (see [2]).
				300	#
				301	# The text-node 'Campagne in Frankreich' leads to the setting
				302	# of '$add_one' to 1, so that when opening the 2nd 'head'-tag,
				303	# its from-index gets set to the correct start-index of '1792'
				304	# (and not to the start-index of the whitespace-node ' ').
				305	#
				306	# The assumption here is, that in most cases there _is_ a
				307	# whitespace node between 2 text-nodes. The below code fragment
				308	# enables a way, to check, if this really _was_ the case for
				309	# the last 2 'non-tag'-nodes, when closing a tag:
				310	#
				311	# When a whitespace-node is read, its from-index is stored
				312	# as a hash-key (in %ws), to state that it belongs to a ws-node.
				313	# So when closing a tag, it can be checked, if the previous
				314	# 'non-tag'-node (text or whitespace), which is the one before
				315	# the last read 'non-tag'-node, was actually _not_ a ws-node,
				316	# but instead a text-node. In that case, the from-value of
				317	# the last read 'non-tag'-node has to be corrected (see [1]).
				318	#
				319	# For whitespace-nodes $add_one is set to 0, so when opening
				320	# the next tag (in the above example the 2nd 's'-tag), no
				321	# additional 1 is added (because this was already done by the
				322	# whitespace-node itself when incrementing the variable $pos).
				323	#
				324	# [1]
				325	# Now, what happens when 2 text-nodes are _not_ separated by a
				326	# whitespace-node (e.g.: <w>Augen<c>,</c></w>)?
				327	# In this case, the falsely increased from-value has to be
				328	# decreased again by 1 when closing the enclosing tag
				329	# (see above code fragment '... not exists $ws{ $from - 1 } ...').
				330	#
				331	# [2]
				332	# In both of the examples '<w>fu</w> <w>bar</w>' and
				333	# '<w>fu</w><w> </w><w>bar</w>', ' ' is handled as a
				334	# whitespace-node (XML_READER_TYPE_SIGNIFICANT_WHITESPACE).
				335	#
				336	# The from-index of the 2nd w-tag in the second example refers to
				337	# 'bar', which may not have been the intention
				338	# (even though '<w> </w>' doesn't make a lot of sense).
				339	# TODO: could this be a bug?
				340	#
				341	# Empty tags also cling to the next text-token - e.g. in
				342	# '<w>tok1</w> <w>tok2</w><a><b/></a> <w>tok3</w>' the from-
				343	# and to-indices for the tags 'a' and 'b' are both 10,
				344	# which is the start-index of the token 'tok3'.
				345
				346	ok($inline->parse(
				347	'bbb',
				348	\'<head type="main"><s>Campagne in Frankreich</s></head><head type="sub"> <s>1792</s></head>'),'Parsed');
				349	is($inline->data->data, 'Campagne in Frankreich 1792');
				350
				351	Test::XML::Loy->new($inline->structures->to_string('aaa', 2))
				352	->attr_is('#s0', 'l', "1")
				353	->attr_is('#s0', 'to', 27)
				354	->text_is('#s0 fs f[name="name"]', 'text')
				355
				356	->attr_is('#s1', 'l', "2")
				357	->attr_is('#s1', 'to', 22)
				358	->text_is('#s1 fs f[name="name"]', 'head')
				359	->text_is('#s1 fs f[name="attr"] fs f[name=type]', 'main')
				360
				361	->attr_is('#s2', 'l', "3")
				362	->attr_is('#s2', 'to', 22)
				363	->text_is('#s2 fs f[name="name"]', 's')
				364
				365	->attr_is('#s3', 'l', "2")
				366	->attr_is('#s3', 'from', 23)
				367	->attr_is('#s3', 'to', 27)
				368	->text_is('#s3 fs f[name="name"]', 'head')
				369	->text_is('#s3 fs f[name="attr"] fs f[name=type]', 'sub')
				370
				371	->attr_is('#s4', 'l', "3")
				372	->attr_is('#s4', 'from', 23)
				373	->attr_is('#s4', 'to', 27)
				374	->text_is('#s4 fs f[name="name"]', 's')
				375	;
				376
				377	ok($inline->parse(
				378	'ccc',
				379	\'<w>tok1</w> <w>tok2</w><a><b/></a> <w>tok3</w>'
				380	), 'Parsed');
				381	is($inline->data->data, 'tok1 tok2 tok3');
				382
				383	Test::XML::Loy->new($inline->structures->to_string('ccc', 2))
				384	->attr_is('#s0', 'l', "1")
				385	->attr_is('#s0', 'to', 14)
				386	->text_is('#s0 fs f[name="name"]', 'text')
				387
				388	->attr_is('#s1', 'l', "2")
				389	->attr_is('#s1', 'to', 4)
				390	->text_is('#s1 fs f[name="name"]', 'w')
				391
				392	->attr_is('#s2', 'l', "2")
				393	->attr_is('#s2', 'from', 5)
				394	->attr_is('#s2', 'to', 9)
				395	->text_is('#s2 fs f[name="name"]', 'w')
				396
				397	->attr_is('#s2', 'l', "2")
				398	->attr_is('#s2', 'from', 5)
				399	->attr_is('#s2', 'to', 9)
				400	->text_is('#s2 fs f[name="name"]', 'w')
				401
				402	->attr_is('#s3', 'l', "2")
				403	->attr_is('#s3', 'from', 10)
				404	->attr_is('#s3', 'to', 10)
				405	->text_is('#s3 fs f[name="name"]', 'a')
				406
				407	->attr_is('#s4', 'l', "3")
				408	->attr_is('#s4', 'from', 10)
				409	->attr_is('#s4', 'to', 10)
				410	->text_is('#s4 fs f[name="name"]', 'b')
				411
				412	->attr_is('#s5', 'l', "2")
				413	->attr_is('#s5', 'from', 10)
				414	->attr_is('#s5', 'to', 14)
				415	->text_is('#s5 fs f[name="name"]', 'w')
				416	;
				417
				418	ok($inline->parse(
				419	'ccc',
				420	\'<w>Augen<c>,</c></w> <w>die</w>'
				421	), 'Parsed');
				422	is($inline->data->data, 'Augen, die');
				423
				424	Test::XML::Loy->new($inline->structures->to_string('ddd', 2))
				425	->attr_is('#s0', 'l', "1")
				426	->attr_is('#s0', 'to', 10)
				427	->text_is('#s0 fs f[name="name"]', 'text')
				428
				429	->attr_is('#s1', 'l', "2")
				430	->attr_is('#s1', 'to', 6)
				431	->text_is('#s1 fs f[name="name"]', 'w')
				432
				433	->attr_is('#s2', 'l', "3")
				434	->attr_is('#s2', 'from', 5)
				435	->attr_is('#s2', 'to', 6)
				436	->text_is('#s2 fs f[name="name"]', 'c')
				437
				438	->attr_is('#s3', 'l', "2")
				439	->attr_is('#s3', 'from', 7)
				440	->attr_is('#s3', 'to', 10)
				441	->text_is('#s3 fs f[name="name"]', 'w')
				442	;
				443	};
Akron	eb12e23	2021-02-25 13:49:50 +0100	[diff] [blame]	444
Akron	e2819a1	2021-10-12 15:52:55 +0200	[diff] [blame]	445
				446	subtest 'Treatment of tokens' => sub { # asserts that <w> ends up only in the token layer and <b> in neither layer
				447	my $inline = KorAP::XML::TEI::Inline->new(0, {b => 1}, 1); # lexical: shadows the file-level $inline. NOTE(review): presumably {b => 1} lists tags to skip and the trailing 1 separates tokens from structures - confirm against Inline->new
				448
				449	ok($inline->parse('aaa', \'<a>Der</a> <b>alte</b> <w pos="NN">Baum</w>'), 'Parsed');
				450	is($inline->data->data, 'Der alte Baum'); # the text of all three elements is kept in the primary data
				451
				452	# The structure layer only contains '<a>': #s1 ends at 3 ("Der") and there is no #s2
				453	Test::XML::Loy->new($inline->structures->to_string('aaa', 1))
				454	->attr_is('#s1', 'to', 3)
				455	->element_exists_not('#s2')
				456	;
				457
				458	# The token layer only contains 'w': #s0 spans 9-13 ("Baum") and there is no #s1
				459	Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
				460	->attr_is('#s0', 'from', 9)
				461	->attr_is('#s0', 'to', 13)
				462	->attr_is('#s0 > fs > f > fs > f', 'name', 'pos') # the pos attribute sits in a nested feature structure
				463	->text_is('#s0 > fs > f > fs > f[name=pos]', 'NN')
				464	->element_exists_not('#s1')
				465	;
				466	};
				467
Akron	eb12e23	2021-02-25 13:49:50 +0100	[diff] [blame]	468	done_testing;