# blob: 14dfcabff85fbc4578328b144c72bf1ff2b4cbcd [file] [log] [blame]
# Tests for KorAP::XML::TEI::Inline: inline markup is parsed into primary
# data plus structure, token and dependency annotations, and the serialized
# XML of each layer is checked with Test::XML::Loy.
use strict;
use warnings;

use FindBin;
BEGIN {
  # Prefer the lib directory located one level above this test file.
  unshift @INC, "$FindBin::Bin/../lib";
};

use Test::More;
use Test::XML::Loy;
use_ok('KorAP::XML::TEI::Inline');


# File-level parser instance; later sections and subtests reuse or
# reassign this variable.
my $inline = KorAP::XML::TEI::Inline->new;

# A single inline element embedded in plain text.
ok($inline->parse('aaa', \'Der <b>alte</b> Mann'), 'Parsed');

# The primary data is expected to be the text without any markup.
is($inline->data->data, 'Der alte Mann', 'Primary data');

# Structure layer: s0 is named 'text' and ends at offset 13,
# s1 is the <b> element covering offsets 4 to 8.
Test::XML::Loy->new($inline->structures->to_string('aaa', 2))
  ->attr_is('#s0', 'l', "1")
  ->attr_is('#s0', 'to', 13)
  ->text_is('#s0 fs f[name=name]', 'text')
  ->attr_is('#s1', 'l', "2")
  ->attr_is('#s1', 'from', 4)
  ->attr_is('#s1', 'to', 8)
  ->text_is('#s1 fs f[name=name]', 'b')
  ;

# The input contains no <w> elements, so the token layer is expected
# to carry no feature structures at all.
Test::XML::Loy->new($inline->tokens->to_string('aaa', 0))
  ->element_exists_not('fs')
  ;


# Three whitespace-separated <w> elements without attributes.
ok($inline->parse('aaa', \'<w>Die</w> <w>alte</w> <w>Frau</w>'), 'Parsed');

is($inline->data->data, 'Die alte Frau', 'Primary data');

# Structure layer: the 'text' span (s0) followed by one span per <w>.
Test::XML::Loy->new($inline->structures->to_string('aaa', 2))
  ->attr_is('#s0', 'l', "1")
  ->attr_is('#s0', 'to', 13)
  ->text_is('#s0 fs f[name=name]', 'text')

  ->attr_is('#s1', 'l', "2")
  ->attr_is('#s1', 'to', 3)
  ->text_is('#s1 fs f[name=name]', 'w')

  ->attr_is('#s2', 'l', "2")
  ->attr_is('#s2', 'from', 4)
  ->attr_is('#s2', 'to', 8)
  ->text_is('#s2 fs f[name=name]', 'w')

  ->attr_is('#s3', 'l', "2")
  ->attr_is('#s3', 'from', 9)
  ->attr_is('#s3', 'to', 13)
  ->text_is('#s3 fs f[name=name]', 'w')
  ;

# Token layer: the same three <w> offsets, with ids starting at s0.
Test::XML::Loy->new($inline->tokens->to_string('aaa', 0))
  ->attr_is('#s0', 'l', "2")
  ->attr_is('#s0', 'to', 3)

  ->attr_is('#s1', 'l', "2")
  ->attr_is('#s1', 'from', 4)
  ->attr_is('#s1', 'to', 8)

  ->attr_is('#s2', 'l', "2")
  ->attr_is('#s2', 'from', 9)
  ->attr_is('#s2', 'to', 13)
  ;

# <w> elements carrying attributes; the second start tag is split across
# two lines on purpose (whitespace inside a tag).
ok($inline->parse('aaa', \'<w lemma="die" type="det">Die</w> <w
  lemma="alt" type="ADJ">alte</w> <w lemma="frau" type="NN">Frau</w>'), 'Parsed');

is($inline->data->data, 'Die alte Frau', 'Primary data');

# Token layer: each token exposes its attributes as named features.
Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
  ->attr_is('#s0', 'l', "2")
  ->attr_is('#s0', 'to', 3)
  ->text_is('#s0 fs f[name="lemma"]', 'die')
  ->text_is('#s0 fs f[name="type"]', 'det')

  ->attr_is('#s1', 'l', "2")
  ->attr_is('#s1', 'from', 4)
  ->attr_is('#s1', 'to', 8)
  ->text_is('#s1 fs f[name="lemma"]', 'alt')
  ->text_is('#s1 fs f[name="type"]', 'ADJ')

  ->attr_is('#s2', 'l', "2")
  ->attr_is('#s2', 'from', 9)
  ->attr_is('#s2', 'to', 13)
  ->text_is('#s2 fs f[name="lemma"]', 'frau')
  ->text_is('#s2 fs f[name="type"]', 'NN')
  ;

# Checks that 'head' and 'deprel' attributes on <w> elements end up in the
# dependency layer, while the remaining attributes stay in the token layer.
subtest 'Support dependency parsing' => sub {
  # Reassigns the file-level $inline (no 'my').
  # NOTE(review): the fourth constructor argument presumably switches on
  # dependency extraction - confirm against KorAP::XML::TEI::Inline.
  $inline = KorAP::XML::TEI::Inline->new(0,{},0,1);
  ok($inline->parse('Fake News Media',
                    \'<s><w n="1" lemma="Fake" pos="N" head="2" deprel="name" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK">Fake</w> <w n="2" lemma="News" pos="N" head="3" deprel="name" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK">News</w> <w n="3" lemma="media" pos="N" head="0" deprel="ROOT" msd="NUM_Sg|CASE_Nom|CASECHANGE_Up">Media</w></s> '
                  ), 'Parsed');

  # The trailing blank after </s> is part of the primary data.
  is($inline->data->data, 'Fake News Media ', 'Primary data');

  # Token layer serialized with second argument 1: the 'n' feature is present.
  Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
    ->attr_is('#s0', 'l', "3")
    ->attr_is('#s0', 'to', 4)
    ->text_is('#s0 fs f[name="lemma"]', 'Fake')
    ->text_is('#s0 fs f[name="pos"]', 'N')
    ->text_is('#s0 fs f[name="n"]','1')

    ->attr_is('#s1', 'l', "3")
    ->attr_is('#s1', 'from', 5)
    ->attr_is('#s1', 'to', 9)
    ->text_is('#s1 fs f[name="lemma"]', 'News')
    ->text_is('#s1 fs f[name="pos"]', 'N')
    ->text_is('#s1 fs f[name="n"]','2')

    ->attr_is('#s2', 'l', "3")
    ->attr_is('#s2', 'from', 10)
    ->attr_is('#s2', 'to', 15)
    ->text_is('#s2 fs f[name="lemma"]', 'media')
    ->text_is('#s2 fs f[name="pos"]', 'N')
    ->text_is('#s2 fs f[name="n"]','3')
    ;

  # Token layer serialized with second argument 4: lemma and pos are still
  # expected, but the 'n' feature must be absent on the first token.
  Test::XML::Loy->new($inline->tokens->to_string('aaa', 4))
    ->attr_is('#s0', 'l', "3")
    ->attr_is('#s0', 'to', 4)
    ->text_is('#s0 fs f[name="lemma"]', 'Fake')
    ->text_is('#s0 fs f[name="pos"]', 'N')
    ->element_exists_not('#s0 fs f[name="n"]')

    ->attr_is('#s1', 'l', "3")
    ->attr_is('#s1', 'from', 5)
    ->attr_is('#s1', 'to', 9)
    ->text_is('#s1 fs f[name="lemma"]', 'News')
    ->text_is('#s1 fs f[name="pos"]', 'N')

    ->attr_is('#s2', 'l', "3")
    ->attr_is('#s2', 'from', 10)
    ->attr_is('#s2', 'to', 15)
    ->text_is('#s2 fs f[name="lemma"]', 'media')
    ->text_is('#s2 fs f[name="pos"]', 'N')
    ;

  # Dependency layer: ids have the form s<sentence>_n<token>. The <rel>
  # label is the deprel value and the nested <span> holds the offsets of
  # the head token; for ROOT (head="0") the nested span is expected to
  # cover the whole sentence (0 to 15 here).
  Test::XML::Loy->new($inline->dependencies->to_string('aaa', 3))
    ->attr_is('#s1_n1', 'l', "3")
    ->element_exists('#s1_n1[from="0"]')
    ->attr_is('#s1_n1', 'to', 4)
    ->attr_is('#s1_n1 rel', 'label', 'name')
    ->attr_is('#s1_n1 rel span', 'from', 5)
    ->attr_is('#s1_n1 rel span', 'to', 9)
    ->element_exists_not('#s1_n1 fs')

    ->attr_is('#s1_n2', 'l', "3")
    ->attr_is('#s1_n2', 'from', 5)
    ->attr_is('#s1_n2', 'to', 9)
    ->attr_is('#s1_n2 rel', 'label', 'name')
    ->attr_is('#s1_n2 rel span', 'from', 10)
    ->attr_is('#s1_n2 rel span', 'to', 15)

    ->attr_is('#s1_n3', 'l', "3")
    ->attr_is('#s1_n3', 'from', 10)
    ->attr_is('#s1_n3', 'to', 15)
    ->attr_is('#s1_n3 rel', 'label', 'ROOT')
    ->element_exists('#s1_n3 rel span[from="0"]')
    ->attr_is('#s1_n3 rel span', 'to', 15)
    ;

  # Second fixture: a paragraph with two sentences, so token numbering
  # ('n') restarts in the second sentence.
  $inline = KorAP::XML::TEI::Inline->new(0,{},0,1);
  ok($inline->parse('Fake News Media',
                    \('<p xml:lang="x-|fin:2|"><s xml:lang="fin">'.
                      '<w deprel="nn" head="2" lemma="lJgkPOGUBSFSRQlx" msd="NUM_Sg|CASE_Nom|CASECHANGE_Up" n="1" pos="N">lJgkPOGUBSFSRQlx</w> '.
                      '<w deprel="nsubj" head="3" lemma="rYuqciR" msd="SUBCAT_Prop|NUM_Sg|CASE_Nom|CASECHANGE_Up|OTHER_UNK" n="2" pos="N">rYuqciR</w> '.
                      '<w deprel="ROOT" head="0" lemma="RcidTBqv" msd="PRS_Sg3|VOICE_Act|TENSE_Prt|MOOD_Ind" n="3" pos="V">RcidTBqv</w> '.
                      '<w deprel="poss" head="5" lemma="cHIf" msd="SUBCAT_Acro|NUM_Sg|CASE_Nom|CASECHANGE_Up" n="4" pos="N">cHIf</w> '.
                      '<w deprel="nommod" head="3" lemma="reuvyWZtUhN" msd="NUM_Sg|CASE_Ela" n="5" pos="N">reuvyWZtUhN</w> '.
                      '<w deprel="nsubj" head="7" lemma="KsaXYaFo" msd="NUM_Sg|CASE_Gen" n="6" pos="N">KsaXYaFo</w> '.
                      '<w deprel="iccomp" head="3" lemma="qJhgSDNOYpWg" msd="NUM_Sg|CASE_Ill|VOICE_Act|INF_Inf3" n="7" pos="V">qJhgSDNOYpWg</w> '.
                      '<w deprel="name" head="9" lemma="xtRyGN" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK" n="8" pos="N">xtRyGN</w> '.
                      '<w deprel="poss" head="10" lemma="XCVuQwU" msd="SUBCAT_Prop|NUM_Sg|CASE_Gen|CASECHANGE_Up|OTHER_UNK" n="9" pos="N">XCVuQwU</w> '.
                      '<w deprel="poss" head="11" lemma="hYwEsYDUbYHmJ" msd="NUM_Sg|CASE_Gen|CASECHANGE_Up|OTHER_UNK" n="10" pos="N">hYwEsYDUbYHmJ</w> '.
                      '<w deprel="dobj" head="7" lemma="yYXOYOqX" msd="NUM_Sg|CASE_Gen" n="11" pos="N">yYXOYOqX</w> '.
                      '<w deprel="nommod" head="7" lemma="LkrLYiYgRSC" msd="NUM_Sg|CASE_Ade" n="12" pos="N">LkrLYiYgRSC</w> '.
                      '<w deprel="num" head="12" lemma="erRenLjillGtDCaRLIx" msd="_" n="13" pos="Num">erRenLjillGtDCaRLIx</w> '.
                      '<w deprel="punct" head="3" lemma="c" msd="_" n="14" pos="Punct">c</w> '.
                      '</s>'."\n".
                      '<s xml:lang="fin">'.
                      '<w deprel="nommod" head="3" lemma="LSymCdojKTj" msd="SUBCAT_Prop|NUM_Sg|CASE_Ine|CASECHANGE_Up|OTHER_UNK" n="1" pos="N">LSymCdojKTj</w> '.
                      '<w deprel="auxpass" head="3" lemma="vQ" msd="PRS_Sg3|VOICE_Act|TENSE_Prs|MOOD_Ind" n="2" pos="V">vQ</w> '.
                      '<w deprel="ROOT" head="0" lemma="nHfBTtne" msd="NUM_Sg|CASE_Nom|VOICE_Pass|PCP_PrfPrc|CMP_Pos" n="3" pos="V">nHfBTtne</w> '.
                      '<w deprel="preconj" head="6" lemma="fmcz" msd="SUBCAT_CC" n="4" pos="C">fmcz</w> '.
                      '<w deprel="poss" head="6" lemma="lHlPTQv" msd="SUBCAT_Prop|NUM_Sg|CASE_Gen|CASECHANGE_Up|OTHER_UNK" n="5" pos="N">lHlPTQv</w> '.
                      '<w deprel="dobj" head="3" lemma="IXxgORnMc" msd="NUM_Pl|CASE_Par|OTHER_UNK" n="6" pos="N">IXxgORnMc</w> '.
                      '<w deprel="cc" head="6" lemma="QdjQ" msd="SUBCAT_CC" n="7" pos="C">QdjQ</w> '.
                      '<w deprel="conj" head="6" lemma="luYMmwBGSUbXCMxqFzeZv" msd="NUM_Pl|CASE_Par|OTHER_UNK" n="8" pos="N">luYMmwBGSUbXCMxqFzeZv</w> '.
                      '<w deprel="punct" head="3" lemma="E" msd="_" n="9" pos="Punct">E</w>'.
                      '</s>'.
                      '</p>')
                  ), 'Parsed');

  is($inline->data->data, 'lJgkPOGUBSFSRQlx rYuqciR RcidTBqv cHIf reuvyWZtUhN KsaXYaFo qJhgSDNOYpWg xtRyGN XCVuQwU hYwEsYDUbYHmJ yYXOYOqX LkrLYiYgRSC erRenLjillGtDCaRLIx c LSymCdojKTj vQ nHfBTtne fmcz lHlPTQv IXxgORnMc QdjQ luYMmwBGSUbXCMxqFzeZv E', 'Primary data');

  # Heads are resolved within their own sentence: s2_n1 and s2_n9 point to
  # the third token of sentence two (161 to 169), and each ROOT points to
  # the offsets of its own sentence (0 to 144 and 146 to 221).
  Test::XML::Loy->new($inline->dependencies->to_string('aaa', 3))
    ->attr_is('#s1_n3', 'l', "4")
    ->attr_is('#s1_n3', 'from', 25)
    ->attr_is('#s1_n3', 'to', 33)
    ->attr_is('#s1_n3 rel', 'label', 'ROOT')
    ->element_exists('#s1_n3 rel span[from=0]')
    ->attr_is('#s1_n3 rel span', 'to', 144)
    ->element_exists_not('#s1_n3 fs')

    ->attr_is('#s1_n14', 'l', "4")
    ->attr_is('#s1_n14', 'from', 143)
    ->attr_is('#s1_n14', 'to', 144)
    ->attr_is('#s1_n14 rel', 'label', 'punct')
    ->attr_is('#s1_n14 rel span', 'from', 25)
    ->attr_is('#s1_n14 rel span', 'to', 33)

    ->attr_is('#s2_n1', 'l', "4")
    ->attr_is('#s2_n1', 'from', 146)
    ->attr_is('#s2_n1', 'to', 157)
    ->attr_is('#s2_n1 rel', 'label', 'nommod')
    ->attr_is('#s2_n1 rel span', 'from', 161)
    ->attr_is('#s2_n1 rel span', 'to', 169)

    ->attr_is('#s2_n9', 'l', "4")
    ->attr_is('#s2_n9', 'from', 220)
    ->attr_is('#s2_n9', 'to', 221)
    ->attr_is('#s2_n9 rel', 'label', 'punct')
    ->attr_is('#s2_n9 rel span', 'from', 161)
    ->attr_is('#s2_n9 rel span', 'to', 169)

    ->attr_is('#s2_n3', 'l', "4")
    ->attr_is('#s2_n3', 'from', 161)
    ->attr_is('#s2_n3', 'to', 169)
    ->attr_is('#s2_n3 rel', 'label', 'ROOT')
    ->attr_is('#s2_n3 rel span', 'from', 146)
    ->attr_is('#s2_n3 rel span', 'to', 221)
    ;

  # Token ids keep counting across sentences: s2 is the third token of the
  # first sentence, s22 the last token of the second one.
  Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
    ->attr_is('#s2', 'l', "4")
    ->attr_is('#s2', 'from', 25)
    ->attr_is('#s2', 'to', 33)
    ->text_is('#s2 fs f[name="lemma"]', 'RcidTBqv')
    ->text_is('#s2 fs f[name="pos"]', 'V')
    ->text_is('#s2 fs f[name="msd"]', 'PRS_Sg3|VOICE_Act|TENSE_Prt|MOOD_Ind')

    ->attr_is('#s22', 'l', "4")
    ->attr_is('#s22', 'from', 220)
    ->attr_is('#s22', 'to', 221)
    ->text_is('#s22 fs f[name="lemma"]', 'E')
    ->text_is('#s22 fs f[name="pos"]', 'Punct')
    ->text_is('#s22 fs f[name="msd"]', '_')
    ;
};

# Checks that the 'msd' attribute of every <w> element is exposed as a
# token feature next to 'lemma' and 'pos'.
subtest 'Parse msd from inline' => sub {
  # Uses the file-level $inline in whatever configuration it was last
  # assigned; no new parser is created here.
  ok($inline->parse('aaa', \'<w lemma="die" pos="det" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK">Die</w> <w
  lemma="alt" pos="ADJ" msd="SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK">alte</w> <w lemma="frau" pos="NN" msd="NUM_Sg|CASE_Nom|CASECHANGE_Up">Frau</w>'), 'Parsed');

  is($inline->data->data, 'Die alte Frau', 'Primary data');

  # Each token is checked against its own msd value from the fixture.
  # (Previously all three groups queried '#s2', leaving the msd of the
  # first two tokens untested.)
  Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
    ->attr_is('#s0', 'l', "2")
    ->attr_is('#s0', 'to', 3)
    ->text_is('#s0 fs f[name="lemma"]', 'die')
    ->text_is('#s0 fs f[name="pos"]', 'det')
    ->text_is('#s0 fs f[name="msd"]', 'SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK')

    ->attr_is('#s1', 'l', "2")
    ->attr_is('#s1', 'from', 4)
    ->attr_is('#s1', 'to', 8)
    ->text_is('#s1 fs f[name="lemma"]', 'alt')
    ->text_is('#s1 fs f[name="pos"]', 'ADJ')
    ->text_is('#s1 fs f[name="msd"]', 'SUBCAT_Prop|CASECHANGE_Up|OTHER_UNK')

    ->attr_is('#s2', 'l', "2")
    ->attr_is('#s2', 'from', 9)
    ->attr_is('#s2', 'to', 13)
    ->text_is('#s2 fs f[name="lemma"]', 'frau')
    ->text_is('#s2 fs f[name="pos"]', 'NN')
    ->text_is('#s2 fs f[name="msd"]', 'NUM_Sg|CASE_Nom|CASECHANGE_Up')
    ;
};

# Offset expectations for the whitespace-handling examples quoted from the
# documentation. The whole subtest is currently skipped.
subtest 'Examples from documentation' => sub {
  plan skip_all => 'Expected behaviour not finalized';

  # From the documentation:
  #
  # Example:
  # '... <head type="main"><s>Campagne in Frankreich</s></head><head type="sub"> <s>1792</s> ...'

  # Two text-nodes should normally be separated by a blank.
  # In the above example, that would be the 2 text-nodes
  # 'Campagne in Frankreich' and '1792', which are separated
  # by the whitespace-node ' ' (see [2]).
  #
  # The text-node 'Campagne in Frankreich' leads to the setting
  # of '$add_one' to 1, so that when opening the 2nd 'head'-tag,
  # its from-index gets set to the correct start-index of '1792'
  # (and not to the start-index of the whitespace-node ' ').
  #
  # The assumption here is, that in most cases there _is_ a
  # whitespace node between 2 text-nodes. The below code fragment
  # enables a way, to check, if this really _was_ the case for
  # the last 2 'non-tag'-nodes, when closing a tag:
  #
  # When a whitespace-node is read, its from-index is stored
  # as a hash-key (in %ws), to state that it belongs to a ws-node.
  # So when closing a tag, it can be checked, if the previous
  # 'non-tag'-node (text or whitespace), which is the one before
  # the last read 'non-tag'-node, was actually _not_ a ws-node,
  # but instead a text-node. In that case, the from-value of
  # the last read 'non-tag'-node has to be corrected (see [1]).
  #
  # For whitespace-nodes $add_one is set to 0, so when opening
  # the next tag (in the above example the 2nd 's'-tag), no
  # additional 1 is added (because this was already done by the
  # whitespace-node itself when incrementing the variable $pos).
  #
  # [1]
  # Now, what happens, when 2 text-nodes are _not_ separated by a
  # whitespace-node (e.g.: <w>Augen<c>,</c></w>)?
  # In this case, the falsely increased from-value has to be
  # decreased again by 1 when closing the enclosing tag
  # (see above code fragment '... not exists $ws{ $from - 1 } ...').
  #
  # [2]
  # Comparing the 2 examples '<w>fu</w> <w>bar</w>' and
  # '<w>fu</w><w> </w><w>bar</w>', ' ' is in both cases handled as a
  # whitespace-node (XML_READER_TYPE_SIGNIFICANT_WHITESPACE).
  #
  # The from-index of the 2nd w-tag in the second example refers to
  # 'bar', which may not have been the intention
  # (even though '<w> </w>' doesn't make a lot of sense).
  # TODO: could this be a bug?
  #
  # Empty tags also cling to the next text-token - e.g. in
  # '<w>tok1</w> <w>tok2</w><a><b/></a> <w>tok3</w>' are the from-
  # and to-indices for the tags 'a' and 'b' both 12,
  # which is the start-index of the token 'tok3'.
  #
  # NOTE(review): the quoted text says 12, while the assertions below
  # expect 10 for 'a' and 'b' - confirm which value is intended.

  ok($inline->parse(
    'bbb',
    \'<head type="main"><s>Campagne in Frankreich</s></head><head type="sub"> <s>1792</s></head>'),'Parsed');
  is($inline->data->data, 'Campagne in Frankreich 1792', 'Primary data');

  # The second <head> and its <s> are both expected to start at '1792'
  # (offset 23), not at the preceding blank.
  Test::XML::Loy->new($inline->structures->to_string('aaa', 2))
    ->attr_is('#s0', 'l', "1")
    ->attr_is('#s0', 'to', 27)
    ->text_is('#s0 fs f[name="name"]', 'text')

    ->attr_is('#s1', 'l', "2")
    ->attr_is('#s1', 'to', 22)
    ->text_is('#s1 fs f[name="name"]', 'head')
    ->text_is('#s1 fs f[name="attr"] fs f[name=type]', 'main')

    ->attr_is('#s2', 'l', "3")
    ->attr_is('#s2', 'to', 22)
    ->text_is('#s2 fs f[name="name"]', 's')

    ->attr_is('#s3', 'l', "2")
    ->attr_is('#s3', 'from', 23)
    ->attr_is('#s3', 'to', 27)
    ->text_is('#s3 fs f[name="name"]', 'head')
    ->text_is('#s3 fs f[name="attr"] fs f[name=type]', 'sub')

    ->attr_is('#s4', 'l', "3")
    ->attr_is('#s4', 'from', 23)
    ->attr_is('#s4', 'to', 27)
    ->text_is('#s4 fs f[name="name"]', 's')
    ;

  ok($inline->parse(
    'ccc',
    \'<w>tok1</w> <w>tok2</w><a><b/></a> <w>tok3</w>'
  ), 'Parsed');
  is($inline->data->data, 'tok1 tok2 tok3', 'Primary data');

  # The empty elements <a> and <b/> are expected as zero-width spans at
  # the start of 'tok3' (offset 10).
  Test::XML::Loy->new($inline->structures->to_string('ccc', 2))
    ->attr_is('#s0', 'l', "1")
    ->attr_is('#s0', 'to', 14)
    ->text_is('#s0 fs f[name="name"]', 'text')

    ->attr_is('#s1', 'l', "2")
    ->attr_is('#s1', 'to', 4)
    ->text_is('#s1 fs f[name="name"]', 'w')

    ->attr_is('#s2', 'l', "2")
    ->attr_is('#s2', 'from', 5)
    ->attr_is('#s2', 'to', 9)
    ->text_is('#s2 fs f[name="name"]', 'w')

    ->attr_is('#s3', 'l', "2")
    ->attr_is('#s3', 'from', 10)
    ->attr_is('#s3', 'to', 10)
    ->text_is('#s3 fs f[name="name"]', 'a')

    ->attr_is('#s4', 'l', "3")
    ->attr_is('#s4', 'from', 10)
    ->attr_is('#s4', 'to', 10)
    ->text_is('#s4 fs f[name="name"]', 'b')

    ->attr_is('#s5', 'l', "2")
    ->attr_is('#s5', 'from', 10)
    ->attr_is('#s5', 'to', 14)
    ->text_is('#s5 fs f[name="name"]', 'w')
    ;

  ok($inline->parse(
    'ccc',
    \'<w>Augen<c>,</c></w> <w>die</w>'
  ), 'Parsed');
  is($inline->data->data, 'Augen, die', 'Primary data');

  # Two text nodes without a blank in between: <c> is expected to start
  # directly after 'Augen' (offset 5).
  Test::XML::Loy->new($inline->structures->to_string('ddd', 2))
    ->attr_is('#s0', 'l', "1")
    ->attr_is('#s0', 'to', 10)
    ->text_is('#s0 fs f[name="name"]', 'text')

    ->attr_is('#s1', 'l', "2")
    ->attr_is('#s1', 'to', 6)
    ->text_is('#s1 fs f[name="name"]', 'w')

    ->attr_is('#s2', 'l', "3")
    ->attr_is('#s2', 'from', 5)
    ->attr_is('#s2', 'to', 6)
    ->text_is('#s2 fs f[name="name"]', 'c')

    ->attr_is('#s3', 'l', "2")
    ->attr_is('#s3', 'from', 7)
    ->attr_is('#s3', 'to', 10)
    ->text_is('#s3 fs f[name="name"]', 'w')
    ;
};


# Checks which elements land in the structure layer and which in the
# token layer for a parser created with extra constructor arguments.
subtest 'Treatment of tokens' => sub {
  # Lexical $inline, shadowing the file-level instance inside this subtest.
  # NOTE(review): judging by the assertions below, {b => 1} presumably
  # lists tags to drop and the third argument presumably moves <w> out of
  # the structure layer - confirm against KorAP::XML::TEI::Inline.
  my $inline = KorAP::XML::TEI::Inline->new(0, {b => 1}, 1);

  ok($inline->parse('aaa', \'<a>Der</a> <b>alte</b> <w pos="NN">Baum</w>'), 'Parsed');
  is($inline->data->data, 'Der alte Baum', 'Primary data');

  # Only contains '<a>'
  Test::XML::Loy->new($inline->structures->to_string('aaa', 1))
    ->attr_is('#s1', 'to', 3)
    ->element_exists_not('#s2')
    ;

  # Only contains 'w'
  Test::XML::Loy->new($inline->tokens->to_string('aaa', 1))
    ->attr_is('#s0', 'from', 9)
    ->attr_is('#s0', 'to', 13)
    ->attr_is('#s0 > fs > f > fs > f', 'name', 'pos')
    ->text_is('#s0 > fs > f > fs > f[name=pos]', 'NN')
    ->element_exists_not('#s1')
    ;
};

# No fixed plan: the number of tests is whatever ran above.
done_testing;