blob: 65d924513f7a23e9d95c55159d419dfd51891049 [file] [log] [blame]
Eliza Margaretha01929182014-02-19 11:48:59 +00001package de.ids_mannheim.korap.index;
2
Nils Diewald68bb1f72014-01-07 14:07:05 +00003import java.util.*;
4import java.io.*;
5
6import static org.junit.Assert.*;
7import org.junit.Test;
8import org.junit.Ignore;
9import org.junit.runner.RunWith;
10import org.junit.runners.JUnit4;
11
Nils Diewaldcde69082014-01-16 15:46:48 +000012import de.ids_mannheim.korap.index.MatchIdentifier;
Nils Diewald345bdc02014-01-21 21:48:57 +000013import de.ids_mannheim.korap.index.PosIdentifier;
Nils Diewaldcde69082014-01-16 15:46:48 +000014
Nils Diewald68bb1f72014-01-07 14:07:05 +000015import de.ids_mannheim.korap.KorapIndex;
16import de.ids_mannheim.korap.KorapQuery;
17import de.ids_mannheim.korap.KorapSearch;
18import de.ids_mannheim.korap.KorapResult;
Nils Diewaldcde69082014-01-16 15:46:48 +000019import de.ids_mannheim.korap.KorapMatch;
Nils Diewald68bb1f72014-01-07 14:07:05 +000020
21import de.ids_mannheim.korap.index.FieldDocument;
22
23@RunWith(JUnit4.class)
24public class TestMatchIdentifier {
25
26 @Test
Nils Diewaldcde69082014-01-16 15:46:48 +000027 public void identifierExample1 () throws IOException {
28 MatchIdentifier id = new MatchIdentifier("match-c1!d1-p4-20");
29 assertEquals(id.getCorpusID(), "c1");
30 assertEquals(id.getDocID(), "d1");
31 assertEquals(id.getStartPos(), 4);
32 assertEquals(id.getEndPos(), 20);
33
34 assertEquals(id.toString(), "match-c1!d1-p4-20");
35 id.addPos(10,14,2);
36 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14");
37 id.addPos(11,12,5);
38 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
39 // Ignore
40 id.addPos(11,12,-8);
41 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
42 id.addPos(11,-12,8);
43 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
44 id.addPos(-11,12,8);
45 assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12");
46
47 id = new MatchIdentifier("matc-c1!d1-p4-20");
48 assertNull(id.toString());
49 id = new MatchIdentifier("match-d1-p4-20");
50 assertNull(id.getCorpusID());
51 assertEquals(id.getDocID(), "d1");
52 id = new MatchIdentifier("match-p4-20");
53 assertNull(id.toString());
54
55 id = new MatchIdentifier("match-c1!d1-p4-20");
56 assertEquals(id.toString(), "match-c1!d1-p4-20");
57
58 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8");
59 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8");
60
61 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10");
62 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8");
63
64 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6");
65 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4");
66
67 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8");
68 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8");
69
70 id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8(5)9--10");
71 assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8");
72 };
73
74 @Test
Nils Diewald345bdc02014-01-21 21:48:57 +000075 public void posIdentifierExample1 () throws IOException {
76 PosIdentifier id = new PosIdentifier();
77 id.setCorpusID("c1");
78 id.setDocID("d1");
79 id.setPos(8);
80 assertEquals(id.getCorpusID(), "c1");
81 assertEquals(id.getDocID(), "d1");
82 assertEquals(id.getPos(), 8);
83 assertEquals(id.toString(), "word-c1!d1-p8");
84 };
85
86 @Test
Nils Diewald68bb1f72014-01-07 14:07:05 +000087 public void indexExample1 () throws IOException {
88 KorapIndex ki = new KorapIndex();
Nils Diewaldcde69082014-01-16 15:46:48 +000089 ki.addDoc(createSimpleFieldDoc());
Nils Diewald68bb1f72014-01-07 14:07:05 +000090 ki.commit();
91
Nils Diewaldcde69082014-01-16 15:46:48 +000092 KorapQuery kq = new KorapQuery("tokens");
Nils Diewald68bb1f72014-01-07 14:07:05 +000093 KorapSearch ks = new KorapSearch(kq._(2,kq.seq(kq.seg("s:b")).append(kq._(kq.seg("s:a")))));
94 KorapResult kr = ki.search(ks);
95
96 assertEquals("totalResults", 1, kr.totalResults());
97 assertEquals("StartPos (0)", 7, kr.match(0).startPos);
98 assertEquals("EndPos (0)", 9, kr.match(0).endPos);
99
Nils Diewaldcde69082014-01-16 15:46:48 +0000100 KorapMatch km = kr.match(0);
Nils Diewald68bb1f72014-01-07 14:07:05 +0000101
Nils Diewaldcde69082014-01-16 15:46:48 +0000102 assertEquals("SnippetBrackets (0)", "... bcabca[{2:b{a}}]c", km.snippetBrackets());
103 assertEquals("ID (0)", "match-c1!d1-p7-9(0)8-8(2)7-8", km.getID());
Nils Diewald68bb1f72014-01-07 14:07:05 +0000104 };
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000105
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000106 @Test
107 public void indexExample2 () throws IOException {
Nils Diewaldcde69082014-01-16 15:46:48 +0000108 KorapIndex ki = new KorapIndex();
109 ki.addDoc(createSimpleFieldDoc());
110 ki.commit();
111
112 KorapMatch km = ki.getMatch("match-c1!d1-p7-9(0)8-8(2)7-8");
113
114 assertEquals("StartPos (0)", 7, km.getStartPos());
115 assertEquals("EndPos (0)", 9, km.getEndPos());
116
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000117 assertEquals("SnippetBrackets (0)",
118 "... [{2:b{a}}] ...",
119 km.getSnippetBrackets());
Nils Diewaldcde69082014-01-16 15:46:48 +0000120 assertEquals("ID (0)", "match-c1!d1-p7-9(0)8-8(2)7-8", km.getID());
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000121
122 km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8",
123 "tokens",
124 "f",
125 "m",
126 false,
127 false);
128
129 assertEquals("SnippetBrackets (1)",
130 "... [{f/m:acht:b}{f/m:neun:a}] ...",
131 km.getSnippetBrackets());
132
133 km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8",
134 "tokens",
135 "f",
136 "m",
137 false,
138 true);
139
140 assertEquals("SnippetBrackets (2)",
141 "... [{2:{f/m:acht:b}{{f/m:neun:a}}}] ...",
142 km.getSnippetBrackets());
143
144 km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
145 "tokens",
146 "f",
147 "m",
148 false,
149 true);
150
151 assertEquals("SnippetBrackets (3)",
152 "... [{2:{f/m:acht:b}{4:{f/m:neun:a}}}] ...",
153 km.getSnippetBrackets());
154
155 km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
156 "tokens",
157 "f",
158 null,
159 false,
160 true);
161
162 assertEquals("SnippetBrackets (4)",
163 "... [{2:{f/m:acht:{f/y:eight:b}}{4:{f/m:neun:{f/y:nine:a}}}}] ...",
164 km.getSnippetBrackets());
165
166 assertEquals("SnippetHTML (4)",
167 "<span class=\"context-left\">"+
168 "<span class=\"more\">"+
169 "</span>"+
170 "</span>"+
171 "<span class=\"match\">"+
172 "<em class=\"class-2 level-0\">"+
173 "<span title=\"f/m:acht\">"+
174 "<span title=\"f/y:eight\">"+
175 "b"+
176 "</span>"+
177 "</span>"+
178 "<em class=\"class-4 level-1\">"+
179 "<span title=\"f/m:neun\">"+
180 "<span title=\"f/y:nine\">"+
181 "a"+
182 "</span>"+
183 "</span>"+
184 "</em>"+
185 "</em>"+
186 "</span>"+
187 "<span class=\"context-right\">"+
188 "<span class=\"more\">"+
189 "</span>"+
190 "</span>",
191 km.getSnippetHTML());
Nils Diewald345bdc02014-01-21 21:48:57 +0000192 };
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000193
Nils Diewald345bdc02014-01-21 21:48:57 +0000194
195 @Test
196 public void indexExample3 () throws IOException {
197 KorapIndex ki = new KorapIndex();
198 ki.addDoc(createSimpleFieldDoc());
199 ki.commit();
200
201 KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
Nils Diewald22fc3ad2014-01-16 19:28:31 +0000202 "tokens",
203 null,
204 null,
205 false,
206 true);
207
Nils Diewald345bdc02014-01-21 21:48:57 +0000208
209 assertEquals("SnippetHTML (1)",
210 "<span class=\"context-left\">" +
211 "<span class=\"more\">" +
212 "</span>" +
213 "</span>" +
214 "<span class=\"match\">" +
215 "<em class=\"class-2 level-0\">" +
216 "<span title=\"f/m:acht\">" +
217 "<span title=\"f/y:eight\">" +
218 "<span title=\"it/is:8\">" +
219 "<span title=\"x/o:achtens\">" +
220 "b" +
221 "</span>" +
222 "</span>" +
223 "</span>" +
224 "</span>" +
225 "<em class=\"class-4 level-1\">" +
226 "<span title=\"f/m:neun\">" +
227 "<span title=\"f/y:nine\">" +
228 "<span title=\"it/is:9\">" +
229 "<span title=\"x/o:neuntens\">" +
230 "a" +
231 "</span>" +
232 "</span>" +
233 "</span>" +
234 "</span>" +
235 "</em>" +
236 "</em>" +
237 "</span>" +
238 "<span class=\"context-right\">" +
239 "<span class=\"more\">" +
240 "</span>" +
241 "</span>",
242 km.getSnippetHTML());
Nils Diewaldcde69082014-01-16 15:46:48 +0000243 };
244
Nils Diewald345bdc02014-01-21 21:48:57 +0000245 @Test
246 public void indexExample4 () throws IOException {
247 KorapIndex ki = new KorapIndex();
248 ki.addDoc(createSimpleFieldDoc());
249 ki.commit();
250
251 KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
252 "tokens",
253 null,
254 null,
255 false,
256 false);
257
258
259 assertEquals("SnippetHTML (1)",
260 "<span class=\"context-left\">" +
261 "<span class=\"more\">" +
262 "</span>" +
263 "</span>" +
264 "<span class=\"match\">" +
265 "<span title=\"f/m:acht\">" +
266 "<span title=\"f/y:eight\">" +
267 "<span title=\"it/is:8\">" +
268 "<span title=\"x/o:achtens\">" +
269 "b" +
270 "</span>" +
271 "</span>" +
272 "</span>" +
273 "</span>" +
274 "<span title=\"f/m:neun\">" +
275 "<span title=\"f/y:nine\">" +
276 "<span title=\"it/is:9\">" +
277 "<span title=\"x/o:neuntens\">" +
278 "a" +
279 "</span>" +
280 "</span>" +
281 "</span>" +
282 "</span>" +
283 "</span>" +
284 "<span class=\"context-right\">" +
285 "<span class=\"more\">" +
286 "</span>" +
287 "</span>",
288 km.getSnippetHTML());
289 };
290
291 @Test
292 public void indexExample5Spans () throws IOException {
293 KorapIndex ki = new KorapIndex();
294 ki.addDoc(createSimpleFieldDoc());
295 ki.commit();
296
297 KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
298 "tokens",
299 null,
300 null,
301 true,
302 false);
303
304
305 assertEquals("SnippetBrackets (1)",
306 "... [{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}] ...",
307 km.getSnippetBrackets());
308 };
309
310 @Test
311 public void indexExample6Spans () throws IOException {
312 KorapIndex ki = new KorapIndex();
313 ki.addDoc(createSimpleFieldDoc());
314 ki.commit();
315
316 KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8",
317 "tokens",
318 null,
319 null,
320 true,
321 false);
322
323
324 assertEquals("SnippetBrackets (1)",
325 "... [{x/tag:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]",
326 km.getSnippetBrackets());
327 };
328
329 @Test
330 public void indexExample7Spans () throws IOException {
331 KorapIndex ki = new KorapIndex();
332 ki.addDoc(createSimpleFieldDoc());
333 ki.commit();
334
335 KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8",
336 "tokens",
337 null,
338 null,
339 true,
340 true);
341
342
343 assertEquals("SnippetBrackets (1)",
344 "... [{x/tag:{2:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{4:{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]",
345 km.getSnippetBrackets());
346
347 assertEquals("SnippetHTML (1)",
348 "<span class=\"context-left\">" +
349 "<span class=\"more\">" +
350 "</span>" +
351 "</span>" +
352 "<span class=\"match\">" +
353 "<span title=\"x/tag\">" +
354 "<em class=\"class-2 level-0\">" +
355 "<span title=\"f/m:acht\">" +
356 "<span title=\"f/y:eight\">" +
357 "<span title=\"it/is:8\">" +
358 "<span title=\"x/o:achtens\">" +
359 "b" +
360 "</span>" +
361 "</span>" +
362 "</span>" +
363 "</span>" +
364 "<em class=\"class-4 level-1\">" +
365 "<span title=\"f/m:neun\">" +
366 "<span title=\"f/y:nine\">" +
367 "<span title=\"it/is:9\">" +
368 "<span title=\"x/o:neuntens\">" +
369 "a" +
370 "</span>" +
371 "</span>" +
372 "</span>" +
373 "</span>" +
374 "</em>" +
375 "</em>" +
376 "<span title=\"f/m:zehn\">" +
377 "<span title=\"f/y:ten\">" +
378 "<span title=\"it/is:10\">" +
379 "<span title=\"x/o:zehntens\">" +
380 "c" +
381 "</span>" +
382 "</span>" +
383 "</span>" +
384 "</span>" +
385 "</span>" +
386 "</span>" +
387 "<span class=\"context-right\">" +
388 "</span>",
389 km.getSnippetHTML());
390 };
391
392 @Test
393 public void indexExample6Relations () throws IOException {
394 KorapIndex ki = new KorapIndex();
395 ki.addDoc(createSimpleFieldDoc());
396 ki.commit();
397
398 KorapMatch km = ki.getMatchInfo("match-c1!d1-p0-5(4)8-8(2)7-8",
399 "tokens",
400 "x",
401 null,
402 true,
403 false);
404
405 assertEquals("SnippetBrackets (1)",
406 "[{x/rel:a>3:{x/o:erstens:a}}{x/o:zweitens:b}{x/o:drittens:c}{#3:{x/o:viertens:a}}{x/o:fünftens:b}] ...",
407 km.getSnippetBrackets());
408
409 assertEquals("SnippetBrackets (1)",
410 "<span class=\"context-left\">" +
411 "</span>" +
412 "<span class=\"match\">" +
413 "<span xlink:title=\"x/rel:a\" " +
414 "xlink:type=\"simple\" " +
415 "xlink:href=\"#word-c1!d1-p3\">" +
416 "<span title=\"x/o:erstens\">" +
417 "a" +
418 "</span>" +
419 "</span>" +
420 "<span title=\"x/o:zweitens\">" +
421 "b" +
422 "</span>" +
423 "<span title=\"x/o:drittens\">" +
424 "c" +
425 "</span>" +
426 "<span xml:id=\"word-c1!d1-p3\">" +
427 "<span title=\"x/o:viertens\">" +
428 "a" +
429 "</span>" +
430 "</span>" +
431 "<span title=\"x/o:fünftens\">" +
432 "b" +
433 "</span>" +
434 "</span>" +
435 "<span class=\"context-right\">" +
436 "<span class=\"more\">" +
437 "</span>" +
438 "</span>",
439 km.getSnippetHTML());
440
Nils Diewald8a1fc012014-02-19 15:23:33 +0000441 km = ki.getMatchInfo("match-c1!d1-p0-5(7)2-3(4)8-8(2)7-8",
Nils Diewald345bdc02014-01-21 21:48:57 +0000442 "tokens",
443 "x",
444 null,
445 true,
446 true);
447
448 assertEquals("SnippetBrackets (1)",
449 "<span class=\"context-left\">" +
450 "</span>" +
451 "<span class=\"match\">" +
452 "<span xlink:title=\"x/rel:a\" " +
453 "xlink:type=\"simple\" " +
454 "xlink:href=\"#word-c1!d1-p3\">" +
455 "<span title=\"x/o:erstens\">" +
456 "a" +
457 "</span>" +
458 "</span>" +
459 "<span title=\"x/o:zweitens\">" +
460 "b" +
461 "</span>" +
462 "<em class=\"class-7 level-0\">" +
463 "<span title=\"x/o:drittens\">" +
464 "c" +
465 "</span>" +
466 "<span xml:id=\"word-c1!d1-p3\">" +
467 "<span title=\"x/o:viertens\">" +
468 "a" +
469 "</span>" +
470 "</span>" +
471 "</em>" +
472 "<span title=\"x/o:fünftens\">" +
473 "b" +
474 "</span>" +
475 "</span>" +
476 "<span class=\"context-right\">" +
477 "<span class=\"more\">" +
478 "</span>" +
479 "</span>",
480 km.getSnippetHTML());
481 };
482
483
Nils Diewalda1118032014-02-13 20:50:48 +0000484 @Test
485 public void indexExample7SentenceExpansion () throws IOException {
486 KorapIndex ki = new KorapIndex();
487 ki.addDoc(createSimpleFieldDoc());
488 ki.commit();
489
490 KorapMatch km = ki.getMatchInfo("match-c1!d1-p3-4",
491 "tokens",
492 null,
493 null,
494 false,
495 false);
496
497 assertEquals("... [{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}] ...",
498 km.getSnippetBrackets());
499
500 km = ki.getMatchInfo("match-c1!d1-p3-4",
501 "tokens",
502 null,
503 null,
504 false,
505 false,
506 true);
507
508 assertEquals("... [{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}] ...",
509 km.getSnippetBrackets());
510 };
Nils Diewald345bdc02014-01-21 21:48:57 +0000511
Nils Diewaldcde69082014-01-16 15:46:48 +0000512 private FieldDocument createSimpleFieldDoc(){
513 FieldDocument fd = new FieldDocument();
514 fd.addString("corpusID", "c1");
515 fd.addString("ID", "d1");
516 fd.addTV("tokens",
517 "abcabcabac",
Nils Diewald345bdc02014-01-21 21:48:57 +0000518 "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<i>4|_0#0-1|-:t$<i>10]" +
519 "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1#1-2]" +
Nils Diewalda1118032014-02-13 20:50:48 +0000520 "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2#2-3|<>:s#2-5$<i>5]" +
Nils Diewald345bdc02014-01-21 21:48:57 +0000521 "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<i>1|_3#3-4]" +
522 "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4#4-5]" +
523 "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5#5-6]" +
524 "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6#6-7]" +
525 "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag#7-10$<i>10|_7#7-8]" +
526 "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8#8-9]" +
527 "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9#9-10]");
Nils Diewaldcde69082014-01-16 15:46:48 +0000528 return fd;
529 };
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000530};