| Eliza Margaretha | 0192918 | 2014-02-19 11:48:59 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 3 | import static org.junit.Assert.assertEquals; |
| 4 | import static org.junit.Assert.assertNull; |
| 5 | import static org.junit.Assert.assertTrue; |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 6 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 7 | import java.io.IOException; |
| 8 | import java.util.ArrayList; |
| 9 | |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 10 | import org.junit.Test; |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 11 | import org.junit.runner.RunWith; |
| 12 | import org.junit.runners.JUnit4; |
| 13 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 14 | import com.fasterxml.jackson.databind.JsonNode; |
| 15 | import com.fasterxml.jackson.databind.ObjectMapper; |
| 16 | |
| 17 | import de.ids_mannheim.korap.Krill; |
| 18 | import de.ids_mannheim.korap.KrillIndex; |
| 19 | import de.ids_mannheim.korap.query.QueryBuilder; |
| 20 | import de.ids_mannheim.korap.response.Match; |
| 21 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | ff0f874 | 2015-02-26 20:42:45 +0000 | [diff] [blame] | 22 | import de.ids_mannheim.korap.response.match.MatchIdentifier; |
| 23 | import de.ids_mannheim.korap.response.match.PosIdentifier; |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 24 | import de.ids_mannheim.korap.util.QueryException; |
| 25 | |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 26 | @RunWith(JUnit4.class) |
| 27 | public class TestMatchIdentifier { |
| 28 | |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 29 | ObjectMapper mapper = new ObjectMapper(); |
| 30 | |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 31 | |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 32 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 33 | public void identifierExample1 () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 34 | MatchIdentifier id = new MatchIdentifier("match-c1!d1-p4-20"); |
| 35 | assertEquals(id.getCorpusID(), "c1"); |
| 36 | assertEquals(id.getDocID(), "d1"); |
| 37 | assertEquals(id.getStartPos(), 4); |
| 38 | assertEquals(id.getEndPos(), 20); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 39 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 40 | assertEquals(id.toString(), "match-c1!d1-p4-20"); |
| 41 | id.addPos(10, 14, 2); |
| 42 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14"); |
| 43 | id.addPos(11, 12, 5); |
| 44 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12"); |
| 45 | // Ignore |
| 46 | id.addPos(11, 12, -8); |
| 47 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12"); |
| 48 | id.addPos(11, -12, 8); |
| 49 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12"); |
| 50 | id.addPos(-11, 12, 8); |
| 51 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 52 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 53 | id = new MatchIdentifier("matc-c1!d1-p4-20"); |
| 54 | assertNull(id.toString()); |
| 55 | id = new MatchIdentifier("match-d1-p4-20"); |
| 56 | assertNull(id.getCorpusID()); |
| 57 | assertEquals(id.getDocID(), "d1"); |
| 58 | id = new MatchIdentifier("match-p4-20"); |
| 59 | assertNull(id.toString()); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 60 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 61 | id = new MatchIdentifier("match-c1!d1-p4-20"); |
| 62 | assertEquals(id.toString(), "match-c1!d1-p4-20"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 63 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 64 | id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8"); |
| 65 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 66 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 67 | id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10"); |
| 68 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 69 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 70 | id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6"); |
| 71 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 72 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 73 | id = new MatchIdentifier( |
| 74 | "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8"); |
| 75 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 76 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 77 | id = new MatchIdentifier( |
| 78 | "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8(5)9--10"); |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 79 | assertEquals(4, id.getStartPos()); |
| 80 | assertEquals(20, id.getEndPos()); |
| 81 | assertEquals("c1", id.getCorpusID()); |
| 82 | assertEquals("d1", id.getDocID()); |
| 83 | assertEquals(null, id.getTextSigle()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 84 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 85 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 86 | id = new MatchIdentifier("match-GOE!GOE_AGF.02286-p2105-2106"); |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 87 | assertEquals(2105, id.getStartPos()); |
| 88 | assertEquals(2106, id.getEndPos()); |
| 89 | assertEquals(null, id.getCorpusID()); |
| 90 | assertEquals(null, id.getDocID()); |
| 91 | assertEquals("GOE_AGF.02286", id.getTextSigle()); |
| 92 | assertEquals("match-GOE_AGF.02286-p2105-2106", id.toString()); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 93 | |
| 94 | id = new MatchIdentifier("match-corpus-1/doc-1/text-1/p2105-2106"); |
| 95 | assertEquals("match-corpus-1/doc-1/text-1-p2105-2106", id.toString()); |
| 96 | assertEquals("corpus-1/doc-1/text-1", id.getTextSigle()); |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 97 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 98 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 99 | |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 100 | @Test |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 101 | public void posIdentifierExample1 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 102 | PosIdentifier id = new PosIdentifier(); |
| 103 | id.setCorpusID("c1"); |
| 104 | id.setDocID("d1"); |
| 105 | id.setPos(8); |
| 106 | assertEquals(id.getCorpusID(), "c1"); |
| 107 | assertEquals(id.getDocID(), "d1"); |
| 108 | assertEquals(id.getPos(), 8); |
| 109 | assertEquals(id.toString(), "word-c1!d1-p8"); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 110 | }; |
| 111 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 112 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 113 | @Test |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 114 | public void indexExample1 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 115 | KrillIndex ki = new KrillIndex(); |
| 116 | ki.addDoc(createSimpleFieldDoc()); |
| 117 | ki.commit(); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 118 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 119 | QueryBuilder kq = new QueryBuilder("tokens"); |
| 120 | Krill ks = new Krill(kq._(2, |
| 121 | kq.seq(kq.seg("s:b")).append(kq._(kq.seg("s:a"))))); |
| 122 | Result kr = ki.search(ks); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 123 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 124 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| 125 | assertEquals("StartPos (0)", kr.getMatch(0).startPos, 7); |
| 126 | assertEquals("EndPos (0)", kr.getMatch(0).endPos, 9); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 127 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 128 | Match km = kr.getMatch(0); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 129 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 130 | assertEquals("SnippetBrackets (0)", "... bcabca[[{2:b{1:a}}]]c", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 131 | km.getSnippetBrackets()); |
| 132 | assertEquals("ID (0)", "match-c1!d1-p7-9(2)7-8(1)8-8", km.getID()); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 133 | }; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 134 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 135 | |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 136 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 137 | public void indexExample2 () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 138 | KrillIndex ki = new KrillIndex(); |
| 139 | ki.addDoc(createSimpleFieldDoc()); |
| 140 | ki.commit(); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 141 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 142 | Match km = ki.getMatch("match-c1!d1-p7-9(0)8-8(2)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 143 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 144 | assertEquals("StartPos (0)", 7, km.getStartPos()); |
| 145 | assertEquals("EndPos (0)", 9, km.getEndPos()); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 146 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 147 | assertEquals("SnippetBrackets (0)", "... [[{2:b{a}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 148 | km.getSnippetBrackets()); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 149 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 150 | assertEquals("ID (0)", "match-c1!d1-p7-9(0)8-8(2)7-8", km.getID()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 151 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 152 | km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f", |
| 153 | "m", false, false); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 154 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 155 | assertEquals("SnippetBrackets (1)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 156 | "... [[{f/m:acht:b}{f/m:neun:a}]] ...", km.getSnippetBrackets()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 157 | |
| Nils Diewald | d0d6feb | 2014-02-26 18:51:08 +0000 | [diff] [blame] | 158 | |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 159 | km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f", |
| 160 | null, false, false); |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 161 | assertEquals("SnippetBrackets (1b)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 162 | "... [[{f/m:acht:{f/y:eight:b}}{f/m:neun:{f/y:nine:a}}]] ...", |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 163 | km.getSnippetBrackets()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 164 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 165 | km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f", |
| 166 | "m", false, true); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 167 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 168 | assertEquals("SnippetBrackets (2)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 169 | "... [[{2:{f/m:acht:b}{{f/m:neun:a}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 170 | km.getSnippetBrackets()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 171 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 172 | km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", "f", |
| 173 | "m", false, true); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 174 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 175 | assertEquals("SnippetBrackets (3)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 176 | "... [[{2:{f/m:acht:b}{4:{f/m:neun:a}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 177 | km.getSnippetBrackets()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 178 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 179 | km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", "f", |
| 180 | null, false, true); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 181 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 182 | assertEquals( |
| 183 | "SnippetBrackets (4)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 184 | "... [[{2:{f/m:acht:{f/y:eight:b}}{4:{f/m:neun:{f/y:nine:a}}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 185 | km.getSnippetBrackets()); |
| 186 | |
| 187 | assertEquals("SnippetHTML (4)", "<span class=\"context-left\">" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 188 | + "<span class=\"more\">" + "</span>" + "</span>" + |
| 189 | "<span class=\"match\">" + "<mark>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 190 | + "<mark class=\"class-2 level-0\">" |
| 191 | + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">" |
| 192 | + "b" + "</span>" + "</span>" |
| 193 | + "<mark class=\"class-4 level-1\">" |
| 194 | + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">" |
| 195 | + "a" + "</span>" + "</span>" + "</mark>" + "</mark>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 196 | + "</mark>" + "</span>" + "<span class=\"context-right\">" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 197 | + "<span class=\"more\">" + "</span>" + "</span>", |
| 198 | km.getSnippetHTML()); |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 199 | |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 200 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 201 | assertEquals("tokens", res.at("/field").asText()); |
| 202 | assertTrue(res.at("/startMore").asBoolean()); |
| 203 | assertTrue(res.at("/endMore").asBoolean()); |
| 204 | assertEquals("c1", res.at("/corpusID").asText()); |
| 205 | assertEquals("d1", res.at("/docID").asText()); |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 206 | assertEquals("match-c1!d1-p7-9(4)8-8(2)7-8", res.at("/matchID") |
| 207 | .asText()); |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 208 | assertTrue(res.at("/pubDate").isMissingNode()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 209 | }; |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 210 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 211 | |
| 212 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 213 | public void indexExample3 () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 214 | KrillIndex ki = new KrillIndex(); |
| 215 | ki.addDoc(createSimpleFieldDoc()); |
| 216 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 217 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 218 | Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", |
| 219 | null, null, false, true); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 220 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 221 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 222 | assertEquals("SnippetHTML (1)", "<span class=\"context-left\">" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 223 | + "<span class=\"more\">" + "</span>" + "</span>" + "<span class=\"match\">" + "<mark>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 224 | + "<mark class=\"class-2 level-0\">" |
| 225 | + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">" |
| 226 | + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">" |
| 227 | + "b" + "</span>" + "</span>" + "</span>" + "</span>" |
| 228 | + "<mark class=\"class-4 level-1\">" |
| 229 | + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">" |
| 230 | + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">" |
| 231 | + "a" + "</span>" + "</span>" + "</span>" + "</span>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 232 | + "</mark>" + "</mark>" + "</mark>" + "</span>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 233 | + "<span class=\"context-right\">" + "<span class=\"more\">" |
| 234 | + "</span>" + "</span>", km.getSnippetHTML()); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 235 | }; |
| 236 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 237 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 238 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 239 | public void indexExample4 () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 240 | KrillIndex ki = new KrillIndex(); |
| 241 | ki.addDoc(createSimpleFieldDoc()); |
| 242 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 243 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 244 | Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", |
| 245 | null, null, false, false); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 246 | |
| 247 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 248 | assertEquals("SnippetHTML (1)", "<span class=\"context-left\">" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 249 | + "<span class=\"more\">" + "</span>" + "</span>" + "<span class=\"match\">" + "<mark>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 250 | + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">" |
| 251 | + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">" |
| 252 | + "b" + "</span>" + "</span>" + "</span>" + "</span>" |
| 253 | + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">" |
| 254 | + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">" |
| 255 | + "a" + "</span>" + "</span>" + "</span>" + "</span>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 256 | + "</mark>" + "</span>" + "<span class=\"context-right\">" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 257 | + "<span class=\"more\">" + "</span>" + "</span>", |
| 258 | km.getSnippetHTML()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 259 | }; |
| 260 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 261 | |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 262 | @Test |
| 263 | public void indexNewStructure () throws IOException, QueryException { |
| 264 | KrillIndex ki = new KrillIndex(); |
| 265 | ki.addDoc(getClass().getResourceAsStream("/goe/AGX-00002.json"), false); |
| 266 | ki.commit(); |
| 267 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 268 | Match km = ki.getMatchInfo("match-GOE!GOE_AGX.00002-p210-211", |
| 269 | "tokens", true, (String) null, (String) null, true, true, true); |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 270 | |
| 271 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 272 | assertEquals("tokens", res.at("/field").asText()); |
| 273 | assertEquals("GOE_AGX.00002", res.at("/textSigle").asText()); |
| 274 | assertEquals("Goethe, Johann Wolfgang von", res.at("/author").asText()); |
| 275 | }; |
| 276 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 277 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 278 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 279 | public void indexExample5Spans () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 280 | KrillIndex ki = new KrillIndex(); |
| 281 | ki.addDoc(createSimpleFieldDoc()); |
| 282 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 283 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 284 | Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", |
| 285 | null, null, true, false); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 286 | |
| 287 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 288 | assertEquals( |
| 289 | "SnippetBrackets (1)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 290 | "... [[{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 291 | km.getSnippetBrackets()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 292 | }; |
| 293 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 294 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 295 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 296 | public void indexExample6Spans () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 297 | KrillIndex ki = new KrillIndex(); |
| 298 | ki.addDoc(createSimpleFieldDoc()); |
| 299 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 300 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 301 | Match km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8", "tokens", |
| 302 | null, null, true, false); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 303 | |
| 304 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 305 | assertEquals( |
| 306 | "SnippetBrackets (1)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 307 | "... [[{x/tag:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]]", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 308 | km.getSnippetBrackets()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 309 | }; |
| 310 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 311 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 312 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 313 | public void indexExample7Spans () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 314 | KrillIndex ki = new KrillIndex(); |
| 315 | ki.addDoc(createSimpleFieldDoc()); |
| 316 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 317 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 318 | Match km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8", "tokens", |
| 319 | null, null, true, true); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 320 | |
| 321 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 322 | assertEquals( |
| 323 | "SnippetBrackets (1)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 324 | "... [[{x/tag:{2:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{4:{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]]", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 325 | km.getSnippetBrackets()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 326 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 327 | assertEquals("SnippetHTML (1)", "<span class=\"context-left\">" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 328 | + "<span class=\"more\">" + "</span>" + "</span>" + |
| 329 | "<span class=\"match\">" +"<mark>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 330 | + "<span title=\"x/tag\">" + "<mark class=\"class-2 level-0\">" |
| 331 | + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">" |
| 332 | + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">" |
| 333 | + "b" + "</span>" + "</span>" + "</span>" + "</span>" |
| 334 | + "<mark class=\"class-4 level-1\">" |
| 335 | + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">" |
| 336 | + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">" |
| 337 | + "a" + "</span>" + "</span>" + "</span>" + "</span>" |
| 338 | + "</mark>" + "</mark>" + "<span title=\"f/m:zehn\">" |
| 339 | + "<span title=\"f/y:ten\">" + "<span title=\"it/is:10\">" |
| 340 | + "<span title=\"x/o:zehntens\">" + "c" + "</span>" + "</span>" |
| 341 | + "</span>" + "</span>" + "</span>" + "</mark>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 342 | + "</span>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 343 | + "<span class=\"context-right\">" + "</span>", |
| 344 | km.getSnippetHTML()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 345 | }; |
| 346 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 347 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 348 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 349 | public void indexExample6Relations () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 350 | KrillIndex ki = new KrillIndex(); |
| 351 | ki.addDoc(createSimpleFieldDoc()); |
| 352 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 353 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 354 | Match km = ki.getMatchInfo("match-c1!d1-p0-5(4)8-8(2)7-8", "tokens", |
| 355 | "x", null, true, false); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 356 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 357 | assertEquals( |
| 358 | "SnippetBrackets (1)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 359 | "[[{x/rel:a>3:{x/o:erstens:a}}{x/o:zweitens:b}{x/o:drittens:c}{#3:{x/o:viertens:a}}{x/o:fünftens:b}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 360 | km.getSnippetBrackets()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 361 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 362 | assertEquals("SnippetBrackets (1)", |
| 363 | "<span class=\"context-left\">" |
| 364 | + "</span>" |
| 365 | + "<span class=\"match\">" |
| 366 | + "<mark>" |
| 367 | + "<span xlink:title=\"x/rel:a\" " |
| 368 | + "xlink:type=\"simple\" " + "xlink:href=\"#word-c1!d1-p3\">" |
| 369 | + "<span title=\"x/o:erstens\">" + "a" + "</span>" + "</span>" |
| 370 | + "<span title=\"x/o:zweitens\">" + "b" + "</span>" |
| 371 | + "<span title=\"x/o:drittens\">" + "c" + "</span>" |
| 372 | + "<span xml:id=\"word-c1!d1-p3\">" |
| 373 | + "<span title=\"x/o:viertens\">" + "a" + "</span>" |
| 374 | + "</span>" |
| 375 | + "<span title=\"x/o:fünftens\">" + "b" + "</span>" |
| 376 | + "</mark>" |
| 377 | + "</span>" |
| 378 | + "<span class=\"context-right\">" + "<span class=\"more\">" |
| 379 | + "</span>" + |
| 380 | "</span>", km.getSnippetHTML()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 381 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 382 | km = ki.getMatchInfo("match-c1!d1-p0-5(7)2-3(4)8-8(2)7-8", "tokens", |
| 383 | "x", null, true, true); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 384 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 385 | assertEquals("SnippetBrackets (1)", "<span class=\"context-left\">" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 386 | + "</span>" + |
| 387 | "<span class=\"match\">" + |
| 388 | "<mark>" + "<span xlink:title=\"x/rel:a\" " |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 389 | + "xlink:type=\"simple\" " + "xlink:href=\"#word-c1!d1-p3\">" |
| 390 | + "<span title=\"x/o:erstens\">" + "a" + "</span>" + "</span>" |
| 391 | + "<span title=\"x/o:zweitens\">" + "b" + "</span>" |
| 392 | + "<mark class=\"class-7 level-0\">" |
| 393 | + "<span title=\"x/o:drittens\">" + "c" + "</span>" |
| 394 | + "<span xml:id=\"word-c1!d1-p3\">" |
| 395 | + "<span title=\"x/o:viertens\">" + "a" + "</span>" + "</span>" |
| 396 | + "</mark>" + "<span title=\"x/o:fünftens\">" + "b" + "</span>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 397 | + "</mark>" + |
| 398 | "</span>" + |
| 399 | "<span class=\"context-right\">" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 400 | + "<span class=\"more\">" + "</span>" + "</span>", |
| 401 | km.getSnippetHTML()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 402 | }; |
| 403 | |
| 404 | |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 405 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 406 | public void indexExample7SentenceExpansion () throws IOException, |
| 407 | QueryException { |
| 408 | KrillIndex ki = new KrillIndex(); |
| 409 | ki.addDoc(createSimpleFieldDoc()); |
| 410 | ki.addDoc(createSimpleFieldDoc2()); |
| 411 | ki.addDoc(createSimpleFieldDoc3()); |
| 412 | ki.addDoc(createSimpleFieldDoc4()); |
| 413 | ki.commit(); |
| 414 | Match km; |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 415 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 416 | km = ki.getMatchInfo("match-c1!d1-p3-4", "tokens", null, null, false, |
| 417 | false); |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 418 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 419 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 420 | "... [[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 421 | km.getSnippetBrackets()); |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 422 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 423 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 424 | km = ki.getMatchInfo("match-c1!d1-p3-4", "tokens", null, null, false, |
| 425 | false, true); // extendToSentence |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 426 | |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 427 | // This will |
| 428 | // [{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}] |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 429 | assertEquals( |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 430 | "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}]", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 431 | km.getSnippetBrackets()); |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 432 | |
| 433 | assertEquals( |
| 434 | "<span class=\"context-left\"></span>"+ |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 435 | "<span class=\"match\">" + |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 436 | "<span title=\"f/m:drei\">"+ |
| 437 | "<span title=\"f/y:three\">"+ |
| 438 | "<span title=\"it/is:3\">"+ |
| 439 | "<span title=\"x/o:drittens\">c</span>"+ |
| 440 | "</span>"+ |
| 441 | "</span>"+ |
| 442 | "</span>"+ |
| 443 | "<mark>"+ |
| 444 | "<span title=\"f/m:vier\">"+ |
| 445 | "<span title=\"f/y:four\">"+ |
| 446 | "<span title=\"it/is:4\">"+ |
| 447 | "<span title=\"x/o:viertens\">a</span>"+ |
| 448 | "</span>"+ |
| 449 | "</span>"+ |
| 450 | "</span>"+ |
| 451 | "</mark>"+ |
| 452 | "<span title=\"f/m:fuenf\">"+ |
| 453 | "<span title=\"f/y:five\">"+ |
| 454 | "<span title=\"it/is:5\">"+ |
| 455 | "<span title=\"x/o:fünftens\">b</span>"+ |
| 456 | "</span>"+ |
| 457 | "</span>"+ |
| 458 | "</span>"+ |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 459 | "</span>" + |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 460 | "<span class=\"context-right\"></span>", |
| 461 | km.getSnippetHTML()); |
| 462 | |
| 463 | |
| 464 | |
| 465 | |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 466 | /* |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 467 | km = ki.getMatchInfo("match-c1!d3-p3-4", "tokens", null, null, false, |
| 468 | false, true); // extendToSentence |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 469 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 470 | assertEquals( |
| 471 | "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:cc}}}} {f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:aa}}}} {f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:bb}}}}]", |
| 472 | km.getSnippetBrackets()); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 473 | |
| Nils Diewald | 8493437 | 2014-05-20 13:48:18 +0000 | [diff] [blame] | 474 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 475 | km = ki.getMatchInfo("match-c1!d4-p4-6", "tokens", null, null, false, |
| 476 | false, true); // extendToSentence |
| 477 | assertEquals( |
| 478 | "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}{f/m:sechs:{f/y:six:{it/is:6:{x/o:sechstens:c}}}}{f/m:sieben:{f/y:seven:{it/is:7:{x/o:siebtens:a}}}}]", |
| 479 | km.getSnippetBrackets()); |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 480 | */ |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 481 | }; |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 482 | |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 483 | |
| 484 | @Test |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 485 | public void indexExample7Dependencies () throws IOException, QueryException { |
| 486 | KrillIndex ki = new KrillIndex(); |
| 487 | ki.addDoc(createSimpleFieldDoc2()); |
| 488 | ki.commit(); |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 489 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 490 | Match km = ki.getMatchInfo("match-c1!d1-p0-4", "tokens", null, null, |
| 491 | true, true); |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 492 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 493 | assertEquals( |
| 494 | "SnippetHTML (2)", |
| 495 | "<span class=\"context-left\">" |
| 496 | + "</span>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 497 | + "<span class=\"match\">" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 498 | + "<mark>" |
| 499 | + "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">" |
| 500 | + "<span title=\"f/m:eins\">" |
| 501 | + "<span title=\"f/y:one\">" |
| 502 | + "<span title=\"it/is:1\">" |
| 503 | + "<span title=\"x/o:erstens\">a</span>" |
| 504 | + "</span>" |
| 505 | + "</span>" |
| 506 | + "</span>" |
| 507 | + "</span>" |
| 508 | + "<span xlink:title=\"x/rel:b\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">" |
| 509 | + "<span title=\"f/m:zwei\">" |
| 510 | + "<span title=\"f/y:two\">" |
| 511 | + "<span title=\"it/is:2\">" |
| 512 | + "<span title=\"x/o:zweitens\">b</span>" + "</span>" |
| 513 | + "</span>" + "</span>" + "</span>" |
| 514 | + "<span title=\"f/m:drei\">" |
| 515 | + "<span title=\"f/y:three\">" |
| 516 | + "<span title=\"it/is:3\">" |
| 517 | + "<span title=\"x/o:drittens\">c</span>" + "</span>" |
| 518 | + "</span>" + "</span>" |
| 519 | + "<span xml:id=\"word-c1!d1-p3\">" |
| 520 | + "<span title=\"f/m:vier\">" |
| 521 | + "<span title=\"f/y:four\">" |
| 522 | + "<span title=\"it/is:4\">" |
| 523 | + "<span title=\"x/o:viertens\">a</span>" + "</span>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 524 | + "</span>" + "</span>" + "</span>" + "</mark>" |
| 525 | + "</span>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 526 | + "<span class=\"context-right\">" |
| 527 | + "<span class=\"more\">" + "</span>" + "</span>", |
| 528 | km.getSnippetHTML()); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 529 | }; |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 530 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 531 | |
| 532 | @Test |
| 533 | public void indexExampleMultipleFoundries () throws IOException, |
| 534 | QueryException { |
| 535 | KrillIndex ki = new KrillIndex(); |
| 536 | ki.addDoc(createSimpleFieldDoc4()); |
| 537 | ki.commit(); |
| 538 | |
| 539 | Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "f", "m", |
| 540 | false, false); |
| 541 | assertEquals( |
| 542 | "f:m info", |
| 543 | km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 544 | "... [[{f/m:vier:a}{f/m:fuenf:b}{f/m:sechs:c}{f/m:sieben:a}{f/m:acht:b}{f/m:neun:a}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 545 | |
| 546 | km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "f", null, false, |
| 547 | false); |
| 548 | assertEquals( |
| 549 | "f info", |
| 550 | km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 551 | "... [[{f/m:vier:{f/y:four:a}}{f/m:fuenf:{f/y:five:b}}{f/m:sechs:{f/y:six:c}}{f/m:sieben:{f/y:seven:a}}{f/m:acht:{f/y:eight:b}}{f/m:neun:{f/y:nine:a}}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 552 | |
| 553 | |
| 554 | km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", null, null, false, |
| 555 | false); |
| 556 | assertEquals("all info", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 557 | "... [[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 558 | |
| 559 | ArrayList<String> foundryList = new ArrayList<>(2); |
| 560 | foundryList.add("f"); |
| 561 | foundryList.add("x"); |
| 562 | |
| 563 | km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList, |
| 564 | (ArrayList<String>) null, false, false, false); |
| 565 | assertEquals("f|x info", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 566 | "... [[{f/m:vier:{f/y:four:{x/o:viertens:a}}}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 567 | |
| 568 | foundryList.clear(); |
| 569 | foundryList.add("y"); |
| 570 | foundryList.add("x"); |
| 571 | |
| 572 | km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList, |
| 573 | (ArrayList<String>) null, false, false, false); |
| 574 | assertEquals("y|x info", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 575 | "... [[{x/o:viertens:a}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 576 | |
| 577 | |
| 578 | foundryList.clear(); |
| 579 | foundryList.add("f"); |
| 580 | foundryList.add("it"); |
| 581 | |
| 582 | ArrayList<String> layerList = new ArrayList<>(2); |
| 583 | layerList.add("is"); |
| 584 | |
| 585 | km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList, |
| 586 | layerList, false, false, false); |
| 587 | assertEquals("f|it/is", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 588 | "... [[{it/is:4:a}]] ..."); |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 589 | }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 590 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 591 | |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 592 | @Test |
| 593 | public void indexExampleFailingFoundry () throws IOException, |
| 594 | QueryException { |
| 595 | KrillIndex ki = new KrillIndex(); |
| 596 | ki.addDoc(createSimpleFieldDoc4()); |
| 597 | ki.commit(); |
| 598 | |
| 599 | Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "*", "m", |
| 600 | false, false); |
| 601 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 602 | assertEquals("c1", res.at("/corpusID").asText()); |
| 603 | assertEquals("d4", res.at("/docID").asText()); |
| 604 | assertEquals("Invalid foundry requested", res.at("/errors/0/1") |
| 605 | .asText()); |
| 606 | }; |
| 607 | |
| 608 | |
| 609 | @Test |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 610 | public void indexFailingMatchID () throws IOException, QueryException { |
| Akron | 8abefa1 | 2016-02-13 05:35:42 +0100 | [diff] [blame] | 611 | KrillIndex ki = new KrillIndex(); |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 612 | Match km = ki.getMatchInfo( |
| 613 | "match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01", "tokens", |
| 614 | "*", "m", false, false); |
| Akron | 8abefa1 | 2016-02-13 05:35:42 +0100 | [diff] [blame] | 615 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 616 | assertEquals("730", res.at("/errors/0/0").asText()); |
| 617 | }; |
| 618 | |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 619 | |
| Akron | 8abefa1 | 2016-02-13 05:35:42 +0100 | [diff] [blame] | 620 | @Test |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 621 | public void indexExampleNullInfo () throws IOException, QueryException { |
| 622 | KrillIndex ki = new KrillIndex(); |
| 623 | ki.addDoc(createSimpleFieldDoc4()); |
| 624 | ki.commit(); |
| 625 | Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", null, null, |
| 626 | false, false); |
| 627 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 628 | assertEquals("tokens", res.at("/field").asText()); |
| 629 | assertTrue(res.at("/startMore").asBoolean()); |
| 630 | assertTrue(res.at("/endMore").asBoolean()); |
| 631 | assertEquals("c1", res.at("/corpusID").asText()); |
| 632 | assertEquals("d4", res.at("/docID").asText()); |
| 633 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 634 | "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\"><mark><span title=\"f/m:vier\"><span title=\"f/y:four\"><span title=\"it/is:4\"><span title=\"x/o:viertens\">a</span></span></span></span><span title=\"f/m:fuenf\"><span title=\"f/y:five\"><span title=\"it/is:5\"><span title=\"x/o:fünftens\">b</span></span></span></span><span title=\"f/m:sechs\"><span title=\"f/y:six\"><span title=\"it/is:6\"><span title=\"x/o:sechstens\">c</span></span></span></span><span title=\"f/m:sieben\"><span title=\"f/y:seven\"><span title=\"it/is:7\"><span title=\"x/o:siebtens\">a</span></span></span></span><span title=\"f/m:acht\"><span title=\"f/y:eight\"><span title=\"it/is:8\"><span title=\"x/o:achtens\">b</span></span></span></span><span title=\"f/m:neun\"><span title=\"f/y:nine\"><span title=\"it/is:9\"><span title=\"x/o:neuntens\">a</span></span></span></span></mark></span><span class=\"context-right\"><span class=\"more\"></span></span>", |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 635 | res.at("/snippet").asText()); |
| 636 | assertEquals("match-c1!d4-p3-9", res.at("/matchID").asText()); |
| 637 | assertTrue(res.at("/pubDate").isMissingNode()); |
| 638 | }; |
| 639 | |
| 640 | |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 641 | @Test |
| 642 | public void indexAttributeInfo () throws IOException, QueryException { |
| 643 | KrillIndex ki = new KrillIndex(); |
| 644 | ki.addDoc(createAttributeFieldDoc()); |
| 645 | ki.commit(); |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 646 | Match km = ki.getMatchInfo("match-ca1!da1-p7-10", "tokens", null, null, |
| 647 | false, false); |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 648 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 649 | assertEquals("tokens", res.at("/field").asText()); |
| 650 | assertTrue(res.at("/startMore").asBoolean()); |
| 651 | assertTrue(res.at("/endMore").asBoolean()); |
| 652 | assertEquals("ca1", res.at("/corpusID").asText()); |
| 653 | assertEquals("da1", res.at("/docID").asText()); |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 654 | assertEquals("<span class=\"context-left\">" |
| 655 | + "<span class=\"more\">" |
| 656 | + "</span>" |
| 657 | + "</span>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 658 | + "<span class=\"match\"><mark>" |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 659 | + |
| 660 | // "<span title=\"@:x/s:key:value\">"+ |
| 661 | "<span title=\"f/m:acht\">" |
| 662 | + "<span title=\"f/y:eight\">" |
| 663 | + "<span title=\"it/is:8\">" |
| 664 | + "<span title=\"x/o:achtens\">b</span>" |
| 665 | + |
| 666 | // "</span>"+ |
| 667 | "</span>" + "</span>" + "</span>" + "<span title=\"f/m:neun\">" |
| 668 | + "<span title=\"f/y:nine\">" + "<span title=\"it/is:9\">" |
| 669 | + "<span title=\"x/o:neuntens\">a</span>" + "</span>" |
| 670 | + "</span>" + "</span>" + "<span title=\"f/m:zehn\">" |
| 671 | + "<span title=\"f/y:ten\">" + "<span title=\"it/is:10\">" |
| 672 | + "<span title=\"x/o:zehntens\">c</span>" + "</span>" |
| 673 | + "</span>" + "</span>" + "</mark>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame^] | 674 | + "</span>" |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 675 | + "<span class=\"context-right\">" + "</span>", |
| 676 | res.at("/snippet").asText()); |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 677 | }; |
| 678 | |
| 679 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 680 | private FieldDocument createSimpleFieldDoc () { |
| 681 | FieldDocument fd = new FieldDocument(); |
| 682 | fd.addString("corpusID", "c1"); |
| 683 | fd.addString("ID", "d1"); |
| 684 | fd.addTV( |
| 685 | "tokens", |
| 686 | "abcabcabac", |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 687 | "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 688 | + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 689 | + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 690 | + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 691 | + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]" |
| 692 | + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]" |
| 693 | + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 694 | + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 695 | + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]" |
| 696 | + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 697 | return fd; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 698 | }; |
| Nils Diewald | 8493437 | 2014-05-20 13:48:18 +0000 | [diff] [blame] | 699 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 700 | |
| 701 | private FieldDocument createSimpleFieldDoc2 () { |
| 702 | FieldDocument fd = new FieldDocument(); |
| 703 | fd.addString("corpusID", "c1"); |
| 704 | fd.addString("ID", "d1"); |
| 705 | fd.addTV( |
| 706 | "tokens", |
| 707 | "abcabcabac", |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 708 | "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]" |
| 709 | + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|>:x/rel:b$<b>32<i>4<s>0<s>0<s>0|_1$<i>1<i>2]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 710 | + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 711 | + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 712 | + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]" |
| 713 | + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]" |
| 714 | + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 715 | + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 716 | + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]" |
| 717 | + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 718 | return fd; |
| 719 | }; |
| 720 | |
| 721 | |
| 722 | private FieldDocument createSimpleFieldDoc3 () { |
| 723 | FieldDocument fd = new FieldDocument(); |
| 724 | fd.addString("corpusID", "c1"); |
| 725 | fd.addString("ID", "d3"); |
| 726 | fd.addTV( |
| 727 | "tokens", |
| 728 | "aa bb cc aa bb cc aa bb aa cc ", |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 729 | "[(0-2)s:aa|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>2|-:t$<i>10]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 730 | + "[(3-5)s:bb|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>3<i>5]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 731 | + "[(6-8)s:cc|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>6<i>8|<>:base/s:s$<b>64<i>6<i>14<i>5]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 732 | + "[(9-11)s:aa|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>9<i>11]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 733 | + "[(12-14)s:bb|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>12<i>14]" |
| 734 | + "[(15-17)s:cc|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>15<i>17]" |
| 735 | + "[(18-20)s:aa|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>18<i>20]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 736 | + "[(21-23)s:bb|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>21<i>23]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 737 | + "[(24-26)s:aa|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>24<i>26]" |
| 738 | + "[(27-29)s:cc|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>27<i>29]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 739 | return fd; |
| 740 | }; |
| 741 | |
| 742 | |
| 743 | private FieldDocument createSimpleFieldDoc4 () { |
| 744 | FieldDocument fd = new FieldDocument(); |
| 745 | fd.addString("corpusID", "c1"); |
| 746 | fd.addString("ID", "d4"); |
| 747 | fd.addTV( |
| 748 | "tokens", |
| 749 | "abcabcabac", |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 750 | "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 751 | + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 752 | + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 753 | + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 754 | + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 755 | + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6|<>:base/s:s$<b>64<i>5<i>7<i>7]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 756 | + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 757 | + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 758 | + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]" |
| 759 | + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 760 | return fd; |
| Nils Diewald | 8493437 | 2014-05-20 13:48:18 +0000 | [diff] [blame] | 761 | }; |
| 762 | |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 763 | |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 764 | /* |
| 765 | Check for terms|spans|rels ... |
| 766 | */ |
| 767 | private FieldDocument createAttributeFieldDoc () { |
| 768 | FieldDocument fd = new FieldDocument(); |
| 769 | fd.addString("corpusID", "ca1"); |
| 770 | fd.addString("ID", "da1"); |
| 771 | fd.addTV( |
| 772 | "tokens", |
| 773 | "abcabcabac", |
| 774 | "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|_0$<i>0<i>1|-:t$<i>10]" |
| 775 | + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 776 | + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]" |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 777 | + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|_3$<i>3<i>4]" |
| 778 | + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]" |
| 779 | + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]" |
| 780 | + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]" |
| 781 | + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/s:tag$<b>64<i>7<i>10<i>10<b>0<s>1|@:x/s:key:value$<b>17<i>10<s>1|_7$<i>7<i>8]" |
| 782 | + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]" |
| 783 | + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]"); |
| 784 | return fd; |
| 785 | }; |
| 786 | |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 787 | }; |