| Eliza Margaretha | 0192918 | 2014-02-19 11:48:59 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| 2 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 3 | import static org.junit.Assert.assertEquals; |
| 4 | import static org.junit.Assert.assertNull; |
| 5 | import static org.junit.Assert.assertTrue; |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 6 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 7 | import java.io.IOException; |
| 8 | import java.util.ArrayList; |
| 9 | |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 10 | import org.junit.Test; |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 11 | import org.junit.runner.RunWith; |
| 12 | import org.junit.runners.JUnit4; |
| 13 | |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 14 | import com.fasterxml.jackson.databind.JsonNode; |
| 15 | import com.fasterxml.jackson.databind.ObjectMapper; |
| 16 | |
| 17 | import de.ids_mannheim.korap.Krill; |
| 18 | import de.ids_mannheim.korap.KrillIndex; |
| 19 | import de.ids_mannheim.korap.query.QueryBuilder; |
| 20 | import de.ids_mannheim.korap.response.Match; |
| 21 | import de.ids_mannheim.korap.response.Result; |
| Nils Diewald | ff0f874 | 2015-02-26 20:42:45 +0000 | [diff] [blame] | 22 | import de.ids_mannheim.korap.response.match.MatchIdentifier; |
| 23 | import de.ids_mannheim.korap.response.match.PosIdentifier; |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 24 | import de.ids_mannheim.korap.util.QueryException; |
| 25 | |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 26 | @RunWith(JUnit4.class) |
| 27 | public class TestMatchIdentifier { |
| 28 | |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 29 | ObjectMapper mapper = new ObjectMapper(); |
| 30 | |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 31 | |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 32 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 33 | public void identifierExample1 () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 34 | MatchIdentifier id = new MatchIdentifier("match-c1!d1-p4-20"); |
| 35 | assertEquals(id.getCorpusID(), "c1"); |
| 36 | assertEquals(id.getDocID(), "d1"); |
| 37 | assertEquals(id.getStartPos(), 4); |
| 38 | assertEquals(id.getEndPos(), 20); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 39 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 40 | assertEquals(id.toString(), "match-c1!d1-p4-20"); |
| 41 | id.addPos(10, 14, 2); |
| 42 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14"); |
| 43 | id.addPos(11, 12, 5); |
| 44 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12"); |
| 45 | // Ignore |
| 46 | id.addPos(11, 12, -8); |
| 47 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12"); |
| 48 | id.addPos(11, -12, 8); |
| 49 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12"); |
| 50 | id.addPos(-11, 12, 8); |
| 51 | assertEquals(id.toString(), "match-c1!d1-p4-20(2)10-14(5)11-12"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 52 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 53 | id = new MatchIdentifier("matc-c1!d1-p4-20"); |
| 54 | assertNull(id.toString()); |
| 55 | id = new MatchIdentifier("match-d1-p4-20"); |
| 56 | assertNull(id.getCorpusID()); |
| 57 | assertEquals(id.getDocID(), "d1"); |
| 58 | id = new MatchIdentifier("match-p4-20"); |
| 59 | assertNull(id.toString()); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 60 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 61 | id = new MatchIdentifier("match-c1!d1-p4-20"); |
| 62 | assertEquals(id.toString(), "match-c1!d1-p4-20"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 63 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 64 | id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8"); |
| 65 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 66 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 67 | id = new MatchIdentifier("match-c1!d1-p4-20(5)7-8(-2)9-10"); |
| 68 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 69 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 70 | id = new MatchIdentifier( |
| 71 | "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 72 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 73 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 74 | id = new MatchIdentifier( |
| 75 | "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8"); |
| 76 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 77 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 78 | id = new MatchIdentifier( |
| 79 | "match-c1!d1-p4-20(5)7-8(-2)9-10(2)3-4(3)-5-6(4)7-8(5)9--10"); |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 80 | assertEquals(4, id.getStartPos()); |
| 81 | assertEquals(20, id.getEndPos()); |
| 82 | assertEquals("c1", id.getCorpusID()); |
| 83 | assertEquals("d1", id.getDocID()); |
| 84 | assertEquals(null, id.getTextSigle()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 85 | assertEquals(id.toString(), "match-c1!d1-p4-20(5)7-8(2)3-4(4)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 86 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 87 | id = new MatchIdentifier("match-GOE!GOE_AGF.02286-p2105-2106"); |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 88 | assertEquals(2105, id.getStartPos()); |
| 89 | assertEquals(2106, id.getEndPos()); |
| 90 | assertEquals(null, id.getCorpusID()); |
| 91 | assertEquals(null, id.getDocID()); |
| 92 | assertEquals("GOE_AGF.02286", id.getTextSigle()); |
| 93 | assertEquals("match-GOE_AGF.02286-p2105-2106", id.toString()); |
| Akron | fc2625e | 2016-07-27 01:52:28 +0200 | [diff] [blame] | 94 | |
| 95 | id = new MatchIdentifier("match-corpus-1/doc-1/text-1/p2105-2106"); |
| 96 | assertEquals("match-corpus-1/doc-1/text-1-p2105-2106", id.toString()); |
| 97 | assertEquals("corpus-1/doc-1/text-1", id.getTextSigle()); |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 98 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 99 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 100 | |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 101 | @Test |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 102 | public void posIdentifierExample1 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 103 | PosIdentifier id = new PosIdentifier(); |
| 104 | id.setCorpusID("c1"); |
| 105 | id.setDocID("d1"); |
| 106 | id.setPos(8); |
| 107 | assertEquals(id.getCorpusID(), "c1"); |
| 108 | assertEquals(id.getDocID(), "d1"); |
| 109 | assertEquals(id.getPos(), 8); |
| 110 | assertEquals(id.toString(), "word-c1!d1-p8"); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 111 | }; |
| 112 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 113 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 114 | @Test |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 115 | public void indexExample1 () throws IOException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 116 | KrillIndex ki = new KrillIndex(); |
| 117 | ki.addDoc(createSimpleFieldDoc()); |
| 118 | ki.commit(); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 119 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 120 | QueryBuilder kq = new QueryBuilder("tokens"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 121 | Krill ks = new Krill( |
| 122 | kq._(2, kq.seq(kq.seg("s:b")).append(kq._(kq.seg("s:a"))))); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 123 | Result kr = ki.search(ks); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 124 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 125 | assertEquals("totalResults", kr.getTotalResults(), 1); |
| 126 | assertEquals("StartPos (0)", kr.getMatch(0).startPos, 7); |
| 127 | assertEquals("EndPos (0)", kr.getMatch(0).endPos, 9); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 128 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 129 | Match km = kr.getMatch(0); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 130 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 131 | assertEquals("SnippetBrackets (0)", "... bcabca[[{2:b{1:a}}]]c", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 132 | km.getSnippetBrackets()); |
| 133 | assertEquals("ID (0)", "match-c1!d1-p7-9(2)7-8(1)8-8", km.getID()); |
| Nils Diewald | 68bb1f7 | 2014-01-07 14:07:05 +0000 | [diff] [blame] | 134 | }; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 135 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 136 | |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 137 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 138 | public void indexExample2 () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 139 | KrillIndex ki = new KrillIndex(); |
| 140 | ki.addDoc(createSimpleFieldDoc()); |
| 141 | ki.commit(); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 142 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 143 | Match km = ki.getMatch("match-c1!d1-p7-9(0)8-8(2)7-8"); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 144 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 145 | assertEquals("StartPos (0)", 7, km.getStartPos()); |
| 146 | assertEquals("EndPos (0)", 9, km.getEndPos()); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 147 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 148 | assertEquals("SnippetBrackets (0)", "... [[{2:b{a}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 149 | km.getSnippetBrackets()); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 150 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 151 | assertEquals("ID (0)", "match-c1!d1-p7-9(0)8-8(2)7-8", km.getID()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 152 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 153 | km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f", "m", |
| 154 | false, false); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 155 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 156 | assertEquals("SnippetBrackets (1)", |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 157 | "... [[{f/m:acht:b}{f/m:neun:a}]] ...", |
| 158 | km.getSnippetBrackets()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 159 | |
| Nils Diewald | d0d6feb | 2014-02-26 18:51:08 +0000 | [diff] [blame] | 160 | |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 161 | km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f", |
| 162 | null, false, false); |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 163 | assertEquals("SnippetBrackets (1b)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 164 | "... [[{f/m:acht:{f/y:eight:b}}{f/m:neun:{f/y:nine:a}}]] ...", |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 165 | km.getSnippetBrackets()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 166 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 167 | km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8", "tokens", "f", "m", |
| 168 | false, true); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 169 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 170 | assertEquals("SnippetBrackets (2)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 171 | "... [[{2:{f/m:acht:b}{{f/m:neun:a}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 172 | km.getSnippetBrackets()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 173 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 174 | km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", "f", "m", |
| 175 | false, true); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 176 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 177 | assertEquals("SnippetBrackets (3)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 178 | "... [[{2:{f/m:acht:b}{4:{f/m:neun:a}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 179 | km.getSnippetBrackets()); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 180 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 181 | km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", "f", |
| 182 | null, false, true); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 183 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 184 | assertEquals("SnippetBrackets (4)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 185 | "... [[{2:{f/m:acht:{f/y:eight:b}}{4:{f/m:neun:{f/y:nine:a}}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 186 | km.getSnippetBrackets()); |
| 187 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 188 | assertEquals("SnippetHTML (4)", |
| 189 | "<span class=\"context-left\">" + "<span class=\"more\">" |
| 190 | + "</span>" + "</span>" + "<span class=\"match\">" |
| 191 | + "<mark>" + "<mark class=\"class-2 level-0\">" |
| 192 | + "<span title=\"f/m:acht\">" |
| 193 | + "<span title=\"f/y:eight\">" + "b" + "</span>" |
| 194 | + "</span>" + "<mark class=\"class-4 level-1\">" |
| 195 | + "<span title=\"f/m:neun\">" |
| 196 | + "<span title=\"f/y:nine\">" + "a" + "</span>" |
| 197 | + "</span>" + "</mark>" + "</mark>" + "</mark>" |
| 198 | + "</span>" + "<span class=\"context-right\">" |
| 199 | + "<span class=\"more\">" + "</span>" + "</span>", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 200 | km.getSnippetHTML()); |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 201 | |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 202 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 203 | assertEquals("tokens", res.at("/field").asText()); |
| 204 | assertTrue(res.at("/startMore").asBoolean()); |
| 205 | assertTrue(res.at("/endMore").asBoolean()); |
| 206 | assertEquals("c1", res.at("/corpusID").asText()); |
| 207 | assertEquals("d1", res.at("/docID").asText()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 208 | assertEquals("match-c1!d1-p7-9(4)8-8(2)7-8", |
| 209 | res.at("/matchID").asText()); |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 210 | assertTrue(res.at("/pubDate").isMissingNode()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 211 | }; |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 212 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 213 | |
| 214 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 215 | public void indexExample3 () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 216 | KrillIndex ki = new KrillIndex(); |
| 217 | ki.addDoc(createSimpleFieldDoc()); |
| 218 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 219 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 220 | Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", |
| 221 | null, null, false, true); |
| Nils Diewald | 22fc3ad | 2014-01-16 19:28:31 +0000 | [diff] [blame] | 222 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 223 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 224 | assertEquals("SnippetHTML (1)", "<span class=\"context-left\">" |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 225 | + "<span class=\"more\">" + "</span>" + "</span>" |
| 226 | + "<span class=\"match\">" + "<mark>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 227 | + "<mark class=\"class-2 level-0\">" |
| 228 | + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">" |
| 229 | + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">" |
| 230 | + "b" + "</span>" + "</span>" + "</span>" + "</span>" |
| 231 | + "<mark class=\"class-4 level-1\">" |
| 232 | + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">" |
| 233 | + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">" |
| 234 | + "a" + "</span>" + "</span>" + "</span>" + "</span>" |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 235 | + "</mark>" + "</mark>" + "</mark>" + "</span>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 236 | + "<span class=\"context-right\">" + "<span class=\"more\">" |
| 237 | + "</span>" + "</span>", km.getSnippetHTML()); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 238 | }; |
| 239 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 240 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 241 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 242 | public void indexExample4 () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 243 | KrillIndex ki = new KrillIndex(); |
| 244 | ki.addDoc(createSimpleFieldDoc()); |
| 245 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 246 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 247 | Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", |
| 248 | null, null, false, false); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 249 | |
| 250 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 251 | assertEquals("SnippetHTML (1)", "<span class=\"context-left\">" |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 252 | + "<span class=\"more\">" + "</span>" + "</span>" |
| 253 | + "<span class=\"match\">" + "<mark>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 254 | + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">" |
| 255 | + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">" |
| 256 | + "b" + "</span>" + "</span>" + "</span>" + "</span>" |
| 257 | + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">" |
| 258 | + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">" |
| 259 | + "a" + "</span>" + "</span>" + "</span>" + "</span>" |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 260 | + "</mark>" + "</span>" + "<span class=\"context-right\">" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 261 | + "<span class=\"more\">" + "</span>" + "</span>", |
| 262 | km.getSnippetHTML()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 263 | }; |
| 264 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 265 | |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 266 | @Test |
| 267 | public void indexNewStructure () throws IOException, QueryException { |
| 268 | KrillIndex ki = new KrillIndex(); |
| 269 | ki.addDoc(getClass().getResourceAsStream("/goe/AGX-00002.json"), false); |
| 270 | ki.commit(); |
| 271 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 272 | Match km = ki.getMatchInfo("match-GOE!GOE_AGX.00002-p210-211", "tokens", |
| 273 | true, (String) null, (String) null, true, true, true); |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 274 | |
| 275 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 276 | assertEquals("tokens", res.at("/field").asText()); |
| 277 | assertEquals("GOE_AGX.00002", res.at("/textSigle").asText()); |
| 278 | assertEquals("Goethe, Johann Wolfgang von", res.at("/author").asText()); |
| Akron | 1ad9988 | 2016-09-29 18:48:46 +0200 | [diff] [blame] | 279 | |
| 280 | /* |
| 281 | km = ki.getMatchInfo( |
| Akron | f9def5e | 2016-10-10 21:26:46 +0200 | [diff] [blame] | 282 | "match-GOE!GOE_AGX.00002-p10-20", |
| 283 | "tokens", true, (String) null, (String) null, true, true, false); |
| Akron | 1ad9988 | 2016-09-29 18:48:46 +0200 | [diff] [blame] | 284 | assertEquals("", km.toJsonString()); |
| Akron | f9def5e | 2016-10-10 21:26:46 +0200 | [diff] [blame] | 285 | */ |
| Akron | 1f61957 | 2015-07-08 17:33:47 +0200 | [diff] [blame] | 286 | }; |
| 287 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 288 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 289 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 290 | public void indexExample5Spans () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 291 | KrillIndex ki = new KrillIndex(); |
| 292 | ki.addDoc(createSimpleFieldDoc()); |
| 293 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 294 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 295 | Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", |
| 296 | null, null, true, false); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 297 | |
| 298 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 299 | assertEquals("SnippetBrackets (1)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 300 | "... [[{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 301 | km.getSnippetBrackets()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 302 | }; |
| 303 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 304 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 305 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 306 | public void indexExample6Spans () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 307 | KrillIndex ki = new KrillIndex(); |
| 308 | ki.addDoc(createSimpleFieldDoc()); |
| 309 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 310 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 311 | Match km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8", "tokens", |
| 312 | null, null, true, false); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 313 | |
| 314 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 315 | assertEquals("SnippetBrackets (1)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 316 | "... [[{x/tag:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]]", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 317 | km.getSnippetBrackets()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 318 | }; |
| 319 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 320 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 321 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 322 | public void indexExample7Spans () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 323 | KrillIndex ki = new KrillIndex(); |
| 324 | ki.addDoc(createSimpleFieldDoc()); |
| 325 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 326 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 327 | Match km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8", "tokens", |
| 328 | null, null, true, true); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 329 | |
| 330 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 331 | assertEquals("SnippetBrackets (1)", |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 332 | "... [[{x/tag:{2:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{4:{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]]", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 333 | km.getSnippetBrackets()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 334 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 335 | assertEquals("SnippetHTML (1)", "<span class=\"context-left\">" |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 336 | + "<span class=\"more\">" + "</span>" + "</span>" |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 337 | + "<span class=\"match\">" + "<mark>" + "<span title=\"x/tag\">" |
| 338 | + "<mark class=\"class-2 level-0\">" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 339 | + "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">" |
| 340 | + "<span title=\"it/is:8\">" + "<span title=\"x/o:achtens\">" |
| 341 | + "b" + "</span>" + "</span>" + "</span>" + "</span>" |
| 342 | + "<mark class=\"class-4 level-1\">" |
| 343 | + "<span title=\"f/m:neun\">" + "<span title=\"f/y:nine\">" |
| 344 | + "<span title=\"it/is:9\">" + "<span title=\"x/o:neuntens\">" |
| 345 | + "a" + "</span>" + "</span>" + "</span>" + "</span>" |
| 346 | + "</mark>" + "</mark>" + "<span title=\"f/m:zehn\">" |
| 347 | + "<span title=\"f/y:ten\">" + "<span title=\"it/is:10\">" |
| 348 | + "<span title=\"x/o:zehntens\">" + "c" + "</span>" + "</span>" |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 349 | + "</span>" + "</span>" + "</span>" + "</mark>" + "</span>" |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 350 | + "<span class=\"context-right\">" + "</span>", |
| 351 | km.getSnippetHTML()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 352 | }; |
| 353 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 354 | |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 355 | @Test |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 356 | public void indexExample6Relations () throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 357 | KrillIndex ki = new KrillIndex(); |
| 358 | ki.addDoc(createSimpleFieldDoc()); |
| 359 | ki.commit(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 360 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 361 | Match km = ki.getMatchInfo("match-c1!d1-p0-5(4)8-8(2)7-8", "tokens", |
| 362 | "x", null, true, false); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 363 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 364 | assertEquals("SnippetBrackets (1)", |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 365 | "[[{x/o:erstens:{x/rel:a>3:a}}{x/o:zweitens:b}{x/o:drittens:c}{#3:{x/o:viertens:a}}{x/o:fünftens:b}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 366 | km.getSnippetBrackets()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 367 | |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 368 | assertEquals("SnippetHTML (1)", "<span class=\"context-left\">" |
| 369 | + "</span>" + "<span class=\"match\">" |
| 370 | + "<mark>" |
| 371 | + "<span title=\"x/o:erstens\">" |
| 372 | + "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" " |
| 373 | + "xlink:href=\"#word-c1!d1-p3\">" |
| 374 | + "a" + "</span>" |
| 375 | + "</span>" |
| 376 | + "<span title=\"x/o:zweitens\">" + "b" + "</span>" |
| 377 | + "<span title=\"x/o:drittens\">" + "c" + "</span>" |
| 378 | + "<span xml:id=\"word-c1!d1-p3\">" |
| 379 | + "<span title=\"x/o:viertens\">" + "a" + "</span>" |
| 380 | + "</span>" |
| 381 | + "<span title=\"x/o:fünftens\">" + "b" + "</span>" |
| 382 | + "</mark>" |
| 383 | + "</span>" |
| 384 | + "<span class=\"context-right\">" |
| 385 | + "<span class=\"more\">" |
| 386 | + "</span>" |
| 387 | + "</span>", |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 388 | km.getSnippetHTML()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 389 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 390 | km = ki.getMatchInfo("match-c1!d1-p0-5(7)2-3(4)8-8(2)7-8", "tokens", |
| 391 | "x", null, true, true); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 392 | |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 393 | assertEquals("SnippetHTML (2)", |
| 394 | "<span class=\"context-left\">" |
| 395 | + "</span>" + "<span class=\"match\">"+"<mark>" |
| 396 | +"<span title=\"x/o:erstens\">" |
| 397 | +"<span xlink:title=\"x/rel:a\" " + "xlink:type=\"simple\" " |
| 398 | +"xlink:href=\"#word-c1!d1-p3\">a</span>" |
| 399 | +"</span>" |
| 400 | +"<span title=\"x/o:zweitens\">b</span>" |
| 401 | +"<mark class=\"class-7 level-0\">" |
| 402 | +"<span title=\"x/o:drittens\">c</span>" |
| 403 | +"<span xml:id=\"word-c1!d1-p3\">" |
| 404 | +"<span title=\"x/o:viertens\">a</span>" |
| 405 | +"</span>" |
| 406 | +"</mark>" |
| 407 | +"<span title=\"x/o:fünftens\">b</span>" |
| 408 | +"</mark>" |
| 409 | +"</span>" |
| 410 | +"<span class=\"context-right\">" |
| 411 | +"<span class=\"more\"></span>" |
| 412 | +"</span>", |
| 413 | km.getSnippetHTML()); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 414 | }; |
| 415 | |
| 416 | |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 417 | @Test |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 418 | public void indexExample7SentenceExpansion () |
| 419 | throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 420 | KrillIndex ki = new KrillIndex(); |
| 421 | ki.addDoc(createSimpleFieldDoc()); |
| 422 | ki.addDoc(createSimpleFieldDoc2()); |
| 423 | ki.addDoc(createSimpleFieldDoc3()); |
| 424 | ki.addDoc(createSimpleFieldDoc4()); |
| 425 | ki.commit(); |
| 426 | Match km; |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 427 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 428 | km = ki.getMatchInfo("match-c1!d1-p3-4", "tokens", null, null, false, |
| 429 | false); |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 430 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 431 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 432 | "... [[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]] ...", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 433 | km.getSnippetBrackets()); |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 434 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 435 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 436 | km = ki.getMatchInfo("match-c1!d1-p3-4", "tokens", null, null, false, |
| 437 | false, true); // extendToSentence |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 438 | |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 439 | // This will |
| 440 | // [{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}] |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 441 | assertEquals( |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 442 | "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}]", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 443 | km.getSnippetBrackets()); |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 444 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 445 | assertEquals("<span class=\"context-left\"></span>" |
| 446 | + "<span class=\"match\">" + "<span title=\"f/m:drei\">" |
| 447 | + "<span title=\"f/y:three\">" + "<span title=\"it/is:3\">" |
| 448 | + "<span title=\"x/o:drittens\">c</span>" + "</span>" |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 449 | + "</span>" + "</span>" + "<mark>" + "<span title=\"f/m:vier\">" |
| 450 | + "<span title=\"f/y:four\">" + "<span title=\"it/is:4\">" |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 451 | + "<span title=\"x/o:viertens\">a</span>" + "</span>" |
| 452 | + "</span>" + "</span>" + "</mark>" |
| 453 | + "<span title=\"f/m:fuenf\">" + "<span title=\"f/y:five\">" |
| 454 | + "<span title=\"it/is:5\">" |
| 455 | + "<span title=\"x/o:fünftens\">b</span>" + "</span>" |
| 456 | + "</span>" + "</span>" + "</span>" |
| 457 | + "<span class=\"context-right\"></span>", km.getSnippetHTML()); |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 458 | |
| 459 | |
| 460 | |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 461 | /* |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 462 | km = ki.getMatchInfo("match-c1!d3-p3-4", "tokens", null, null, false, |
| 463 | false, true); // extendToSentence |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 464 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 465 | assertEquals( |
| 466 | "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:cc}}}} {f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:aa}}}} {f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:bb}}}}]", |
| 467 | km.getSnippetBrackets()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 468 | |
| 469 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 470 | km = ki.getMatchInfo("match-c1!d4-p4-6", "tokens", null, null, false, |
| 471 | false, true); // extendToSentence |
| 472 | assertEquals( |
| 473 | "[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}{f/m:sechs:{f/y:six:{it/is:6:{x/o:sechstens:c}}}}{f/m:sieben:{f/y:seven:{it/is:7:{x/o:siebtens:a}}}}]", |
| 474 | km.getSnippetBrackets()); |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 475 | */ |
| Nils Diewald | a111803 | 2014-02-13 20:50:48 +0000 | [diff] [blame] | 476 | }; |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 477 | |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 478 | |
| 479 | @Test |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 480 | public void indexExample7Dependencies () |
| 481 | throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 482 | KrillIndex ki = new KrillIndex(); |
| 483 | ki.addDoc(createSimpleFieldDoc2()); |
| 484 | ki.commit(); |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 485 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 486 | Match km = ki.getMatchInfo("match-c1!d1-p0-4", "tokens", null, null, |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 487 | true, true); |
| Nils Diewald | 6e9eb4e | 2014-06-17 19:28:01 +0000 | [diff] [blame] | 488 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 489 | assertEquals("SnippetHTML (2)", "<span class=\"context-left\">" |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 490 | + "</span>" + "<span class=\"match\">" + "<mark>" |
| 491 | + "<span title=\"f/m:eins\">" + "<span title=\"f/y:one\">" |
| 492 | + "<span title=\"it/is:1\">" |
| 493 | + "<span title=\"x/o:erstens\">" |
| 494 | + "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">" |
| 495 | + "a</span>" + "</span>" + "</span>" |
| 496 | + "</span>" + "</span>" |
| 497 | + "<span title=\"f/m:zwei\">" + "<span title=\"f/y:two\">" |
| 498 | + "<span title=\"it/is:2\">" |
| 499 | + "<span title=\"x/o:zweitens\">" |
| 500 | + "<span xlink:title=\"x/rel:b\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">" |
| 501 | + "b</span>" + "</span>" |
| 502 | + "</span>" + "</span>" + "</span>" |
| 503 | + "<span title=\"f/m:drei\">" + "<span title=\"f/y:three\">" |
| 504 | + "<span title=\"it/is:3\">" |
| 505 | + "<span title=\"x/o:drittens\">c</span>" + "</span>" |
| 506 | + "</span>" + "</span>" + "<span xml:id=\"word-c1!d1-p3\">" |
| 507 | + "<span title=\"f/m:vier\">" + "<span title=\"f/y:four\">" |
| 508 | + "<span title=\"it/is:4\">" |
| 509 | + "<span title=\"x/o:viertens\">a</span>" + "</span>" |
| 510 | + "</span>" + "</span>" + "</span>" + "</mark>" + "</span>" |
| 511 | + "<span class=\"context-right\">" + "<span class=\"more\">" |
| 512 | + "</span>" + "</span>", km.getSnippetHTML()); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 513 | }; |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 514 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 515 | |
| 516 | @Test |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 517 | public void indexExampleMultipleFoundries () |
| 518 | throws IOException, QueryException { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 519 | KrillIndex ki = new KrillIndex(); |
| 520 | ki.addDoc(createSimpleFieldDoc4()); |
| 521 | ki.commit(); |
| 522 | |
| 523 | Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "f", "m", |
| 524 | false, false); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 525 | assertEquals("f:m info", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 526 | "... [[{f/m:vier:a}{f/m:fuenf:b}{f/m:sechs:c}{f/m:sieben:a}{f/m:acht:b}{f/m:neun:a}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 527 | |
| 528 | km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "f", null, false, |
| 529 | false); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 530 | assertEquals("f info", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 531 | "... [[{f/m:vier:{f/y:four:a}}{f/m:fuenf:{f/y:five:b}}{f/m:sechs:{f/y:six:c}}{f/m:sieben:{f/y:seven:a}}{f/m:acht:{f/y:eight:b}}{f/m:neun:{f/y:nine:a}}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 532 | |
| 533 | |
| 534 | km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", null, null, false, |
| 535 | false); |
| 536 | assertEquals("all info", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 537 | "... [[{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 538 | |
| 539 | ArrayList<String> foundryList = new ArrayList<>(2); |
| 540 | foundryList.add("f"); |
| 541 | foundryList.add("x"); |
| 542 | |
| 543 | km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList, |
| 544 | (ArrayList<String>) null, false, false, false); |
| 545 | assertEquals("f|x info", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 546 | "... [[{f/m:vier:{f/y:four:{x/o:viertens:a}}}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 547 | |
| 548 | foundryList.clear(); |
| 549 | foundryList.add("y"); |
| 550 | foundryList.add("x"); |
| 551 | |
| 552 | km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList, |
| 553 | (ArrayList<String>) null, false, false, false); |
| 554 | assertEquals("y|x info", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 555 | "... [[{x/o:viertens:a}]] ..."); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 556 | |
| 557 | |
| 558 | foundryList.clear(); |
| 559 | foundryList.add("f"); |
| 560 | foundryList.add("it"); |
| 561 | |
| 562 | ArrayList<String> layerList = new ArrayList<>(2); |
| 563 | layerList.add("is"); |
| 564 | |
| 565 | km = ki.getMatchInfo("match-c1!d4-p3-4", "tokens", true, foundryList, |
| 566 | layerList, false, false, false); |
| 567 | assertEquals("f|it/is", km.getSnippetBrackets(), |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 568 | "... [[{it/is:4:a}]] ..."); |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 569 | }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 570 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 571 | |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 572 | @Test |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 573 | public void indexExampleFailingFoundry () |
| 574 | throws IOException, QueryException { |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 575 | KrillIndex ki = new KrillIndex(); |
| 576 | ki.addDoc(createSimpleFieldDoc4()); |
| 577 | ki.commit(); |
| 578 | |
| 579 | Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", "*", "m", |
| 580 | false, false); |
| 581 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 582 | assertEquals("c1", res.at("/corpusID").asText()); |
| 583 | assertEquals("d4", res.at("/docID").asText()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 584 | assertEquals("Invalid foundry requested", |
| 585 | res.at("/errors/0/1").asText()); |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 586 | }; |
| 587 | |
| 588 | |
| 589 | @Test |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 590 | public void indexFailingMatchID () throws IOException, QueryException { |
| Akron | 8abefa1 | 2016-02-13 05:35:42 +0100 | [diff] [blame] | 591 | KrillIndex ki = new KrillIndex(); |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 592 | Match km = ki.getMatchInfo( |
| 593 | "match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01", "tokens", |
| 594 | "*", "m", false, false); |
| Akron | 8abefa1 | 2016-02-13 05:35:42 +0100 | [diff] [blame] | 595 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 596 | assertEquals("730", res.at("/errors/0/0").asText()); |
| 597 | }; |
| 598 | |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 599 | |
| Akron | 8abefa1 | 2016-02-13 05:35:42 +0100 | [diff] [blame] | 600 | @Test |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 601 | public void indexExampleNullInfo () throws IOException, QueryException { |
| 602 | KrillIndex ki = new KrillIndex(); |
| 603 | ki.addDoc(createSimpleFieldDoc4()); |
| 604 | ki.commit(); |
| 605 | Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", null, null, |
| 606 | false, false); |
| 607 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 608 | assertEquals("tokens", res.at("/field").asText()); |
| 609 | assertTrue(res.at("/startMore").asBoolean()); |
| 610 | assertTrue(res.at("/endMore").asBoolean()); |
| 611 | assertEquals("c1", res.at("/corpusID").asText()); |
| 612 | assertEquals("d4", res.at("/docID").asText()); |
| 613 | assertEquals( |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 614 | "<span class=\"context-left\"><span class=\"more\"></span></span><span class=\"match\"><mark><span title=\"f/m:vier\"><span title=\"f/y:four\"><span title=\"it/is:4\"><span title=\"x/o:viertens\">a</span></span></span></span><span title=\"f/m:fuenf\"><span title=\"f/y:five\"><span title=\"it/is:5\"><span title=\"x/o:fünftens\">b</span></span></span></span><span title=\"f/m:sechs\"><span title=\"f/y:six\"><span title=\"it/is:6\"><span title=\"x/o:sechstens\">c</span></span></span></span><span title=\"f/m:sieben\"><span title=\"f/y:seven\"><span title=\"it/is:7\"><span title=\"x/o:siebtens\">a</span></span></span></span><span title=\"f/m:acht\"><span title=\"f/y:eight\"><span title=\"it/is:8\"><span title=\"x/o:achtens\">b</span></span></span></span><span title=\"f/m:neun\"><span title=\"f/y:nine\"><span title=\"it/is:9\"><span title=\"x/o:neuntens\">a</span></span></span></span></mark></span><span class=\"context-right\"><span class=\"more\"></span></span>", |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 615 | res.at("/snippet").asText()); |
| 616 | assertEquals("match-c1!d4-p3-9", res.at("/matchID").asText()); |
| 617 | assertTrue(res.at("/pubDate").isMissingNode()); |
| 618 | }; |
| 619 | |
| 620 | |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 621 | @Test |
| 622 | public void indexAttributeInfo () throws IOException, QueryException { |
| 623 | KrillIndex ki = new KrillIndex(); |
| 624 | ki.addDoc(createAttributeFieldDoc()); |
| 625 | ki.commit(); |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 626 | Match km = ki.getMatchInfo("match-ca1!da1-p7-10", "tokens", null, null, |
| 627 | false, false); |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 628 | JsonNode res = mapper.readTree(km.toJsonString()); |
| 629 | assertEquals("tokens", res.at("/field").asText()); |
| 630 | assertTrue(res.at("/startMore").asBoolean()); |
| 631 | assertTrue(res.at("/endMore").asBoolean()); |
| 632 | assertEquals("ca1", res.at("/corpusID").asText()); |
| 633 | assertEquals("da1", res.at("/docID").asText()); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 634 | assertEquals("<span class=\"context-left\">" + "<span class=\"more\">" |
| 635 | + "</span>" + "</span>" + "<span class=\"match\"><mark>" + |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 636 | // "<span title=\"@:x/s:key:value\">"+ |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 637 | "<span title=\"f/m:acht\">" + "<span title=\"f/y:eight\">" |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 638 | + "<span title=\"it/is:8\">" |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 639 | + "<span title=\"x/o:achtens\">b</span>" + |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 640 | // "</span>"+ |
| 641 | "</span>" + "</span>" + "</span>" + "<span title=\"f/m:neun\">" |
| 642 | + "<span title=\"f/y:nine\">" + "<span title=\"it/is:9\">" |
| 643 | + "<span title=\"x/o:neuntens\">a</span>" + "</span>" |
| 644 | + "</span>" + "</span>" + "<span title=\"f/m:zehn\">" |
| 645 | + "<span title=\"f/y:ten\">" + "<span title=\"it/is:10\">" |
| 646 | + "<span title=\"x/o:zehntens\">c</span>" + "</span>" |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 647 | + "</span>" + "</span>" + "</mark>" + "</span>" |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 648 | + "<span class=\"context-right\">" + "</span>", |
| 649 | res.at("/snippet").asText()); |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 650 | }; |
| 651 | |
| Akron | 394607a | 2017-05-29 13:27:37 +0200 | [diff] [blame] | 652 | @Test |
| 653 | public void indexWithFieldInfo () throws IOException, QueryException { |
| 654 | KrillIndex ki = new KrillIndex(); |
| 655 | ki.addDoc(createSimpleFieldDoc()); |
| 656 | ki.commit(); |
| 657 | |
| 658 | Match km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8", "tokens", |
| 659 | null, null, false, false); |
| 660 | assertEquals(km.getAvailability(), "CC-BY-SA"); |
| 661 | }; |
| 662 | |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 663 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 664 | private FieldDocument createSimpleFieldDoc () { |
| 665 | FieldDocument fd = new FieldDocument(); |
| 666 | fd.addString("corpusID", "c1"); |
| 667 | fd.addString("ID", "d1"); |
| Akron | 394607a | 2017-05-29 13:27:37 +0200 | [diff] [blame] | 668 | fd.addString("availability", "CC-BY-SA"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 669 | fd.addTV("tokens", "abcabcabac", |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 670 | "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 671 | + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 672 | + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 673 | + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 674 | + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]" |
| 675 | + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]" |
| 676 | + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 677 | + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 678 | + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]" |
| 679 | + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 680 | return fd; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 681 | }; |
| Nils Diewald | 8493437 | 2014-05-20 13:48:18 +0000 | [diff] [blame] | 682 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 683 | |
| 684 | private FieldDocument createSimpleFieldDoc2 () { |
| 685 | FieldDocument fd = new FieldDocument(); |
| 686 | fd.addString("corpusID", "c1"); |
| 687 | fd.addString("ID", "d1"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 688 | fd.addTV("tokens", "abcabcabac", |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 689 | "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]" |
| 690 | + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|>:x/rel:b$<b>32<i>4<s>0<s>0<s>0|_1$<i>1<i>2]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 691 | + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 692 | + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 693 | + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]" |
| 694 | + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]" |
| 695 | + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 696 | + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 697 | + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]" |
| 698 | + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 699 | return fd; |
| 700 | }; |
| 701 | |
| 702 | |
| 703 | private FieldDocument createSimpleFieldDoc3 () { |
| 704 | FieldDocument fd = new FieldDocument(); |
| 705 | fd.addString("corpusID", "c1"); |
| 706 | fd.addString("ID", "d3"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 707 | fd.addTV("tokens", "aa bb cc aa bb cc aa bb aa cc ", |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 708 | "[(0-2)s:aa|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>2|-:t$<i>10]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 709 | + "[(3-5)s:bb|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>3<i>5]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 710 | + "[(6-8)s:cc|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>6<i>8|<>:base/s:s$<b>64<i>6<i>14<i>5]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 711 | + "[(9-11)s:aa|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>9<i>11]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 712 | + "[(12-14)s:bb|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>12<i>14]" |
| 713 | + "[(15-17)s:cc|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>15<i>17]" |
| 714 | + "[(18-20)s:aa|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>18<i>20]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 715 | + "[(21-23)s:bb|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>21<i>23]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 716 | + "[(24-26)s:aa|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>24<i>26]" |
| 717 | + "[(27-29)s:cc|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>27<i>29]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 718 | return fd; |
| 719 | }; |
| 720 | |
| 721 | |
| 722 | private FieldDocument createSimpleFieldDoc4 () { |
| 723 | FieldDocument fd = new FieldDocument(); |
| 724 | fd.addString("corpusID", "c1"); |
| 725 | fd.addString("ID", "d4"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 726 | fd.addTV("tokens", "abcabcabac", |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 727 | "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 728 | + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 729 | + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 730 | + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 731 | + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 732 | + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6|<>:base/s:s$<b>64<i>5<i>7<i>7]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 733 | + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]" |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 734 | + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]" |
| margaretha | 4f99558 | 2015-12-14 14:14:34 +0100 | [diff] [blame] | 735 | + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]" |
| 736 | + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 737 | return fd; |
| Nils Diewald | 8493437 | 2014-05-20 13:48:18 +0000 | [diff] [blame] | 738 | }; |
| 739 | |
| Akron | 13db615 | 2016-02-19 14:08:38 +0100 | [diff] [blame] | 740 | |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 741 | /* |
| 742 | Check for terms|spans|rels ... |
| 743 | */ |
| 744 | private FieldDocument createAttributeFieldDoc () { |
| 745 | FieldDocument fd = new FieldDocument(); |
| 746 | fd.addString("corpusID", "ca1"); |
| 747 | fd.addString("ID", "da1"); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 748 | fd.addTV("tokens", "abcabcabac", |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 749 | "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|_0$<i>0<i>1|-:t$<i>10]" |
| 750 | + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]" |
| Akron | 43cea66 | 2016-02-15 23:43:59 +0100 | [diff] [blame] | 751 | + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]" |
| Akron | b35261a | 2016-02-10 20:24:24 +0100 | [diff] [blame] | 752 | + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|_3$<i>3<i>4]" |
| 753 | + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]" |
| 754 | + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]" |
| 755 | + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]" |
| 756 | + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/s:tag$<b>64<i>7<i>10<i>10<b>0<s>1|@:x/s:key:value$<b>17<i>10<s>1|_7$<i>7<i>8]" |
| 757 | + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]" |
| 758 | + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]"); |
| 759 | return fd; |
| 760 | }; |
| 761 | |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 762 | }; |