New feature and some bug fixes concerning span-based context extension
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index cd74675..6589437 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -209,8 +209,8 @@
ks.setCount(1);
ks.setCutOff(true);
- ks.leftContext.setToken(true).setLength(6);
- ks.leftContext.setCharacter(true).setLength(6);
+ ks.context.left.setCharacter(true).setLength(6);
+ ks.context.right.setToken(true).setLength(6);
assertEquals("... e des [{1:lateinischen Alphabets}] und ein Vokal. Der Buchstabe A ...", ks.run(ki).getMatch(0).getSnippetBrackets());
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 6481153..457be63 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -117,6 +117,7 @@
assertEquals("SnippetBrackets (0)",
"... [{2:b{a}}] ...",
km.getSnippetBrackets());
+
assertEquals("ID (0)", "match-c1!d1-p7-9(0)8-8(2)7-8", km.getID());
km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8",
@@ -498,9 +499,12 @@
public void indexExample7SentenceExpansion () throws IOException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
+ ki.addDoc(createSimpleFieldDoc2());
+ ki.addDoc(createSimpleFieldDoc3());
ki.commit();
+ KorapMatch km;
- KorapMatch km = ki.getMatchInfo("match-c1!d1-p3-4",
+ km = ki.getMatchInfo("match-c1!d1-p3-4",
"tokens",
null,
null,
@@ -510,16 +514,29 @@
assertEquals("... [{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}] ...",
km.getSnippetBrackets());
+
km = ki.getMatchInfo("match-c1!d1-p3-4",
"tokens",
null,
null,
false,
false,
- true);
+ true); // extendToSentence
- assertEquals("... [{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}] ...",
+ assertEquals("[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:c}}}}{f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:a}}}}{f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:b}}}}]",
km.getSnippetBrackets());
+
+ km = ki.getMatchInfo("match-c1!d3-p3-4",
+ "tokens",
+ null,
+ null,
+ false,
+ false,
+ true); // extendToSentence
+
+ assertEquals("[{f/m:drei:{f/y:three:{it/is:3:{x/o:drittens:cc}}}} {f/m:vier:{f/y:four:{it/is:4:{x/o:viertens:aa}}}} {f/m:fuenf:{f/y:five:{it/is:5:{x/o:fünftens:bb}}}}]",
+ km.getSnippetBrackets());
+
};
@Test
@@ -619,4 +636,30 @@
"[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9#9-10]");
return fd;
};
+
+ /**
+ * Builds the test document "d3" of corpus "c1": ten two-character
+ * tokens separated by single spaces. The sentence span ({@code <>:s})
+ * covers characters 6-14, i.e. the tokens "cc aa bb" at positions 2 to 4.
+ * The {@code x/tag} span starting at token 7 uses character offsets as
+ * well and runs to the end of the last token (characters 21-29).
+ */
+ private FieldDocument createSimpleFieldDoc3(){
+ FieldDocument fd = new FieldDocument();
+ fd.addString("corpusID", "c1");
+ fd.addString("ID", "d3");
+ fd.addTV("tokens",
+ "aa bb cc aa bb cc aa bb aa cc ",
+ "[(0-2)s:aa|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<i>4|_0#0-2|-:t$<i>10]" +
+ "[(3-5)s:bb|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1#3-5]" +
+ "[(6-8)s:cc|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2#6-8|<>:s#6-14$<i>5]" +
+ "[(9-11)s:aa|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<i>1|_3#9-11]" +
+ "[(12-14)s:bb|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4#12-14]" +
+ "[(15-17)s:cc|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5#15-17]" +
+ "[(18-20)s:aa|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6#18-20]" +
+ "[(21-23)s:bb|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag#21-29$<i>10|_7#21-23]" +
+ "[(24-26)s:aa|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8#24-26]" +
+ "[(27-29)s:cc|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9#27-29]");
+ return fd;
+ };
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRegexWildcardIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRegexWildcardIndex.java
index 0cddcf9..083f8a7 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRegexWildcardIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRegexWildcardIndex.java
@@ -53,8 +53,8 @@
assertEquals("SpanMultiTermQueryWrapper(base:/s:af*e/)", sq.toString());
KorapSearch ks = new KorapSearch(sq);
- ks.leftContext.setToken(true).setLength(1);
- ks.rightContext.setToken(true).setLength(1);
+ ks.context.left.setToken(true).setLength(1);
+ ks.context.right.setToken(true).setLength(1);
KorapResult kr = ki.search(ks);
assertEquals(2, kr.getTotalResults());
@@ -110,8 +110,8 @@
assertEquals("SpanMultiTermQueryWrapper(base:s:af*e)", sq.toString());
KorapSearch ks = new KorapSearch(sq);
- ks.leftContext.setToken(true).setLength(1);
- ks.rightContext.setToken(true).setLength(1);
+ ks.context.left.setToken(true).setLength(1);
+ ks.context.right.setToken(true).setLength(1);
KorapResult kr = ki.search(ks);
assertEquals(2, kr.getTotalResults());
@@ -169,8 +169,8 @@
assertEquals("SpanMultiTermQueryWrapper(base:/i:af*e/)", sq.toString());
KorapSearch ks = new KorapSearch(sq);
- ks.leftContext.setToken(true).setLength(1);
- ks.rightContext.setToken(true).setLength(1);
+ ks.context.left.setToken(true).setLength(1);
+ ks.context.right.setToken(true).setLength(1);
KorapResult kr = ki.search(ks);
assertEquals(2, kr.getTotalResults());
@@ -234,8 +234,8 @@
assertEquals("spanNext(base:s:affe, SpanMultiTermQueryWrapper(base:/s:af*e/))", sq.toString());
KorapSearch ks = new KorapSearch(sq);
- ks.leftContext.setToken(true).setLength(1);
- ks.rightContext.setToken(true).setLength(1);
+ ks.context.left.setToken(true).setLength(1);
+ ks.context.right.setToken(true).setLength(1);
KorapResult kr = ki.search(ks);
assertEquals(1, kr.getTotalResults());
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index ac89202..d2511ec 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -8,6 +8,7 @@
import de.ids_mannheim.korap.KorapQuery;
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.index.SearchContext;
import de.ids_mannheim.korap.KorapFilter;
import de.ids_mannheim.korap.KorapResult;
import java.nio.file.Files;
@@ -88,8 +89,8 @@
);
ks.setCount(3);
ks.setStartIndex(5);
- ks.leftContext.setLength(1);
- ks.rightContext.setLength(1);
+ ks.context.left.setLength(1);
+ ks.context.right.setLength(1);
KorapResult kr = ks.run(ki);
assertEquals(6, kr.totalResults());
assertEquals(kr.getMatch(0).getSnippetBrackets(), "... dem [Buchstaben] A ...");
@@ -349,6 +350,57 @@
assertEquals(10, kr.getItemsPerPage());
};
+
+ /**
+ * Runs the same token query twice against the wiki test documents:
+ * first with the context length narrowed to 10 on both sides through
+ * the SearchContext, then with the context "sentence" requested in
+ * the query's meta section, and checks the bracket snippets.
+ */
+ @Test
+ public void searchJSONSentenceContext () throws IOException {
+
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String docName : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + docName + ".json.gz").getFile(), true
+ );
+ };
+ ki.commit();
+
+ String json = getString(getClass().getResource("/queries/bsp-context-2.jsonld").getFile());
+
+ // The query file asks for 210 characters of context; narrow both sides to 10
+ KorapSearch ks = new KorapSearch(json);
+ ks.setCutOff(false);
+ SearchContext sc = ks.getContext();
+ sc.left.setLength((short) 10);
+ sc.right.setLength((short) 10);
+
+ KorapResult kr = ks.run(ki);
+ // Check the result size before indexing into the match list
+ assertEquals(3, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(25, kr.getItemsPerPage());
+ // assertEquals takes (expected, actual)
+ assertEquals("... dezimalen [Wert] 65 sowohl ...", kr.getMatch(1).getSnippetBrackets());
+
+ json = getString(getClass().getResource("/queries/bsp-context-sentence.jsonld").getFile());
+
+ // The expected snippets are whole sentences without "..." markers
+ kr = new KorapSearch(json).run(ki);
+ assertEquals("steht a für den dezimalen [Wert] 97 sowohl im ASCII- als auch im Unicode-Zeichensatz",
+ kr.getMatch(0).getSnippetBrackets());
+ assertEquals("steht A für den dezimalen [Wert] 65 sowohl im ASCII- als auch im Unicode-Zeichensatz",
+ kr.getMatch(1).getSnippetBrackets());
+ assertEquals("In einem Zahlensystem mit einer Basis größer als 10 steht A oder a häufig für den dezimalen [Wert] 10, siehe auch Hexadezimalsystem.",
+ kr.getMatch(2).getSnippetBrackets());
+
+ };
+
+
@Test
public void getFoundryDistribution () throws Exception {
diff --git a/src/test/resources/queries/bsp-context-2.jsonld b/src/test/resources/queries/bsp-context-2.jsonld
index 605811a..ceed6ac 100644
--- a/src/test/resources/queries/bsp-context-2.jsonld
+++ b/src/test/resources/queries/bsp-context-2.jsonld
@@ -1,27 +1,29 @@
{
- "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
- "query":{
- "@type":"korap:token",
- "wrap":{
- "@type":"korap:term",
- "foundry" : "mate",
- "layer":"l",
- "key":"wert",
- "match":"match:eq"
- }
- },
- "collections":[
- {
- "@type":"korap:meta-filter",
- "@value":{
- "@type":"korap:term",
- "@field":"korap:field#corpusID",
- "@value":"WPD"
- }
- }
- ],
- "meta":{
- "startPage":1,
- "count":25,
- "context":{"left":["char",210],"right":["char",210]},"cutOff":true}
-}
\ No newline at end of file
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query":{
+ "@type":"korap:token",
+ "wrap":{
+ "@type":"korap:term",
+ "foundry" : "mate",
+ "layer":"l",
+ "key":"wert",
+ "match":"match:eq"
+ }
+ },
+ "collections":[
+ {
+ "@type":"korap:meta-filter",
+ "@value":{
+ "@type":"korap:term",
+ "@field":"korap:field#corpusID",
+ "@value":"WPD"
+ }
+ }
+ ],
+ "meta":{
+ "startPage":1,
+ "count":25,
+ "context":{"left":["char",210],"right":["char",210]},
+ "cutOff":true
+ }
+}
diff --git a/src/test/resources/queries/bsp-context-sentence.jsonld b/src/test/resources/queries/bsp-context-sentence.jsonld
new file mode 100644
index 0000000..123ce0b
--- /dev/null
+++ b/src/test/resources/queries/bsp-context-sentence.jsonld
@@ -0,0 +1,28 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query":{
+ "@type":"korap:token",
+ "wrap":{
+ "@type":"korap:term",
+ "foundry" : "mate",
+ "layer":"l",
+ "key":"wert",
+ "match":"match:eq"
+ }
+ },
+ "collections":[
+ {
+ "@type":"korap:meta-filter",
+ "@value":{
+ "@type":"korap:term",
+ "@field":"korap:field#corpusID",
+ "@value":"WPD"
+ }
+ }
+ ],
+ "meta":{
+ "startPage":1,
+ "count":25,
+ "context":"sentence"
+ }
+}