New test for long contexts
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index a2a5c7f..612fe1f 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -429,6 +429,19 @@
};
+ /*
+ Accepts a KorapInfo (with startPos, endPos, docID, etc. -
+ everything that comes from an ID)
+ and collects all information based on a prefix (like cnx/p etc.)
+
+ KorapInfo is associated with a KorapMatch and has an array with all information
+ per position in the match.
+
+ public KorapInfo infoOf (KorapMatch km, String prefix) {
+
+ };
+ */
+
@Deprecated
public long countDocuments () throws IOException {
log.warn("countDocuments() is DEPRECATED in favor of numberOf(\"documents\")!");
@@ -477,7 +490,7 @@
};
public KorapResult search (KorapSearch ks) {
- // TODO: This might leak as hell!!!
+ // TODO: This might leak
return this.search(new KorapCollection(this), ks);
};
@@ -539,8 +552,10 @@
try {
- // Rewrite query
- for (Query rewrittenQuery = query.rewrite(this.reader()); rewrittenQuery != (Query) query; rewrittenQuery = query.rewrite(this.reader())) {
+ // Rewrite query (for regex and wildcard queries)
+ for (Query rewrittenQuery = query.rewrite(this.reader());
+ rewrittenQuery != (Query) query;
+ rewrittenQuery = query.rewrite(this.reader())) {
query = (SpanQuery) rewrittenQuery;
};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index 9ce6090..7e14b8f 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -211,6 +211,13 @@
assertEquals(10, kr.getTotalResults());
assertEquals(5, kr.getStartIndex());
assertEquals(5, kr.getItemsPerPage());
+
+
+ json = getString(getClass().getResource("/queries/bsp-context-2.json").getFile());
+
+ kr = new KorapSearch(json).run(ki);
+ assertEquals(-1, kr.getTotalResults());
+ assertEquals("... lls seit den Griechen beibehalten worden. 3. Bedeutungen in der Biologie steht A für das Nukleosid Adenosin steht A die Base Adenin steht A für die Aminosäure Alanin in der Informatik steht a für den dezimalen [Wert] 97 sowohl im ASCII- als auch im Unicode-Zeichensatz steht A für den dezimalen Wert 65 sowohl im ASCII- als auch im Unicode-Zeichensatz als Kfz-Kennzeichen steht A in Deutschland für Augsburg. in Österreich auf ...", kr.getMatch(0).getSnippetBrackets());
};
@Test
diff --git a/src/test/resources/queries/bsp-context-2.json b/src/test/resources/queries/bsp-context-2.json
new file mode 100644
index 0000000..d649f5a
--- /dev/null
+++ b/src/test/resources/queries/bsp-context-2.json
@@ -0,0 +1 @@
+{"@context":{"korap":"http://korap.ids-mannheim.de/ns/query","@language":"de","operands":{"@id":"korap:operands","@container":"@list"},"relation":{"@id":"korap:relation","@type":"korap:relation#types"},"class":{"@id":"korap:class","@type":"xsd:integer"},"query":"korap:query","filter":"korap:filter","meta":"korap:meta"},"query":{"@type":"korap:token","@value":{"@type":"korap:term","@value":"base:wert","relation":"="}},"meta":[{"@type":"korap:meta-filter","@value":{"@type":"korap:term","@field":"korap:field#corpusID","@value":"WPD"}}],"startPage":1,"count":25,"context":{"left":["char",210],"right":["char",210]},"cutOff":true}