Test for highlighted query on large data
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index 5d398b2..2072699 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -1084,7 +1084,8 @@
LinkedList<int[]> openList = new LinkedList<int[]>();
LinkedList<int[]> closeList = new LinkedList<int[]>();
- // Filter multiple identifier
+ // Filter duplicate identifiers that may have been introduced,
+ // as they would result in invalid XML
this._filterMultipleIdentifiers();
// Add highlight spans to balance lists
@@ -1274,14 +1275,6 @@
if (this.highlight != null) {
if (DEBUG)
log.trace("There are highlights!");
-
- /*
-Here maybe?
- if (this.highlight.last.start = hl.start && this.highlight.end == hl.end && this.highlight.id < -1) {
- return;
- };
- */
-
for (Highlight highlight : this.highlight) {
int start = this.positionsToOffset.start(
@@ -1364,8 +1357,10 @@
// span is an int array: [Start, End, Number, Dummy]
int highlightNumber = this.span.get(i)[2];
- // number is an identifier
+ // Number is an identifier
if (highlightNumber < -1) {
+
+ // Get the real identifier
int idNumber = identifierNumber.get(highlightNumber);
if (identifiers.contains(idNumber)) {
removeDuplicate.add(i);
@@ -1376,6 +1371,7 @@
};
};
+ // Sort the duplicate indices in descending order, so filtering starts from the tail
Collections.sort(removeDuplicate);
Collections.reverse(removeDuplicate);
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java
index 6be837f..aead247 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java
@@ -9,6 +9,7 @@
import de.ids_mannheim.korap.KorapFilter;
import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.KorapSearch;
import org.apache.lucene.store.MMapDirectory;
import de.ids_mannheim.korap.filter.BooleanFilter;
import org.apache.lucene.search.spans.SpanQuery;
@@ -34,7 +35,8 @@
// Check if the configuration was loaded fine
assertEquals(prop.getProperty("lucene.properties"), "true");
- String indexDir = prop.getProperty("lucene.index");
+ String indexDir = prop.getProperty("lucene.indexDir");
+ System.err.println("Index directory is " + indexDir);
// Get the real index
KorapIndex ki = new KorapIndex(new MMapDirectory(new File(indexDir)));
@@ -85,4 +87,49 @@
// assertEquals(14, kc.numberOf("documents"));
};
+
+
+    // Smoke test: run the bsp-class-2 query against the configured real index
+    @Test
+    public void realExample2 () throws IOException {
+        // Load configuration file; load() leaves the reader open, so close it here
+        Properties prop = new Properties();
+        FileReader fr = new FileReader(getClass().getResource("/korap.conf").getFile());
+        try {
+            prop.load(fr);
+        } finally {
+            fr.close();
+        };
+
+        // Check if the configuration was loaded fine
+        assertEquals(prop.getProperty("lucene.properties"), "true");
+        String indexDir = prop.getProperty("lucene.indexDir");
+
+        // Get the real index
+        KorapIndex ki = new KorapIndex(new MMapDirectory(new File(indexDir)));
+        // Create a container for virtual collections:
+        KorapCollection kc = new KorapCollection(ki);
+
+        // Read the JSON-LD query, run it on the index and print the result JSON to stderr
+        String json = getString(getClass().getResource("/queries/bsp-class-2.jsonld").getFile());
+        KorapResult kr = new KorapSearch(json).run(ki);
+        System.err.println(kr.toJSON());
+        // assertEquals(14, kc.numberOf("documents"));
+    };
+
+    // Read the file at path as UTF-8 (not the platform charset) into one string; line terminators are dropped
+    public static String getString (String path) {
+        StringBuilder contentBuilder = new StringBuilder();
+        try {
+            BufferedReader in = new BufferedReader(new java.io.InputStreamReader(new java.io.FileInputStream(path), "UTF-8"));
+            try {
+                for (String str = in.readLine(); str != null; str = in.readLine())
+                    contentBuilder.append(str);
+            } finally { in.close(); };   // also closes the reader when readLine() throws
+        } catch (IOException e) {
+            fail(e.getMessage());   // an unreadable file fails the calling test
+        }
+        return contentBuilder.toString();
+    };
+
};
diff --git a/src/test/resources/queries/bsp-class-2.jsonld b/src/test/resources/queries/bsp-class-2.jsonld
new file mode 100644
index 0000000..162070f
--- /dev/null
+++ b/src/test/resources/queries/bsp-class-2.jsonld
@@ -0,0 +1,32 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operation" : "operation:class",
+ "class" : 0,
+ "operands" : [ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "key" : "Straße",
+ "layer" : "l",
+ "foundry" : "cnx",
+ "match" : "match:eq"
+ }
+ } ]
+ },
+ "meta" : {
+ "context" : {
+ "left" : [ "char", 110 ],
+ "right" : [ "char", 110 ]
+ },
+ "cutOff" : false,
+ "startPage" : 1,
+ "count" : 25
+ },
+
+ "context" : {
+ "left" : [ "char", 110 ],
+ "right" : [ "char", 110 ]
+ }
+}
\ No newline at end of file