Added test suite for the distances bug occurring in the Schreibgebrauch instance
Change-Id: If9e8bf560bb9d283e68e9f775dccbd5a4b269c3d
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
index 5e865d1..9ee4bde 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
@@ -571,6 +571,23 @@
};
};
+ @Test
+ public void queryJSONdistancesWithRegexes () throws QueryException {
+ // "der" []{2,3} [opennlp/p="NN"]
+ try {
+ String json = getString(getClass().getResource(
+ "/queries/bugs/distances_with_regex_bug.jsonld").getFile());
+ KrillQuery kq = new KrillQuery("tokens");
+
+ assertEquals(kq.fromKoral(json).toQuery().toString(),
+ "spanDistance(SpanMultiTermQueryWrapper(tokens:/s:der/), SpanMultiTermQueryWrapper(tokens:/opennlp/p:NN/), [(w[3:4], ordered, notExcluded)])");
+ }
+ catch (QueryException e) {
+ fail(e.getMessage());
+ };
+ };
+
+
public static String getString (String path) {
StringBuilder contentBuilder = new StringBuilder();
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index 774d48e..f4c5b8e 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -1019,6 +1019,31 @@
"Operation needs operand list");
};
+
+    /**
+     * Regression test for the distance bug seen in the Schreibgebrauch instance:
+     * the first match of the regex distance query must be "der zweifach am Anfang".
+     */
+    @Test
+    public void searchJSONdistanceWithRegexesBug () throws IOException {
+        // Construct the index and add the test document
+        KrillIndex ki = new KrillIndex();
+        for (String i : new String[] { "00001" }) {
+            ki.addDoc(getClass().getResourceAsStream(
+                    "/wiki/" + i + ".json.gz"), true);
+        };
+        ki.commit();
+
+        // "der" []{2,3} [opennlp/p="NN"]
+        String json = getString(getClass().getResource(
+                "/queries/bugs/distances_with_regex_bug.jsonld").getFile());
+        Result kr = new Krill(json).apply(ki);
+
+        // JUnit order is (expected, actual), so failures are reported correctly.
+        assertEquals("Mit Ausnahme von Fremdwörtern und Namen ist das A der einzige Buchstabe im Deutschen, [[der zweifach am Anfang]] eines Wortes stehen darf, etwa im Wort Aal.",
+                kr.getMatch(0).getSnippetBrackets());
+    };
+
/**
* This is a breaking test for #179
diff --git a/src/test/resources/queries/bugs/distances_with_regex_bug.jsonld b/src/test/resources/queries/bugs/distances_with_regex_bug.jsonld
new file mode 100644
index 0000000..8d3b9a1
--- /dev/null
+++ b/src/test/resources/queries/bugs/distances_with_regex_bug.jsonld
@@ -0,0 +1,46 @@
+{
+ "@context": "http://korap.ids-mannheim.de/ns/koral/0.3/context.jsonld",
+ "query": {
+ "@type": "koral:group",
+ "operation": "operation:sequence",
+ "inOrder": true,
+ "distances": [{
+ "@type": "koral:distance",
+ "key": "w",
+ "boundary": {
+ "@type": "koral:boundary",
+ "min": 2,
+ "max": 3
+ }
+ }],
+ "operands": [
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "der",
+ "foundry": "opennlp",
+ "layer": "orth",
+ "type": "type:regex",
+ "match": "match:eq"
+ }
+ },
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "NN",
+ "foundry": "opennlp",
+ "layer": "p",
+ "type": "type:regex",
+ "match": "match:eq"
+ }
+ }
+ ]
+ },
+ "meta": {
+ "startIndex": 0,
+ "count": 5,
+ "context": "sentence"
+ }
+}