Added broken test for #179
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index 5c91eb5..0a32a18 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -938,6 +938,74 @@
assertEquals(kr.getErrstr(), "Operation needs exactly two operands");
};
+ /**
+ * Test for #179 (added as a known-broken test, per the commit subject): runs two sequence queries against an index holding only 00002, then against an index where 00001 is added before 00002, expecting the same single match each time.
+ */
+ @Test
+ public void searchJSONexpansionBug () throws IOException {
+ // Build a fresh index
+ KorapIndex ki = new KorapIndex();
+ // Index only wiki document 00002, the document the assertions below expect the match in
+ ki.addDocFile(
+ getClass().getResource("/wiki/00002.json.gz").getFile(), true
+ );
+ ki.commit();
+
+ // First query (expansion_bug_2.jsonld): all six tokens of the sequence are given explicitly
+ // Query text: der alte Digraph Aa durch Å
+ String json = getString(
+ getClass().getResource("/queries/bugs/expansion_bug_2.jsonld").getFile()
+ );
+
+ KorapResult kr = new KorapSearch(json).run(ki);
+ assertEquals("... Buchstabe des Alphabetes. In Dänemark ist [der alte Digraph Aa durch Å] ersetzt worden, in Eigennamen und Ortsnamen ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
+ assertEquals(1, kr.getTotalResults());
+
+ // Second query (expansion_bug.jsonld): der alte Digraph Aa durch [] -- the final token has no "wrap" term
+ // Original note: this works with one document in the index
+ json = getString(
+ getClass().getResource("/queries/bugs/expansion_bug.jsonld").getFile()
+ );
+
+ kr = new KorapSearch(json).run(ki);
+ assertEquals("... Buchstabe des Alphabetes. In Dänemark ist [der alte Digraph Aa durch Å] ersetzt worden, in Eigennamen und Ortsnamen ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
+ assertEquals(1, kr.getTotalResults());
+
+ // Rebuild the index with 00001 added ahead of 00002, then repeat both queries with the same expectations
+ ki = new KorapIndex();
+ for (String i : new String[] {"00001",
+ "00002"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ );
+ };
+ ki.commit();
+
+ // First query again (expansion_bug_2.jsonld), now against the two-document index
+ // Query text: der alte Digraph Aa durch Å
+ json = getString(
+ getClass().getResource("/queries/bugs/expansion_bug_2.jsonld").getFile()
+ );
+
+ kr = new KorapSearch(json).run(ki);
+ assertEquals("... Buchstabe des Alphabetes. In Dänemark ist [der alte Digraph Aa durch Å] ersetzt worden, in Eigennamen und Ortsnamen ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
+ assertEquals(1, kr.getTotalResults());
+
+ // Second query again: der alte Digraph Aa durch [] -- NOTE(review): presumably the case that currently fails for #179; confirm
+ json = getString(
+ getClass().getResource("/queries/bugs/expansion_bug.jsonld").getFile()
+ );
+
+ kr = new KorapSearch(json).run(ki);
+ assertEquals("... Buchstabe des Alphabetes. In Dänemark ist [der alte Digraph Aa durch Å] ersetzt worden, in Eigennamen und Ortsnamen ...", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
+ assertEquals(1, kr.getTotalResults());
+ };
+
+
/*
This test will crash soon - it's just here for nostalgic reasons!
diff --git a/src/test/resources/queries/bugs/expansion_bug.jsonld b/src/test/resources/queries/bugs/expansion_bug.jsonld
new file mode 100644
index 0000000..fe8b67f
--- /dev/null
+++ b/src/test/resources/queries/bugs/expansion_bug.jsonld
@@ -0,0 +1,62 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "der",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "alte",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "Digraph",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "Aa",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "durch",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token"
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}
diff --git a/src/test/resources/queries/bugs/expansion_bug_2.jsonld b/src/test/resources/queries/bugs/expansion_bug_2.jsonld
new file mode 100644
index 0000000..ebafaac
--- /dev/null
+++ b/src/test/resources/queries/bugs/expansion_bug_2.jsonld
@@ -0,0 +1,69 @@
+{
+ "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "query" : {
+ "@type" : "korap:group",
+ "operands" : [
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "der",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "alte",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "Digraph",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "Aa",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "durch",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ },
+ {
+ "@type" : "korap:token",
+ "wrap" : {
+ "@type" : "korap:term",
+ "foundry" : "opennlp",
+ "key" : "Å",
+ "layer" : "orth",
+ "match" : "match:eq"
+ }
+ }
+ ],
+ "operation" : "operation:sequence"
+ }
+}