Add support for identifiers with dashes (Schreibgebrauch project)
Change-Id: Ic177c055a14438415c0bcb0cd45d4788f375042f
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 796a6d9..a9bb540 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -28,7 +28,6 @@
ObjectMapper mapper = new ObjectMapper();
-
@Test
public void identifierExample1 () throws IOException, QueryException {
MatchIdentifier id = new MatchIdentifier("match-c1!d1-p4-20");
@@ -550,6 +549,17 @@
@Test
+ public void indexFailingMatchID () throws IOException,
+ QueryException {
+ KrillIndex ki = new KrillIndex();
+ Match km = ki.getMatchInfo("match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01",
+ "tokens", "*", "m",
+ false, false);
+ JsonNode res = mapper.readTree(km.toJsonString());
+ assertEquals("730", res.at("/errors/0/0").asText());
+ };
+
+ @Test
public void indexExampleNullInfo () throws IOException, QueryException {
KrillIndex ki = new KrillIndex();
ki.addDoc(createSimpleFieldDoc4());
diff --git a/src/test/java/de/ids_mannheim/korap/response/TestMatch.java b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
new file mode 100644
index 0000000..182300a
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
@@ -0,0 +1,37 @@
+package de.ids_mannheim.korap.response;
+
+import de.ids_mannheim.korap.response.Match;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Checks which parts Match extracts from match ID strings, including text sigles that contain dashes. */
+@RunWith(JUnit4.class)
+public class TestMatch {
+ @Test
+ public void testNoMatch () {
+ Match m = new Match("aaa", false);
+ assertNull(m.getID()); // "aaa" is not a match ID, so no ID is expected
+ };
+ // This ID ends in "-2013-01" rather than a "-p<start>-<end>" part as in the sigle tests below; no ID is expected.
+ @Test
+ public void testMatchBug () {
+ Match m = new Match("match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01", false);
+ assertNull(m.getID());
+ };
+ // Text sigle without dashes: the part between "!" and "-p60348-60349" is expected back.
+ @Test
+ public void testMatchTextSigle1 () {
+ Match m = new Match("match-GOE!GOE_AGK.00000-p60348-60349", false);
+ assertEquals("GOE_AGK.00000", m.getTextSigle());
+ };
+ // Text sigle containing dashes: the dashes inside the sigle must not be confused with the "-p326-327" part.
+ @Test
+ public void testMatchTextSigle2 () {
+ Match m = new Match("match-PRO-DUD!PRO-DUD_KSTA-2013-01.3651-p326-327", false);
+ assertEquals("PRO-DUD_KSTA-2013-01.3651", m.getTextSigle());
+ };
+};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index e7a6f2f..ed64596 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -1092,4 +1092,29 @@
assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
assertEquals(kr.getTotalResults(), 1);
};
+
+ /**
+ * Regression test: a Schreibgebrauch resource for which the element query base/s:s didn't work.
+ */
+ @Test
+ public void searchSchreibgebrauchData () throws IOException {
+ // Construct index
+ KrillIndex ki = new KrillIndex();
+ // Index the gzipped test document (the second argument presumably flags gzip input - confirm against KrillIndex.addDoc)
+ ki.addDoc(getClass().getResourceAsStream("/sgbr/BSP-2013-01-32.json.gz"), true);
+ ki.commit();
+
+ Krill k = new Krill(new QueryBuilder("tokens").tag("base/s:s"));
+
+ assertEquals("<tokens:base/s:s />",
+ k.getSpanQuery().toString());
+
+ Result kr = k.apply(ki);
+ assertEquals(1, kr.getTotalResults());
+ assertEquals("[Selbst ist der Jeck]",
+ kr.getMatch(0).getSnippetBrackets());
+
+ assertEquals("PRO-DUD_BSP-2013-01.32", kr.getMatch(0).getTextSigle());
+ };
+
};
diff --git a/src/test/resources/sgbr/BSP-2013-01-32.json b/src/test/resources/sgbr/BSP-2013-01-32.json
new file mode 100644
index 0000000..b059e10
--- /dev/null
+++ b/src/test/resources/sgbr/BSP-2013-01-32.json
@@ -0,0 +1 @@
+{"language":"de","store":{"sgbrKodex":"T","funder":"Bundesministerium für Bildung und Forschung"},"version":"0.03","data":{"tokenSource":"sgbr#lemma","foundries":"base base/sentences dereko dereko/structure sgbr sgbr/lemma sgbr/morpho","text":"Selbst ist der Jeck","layerInfos":"base/s=spans dereko/s=spans sgbr/l=tokens sgbr/lv=tokens sgbr/p=tokens","stream":[["-:base/sentences$<i>1","-:tokens$<i>4","<>:dereko/s:w$<b>64<i>0<i>6<i>1<b>4<s>2","<>:dereko/s:text$<b>64<i>0<i>18<i>3<b>0","<>:dereko/s:div$<b>64<i>0<i>18<i>3<b>1","<>:dereko/s:head$<b>64<i>0<i>18<i>3<b>2","<>:dereko/s:s$<b>64<i>0<i>18<i>3<b>3<s>1","<>:base/s:t$<b>64<i>0<i>19<i>4<b>0","<>:base/s:s$<b>64<i>0<i>19<i>4<b>2","@:dereko/s:n:1$<b>17<s>1<i>3","@:dereko/s:ana:\\#PRO.DUD.BSP.2013.01.POS.NE$<b>17<s>2<i>1","@:dereko/s:n:1$<b>17<s>2<i>1","@:dereko/s:lemmaRef:\\#PRO.DUD.BSP.2013.01.Lemmata.3773$<b>17<s>2<i>1","_0$<i>0<i>6","i:selbst","s:Selbst","sgbr/l:Selbst","sgbr/p:NE"],["<>:dereko/s:w$<b>64<i>7<i>10<i>2<b>4<s>1","@:dereko/s:lemmaRef:\\#PRO.DUD.BSP.2013.01.Lemmata.2$<b>17<s>1<i>2","@:dereko/s:ana:\\#PRO.DUD.BSP.2013.01.POS.VVFIN$<b>17<s>1<i>2","@:dereko/s:n:2$<b>17<s>1<i>2","_1$<i>7<i>10","i:ist","s:ist","sgbr/l:sein","sgbr/p:VVFIN"],["<>:dereko/s:w$<b>64<i>11<i>14<i>3<b>4<s>1","@:dereko/s:lemmaRef:\\#PRO.DUD.BSP.2013.01.Lemmata.3$<b>17<s>1<i>3","@:dereko/s:ana:\\#PRO.DUD.BSP.2013.01.POS.ART$<b>17<s>1<i>3","@:dereko/s:n:3$<b>17<s>1<i>3","_2$<i>11<i>14","i:der","s:der","sgbr/l:d_art","sgbr/p:ART"],["<>:dereko/s:div$<b>65<i>18<i>18<i>3<b>2","<>:dereko/s:p$<b>65<i>18<i>18<i>3<b>3","_3$<i>15<i>19","i:jeck","s:Jeck","sgbr/l:Jeck","sgbr/p:NE"]],"name":"tokens"},"corpusSigle":"PRO-DUD","author":"unbekannt","keywords":"sgbrKodex:T","docTitle":"Korpus zur Beobachtung des Schreibgebrauchs im Deutschen","docSigle":"PRO-DUD_BSP-2013-01","textSigle":"PRO-DUD_BSP-2013-01.32","docSubTitle":"Subkorpus Ortsblatt, Jahrgang 2013, Monat Januar","pubPlace":"Stadtingen","title":"Nur Platt, kein 
Deutsch","publisher":"Dorfblatt GmbH","pubDate":"20130126"}
\ No newline at end of file
diff --git a/src/test/resources/sgbr/BSP-2013-01-32.json.gz b/src/test/resources/sgbr/BSP-2013-01-32.json.gz
new file mode 100644
index 0000000..d9f206a
--- /dev/null
+++ b/src/test/resources/sgbr/BSP-2013-01-32.json.gz
Binary files differ