Add support for identifiers with dashes (Schreibgebrauch project)

Change-Id: Ic177c055a14438415c0bcb0cd45d4788f375042f
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 796a6d9..a9bb540 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -28,7 +28,6 @@
 
     ObjectMapper mapper = new ObjectMapper();
 
-
     @Test
     public void identifierExample1 () throws IOException, QueryException {
         MatchIdentifier id = new MatchIdentifier("match-c1!d1-p4-20");
@@ -550,6 +549,17 @@
 
 
     @Test
+    public void indexFailingMatchID () throws IOException,
+            QueryException {
+        KrillIndex ki = new KrillIndex();
+        Match km = ki.getMatchInfo("match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01",
+                                   "tokens", "*", "m",
+                                   false, false);
+        JsonNode res = mapper.readTree(km.toJsonString());
+        assertEquals("730", res.at("/errors/0/0").asText());
+    };
+
+    @Test
     public void indexExampleNullInfo () throws IOException, QueryException {
         KrillIndex ki = new KrillIndex();
         ki.addDoc(createSimpleFieldDoc4());
diff --git a/src/test/java/de/ids_mannheim/korap/response/TestMatch.java b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
new file mode 100644
index 0000000..182300a
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java
@@ -0,0 +1,37 @@
+package de.ids_mannheim.korap.response;
+
+import de.ids_mannheim.korap.response.Match;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestMatch {
+
+    @Test
+    public void testNoMatch () {
+        Match m = new Match("aaa", false);
+        assertEquals(null, m.getID());
+    };
+
+    @Test
+    public void testMatchBug () {
+        Match m = new Match("match-PRO-DUD!PRO-DUD_KSTA-2013-01.7483-2013-01", false);
+        assertEquals(null, m.getID());
+    };
+
+    @Test
+    public void testMatchTextSigle1 () {
+        Match m = new Match("match-GOE!GOE_AGK.00000-p60348-60349", false);
+        assertEquals("GOE_AGK.00000", m.getTextSigle());
+    };
+
+    @Test
+    public void testMatchTextSigle2 () {
+        Match m = new Match("match-PRO-DUD!PRO-DUD_KSTA-2013-01.3651-p326-327", false);
+        assertEquals("PRO-DUD_KSTA-2013-01.3651", m.getTextSigle());
+    };
+};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index e7a6f2f..ed64596 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -1092,4 +1092,29 @@
         assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
         assertEquals(kr.getTotalResults(), 1);
     };
+
+    /**
+     * This is a Schreibgebrauch resource that didn't work for element queries.
+     */
+    @Test
+    public void searchSchreibgebrauchData () throws IOException {
+        // Construct index
+        KrillIndex ki = new KrillIndex();
+        // Indexing test files
+        ki.addDoc(getClass().getResourceAsStream("/sgbr/BSP-2013-01-32.json.gz"), true);
+        ki.commit();
+
+        Krill k = new Krill(new QueryBuilder("tokens").tag("base/s:s"));
+
+        assertEquals(k.getSpanQuery().toString(),
+                "<tokens:base/s:s />");
+
+        Result kr = k.apply(ki);
+        assertEquals(kr.getTotalResults(), 1);
+        assertEquals(kr.getMatch(0).getSnippetBrackets(),
+                "[Selbst ist der Jeck]");
+
+        assertEquals(kr.getMatch(0).getTextSigle(), "PRO-DUD_BSP-2013-01.32");
+    };
+
 };
diff --git a/src/test/resources/sgbr/BSP-2013-01-32.json b/src/test/resources/sgbr/BSP-2013-01-32.json
new file mode 100644
index 0000000..b059e10
--- /dev/null
+++ b/src/test/resources/sgbr/BSP-2013-01-32.json
@@ -0,0 +1 @@
+{"language":"de","store":{"sgbrKodex":"T","funder":"Bundesministerium für Bildung und Forschung"},"version":"0.03","data":{"tokenSource":"sgbr#lemma","foundries":"base base/sentences dereko dereko/structure sgbr sgbr/lemma sgbr/morpho","text":"Selbst ist der Jeck","layerInfos":"base/s=spans dereko/s=spans sgbr/l=tokens sgbr/lv=tokens sgbr/p=tokens","stream":[["-:base/sentences$<i>1","-:tokens$<i>4","<>:dereko/s:w$<b>64<i>0<i>6<i>1<b>4<s>2","<>:dereko/s:text$<b>64<i>0<i>18<i>3<b>0","<>:dereko/s:div$<b>64<i>0<i>18<i>3<b>1","<>:dereko/s:head$<b>64<i>0<i>18<i>3<b>2","<>:dereko/s:s$<b>64<i>0<i>18<i>3<b>3<s>1","<>:base/s:t$<b>64<i>0<i>19<i>4<b>0","<>:base/s:s$<b>64<i>0<i>19<i>4<b>2","@:dereko/s:n:1$<b>17<s>1<i>3","@:dereko/s:ana:\\#PRO.DUD.BSP.2013.01.POS.NE$<b>17<s>2<i>1","@:dereko/s:n:1$<b>17<s>2<i>1","@:dereko/s:lemmaRef:\\#PRO.DUD.BSP.2013.01.Lemmata.3773$<b>17<s>2<i>1","_0$<i>0<i>6","i:selbst","s:Selbst","sgbr/l:Selbst","sgbr/p:NE"],["<>:dereko/s:w$<b>64<i>7<i>10<i>2<b>4<s>1","@:dereko/s:lemmaRef:\\#PRO.DUD.BSP.2013.01.Lemmata.2$<b>17<s>1<i>2","@:dereko/s:ana:\\#PRO.DUD.BSP.2013.01.POS.VVFIN$<b>17<s>1<i>2","@:dereko/s:n:2$<b>17<s>1<i>2","_1$<i>7<i>10","i:ist","s:ist","sgbr/l:sein","sgbr/p:VVFIN"],["<>:dereko/s:w$<b>64<i>11<i>14<i>3<b>4<s>1","@:dereko/s:lemmaRef:\\#PRO.DUD.BSP.2013.01.Lemmata.3$<b>17<s>1<i>3","@:dereko/s:ana:\\#PRO.DUD.BSP.2013.01.POS.ART$<b>17<s>1<i>3","@:dereko/s:n:3$<b>17<s>1<i>3","_2$<i>11<i>14","i:der","s:der","sgbr/l:d_art","sgbr/p:ART"],["<>:dereko/s:div$<b>65<i>18<i>18<i>3<b>2","<>:dereko/s:p$<b>65<i>18<i>18<i>3<b>3","_3$<i>15<i>19","i:jeck","s:Jeck","sgbr/l:Jeck","sgbr/p:NE"]],"name":"tokens"},"corpusSigle":"PRO-DUD","author":"unbekannt","keywords":"sgbrKodex:T","docTitle":"Korpus zur Beobachtung des Schreibgebrauchs im Deutschen","docSigle":"PRO-DUD_BSP-2013-01","textSigle":"PRO-DUD_BSP-2013-01.32","docSubTitle":"Subkorpus Ortsblatt, Jahrgang 2013, Monat Januar","pubPlace":"Stadtingen","title":"Nur Platt, kein 
Deutsch","publisher":"Dorfblatt GmbH","pubDate":"20130126"}
\ No newline at end of file
diff --git a/src/test/resources/sgbr/BSP-2013-01-32.json.gz b/src/test/resources/sgbr/BSP-2013-01-32.json.gz
new file mode 100644
index 0000000..d9f206a
--- /dev/null
+++ b/src/test/resources/sgbr/BSP-2013-01-32.json.gz
Binary files differ