Added test on behaviour of duplicate documents

Change-Id: Ib4d637e536e9bb8ea37c627375a39aa7c04722ef
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
index ff15e13..80737cd 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
@@ -9,9 +9,12 @@
     private ArrayList<int[]> pos = new ArrayList<>(8);
 
     // Remember: "contains" is necessary for a compatibility bug in Kustvakt
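+	// Identifier pattern is "match-" (or "contains-"), an optional "<prefix>!" or "<prefix>.", then "<id>-p<n>-<m>" (or "<id>/p<n>-<m>"), optional "(<k>)<n>-<m>" groups and an optional "c..." suffix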
     Pattern idRegex = Pattern.compile("^(?:match-|contains-)"
-            + "(?:([^!]+?)[!\\.])?" + "([^!]+)[-/]p([0-9]+)-([0-9]+)"
-            + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)" + "(?:c.+?)?$");
+									  + "(?:([^!]+?)[!\\.])?"
+									  + "([^!]+)[-/]p([0-9]+)-([0-9]+)"
+									  + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)"
+									  + "(?:c.+?)?$");
     Pattern posRegex = Pattern.compile("\\(([0-9]+)\\)([0-9]+)-([0-9]+)");
 
 
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 20c680d..053b92a 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -631,6 +631,26 @@
         assertTrue(res.at("/pubDate").isMissingNode());
     };
 
+	@Test
+    public void indexSigleDuplicate () throws IOException, QueryException { // two documents with identical sigles in one index
+        KrillIndex ki = new KrillIndex();
+        ki.addDoc(createSigleDoc2()); // UID 2, added first
+        ki.addDoc(createSigleDoc1()); // UID 1, same sigles and tokens as the document above
+        ki.commit();
+        Match km = ki.getMatchInfo("match-c1/d1/t1-p3-9", "tokens", null, null, // match ID addresses the shared textSigle
+                false, false);
+
+        JsonNode res = mapper.readTree(km.toJsonString());
+        assertEquals("tokens", res.at("/field").asText());
+        assertTrue(res.at("/startMore").asBoolean());
+        assertTrue(res.at("/endMore").asBoolean());
+        assertEquals("c1", res.at("/corpusSigle").asText());
+        assertEquals("c1/d1", res.at("/docSigle").asText());
+        assertEquals("c1/d1/t1", res.at("/textSigle").asText());
+        assertEquals("match-c1/d1/t1-p3-9", res.at("/matchID").asText());
+        assertEquals(2, res.at("/UID").asInt()); // the match is expected to come from the document added first (UID 2)
+    };
+
 
     @Test
     public void indexAttributeInfo () throws IOException, QueryException {
@@ -773,4 +793,44 @@
         return fd;
     };
 
+	private FieldDocument createSigleDoc1 () { // fixture: document with textSigle c1/d1/t1 and UID 1
+        FieldDocument fd = new FieldDocument();
+        fd.addString("corpusSigle", "c1");
+        fd.addString("docSigle", "c1/d1");
+        fd.addString("textSigle", "c1/d1/t1");
+        fd.addInt("UID", 1); // the only value that differs from createSigleDoc2
+        fd.addTV("tokens", "abcabcabac", // ten token entries, one per character of the text
+                "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
+				 + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
+				 + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
+				 + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
+				 + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
+				 + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
+				 + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
+				 + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
+				 + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
+				 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
+        return fd;
+    };
+
+	private FieldDocument createSigleDoc2 () { // fixture: document with textSigle c1/d1/t1 and UID 2
+        FieldDocument fd = new FieldDocument();
+        fd.addString("corpusSigle", "c1");
+        fd.addString("docSigle", "c1/d1");
+        fd.addString("textSigle", "c1/d1/t1");
+        fd.addInt("UID", 2); // the only value that differs from createSigleDoc1
+        fd.addTV("tokens", "abcabcabac", // ten token entries, one per character of the text
+                "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
+				 + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
+				 + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
+				 + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
+				 + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
+				 + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
+				 + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
+				 + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
+				 + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
+				 + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
+        return fd;
+    };
+
 };