Added test on behaviour of duplicate documents
Change-Id: Ib4d637e536e9bb8ea37c627375a39aa7c04722ef
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
index ff15e13..80737cd 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
@@ -9,9 +9,12 @@
private ArrayList<int[]> pos = new ArrayList<>(8);
// Remember: "contains" is necessary for a compatibility bug in Kustvakt
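+ // Identifier pattern is "match-" (or "contains-"), an optional prefix terminated by "!" or ".",
+ // an identifier, "-p<start>-<end>" (or "/p<start>-<end>"), any number of "(<n>)<start>-<end>"
+ // groups and an optional trailing "c..." part, e.g. "match-c1/d1/t1-p3-9"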
Pattern idRegex = Pattern.compile("^(?:match-|contains-)"
- + "(?:([^!]+?)[!\\.])?" + "([^!]+)[-/]p([0-9]+)-([0-9]+)"
- + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)" + "(?:c.+?)?$");
+ + "(?:([^!]+?)[!\\.])?"
+ + "([^!]+)[-/]p([0-9]+)-([0-9]+)"
+ + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)"
+ + "(?:c.+?)?$");
Pattern posRegex = Pattern.compile("\\(([0-9]+)\\)([0-9]+)-([0-9]+)");
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 20c680d..053b92a 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -631,6 +631,26 @@
assertTrue(res.at("/pubDate").isMissingNode());
};
+ @Test // Behaviour of getMatchInfo when two indexed documents carry identical sigles.
+ public void indexSigleDuplicate () throws IOException, QueryException { // expects the document added first (UID 2) to be reported
+ KrillIndex ki = new KrillIndex();
+ ki.addDoc(createSigleDoc2()); // UID 2, added first
+ ki.addDoc(createSigleDoc1()); // UID 1, same sigles and token data as the document above
+ ki.commit();
+ Match km = ki.getMatchInfo("match-c1/d1/t1-p3-9", "tokens", null, null, // the ID names textSigle c1/d1/t1, which both documents share
+ false, false);
+
+ JsonNode res = mapper.readTree(km.toJsonString()); // assertions below run against the JSON serialization of the match
+ assertEquals("tokens", res.at("/field").asText());
+ assertTrue(res.at("/startMore").asBoolean());
+ assertTrue(res.at("/endMore").asBoolean());
+ assertEquals("c1", res.at("/corpusSigle").asText());
+ assertEquals("c1/d1", res.at("/docSigle").asText());
+ assertEquals("c1/d1/t1", res.at("/textSigle").asText());
+ assertEquals("match-c1/d1/t1-p3-9", res.at("/matchID").asText()); // the requested ID is echoed back unchanged
+ assertEquals(2, res.at("/UID").asInt()); // UID 2 belongs to createSigleDoc2(), the first document added
+ };
+
@Test
public void indexAttributeInfo () throws IOException, QueryException {
@@ -773,4 +793,44 @@
return fd;
};
+ private FieldDocument createSigleDoc1 () { // Builds a test document with textSigle "c1/d1/t1" and UID 1.
+ FieldDocument fd = new FieldDocument();
+ fd.addString("corpusSigle", "c1");
+ fd.addString("docSigle", "c1/d1");
+ fd.addString("textSigle", "c1/d1/t1");
+ fd.addInt("UID", 1); // the only value that differs from createSigleDoc2()
+ fd.addTV("tokens", "abcabcabac", // ten bracketed entries follow, one per character of the text
+ "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
+ + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
+ + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
+ + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
+ + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
+ + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
+ + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
+ + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
+ + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
+ + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
+ return fd;
+ };
+
+ private FieldDocument createSigleDoc2 () { // Builds a test document with textSigle "c1/d1/t1" and UID 2.
+ FieldDocument fd = new FieldDocument();
+ fd.addString("corpusSigle", "c1");
+ fd.addString("docSigle", "c1/d1");
+ fd.addString("textSigle", "c1/d1/t1");
+ fd.addInt("UID", 2); // the only value that differs from createSigleDoc1()
+ fd.addTV("tokens", "abcabcabac", // ten bracketed entries follow, one per character of the text
+ "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
+ + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
+ + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
+ + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
+ + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
+ + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
+ + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
+ + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
+ + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
+ + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
+ return fd;
+ };
+
};