bugfix for multiple identifiers in relations
diff --git a/CHANGES b/CHANGES
index 272a764..3315fb5 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,7 @@
+0.30.9 2014-04-11
+ - [bugfix] Prevent multiple identifiers in
+ relation highlighting (diewald)
+
0.30.8 2014-04-10
- Added getTermRelationJSON() to KorapCollection (diewald)
This is likely to get deprecated!
diff --git a/pom.xml b/pom.xml
index 04b3b93..d4244da 100644
--- a/pom.xml
+++ b/pom.xml
@@ -11,7 +11,7 @@
-->
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-lucene-index</artifactId>
- <version>0.30.8</version>
+ <version>0.30.9</version>
<packaging>jar</packaging>
<name>KorAP-lucene-index</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 8b43b19..e2d957c 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -104,6 +104,13 @@
* @author ndiewald
*/
public class KorapIndex {
+
+ // TODO: Use configuration
+ // Last line of defense against simple DoS attacks!
+ private int maxTermRelations = 100;
+ private int autoCommit = 500;
+
+
private Directory directory;
// Temp:
@@ -114,12 +121,10 @@
private IndexSearcher searcher;
private boolean readerOpen = false;
private int commitCounter = 0;
- private int autoCommit = 500; // Todo: Use configuration
private HashMap termContexts;
private ObjectMapper mapper = new ObjectMapper();
private String version;
- private int maxTermRelations = 50;
private static ByteBuffer bb = ByteBuffer.allocate(4),
bbOffset = ByteBuffer.allocate(8),
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index dc28449..5d398b2 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -1084,6 +1084,9 @@
LinkedList<int[]> openList = new LinkedList<int[]>();
LinkedList<int[]> closeList = new LinkedList<int[]>();
+ // Filter multiple identifiers
+ this._filterMultipleIdentifiers();
+
// Add highlight spans to balance lists
openList.addAll(this.span);
closeList.addAll(this.span);
@@ -1272,6 +1275,14 @@
if (DEBUG)
log.trace("There are highlights!");
+ /*
+ TODO: Maybe skip duplicate identifiers here instead (untested sketch):
+ if (this.highlight.last.start == hl.start && this.highlight.end == hl.end && this.highlight.id < -1) {
+ return;
+ };
+ */
+
+
for (Highlight highlight : this.highlight) {
int start = this.positionsToOffset.start(
ldid, highlight.start
@@ -1342,4 +1353,35 @@
return "{}";
};
+
+
+    /**
+     * Drops every span whose identifier number already occurred on an
+     * earlier span, so that each identifier is kept at most once.
+     * Spans that are not identifiers (number >= -1) are left untouched.
+     */
+    private void _filterMultipleIdentifiers () {
+        // Identifier numbers seen so far
+        HashSet<Integer> identifiers = new HashSet<>(20);
+
+        // Removing through the iterator needs a single pass and no index
+        // bookkeeping (the collected indices were already ascending)
+        java.util.Iterator<int[]> spans = this.span.iterator();
+        while (spans.hasNext()) {
+            // span is an int array: [Start, End, Number, Dummy]
+            int highlightNumber = spans.next()[2];
+
+            // Only numbers below -1 are identifiers
+            if (highlightNumber >= -1)
+                continue;
+
+            // NOTE(review): throws on unboxing if get() can return null -
+            // confirm every identifier is registered in identifierNumber
+            int idNumber = identifierNumber.get(highlightNumber);
+
+            // add() returns false when the number is already present
+            if (!identifiers.add(idNumber))
+                spans.remove();
+        };
+    };
};
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index f48fe94..af72c2d 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -10,7 +10,7 @@
#log4j.logger.de.ids_mannheim.korap.query.spans.NextSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.SimpleSpans = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.query.spans.ClassSpans = TRACE, stdout
-#log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
+# log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
# Collections
#log4j.logger.de.ids_mannheim.korap.KorapFilter = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
index bc8a5bb..a20c07b 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
@@ -49,6 +49,14 @@
assertEquals("filter with QueryWrapperFilter(+(+corpusID:d-1 +corpusID:d-2))",kc.getFilter(1).toString());
};
+    @Test
+    public void metaQuery9 () {
+        // Checks the count and the string form of the first filter built from metaquery9.jsonld
+        KorapCollection collection = new KorapCollection(getString(getClass().getResource("/queries/metaquery9.jsonld").getFile()));
+        assertEquals(1, collection.getCount());
+        assertEquals("filter with QueryWrapperFilter(+corpusID:WPD)", collection.getFilter(0).toString());
+    };
+
public static String getString (String path) {
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 51560d6..6481153 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -522,6 +522,66 @@
km.getSnippetBrackets());
};
+ // Checks the HTML snippet of match "match-c1!d1-p0-4" for the document
+ // built by createSimpleFieldDoc2().
+ @Test
+ public void indexExample7Dependencies () throws IOException {
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(createSimpleFieldDoc2());
+ ki.commit();
+
+ KorapMatch km = ki.getMatchInfo("match-c1!d1-p0-4",
+ "tokens",
+ null,
+ null,
+ true,
+ true);
+
+ // Both relation spans link to #word-c1!d1-p3, which appears as xml:id exactly once
+
+ assertEquals("SnippetHTML (2)",
+ "<span class=\"context-left\">" +
+ "</span>" +
+ "<span class=\"match\">"+
+ "<span xlink:title=\"x/rel:a\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">"+
+ "<span title=\"f/m:eins\">"+
+ "<span title=\"f/y:one\">"+
+ "<span title=\"it/is:1\">" +
+ "<span title=\"x/o:erstens\">a</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "<span xlink:title=\"x/rel:b\" xlink:type=\"simple\" xlink:href=\"#word-c1!d1-p3\">"+
+ "<span title=\"f/m:zwei\">"+
+ "<span title=\"f/y:two\">"+
+ "<span title=\"it/is:2\">"+
+ "<span title=\"x/o:zweitens\">b</span>"+
+ "</span>"+
+ "</span>"+
+ "</span>"+
+ "</span>"+
+ "<span title=\"f/m:drei\">"+
+ "<span title=\"f/y:three\">"+
+ "<span title=\"it/is:3\">"+
+ "<span title=\"x/o:drittens\">c</span>"+
+ "</span>"+
+ "</span>"+
+ "</span>"+
+ "<span xml:id=\"word-c1!d1-p3\">"+
+ "<span title=\"f/m:vier\">"+
+ "<span title=\"f/y:four\">"+
+ "<span title=\"it/is:4\">"+
+ "<span title=\"x/o:viertens\">a</span>"+
+ "</span>"+
+ "</span>"+
+ "</span>"+
+ "</span>"+
+ "</span>"+
+ "<span class=\"context-right\">"+
+ "<span class=\"more\">"+
+ "</span>"+
+ "</span>",
+ km.getSnippetHTML());
+ };
+
private FieldDocument createSimpleFieldDoc(){
FieldDocument fd = new FieldDocument();
fd.addString("corpusID", "c1");
@@ -540,4 +600,23 @@
"[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9#9-10]");
return fd;
};
+
+ // Builds a test document (corpusID "c1", ID "d1") with the primary text
+ // "abcabcabac" and ten token entries in the "tokens" field. Tokens 0 and 1
+ // carry the terms ">:x/rel:a$<i>4" and ">:x/rel:b$<i>4", token 3 carries
+ // "<:x/rel:b$<i>1".
+ // NOTE(review): presumably these encode relations and their targets -
+ // confirm against the term vector format.
+ private FieldDocument createSimpleFieldDoc2(){
+ FieldDocument fd = new FieldDocument();
+ fd.addString("corpusID", "c1");
+ fd.addString("ID", "d1");
+ fd.addTV("tokens",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<i>4|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|>:x/rel:b$<i>4|_1#1-2]" +
+ "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2#2-3|<>:s#2-5$<i>5]" +
+ "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<i>1|_3#3-4]" +
+ "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4#4-5]" +
+ "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5#5-6]" +
+ "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6#6-7]" +
+ "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag#7-10$<i>10|_7#7-8]" +
+ "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8#8-9]" +
+ "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9#9-10]");
+ return fd;
+ };
};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index 91a1717..ac89202 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -279,6 +279,12 @@
assertEquals(2, kr.getStartIndex());
assertEquals(2, kr.getItemsPerPage());
+
+ json = getString(getClass().getResource("/queries/metaquery9.jsonld").getFile());
+ KorapCollection kc = new KorapCollection(json);
+ kc.setIndex(ki);
+ assertEquals(7, kc.numberOf("documents"));
+
};
@Test