Bugfix: filter out duplicate identifiers in relations
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 8b43b19..e2d957c 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -104,6 +104,13 @@
* @author ndiewald
*/
public class KorapIndex {
+
+ // TODO: Read these limits from configuration
+ // Last line of defense against simple DoS attacks!
+ private int maxTermRelations = 100;
+ private int autoCommit = 500;
+
+
private Directory directory;
// Temp:
@@ -114,12 +121,10 @@
private IndexSearcher searcher;
private boolean readerOpen = false;
private int commitCounter = 0;
- private int autoCommit = 500; // Todo: Use configuration
private HashMap termContexts;
private ObjectMapper mapper = new ObjectMapper();
private String version;
- private int maxTermRelations = 50;
private static ByteBuffer bb = ByteBuffer.allocate(4),
bbOffset = ByteBuffer.allocate(8),
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index dc28449..5d398b2 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -1084,6 +1084,9 @@
LinkedList<int[]> openList = new LinkedList<int[]>();
LinkedList<int[]> closeList = new LinkedList<int[]>();
+ // Filter out duplicate identifiers
+ this._filterMultipleIdentifiers();
+
// Add highlight spans to balance lists
openList.addAll(this.span);
closeList.addAll(this.span);
@@ -1272,6 +1275,14 @@
if (DEBUG)
log.trace("There are highlights!");
+ /*
+ TODO: Maybe filter duplicate identifiers here instead? Untested draft:
+ if (this.highlight.last.start = hl.start && this.highlight.end == hl.end && this.highlight.id < -1) {
+ return;
+ };
+ */
+
+
for (Highlight highlight : this.highlight) {
int start = this.positionsToOffset.start(
ldid, highlight.start
@@ -1342,4 +1353,35 @@
return "{}";
};
+
+
+ // Drop every span whose identifier number already occurred in an earlier span,
+ // so that each identifier is kept in this.span only once (first occurrence wins).
+ private void _filterMultipleIdentifiers () {
+ ArrayList<Integer> removeDuplicate = new ArrayList<>(10);
+ HashSet<Integer> identifiers = new HashSet<>(20);
+ for (int i = 0; i < this.span.size(); i++) {
+ // Each span is an int array: [Start, End, Number, Dummy]
+ int highlightNumber = this.span.get(i)[2];
+
+ // Numbers below -1 are identifiers; look up the associated id number
+ if (highlightNumber < -1) {
+ int idNumber = identifierNumber.get(highlightNumber);
+ if (identifiers.contains(idNumber)) {
+ removeDuplicate.add(i);
+ }
+ else {
+ identifiers.add(idNumber);
+ };
+ };
+ };
+
+ Collections.sort(removeDuplicate);
+ Collections.reverse(removeDuplicate);
+
+ // Delete duplicates from the highest index down, so remaining indices stay valid
+ for (int delete : removeDuplicate) {
+ this.span.remove(delete);
+ };
+ };
};
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index f48fe94..af72c2d 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -10,7 +10,7 @@
#log4j.logger.de.ids_mannheim.korap.query.spans.NextSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.SimpleSpans = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.query.spans.ClassSpans = TRACE, stdout
-#log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
+# log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
# Collections
#log4j.logger.de.ids_mannheim.korap.KorapFilter = TRACE, stdout