Added spans and dependency relations to match retrieval
diff --git a/CHANGES b/CHANGES
index 2666f1b..0fde379 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,7 @@
+0.26.1 2014-01-21
+ - Distinct Match retrieval including spans
+ and dependency relations (diewald)
+
0.26 2014-01-16
- Introduced standalone SpanSegmentQueries (margaretha)
- [bugfix] SpanNextQueries (margaretha)
diff --git a/pom.xml b/pom.xml
index 979b146..0d5adaf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,12 +4,12 @@
<parent>
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-core-modules</artifactId>
- <version>1.0</version>
+ <version>LATEST</version>
</parent>
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-lucene-index</artifactId>
- <version>0.26</version>
+ <version>0.26.1</version>
<packaging>jar</packaging>
<name>KorAP-lucene-index</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 095d2d5..037452a 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -502,22 +502,23 @@
// Todo: Only support one direction!
if (includeSpans)
- regex.append("((\"<>\"|\"<\"|\">\")\":\")?");
+ regex.append("((\">\"|\"<\"\">\"?)\":\")?");
if (foundry != null) {
regex.append(foundry).append('/');
if (layer != null)
regex.append(layer).append(":");
}
else if (includeSpans) {
- regex.append("([^-is]+?|[-is][^:])");
+ regex.append("([^-is]|[-is][^:])");
}
else {
- regex.append("([^-is<>]+?|([-is<>]|\"<>\")[^:])");
+ regex.append("([^-is<>]|([-is>][^:])|<[^:>])");
};
regex.append("(.){1,}|_[0-9]+");
+
log.trace("The final regexString is {}", regex.toString());
- RegExp regexObj = new RegExp(regex.toString());
+ RegExp regexObj = new RegExp(regex.toString(), RegExp.COMPLEMENT);
fst = new CompiledAutomaton(regexObj.toAutomaton());
log.trace("The final regexObj is {}", regexObj.toString());
};
@@ -600,6 +601,8 @@
// How often does this term occur in the document?
int termOccurrences = docs.freq();
+ // log.trace("I found {} documents with this term", termOccurrences);
+
// String representation of the term
String termString = termsEnum.term().utf8ToString();
@@ -610,13 +613,17 @@
int pos = docs.nextPosition();
// Check, if the position of the term is in the interesting area
+
+ // log.trace("Check position!");
+
if (pos >= match.getStartPos() && pos < match.getEndPos()) {
log.trace(
">> {}: {}-{}-{}",
termString,
docs.freq(),
- pos, docs.getPayload()
+ pos,
+ docs.getPayload()
);
BytesRef payload = docs.getPayload();
@@ -630,8 +637,11 @@
payload.length
);
};
-
- termList.add(new TermInfo(termString, pos, bbTerm));
+ TermInfo ti = new TermInfo(termString, pos, bbTerm).analyze();
+ if (ti.getEndPos() < match.getEndPos()) {
+ log.trace("Add {}", ti.toString());
+ termList.add(ti);
+ };
};
};
};
@@ -649,6 +659,8 @@
if (t.getType() == "term" || t.getType() == "span")
match.addAnnotation(t.getStartPos(), t.getEndPos(), t.getAnnotation());
+ else if (t.getType() == "relSrc")
+ match.addRelation(t.getStartPos(), t.getEndPos(), t.getAnnotation());
};
break;
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index 15627de..31ccd5d 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -14,6 +14,7 @@
import static de.ids_mannheim.korap.util.KorapHTML.*;
import de.ids_mannheim.korap.index.MatchIdentifier;
+import de.ids_mannheim.korap.index.PosIdentifier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -24,6 +25,8 @@
/*
Todo: The implemented classes and private names are horrible!
Refactor, future-me!
+
+ The number based Highlighttype is ugly - UGLY!
*/
/**
@@ -56,8 +59,14 @@
@JsonIgnore
public int localDocID = -1;
- HashMap<Integer, String> annotationNumber = new HashMap<>(16);
+ HashMap<Integer, String> annotationNumber = new HashMap<>(16);
+ HashMap<Integer, Relation> relationNumber = new HashMap<>(16);
+ HashMap<Integer, Integer> identifierNumber = new HashMap<>(16);
+
+ // -1 is match highlight
int annotationNumberCounter = 256;
+ int relationNumberCounter = 2048;
+ int identifierNumberCounter = -2;
@JsonIgnore
public boolean leftTokenContext,
@@ -116,29 +125,55 @@
this.setEndPos(id.getEndPos());
if (includeHighlights)
- for (int[] pos : id.getPos())
+ for (int[] pos : id.getPos()) {
+ if (pos[0] < id.getStartPos() || pos[1] > id.getEndPos())
+ continue;
+
this.addHighlight(pos[0], pos[1], pos[2]);
+ };
};
private class Highlight {
public int start, end;
public int number = -1;
+ // Relational highlight
+ public Highlight (int start, int end, String annotation, int ref) {
+ this.start = start;
+ this.end = end;
+ // TODO: This can overflow!
+ this.number = relationNumberCounter++;
+ relationNumber.put(this.number, new Relation(annotation, ref));
+ };
+
+ // Span highlight
public Highlight (int start, int end, String annotation) {
this.start = start;
this.end = end;
// TODO: This can overflow!
- this.number = annotationNumberCounter++;
- log.trace("Add annotation: {} ({})", annotation, this.number);
- annotationNumber.put(this.number, annotation);
+ if (annotationNumberCounter < 2048) {
+ this.number = annotationNumberCounter++;
+ annotationNumber.put(this.number, annotation);
+ };
};
+ // Simple highlight
public Highlight (int start, int end, int number) {
this.start = start;
this.end = end;
this.number = number;
};
- }
+ };
+
+    // Immutable payload of a relational highlight: label plus target position.
+    private static class Relation {
+        public final int ref;
+        public final String annotation;
+        public Relation (String annotation, int ref) {
+            this.annotation = annotation;
+            this.ref = ref;
+        };
+    };
/**
* Insert a highlight for the snippet view by means of positional
@@ -180,6 +215,13 @@
this.addHighlight(new Highlight(start, end, annotation));
};
+    // Adds the link highlight on src, then an identifier highlight on target.
+    public void addRelation (int src, int target, String annotation) {
+        this.addHighlight(new Highlight(src, src, annotation, target));
+        identifierNumber.put(identifierNumberCounter, target);
+        this.addHighlight(new Highlight(target, target, identifierNumberCounter--));
+    };
+
public void populateDocument (Document doc, String field, HashSet<String> fields) {
@@ -294,6 +336,24 @@
return (this.identifier = id.toString());
};
+    /**
+     * Build the id of one token position, serialized by PosIdentifier.
+     * Returns null if the match has no local document or no document ID.
+     */
+    @JsonIgnore
+    public String getPosID (int pos) {
+        // The cached match identifier must not short-circuit this method:
+        // every position needs its own id, even after getID() was called.
+        if (this.localDocID == -1 || this.getDocID() == null)
+            return null;
+
+        PosIdentifier id = new PosIdentifier();
+        id.setCorpusID(this.getCorpusID());
+        id.setDocID(this.getDocID());
+        id.setPos(pos);
+        return id.toString();
+    };
+
private void _reset () {
this.processed = false;
this.snippetHTML = null;
@@ -435,18 +495,40 @@
};
// Return html fragment for this combinator element
- public String toHTML (FixedBitSet level, byte[] levelCache) {
+ public String toHTML (KorapMatch match, FixedBitSet level, byte[] levelCache) {
// Opening
if (this.type == 1) {
StringBuilder sb = new StringBuilder();
if (this.number == -1) {
sb.append("<span class=\"match\">");
}
- else if (this.number >= 256) {
- sb.append("<span title=\"")
- .append(annotationNumber.get(this.number))
+
+ else if (this.number < -1) {
+ sb.append("<span xml:id=\"")
+ .append(match.getPosID(
+ identifierNumber.get(this.number)))
.append("\">");
}
+
+ else if (this.number >= 256) {
+ sb.append("<span ");
+ if (this.number < 2048) {
+ sb.append("title=\"")
+ .append(annotationNumber.get(this.number))
+ .append('"');
+ }
+ else {
+ Relation rel = relationNumber.get(this.number);
+ sb.append("xlink:title=\"")
+ .append(rel.annotation)
+ .append('"');
+ sb.append(" xlink:type=\"simple\"");
+ sb.append(" xlink:href=\"#");
+ sb.append(match.getPosID(rel.ref));
+ sb.append('"');
+ };
+ sb.append('>');
+ }
else {
// Get the first free level slot
byte pos;
@@ -468,7 +550,7 @@
}
// Closing
else if (this.type == 2) {
- if (this.number == -1 || this.number >= 256)
+ if (this.number <= -1 || this.number >= 256)
return "</span>";
if (this.terminal)
@@ -484,13 +566,32 @@
public String toBrackets () {
if (this.type == 1) {
StringBuilder sb = new StringBuilder();
+
+ // Match
if (this.number == -1) {
sb.append("[");
}
+
+ // Identifier
+ else if (this.number < -1) {
+ sb.append("{#");
+ sb.append(identifierNumber.get(this.number));
+ sb.append(':');
+ }
+
+ // Highlight, Relation, Span
else {
sb.append("{");
- if (this.number >= 256)
- sb.append(annotationNumber.get(this.number)).append(':');
+ if (this.number >= 256) {
+ if (this.number < 2048)
+ sb.append(annotationNumber.get(this.number));
+ else {
+ Relation rel = relationNumber.get(this.number);
+ sb.append(rel.annotation);
+ sb.append('>').append(rel.ref);
+ };
+ sb.append(':');
+ }
else if (this.number != 0)
sb.append(this.number).append(':');
};
@@ -704,7 +805,7 @@
sb.append("<span class=\"more\"></span>");
if (elem.type == 0) {
- sb.append(elem.toHTML(level, levelCache));
+ sb.append(elem.toHTML(this, level, levelCache));
start++;
};
sb.append("</span>");
@@ -716,7 +817,7 @@
// Create context, if trhere is any
rightContext.append("<span class=\"context-right\">");
if (elem != null && elem.type == 0) {
- rightContext.append(elem.toHTML(level, levelCache));
+ rightContext.append(elem.toHTML(this, level, levelCache));
end--;
};
if (endMore)
@@ -724,7 +825,7 @@
rightContext.append("</span>");
for (short i = start; i < end; i++) {
- sb.append(this.snippetStack.get(i).toHTML(level,levelCache));
+ sb.append(this.snippetStack.get(i).toHTML(this, level,levelCache));
};
sb.append(rightContext);
diff --git a/src/main/java/de/ids_mannheim/korap/index/DocIdentifier.java b/src/main/java/de/ids_mannheim/korap/index/DocIdentifier.java
new file mode 100644
index 0000000..b66d06a
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/index/DocIdentifier.java
@@ -0,0 +1,41 @@
+package de.ids_mannheim.korap.index;
+import java.util.*;
+import java.util.regex.*;
+
+/**
+ * Base class of identifiers that address a document by an optional
+ * corpus ID and a document ID.
+ */
+public class DocIdentifier {
+    protected String corpusID, docID;
+
+    /** Returns the corpus ID, or null if none has been accepted. */
+    public String getCorpusID () {
+        return this.corpusID;
+    };
+
+    /**
+     * Sets the corpus ID; null or any value containing '!' is ignored
+     * and leaves the current value untouched.
+     */
+    public void setCorpusID (String id) {
+        if (id != null && !id.contains("!"))
+            this.corpusID = id;
+    };
+
+    /** Returns the document ID, or null if none has been accepted. */
+    public String getDocID () {
+        return this.docID;
+    };
+
+    /**
+     * Sets the document ID; null or any value containing '!' is ignored
+     * and leaves the current value untouched.
+     */
+    public void setDocID (String id) {
+        // Guard against null like setCorpusID does; calling contains()
+        // on a null id would throw a NullPointerException.
+        if (id != null && !id.contains("!"))
+            this.docID = id;
+    };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/index/MatchIdentifier.java b/src/main/java/de/ids_mannheim/korap/index/MatchIdentifier.java
index c5a65f1..6b22ad9 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MatchIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MatchIdentifier.java
@@ -1,10 +1,10 @@
package de.ids_mannheim.korap.index;
import java.util.*;
import java.util.regex.*;
+import de.ids_mannheim.korap.index.DocIdentifier;
-public class MatchIdentifier {
- private String corpusID, docID;
+public class MatchIdentifier extends DocIdentifier {
private int startPos, endPos = 0;
private ArrayList<int[]> pos = new ArrayList<>(8);
@@ -40,24 +40,6 @@
};
};
- public String getCorpusID () {
- return this.corpusID;
- };
-
- public void setCorpusID (String id) {
- if (id != null && !id.contains("!"))
- this.corpusID = id;
- };
-
- public String getDocID () {
- return this.docID;
- };
-
- public void setDocID (String id) {
- if (!id.contains("!"))
- this.docID = id;
- };
-
public int getStartPos () {
return this.startPos;
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/PosIdentifier.java b/src/main/java/de/ids_mannheim/korap/index/PosIdentifier.java
new file mode 100644
index 0000000..0bf6e90
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/index/PosIdentifier.java
@@ -0,0 +1,45 @@
+package de.ids_mannheim.korap.index;
+import java.util.*;
+import de.ids_mannheim.korap.index.DocIdentifier;
+
+/**
+ * Identifier of a single token position within a document,
+ * serialized as "word-[corpusID!]docID-p[pos]".
+ */
+public class PosIdentifier extends DocIdentifier {
+    private int pos;
+
+    public PosIdentifier () {};
+
+    /** Sets the token position; negative values are ignored. */
+    public void setPos (int pos) {
+        if (pos >= 0)
+            this.pos = pos;
+    };
+
+    /** Returns the token position (0 unless a non-negative value was set). */
+    public int getPos () {
+        return this.pos;
+    };
+
+    /**
+     * Serializes the identifier, e.g. "word-c1!d1-p8".
+     *
+     * @return the identifier string, or null if no document ID is set
+     */
+    @Override
+    public String toString () {
+        if (this.docID == null)
+            return null;
+
+        // Method-local buffer: the unsynchronized StringBuilder suffices
+        StringBuilder sb = new StringBuilder("word-");
+
+        // The corpus prefix is optional; '!' separates corpus and document
+        if (this.corpusID != null)
+            sb.append(this.corpusID).append('!');
+
+        sb.append(this.docID).append("-p").append(this.pos);
+        return sb.toString();
+    };
+};
\ No newline at end of file
diff --git a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
index 38659af..b9a894a 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
@@ -28,7 +28,7 @@
private byte depth = (byte) 0;
- private Pattern prefixRegex = Pattern.compile("([^/]+)/([^:]+):(.+?)");
+ private Pattern prefixRegex = Pattern.compile("(?:([^/]+)/)?([^:/]+)(?::(.+?))?");
private Matcher matcher;
public TermInfo (String term, int pos, ByteBuffer payload) {
@@ -44,6 +44,7 @@
int ttype = 0;
String tterm = this.term;
+ int lastPos = this.payload.position();
this.payload.rewind();
switch (tterm.charAt(0)) {
@@ -86,7 +87,10 @@
matcher = prefixRegex.matcher(tterm);
if (matcher.matches() && matcher.groupCount() == 3) {
this.annotation = tterm;
- this.foundry = matcher.group(1);
+ if (matcher.group(1) != null)
+ this.foundry = matcher.group(1);
+ else
+ this.foundry = "base";
this.layer = matcher.group(2);
this.value = matcher.group(3);
};
@@ -110,7 +114,7 @@
// Unsure if this is correct
this.endPos = this.payload.getInt() -1;
- if (ttype == 2 && this.payload.hasRemaining()) {
+ if (ttype == 2 && this.payload.position() < lastPos) {
this.depth = this.payload.get();
};
@@ -167,6 +171,27 @@
return this.annotation;
};
+    // Debug form: <type>foundry/layer[:value][(depth)][startPos-endPos][startChar-endChar]
+    @Override
+    public String toString () {
+        this.analyze();
+
+        StringBuilder sb = new StringBuilder();
+        sb.append('<').append(this.getType()).append('>');
+        sb.append(this.getFoundry()).append('/').append(this.getLayer());
+
+        if (this.getValue() != null)
+            sb.append(':').append(this.getValue());
+        if (this.getDepth() != (byte) 0)
+            sb.append('(').append(this.getDepth()).append(')');
+
+        sb.append('[').append(this.getStartPos());
+        sb.append('-').append(this.getEndPos()).append(']');
+        sb.append('[').append(this.getStartChar());
+        sb.append('-').append(this.getEndChar()).append(']');
+        return sb.toString();
+    };
+
@Override
public int compareTo (TermInfo obj) {
this.analyze();
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 54e64d8..3e26bdd 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -9,8 +9,8 @@
#log4j.logger.de.ids_mannheim.korap.query.spans.SimpleSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.KorapTermSpan = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.ClassSpans = TRACE, stdout
-#log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
+# log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapFilter = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 4bc2973..ffbf04e 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -8,6 +8,7 @@
import org.junit.runners.JUnit4;
import de.ids_mannheim.korap.index.MatchIdentifier;
+import de.ids_mannheim.korap.index.PosIdentifier;
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.KorapQuery;
@@ -69,6 +70,18 @@
};
@Test
+ public void posIdentifierExample1 () throws IOException {
+ PosIdentifier id = new PosIdentifier();
+ id.setCorpusID("c1");
+ id.setDocID("d1");
+ id.setPos(8);
+ assertEquals(id.getCorpusID(), "c1");
+ assertEquals(id.getDocID(), "d1");
+ assertEquals(id.getPos(), 8);
+ assertEquals(id.toString(), "word-c1!d1-p8");
+ };
+
+ @Test
public void indexExample1 () throws IOException {
KorapIndex ki = new KorapIndex();
ki.addDoc(createSimpleFieldDoc());
@@ -174,34 +187,315 @@
"</span>"+
"</span>",
km.getSnippetHTML());
+ };
- km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
+
+ @Test
+ public void indexExample3 () throws IOException {
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(createSimpleFieldDoc());
+ ki.commit();
+
+ KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
"tokens",
null,
null,
false,
true);
- // --> bug:
- // System.err.println(km.snippetHTML());
+
+ assertEquals("SnippetHTML (1)",
+ "<span class=\"context-left\">" +
+ "<span class=\"more\">" +
+ "</span>" +
+ "</span>" +
+ "<span class=\"match\">" +
+ "<em class=\"class-2 level-0\">" +
+ "<span title=\"f/m:acht\">" +
+ "<span title=\"f/y:eight\">" +
+ "<span title=\"it/is:8\">" +
+ "<span title=\"x/o:achtens\">" +
+ "b" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "<em class=\"class-4 level-1\">" +
+ "<span title=\"f/m:neun\">" +
+ "<span title=\"f/y:nine\">" +
+ "<span title=\"it/is:9\">" +
+ "<span title=\"x/o:neuntens\">" +
+ "a" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</em>" +
+ "</em>" +
+ "</span>" +
+ "<span class=\"context-right\">" +
+ "<span class=\"more\">" +
+ "</span>" +
+ "</span>",
+ km.getSnippetHTML());
};
+ @Test
+ public void indexExample4 () throws IOException {
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(createSimpleFieldDoc());
+ ki.commit();
+
+ KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
+ "tokens",
+ null,
+ null,
+ false,
+ false);
+
+
+ assertEquals("SnippetHTML (1)",
+ "<span class=\"context-left\">" +
+ "<span class=\"more\">" +
+ "</span>" +
+ "</span>" +
+ "<span class=\"match\">" +
+ "<span title=\"f/m:acht\">" +
+ "<span title=\"f/y:eight\">" +
+ "<span title=\"it/is:8\">" +
+ "<span title=\"x/o:achtens\">" +
+ "b" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "<span title=\"f/m:neun\">" +
+ "<span title=\"f/y:nine\">" +
+ "<span title=\"it/is:9\">" +
+ "<span title=\"x/o:neuntens\">" +
+ "a" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "<span class=\"context-right\">" +
+ "<span class=\"more\">" +
+ "</span>" +
+ "</span>",
+ km.getSnippetHTML());
+ };
+
+ @Test
+ public void indexExample5Spans () throws IOException {
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(createSimpleFieldDoc());
+ ki.commit();
+
+ KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
+ "tokens",
+ null,
+ null,
+ true,
+ false);
+
+
+ assertEquals("SnippetBrackets (1)",
+ "... [{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}] ...",
+ km.getSnippetBrackets());
+ };
+
+ @Test
+ public void indexExample6Spans () throws IOException {
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(createSimpleFieldDoc());
+ ki.commit();
+
+ KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8",
+ "tokens",
+ null,
+ null,
+ true,
+ false);
+
+
+ assertEquals("SnippetBrackets (1)",
+ "... [{x/tag:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]",
+ km.getSnippetBrackets());
+ };
+
+ @Test
+ public void indexExample7Spans () throws IOException {
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(createSimpleFieldDoc());
+ ki.commit();
+
+ KorapMatch km = ki.getMatchInfo("match-c1!d1-p7-10(4)8-8(2)7-8",
+ "tokens",
+ null,
+ null,
+ true,
+ true);
+
+
+ assertEquals("SnippetBrackets (1)",
+ "... [{x/tag:{2:{f/m:acht:{f/y:eight:{it/is:8:{x/o:achtens:b}}}}{4:{f/m:neun:{f/y:nine:{it/is:9:{x/o:neuntens:a}}}}}}{f/m:zehn:{f/y:ten:{it/is:10:{x/o:zehntens:c}}}}}]",
+ km.getSnippetBrackets());
+
+ assertEquals("SnippetHTML (1)",
+ "<span class=\"context-left\">" +
+ "<span class=\"more\">" +
+ "</span>" +
+ "</span>" +
+ "<span class=\"match\">" +
+ "<span title=\"x/tag\">" +
+ "<em class=\"class-2 level-0\">" +
+ "<span title=\"f/m:acht\">" +
+ "<span title=\"f/y:eight\">" +
+ "<span title=\"it/is:8\">" +
+ "<span title=\"x/o:achtens\">" +
+ "b" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "<em class=\"class-4 level-1\">" +
+ "<span title=\"f/m:neun\">" +
+ "<span title=\"f/y:nine\">" +
+ "<span title=\"it/is:9\">" +
+ "<span title=\"x/o:neuntens\">" +
+ "a" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</em>" +
+ "</em>" +
+ "<span title=\"f/m:zehn\">" +
+ "<span title=\"f/y:ten\">" +
+ "<span title=\"it/is:10\">" +
+ "<span title=\"x/o:zehntens\">" +
+ "c" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "</span>" +
+ "<span class=\"context-right\">" +
+ "</span>",
+ km.getSnippetHTML());
+ };
+
+    @Test
+    public void indexExample6Relations () throws IOException {
+        KorapIndex ki = new KorapIndex();
+        ki.addDoc(createSimpleFieldDoc());
+        ki.commit();
+        // The relation on token 0 must link to the identifier wrapped around token 3
+        KorapMatch km = ki.getMatchInfo("match-c1!d1-p0-5(4)8-8(2)7-8",
+            "tokens",
+            "x",
+            null,
+            true,
+            false);
+
+        assertEquals("SnippetBrackets (1)",
+            "[{x/rel:a>3:{x/o:erstens:a}}{x/o:zweitens:b}{x/o:drittens:c}{#3:{x/o:viertens:a}}{x/o:fünftens:b}] ...",
+            km.getSnippetBrackets());
+
+        assertEquals("SnippetHTML (1)",
+            "<span class=\"context-left\">" +
+            "</span>" +
+            "<span class=\"match\">" +
+            "<span xlink:title=\"x/rel:a\" " +
+            "xlink:type=\"simple\" " +
+            "xlink:href=\"#word-c1!d1-p3\">" +
+            "<span title=\"x/o:erstens\">" +
+            "a" +
+            "</span>" +
+            "</span>" +
+            "<span title=\"x/o:zweitens\">" +
+            "b" +
+            "</span>" +
+            "<span title=\"x/o:drittens\">" +
+            "c" +
+            "</span>" +
+            "<span xml:id=\"word-c1!d1-p3\">" +
+            "<span title=\"x/o:viertens\">" +
+            "a" +
+            "</span>" +
+            "</span>" +
+            "<span title=\"x/o:fünftens\">" +
+            "b" +
+            "</span>" +
+            "</span>" +
+            "<span class=\"context-right\">" +
+            "<span class=\"more\">" +
+            "</span>" +
+            "</span>",
+            km.getSnippetHTML());
+
+        km = ki.getMatchInfo("match-c1!d1-p0-5(7)2-3(4)8-8(2)7-8",
+            "tokens",
+            "x",
+            null,
+            true,
+            true);
+        // Same match with highlights requested: class 7 must wrap tokens 2-3
+        assertEquals("SnippetHTML (2)",
+            "<span class=\"context-left\">" +
+            "</span>" +
+            "<span class=\"match\">" +
+            "<span xlink:title=\"x/rel:a\" " +
+            "xlink:type=\"simple\" " +
+            "xlink:href=\"#word-c1!d1-p3\">" +
+            "<span title=\"x/o:erstens\">" +
+            "a" +
+            "</span>" +
+            "</span>" +
+            "<span title=\"x/o:zweitens\">" +
+            "b" +
+            "</span>" +
+            "<em class=\"class-7 level-0\">" +
+            "<span title=\"x/o:drittens\">" +
+            "c" +
+            "</span>" +
+            "<span xml:id=\"word-c1!d1-p3\">" +
+            "<span title=\"x/o:viertens\">" +
+            "a" +
+            "</span>" +
+            "</span>" +
+            "</em>" +
+            "<span title=\"x/o:fünftens\">" +
+            "b" +
+            "</span>" +
+            "</span>" +
+            "<span class=\"context-right\">" +
+            "<span class=\"more\">" +
+            "</span>" +
+            "</span>",
+            km.getSnippetHTML());
+    };
+
+
+
private FieldDocument createSimpleFieldDoc(){
FieldDocument fd = new FieldDocument();
fd.addString("corpusID", "c1");
fd.addString("ID", "d1");
fd.addTV("tokens",
"abcabcabac",
- "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|_0#0-1|-:t$<i>10]" +
- "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|_1#1-2]" +
- "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|_2#2-3]" +
- "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|_3#3-4]" +
- "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|_4#4-5]" +
- "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|_5#5-6]" +
- "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|_6#6-7]" +
- "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|_7#7-8]" +
- "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|_8#8-9]" +
- "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|_9#9-10]");
+ "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<i>4|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1#1-2]" +
+ "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2#2-3]" +
+ "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<i>1|_3#3-4]" +
+ "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4#4-5]" +
+ "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5#5-6]" +
+ "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6#6-7]" +
+ "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag#7-10$<i>10|_7#7-8]" +
+ "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8#8-9]" +
+ "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9#9-10]");
return fd;
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java b/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java
index a52455b..b9f4014 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestTermInfo.java
@@ -25,7 +25,6 @@
bb.put((byte) 4);
TermInfo term = new TermInfo("<>:mate/p:NN", 4, bb).analyze();
-
assertEquals("type", term.getType(), "span");
assertEquals("value", term.getValue(), "NN");
assertEquals("foundry", term.getFoundry(), "mate");
@@ -86,5 +85,35 @@
assertEquals("startChar", term.getStartChar(), 240);
assertEquals("endChar", term.getEndChar(), 400);
assertEquals("depth", term.getDepth(), 0);
+
+ bb.clear();
+ bb.putInt(20); // startOffset
+ bb.putInt(25); // endOffset
+ bb.putInt(24); // endPos
+ term = new TermInfo("<>:s", 20, bb).analyze();
+ assertEquals("type", term.getType(), "span");
+ assertNull("value", term.getValue());
+ assertEquals("foundry", term.getFoundry(), "base");
+ assertEquals("layer", term.getLayer(), "s");
+ assertEquals("startPos", term.getStartPos(), 20);
+ assertEquals("endPos", term.getEndPos(), 23);
+ assertEquals("startChar", term.getStartChar(), 20);
+ assertEquals("endChar", term.getEndChar(), 25);
+ assertEquals("depth", term.getDepth(), 0);
+
+ bb.clear();
+ bb.putInt(20); // startOffset
+ bb.putInt(25); // endOffset
+ bb.putInt(24); // endPos
+ term = new TermInfo("<>:tag/x", 20, bb).analyze();
+ assertEquals("type", term.getType(), "span");
+ assertNull("value", term.getValue());
+ assertEquals("foundry", term.getFoundry(), "tag");
+ assertEquals("layer", term.getLayer(), "x");
+ assertEquals("startPos", term.getStartPos(), 20);
+ assertEquals("endPos", term.getEndPos(), 23);
+ assertEquals("startChar", term.getStartChar(), 20);
+ assertEquals("endChar", term.getEndChar(), 25);
+ assertEquals("depth", term.getDepth(), 0);
};
};
\ No newline at end of file