Merge branch 'payload-id' of ssh://korap.ids-mannheim.de:29418/KorAP/Krill into payload-id
diff --git a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
index 6354f82..e963a22 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
@@ -89,6 +89,9 @@
// Analyze term value
if (ttype != 1) {
+
+ this.payload.get(); // Ignore PTI
+
if (DEBUG)
log.trace("Check {} for {}", tterm, prefixRegex.toString());
matcher = prefixRegex.matcher(tterm);
@@ -117,9 +120,12 @@
};
// for spans and relations
- if (ttype > 1)
+ if (ttype > 1) {
// Unsure if this is correct
this.endPos = this.payload.getInt() - 1;
+ };
+
+ // Ignore link id for the moment
if (ttype == 2 && this.payload.position() < lastPos) {
this.depth = this.payload.get();
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
index 37036b4..f168295 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ClassSpans.java
@@ -70,7 +70,7 @@
// The highlighted payload
this.classedPayload = new ArrayList<byte[]>(3);
- this.bb = ByteBuffer.allocate(9);
+ this.bb = ByteBuffer.allocate(10);
};
@@ -147,7 +147,7 @@
// Todo: Better allocate using a Factory!
bb.clear();
- bb.putInt(spans.start()).putInt(spans.end()).put(number);
+ bb.put((byte) 0).putInt(spans.start()).putInt(spans.end()).put(number);
// Add highlight information as byte array
classedPayload.add(bb.array());
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index b73f186..1380c91 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -36,7 +36,7 @@
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
- private byte[] b = new byte[8];
+ private byte[] b = new byte[10];
public static enum PayloadTypeIdentifier {
ELEMENT(64),
@@ -112,8 +112,9 @@
else{
this.isPayloadLoaded = true;
}
-
+
List<byte[]> payload;
+
try {
payload = (List<byte[]>) termSpans.getPayload();
}
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index f5e3e63..865cef9 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -232,9 +232,9 @@
// Todo element searches!
// Highlights!
- if (b.length == 9) {
+ if (b[0] == 0) {
bb.put(b);
- bb.rewind();
+ bb.position(1); // Ignore PTI
int start = bb.getInt();
int end = bb.getInt();
@@ -247,6 +247,7 @@
this.getEndPos());
// Ignore classes out of match range and set by the system
+ // TODO: This may be decidable by PT!!
if ((number & 0xFF) <= 128 && start >= this.getStartPos()
&& end <= this.getEndPos())
this.addHighlight(start, end - 1, number);
@@ -254,9 +255,9 @@
// Element payload for match!
// This MAY BE the correct match
- else if (b.length == 8) {
+ else if (b[0] == 64) {
bb.put(b);
- bb.rewind();
+ bb.position(1);
if (this.potentialStartPosChar == -1) {
this.potentialStartPosChar = bb.getInt(0);
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 5fed33b..d99b63c 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-log4j.rootLogger = ERROR, stdout
+# log4j.rootLogger = ERROR, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -25,10 +25,10 @@
# Responses:
# log4j.logger.de.ids_mannheim.korap.server.Node = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.response.Match = TRACE, stdout
+log4j.logger.de.ids_mannheim.korap.response.Match = TRACE, stdout
# Index:
-# log4j.logger.de.ids_mannheim.korap.KrillIndex = TRACE, stdout
+log4j.logger.de.ids_mannheim.korap.KrillIndex = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.index.PositionsToOffset = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.index.MultiTermTokenStream = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestClassFilterIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestClassFilterIndex.java
index 5a2565e..c87f13a 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestClassFilterIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestClassFilterIndex.java
@@ -155,7 +155,6 @@
assertEquals(6, kr.getMatch(0).getEndPos());
assertEquals(14, kr.getMatch(1).getStartPos());
assertEquals(15, kr.getMatch(1).getEndPos());
-
}
@@ -299,4 +298,4 @@
kr.getMatch(0).getSnippetBrackets());
}
-}
\ No newline at end of file
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 9c7e292..93fe656 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -435,7 +435,6 @@
Match km = ki.getMatchInfo("match-c1!d1-p0-4", "tokens", null, null,
true, true);
-
assertEquals(
"SnippetHTML (2)",
"<span class=\"context-left\">"
@@ -578,14 +577,14 @@
fd.addTV(
"tokens",
"abcabcabac",
- "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<i>4|_0$<i>0<i>1|-:t$<i>10]"
+ "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
+ "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
- + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:s#2-5$<i>5]"
- + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<i>1|_3$<i>3<i>4]"
+ + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:s$<b>64<i>2<i>5<i>5]"
+ + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]"
+ "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
+ "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
+ "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
- + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag#7-10$<i>10|_7$<i>7<i>8]"
+ + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
+ "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
+ "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
return fd;
@@ -599,14 +598,14 @@
fd.addTV(
"tokens",
"abcabcabac",
- "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<i>4|_0$<i>0<i>1|-:t$<i>10]"
- + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|>:x/rel:b$<i>4|_1$<i>1<i>2]"
- + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:s#2-5$<i>5]"
- + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<i>1|_3$<i>3<i>4]"
+ "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
+ + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|>:x/rel:b$<b>32<i>4<s>0<s>0<s>0|_1$<i>1<i>2]"
+ + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:s$<b>64<i>2<i>5<i>5]"
+ + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]"
+ "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
+ "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
+ "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
- + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag#7-10$<i>10|_7$<i>7<i>8]"
+ + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
+ "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
+ "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
return fd;
@@ -620,14 +619,14 @@
fd.addTV(
"tokens",
"aa bb cc aa bb cc aa bb aa cc ",
- "[(0-2)s:aa|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<i>4|_0$<i>0<i>2|-:t$<i>10]"
+ "[(0-2)s:aa|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>2|-:t$<i>10]"
+ "[(3-5)s:bb|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>3<i>5]"
- + "[(6-8)s:cc|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>6<i>8|<>:s#6-14$<i>5]"
- + "[(9-11)s:aa|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<i>1|_3$<i>9<i>11]"
+ + "[(6-8)s:cc|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>6<i>8|<>:s$<b>64<i>6<i>14<i>5]"
+ + "[(9-11)s:aa|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>9<i>11]"
+ "[(12-14)s:bb|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>12<i>14]"
+ "[(15-17)s:cc|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>15<i>17]"
+ "[(18-20)s:aa|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>18<i>20]"
- + "[(21-23)s:bb|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag#7-10$<i>10|_7$<i>21<i>23]"
+ + "[(21-23)s:bb|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>21<i>23]"
+ "[(24-26)s:aa|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>24<i>26]"
+ "[(27-29)s:cc|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>27<i>29]");
return fd;
@@ -641,14 +640,14 @@
fd.addTV(
"tokens",
"abcabcabac",
- "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<i>4|_0$<i>0<i>1|-:t$<i>10]"
+ "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
+ "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
- + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:s#2-5$<i>5]"
- + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<i>1|_3$<i>3<i>4]"
+ + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:s$<b>64<i>2<i>5<i>5]"
+ + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]"
+ "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
- + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6|<>:s#5-7$<i>7]"
+ + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6|<>:s$<b>64<i>5<i>7<i>7]"
+ "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
- + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag#7-10$<i>10|_7$<i>7<i>8]"
+ + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
+ "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
+ "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
return fd;