Fixed attribute serialization bug (at least for non-spans)
Change-Id: I2bb46509d71450238812e4d0a26c5690dd51d49d
diff --git a/Changes b/Changes
index 95bc323..336e7a8 100644
--- a/Changes
+++ b/Changes
@@ -1,14 +1,19 @@
+0.55.1 2016-02-10
+ - [bugfix] Fixed attribute serialization in MatchInfo (diewald)
+
0.55 2016-02-04
- [feature] Introducing payload identifiers to the index
structure (diewald, margaretha)
+ ! This is a major release, introducing index features !
+ ! not compatible with indices of old versions of Krill !
0.54 2015-11-13
- [performance] Updated Lucene dependency from 4.10.3
to 5.0.0 (diewald)
- [bugfix] Minor fixes regarding invalid tokens in the
test suite (diewald)
- - Fixed krill.properties test (diewald)
- - Fixed multiple distances in disjunction query
+ - [bugfix] Fixed krill.properties test (diewald)
+ - [bugfix] Fixed multiple distances in disjunction query
(GitHub issue #2; margaretha)
0.53 2015-09-25
diff --git a/Readme.md b/Readme.md
index 712c0e9..d259816 100644
--- a/Readme.md
+++ b/Readme.md
@@ -61,7 +61,7 @@
$ cd Krill
```
-To run the test suite ...
+To run the test suite (you will need Java SDK and Maven 3) ...
```
$ mvn clean test
@@ -82,7 +82,7 @@
**Authors**: [Nils Diewald](http://nils-diewald.de/),
[Eliza Margaretha](http://www1.ids-mannheim.de/direktion/personal/margaretha.html)
-Copyright (c) 2013-2015, [IDS Mannheim](http://ids-mannheim.de/), Germany
+Copyright (c) 2013-2016, [IDS Mannheim](http://ids-mannheim.de/), Germany
Krill is developed as part of the [KorAP](http://korap.ids-mannheim.de/)
Corpus Analysis Platform at the Institute for German Language
diff --git a/misc/payloads.md b/misc/payloads.md
index a63f492..85fae6e 100644
--- a/misc/payloads.md
+++ b/misc/payloads.md
@@ -155,7 +155,7 @@
@:class=header$<b>17<i>6<s>1
-means the attribute belongs to the term, element, or relation in the
+means the attribute belongs to the element in the
same token position whose TUI is 1 and end position is 6.
*PTIs* (it’s an attribute payload, if the fourth bit is set):
diff --git a/pom.xml b/pom.xml
index 4a1e534..aefc631 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.55</version>
+ <version>0.55.1</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 26b5dad..fa4cc7b 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1003,6 +1003,7 @@
// Search for minimal surrounding sentences
if (extendToSentence) {
+ // <legacy>
String element = (match.getTextSigle() == null ? "s"
: "base/s:s");
@@ -1055,18 +1056,15 @@
// Init document iterator
docs.nextDoc();
- // Should never happen ... but hell.
+ // Should never happen ... but hell!
if (docs.docID() == DocIdSetIterator.NO_MORE_DOCS)
continue;
- // How often does this term occur in the document?
- int termOccurrences = docs.freq();
-
// String representation of the term
String termString = termsEnum.term().utf8ToString();
// Iterate over all occurrences
- for (int i = 0; i < termOccurrences; i++) {
+ for (int i = 0; i < docs.freq(); i++) {
// Init positions and get the current
int pos = docs.nextPosition();
diff --git a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
index 47224dd..c76f053 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
@@ -42,7 +42,6 @@
this.payload = payload;
};
-
public TermInfo analyze () {
if (analyzed)
return this;
@@ -53,6 +52,7 @@
this.payload.rewind();
// TODO: Use PTI!
+ // Add TUI and REF!
switch (tterm.charAt(0)) {
case '<':
// "<>:mate/l:..."
@@ -78,12 +78,19 @@
ttype = 3;
break;
- case '_':
- // pos
- this.type = "pos";
- ttype = 1;
- tterm = tterm.substring(1);
- break;
+ case '_':
+ // pos
+ this.type = "pos";
+ ttype = 1;
+ tterm = tterm.substring(1);
+ break;
+
+ case '@':
+ // attribute
+ this.type = "attr";
+ ttype = 4;
+ tterm = tterm.substring(1);
+ break;
default:
// term
@@ -122,8 +129,8 @@
this.endChar = this.payload.getInt();
};
- // for spans and relations
- if (ttype > 1) {
+ // for spans and relations (attributes, ttype 4, are excluded)
+ if (ttype > 1 && ttype != 4) {
if (this.type.equals("relTarget")) {
this.endPos = this.startPos;
this.startPos = this.payload.getInt() - 1;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 93fe656..796a6d9 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -570,6 +570,57 @@
};
+ @Test
+ public void indexAttributeInfo () throws IOException, QueryException {
+ KrillIndex ki = new KrillIndex();
+ ki.addDoc(createAttributeFieldDoc());
+ ki.commit();
+ Match km = ki.getMatchInfo("match-ca1!da1-p7-10",
+ "tokens",
+ null,
+ null,
+ false,
+ false);
+ JsonNode res = mapper.readTree(km.toJsonString());
+ assertEquals("tokens", res.at("/field").asText());
+ assertTrue(res.at("/startMore").asBoolean());
+ assertTrue(res.at("/endMore").asBoolean());
+ assertEquals("ca1", res.at("/corpusID").asText());
+ assertEquals("da1", res.at("/docID").asText());
+ assertEquals("<span class=\"context-left\">"+
+ "<span class=\"more\">"+
+ "</span>"+
+ "</span>"+
+ "<mark>"+
+ // "<span title=\"@:x/s:key:value\">"+
+ "<span title=\"f/m:acht\">"+
+ "<span title=\"f/y:eight\">"+
+ "<span title=\"it/is:8\">"+
+ "<span title=\"x/o:achtens\">b</span>"+
+ // "</span>"+
+ "</span>"+
+ "</span>"+
+ "</span>"+
+ "<span title=\"f/m:neun\">"+
+ "<span title=\"f/y:nine\">"+
+ "<span title=\"it/is:9\">"+
+ "<span title=\"x/o:neuntens\">a</span>"+
+ "</span>"+
+ "</span>"+
+ "</span>"+
+ "<span title=\"f/m:zehn\">"+
+ "<span title=\"f/y:ten\">"+
+ "<span title=\"it/is:10\">"+
+ "<span title=\"x/o:zehntens\">c</span>"+
+ "</span>"+
+ "</span>"+
+ "</span>"+
+ "</mark>"+
+ "<span class=\"context-right\">"+
+ "</span>", res.at("/snippet").asText());
+ };
+
+
private FieldDocument createSimpleFieldDoc () {
FieldDocument fd = new FieldDocument();
fd.addString("corpusID", "c1");
@@ -653,4 +704,27 @@
return fd;
};
+ /*
+ Check for terms|spans|rels ...
+ */
+ private FieldDocument createAttributeFieldDoc () {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("corpusID", "ca1");
+ fd.addString("ID", "da1");
+ fd.addTV(
+ "tokens",
+ "abcabcabac",
+ "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|_0$<i>0<i>1|-:t$<i>10]"
+ + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
+ + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:s$<b>64<i>2<i>5<i>5]"
+ + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|_3$<i>3<i>4]"
+ + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
+ + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
+ + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
+ + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/s:tag$<b>64<i>7<i>10<i>10<b>0<s>1|@:x/s:key:value$<b>17<i>10<s>1|_7$<i>7<i>8]"
+ + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
+ + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");
+ return fd;
+ };
+
};