Some more tests for getMatchInfo
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 6b4ca02..095d2d5 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -98,10 +98,10 @@
*/
/**
- * @author Nils Diewald
- *
* KorapIndex implements a simple API for searching in and writing to a
* Lucene index and equesting several information but the index's nature.
+ *
+ * @author ndiewald
*/
public class KorapIndex {
private Directory directory;
@@ -118,9 +118,9 @@
private ObjectMapper mapper = new ObjectMapper();
- private static ByteBuffer bb = ByteBuffer.allocate(4);
- private static ByteBuffer bbOffset = ByteBuffer.allocate(8);
- private static ByteBuffer bbTerm = ByteBuffer.allocate(16);
+ private static ByteBuffer bb = ByteBuffer.allocate(4),
+ bbOffset = ByteBuffer.allocate(8),
+ bbTerm = ByteBuffer.allocate(16);
private byte[] pl = new byte[4];
@@ -130,7 +130,7 @@
private final static Logger log = LoggerFactory.getLogger(KorapIndex.class);
public KorapIndex () throws IOException {
- this((Directory) new RAMDirectory());
+ this((Directory) new RAMDirectory());
};
@@ -516,9 +516,10 @@
};
regex.append("(.){1,}|_[0-9]+");
- log.trace("The final regex is {}", regex.toString());
+ log.trace("The final regexString is {}", regex.toString());
RegExp regexObj = new RegExp(regex.toString());
fst = new CompiledAutomaton(regexObj.toAutomaton());
+ log.trace("The final regexObj is {}", regexObj.toString());
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index ad1a017..15627de 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -14,7 +14,6 @@
import static de.ids_mannheim.korap.util.KorapHTML.*;
import de.ids_mannheim.korap.index.MatchIdentifier;
-// import org.apache.commons.codec.binary.Base64;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index e1af705..54e64d8 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -10,7 +10,7 @@
#log4j.logger.de.ids_mannheim.korap.query.spans.KorapTermSpan = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.ClassSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
-#log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
+# log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapFilter = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index e0eee30..4bc2973 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -99,41 +99,109 @@
assertEquals("StartPos (0)", 7, km.getStartPos());
assertEquals("EndPos (0)", 9, km.getEndPos());
- assertEquals("SnippetBrackets (0)", "... [{2:b{a}}] ...", km.snippetBrackets());
+ assertEquals("SnippetBrackets (0)",
+ "... [{2:b{a}}] ...",
+ km.getSnippetBrackets());
assertEquals("ID (0)", "match-c1!d1-p7-9(0)8-8(2)7-8", km.getID());
+
+ km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8",
+ "tokens",
+ "f",
+ "m",
+ false,
+ false);
+
+ assertEquals("SnippetBrackets (1)",
+ "... [{f/m:acht:b}{f/m:neun:a}] ...",
+ km.getSnippetBrackets());
+
+ km = ki.getMatchInfo("match-c1!d1-p7-9(0)8-8(2)7-8",
+ "tokens",
+ "f",
+ "m",
+ false,
+ true);
+
+ assertEquals("SnippetBrackets (2)",
+ "... [{2:{f/m:acht:b}{{f/m:neun:a}}}] ...",
+ km.getSnippetBrackets());
+
+ km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
+ "tokens",
+ "f",
+ "m",
+ false,
+ true);
+
+ assertEquals("SnippetBrackets (3)",
+ "... [{2:{f/m:acht:b}{4:{f/m:neun:a}}}] ...",
+ km.getSnippetBrackets());
+
+ km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
+ "tokens",
+ "f",
+ null,
+ false,
+ true);
+
+ assertEquals("SnippetBrackets (4)",
+ "... [{2:{f/m:acht:{f/y:eight:b}}{4:{f/m:neun:{f/y:nine:a}}}}] ...",
+ km.getSnippetBrackets());
+
+ assertEquals("SnippetHTML (4)",
+ "<span class=\"context-left\">"+
+ "<span class=\"more\">"+
+ "</span>"+
+ "</span>"+
+ "<span class=\"match\">"+
+ "<em class=\"class-2 level-0\">"+
+ "<span title=\"f/m:acht\">"+
+ "<span title=\"f/y:eight\">"+
+ "b"+
+ "</span>"+
+ "</span>"+
+ "<em class=\"class-4 level-1\">"+
+ "<span title=\"f/m:neun\">"+
+ "<span title=\"f/y:nine\">"+
+ "a"+
+ "</span>"+
+ "</span>"+
+ "</em>"+
+ "</em>"+
+ "</span>"+
+ "<span class=\"context-right\">"+
+ "<span class=\"more\">"+
+ "</span>"+
+ "</span>",
+ km.getSnippetHTML());
+
+ km = ki.getMatchInfo("match-c1!d1-p7-9(4)8-8(2)7-8",
+ "tokens",
+ null,
+ null,
+ false,
+ true);
+
+ // FIXME: known bug when both annotation arguments (3rd and 4th) are null:
+ // System.err.println(km.getSnippetHTML());
};
-
- public void indexExample3 () throws IOException {
- // Construct index
- KorapIndex ki = new KorapIndex();
- // Indexing test files
- for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
- );
- };
- ki.commit();
- // System.err.println(ki.getMatchInfo("xxx", null, null, true, true).toJSON());
- };
-
-
private FieldDocument createSimpleFieldDoc(){
FieldDocument fd = new FieldDocument();
fd.addString("corpusID", "c1");
fd.addString("ID", "d1");
fd.addTV("tokens",
"abcabcabac",
- "[(0-1)s:a|i:a|f/m:eins|_0#0-1|-:t$<i>10]" +
- "[(1-2)s:b|i:b|f/m:zwei|_1#1-2]" +
- "[(2-3)s:c|i:c|f/m:drei|_2#2-3]" +
- "[(3-4)s:a|i:a|f/m:vier|_3#3-4]" +
- "[(4-5)s:b|i:b|f/m:fuenf|_4#4-5]" +
- "[(5-6)s:c|i:c|f/m:sechs|_5#5-6]" +
- "[(6-7)s:a|i:a|f/m:sieben|_6#6-7]" +
- "[(7-8)s:b|i:b|f/m:acht|_7#7-8]" +
- "[(8-9)s:a|i:a|f/m:neun|_8#8-9]" +
- "[(9-10)s:c|i:c|f/m:zehn|_9#9-10]");
+ "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|_0#0-1|-:t$<i>10]" +
+ "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|_1#1-2]" +
+ "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|_2#2-3]" +
+ "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|_3#3-4]" +
+ "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|_4#4-5]" +
+ "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|_5#5-6]" +
+ "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|_6#6-7]" +
+ "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|_7#7-8]" +
+ "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|_8#8-9]" +
+ "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|_9#9-10]");
return fd;
};
};