Further improvements to the match data collector
diff --git a/pom.xml b/pom.xml
index e07c7c6..50707e7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -59,7 +59,7 @@
<artifactId>lucene-core</artifactId>
<groupId>org.apache.lucene</groupId>
<type>jar</type>
- <version>4.3.0</version>
+ <version>4.3.1</version>
</dependency>
<!-- Lucene queryparser dependency -->
@@ -67,7 +67,7 @@
<artifactId>lucene-queryparser</artifactId>
<groupId>org.apache.lucene</groupId>
<type>jar</type>
- <version>4.3.0</version>
+ <version>4.3.1</version>
</dependency>
<!-- Lucene analyzers dependency -->
@@ -75,7 +75,7 @@
<artifactId>lucene-analyzers-common</artifactId>
<groupId>org.apache.lucene</groupId>
<type>jar</type>
- <version>4.3.0</version>
+ <version>4.3.1</version>
</dependency>
<!-- JCache -->
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 9679d31..2384f12 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -1,12 +1,13 @@
package de.ids_mannheim.korap;
+import java.util.*;
+
import java.io.File;
import java.io.IOException;
-import java.util.*;
-import java.net.URL;
+
+// import java.net.URL;
import java.nio.ByteBuffer;
-import java.util.HashMap;
import java.util.zip.GZIPInputStream;
import java.io.FileInputStream;
@@ -16,7 +17,6 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanClause;
-
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.QueryWrapperFilter;
@@ -73,13 +73,10 @@
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.index.PositionsToOffset;
import de.ids_mannheim.korap.index.TermInfo;
-import de.ids_mannheim.korap.document.KorapPrimaryData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.nio.ByteBuffer;
-
/*
Todo: Use FieldCache!
TODO: Reuse the indexreader everywhere - it should be threadsafe!
@@ -142,7 +139,7 @@
public KorapIndex (Directory directory) throws IOException {
this.directory = directory;
- fieldsToLoad = new HashSet<String>();
+ fieldsToLoad = new HashSet<String>(16);
fieldsToLoad.add("author");
fieldsToLoad.add("ID");
fieldsToLoad.add("title");
@@ -306,6 +303,7 @@
);
// Iterator is empty
+ // TODO: Maybe this is an error ...
if (docs.docID() == DocsAndPositionsEnum.NO_MORE_DOCS) {
return 0;
};
@@ -317,9 +315,7 @@
// Init nextDoc()
while (docs.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
- // Go to first term (initialization phase)
-// TODO: THIS MAY BE WRONG!
-// TODO:: DELETEEEEE AND TESTT!
+ // Initialize (go to first term)
docs.nextPosition();
// Copy payload with the offset of the BytesRef
@@ -458,125 +454,6 @@
* Get a match.
* BE AWARE - THIS IS STILL A PLAYGROUND!
*/
- public KorapMatch getMatch (String id) {
-
- String corpusID = "WPD";
- String docID = "WPD_AAA.00003";
- String field = "tokens"; // text field
- String foundry = "mate";
- String layer = "l";
- int startPos = 20;
- int endPos = 30;
- Boolean includeSpans = true;
-
- KorapMatch km = (KorapMatch) null;
- LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
-
- StringBuffer regex = new StringBuffer();
-
- // Todo: Ignore -: stuff!
-
- if (includeSpans)
- regex.append("((<>|<|>):)?");
- else
- regex.append("[^<>]");
- if (foundry != null)
- regex.append(foundry).append('/');
- if (layer != null)
- regex.append(layer).append(":");
- regex.append(".+?");
-
- BooleanQuery bool = new BooleanQuery();
- bool.add(new TermQuery(new Term("ID", docID)), BooleanClause.Occur.MUST);
- bool.add(new TermQuery(new Term("corpusID", corpusID)), BooleanClause.Occur.MUST);
-
- Filter filter = (Filter) new QueryWrapperFilter(bool);
-
- // Create an automaton for prefixed terms of interest:
- CompiledAutomaton fst = new CompiledAutomaton(
- new RegExp(regex.toString()).toAutomaton()
- );
-
- try {
- for (AtomicReaderContext atomic : this.reader().leaves()) {
- DocIdSetIterator filterIter = filter.getDocIdSet(
- atomic,
- atomic.reader().getLiveDocs()
- ).iterator();
-
- // Go to the matching doc
- int localDocID = filterIter.nextDoc();
- if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
- continue;
-
- // We've found the correct document!
- HashSet<String> fieldsToLoadLocal = new HashSet<>(fieldsToLoad);
- fieldsToLoadLocal.add(field);
-
- // Load the necessary fields of the document
- Document doc = atomic.reader().document(localDocID, fieldsToLoadLocal);
- // Get terms from the document
- Terms docTerms = atomic.reader().getTermVector(localDocID, field);
-
- km = new KorapMatch(
- new PositionsToOffset(atomic, field),
- localDocID,
- startPos,
- endPos
- );
-
- // A termsEnum object could be reused here
- final TermsEnum termsEnum = docTerms.intersect(fst, null);
-
- // Create a bitset for the correct document
- // Yeah ... I know ... it could've been easier probably
- FixedBitSet bitset = new FixedBitSet(atomic.reader().numDocs());
- bitset.or(filterIter);
-
- DocsAndPositionsEnum docs = (DocsAndPositionsEnum) null;
-
- // Iterate over all terms in the document
- while (termsEnum.next() != null) {
- docs = termsEnum.docsAndPositions(
- bitset,
- docs,
- DocsAndPositionsEnum.FLAG_PAYLOADS
- );
-
- // Init docs
- docs.nextDoc();
-
- // How often does this term occur in the document?
- int termOccurrences = docs.freq();
-
- // Iterate over all occurrences
- for (int i = 0; i < termOccurrences; i++) {
-
- // Init positions and get the current
- int pos = docs.nextPosition();
-
- // Check, if the position of the term is in the interesting area
- if (pos >= startPos && pos <= endPos) {
- termList.add(new TermInfo(
- termsEnum.term().utf8ToString(),
- pos,
- docs.getPayload()
- ));
- };
- };
- };
-
- break;
- };
- }
- catch (IOException e) {
- // ...
- };
-
- return km;
- };
-
-
// TODO: collect all information based on a prefix (like cnx/p etc.)
// TODO: Generate a meaningful structure (e.g. a tree)
/*
@@ -585,6 +462,170 @@
public KorapInfo infoOf (KorapMatch km, String prefix);
*/
+ public KorapMatch getMatch (String id) {
+
+ // List of terms to populate
+ LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
+
+ KorapMatch match = new KorapMatch();
+
+ // That's purely temporary
+ String corpusID = "WPD";
+ String docID = "WPD_AAA.00003";
+ String field = "tokens"; // text field
+ String foundry = "mate";
+ String layer = "l";
+ int startPos = 25;
+ int endPos = 30;
+ Boolean includeSpans = true;
+
+ // Create a filter based on the corpusID and the docID
+ BooleanQuery bool = new BooleanQuery();
+ bool.add(new TermQuery(new Term("ID", docID)), BooleanClause.Occur.MUST);
+ bool.add(new TermQuery(new Term("corpusID", corpusID)), BooleanClause.Occur.MUST);
+ Filter filter = (Filter) new QueryWrapperFilter(bool);
+
+ // Create an automaton for prefixed terms of interest based on a Regex
+ // Todo: Ignore -: stuff!
+ StringBuffer regex = new StringBuffer();
+ if (includeSpans)
+ regex.append("(((\"<>\"|\"<\"|\">\")\":\")?");
+ else
+ regex.append("[^<>]");
+ if (foundry != null)
+ regex.append(foundry).append('/');
+ if (layer != null)
+ regex.append(layer).append(":");
+ regex.append("(.){1,})|_[0-9]+");
+
+ RegExp regexObj = new RegExp(regex.toString());
+ CompiledAutomaton fst = new CompiledAutomaton(regexObj.toAutomaton());
+ log.trace("The final regex is {}", regex.toString());
+
+ try {
+
+ // Iterate over all atomic indices and find the matching document
+ for (AtomicReaderContext atomic : this.reader().leaves()) {
+ /*
+ DocIdSetIterator filterIter = filter.getDocIdSet(
+ atomic,
+ atomic.reader().getLiveDocs()
+ ).iterator();
+ */
+
+ DocIdSet filterSet = filter.getDocIdSet(
+ atomic,
+ atomic.reader().getLiveDocs()
+ );
+
+ // Create a bitset for the correct document
+ // Yeah ... I know ... it could've been easier probably
+ /*
+ FixedBitSet bitset = new FixedBitSet(atomic.reader().numDocs());
+ bitset.or(filterIter);
+ */
+ Bits bitset = filterSet.bits();
+
+ // Go to the matching doc
+ // int localDocID = bitset.iterator().nextDoc();
+ int localDocID = filterSet.iterator().nextDoc();
+
+ // log.trace("Found documents {} with the docID {}", bitset.cardinality(), localDocID);
+
+ if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+ continue;
+
+ // We've found the correct document!
+ HashSet<String> fieldsToLoadLocal = new HashSet<>(fieldsToLoad);
+ fieldsToLoadLocal.add(field);
+
+ // Get terms from the document
+ Terms docTerms = atomic.reader().getTermVector(localDocID, field);
+
+ /* ---
+ *
+ */
+ log.trace("docTerms has Payloads: {}", docTerms.hasPayloads());
+ log.trace("docTerms has Positions: {}", docTerms.hasPositions());
+
+ // Load the necessary fields of the document
+ Document doc = atomic.reader().document(localDocID, fieldsToLoadLocal);
+
+ // Put some more information to the match
+ match.setPositionsToOffset(new PositionsToOffset(atomic, field));
+ match.setLocalDocID(localDocID);
+
+ log.trace("pto and localDocID defined");
+
+ match.setStartPos(startPos);
+ match.setEndPos(endPos);
+ match.populateDocument(doc, field, fieldsToLoadLocal);
+
+ log.trace("We have found the correct document: {}", match.getTitle());
+ // log.trace("The match is: {}", doc.get("tokens"));
+
+ // A termsEnum object could be reused here
+ TermsEnum termsEnum = docTerms.intersect(fst, null);
+
+ DocsAndPositionsEnum docs = (DocsAndPositionsEnum) null;
+ // DocsAndPositionsEnum docs;
+
+ // Iterate over all terms in the document
+ while (termsEnum.next() != null) {
+
+ log.trace("> {}", termsEnum.term().utf8ToString());
+
+ docs = termsEnum.docsAndPositions(
+ null, //bitset.bits(),
+ null, //docs,
+ DocsAndPositionsEnum.FLAG_PAYLOADS
+ );
+
+ docs.nextDoc();
+ // log.trace("Check for '{}'({}) in document {}({}) from {}", termsEnum.term().utf8ToString(), termsEnum.totalTermFreq(), docs.docID(), localDocID, bitset.cardinality());
+ docs.nextPosition();
+
+ if (docs.docID() == DocIdSetIterator.NO_MORE_DOCS ||
+ (docs.docID() != localDocID && docs.advance(localDocID) != localDocID))
+ continue;
+
+ log.trace("Frequencies: {}!", docs.getPayload());
+
+
+ // Init docs
+ /*
+ if (docs.advance(localDocID) == DocIdSetIterator.NO_MORE_DOCS || docs.docID() != localDocID)
+ continue;
+ */
+
+ // How often does this term occur in the document?
+ int termOccurrences = docs.freq();
+
+ // Iterate over all occurrences
+ for (int i = 0; i < termOccurrences; i++) {
+
+ // Init positions and get the current
+ int pos = docs.nextPosition();
+
+ // Check, if the position of the term is in the interesting area
+ if (pos >= startPos && pos <= endPos) {
+ termList.add(new TermInfo(
+ termsEnum.term().utf8ToString(),
+ pos,
+ docs.getPayload()
+ ));
+ };
+ };
+ };
+ break;
+ };
+ }
+ catch (IOException e) {
+ // ...
+ };
+
+ return match;
+ };
/**
@@ -810,27 +851,10 @@
match.internalDocID = docID;
- match.setField(field);
- match.setAuthor(doc.get("author"));
- match.setTextClass(doc.get("textClass"));
- match.setDocID(doc.get("ID"));
- match.setTitle(doc.get("title"));
- match.setSubTitle(doc.get("subTitle"));
- match.setPubPlace(doc.get("pubPlace"));
- match.setCorpusID(doc.get("corpusID"));
- match.setPubDate(doc.get("pubDate"));
+ match.populateDocument(doc, field, fieldsToLoadLocal);
log.trace("I've got a match in {} of {}", match.getDocID(), count);
- // Temporary (later meta fields in term vector)
- match.setFoundries(doc.get("foundries"));
- match.setTokenization(doc.get("tokenization"));
-
- match.setLayerInfo(doc.get("layerInfo"));
-
- match.setPrimaryData(
- new KorapPrimaryData(doc.get(field))
- );
atomicMatches.add(match);
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index 8f54f5e..cc4fa60 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -5,8 +5,12 @@
import com.fasterxml.jackson.annotation.*;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.*;
import de.ids_mannheim.korap.index.PositionsToOffset;
+import de.ids_mannheim.korap.document.KorapPrimaryData;
+
import static de.ids_mannheim.korap.util.KorapHTML.*;
// import org.apache.commons.codec.binary.Base64;
@@ -15,6 +19,7 @@
import org.slf4j.LoggerFactory;
import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.document.Document;
/*
Todo: The implemented classes and private names are horrible!
@@ -32,9 +37,10 @@
// Snippet information
@JsonIgnore
- public short leftContext,
- rightContext;
+ public short leftContextOffset,
+ rightContextOffset;
+ // Should be deprecated, but used wildly in tests!
@JsonIgnore
public int startPos,
endPos;
@@ -45,6 +51,8 @@
private int startOffsetChar = 0;
+ private int localDocID = -1;
+
@JsonIgnore
public boolean leftTokenContext,
rightTokenContext;
@@ -90,6 +98,11 @@
};
/**
+ * Constructs a new KorapMatch object.
+ */
+ public KorapMatch () {};
+
+ /**
* Insert a highlight for the snippet view by means of positional
* offsets and an optional class number.
*
@@ -123,6 +136,40 @@
this.highlight.add(new int[]{ start, end, number});
};
+ public void populateDocument (Document doc, String field, HashSet<String> fields) {
+
+ this.setField(field);
+
+ this.setPrimaryData(
+ new KorapPrimaryData(doc.get(field))
+ );
+
+ if (fields.contains("corpusID"))
+ this.setCorpusID(doc.get("corpusID"));
+ if (fields.contains("ID"))
+ this.setDocID(doc.get("ID"));
+ if (fields.contains("author"))
+ this.setAuthor(doc.get("author"));
+ if (fields.contains("textClass"))
+ this.setTextClass(doc.get("textClass"));
+ if (fields.contains("title"))
+ this.setTitle(doc.get("title"));
+ if (fields.contains("subTitle"))
+ this.setSubTitle(doc.get("subTitle"));
+ if (fields.contains("pubDate"))
+ this.setPubDate(doc.get("pubDate"));
+ if (fields.contains("pubPlace"))
+ this.setPubPlace(doc.get("pubPlace"));
+
+ // Temporary (later meta fields in term vector)
+ if (fields.contains("foundries"))
+ this.setFoundries(doc.get("foundries"));
+ if (fields.contains("tokenization"))
+ this.setTokenization(doc.get("tokenization"));
+ if (fields.contains("layerInfo"))
+ this.setLayerInfo(doc.get("layerInfo"));
+ };
+
@JsonProperty("docID")
public String getDocID () {
return super.getID();
@@ -132,6 +179,66 @@
super.setID(id);
};
+ @JsonIgnore
+ public int getStartPos() {
+ return this.startPos;
+ };
+
+ @JsonIgnore
+ public void setStartPos(int pos) {
+ this.startPos = pos;
+
+ if (this.positionsToOffset == null || this.localDocID == -1) {
+ log.warn("You have to define " +
+ "positionsToOffset and localDocID first " +
+ "before adding position information");
+ return;
+ };
+
+ // Preprocess matching
+ this.positionsToOffset.add(this.localDocID, pos);
+ };
+
+ @JsonIgnore
+ public int getEndPos() {
+ return this.endPos;
+ };
+
+ @JsonIgnore
+ public void setEndPos(int pos) {
+ this.endPos = pos;
+
+ if (this.positionsToOffset == null || this.localDocID == -1) {
+ log.warn("You have to define " +
+ "positionsToOffset and localDocID first " +
+ "before adding position information");
+ return;
+ };
+
+ // Preprocess matching
+ this.positionsToOffset.add(this.localDocID, pos - 1);
+ };
+
+ @JsonIgnore
+ public int getLocalDocID () {
+ return this.localDocID;
+ };
+
+ @JsonIgnore
+ public void setLocalDocID (int id) {
+ this.localDocID = id;
+ };
+
+ @JsonIgnore
+ public void setPositionsToOffset (PositionsToOffset pto) {
+ this.positionsToOffset = pto;
+ };
+
+ @JsonIgnore
+ public PositionsToOffset getPositionsToOffset () {
+ return this.positionsToOffset;
+ };
+
@Override
@JsonProperty("ID")
public String getID () {
@@ -560,7 +667,6 @@
@JsonProperty("snippet")
public String getSnippetHTML () {
-
this._processHighlight();
if (this.processed && this.snippetHTML != null)
@@ -708,26 +814,26 @@
// left context
if (leftTokenContext) {
- startOffsetChar = this.positionsToOffset.start(ldid, startPos - this.leftContext);
+ startOffsetChar = this.positionsToOffset.start(ldid, startPos - this.leftContextOffset);
}
else {
- startOffsetChar = startPosChar - this.leftContext;
+ startOffsetChar = startPosChar - this.leftContextOffset;
};
// right context
if (rightTokenContext) {
endOffsetChar = this.positionsToOffset.end(
ldid,
- this.endPos + this.rightContext - 1
+ this.endPos + this.rightContextOffset - 1
);
- log.trace("For endOffset {} ({}+{}-1) pto returns {}", (this.endPos + this.rightContext - 1), this.endPos, this.rightContext, endOffsetChar);
+ log.trace("For endOffset {} ({}+{}-1) pto returns {}", (this.endPos + this.rightContextOffset - 1), this.endPos, this.rightContextOffset, endOffsetChar);
}
else {
if (endPosChar == -1) {
endOffsetChar = -1;
}
else {
- endOffsetChar = endPosChar + this.rightContext;
+ endOffsetChar = endPosChar + this.rightContextOffset;
};
};
@@ -750,7 +856,7 @@
this.startOffsetChar = startOffsetChar;
- log.trace("Offsetposition {} till {} with contexts {} and {}", startOffsetChar, endOffsetChar, leftContext, rightContext);
+ log.trace("Offsetposition {} till {} with contexts {} and {}", startOffsetChar, endOffsetChar, leftContextOffset, rightContextOffset);
if (endOffsetChar > -1 && endOffsetChar < this.getPrimaryDataLength()) {
this.tempSnippet = this.getPrimaryData(startOffsetChar, endOffsetChar);
@@ -760,7 +866,7 @@
endMore = false;
};
- log.trace("Temporary snippet is \"{}\"", this.tempSnippet);
+ // log.trace("Temporary snippet is \"{}\"", this.tempSnippet);
if (this.span == null)
this.span = new LinkedList<int[]>();
@@ -797,4 +903,32 @@
};
};
};
+
+
+ // Identical to KorapResult!
+ public String toJSON () {
+ ObjectNode json = (ObjectNode) mapper.valueToTree(this);
+
+ ArrayNode leftContext = mapper.createArrayNode();
+ leftContext.add(this.leftTokenContext ? "token" : "char");
+ leftContext.add(this.leftContextOffset);
+
+ ArrayNode rightContext = mapper.createArrayNode();
+ rightContext.add(this.rightTokenContext ? "token" : "char");
+ rightContext.add(this.rightContextOffset);
+
+ ObjectNode context = mapper.createObjectNode();
+ context.put("left", leftContext);
+ context.put("right", rightContext);
+ json.put("context", context);
+
+ try {
+ return mapper.writeValueAsString(json);
+ }
+ catch (Exception e) {
+ log.warn(e.getLocalizedMessage());
+ };
+
+ return "{}";
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/KorapResult.java b/src/main/java/de/ids_mannheim/korap/KorapResult.java
index 2723424..6b05e36 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapResult.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapResult.java
@@ -29,10 +29,13 @@
private int startIndex = 0;
private short itemsPerPage = ITEMS_PER_PAGE;
- private short leftContextOffset = 6, rightContextOffset = 6;
- private boolean leftTokenContext, rightTokenContext;
+ private short leftContextOffset = 6,
+ rightContextOffset = 6;
+ private boolean leftTokenContext,
+ rightTokenContext;
- private String benchmarkSearchResults = "", benchmarkHitCounter = "0";
+ private String benchmarkSearchResults = "",
+ benchmarkHitCounter = "0";
private String error = null;
// Logger
@@ -72,15 +75,19 @@
public KorapMatch addMatch (PositionsToOffset pto, int localDocID, int startPos, int endPos) {
KorapMatch km = new KorapMatch(pto, localDocID, startPos, endPos);
- // Temporary - should use the same interface like results in the future:
- km.leftContext = this.leftContextOffset;
- km.leftTokenContext = this.leftTokenContext;
- km.rightContext = this.rightContextOffset;
- km.rightTokenContext = this.rightTokenContext;
+
+ // Temporary - should use the same interface like results
+ // in the future:
+ km.leftContextOffset = this.leftContextOffset;
+ km.leftTokenContext = this.leftTokenContext;
+ km.rightContextOffset = this.rightContextOffset;
+ km.rightTokenContext = this.rightTokenContext;
// Add pos for context
- // That's not really a good position for it, to be honest ...
- // But maybe it will make the offset information in the match be obsolete!
+ // That's not really a good position for it,
+ // to be honest ...
+ // But maybe it will make the offset
+ // information in the match be obsolete!
if (km.leftTokenContext) {
pto.add(localDocID, startPos - this.leftContextOffset);
};
@@ -159,11 +166,8 @@
return startIndex;
};
+ // Identical to KorapMatch!
public String toJSON () {
-
- // ObjectNode json = (ObjectNode) mapper.createObjectNode();
- // ObjectNode json = (ObjectNode) mapper.treeAsTokens(this);
-
ObjectNode json = (ObjectNode) mapper.valueToTree(this);
ArrayNode leftContext = mapper.createArrayNode();
@@ -180,7 +184,7 @@
json.put("context", context);
try {
- return mapper.writeValueAsString(json); // mapper.writeValueAsString(treeMapper);
+ return mapper.writeValueAsString(json);
}
catch (Exception e) {
log.warn(e.getLocalizedMessage());
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 121ef3c..a6e96e5 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -51,4 +51,20 @@
assertEquals("ID (0)", "match-0p7-9(0)8-8(2)7-8c7-9(0)8-9(2)7-9", kr.match(0).getID());
};
+
+
+ @Test
+ public void indexExample2 () throws IOException {
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ );
+ };
+ ki.commit();
+ System.err.println(ki.getMatch("test").toJSON());
+ };
+
};