Improved KrillIndex API and documentation
diff --git a/Changes b/Changes
index 646b434..3ee25bc 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.50.1 2015-02-27
+0.50.1 2015-03-02
- [feature] Deserialization of arbitrary elements with attributes (margaretha)
- [cleanup] Extracted KrillMeta from Krill,
renamed KorapQuery to KrillQuery,
@@ -19,7 +19,8 @@
moved model/* to index,
moved meta/SearchContext to response/SearchContext,
krillified property files,
- added Errorcodes (diewald)
+ added Errorcodes,
+ removed KrillIndex#addDocFile (diewald)
- [documentation] Improved documentation for API classes (diewald)
0.50 2015-02-23
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 0672362..032cf5b 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1,12 +1,11 @@
package de.ids_mannheim.korap;
-// Java classes
-import java.util.*;
-import java.util.zip.GZIPInputStream;
-import java.util.regex.Pattern;
-import java.io.*;
-import java.net.URL;
-import java.nio.ByteBuffer;
+// Krill classes
+import de.ids_mannheim.korap.*;
+import de.ids_mannheim.korap.index.*;
+import de.ids_mannheim.korap.response.*;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.util.QueryException;
// Lucene classes
import org.apache.lucene.search.*;
@@ -26,52 +25,72 @@
import com.fasterxml.jackson.annotation.*;
import com.fasterxml.jackson.databind.ObjectMapper;
-// Krill classes
-import de.ids_mannheim.korap.*;
-import de.ids_mannheim.korap.index.*;
-import de.ids_mannheim.korap.response.*;
-import de.ids_mannheim.korap.query.SpanElementQuery;
-import de.ids_mannheim.korap.util.QueryException;
-
// Log4j Logger classes
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+// Java core classes
+import java.util.*;
+import java.util.zip.GZIPInputStream;
+import java.util.regex.Pattern;
+import java.io.*;
+import java.net.URL;
+import java.nio.ByteBuffer;
+
/**
- * KrillIndex implements a simple API for searching in and writing to a
+ * <p>KrillIndex implements a simple API for searching in and writing to a
* Lucene index and requesting several information about the index' nature.
- * Please consult {@link Krill} for the preferred use of this class.
- * <br />
+ * Please consult {@link Krill} for the preferred use of this class.</p>
*
* <blockquote><pre>
+ * // Create new file backed index
* KrillIndex ki = new KrillIndex(
* new MMapDirectory(new File("/myindex"))
* );
- * Result result = new Krill(koralQueryString).apply(ki);
+ *
+ * // Add documents to the index
+ * ki.addDoc(1, "{\"ID\":\"WPD-001\", ... }");
+ * ki.addDoc(2, "{\"ID\":\"WPD-002\", ... }");
+ *
+ * // Apply Krill searches on the index
+ * String koral = "{\"@type\":\"koral:group\", ... }";
+ * Result result = new Krill(koral).apply(ki);
* </pre></blockquote>
*
- * Properties can be stored in a properies file called <tt>krill.properties</tt>.
- * Relevant properties are <tt>krill.version</tt> and
- * <tt>krill.name</tt>.
+ * <p>Properties can be stored in a properties file called
+ * <tt>krill.properties</tt>. Relevant properties are
+ * <tt>krill.version</tt> and <tt>krill.name</tt>.</p>
*
* @author diewald
*/
/*
- NOTE: Search could run in parallel on atomic readers (although Lucene developers
- strongly discourage that). Benefits are not clear and testing is harder,
- so let's stick to serial processing for now.
+ * Concerning parallel processing:
+ * ===============================
+ * Search /could/ be run in parallel on atomic readers
+ * (although Lucene developers strongly discourage that).
+ * Benefits are not clear and would need some benchmarks,
+ * the huge drawback would be more complicated testing.
+ * Aside from (probably) co-occurrence analysis, shared memory
+ * is not an important thing, so I guess the preferred
+ * way of using Krill on multicore machines for now is by using
+ * the same mechanism as for distribution:
+ * Running multiple nodes (and separated indices) per machine,
+ * registered independently at the Zookeeper.
+ *
+ * On the other hand: Threaded indexing should be implemented!
+ */
+/*
+ TODO: Use FieldCache!!!
TODO: Add word count as a meta data field!
TODO: Improve validation of document import!
TODO: Don't store the text in the token field!
(It has only to be lifted for match views!
Benchmark how worse that is!)
TODO: Support layer for specific foundries in terminfo (IMPORTANT)
- TODO: Use FieldCache!
TODO: Reuse the indexreader everywhere - it should be threadsafe!
TODO: Support document removal!
TODO: Support document update!
TODO: Support callback for interrupts (to stop the searching)!
- TODO: Support multiple indices (Probably).
http://invertedindex.blogspot.co.il/2009/04/lucene-dociduid-mapping-and-payload.html
see korap/search.java -> retrieveTokens
@@ -368,6 +387,9 @@
* object, that was passed to the method.
*/
public FieldDocument addDoc (FieldDocument doc) {
+ if (doc == null)
+ return doc;
+
try {
// Add document to writer
@@ -394,30 +416,101 @@
* @return The created {@link FieldDocument}.
* @throws IOException
*/
- public FieldDocument addDoc (String json) throws IOException {
- FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
- return this.addDoc(fd);
+ public FieldDocument addDoc (String json) {
+ return this.addDoc(_fromJson(json));
};
-
- // To document:
-
-
-
- // Add document to index as JSON file
- public FieldDocument addDocFile(String json) {
- return this.addDocFile(json, false);
+ /**
+ * Add a document to the index as a JSON string
+ * with a unique integer ID (unique throughout the index
+ * or even throughout the cluster of indices).
+ *
+ * @param uid The unique document identifier.
+ * @param json The document to add to the index as a string.
+ * @return The created {@link FieldDocument}.
+ * @throws IOException
+ */
+ public FieldDocument addDoc (Integer uid, String json) {
+ FieldDocument fd = _fromJson(json);
+ if (fd != null) {
+ fd.setUID(uid);
+ fd = this.addDoc(fd);
+ };
+ return fd;
};
- public FieldDocument addDocfromFile (String json, boolean gzip) {
+ /**
+ * Add a document to the index from a JSON input stream.
+ *
+ * @param json The document to add to the index as
+ * an {@link InputStream}.
+ * @return The created {@link FieldDocument}.
+ * @throws IOException
+ */
+ public FieldDocument addDoc (InputStream json) {
+ return this.addDoc(_fromFile(json, false));
+ };
+
+
+ /**
+ * Add a document to the index from a JSON input stream.
+ *
+ * @param json The document to add to the index as
+ * an {@link InputStream}.
+ * @param gzip Boolean value indicating if the file is gzipped.
+ * @return The created {@link FieldDocument}.
+ * @throws IOException
+ */
+ public FieldDocument addDoc (InputStream json, boolean gzip) {
+ return this.addDoc(_fromFile(json, gzip));
+ };
+
+
+ /**
+ * Add a document to the index from a JSON input stream
+ * with a unique integer ID (unique throughout the index
+ * or even throughout the cluster of indices).
+ *
+ * @param uid The unique document identifier.
+ * @param json The document to add to the index as
+ * an {@link InputStream}.
+ * @param gzip Boolean value indicating if the file is gzipped.
+ * @return The created {@link FieldDocument}.
+ * @throws IOException
+ */
+ public FieldDocument addDoc (Integer uid, InputStream json, boolean gzip) {
+ FieldDocument fd = _fromFile(json, gzip);
+ if (fd != null) {
+ fd.setUID(uid);
+ return this.addDoc(fd);
+ };
+ return fd;
+ };
+
+
+ // Parse JSON document from string
+ private FieldDocument _fromJson (String json) {
+ try {
+ FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
+ return fd;
+ }
+ catch (IOException e) {
+ log.error("File json not found");
+ };
+ return (FieldDocument) null;
+ };
+
+
+ // Load JSON document from input stream (optionally gzipped)
+ private FieldDocument _fromFile (InputStream json, boolean gzip) {
try {
if (gzip) {
// Create json field document
FieldDocument fd = this.mapper.readValue(
- new GZIPInputStream(new FileInputStream(json)),
+ new GZIPInputStream(json),
FieldDocument.class
);
return fd;
@@ -432,35 +525,10 @@
return (FieldDocument) null;
};
-
- // Add document to index as JSON file (possibly gzipped)
- public FieldDocument addDocFile(String json, boolean gzip) {
- return this.addDoc(this.addDocfromFile(json, gzip));
- };
-
-
- /**
- * Add a document to the index as a JSON string
- * with a unique integer ID (unique throughout the index
- * or even throughout the cluster of indices).
- *
- * @param uid The unique document identifier.
- * @param json The document to add to the index as a string.
- * @return The created {@link FieldDocument}.
- * @throws IOException
- */
- // Add document to index as JSON file (possibly gzipped)
- public FieldDocument addDocFile(int uid, String json, boolean gzip) {
- FieldDocument fd = this.addDocfromFile(json, gzip);
- if (fd != null) {
- fd.setUID(uid);
- return this.addDoc(fd);
- };
- return fd;
- };
+
@@ -729,13 +797,13 @@
};
public Match getMatchInfo (String id,
- String field,
- boolean info,
- String foundry,
- String layer,
- boolean includeSpans,
- boolean includeHighlights,
- boolean extendToSentence) throws QueryException {
+ String field,
+ boolean info,
+ String foundry,
+ String layer,
+ boolean includeSpans,
+ boolean includeHighlights,
+ boolean extendToSentence) throws QueryException {
ArrayList<String> foundryList = new ArrayList<>(1);
if (foundry != null)
foundryList.add(foundry);
@@ -763,13 +831,13 @@
per position in the match.
*/
public Match getMatchInfo (String idString,
- String field,
- boolean info,
- List<String> foundry,
- List<String> layer,
- boolean includeSpans,
- boolean includeHighlights,
- boolean extendToSentence) throws QueryException {
+ String field,
+ boolean info,
+ List<String> foundry,
+ List<String> layer,
+ boolean includeSpans,
+ boolean includeHighlights,
+ boolean extendToSentence) throws QueryException {
Match match = new Match(idString, includeHighlights);
diff --git a/src/main/java/de/ids_mannheim/korap/index/Indexer.java b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
index 7c7acfd..5af92e5 100644
--- a/src/main/java/de/ids_mannheim/korap/index/Indexer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
@@ -61,16 +61,21 @@
System.out.print(" Index " + found + " ... ");
// Add file to the index
- if (this.index.addDocFile(found, true) == null) {
- System.out.println("fail.");
- continue;
- };
- System.out.println("done (" + count + ").");
- this.count++;
+ try {
+ if (this.index.addDoc(new FileInputStream(found), true) == null) {
+ System.out.println("fail.");
+ continue;
+ };
+ System.out.println("done (" + count + ").");
+ this.count++;
- // Commit in case the commit count is reached
- if ((this.count % this.commitCount) == 0)
- this.commit();
+ // Commit in case the commit count is reached
+ if ((this.count % this.commitCount) == 0)
+ this.commit();
+ }
+ catch (FileNotFoundException e) {
+ System.out.println("not found!");
+ };
};
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/server/Resource.java b/src/main/java/de/ids_mannheim/korap/server/Resource.java
index 39e5437..2c47e63 100644
--- a/src/main/java/de/ids_mannheim/korap/server/Resource.java
+++ b/src/main/java/de/ids_mannheim/korap/server/Resource.java
@@ -148,19 +148,17 @@
kresp.setVersion(index.getVersion());
kresp.setName(index.getName());
- String ID = "Unknown";
- try {
- FieldDocument fd = index.addDoc(json);
- fd.setUID(uid);
- ID = fd.getID();
- }
- // Set HTTP to ???
- // TODO: This may be a field error!
- catch (IOException e) {
+ FieldDocument fd = index.addDoc(uid, json);
+ if (fd == null) {
+ // Set HTTP to ???
+ // TODO: This may be a field error!
kresp.addError(602, "Unable to add document to index");
return kresp.toJsonString();
};
+ String ID = "Unknown";
+ ID = fd.getID();
+
// Set HTTP to 200
kresp.addMessage(681, "Document was added successfully", ID);
diff --git a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
index e80be5f..3f9f91d 100644
--- a/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
+++ b/src/test/java/de/ids_mannheim/korap/benchmark/TestBenchmarkSpans.java
@@ -28,11 +28,11 @@
@Test
public void checkBenchmark1 () throws IOException {
Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile());
prop.load(fr);
// Get the real index
- KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("krill.indexDir"))));
// Create a container for virtual collections:
KrillCollection kc = new KrillCollection(ki);
@@ -79,11 +79,11 @@
@Test
public void checkBenchmark2JSON () throws IOException {
Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile());
prop.load(fr);
// Get the real index
- KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("krill.indexDir"))));
// Create a container for virtual collections:
KrillCollection kc = new KrillCollection(ki);
@@ -122,11 +122,11 @@
@Test
public void checkBenchmarkSentences () throws IOException {
Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile());
prop.load(fr);
// Get the real index
- KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("krill.indexDir"))));
// Create a container for virtual collections:
KrillCollection kc = new KrillCollection(ki);
@@ -164,11 +164,11 @@
// [orth=Der]{1:[orth=Mann]{2:[orth=und]}}
Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile());
prop.load(fr);
// Get the real index
- KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("krill.indexDir"))));
// Create a container for virtual collections:
KrillCollection kc = new KrillCollection(ki);
@@ -284,8 +284,8 @@
// Indexing test files
for (String d : docs) {
- FieldDocument fd = ki.addDocFile(
- getClass().getResource("/wiki/" + d + ".json.gz").getFile(),
+ FieldDocument fd = ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + d + ".json.gz"),
true
);
};
@@ -311,11 +311,11 @@
@Test
public void checkBenchmark3 () throws IOException {
Properties prop = new Properties();
- InputStream fr = new FileInputStream(getClass().getResource("/korap.conf").getFile());
+ InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile());
prop.load(fr);
// Get the real index
- KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("lucene.indexDir"))));
+ KrillIndex ki = new KrillIndex(new MMapDirectory(new File(prop.getProperty("krill.indexDir"))));
// Create a container for virtual collections:
KrillCollection kc = new KrillCollection(ki);
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java
index 5ff9891..4c2a83b 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java
@@ -37,8 +37,8 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true
);
};
ki.commit();
@@ -112,8 +112,8 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true
);
ki.commit();
};
@@ -188,13 +188,13 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true
);
};
ki.commit();
- ki.addDocFile(getClass().getResource("/wiki/00012-fakemeta.json.gz").getFile(), true);
+ ki.addDoc(getClass().getResourceAsStream("/wiki/00012-fakemeta.json.gz"), true);
ki.commit();
@@ -239,9 +239,9 @@
"00005",
"00006",
"02439"}) {
- FieldDocument fd = ki.addDocFile(
- uid++,
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ FieldDocument fd = ki.addDoc(
+ uid++,
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
diff --git a/src/test/java/de/ids_mannheim/korap/highlight/TestClass.java b/src/test/java/de/ids_mannheim/korap/highlight/TestClass.java
index 34481c4..003b129 100644
--- a/src/test/java/de/ids_mannheim/korap/highlight/TestClass.java
+++ b/src/test/java/de/ids_mannheim/korap/highlight/TestClass.java
@@ -38,10 +38,10 @@
//System.out.println(sq.toString());
ki = new KrillIndex();
- ki.addDocFile(
- getClass().getResource("/wiki/JJJ-00785.json.gz").getFile(),true);
- ki.addDocFile(
- getClass().getResource("/wiki/DDD-01402.json.gz").getFile(),true);
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/JJJ-00785.json.gz"),true);
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/DDD-01402.json.gz"),true);
ki.commit();
kr = ki.search(sq, (short) 10);
@@ -72,8 +72,8 @@
ki = new KrillIndex();
- ki.addDocFile(
- getClass().getResource("/wiki/SSS-09803.json.gz").getFile(),true);
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/SSS-09803.json.gz"), true);
ki.commit();
kr = ki.search(sq, (short) 10);
diff --git a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
index 6c4b633..bb5b236 100644
--- a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
+++ b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
@@ -273,8 +273,8 @@
KrillIndex ki = new KrillIndex();
// Indexing test files
for (String i : new String[] {"00001", "00002"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true
);
};
ki.commit();
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
index 4c31b16..8b52266 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
@@ -198,7 +198,7 @@
public void testCase5() throws Exception{
//System.out.println("testCase4");
ki = new KrillIndex();
- ki.addDocFile(getClass().getResource("/wiki/00001.json.gz").getFile(),true);
+ ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"),true);
ki.commit();
InputStream is = getClass().getResourceAsStream("/queries/cosmas1.json");
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index d1c7a8c..38d53e4 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -168,8 +168,8 @@
"00005",
"00006",
"02439"}) {
- FieldDocument fd = ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ FieldDocument fd = ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true
);
};
ki.commit();
@@ -217,8 +217,8 @@
"00005",
"00006",
"02439"}) {
- FieldDocument fd = ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ FieldDocument fd = ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true
);
};
ki.commit();
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
index fe0621d..087bd9f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
@@ -211,8 +211,8 @@
@Test
public void testCase5() throws IOException {
ki = new KrillIndex();
- ki.addDocFile(
- getClass().getResource("/wiki/00001.json.gz").getFile(), true
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/00001.json.gz"), true
);
ki.commit();
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
index f612409..2731cf4 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
@@ -31,7 +31,7 @@
public TestSpanExpansionIndex() throws IOException {
ki = new KrillIndex();
- ki.addDocFile(getClass().getResource("/wiki/00001.json.gz").getFile(),
+ ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"),
true);
ki.commit();
}
@@ -283,9 +283,9 @@
@Test
public void testCase7() throws IOException, QueryException {
KrillIndex ki = new KrillIndex();
- ki.addDocFile(getClass().getResource("/wiki/00001.json.gz").getFile(),
+ ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"),
true);
- ki.addDocFile(getClass().getResource("/wiki/00002.json.gz").getFile(),
+ ki.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"),
true);
ki.commit();
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
index 41607cd..b9b41cf 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
@@ -21,7 +21,7 @@
public TestSubSpanIndex () throws IOException {
ki = new KrillIndex();
- ki.addDocFile(getClass().getResource("/wiki/00001.json.gz").getFile(),
+ ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"),
true);
ki.commit();
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
index 6d35d59..e91be18 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
@@ -1096,12 +1096,12 @@
KrillIndex ki = new KrillIndex();
- ki.addDocFile(
- getClass().getResource("/wiki/DDD-08370.json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/DDD-08370.json.gz"),
true
);
- ki.addDocFile(
- getClass().getResource("/wiki/PPP-02924.json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/PPP-02924.json.gz"),
true
);
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index 5932ee3..33175d2 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -94,8 +94,8 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -145,8 +145,8 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -187,8 +187,8 @@
"07452-deep"
*/
}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -240,8 +240,8 @@
"00006",
"02439"
}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -264,8 +264,8 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -294,8 +294,8 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -324,8 +324,8 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -384,8 +384,8 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -431,8 +431,8 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -519,9 +519,9 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
+ ki.addDoc(
uid++,
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(),
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -554,11 +554,10 @@
// Construct index
KrillIndex ki = new KrillIndex();
// Indexing test files
- FieldDocument fd = ki.addDocFile(
+ FieldDocument fd = ki.addDoc(
1,
getClass().
- getResource("/goe/AGA-03828.json.gz").
- getFile(),
+ getResourceAsStream("/goe/AGA-03828.json.gz"),
true
);
ki.commit();
@@ -641,11 +640,10 @@
// Construct index
KrillIndex ki = new KrillIndex();
// Indexing test files
- FieldDocument fd = ki.addDocFile(
+ FieldDocument fd = ki.addDoc(
1,
getClass().
- getResource("/bzk/D59-00089.json.gz").
- getFile(),
+ getResourceAsStream("/bzk/D59-00089.json.gz"),
true
);
ki.commit();
@@ -732,11 +730,10 @@
// Construct index
KrillIndex ki = new KrillIndex();
// Indexing test files
- FieldDocument fd = ki.addDocFile(
+ FieldDocument fd = ki.addDoc(
1,
getClass().
- getResource("/bzk/D59-00089.json.gz").
- getFile(),
+ getResourceAsStream("/bzk/D59-00089.json.gz"),
true
);
ki.commit();
@@ -807,18 +804,16 @@
// Construct index
KrillIndex ki = new KrillIndex();
// Indexing test files
- ki.addDocFile(
+ ki.addDoc(
1,
getClass().
- getResource("/bzk/D59-00089.json.gz").
- getFile(),
+ getResourceAsStream("/bzk/D59-00089.json.gz"),
true
);
- ki.addDocFile(
+ ki.addDoc(
2,
getClass().
- getResource("/bzk/D59-00089.json.gz").
- getFile(),
+ getResourceAsStream("/bzk/D59-00089.json.gz"),
true
);
@@ -852,18 +847,16 @@
// Construct index
KrillIndex ki = new KrillIndex();
// Indexing test files
- ki.addDocFile(
+ ki.addDoc(
1,
getClass().
- getResource("/goe/AGA-03828.json.gz").
- getFile(),
+ getResourceAsStream("/goe/AGA-03828.json.gz"),
true
);
- ki.addDocFile(
+ ki.addDoc(
2,
getClass().
- getResource("/bzk/D59-00089.json.gz").
- getFile(),
+ getResourceAsStream("/bzk/D59-00089.json.gz"),
true
);
@@ -907,11 +900,10 @@
assertEquals(ki.numberOf("documents"), 0);
// Indexing test files
- FieldDocument fd = ki.addDocFile(
+ FieldDocument fd = ki.addDoc(
1,
getClass().
- getResource("/bzk/D59-00089.json.gz").
- getFile(),
+ getResourceAsStream("/bzk/D59-00089.json.gz"),
true
);
ki.commit();
@@ -995,10 +987,9 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
+ ki.addDoc(
getClass().
- getResource("/wiki/" + i + ".json.gz").
- getFile(),
+ getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -1088,10 +1079,9 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
+ ki.addDoc(
getClass().
- getResource("/wiki/" + i + ".json.gz").
- getFile(),
+ getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -1159,10 +1149,9 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
+ ki.addDoc(
getClass().
- getResource("/wiki/" + i + ".json.gz").
- getFile(),
+ getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -1191,10 +1180,9 @@
// Construct index
KrillIndex ki = new KrillIndex();
// Indexing test files
- ki.addDocFile(
+ ki.addDoc(
getClass().
- getResource("/wiki/00002.json.gz").
- getFile(),
+ getResourceAsStream("/wiki/00002.json.gz"),
true
);
ki.commit();
@@ -1236,10 +1224,9 @@
ki = new KrillIndex();
for (String i : new String[] {"00001",
"00002"}) {
- ki.addDocFile(
+ ki.addDoc(
getClass().
- getResource("/wiki/" + i + ".json.gz").
- getFile(),
+ getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
@@ -1294,10 +1281,9 @@
"00005",
"00006",
"02439"}) {
- ki.addDocFile(
+ ki.addDoc(
getClass().
- getResource("/wiki/" + i + ".json.gz").
- getFile(),
+ getResourceAsStream("/wiki/" + i + ".json.gz"),
true
);
};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
index 93894cb..70480b7 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
@@ -37,8 +37,8 @@
// Indexing test files
for (String i : new String[] {"00001",
"00002"}) {
- ki.addDocFile(
- getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true
);
};
ki.commit();