Merge pull request #24 from thomaskrause/feature/fix-java8
Fix issue with payloads when using Krill on Java 8
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index e4a6594..3a9349b 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1,12 +1,11 @@
package de.ids_mannheim.korap;
// Krill classes
-import de.ids_mannheim.korap.*;
import de.ids_mannheim.korap.index.*;
import de.ids_mannheim.korap.response.*;
import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.util.KrillProperties;
import de.ids_mannheim.korap.util.QueryException;
-import static de.ids_mannheim.korap.util.KrillProperties.*;
// Lucene classes
import org.apache.lucene.search.*;
@@ -143,8 +142,8 @@
// Some initializations ...
{
- Properties prop = loadProperties();
- Properties info = loadInfo();
+ Properties prop = KrillProperties.loadDefaultProperties();
+ Properties info = KrillProperties.loadInfo();
if (info != null) {
this.version = info.getProperty("krill.version");
this.name = info.getProperty("krill.name");
diff --git a/src/main/java/de/ids_mannheim/korap/index/Indexer.java b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
index 96054cc..d17215f 100644
--- a/src/main/java/de/ids_mannheim/korap/index/Indexer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
@@ -16,6 +16,8 @@
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.store.MMapDirectory;
import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.util.KrillProperties;
+
import static de.ids_mannheim.korap.util.KrillProperties.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -27,26 +29,37 @@
* is using the standalone server system,
* this tool may be more suitable for your needs
* (especially as it is way faster).
+ * <br><br>
+ * Input directories should contain files in the json.gz format. Files
+ * of other formats will be skipped or not indexed. The output
+ * directory can be specified in the config file. See
+ * src/main/resources/krill.properties.info to create a config file.
*
- * Usage: java -jar Krill-Indexer.jar [--config propfile]
- * [directories]*
+ * <pre>
+ * Usage:
+ *
+ * java -jar Krill-Indexer.jar -c [propfile] -i [input directories] -o
+ * [output directory]
+ *
+ * java -jar Krill-Indexer.jar --config [propfile] --inputDir [input
+ * directories] --output [output directory]
+ * </pre>
+ *
*
* @author diewald, margaretha
*
*/
public class Indexer {
- KrillIndex index;
- int count;
- int commitCount;
+ private KrillIndex index;
+ private int count;
+ private int commitCount;
- // private static String propFile = "krill.properties";
private static String path = null;
- private static Pattern jsonFilePattern;
+ private Pattern jsonFilePattern;
// Init logger
private final static Logger log = LoggerFactory.getLogger(Indexer.class);
-
/**
* Construct a new indexer object.
*
@@ -55,18 +68,18 @@
* @throws IOException
*/
public Indexer (Properties prop) throws IOException {
- if (this.path == null) {
- this.path = prop.getProperty("krill.indexDir");
+ if (path == null) {
+ path = prop.getProperty("krill.indexDir");
}
- log.info("Output directory: " + this.path);
+ log.info("Output directory: " + path);
// Default to 1000 documents till the next commit
String commitCount = prop.getProperty("krill.index.commit.count",
"1000");
// Create a new index object based on the directory
- this.index = new KrillIndex(new MMapDirectory(Paths.get(this.path)));
+ this.index = new KrillIndex(new MMapDirectory(Paths.get(path)));
this.count = 0;
this.commitCount = Integer.parseInt(commitCount);
@@ -81,16 +94,14 @@
* The {@link File} directory containing
* documents to index.
*/
- public void parse (File dir) {
+ private void parse (File dir) {
Matcher matcher;
for (String file : dir.list()) {
- //log.info("Json file: "+file);
matcher = jsonFilePattern.matcher(file);
if (matcher.find()) {
file = dir.getPath() + '/' + file;
log.info("Adding " + file + " to the index. ");
- // Add file to the index
try {
if (this.index.addDoc(new FileInputStream(file),
true) == null) {
@@ -109,7 +120,8 @@
}
}
else {
- log.warn(file + " does not have json.gz format.");
+ log.warn("Skip " + file
+ + " since it does not have json.gz format.");
}
}
}
@@ -118,16 +130,19 @@
/**
* Commit changes to the index.
*/
- public void commit () {
+ private void commit () {
log.info("Committing index ... ");
try {
this.index.commit();
}
catch (IOException e) {
- log.error("Unable to commit to index " + this.path);
+ log.error("Unable to commit to index " + path);
}
}
+ private void closeIndex() throws IOException{
+ index.close();
+ }
/**
* Main method.
@@ -139,11 +154,11 @@
* @throws IOException
*/
public static void main (String[] argv) throws IOException {
-
+
Options options = new Options();
options.addOption(Option.builder("c").longOpt("config")
.desc("configuration file (defaults to "
- + de.ids_mannheim.korap.util.KrillProperties.propStr
+ + KrillProperties.defaultPropertiesLocation
+ ").")
.hasArg().argName("properties file").required().build());
options.addOption(Option.builder("i").longOpt("inputDir")
@@ -180,7 +195,7 @@
"Krill indexer\n java -jar -c <properties file> -i <input directories> "
+ "[-o <output directory>]",
options);
- System.exit(0);
+ return;
}
catch (ParseException e) {
log.error("Unexpected error: " + e);
@@ -188,23 +203,27 @@
}
// Load properties
- Properties prop = loadProperties(propFile);
+ Properties prop = KrillProperties.loadProperties(propFile);
// Get indexer object
- Indexer ki = new Indexer(prop);
+ Indexer indexer = new Indexer(prop);
// Iterate over list of directories
for (String arg : inputDirectories) {
- log.info("Indexing files in"+arg);
+ log.info("Indexing files in " + arg);
File f = new File(arg);
if (f.isDirectory())
- ki.parse(f);
+ indexer.parse(f);
}
-
+ indexer.closeIndex();
// Final commit
- ki.commit();
log.info("Finished indexing.");
// Finish indexing
- System.out.println("Indexed " + ki.count + " files.");
+ String message = "Indexed " + indexer.count + " file";
+ if (indexer.count > 1) {
+ message += "s";
+ }
+ System.out.print(message + ".");
+
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanClassFilterQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanClassFilterQuery.java
index b5a8653..cc32ad6 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanClassFilterQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanClassFilterQuery.java
@@ -12,6 +12,11 @@
import de.ids_mannheim.korap.query.spans.ClassFilteredSpans;
+/** Filters query results by means of class operations.
+ *
+ * @author margaretha
+ *
+ */
public class SpanClassFilterQuery extends SimpleSpanQuery {
public enum ClassOperation {
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
index 2d6c29f..6458da1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
@@ -16,18 +16,13 @@
/**
* SpanExpansionQuery makes a span longer by stretching out the start
- * or the end
- * position of the span. The constraints of the expansion, such as how
- * large the
- * expansion should be (min and max position) and the direction of the
- * expansion
- * with respect to the original span, are specified in
- * ExpansionConstraint. The
- * direction is designated with the sign of a number, namely a
- * negative number
+ * or the end position of the span. The constraints of the expansion,
+ * such as how large the expansion should be (min and max position)
+ * and the direction of the expansion with respect to the original
+ * span, are specified in ExpansionConstraint. The direction is
+ * designated with the sign of a number, namely a negative number
* signifies the left direction, and a positive number (including 0)
- * signifies
- * the right direction.
+ * signifies the right direction.
*
* <pre>
* SpanTermQuery stq = new SpanTermQuery(new Term("tokens",
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
index db51707..959c7ff 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
@@ -37,7 +37,7 @@
private boolean isSorted = true;
private boolean matchTemporaryClass = false;
private boolean removeTemporaryClasses = false;
- private int windowSize = 10;
+ private int windowSize = 10; // default
/**
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
index ce65df6..10f106b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
@@ -16,23 +16,18 @@
/**
* SpanMultipleDistanceQuery matches two spans with respect to a list
- * of
- * distance constraints. No repetition of constraints of the same unit
- * type
- * (e.g. word, sentence, paragraph) is allowed. For example, there
- * must only
- * exactly one constraint for word/token-based distance. A
- * SpanDistanceQuery is
- * created for each constraint.<br />
+ * of distance constraints. No repetition of constraints of the same
+ * unit type (e.g. word, sentence, paragraph) is allowed. For example,
+ * there must be exactly one constraint for word/token-based
+ * distance. A SpanDistanceQuery is created for each constraint.<br />
* <br />
* Examples:
* <ul>
*
* <li>
* Search two terms x and y which are separated by minimum two and
- * maximum three
- * other words within the same sentence. The order of x and y does not
- * matter.
+ * maximum three other words within the same sentence. The order of x
+ * and y does not matter.
*
* <pre>
* List<DistanceConstraint> constraints = new
@@ -51,10 +46,8 @@
*
* <li>
* Search term x which do <em>not</em> occur with term y in minimum
- * two and
- * maximum three other words and <em>not</em> in the same sentence. X
- * must
- * precede y.
+ * two and maximum three other words and <em>not</em> in the same
+ * sentence. X must precede y.
*
* <pre>
* List<DistanceConstraint> constraints = new
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
index fb0d382..a728baa 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
@@ -23,15 +23,12 @@
/**
* SpanNextQuery matches two spans which are directly next to each
- * other. It is
- * identical to a phrase query with exactly two clauses.
+ * other. It is identical to a phrase query with exactly two clauses.
*
* In the example below, the SpanNextQuery retrieves {@link NextSpans}
- * starting
- * from the start position of {@link TermSpans} "turn" and ending at
- * the end
- * position of {@link TermSpans} "off" occurring immediately after the
- * {@link TermSpans} "turn".
+ * starting from the start position of {@link TermSpans} "turn" and
+ * ending at the end position of {@link TermSpans} "off" occurring
+ * immediately after the {@link TermSpans} "turn".
*
* <pre>
* SpanNextQuery sq = new SpanNextQuery(
@@ -61,7 +58,7 @@
*/
public SpanNextQuery (SpanQuery firstClause, SpanQuery secondClause) {
this(firstClause, secondClause, true);
- };
+ }
/**
@@ -84,7 +81,7 @@
public SpanNextQuery (SpanQuery firstClause, SpanQuery secondClause,
boolean collectPayloads) {
super(firstClause, secondClause, collectPayloads);
- };
+ }
@Override
@@ -92,7 +89,7 @@
public Spans getSpans (final LeafReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
return (Spans) new NextSpans(this, context, acceptDocs, termContexts);
- };
+ }
@Override
@@ -102,7 +99,7 @@
(SpanQuery) secondClause.clone(), collectPayloads);
spanNextQuery.setBoost(getBoost());
return spanNextQuery;
- };
+ }
/*
@@ -133,7 +130,7 @@
return clone;
return this;
- };
+ }
@Override
@@ -167,10 +164,9 @@
return false;
return getBoost() == spanNextQuery.getBoost();
- };
+ }
- // I don't know what I am doing here
@Override
public int hashCode () {
int result;
@@ -178,5 +174,5 @@
result ^= (result << 31) | (result >>> 2); // reversible
result += Float.floatToRawIntBits(getBoost());
return result;
- };
-};
+ }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanReferenceQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanReferenceQuery.java
index 241b6ce..c50bc78 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanReferenceQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanReferenceQuery.java
@@ -12,11 +12,40 @@
import de.ids_mannheim.korap.query.spans.ReferenceSpans;
+/**
+ * SpanReferenceQuery ensures that a span involved in more than one
+ * operation is indeed the same span. Such a span is referred to by a
+ * class and cannot be ensured in one nested SpanQuery.
+ *
+ * For instance in the following Annis query
+ *
+ * <pre>
+ * cat="vb" & cat="prp" & cat="nn" & #1 .{0,1} #2 & #1 .{0,2} #3
+ * & #3 -> #2
+ * </pre>
+ *
+ * cat="prp" is referred to by a class with number 2 and is involved in
+ * two operations. After resolving the first and second operations,
+ * class numbers 3 and 2 have to be referred to at the same time to
+ * solve the third operation. However, only one class can be focused
+ * on from a span at one time. Let us say class number 3 is focused on
+ * from the resulting spans of the first and second operation; then it
+ * is matched with a new span enumeration of cat="prp" for the third
+ * operation.
+ *
+ * SpanReferenceQuery ensures that cat="prp" spans in the third
+ * operation are the same as those in the first operation by
+ * matching their positions using the class number 2 payloads kept in
+ * spans focussing on the class number 3 (it keeps all the payloads
+ * from previous operations).
+ *
+ * @author margaretha
+ *
+ */
public class SpanReferenceQuery extends SimpleSpanQuery {
private byte classNum;
-
public SpanReferenceQuery (SpanQuery firstClause, byte classNum,
boolean collectPayloads) {
super(firstClause, collectPayloads);
@@ -26,7 +55,6 @@
@Override
public SimpleSpanQuery clone () {
- // TODO Auto-generated method stub
return null;
}
@@ -34,7 +62,6 @@
@Override
public Spans getSpans (LeafReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
- // TODO Auto-generated method stub
return new ReferenceSpans(this, context, acceptDocs, termContexts);
}
@@ -51,11 +78,18 @@
}
+ /** Get the class number of the referred spans.
+ * @return the class number of the referred spans
+ */
public byte getClassNum () {
return classNum;
}
+ /** Set the class number of the referred spans.
+ *
+ * @param classNum the class number of the referred spans
+ */
public void setClassNum (byte classNum) {
this.classNum = classNum;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationMatchQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationMatchQuery.java
index 311a5fc..de77ef4 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationMatchQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationMatchQuery.java
@@ -12,6 +12,13 @@
import de.ids_mannheim.korap.query.spans.FocusSpans;
+/**
+ * Matches the source and/or target of a SpanRelationQuery to specific
+ * SpanQueries.
+ *
+ * @author margaretha
+ *
+ */
public class SpanRelationMatchQuery extends SimpleSpanQuery {
private SpanQuery operandQuery;
@@ -19,10 +26,25 @@
private SpanRelationQuery relationQuery;
+ /**
+ * Matches the left node of the given relation with the given
+ * SpanQuery.
+ *
+ * @param relation
+ * a SpanRelationQuery
+ * @param spanQuery
+ * a SpanQuery
+ * @param collectPayloads
+ * a boolean flag representing the value
+ * <code>true</code> if payloads are to be collected,
+ * otherwise
+ * <code>false</code>.
+ */
public SpanRelationMatchQuery (SpanRelationQuery relation,
- SpanQuery operand, boolean collectPayloads) {
+ SpanQuery spanQuery,
+ boolean collectPayloads) {
- checkVariables(relation, operand);
+ checkArguments(relation, spanQuery);
SpanFocusQuery sq = new SpanFocusQuery(
new SpanSegmentQuery(relationQuery, operandQuery, true),
relation.getTempClassNumbers());
@@ -36,10 +58,26 @@
}
+ /**
+ * Matches both the source and target of the given relations with
+ * the given operands.
+ *
+ * @param relation
+ * a SpanRelationQuery
+ * @param source
+ * a SpanQuery
+ * @param target
+ * a SpanQuery
+ * @param collectPayloads
+ * a boolean flag representing the value
+ * <code>true</code> if payloads are to be collected,
+ * otherwise
+ * <code>false</code>.
+ */
public SpanRelationMatchQuery (SpanRelationQuery relation, SpanQuery source,
SpanQuery target, boolean collectPayloads) {
- checkVariables(relation, source, target);
+ checkArguments(relation, source, target);
SpanFocusQuery sq = null;
SpanFocusQuery sq2 = null;
// match source and then target
@@ -76,30 +114,53 @@
}
- public void checkVariables (SpanRelationQuery relation, SpanQuery operand) {
+ /**
+ * Checks if the SpanRelationQuery and the SpanQuery are not null
+ * and if the SpanQuery has the same field as the
+ * SpanRelationQuery.
+ *
+ * @param relation
+ * SpanRelationQuery
+ * @param spanQuery
+ * SpanQuery
+ */
+ public void checkArguments (SpanRelationQuery relation,
+ SpanQuery spanQuery) {
if (relation == null) {
throw new IllegalArgumentException(
"The relation query cannot be null.");
}
- if (operand == null) {
+ if (spanQuery == null) {
throw new IllegalArgumentException(
"The operand query cannot be null.");
}
this.field = relation.getField();
- if (!operand.getField().equals(field)) {
+ if (!spanQuery.getField().equals(field)) {
throw new IllegalArgumentException(
"Clauses must have the same field.");
}
this.relationQuery = relation;
- this.operandQuery = operand;
+ this.operandQuery = spanQuery;
}
- public void checkVariables (SpanRelationQuery relation, SpanQuery operand,
+ /**
+ * Checks if the SpanRelationQuery and the source and target
+ * SpanQuery are not null and if the SpanQueries have the same
+ * field as the SpanRelationQuery.
+ *
+ * @param relation
+ * SpanRelationQuery
+ * @param source
+ * SpanQuery
+ * @param target
+ * SpanQuery
+ */
+ public void checkArguments (SpanRelationQuery relation, SpanQuery source,
SpanQuery target) {
- checkVariables(relation, operand);
+ checkArguments(relation, source);
if (target == null) {
- if (operand == null) {
+ if (source == null) {
throw new IllegalArgumentException(
"The target query cannot be null.");
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
index 094e457..ea71d63 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
@@ -18,12 +18,10 @@
/**
* SpanRelationQuery retrieves spans representing a relation between
- * tokens,
- * elements, or a-token-and-an-element. Relation are marked with
- * prefix "<" or
- * ">". The direction of the angle bracket represents the direction of
- * the
- * corresponding relation. By default, the relation is set ">".
+ * tokens, elements, or a-token-and-an-element. Relations are marked
+ * with prefix "<" or ">". The direction of the angle bracket
+ * represents the direction of the corresponding relation. By default,
+ * the relation is set to ">".
* <br/><br/>
*
* This class provides two types of query:
@@ -38,11 +36,10 @@
* </pre>
* </li>
* <li>querying relations matching a certain type of sources/targets,
- * that are the
- * left or the right sides of the relations. This query is used within
- * {@link SpanRelationPartQuery}, for instance, to retrieve all
- * dependency relations
- * "<:xip/syntax-dep_rel" whose sources (right side) are noun phrases.
+ * that are the left or the right sides of the relations. This query
+ * is used within {@link SpanRelationPartQuery}, for instance, to
+ * retrieve all dependency relations "<:xip/syntax-dep_rel" whose
+ * sources (right side) are noun phrases.
* <pre>
* SpanRelationPartQuery rv =
* new SpanRelationPartQuery(sq, new SpanElementQuery("tokens","np"),
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
index c169e4b..fa7f5aa 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
@@ -15,18 +15,15 @@
/**
* SpanRepetitionQuery means that the given SpanQuery must appears
- * multiple
- * times in a sequence. The number of repetition depends on the
- * minimum and the
- * maximum number parameters. <br />
+ * multiple times in a sequence. The number of repetitions depends on
+ * the minimum and the maximum number parameters. <br />
* <br />
*
* In the example below, SpanRepetitionQuery retrieves
* {@link RepetitionSpans} consisting of the TermSpans "tt:p/ADJ" that
- * must appear at least once or
- * consecutively two times. What appears after the RepetitionSpans is
- * not
- * considered, so it is possible that it is another "tt:p/ADJ". <br />
+ * must appear at least once or consecutively two times. What appears
+ * after the RepetitionSpans is not considered, so it is possible that
+ * it is another "tt:p/ADJ". <br />
* <br />
*
* <pre>
@@ -52,7 +49,6 @@
private int min, max;
-
/**
* Constructs a SpanRepetitionQuery for the given
* {@link SpanQuery}.
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
index 9e36c40..dc7c81d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
@@ -117,7 +117,7 @@
return false;
return getBoost() == spanSegmentQuery.getBoost();
- };
+ }
@Override
@@ -137,6 +137,5 @@
public void setRelation (boolean isRelation) {
this.isRelation = isRelation;
- };
-
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
index 5d31352..82792f2 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
@@ -14,12 +14,9 @@
/**
* This query extracts a subspan from another span. The subspan starts
- * from a
- * startOffset until startOffset + length. A positive startOffset is
- * counted
- * from the start of the span, while a negative startOffset is
- * calculated from
- * the end of the span. <br />
+ * from a startOffset until startOffset + length. A positive
+ * startOffset is counted from the start of the span, while a negative
+ * startOffset is calculated from the end of the span. <br />
* <br />
* SpanSubspanQuery takes a SpanQuery as its input and creates
* subspans from the
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
index 51ae8d1..33cfdfb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
@@ -14,11 +14,9 @@
/**
* SpanTermWithIdQuery wraps a SpanTermQuery retrieving TermSpans and
- * add a
- * spanid to the TermSpans. It is used in other spanqueries requiring
- * spans with
- * id as their child spans, for example in span relation with variable
- * query ( {@link SpanRelationPartQuery}).
+ * adds a spanid to the TermSpans. It is used in other spanqueries
+ * requiring spans with id as their child spans, for example in span
+ * relation with variable query ( {@link SpanRelationPartQuery}).
*
* <pre>
* SpanTermWithIdQuery sq = new SpanTermWithIdQuery(new
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanWithinQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanWithinQuery.java
index e658046..fb16ff9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanWithinQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanWithinQuery.java
@@ -2,27 +2,20 @@
// Based on SpanNearQuery
import java.io.IOException;
-
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Iterator;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import de.ids_mannheim.korap.query.spans.WithinSpans;
-import de.ids_mannheim.korap.query.SpanElementQuery;
/**
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index ab0576f..5ee5e75 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -19,28 +19,22 @@
import de.ids_mannheim.korap.query.SpanAttributeQuery;
/**
- * UPDATE THIS!
* Span enumeration of attributes which are term spans with special
- * payload
- * assignments referring to another span (e.g. element/relation span)
- * to which
- * an attribute span belongs. The class is basically a wrapper of
- * Lucene {@link TermSpans} with additional functionality regarding
- * element/relation
+ * payload assignments referring to another span (e.g.
+ * element/relation span) to which an attribute span belongs. The
+ * class is basically a wrapper of Lucene {@link TermSpans} with
+ * additional functionality regarding element/relation
* reference. Element/relation id is annotated ascendingly starting
- * from the
- * left side. <br/>
- * <br/>
+ * from the left side.
+ * <br/><br/>
* The enumeration is ordered firstly by the start position of the
- * attribute and
- * secondly by the element/relation id descendingly. This order helps
- * to match
- * element and attributes faster.
+ * attribute and secondly by the element/relation id descendingly.
+ * This order helps to match element and attributes faster.
*
- * AttributeSpans contain information about the elements they belongs
- * to, thus
- * querying them alone is sufficient to get
- * "any element having a specific attribute".
+ * AttributeSpans have the same start and end positions as the
+ * element/relations they belong to, thus querying them alone
+ * is sufficient to get "any element having a specific
+ * attribute".
*
* @author margaretha
*/
@@ -59,6 +53,11 @@
private PayloadTypeIdentifier (int value) {
this.value = value;
}
+
+
+ public int getValue () {
+ return value;
+ }
}
protected Logger logger = LoggerFactory.getLogger(AttributeSpans.class);
@@ -101,8 +100,7 @@
/**
* Moves to the next match by checking the candidate match list or
- * setting
- * the list first when it is empty.
+ * setting the list first when it is empty.
*
* @return true if a match is found
* @throws IOException
@@ -132,10 +130,9 @@
/**
* Collects all the attributes in the same start position and sort
- * them by
- * element/relation Id in a reverse order (the ones with the
- * bigger
- * element/relation Id first).
+ * them by element/relation Id in a reverse order (the ones with
+ * the
+ * bigger element/relation Id first).
*
* @throws IOException
*/
@@ -155,8 +152,7 @@
/**
* Creates a CandidateAttributeSpan based on the child span and
- * set the
- * spanId and elementEnd from its payloads.
+ * set the spanId and elementEnd from its payloads.
*
* @param firstSpans
* an AttributeSpans
@@ -169,21 +165,10 @@
byte payloadTypeIdentifier = payloadBuffer.get(0);
short spanId = payloadBuffer.getShort(5);
- // if (payload.get(0).length == 6) {
int end = payloadBuffer.getInt(1);
return new CandidateAttributeSpan(firstSpans, payloadTypeIdentifier,
spanId, end);
-
- // }
- // else if (payload.get(0).length == 10) {
- // start = wrapper.getInt(0);
- // end = wrapper.getInt(4);
- // spanId = wrapper.getShort(8);
- // return new CandidateAttributeSpan(firstSpans, spanId, start, end);
- // }
-
- // throw new NullPointerException("Missing element end in payloads.");
}
@@ -234,14 +219,11 @@
/**
* CandidateAttributeSpan contains information about an Attribute
- * span. All
- * attribute spans occurring in an identical position are
- * collected as
- * CandidateAttributeSpans. The list of these
- * CandidateAttributeSpans are
- * sorted based on the span ids to which the attributes belong to.
- * The
- * attributes with smaller spanIds come first on the list.
+ * span. All attribute spans occurring in an identical position
+ * are collected as CandidateAttributeSpans. The list of these
+ * CandidateAttributeSpans is sorted based on the span ids to
+ * which the attributes belong. The attributes with smaller
+ * spanIds come first on the list.
*
*/
class CandidateAttributeSpan extends CandidateSpan
@@ -277,14 +259,6 @@
}
- // public CandidateAttributeSpan (Spans span, short spanId, int start,
- // int end) throws IOException {
- // super(span);
- // setSpanId(spanId);
- // this.start = start;
- // this.end = end;
- // }
-
@Override
public int compareTo (CandidateSpan o) {
CandidateAttributeSpan cs = (CandidateAttributeSpan) o;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java
index c351891..bc50e6d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java
@@ -2,6 +2,14 @@
import java.util.Comparator;
+/**
+ * Compares the positions of two CandidateSpans. The CandidateSpan
+ * with lower document number, start position and end position is
+ * ordered before the other CandidateSpan.
+ *
+ * @author margaretha
+ *
+ */
public class CandidateSpanComparator implements Comparator<CandidateSpan> {
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
index 286f91c..d0ac492 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
@@ -7,20 +7,16 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanDistanceQuery;
import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
/**
* DistanceSpan is a base class for enumeration of span matches, whose
- * two child
- * spans have a specific range of distance (within a min and a max
- * distance) and
- * other constraints (i.e. order and co-occurrence) depending on the
- * {@link SpanDistanceQuery}. All distance related spans extends this
- * class.
+ * two child spans have a specific range of distance (within a min and
+ * a max distance) and other constraints (i.e. order and
+ * co-occurrence) depending on the {@link SpanDistanceQuery}. All
+ * distance related spans extend this class.
*
* @see DistanceExclusionSpans
* @see ElementDistanceExclusionSpans
@@ -33,7 +29,6 @@
public abstract class DistanceSpans extends SimpleSpans {
protected CandidateSpan matchFirstSpan, matchSecondSpan;
- protected Logger log = LoggerFactory.getLogger(DistanceSpans.class);
protected boolean exclusion; // for MultipleDistanceQuery
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index dcd59d3..4dd9247 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -2,7 +2,6 @@
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
@@ -21,8 +20,7 @@
/**
* Enumeration of spans representing elements such as phrases,
- * sentences and
- * paragraphs. Span length is stored as a payload.
+ * sentences and paragraphs. Span length is stored as a payload.
*
* Depth and certainty value payloads have not been loaded and handled
* yet.
@@ -38,8 +36,6 @@
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
- private byte[] b = new byte[10];
-
public static enum PayloadTypeIdentifier {
ELEMENT(64),
// ELEMENT_WITH_TUI(65),
@@ -49,7 +45,6 @@
private byte value;
-
private PayloadTypeIdentifier (int value) {
this.value = (byte) value;
}
@@ -142,9 +137,9 @@
this.payloadTypeIdentifier = bb.get(0);
- if (payloadTypeIdentifier != PayloadTypeIdentifier.MILESTONE.value) {
- this.matchEndPosition = bb.getInt(9);
- };
+ if (payloadTypeIdentifier != PayloadTypeIdentifier.MILESTONE.value) {
+ this.matchEndPosition = bb.getInt(9);
+ };
if (payloadTypeIdentifier == PayloadTypeIdentifier.ELEMENT.value
&& length > 15) {
@@ -164,7 +159,7 @@
return;
}
- this.matchEndPosition = this.matchStartPosition;
+ this.matchEndPosition = this.matchStartPosition;
this.setSpanId((short) -1);
this.hasSpanId = false;
this.matchPayload = null;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
index 84e5da9..51bf64f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
@@ -3,7 +3,6 @@
import static de.ids_mannheim.korap.util.KrillByte.byte2int;
import java.io.IOException;
-import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
@@ -14,8 +13,6 @@
import org.apache.lucene.index.TermState;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.Bits;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanFocusQuery;
@@ -47,7 +44,6 @@
public class FocusSpans extends SimpleSpans {
private List<Byte> classNumbers;
private SpanQuery query;
- private final Logger log = LoggerFactory.getLogger(FocusSpans.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index 75dd930..96eeceb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
@@ -10,8 +10,6 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanNextQuery;
@@ -33,9 +31,6 @@
private int candidateListDocNum;
private boolean hasMoreFirstSpan;
- private Logger log = LoggerFactory.getLogger(NextSpans.class);
-
-
/**
* Constructs NextSpans for the given {@link SpanNextQuery}.
*
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
index 4464d90..8e87f29 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
@@ -8,24 +8,17 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SimpleSpanQuery;
/**
* An abstract class for Span enumeration whose two child spans are
- * matched by
- * their positions and do not have a partial overlap.
+ * matched by their positions and do not have a partial overlap.
*
* @author margaretha
*/
public abstract class NonPartialOverlappingSpans extends SimpleSpans {
- private Logger log = LoggerFactory
- .getLogger(NonPartialOverlappingSpans.class);
-
-
/**
* Constructs NonPartialOverlappingSpans from the given
* {@link SimpleSpanQuery}.
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ReferenceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ReferenceSpans.java
index 9ab35e1..a78094d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ReferenceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ReferenceSpans.java
@@ -12,6 +12,14 @@
import de.ids_mannheim.korap.query.SpanReferenceQuery;
+/**
+ * Resolves a reference to some class by searching for the class
+ * payload
+ * in the payloads of the subspans (firstspans).
+ *
+ * @author margaretha
+ *
+ */
public class ReferenceSpans extends SimpleSpans {
private byte classNum;
@@ -42,6 +50,16 @@
}
+ /**
+ * Iterates over the payloads of the firstspans and looks for a
+ * payload having the same class number as the referenced spans.
+ * If there is more than one payload with the same class number,
+ * but with different start and end positions, the method will
+ * return false.
+ *
+ * @return true, if such a payload is found, false, otherwise.
+ * @throws IOException
+ */
private boolean hasSameClassPosition () throws IOException {
int start = 0, end = 0;
boolean isFound = false;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
index b00dc43..6067d9d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationBaseSpans.java
@@ -14,12 +14,10 @@
/**
* RelationBaseSpans is a base class for relation spans containing
- * properties
- * about the start and end positions of right side of the relation. It
- * can also
- * store information about the id of the left/right side, for
- * instance, when it
- * is an element or another relation.
+ * properties about the start and end positions of right side of the
+ * relation. It can also store information about the id of the
+ * left/right side, for instance, when it is an element or another
+ * relation.
*
* @author margaretha
*
@@ -33,7 +31,7 @@
public RelationBaseSpans () {
this.hasSpanId = true;
- };
+ }
/**
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
index cedd25a..22314e4 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
@@ -20,8 +20,7 @@
/**
* Enumeration of spans occurring multiple times in a sequence. The
- * number of
- * repetition depends on the min and max parameters.
+ * number of repetitions depends on the min and max parameters.
*
* @author margaretha
*/
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
index 96b699f..8375b28 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
@@ -13,8 +13,7 @@
/**
* SegmentSpans is an enumeration of Span matches in which that two
- * child spans
- * have exactly the same start and end positions.
+ * child spans have exactly the same start and end positions.
*
* @author margaretha
*/
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index 7917eb5..2bf24de 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -11,23 +11,25 @@
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SimpleSpanQuery;
/**
* An abstract class for Span enumeration including span match
- * properties
- * and basic methods.
+ * properties and basic methods.
*
* @author margaretha
*/
public abstract class SimpleSpans extends Spans {
+ protected final Logger log = LoggerFactory.getLogger(SimpleSpans.class);
private SimpleSpanQuery query;
protected boolean isStartEnumeration;
protected boolean collectPayloads;
protected boolean hasMoreSpans;
- // Warning: enumeration of Spans
+ // Enumeration of Spans
protected Spans firstSpans, secondSpans;
protected int matchDocNumber, matchStartPosition, matchEndPosition;
@@ -46,7 +48,7 @@
matchEndPosition = -1;
matchPayload = new ArrayList<byte[]>();
isStartEnumeration = true;
- };
+ }
public SimpleSpans (SimpleSpanQuery simpleSpanQuery,
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
index 6850ab4..24e277b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
@@ -16,14 +16,10 @@
/**
* Enumeration of span matches, whose two child spans have a specific
- * range of
- * distance (within a min and a max distance) and can be in any order.
- * The unit
- * distance is an element, which can be a sentence or a paragraph for
- * instance.
- * The distance is the difference between the positions of elements
- * containing
- * the spans.
+ * range of distance (within a min and a max distance) and can be in
+ * any order. The unit distance is an element, which can be a sentence
+ * or a paragraph for instance. The distance is the difference between
+ * the positions of elements containing the spans.
*
* @author margaretha
*/
diff --git a/src/main/java/de/ids_mannheim/korap/server/Node.java b/src/main/java/de/ids_mannheim/korap/server/Node.java
index 058cf33..9291d8b 100644
--- a/src/main/java/de/ids_mannheim/korap/server/Node.java
+++ b/src/main/java/de/ids_mannheim/korap/server/Node.java
@@ -20,6 +20,8 @@
import java.beans.PropertyVetoException;
import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.util.KrillProperties;
+
import org.apache.lucene.store.MMapDirectory;
import static de.ids_mannheim.korap.util.KrillProperties.*;
@@ -96,7 +98,7 @@
};
};
- Properties prop = loadProperties(propFile);
+ Properties prop = KrillProperties.loadProperties(propFile);
// Node properties
if (path != null && path.equals(":memory:")) {
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
index 8d2f466..1ba261c 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
@@ -2,27 +2,33 @@
import java.util.*;
import java.io.*;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import de.ids_mannheim.korap.Krill;
-// Todo: Properties may be loaded twice - althogh Java may cache automatically
+/**
+ *
+ * Todo: Properties may be loaded twice - although Java may cache automatically
+ *
+ * @author diewald, margaretha
+ *
+ */
public class KrillProperties {
- public static String propStr = "krill.properties";
- private static String infoStr = "krill.info";
+ public static final String defaultPropertiesLocation = "krill.properties";
+ public static final String defaultInfoLocation = "krill.info";
private static Properties prop, info;
// Logger
- private final static Logger log = LoggerFactory.getLogger(Krill.class);
-
+ private final static Logger log = LoggerFactory
+ .getLogger(KrillProperties.class);
// Load properties from file
- public static Properties loadProperties () {
+ public static Properties loadDefaultProperties () {
if (prop != null)
return prop;
- prop = loadProperties(propStr);
+ prop = loadProperties(defaultPropertiesLocation);
return prop;
};
@@ -30,22 +36,23 @@
// Load properties from file
public static Properties loadProperties (String propFile) {
if (propFile == null)
- return loadProperties();
+ return loadDefaultProperties();
InputStream iFile;
try {
iFile = new FileInputStream(propFile);
prop = new Properties();
prop.load(iFile);
+
}
catch (IOException t) {
try {
iFile = KrillProperties.class.getClassLoader()
.getResourceAsStream(propFile);
-
if (iFile == null) {
log.warn(
- "Cannot find {}. Please create it using \"{}.info\" as template.",
+ "Cannot find {}. Please create it using "
+ + "\"src/main/resources/krill.properties.info\" as template.",
propFile, propFile);
return null;
};
@@ -68,10 +75,10 @@
try {
info = new Properties();
InputStream iFile = KrillProperties.class.getClassLoader()
- .getResourceAsStream(infoStr);
+ .getResourceAsStream(defaultInfoLocation);
if (iFile == null) {
- log.error("Cannot find {}.", infoStr);
+ log.error("Cannot find {}.", defaultInfoLocation);
return null;
};
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 1554002..2805e78 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-log4j.rootLogger = DEBUG, stdout
+log4j.rootLogger = ERROR, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/TestIndexer.java b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
new file mode 100644
index 0000000..ea2aea9
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
@@ -0,0 +1,124 @@
+package de.ids_mannheim.korap;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import de.ids_mannheim.korap.index.Indexer;
+
+/**
+ * Tests the command line handling of {@link Indexer} by capturing
+ * what it writes to standard output.
+ *
+ * @author margaretha
+ *
+ */
+public class TestIndexer {
+    private Logger logger = LoggerFactory.getLogger(TestIndexer.class);
+    private final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+    // The stream System.out pointed to before it was redirected
+    private final PrintStream originalOut = System.out;
+    private String info = "usage: Krill indexer";
+    private File outputDirectory = new File("test-index");
+
+    @Test
+    public void testArguments () throws IOException {
+        Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
+                "-i", "src/test/resources/bzk" });
+        assertEquals("Indexed 1 file.", outputStream.toString());
+    }
+
+    @Test
+    public void testOutputArgument () throws IOException {
+        Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
+                "-i", "src/test/resources/bzk", "-o", "test-output"});
+        assertEquals("Indexed 1 file.", outputStream.toString());
+    }
+
+    @Test
+    public void testMultipleInputFiles () throws IOException {
+        Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
+                "-i", "src/test/resources/wiki" });
+        assertEquals("Indexed 14 files.", outputStream.toString());
+    }
+
+    @Test
+    public void testMultipleInputDirectories () throws IOException {
+        Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
+                "-i",
+                "src/test/resources/bzk;src/test/resources/goe;src/test/resources/sgbr",
+                "-o", "test-index" });
+        assertEquals("Indexed 3 files.", outputStream.toString());
+    }
+
+    @Test
+    public void testEmptyArgument () throws IOException {
+        Indexer.main(new String[] {});
+        logger.info(outputStream.toString());
+        assertEquals(true, outputStream.toString().startsWith(info));
+    }
+
+
+    @Test
+    public void testMissingConfig () throws IOException {
+        Indexer.main(new String[] { "-i", "src/test/resources/bzk",
+                "-o test-index" });
+        logger.info(outputStream.toString());
+        assertEquals(true, outputStream.toString().startsWith(info));
+    }
+
+    @Test
+    public void testMissingInput () throws IOException {
+        Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
+                "-o", "test-index" });
+        logger.info(outputStream.toString());
+        assertEquals(true, outputStream.toString().startsWith(info));
+    }
+
+    @Before
+    public void setOutputStream () {
+        System.setOut(new PrintStream(outputStream));
+    }
+
+    @After
+    public void cleanOutputStream () {
+        // Restore the original stream instead of leaving System.out
+        // null, which would break every later System.out call
+        System.setOut(originalOut);
+    }
+
+    @Before
+    public void cleanOutputDirectory () {
+
+        if (outputDirectory.exists()) {
+            logger.debug("Output directory exists");
+            deleteFile(outputDirectory);
+        }
+    }
+
+    /**
+     * Deletes the given file; directories are emptied recursively
+     * before they are removed.
+     */
+    private void deleteFile (File path) {
+        // listFiles() returns null for non-directories and on I/O
+        // errors, so a single null check covers both cases
+        File[] children = path.listFiles();
+        if (children != null) {
+            for (File file : children) {
+                deleteFile(file);
+                logger.debug(file.getAbsolutePath());
+            }
+        }
+        path.delete();
+    }
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java
index fc4d57c..177dbc7 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java
@@ -35,40 +35,32 @@
ki.addDoc(TestRelationIndex.createFieldDoc2());
ki.commit();
- SpanTermQuery seq1 = new SpanTermQuery(new Term("base", "pos:V"));
- SpanElementQuery seq2 = new SpanElementQuery("base", "np");
- SpanClassQuery scq1 = new SpanClassQuery(seq1, (byte) 1);
- SpanClassQuery scq2 = new SpanClassQuery(seq2, (byte) 2);
+ SpanTermQuery stq = new SpanTermQuery(new Term("base", "pos:V"));
+ SpanElementQuery seq = new SpanElementQuery("base", "np");
+ SpanClassQuery scq1 = new SpanClassQuery(stq, (byte) 1);
+ SpanClassQuery scq2 = new SpanClassQuery(seq, (byte) 2);
SpanNextQuery snq1 = new SpanNextQuery(scq1, scq2);
SpanFocusQuery sfq1 = new SpanFocusQuery(snq1, (byte) 2);
SpanRelationQuery srq = new SpanRelationQuery(
new SpanTermQuery(new Term("base", "<:child-of")), true);
- // SpanSegmentQuery ssq = new SpanSegmentQuery(srq, sfq1,
- // true);
- // SpanFocusQuery sfq2 = new SpanFocusQuery(ssq, (byte) 1);
- // sfq2.setSorted(false);
- // sfq2.setMatchTemporaryClass(false);
- SpanElementQuery seq3 = new SpanElementQuery("base", "pp");
- SpanClassQuery scq3 = new SpanClassQuery(seq3, (byte) 3);
- // SpanSegmentQuery ssq2 = new SpanSegmentQuery(sfq2, scq3,
- // true);
+ SpanElementQuery seq2 = new SpanElementQuery("base", "pp");
+ SpanClassQuery scq3 = new SpanClassQuery(seq2, (byte) 3);
SpanRelationMatchQuery rq = new SpanRelationMatchQuery(srq, sfq1, scq3,
true);
-
- // System.out.println(rq.toString());
- SpanFocusQuery sfq3 = new SpanFocusQuery(rq, (byte) 1);
+
+ // focus on np
+ SpanFocusQuery sfq2 = new SpanFocusQuery(rq, (byte) 1);
DistanceConstraint constraint = new DistanceConstraint(3, 3, true,
false);
- SpanDistanceQuery sdq = new SpanDistanceQuery(sfq3, scq3, constraint,
+ SpanDistanceQuery sdq = new SpanDistanceQuery(sfq2, scq3, constraint,
true);
SpanReferenceQuery ref = new SpanReferenceQuery(sdq, (byte) 3, true);
- // System.out.println(ref.toString());
kr = ki.search(ref, (short) 10);
/*
@@ -78,7 +70,8 @@
* + " " + km.getSnippetBrackets()); }
* System.out.println(kr.getTotalResults());
*/
-
+
+ // cat=V & cat=np & cat=pp & #1 . #2 & #3 ->child-of #2 & #1 .{3,3} #3
assertEquals(
"spanReference(spanDistance(focus(1: focus(#[1,2]spanSegment("
+ "focus(#1: spanSegment(spanRelation(base:<:child-of), focus(2: spanNext("
@@ -100,7 +93,7 @@
SpanQueryWrapper sqwi = getJSONQuery(filepath);
SpanQuery sq = sqwi.toQuery();
- // cat="vb" & cat="prp" & cat="nn" & #1 .notordered #2 & #1
+ // cat="vb" & cat="prp" & cat="nn" & #1 .{0,1} #2 & #1
// .{0,2} #3 & #3 -> #2
assertEquals(
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index 63491a2..657fa33 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -14,13 +14,11 @@
import de.ids_mannheim.korap.query.SpanClassQuery;
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanFocusQuery;
-import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.SpanRelationMatchQuery;
import de.ids_mannheim.korap.query.SpanRelationQuery;
import de.ids_mannheim.korap.query.SpanSegmentQuery;
import de.ids_mannheim.korap.query.SpanTermWithIdQuery;
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
-import de.ids_mannheim.korap.response.Match;
import de.ids_mannheim.korap.response.Result;
/*
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestSpanRelationQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestSpanRelationQueryJSON.java
index 8be9a79..3122b4d 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestSpanRelationQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestSpanRelationQueryJSON.java
@@ -70,7 +70,7 @@
@Test
- public void testMatchBothRelationNodeWithAttribute ()
+ public void testMatchBothRelationNodesWithAttribute ()
throws QueryException {
String filepath = getClass()
.getResource(
diff --git a/src/test/resources/krill.properties b/src/test/resources/krill.properties
index f9cbc50..e14926b 100644
--- a/src/test/resources/krill.properties
+++ b/src/test/resources/krill.properties
@@ -1,2 +1,3 @@
krill.version = ${project.version}
krill.name = ${project.name}
+krill.indexDir = test-output
\ No newline at end of file