Added SpanSubspanQuery deserialization.
diff --git a/src/main/java/de/ids_mannheim/korap/KorapQuery.java b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
index 96bd4ac..60dd0e0 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapQuery.java
@@ -1,43 +1,46 @@
package de.ids_mannheim.korap;
-import de.ids_mannheim.korap.query.wrap.*;
-import de.ids_mannheim.korap.response.Notifications;
-import de.ids_mannheim.korap.util.QueryException;
+import java.io.IOException;
-import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.automaton.RegExp;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-
-import de.ids_mannheim.korap.query.SpanWithinQuery;
-
-import java.util.*;
-import java.io.*;
-
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import de.ids_mannheim.korap.query.SpanWithinQuery;
+import de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanClassQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanElementQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanMatchModifyQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanSubspanQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper;
+import de.ids_mannheim.korap.query.wrap.SpanWithinQueryWrapper;
+import de.ids_mannheim.korap.response.Notifications;
+import de.ids_mannheim.korap.util.QueryException;
+
/*
- Todo: All queries with a final right expansion
- der alte []
- should be wrapped in a contains(<base/s=t>) to ensure
- they are not outside the text.
-
- TODO: Create Pre-filter while preparing a Query.
- The pre-filter will contain a boolena query with all
- necessary terms, supporting boolean OR, ignoring
- negation terms (and negation subqueries), like
- [base=Der]([base=alte]|[base=junge])[base=Mann & p!=ADJA]![base=war | base=lag]
- Search for all documents containing "s:Der" and ("s:alte" or "s:junge") and "s:Mann"
-
-*/
+ * TODO: All queries with a final right expansion, e.g. "der alte []", should be
+ * wrapped in a contains(<base/s=t>) to ensure they are not outside the text.
+ * TODO: Create a pre-filter while preparing a query. The pre-filter will
+ * contain a boolean query with all necessary terms, supporting boolean OR and
+ * ignoring negation terms (and negation subqueries). For example,
+ * [base=Der]([base=alte]|[base=junge])[base=Mann & p!=ADJA]![base=war | base=lag]
+ * searches for all documents containing "s:Der" and ("s:alte" or "s:junge")
+ * and "s:Mann".
+ */
/**
* @author Nils Diewald
- *
- * KorapQuery implements a simple API for wrapping
- * KorAP Lucene Index specific query classes.
+ *
+ * KorapQuery implements a simple API for wrapping KorAP Lucene Index
+ * specific query classes.
*/
public class KorapQuery extends Notifications {
private String field;
@@ -49,72 +52,73 @@
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
- public static final byte
- OVERLAP = SpanWithinQuery.OVERLAP,
- REAL_OVERLAP = SpanWithinQuery.REAL_OVERLAP,
- WITHIN = SpanWithinQuery.WITHIN,
- REAL_WITHIN = SpanWithinQuery.REAL_WITHIN,
- ENDSWITH = SpanWithinQuery.ENDSWITH,
- STARTSWITH = SpanWithinQuery.STARTSWITH,
- MATCH = SpanWithinQuery.MATCH;
+ public static final byte OVERLAP = SpanWithinQuery.OVERLAP,
+ REAL_OVERLAP = SpanWithinQuery.REAL_OVERLAP,
+ WITHIN = SpanWithinQuery.WITHIN,
+ REAL_WITHIN = SpanWithinQuery.REAL_WITHIN,
+ ENDSWITH = SpanWithinQuery.ENDSWITH,
+ STARTSWITH = SpanWithinQuery.STARTSWITH,
+ MATCH = SpanWithinQuery.MATCH;
private static final int MAX_CLASS_NUM = 255; // 127;
/**
* Constructs a new base object for query generation.
+ *
* @param field The specific index field for the query.
*/
- public KorapQuery (String field) {
- this.field = field;
- this.json = new ObjectMapper();
+ public KorapQuery(String field) {
+ this.field = field;
+ this.json = new ObjectMapper();
};
/**
* Private class for korap:boundary objects
*/
private class Boundary {
- public int min, max;
+ public int min, max;
- public Boundary (JsonNode json, int defaultMin, int defaultMax) throws QueryException {
+ public Boundary(JsonNode json, int defaultMin, int defaultMax)
+ throws QueryException {
- if (!json.has("@type"))
- throw new QueryException(701, "JSON-LD group has no @type attribute");
+ if (!json.has("@type"))
+ throw new QueryException(701,
+ "JSON-LD group has no @type attribute");
- if (!json.get("@type").asText().equals("korap:boundary"))
- throw new QueryException(702, "Boundary definition is invalid");
+ if (!json.get("@type").asText().equals("korap:boundary"))
+ throw new QueryException(702, "Boundary definition is invalid");
- // Set min boundary
- if (json.has("min"))
- this.min = json.get("min").asInt(defaultMin);
- else
- this.min = defaultMin;
+ // Set min boundary
+ if (json.has("min"))
+ this.min = json.get("min").asInt(defaultMin);
+ else
+ this.min = defaultMin;
- // Set max boundary
- if (json.has("max"))
- this.max = json.get("max").asInt(defaultMax);
- else
- this.max = defaultMax;
+ // Set max boundary
+ if (json.has("max"))
+ this.max = json.get("max").asInt(defaultMax);
+ else
+ this.max = defaultMax;
- if (DEBUG)
- log.trace("Found korap:boundary with {}:{}", min, max);
- };
+ if (DEBUG) log.trace("Found korap:boundary with {}:{}", min, max);
+ };
};
- public SpanQueryWrapper fromJson (String jsonString) throws QueryException {
- JsonNode json;
- try {
- json = this.json.readValue(jsonString, JsonNode.class);
- }
- catch (IOException e) {
- String msg = e.getMessage();
- log.warn("Unable to parse JSON: " + msg.split("\n")[0]);
- throw new QueryException(621, "Unable to parse JSON");
- };
+ public SpanQueryWrapper fromJson(String jsonString) throws QueryException {
+ JsonNode json;
+ try {
+ json = this.json.readValue(jsonString, JsonNode.class);
+ }
+ catch (IOException e) {
+ String msg = e.getMessage();
+ log.warn("Unable to parse JSON: " + msg.split("\n")[0]);
+ throw new QueryException(621, "Unable to parse JSON");
+ }
+ ;
- if (!json.has("@type") && json.has("query"))
- json = json.get("query");
+ if (!json.has("@type") && json.has("query")) json = json.get("query");
- return this.fromJson(json);
+ return this.fromJson(json);
};
// http://fasterxml.github.io/jackson-databind/javadoc/2.2.0/com/fasterxml/jackson/databind/JsonNode.html
@@ -122,887 +126,919 @@
// TODO: Use the shortcuts implemented in this class instead of the wrapper constructors
// TODO: Check for isArray()
// TODO: Rename this span context!
- public SpanQueryWrapper fromJson (JsonNode json) throws QueryException {
+ public SpanQueryWrapper fromJson(JsonNode json) throws QueryException {
- int number = 0;
+ int number = 0;
- if (!json.has("@type"))
- throw new QueryException(701, "JSON-LD group has no @type attribute");
+ if (!json.has("@type"))
+ throw new QueryException(701,
+ "JSON-LD group has no @type attribute");
- String type = json.get("@type").asText();
+ String type = json.get("@type").asText();
- switch (type) {
+ switch (type) {
- case "korap:group":
- SpanClassQueryWrapper classWrapper;
+ case "korap:group":
+ SpanClassQueryWrapper classWrapper;
- if (!json.has("operation"))
- throw new QueryException(703, "Group expects operation");
+ if (!json.has("operation"))
+ throw new QueryException(703, "Group expects operation");
- String operation = json.get("operation").asText();
+ String operation = json.get("operation").asText();
- if (DEBUG)
- log.trace("Found {} group", operation);
+ if (DEBUG) log.trace("Found {} group", operation);
- if (!json.has("operands"))
- throw new QueryException(704, "Operation needs operand list");
+ if (!json.has("operands"))
+ throw new QueryException(704,
+ "Operation needs operand list");
- // Get all operands
- JsonNode operands = json.get("operands");
+ // Get all operands
+ JsonNode operands = json.get("operands");
- if (!operands.isArray())
- throw new QueryException(704, "Operation needs operand list");
+ if (!operands.isArray())
+ throw new QueryException(704,
+ "Operation needs operand list");
- if (DEBUG)
- log.trace("Operands are {}", operands);
+ if (DEBUG) log.trace("Operands are {}", operands);
- switch (operation) {
+ switch (operation) {
- case "operation:or":
- SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
- for (JsonNode operand : operands) {
- ssaq.or(this.fromJson(operand));
- };
- return ssaq;
+ case "operation:or":
+ SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(
+ this.field);
+ for (JsonNode operand : operands) {
+ ssaq.or(this.fromJson(operand));
+ }
+ ;
+ return ssaq;
- case "operation:position":
+ case "operation:position":
- if (operands.size() != 2)
- throw new QueryException(705, "Number of operands is not acceptable");
+ if (operands.size() != 2)
+ throw new QueryException(705,
+ "Number of operands is not acceptable");
- // TODO: Check for operands
- // TODO: LEGACY and not future proof
- String frame = json.has("frame") ?
- json.get("frame").asText() :
- "frame:contains";
+ // TODO: Check for operands
+ // TODO: LEGACY and not future proof
+ String frame = json.has("frame") ? json.get("frame")
+ .asText() : "frame:contains";
- if (DEBUG)
- log.trace("Position frame is '{}'", frame);
+ if (DEBUG) log.trace("Position frame is '{}'", frame);
- byte flag = WITHIN;
- switch (frame) {
- case "frame:contains":
- break;
- case "frame:strictlyContains":
- flag = REAL_WITHIN;
- break;
- case "frame:within":
- break;
- case "frame:startswith":
- flag = STARTSWITH;
- break;
- case "frame:endswith":
- flag = ENDSWITH;
- break;
- case "frame:matches":
- flag = MATCH;
- break;
- case "frame:overlaps":
- flag = OVERLAP;
- break;
- case "frame:strictlyOverlaps":
- flag = REAL_OVERLAP;
- break;
- case "":
- // Temporary workaround for wrongly set overlaps
- if (json.has("frames")) {
- frame = json.get("frames").get(0).asText();
- if (frame.equals("frames:overlapsLeft") ||
- frame.equals("frames:overlapsRight")) {
- flag = OVERLAP;
- break;
- };
- };
- default:
- throw new QueryException(706, "Frame type is unknown");
- };
+ byte flag = WITHIN;
+ switch (frame) {
+ case "frame:contains":
+ break;
+ case "frame:strictlyContains":
+ flag = REAL_WITHIN;
+ break;
+ case "frame:within":
+ break;
+ case "frame:startswith":
+ flag = STARTSWITH;
+ break;
+ case "frame:endswith":
+ flag = ENDSWITH;
+ break;
+ case "frame:matches":
+ flag = MATCH;
+ break;
+ case "frame:overlaps":
+ flag = OVERLAP;
+ break;
+ case "frame:strictlyOverlaps":
+ flag = REAL_OVERLAP;
+ break;
+ case "":
+ // Temporary workaround for wrongly set overlaps
+ if (json.has("frames")) {
+ frame = json.get("frames").get(0).asText();
+ if (frame.equals("frames:overlapsLeft")
+ || frame.equals("frames:overlapsRight")) {
+ flag = OVERLAP;
+ break;
+ }
+ ;
+ }
+ ;
+ default:
+ throw new QueryException(706,
+ "Frame type is unknown");
+ }
+ ;
- // Check for exclusion modificator
- Boolean exclude;
- if (json.has("exclude") && json.get("exclude").asBoolean())
- throw new QueryException(
- 760,
- "Exclusion is currently not supported in position operations"
- );
+ // Check for exclusion modifier
+ Boolean exclude;
+ if (json.has("exclude")
+ && json.get("exclude").asBoolean())
+ throw new QueryException(760,
+ "Exclusion is currently not supported in position operations");
- return new SpanWithinQueryWrapper(
- this.fromJson(operands.get(0)),
- this.fromJson(operands.get(1)),
- flag
- );
+ return new SpanWithinQueryWrapper(
+ this.fromJson(operands.get(0)),
+ this.fromJson(operands.get(1)), flag);
- // TODO: This is DEPRECATED and should be communicated that way
- case "operation:submatch":
+ // TODO: This is DEPRECATED and should be communicated that way
+ case "operation:submatch":
- if (operands.size() != 1)
- throw new QueryException(705, "Number of operands is not acceptable");
+ if (operands.size() != 1)
+ throw new QueryException(705,
+ "Number of operands is not acceptable");
- if (json.has("classRef")) {
- if (json.has("classRefOp"))
- throw new QueryException(
- 761,
- "Class reference operators are currently not supported"
- );
+ if (json.has("classRef")) {
+ if (json.has("classRefOp"))
+ throw new QueryException(761,
+ "Class reference operators are currently not supported");
- number = json.get("classRef").get(0).asInt();
- }
- else if (json.has("spanRef")) {
- throw new QueryException(
- 762,
- "Span references are currently not supported"
- );
- };
+ number = json.get("classRef").get(0).asInt();
+ }
+ else if (json.has("spanRef")) {
+ throw new QueryException(762,
+ "Span references are currently not supported");
+ }
+ ;
- return new SpanMatchModifyQueryWrapper(
- this.fromJson(operands.get(0)), number
- );
+ return new SpanMatchModifyQueryWrapper(
+ this.fromJson(operands.get(0)), number);
- case "operation:sequence":
+ case "operation:sequence":
- // Sequence with only one operand
- if (operands.size() == 1)
- return this.fromJson(operands.get(0));
+ // Sequence with only one operand
+ if (operands.size() == 1)
+ return this.fromJson(operands.get(0));
- SpanSequenceQueryWrapper sseqqw = this.seq();
+ SpanSequenceQueryWrapper sseqqw = this.seq();
- // Say if the operand order is important
- if (json.has("inOrder"))
- sseqqw.setInOrder(json.get("inOrder").asBoolean());
+ // Say if the operand order is important
+ if (json.has("inOrder"))
+ sseqqw.setInOrder(json.get("inOrder").asBoolean());
- // Introduce distance constraints
- // ATTENTION: Distances have to be set before segments are added
- if (json.has("distances")) {
+ // Introduce distance constraints
+ // ATTENTION: Distances have to be set before segments are added
+ if (json.has("distances")) {
- // TODO
- if (json.has("exclude") && json.get("exclude").asBoolean())
- throw new QueryException(
- 763,
- "Excluding distance constraints are currently not supported"
- );
+ // TODO
+ if (json.has("exclude")
+ && json.get("exclude").asBoolean())
+ throw new QueryException(763,
+ "Excluding distance constraints are currently not supported");
- if (!json.get("distances").isArray()) {
- throw new QueryException(
- 707,
- "Distance Constraints have " +
- "to be defined as arrays"
- );
- };
+ if (!json.get("distances").isArray()) {
+ throw new QueryException(707,
+ "Distance Constraints have "
+ + "to be defined as arrays");
+ }
+ ;
- // TEMPORARY: Workaround for group distances
- JsonNode firstDistance = json.get("distances").get(0);
+ // TEMPORARY: Workaround for group distances
+ JsonNode firstDistance = json.get("distances").get(
+ 0);
- if (!firstDistance.has("@type"))
- throw new QueryException(701, "JSON-LD group has no @type attribute");
+ if (!firstDistance.has("@type"))
+ throw new QueryException(701,
+ "JSON-LD group has no @type attribute");
- JsonNode distances;
- if (firstDistance.get("@type").asText().equals("korap:group")) {
- if (!firstDistance.has("operands") ||
- !firstDistance.get("operands").isArray())
- throw new QueryException(704, "Operation needs operand list");
+ JsonNode distances;
+ if (firstDistance.get("@type").asText()
+ .equals("korap:group")) {
+ if (!firstDistance.has("operands")
+ || !firstDistance.get("operands")
+ .isArray())
+ throw new QueryException(704,
+ "Operation needs operand list");
- distances = firstDistance.get("operands");
- }
+ distances = firstDistance.get("operands");
+ }
- // Support korap distances
- // Support cosmas distances
- else if (
- firstDistance.get("@type").asText().equals("korap:distance")
- ||
- firstDistance.get("@type").asText().equals("cosmas:distance")) {
+ // Support korap distances
+ // Support cosmas distances
+ else if (firstDistance.get("@type").asText()
+ .equals("korap:distance")
+ || firstDistance.get("@type").asText()
+ .equals("cosmas:distance")) {
- distances = json.get("distances");
- }
+ distances = json.get("distances");
+ }
- else
- throw new QueryException(708, "No valid distances defined");
+ else
+ throw new QueryException(708,
+ "No valid distances defined");
- // Add all distance constraint to query
- for (JsonNode constraint : distances) {
- String unit = "w";
- if (constraint.has("key"))
- unit = constraint.get("key").asText();
+ // Add all distance constraint to query
+ for (JsonNode constraint : distances) {
+ String unit = "w";
+ if (constraint.has("key"))
+ unit = constraint.get("key").asText();
- // There is a maximum of 100 fix
- int min = 0, max = 100;
- if (constraint.has("boundary")) {
- Boundary b = new Boundary(constraint.get("boundary"), 0,100);
- min = b.min;
- max = b.max;
- }
- else {
- if (constraint.has("min"))
- min = constraint.get("min").asInt(0);
- if (constraint.has("max"))
- max = constraint.get("max").asInt(100);
- };
+ // There is a fixed maximum of 100
+ int min = 0, max = 100;
+ if (constraint.has("boundary")) {
+ Boundary b = new Boundary(
+ constraint.get("boundary"), 0, 100);
+ min = b.min;
+ max = b.max;
+ }
+ else {
+ if (constraint.has("min"))
+ min = constraint.get("min").asInt(0);
+ if (constraint.has("max"))
+ max = constraint.get("max").asInt(100);
+ }
+ ;
+
+ // Add foundry and layer to the unit for new indices
+ if (constraint.has("foundry")
+ && constraint.has("layer")
+ && constraint.get("foundry").asText()
+ .length() > 0
+ && constraint.get("layer").asText()
+ .length() > 0) {
- // Add foundry and layer to the unit for new indices
- if (constraint.has("foundry") &&
- constraint.has("layer") &&
- constraint.get("foundry").asText().length() > 0 &&
- constraint.get("layer").asText().length() > 0) {
+ StringBuilder value = new StringBuilder();
+ value.append(constraint.get("foundry")
+ .asText());
+ value.append('/');
+ value.append(constraint.get("layer")
+ .asText());
+ value.append(':').append(unit);
+ unit = value.toString();
+ }
+ ;
- StringBuilder value = new StringBuilder();
- value.append(constraint.get("foundry").asText());
- value.append('/');
- value.append(constraint.get("layer").asText());
- value.append(':').append(unit);
- unit = value.toString();
- };
+ // Sanitize boundary
+ if (max < min) max = min;
- // Sanitize boundary
- if (max < min)
- max = min;
+ if (DEBUG)
+ log.trace(
+ "Add distance constraint of '{}': {}-{}",
+ unit, min, max);
- if (DEBUG)
- log.trace("Add distance constraint of '{}': {}-{}",
- unit, min, max);
+ sseqqw.withConstraint(min, max, unit);
+ }
+ ;
+ }
+ ;
- sseqqw.withConstraint(min, max, unit);
- };
- };
+ // Add segments to sequence
+ for (JsonNode operand : operands) {
+ sseqqw.append(this.fromJson(operand));
+ }
+ ;
- // Add segments to sequence
- for (JsonNode operand : operands) {
- sseqqw.append(this.fromJson(operand));
- };
+ // inOrder was set to false without a distance constraint
+ if (!sseqqw.isInOrder() && !sseqqw.hasConstraints()) {
+ sseqqw.withConstraint(1, 1, "w");
+ }
+ ;
- // inOrder was set to false without a distance constraint
- if (!sseqqw.isInOrder() && !sseqqw.hasConstraints()) {
- sseqqw.withConstraint(1,1,"w");
- };
+ return sseqqw;
- return sseqqw;
+ case "operation:class":
+ number = 1;
- case "operation:class":
- number = 1;
+ if (json.has("classOut")) {
+ number = json.get("classOut").asInt(0);
+ }
+ // Legacy classes
+ else if (json.has("class")) {
+ number = json.get("class").asInt(0);
+ }
+ ;
- if (json.has("classOut")) {
- number = json.get("classOut").asInt(0);
- }
- // Legacy classes
- else if (json.has("class")) {
- number = json.get("class").asInt(0);
- };
+ if (json.has("classRefCheck"))
+ this.addWarning(
+ 764,
+ "Class reference checks are currently not supported - results may not be correct");
- if (json.has("classRefCheck"))
- this.addWarning(
- 764,
- "Class reference checks are currently not supported - results may not be correct"
- );
+ if (json.has("classRefOp"))
+ throw new QueryException(761,
+ "Class reference operators are currently not supported");
- if (json.has("classRefOp"))
- throw new QueryException(
- 761,
- "Class reference operators are currently not supported"
- );
+ if (number > 0) {
+ if (operands.size() != 1)
+ throw new QueryException(705,
+ "Number of operands is not acceptable");
- if (number > 0) {
- if (operands.size() != 1)
- throw new QueryException(
- 705,
- "Number of operands is not acceptable"
- );
+ if (DEBUG)
+ log.trace("Found Class definition for {}",
+ number);
- if (DEBUG)
- log.trace("Found Class definition for {}", number);
+ if (number > MAX_CLASS_NUM) {
+ throw new QueryException(709,
+ "Valid class numbers exceeded");
+ }
+ ;
- if (number > MAX_CLASS_NUM) {
- throw new QueryException(
- 709, "Valid class numbers exceeded"
- );
- };
+ SpanQueryWrapper sqw = this.fromJson(operands
+ .get(0));
- SpanQueryWrapper sqw = this.fromJson(operands.get(0));
+ // Problematic
+ if (sqw.maybeExtension())
+ return sqw.setClassNumber(number);
- // Problematic
- if (sqw.maybeExtension())
- return sqw.setClassNumber(number);
+ return new SpanClassQueryWrapper(sqw, number);
+ }
+ ;
- return new SpanClassQueryWrapper(sqw, number);
- };
+ throw new QueryException(710, "Class attribute missing");
- throw new QueryException(710, "Class attribute missing");
+ case "operation:repetition":
- case "operation:repetition":
+ if (operands.size() != 1)
+ throw new QueryException(705,
+ "Number of operands is not acceptable");
- if (operands.size() != 1)
- throw new QueryException(
- 705,
- "Number of operands is not acceptable"
- );
+ int min = 0;
+ int max = 100;
- int min = 0;
- int max = 100;
+ if (json.has("boundary")) {
+ Boundary b = new Boundary(json.get("boundary"), 0,
+ 100);
+ min = b.min;
+ max = b.max;
+ }
+ else {
+ if (json.has("min"))
+ min = json.get("min").asInt(0);
+ if (json.has("max"))
+ max = json.get("max").asInt(100);
- if (json.has("boundary")) {
- Boundary b = new Boundary(json.get("boundary"), 0, 100);
- min = b.min;
- max = b.max;
- }
- else {
- if (json.has("min"))
- min = json.get("min").asInt(0);
- if (json.has("max"))
- max = json.get("max").asInt(100);
+ if (DEBUG)
+ log.trace(
+ "Boundary is set by deprecated {}-{}",
+ min, max);
+ }
+ ;
- if (DEBUG)
- log.trace(
- "Boundary is set by deprecated {}-{}",
- min,
- max);
- };
+ // Sanitize max
+ if (max < 0)
+ max = 100;
+ else if (max > 100) max = 100;
- // Sanitize max
- if (max < 0)
- max = 100;
- else if (max > 100)
- max = 100;
+ // Sanitize min
+ if (min < 0)
+ min = 0;
+ else if (min > 100) min = 100;
- // Sanitize min
- if (min < 0)
- min = 0;
- else if (min > 100)
- min = 100;
-
- // Check relation between min and max
- if (min > max)
- max = max;
+ // Check relation between min and max: raise max to min if the boundary is inverted
+ if (min > max) max = min;
- SpanQueryWrapper sqw = this.fromJson(operands.get(0));
+ SpanQueryWrapper sqw = this.fromJson(operands.get(0));
- if (sqw.maybeExtension())
- return sqw.setMin(min).setMax(max);
+ if (sqw.maybeExtension())
+ return sqw.setMin(min).setMax(max);
- return new SpanRepetitionQueryWrapper(sqw, min, max);
+ return new SpanRepetitionQueryWrapper(sqw, min, max);
- case "operation:relation":
- throw new QueryException(765, "Relations are currently not supported");
- };
+ case "operation:relation":
+ throw new QueryException(765,
+ "Relations are currently not supported");
+ }
+ ;
- throw new QueryException(711, "Unknown group operation");
+ throw new QueryException(711, "Unknown group operation");
- case "korap:reference":
- if (json.has("operation") &&
- !json.get("operation").asText().equals("operation:focus"))
- throw new QueryException(712, "Unknown reference operation");
+ case "korap:reference":
+ if (json.has("operation")
+ && !json.get("operation").asText()
+ .equals("operation:focus"))
+ throw new QueryException(712, "Unknown reference operation");
- if (!json.has("operands"))
- throw new QueryException(
- 766, "Peripheral references are currently not supported"
- );
+ if (!json.has("operands"))
+ throw new QueryException(766,
+ "Peripheral references are currently not supported");
- operands = json.get("operands");
+ operands = json.get("operands");
- if (!operands.isArray())
- throw new QueryException(704, "Operation needs operand list");
+ if (!operands.isArray())
+ throw new QueryException(704,
+ "Operation needs operand list");
- if (operands.size() == 0)
- throw new QueryException(704, "Operation needs operand list");
+ if (operands.size() == 0)
+ throw new QueryException(704,
+ "Operation needs operand list");
- if (operands.size() != 1)
- throw new QueryException(705, "Number of operands is not acceptable");
+ if (operands.size() != 1)
+ throw new QueryException(705,
+ "Number of operands is not acceptable");
- if (json.has("classRef")) {
- if (json.has("classRefOp")) {
- throw new QueryException(
- 761,
- "Class reference operators are currently not supported"
- );
- };
+ if (json.has("classRef")) {
+ if (json.has("classRefOp")) {
+ throw new QueryException(761,
+ "Class reference operators are currently not supported");
+ }
+ ;
- number = json.get("classRef").get(0).asInt();
+ number = json.get("classRef").get(0).asInt();
+ if (number > MAX_CLASS_NUM)
+ throw new QueryException(709,
+ "Valid class numbers exceeded");
+ }
+ else if (json.has("spanRef")) {
+ JsonNode spanRef = json.get("spanRef");
+ return new SpanSubspanQueryWrapper(
+ fromJson(operands.get(0)), spanRef.get(0).asInt(),
+ spanRef.get(1).asInt());
- if (number > MAX_CLASS_NUM)
- throw new QueryException(
- 709, "Valid class numbers exceeded"
- );
- }
- else if (json.has("spanRef")) {
- throw new QueryException(
- 762,
- "Span references are currently not supported"
- );
- };
+ // throw new QueryException(
+ // 762,
+ // "Span references are currently not supported"
+ // );
+ }
+ ;
- if (DEBUG)
- log.trace("Wrap class reference {}", number);
+ if (DEBUG) log.trace("Wrap class reference {}", number);
- return new SpanMatchModifyQueryWrapper(
- this.fromJson(operands.get(0)), number
- );
+ return new SpanMatchModifyQueryWrapper(this.fromJson(operands
+ .get(0)), number);
- case "korap:token":
+ case "korap:token":
- // The token is empty and should be treated like []
- if (!json.has("wrap"))
- return new SpanRepetitionQueryWrapper();
+ // The token is empty and should be treated like []
+ if (!json.has("wrap")) return new SpanRepetitionQueryWrapper();
- return this._segFromJson(json.get("wrap"));
+ return this._segFromJson(json.get("wrap"));
- case "korap:span":
- return this._termFromJson(json);
- };
- throw new QueryException(713, "Query type is not supported");
+ case "korap:span":
+ return this._termFromJson(json);
+ }
+ ;
+ throw new QueryException(713, "Query type is not supported");
};
+ private SpanQueryWrapper _segFromJson(JsonNode json) throws QueryException {
+ if (!json.has("@type"))
+ throw new QueryException(701,
+ "JSON-LD group has no @type attribute");
- private SpanQueryWrapper _segFromJson (JsonNode json) throws QueryException {
+ String type = json.get("@type").asText();
- if (!json.has("@type"))
- throw new QueryException(701, "JSON-LD group has no @type attribute");
+ if (DEBUG) log.trace("Wrap new token definition by {}", type);
- String type = json.get("@type").asText();
+ switch (type) {
- if (DEBUG)
- log.trace("Wrap new token definition by {}", type);
+ case "korap:term":
+ String match = "match:eq";
+ if (json.has("match")) match = json.get("match").asText();
- switch (type) {
+ switch (match) {
+ case "match:ne":
+ if (DEBUG) log.trace("Term is negated");
+ SpanSegmentQueryWrapper ssqw = (SpanSegmentQueryWrapper) this
+ ._termFromJson(json);
+ ssqw.makeNegative();
+ return this.seg().without(ssqw);
+ case "match:eq":
+ return this._termFromJson(json);
+ }
+ ;
- case "korap:term":
- String match = "match:eq";
- if (json.has("match"))
- match = json.get("match").asText();
+ throw new QueryException(741, "Match relation unknown");
- switch (match) {
- case "match:ne":
- if (DEBUG)
- log.trace("Term is negated");
- SpanSegmentQueryWrapper ssqw =
- (SpanSegmentQueryWrapper) this._termFromJson(json);
- ssqw.makeNegative();
- return this.seg().without(ssqw);
- case "match:eq":
- return this._termFromJson(json);
- };
+ case "korap:termGroup":
- throw new QueryException(741, "Match relation unknown");
+ if (!json.has("operands"))
+ throw new QueryException(742,
+ "Term group needs operand list");
- case "korap:termGroup":
+ // Get operands
+ JsonNode operands = json.get("operands");
- if (!json.has("operands"))
- throw new QueryException(742, "Term group needs operand list");
+ SpanSegmentQueryWrapper ssegqw = this.seg();
- // Get operands
- JsonNode operands = json.get("operands");
+ if (!json.has("relation"))
+ throw new QueryException(743,
+ "Term group expects a relation");
- SpanSegmentQueryWrapper ssegqw = this.seg();
+ switch (json.get("relation").asText()) {
+ case "relation:and":
- if (!json.has("relation"))
- throw new QueryException(743, "Term group expects a relation");
+ for (JsonNode operand : operands) {
+ SpanQueryWrapper part = this._segFromJson(operand);
+ if (part instanceof SpanAlterQueryWrapper) {
+ ssegqw.with((SpanAlterQueryWrapper) part);
+ }
+ else if (part instanceof SpanRegexQueryWrapper) {
+ ssegqw.with((SpanRegexQueryWrapper) part);
+ }
+ else if (part instanceof SpanSegmentQueryWrapper) {
+ ssegqw.with((SpanSegmentQueryWrapper) part);
+ }
+ else {
+ throw new QueryException(744,
+ "Operand not supported in term group");
+ }
+ ;
+ }
+ ;
+ return ssegqw;
- switch (json.get("relation").asText()) {
- case "relation:and":
+ case "relation:or":
- for (JsonNode operand : operands) {
- SpanQueryWrapper part = this._segFromJson(operand);
- if (part instanceof SpanAlterQueryWrapper) {
- ssegqw.with((SpanAlterQueryWrapper) part);
- }
- else if (part instanceof SpanRegexQueryWrapper) {
- ssegqw.with((SpanRegexQueryWrapper) part);
- }
- else if (part instanceof SpanSegmentQueryWrapper) {
- ssegqw.with((SpanSegmentQueryWrapper) part);
- }
- else {
- throw new QueryException(
- 744, "Operand not supported in term group"
- );
- };
- };
- return ssegqw;
-
- case "relation:or":
-
- SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
- for (JsonNode operand : operands) {
- ssaq.or(this._segFromJson(operand));
- };
- return ssaq;
- };
- };
- throw new QueryException(745, "Token type is not supported");
+ SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(
+ this.field);
+ for (JsonNode operand : operands) {
+ ssaq.or(this._segFromJson(operand));
+ }
+ ;
+ return ssaq;
+ }
+ ;
+ }
+ ;
+ throw new QueryException(745, "Token type is not supported");
};
+ private SpanQueryWrapper _termFromJson(JsonNode json) throws QueryException {
+ if (!json.has("key") || json.get("key").asText().length() < 1)
+ throw new QueryException(740,
+ "Key definition is missing in term or span");
- private SpanQueryWrapper _termFromJson (JsonNode json) throws QueryException {
- if (!json.has("key") || json.get("key").asText().length() < 1)
- throw new QueryException(740, "Key definition is missing in term or span");
-
- if (!json.has("@type"))
- throw new QueryException(701, "JSON-LD group has no @type attribute");
+ if (!json.has("@type"))
+ throw new QueryException(701,
+ "JSON-LD group has no @type attribute");
- Boolean isTerm = json.get("@type").asText().equals("korap:term") ? true : false;
- Boolean isCaseInsensitive = false;
+ Boolean isTerm = json.get("@type").asText().equals("korap:term") ? true
+ : false;
+ Boolean isCaseInsensitive = false;
- if (json.has("caseInsensitive") && json.get("caseInsensitive").asBoolean())
- isCaseInsensitive = true;
+ if (json.has("caseInsensitive")
+ && json.get("caseInsensitive").asBoolean())
+ isCaseInsensitive = true;
- StringBuilder value = new StringBuilder();
+ StringBuilder value = new StringBuilder();
- // expect orth? expect lemma?
- // s:den | i:den | cnx/l:die | mate/m:mood:ind | cnx/syn:@PREMOD |
- // mate/m:number:sg | opennlp/p:ART
+ // expect orth? expect lemma?
+ // s:den | i:den | cnx/l:die | mate/m:mood:ind | cnx/syn:@PREMOD |
+ // mate/m:number:sg | opennlp/p:ART
- if (json.has("foundry") && json.get("foundry").asText().length() > 0)
- value.append(json.get("foundry").asText()).append('/');
+ if (json.has("foundry") && json.get("foundry").asText().length() > 0)
+ value.append(json.get("foundry").asText()).append('/');
- // No default foundry defined
+ // No default foundry defined
- if (json.has("layer") && json.get("layer").asText().length() > 0) {
- String layer = json.get("layer").asText();
- switch (layer) {
+ if (json.has("layer") && json.get("layer").asText().length() > 0) {
+ String layer = json.get("layer").asText();
+ switch (layer) {
- case "lemma":
- layer = "l";
- break;
+ case "lemma":
+ layer = "l";
+ break;
- case "pos":
- layer = "p";
- break;
+ case "pos":
+ layer = "p";
+ break;
- case "orth":
- // TODO: THIS IS A BUG! AND SHOULD BE NAMED "SURFACE"
- layer = "s";
- break;
+ case "orth":
+ // TODO: THIS IS A BUG! AND SHOULD BE NAMED "SURFACE"
+ layer = "s";
+ break;
- case "struct":
- layer = "s";
- break;
+ case "struct":
+ layer = "s";
+ break;
- case "const":
- layer = "c";
- break;
- };
+ case "const":
+ layer = "c";
+ break;
+ }
+ ;
- if (isCaseInsensitive && isTerm) {
- if (layer.equals("s")) {
- layer = "i";
- }
- else {
- this.addWarning(
- 767,
- "Case insensitivity is currently not supported for this layer"
- );
- };
- };
+ if (isCaseInsensitive && isTerm) {
+ if (layer.equals("s")) {
+ layer = "i";
+ }
+ else {
+ this.addWarning(767,
+ "Case insensitivity is currently not supported for this layer");
+ }
+ ;
+ }
+ ;
- // Ignore foundry for orth layer
- if (layer.equals("s") || layer.equals("i"))
- value.setLength(0);
+ // Ignore foundry for orth layer
+ if (layer.equals("s") || layer.equals("i")) value.setLength(0);
- value.append(layer).append(':');
- };
+ value.append(layer).append(':');
+ }
+ ;
- if (json.has("key") && json.get("key").asText().length() > 0) {
- String key = json.get("key").asText();
- value.append(isCaseInsensitive ? key.toLowerCase() : key);
- };
+ if (json.has("key") && json.get("key").asText().length() > 0) {
+ String key = json.get("key").asText();
+ value.append(isCaseInsensitive ? key.toLowerCase() : key);
+ }
+ ;
- if (json.has("value") && json.get("value").asText().length() > 0)
- value.append(':').append(json.get("value").asText());
+ if (json.has("value") && json.get("value").asText().length() > 0)
+ value.append(':').append(json.get("value").asText());
- // Regular expression or wildcard
- if (isTerm && json.has("type")) {
- switch (json.get("type").asText()) {
- case "type:regex":
- return this.seg(this.re(value.toString(), isCaseInsensitive));
- case "type:wildcard":
- return this.seq(this.wc(value.toString(), isCaseInsensitive));
- case "type:string":
- break;
- default:
- this.addWarning(746, "Term type is not supported - treated as a string");
- };
- };
+ // Regular expression or wildcard
+ if (isTerm && json.has("type")) {
+ switch (json.get("type").asText()) {
+ case "type:regex":
+ return this
+ .seg(this.re(value.toString(), isCaseInsensitive));
+ case "type:wildcard":
+ return this
+ .seq(this.wc(value.toString(), isCaseInsensitive));
+ case "type:string":
+ break;
+ default:
+ this.addWarning(746,
+ "Term type is not supported - treated as a string");
+ }
+ ;
+ }
+ ;
- if (isTerm)
- return this.seg(value.toString());
+ if (isTerm) return this.seg(value.toString());
- if (json.has("attr"))
- this.addWarning(
- 768,
- "Attributes are currently not supported - results may not be correct");
+ if (json.has("attr"))
+ this.addWarning(768,
+ "Attributes are currently not supported - results may not be correct");
- return this.tag(value.toString());
+ return this.tag(value.toString());
};
/*
- public boolean hasWarning () {
- if (this.warning != null)
- return true;
- return true;
- };
-
- public String getWarning () {
- return this.warning;
- };
-
- public void addWarning (String msg) {
- if (msg == null)
- return;
- if (this.warning == null)
- this.warning = msg;
- else
- this.warning += "; " + msg;
- };
-
- public void setWarning (String warning) {
- this.warning = warning;
- };
- */
+ * public boolean hasWarning () { if (this.warning != null) return true;
+ * return true; }; public String getWarning () { return this.warning; };
+ * public void addWarning (String msg) { if (msg == null) return; if
+ * (this.warning == null) this.warning = msg; else this.warning += "; " +
+ * msg; }; public void setWarning (String warning) { this.warning = warning;
+ * };
+ */
// SpanRegexQueryWrapper
/**
* Create a query object based on a regular expression.
+ *
* @param re The regular expession as a string.
*/
- public SpanRegexQueryWrapper re (String re) {
- return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, false);
+ public SpanRegexQueryWrapper re(String re) {
+ return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, false);
};
/**
* Create a query object based on a regular expression.
+ *
* @param re The regular expession as a string.
* @param flas The regular expession flag as an integer.
*/
- public SpanRegexQueryWrapper re (String re, int flags) {
- return new SpanRegexQueryWrapper(this.field, re, flags, false);
+ public SpanRegexQueryWrapper re(String re, int flags) {
+ return new SpanRegexQueryWrapper(this.field, re, flags, false);
};
/**
* Create a query object based on a regular expression.
+ *
* @param re The regular expession as a string.
* @param flag The regular expession flag.
* @param caseinsensitive A boolean value indicating case insensitivity.
*/
- public SpanRegexQueryWrapper re (String re, int flags, boolean caseinsensitive) {
- return new SpanRegexQueryWrapper(this.field, re, flags, caseinsensitive);
+ public SpanRegexQueryWrapper re(String re, int flags,
+ boolean caseinsensitive) {
+ return new SpanRegexQueryWrapper(this.field, re, flags, caseinsensitive);
};
/**
* Create a query object based on a regular expression.
+ *
* @param re The regular expession as a string.
* @param caseinsensitive A boolean value indicating case insensitivity.
*/
- public SpanRegexQueryWrapper re (String re, boolean caseinsensitive) {
- return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, caseinsensitive);
+ public SpanRegexQueryWrapper re(String re, boolean caseinsensitive) {
+ return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL,
+ caseinsensitive);
};
// SpanWildcardQueryWrapper
/**
* Create a query object based on a wildcard term.
+ *
* @param wc The wildcard term as a string.
*/
- public SpanWildcardQueryWrapper wc (String wc) {
- return new SpanWildcardQueryWrapper(this.field, wc, false);
+ public SpanWildcardQueryWrapper wc(String wc) {
+ return new SpanWildcardQueryWrapper(this.field, wc, false);
};
/**
* Create a query object based on a wildcard term.
+ *
* @param wc The wildcard term as a string.
* @param caseinsensitive A boolean value indicating case insensitivity.
*/
- public SpanWildcardQueryWrapper wc (String wc, boolean caseinsensitive) {
- return new SpanWildcardQueryWrapper(this.field, wc, caseinsensitive);
+ public SpanWildcardQueryWrapper wc(String wc, boolean caseinsensitive) {
+ return new SpanWildcardQueryWrapper(this.field, wc, caseinsensitive);
};
-
// SpanSegmentQueries
/**
* Create a segment query object.
*/
- public SpanSegmentQueryWrapper seg () {
- return new SpanSegmentQueryWrapper(this.field);
+ public SpanSegmentQueryWrapper seg() {
+ return new SpanSegmentQueryWrapper(this.field);
};
-
/**
* Create a segment query object.
+ *
* @param terms[] An array of terms, the segment consists of.
*/
- public SpanSegmentQueryWrapper seg (SpanRegexQueryWrapper ... terms) {
- SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
- for (SpanRegexQueryWrapper t : terms) {
- ssq.with(t);
- };
- return ssq;
+ public SpanSegmentQueryWrapper seg(SpanRegexQueryWrapper... terms) {
+ SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
+ for (SpanRegexQueryWrapper t : terms) {
+ ssq.with(t);
+ }
+ ;
+ return ssq;
};
- public SpanSegmentQueryWrapper seg (SpanAlterQueryWrapper ... terms) {
- SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
- for (SpanAlterQueryWrapper t : terms) {
- ssq.with(t);
- };
- return ssq;
+ public SpanSegmentQueryWrapper seg(SpanAlterQueryWrapper... terms) {
+ SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
+ for (SpanAlterQueryWrapper t : terms) {
+ ssq.with(t);
+ }
+ ;
+ return ssq;
};
- public SpanSegmentQueryWrapper seg (String ... terms) {
- SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
- for (String t : terms) {
- ssq.with(t);
- };
- return ssq;
+ public SpanSegmentQueryWrapper seg(String... terms) {
+ SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
+ for (String t : terms) {
+ ssq.with(t);
+ }
+ ;
+ return ssq;
};
// Create an empty segment
- public SpanRepetitionQueryWrapper empty () {
- return new SpanRepetitionQueryWrapper();
+ public SpanRepetitionQueryWrapper empty() {
+ return new SpanRepetitionQueryWrapper();
};
// SpanSegmentAlterQueries
/**
* Create a segment alternation query object.
+ *
* @param terms[] An array of alternative terms.
*/
- public SpanAlterQueryWrapper or (SpanQueryWrapper ... terms) {
- SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
- for (SpanQueryWrapper t : terms) {
- ssaq.or(t);
- };
- return ssaq;
+ public SpanAlterQueryWrapper or(SpanQueryWrapper... terms) {
+ SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
+ for (SpanQueryWrapper t : terms) {
+ ssaq.or(t);
+ }
+ ;
+ return ssaq;
};
- public SpanAlterQueryWrapper or (String ... terms) {
- SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
- for (String t : terms) {
- ssaq.or(t);
- };
- return ssaq;
+ public SpanAlterQueryWrapper or(String... terms) {
+ SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
+ for (String t : terms) {
+ ssaq.or(t);
+ }
+ ;
+ return ssaq;
};
-
// SpanSegmentSequenceQueries
/**
* Create a sequence of segments query object.
*/
- public SpanSequenceQueryWrapper seq () {
- return new SpanSequenceQueryWrapper(this.field);
+ public SpanSequenceQueryWrapper seq() {
+ return new SpanSequenceQueryWrapper(this.field);
};
-
/**
* Create a sequence of segments query object.
+ *
* @param terms[] An array of segment defining terms.
*/
- public SpanSequenceQueryWrapper seq (SpanQueryWrapper ... terms) {
- SpanSequenceQueryWrapper sssq = new SpanSequenceQueryWrapper(this.field);
- for (SpanQueryWrapper t : terms) {
- sssq.append(t);
- };
- return sssq;
+ public SpanSequenceQueryWrapper seq(SpanQueryWrapper... terms) {
+ SpanSequenceQueryWrapper sssq = new SpanSequenceQueryWrapper(this.field);
+ for (SpanQueryWrapper t : terms) {
+ sssq.append(t);
+ }
+ ;
+ return sssq;
};
-
/**
* Create a sequence of segments query object.
+ *
* @param re A SpanSegmentRegexQuery, starting the sequence.
*/
- public SpanSequenceQueryWrapper seq (SpanRegexQueryWrapper re) {
- return new SpanSequenceQueryWrapper(this.field, re);
+ public SpanSequenceQueryWrapper seq(SpanRegexQueryWrapper re) {
+ return new SpanSequenceQueryWrapper(this.field, re);
};
-
- public SpanSequenceQueryWrapper seq (Object ... terms) {
- SpanSequenceQueryWrapper ssq = new SpanSequenceQueryWrapper(this.field);
- for (Object t : terms) {
- if (t instanceof SpanQueryWrapper) {
- ssq.append((SpanQueryWrapper) t);
- }
- else if (t instanceof SpanRegexQueryWrapper) {
- ssq.append((SpanRegexQueryWrapper) t);
- }
- else {
- log.error("{} is not an acceptable parameter for seq()", t.getClass());
- return ssq;
- };
- };
- return ssq;
+ public SpanSequenceQueryWrapper seq(Object... terms) {
+ SpanSequenceQueryWrapper ssq = new SpanSequenceQueryWrapper(this.field);
+ for (Object t : terms) {
+ if (t instanceof SpanQueryWrapper) {
+ ssq.append((SpanQueryWrapper) t);
+ }
+ else if (t instanceof SpanRegexQueryWrapper) {
+ ssq.append((SpanRegexQueryWrapper) t);
+ }
+ else {
+ log.error("{} is not an acceptable parameter for seq()",
+ t.getClass());
+ return ssq;
+ }
+ ;
+ }
+ ;
+ return ssq;
};
- public SpanElementQueryWrapper tag (String element) {
- return new SpanElementQueryWrapper(this.field, element);
+ public SpanElementQueryWrapper tag(String element) {
+ return new SpanElementQueryWrapper(this.field, element);
};
/**
* Create a wrapping within query object.
+ *
* @param element A SpanQuery.
* @param embedded A SpanQuery that is wrapped in the element.
*/
@Deprecated
- public SpanWithinQueryWrapper within (SpanQueryWrapper element,
- SpanQueryWrapper embedded) {
- return new SpanWithinQueryWrapper(element, embedded);
+ public SpanWithinQueryWrapper within(SpanQueryWrapper element,
+ SpanQueryWrapper embedded) {
+ return new SpanWithinQueryWrapper(element, embedded);
};
- public SpanWithinQueryWrapper contains (SpanQueryWrapper element,
- SpanQueryWrapper embedded) {
- return new SpanWithinQueryWrapper(element, embedded, WITHIN);
+ public SpanWithinQueryWrapper contains(SpanQueryWrapper element,
+ SpanQueryWrapper embedded) {
+ return new SpanWithinQueryWrapper(element, embedded, WITHIN);
};
- public SpanWithinQueryWrapper startswith (SpanQueryWrapper element,
- SpanQueryWrapper embedded) {
- return new SpanWithinQueryWrapper(element, embedded, STARTSWITH);
+ public SpanWithinQueryWrapper startswith(SpanQueryWrapper element,
+ SpanQueryWrapper embedded) {
+ return new SpanWithinQueryWrapper(element, embedded, STARTSWITH);
};
- public SpanWithinQueryWrapper endswith (SpanQueryWrapper element,
- SpanQueryWrapper embedded) {
- return new SpanWithinQueryWrapper(element, embedded, ENDSWITH);
+ public SpanWithinQueryWrapper endswith(SpanQueryWrapper element,
+ SpanQueryWrapper embedded) {
+ return new SpanWithinQueryWrapper(element, embedded, ENDSWITH);
};
- public SpanWithinQueryWrapper overlaps (SpanQueryWrapper element,
- SpanQueryWrapper embedded) {
- return new SpanWithinQueryWrapper(element, embedded, OVERLAP);
- };
+ public SpanWithinQueryWrapper overlaps(SpanQueryWrapper element,
+ SpanQueryWrapper embedded) {
+ return new SpanWithinQueryWrapper(element, embedded, OVERLAP);
+ };
- public SpanWithinQueryWrapper matches (SpanQueryWrapper element,
- SpanQueryWrapper embedded) {
- return new SpanWithinQueryWrapper(element, embedded, MATCH);
- };
+ public SpanWithinQueryWrapper matches(SpanQueryWrapper element,
+ SpanQueryWrapper embedded) {
+ return new SpanWithinQueryWrapper(element, embedded, MATCH);
+ };
// Class
- public SpanClassQueryWrapper _ (byte number, SpanQueryWrapper element) {
- return new SpanClassQueryWrapper(element, number);
+ public SpanClassQueryWrapper _(byte number, SpanQueryWrapper element) {
+ return new SpanClassQueryWrapper(element, number);
};
- public SpanClassQueryWrapper _ (int number, SpanQueryWrapper element) {
- return new SpanClassQueryWrapper(element, number);
+ public SpanClassQueryWrapper _(int number, SpanQueryWrapper element) {
+ return new SpanClassQueryWrapper(element, number);
};
- public SpanClassQueryWrapper _ (short number, SpanQueryWrapper element) {
- return new SpanClassQueryWrapper(element, number);
+ public SpanClassQueryWrapper _(short number, SpanQueryWrapper element) {
+ return new SpanClassQueryWrapper(element, number);
};
- public SpanClassQueryWrapper _ (SpanQueryWrapper element) {
- return new SpanClassQueryWrapper(element);
+ public SpanClassQueryWrapper _(SpanQueryWrapper element) {
+ return new SpanClassQueryWrapper(element);
};
// MatchModify
- public SpanMatchModifyQueryWrapper shrink (byte number, SpanQueryWrapper element) {
- return new SpanMatchModifyQueryWrapper(element, number);
+ public SpanMatchModifyQueryWrapper shrink(byte number,
+ SpanQueryWrapper element) {
+ return new SpanMatchModifyQueryWrapper(element, number);
};
- public SpanMatchModifyQueryWrapper shrink (int number, SpanQueryWrapper element) {
- return new SpanMatchModifyQueryWrapper(element, number);
+ public SpanMatchModifyQueryWrapper shrink(int number,
+ SpanQueryWrapper element) {
+ return new SpanMatchModifyQueryWrapper(element, number);
};
- public SpanMatchModifyQueryWrapper shrink (short number, SpanQueryWrapper element) {
- return new SpanMatchModifyQueryWrapper(element, number);
+ public SpanMatchModifyQueryWrapper shrink(short number,
+ SpanQueryWrapper element) {
+ return new SpanMatchModifyQueryWrapper(element, number);
};
- public SpanMatchModifyQueryWrapper shrink (SpanQueryWrapper element) {
- return new SpanMatchModifyQueryWrapper(element);
+ public SpanMatchModifyQueryWrapper shrink(SpanQueryWrapper element) {
+ return new SpanMatchModifyQueryWrapper(element);
};
// Repetition
- public SpanRepetitionQueryWrapper repeat (SpanQueryWrapper element, int exact) {
- return new SpanRepetitionQueryWrapper(element, exact);
+ public SpanRepetitionQueryWrapper repeat(SpanQueryWrapper element, int exact) {
+ return new SpanRepetitionQueryWrapper(element, exact);
};
- public SpanRepetitionQueryWrapper repeat (SpanQueryWrapper element, int min, int max) {
- return new SpanRepetitionQueryWrapper(element, min, max);
+ public SpanRepetitionQueryWrapper repeat(SpanQueryWrapper element, int min,
+ int max) {
+ return new SpanRepetitionQueryWrapper(element, min, max);
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
index 2a0c80e..65f786a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
@@ -75,8 +75,8 @@
* [large black leather jacket] non match
* </pre>
*
- * The positions of the expansion parts can be stored in payloads by using a
- * class number (optional).
+ * The positions of the expansion parts can be optionally stored in payloads
+ * together with a class number.
*
* @author margaretha
* */
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
index 2cba5d9..ecb1bd9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
@@ -10,25 +10,26 @@
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import de.ids_mannheim.korap.query.spans.ElementSpans;
import de.ids_mannheim.korap.query.spans.SubSpans;
/**
* This query extracts a subspan from another span. The subspan starts from a
* startOffset until startOffset + length. A positive startOffset is counted
- * from the start of the span, while a negative startOffset is counted from the
- * end of the span. <br />
+ * from the start of the span, while a negative startOffset is calculated from
+ * the end of the span. <br />
* <br />
* SpanSubspanQuery takes a SpanQuery as its input and creates subspans from the
* resulting spans of the SpanQuery. For instance:
*
* <pre>
- * SpanTermQuery stq = new SpanTermQuery(new Term("tokens","s:Hund"))
- * SpanSubspanQuery ssq = new SpanSubspanQuery(stq, 0, 2, true);
+ * SpanElementQuery seq = new SpanElementQuery("tokens", "s");
+ * SpanSubspanQuery ssq = new SpanSubspanQuery(seq, 0, 2, true);
* </pre>
*
- * In this example, the SpanSubspanQuery creates subspans "Hu" from all the
- * occurrences of TermSpans "Hund", that starts from index 0 to 2. It also
- * collects all payloads from the TermSpans for the SubSpans.
+ * In this example, the SpanSubspanQuery creates subspans consisting of the
+ * first two tokens of every sentence. It also collects all payloads from the
+ * {@link ElementSpans} for the SubSpans.
*
* @author margaretha
* */
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java
index cad169a..e4a77d6 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java
@@ -20,7 +20,7 @@
* spans' occurrences which are not in a sentence or a paragraph (with respect
* to the element distance type currently used), are ignored.
*
- * Note: elements cannot overlap to each other.
+ * Note: elements cannot overlap with each other.
*
* @author margaretha
* */
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
index 2f3857d..d213425 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
@@ -25,6 +25,10 @@
* span, and a positive number (including 0) signifies the expansion to the
* <em>right</em> of the original span.
*
+ * The expansion offsets, namely the start and end positions of an expansion
+ * part, can be stored in payloads. A class number is assigned to the offsets
+ * to group them together.
+ *
* @author margaretha
* */
public class ExpandedExclusionSpans extends SimpleSpans {
@@ -249,8 +253,8 @@
//System.out.println(start+","+end);
cs = new CandidateSpan(start, end, firstSpans.doc(),
- firstSpans.cost(), createPayloads(firstSpans.end(),
- end));
+ firstSpans.cost(),
+ createPayloads(firstSpans.end(), end));
candidateSpans.add(cs);
counter++;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
index 4e4f551..da70fd9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
@@ -18,6 +18,10 @@
* <code>n</code> token positions to either left or right direction from the
* original spans. See examples in {@link SpanExpansionQuery}.
*
+ * The expansion offsets, namely the start and end positions of an expansion
+ * part, can be stored in payloads. A class number is assigned to the offsets
+ * to group them together.
+ *
* @author margaretha
* */
public class ExpandedSpans extends SimpleSpans {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
index c691a46..7af989a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
@@ -55,9 +55,11 @@
*/
private boolean advance() throws IOException {
while (hasMoreSpans) {
- setMatch();
+ if (findMatch()) {
+ hasMoreSpans = firstSpans.next();
+ return true;
+ }
hasMoreSpans = firstSpans.next();
- return true;
}
return false;
}
@@ -67,15 +69,28 @@
*
* @throws IOException
*/
- public void setMatch() throws IOException {
- if (this.startOffset < 0)
+ public boolean findMatch() throws IOException {
+ if (this.startOffset < 0) {
matchStartPosition = firstSpans.end() + startOffset;
- else
+ if (matchStartPosition < firstSpans.start()) {
+ matchStartPosition = firstSpans.start();
+ }
+ }
+ else {
matchStartPosition = firstSpans.start() + startOffset;
+ if (matchStartPosition >= firstSpans.end()) {
+ return false;
+ }
+ }
matchEndPosition = matchStartPosition + this.length;
+ if (matchEndPosition > firstSpans.end()) {
+ matchEndPosition = firstSpans.end();
+ }
+
matchPayload = firstSpans.getPayload();
matchDocNumber = firstSpans.doc();
+ return true;
}
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/TokenDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/TokenDistanceSpans.java
index fcfab27..99a83a4 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/TokenDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/TokenDistanceSpans.java
@@ -15,7 +15,9 @@
/**
* Enumeration of token-based distance span matches consisting of two child
* spans having an actual distance in the range of the minimum and maximum
- * distance parameters specified in the corresponding query.
+ * distance parameters specified in the corresponding query. A TokenDistanceSpan
+ * starts at the minimum start position of its child spans and ends at the
+ * maximum end position of the child spans.
*
* @author margaretha
* */
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java
new file mode 100644
index 0000000..2af6c19
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSubspanQueryWrapper.java
@@ -0,0 +1,50 @@
+package de.ids_mannheim.korap.query.wrap;
+
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import de.ids_mannheim.korap.query.SpanSubspanQuery;
+import de.ids_mannheim.korap.util.QueryException;
+
+/**
+ * @author margaretha
+ *
+ */
+public class SpanSubspanQueryWrapper extends SpanQueryWrapper {
+
+ private SpanQueryWrapper subquery;
+ private int startOffset, length;
+
+ private Logger log = LoggerFactory.getLogger(SpanSubspanQueryWrapper.class);
+
+ public SpanSubspanQueryWrapper(SpanQueryWrapper sqw, int startOffset,
+ int length) {
+ this.subquery = sqw;
+ this.startOffset = startOffset;
+ this.length = length;
+ }
+
+ @Override
+ public SpanQuery toQuery() throws QueryException {
+ if (subquery == null) {
+ log.warn("Subquery of SpanSubspanquery is null.");
+ return null;
+ }
+
+ if (length == 0) {
+ log.warn("Not SpanSubspanQuery. Creating only the subquery.");
+ return subquery.toQuery();
+ }
+
+ SpanQuery sq = subquery.toQuery();
+ if (sq instanceof SpanTermQuery) {
+ log.warn("Not SpanSubspanQuery. Creating only the subquery.");
+ return sq;
+ }
+
+ return new SpanSubspanQuery(subquery.toQuery(), startOffset, length,
+ true);
+ }
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
index 293286b..d4a9ae2 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
@@ -19,249 +19,234 @@
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
public class TestAttributeIndex {
-
- private KorapIndex ki = new KorapIndex();
- private KorapResult kr;
- private FieldDocument fd;
- public TestAttributeIndex() throws IOException {
- ki = new KorapIndex();
- }
-
- private FieldDocument createFieldDoc0(){
- fd = new FieldDocument();
- fd.addString("ID", "doc-0");
- fd.addTV("base",
- "bcbabd",
- "[(0-1)s:a|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<s>1<i>3|@:class=header$<s>2<i>2]" +
- "[(1-2)s:e|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<s>1<i>2]" +
- "[(2-3)s:e|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<s>1<i>5]" +
- "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<s>1<i>5]" +
- "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<s>2<i>5]" +
- "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>1|<>:div#5-6$<i>6<s>-1|@:class=header$<s>1<i>6]"+
- "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1<i>7|@:class=header$<s>2<i>7]");
-
- return fd;
- }
-
- private FieldDocument createFieldDoc1(){
- fd = new FieldDocument();
- fd.addString("ID", "doc-1");
- fd.addTV("base",
- "bcbabd",
- "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|@:class=header$<s>1<i>3|@:class=title$<s>1<i>3|@:class=book$<s>1<i>3]" +
- "[(1-2)s:c|_2#1-2|<>:div#1-2$<i>2<s>1|@:class=header$<s>1<i>2|@:class=title$<s>1<i>2]" +
- "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=book$<s>1<i>5]" +
- "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1<i>5]" +
- "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1<i>5|@:class=book$<s>1<i>5|@:class=title$<s>1<i>5]" +
- "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1<i>6]"+
- "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1<i>7|@:class=title$<s>1<i>7]");
-
- return fd;
- }
-
- private FieldDocument createFieldDoc2(){
- fd = new FieldDocument();
- fd.addString("ID", "doc-1");
- fd.addTV("base",
- "bcbabd",
- "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|@:class=header$<s>2<i>3|@:class=book$<s>1<i>5|@:class=book$<s>2<i>3]" +
- "[(1-2)s:e|_2#1-2|<>:div#1-2$<i>2<s>1|<>:a#1-2$<i>2<s>2|@:class=book$<s>2<i>2|@:class=header$<s>1<i>2]" +
- "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|<>:a#1-2$<i>2<s>2|@:class=header$<s>2<i>2|@:class=book$<s>1<i>5]" +
- "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1<i>5]" +
- "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1<i>5|@:class=book$<s>1<i>5|@:class=book$<s>1<i>5]" +
- "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1<i>6]"+
- "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1<i>7|@:class=book$<s>2<i>7]");
-
- return fd;
- }
-
-
- /** Test matching elementRef
- * @throws IOException
- * */
- @Test
- public void testCase1() throws IOException {
- ki.addDoc(createFieldDoc0());
- ki.commit();
-
- SpanAttributeQuery saq = new SpanAttributeQuery(
- new SpanTermQuery(new Term("base","@:class=header")),
- true);
-
- List<SpanQuery> sql = new ArrayList<>();
- sql.add(saq);
-
- // div with @class=header
- SpanQuery sq = new SpanWithAttributeQuery(
- new SpanElementQuery("base", "div"),
- sql, true);
-
- kr = ki.search(sq, (short) 10);
-
- assertEquals((long) 4, kr.getTotalResults());
- assertEquals(0,kr.getMatch(0).getStartPos());
- assertEquals(2,kr.getMatch(0).getEndPos());
- assertEquals(0,kr.getMatch(1).getStartPos());
- assertEquals(3,kr.getMatch(1).getEndPos());
- assertEquals(3,kr.getMatch(2).getStartPos());
- assertEquals(5,kr.getMatch(2).getEndPos());
- assertEquals(6,kr.getMatch(3).getStartPos());
- assertEquals(7,kr.getMatch(3).getEndPos());
- }
-
- /** Test multiple attributes and negation
- * @throws IOException
- * */
- @Test
- public void testCase2() throws IOException{
- ki.addDoc(createFieldDoc1());
- ki.commit();
- // header and title
- List<SpanQuery> sql = new ArrayList<>();
- sql.add(new SpanAttributeQuery(
- new SpanTermQuery(new Term("base","@:class=header")),true)
- );
- sql.add(new SpanAttributeQuery(
- new SpanTermQuery(new Term("base","@:class=title")),true)
- );
-
- SpanQuery sq = new SpanWithAttributeQuery(
- new SpanElementQuery("base", "div"),
- sql, true);
-
- kr = ki.search(sq, (short) 10);
-
- assertEquals((long) 4, kr.getTotalResults());
- assertEquals(0,kr.getMatch(0).getStartPos());
- assertEquals(3,kr.getMatch(0).getEndPos());
- assertEquals(1,kr.getMatch(1).getStartPos());
- assertEquals(2,kr.getMatch(1).getEndPos());
- assertEquals(4,kr.getMatch(2).getStartPos());
- assertEquals(5,kr.getMatch(2).getEndPos());
- assertEquals(6,kr.getMatch(3).getStartPos());
- assertEquals(7,kr.getMatch(3).getEndPos());
-
- // Add not Attribute
- // header and title, not book
- sql.add(new SpanAttributeQuery(
- new SpanTermQuery(new Term("base","@:class=book")),true,true)
- );
-
- sq = new SpanWithAttributeQuery(
- new SpanElementQuery("base", "div"),
- sql, true);
-
- kr = ki.search(sq, (short) 10);
-
- assertEquals((long) 2, kr.getTotalResults());
- assertEquals(1,kr.getMatch(0).getStartPos());
- assertEquals(2,kr.getMatch(0).getEndPos());
- assertEquals(6,kr.getMatch(1).getStartPos());
- assertEquals(7,kr.getMatch(1).getEndPos());
-
- // Test multiple negations
- // header, not title, not book
- sql.remove(1);
- sql.add(new SpanAttributeQuery(
- new SpanTermQuery(new Term("base","@:class=title")),true,true)
- );
-
- sq = new SpanWithAttributeQuery(
- new SpanElementQuery("base", "div"),
- sql, true);
-
- kr = ki.search(sq, (short) 10);
- assertEquals((long) 1, kr.getTotalResults());
- assertEquals(5,kr.getMatch(0).getStartPos());
- assertEquals(6,kr.getMatch(0).getEndPos());
- }
-
- /** same attribute types referring to different element types
- * */
- @Test
- public void testCase3() throws IOException{
- ki.addDoc(createFieldDoc2());
- ki.commit();
-
- List<SpanQuery> sql = new ArrayList<>();
- sql.add(new SpanAttributeQuery(
- new SpanTermQuery(new Term("base","@:class=header")),true)
- );
- sql.add(new SpanAttributeQuery(
- new SpanTermQuery(new Term("base","@:class=book")),true,true)
- );
- SpanQuery sq = new SpanWithAttributeQuery(
- new SpanElementQuery("base", "div"),
- sql, true);
-
- kr = ki.search(sq, (short) 10);
-
- assertEquals((long) 3, kr.getTotalResults());
- assertEquals(1,kr.getMatch(0).getStartPos());
- assertEquals(2,kr.getMatch(0).getEndPos());
- assertEquals(5,kr.getMatch(1).getStartPos());
- assertEquals(6,kr.getMatch(1).getEndPos());
- assertEquals(6,kr.getMatch(2).getStartPos());
- assertEquals(7,kr.getMatch(2).getEndPos());
- }
-
- /** Test skipto doc for spanWithAttribute*/
- @Test
- public void testCase4() throws IOException{
- ki.addDoc(createFieldDoc1());
- ki.addDoc(createFieldDoc0());
- ki.addDoc(createFieldDoc2());
- ki.commit();
-
- SpanAttributeQuery saq = new SpanAttributeQuery(
- new SpanTermQuery(new Term("base","@:class=book")),
- true);
-
- List<SpanQuery> sql = new ArrayList<>();
- sql.add(saq);
-
- SpanWithAttributeQuery sq = new SpanWithAttributeQuery(
- new SpanElementQuery("base", "div"),
- sql, true);
-
- kr = ki.search(sq, (short) 10);
- assertEquals((long) 6,kr.getTotalResults());
-
- SpanNextQuery snq = new SpanNextQuery(
- new SpanTermQuery(new Term("base", "s:e"))
- ,sq);
-
- kr = ki.search(snq, (short) 10);
-
- assertEquals((long) 1,kr.getTotalResults());
- assertEquals(2,kr.getMatch(0).getLocalDocID());
- assertEquals(1,kr.getMatch(0).getStartPos());
- assertEquals(5,kr.getMatch(0).getEndPos());
- }
-
- /** Arbitrary elements with a specific attribute
- * This is just spanAttribute query, to get the elementEnd,
- * you have to use getElementEnd(). Alternatives (unimplemented):
- * 1) store in payload?
- * 2) wrap as a span
- * */
- @Test
- public void testCase5() throws IOException{
- ki.addDoc(createFieldDoc1());
- ki.commit();
- SpanAttributeQuery saq = new SpanAttributeQuery(
- new SpanTermQuery(new Term("base","@:class=book")),
- true);
- kr = ki.search(saq, (short) 10);
- assertEquals((long) 3, kr.getTotalResults());
-
- /*for (int i=0; i< kr.getTotalResults(); i++){
- System.out.println(
- kr.match(i).getLocalDocID()+" "+
- kr.match(i).startPos + " " +
- kr.match(i).endPos
- );
- }*/
- }
+ private KorapIndex ki;
+ private KorapResult kr;
+ private FieldDocument fd;
+ // The index is created exactly once, in the constructor (no field initializer).
+ public TestAttributeIndex() throws IOException {
+ ki = new KorapIndex();
+ }
+ /** Builds fixture document "doc-0": sets its ID, adds the fixture data via addTV on field "base", and also stores it in fd. */
+ private FieldDocument createFieldDoc0() {
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-0");
+ fd.addTV(
+ "base",
+ "bcbabd",
+ "[(0-1)s:a|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<s>1<i>3|@:class=header$<s>2<i>2]"
+ + "[(1-2)s:e|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<s>1<i>2]"
+ + "[(2-3)s:e|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<s>1<i>5]"
+ + "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<s>1<i>5]"
+ + "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<s>2<i>5]"
+ + "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>1|<>:div#5-6$<i>6<s>-1|@:class=header$<s>1<i>6]"
+ + "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1<i>7|@:class=header$<s>2<i>7]");
+
+ return fd;
+ }
+ /** Builds fixture document "doc-1": sets its ID, adds the fixture data via addTV on field "base", and also stores it in fd. */
+ private FieldDocument createFieldDoc1() {
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-1");
+ fd.addTV(
+ "base",
+ "bcbabd",
+ "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|@:class=header$<s>1<i>3|@:class=title$<s>1<i>3|@:class=book$<s>1<i>3]"
+ + "[(1-2)s:c|_2#1-2|<>:div#1-2$<i>2<s>1|@:class=header$<s>1<i>2|@:class=title$<s>1<i>2]"
+ + "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=book$<s>1<i>5]"
+ + "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1<i>5]"
+ + "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1<i>5|@:class=book$<s>1<i>5|@:class=title$<s>1<i>5]"
+ + "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1<i>6]"
+ + "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1<i>7|@:class=title$<s>1<i>7]");
+
+ return fd;
+ }
+ /** Builds fixture document "doc-2" (ID distinct from createFieldDoc0/1): adds the fixture data via addTV on field "base"; also stored in fd. */
+ private FieldDocument createFieldDoc2() {
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-2");
+ fd.addTV(
+ "base",
+ "bcbabd",
+ "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|@:class=header$<s>2<i>3|@:class=book$<s>1<i>5|@:class=book$<s>2<i>3]"
+ + "[(1-2)s:e|_2#1-2|<>:div#1-2$<i>2<s>1|<>:a#1-2$<i>2<s>2|@:class=book$<s>2<i>2|@:class=header$<s>1<i>2]"
+ + "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|<>:a#1-2$<i>2<s>2|@:class=header$<s>2<i>2|@:class=book$<s>1<i>5]"
+ + "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1<i>5]"
+ + "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1<i>5|@:class=book$<s>1<i>5|@:class=book$<s>1<i>5]"
+ + "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1<i>6]"
+ + "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1<i>7|@:class=book$<s>2<i>7]");
+
+ return fd;
+ }
+
+ /**
+ * Tests matching of div elements that carry the attribute
+ * class=header in createFieldDoc0(); four matches are expected.
+ * @throws IOException
+ * */
+ @Test
+ public void testCase1() throws IOException {
+ ki.addDoc(createFieldDoc0());
+ ki.commit();
+
+ SpanAttributeQuery saq = new SpanAttributeQuery(new SpanTermQuery(
+ new Term("base", "@:class=header")), true);
+
+ // div with @class=header
+ SpanQuery sq = new SpanWithAttributeQuery(new SpanElementQuery("base",
+ "div"), saq, true);
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals((long) 4, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(2, kr.getMatch(0).getEndPos());
+ assertEquals(0, kr.getMatch(1).getStartPos());
+ assertEquals(3, kr.getMatch(1).getEndPos());
+ assertEquals(3, kr.getMatch(2).getStartPos());
+ assertEquals(5, kr.getMatch(2).getEndPos());
+ assertEquals(6, kr.getMatch(3).getStartPos());
+ assertEquals(7, kr.getMatch(3).getEndPos());
+ }
+
+ /**
+ * Tests several attribute constraints on div elements at once,
+ * first all positive, then with one and with two negated attributes.
+ * @throws IOException
+ * */
+ @Test
+ public void testCase2() throws IOException {
+ ki.addDoc(createFieldDoc1());
+ ki.commit();
+ // header and title
+ List<SpanQuery> sql = new ArrayList<>();
+ sql.add(new SpanAttributeQuery(new SpanTermQuery(new Term("base",
+ "@:class=header")), true));
+ sql.add(new SpanAttributeQuery(new SpanTermQuery(new Term("base",
+ "@:class=title")), true));
+
+ SpanQuery sq = new SpanWithAttributeQuery(new SpanElementQuery("base",
+ "div"), sql, true);
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals((long) 4, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(3, kr.getMatch(0).getEndPos());
+ assertEquals(1, kr.getMatch(1).getStartPos());
+ assertEquals(2, kr.getMatch(1).getEndPos());
+ assertEquals(4, kr.getMatch(2).getStartPos());
+ assertEquals(5, kr.getMatch(2).getEndPos());
+ assertEquals(6, kr.getMatch(3).getStartPos());
+ assertEquals(7, kr.getMatch(3).getEndPos());
+
+ // Add not Attribute
+ // header and title, not book
+ sql.add(new SpanAttributeQuery(new SpanTermQuery(new Term("base",
+ "@:class=book")), true, true));
+
+ sq = new SpanWithAttributeQuery(new SpanElementQuery("base", "div"),
+ sql, true);
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals((long) 2, kr.getTotalResults());
+ assertEquals(1, kr.getMatch(0).getStartPos());
+ assertEquals(2, kr.getMatch(0).getEndPos());
+ assertEquals(6, kr.getMatch(1).getStartPos());
+ assertEquals(7, kr.getMatch(1).getEndPos());
+
+ // Test multiple negations
+ // header, not title, not book
+ sql.remove(1);
+ sql.add(new SpanAttributeQuery(new SpanTermQuery(new Term("base",
+ "@:class=title")), true, true));
+
+ sq = new SpanWithAttributeQuery(new SpanElementQuery("base", "div"),
+ sql, true);
+
+ kr = ki.search(sq, (short) 10);
+ assertEquals((long) 1, kr.getTotalResults());
+ assertEquals(5, kr.getMatch(0).getStartPos());
+ assertEquals(6, kr.getMatch(0).getEndPos());
+ }
+
+ /**
+ * Same attribute types attached to different element types: div with class=header and without class=book.
+ * */
+ @Test
+ public void testCase3() throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ List<SpanQuery> sql = new ArrayList<>();
+ sql.add(new SpanAttributeQuery(new SpanTermQuery(new Term("base",
+ "@:class=header")), true));
+ sql.add(new SpanAttributeQuery(new SpanTermQuery(new Term("base",
+ "@:class=book")), true, true));
+ SpanQuery sq = new SpanWithAttributeQuery(new SpanElementQuery("base",
+ "div"), sql, true);
+
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals((long) 3, kr.getTotalResults());
+ assertEquals(1, kr.getMatch(0).getStartPos());
+ assertEquals(2, kr.getMatch(0).getEndPos());
+ assertEquals(5, kr.getMatch(1).getStartPos());
+ assertEquals(6, kr.getMatch(1).getEndPos());
+ assertEquals(6, kr.getMatch(2).getStartPos());
+ assertEquals(7, kr.getMatch(2).getEndPos());
+ }
+
+ /** Tests skipping to the right document (skipTo) for SpanWithAttributeQuery: three documents, query used alone and inside a SpanNextQuery. */
+ @Test
+ public void testCase4() throws IOException {
+ ki.addDoc(createFieldDoc1());
+ ki.addDoc(createFieldDoc0());
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ SpanAttributeQuery saq = new SpanAttributeQuery(new SpanTermQuery(
+ new Term("base", "@:class=book")), true);
+
+ List<SpanQuery> sql = new ArrayList<>();
+ sql.add(saq);
+
+ SpanWithAttributeQuery sq = new SpanWithAttributeQuery(
+ new SpanElementQuery("base", "div"), sql, true);
+
+ kr = ki.search(sq, (short) 10);
+ assertEquals((long) 6, kr.getTotalResults());
+
+ SpanNextQuery snq = new SpanNextQuery(new SpanTermQuery(new Term(
+ "base", "s:e")), sq);
+
+ kr = ki.search(snq, (short) 10);
+
+ assertEquals((long) 1, kr.getTotalResults());
+ assertEquals(2, kr.getMatch(0).getLocalDocID());
+ assertEquals(1, kr.getMatch(0).getStartPos());
+ assertEquals(5, kr.getMatch(0).getEndPos());
+ }
+
+ /**
+ * Arbitrary elements with a specific attribute. This is just a
+ * SpanAttributeQuery; to get the element end, use getElementEnd().
+ * Alternatives (unimplemented): 1) store in payload? 2) wrap as a span
+ * */
+ @Test
+ public void testCase5() throws IOException {
+ ki.addDoc(createFieldDoc1());
+ ki.commit();
+ SpanAttributeQuery saq = new SpanAttributeQuery(new SpanTermQuery(
+ new Term("base", "@:class=book")), true);
+ kr = ki.search(saq, (short) 10);
+ assertEquals((long) 3, kr.getTotalResults());
+
+ /*
+ * for (int i=0; i< kr.getTotalResults(); i++){ System.out.println(
+ * kr.match(i).getLocalDocID()+" "+ kr.match(i).startPos + " " +
+ * kr.match(i).endPos ); }
+ */
+ }
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index 7c5d021..81c3d67 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -28,7 +28,7 @@
1. return all words that are subjects of (that are linked by the “SUBJ” relation to) the string “beginnt”
xip/syntax-dep_rel:beginnt >[func=”SUBJ”] xip/syntax-dep_rel:.*
--> rel("SUBJ", highlight(query1), new TermQuery("s:beginnt"))
+-> rel("SUBJ", highlight(query1), new TermQuery("s:beginnt"))
SUBJ ist modelliert mit offset für den gesamten Bereich
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
index ba9b14f..58f88f5 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
@@ -15,45 +15,77 @@
import de.ids_mannheim.korap.query.SpanDistanceQuery;
import de.ids_mannheim.korap.query.SpanSubspanQuery;
-
public class TestSubSpanIndex {
-
- KorapResult kr;
- KorapIndex ki;
-
+
+ KorapResult kr;
+ KorapIndex ki;
+ /** Creates the index used by the tests: adds the document file /wiki/00001.json.gz and commits. */
+ public TestSubSpanIndex() throws IOException {
+ ki = new KorapIndex();
+ ki.addDocFile(getClass().getResource("/wiki/00001.json.gz").getFile(),
+ true);
+ ki.commit();
+ }
+
 @Test
 public void testCase1() throws IOException {
- ki = new KorapIndex();
- ki.addDocFile(
- getClass().getResource("/wiki/00001.json.gz").getFile(),true);
- ki.commit();
-
- SpanDistanceQuery sdq = new SpanDistanceQuery(
- new SpanTermQuery(new Term("tokens","tt/p:NN")),
- new SpanTermQuery(new Term("tokens","tt/p:VAFIN")),
- new DistanceConstraint(5, 5, true, false),
- true);
-
- SpanSubspanQuery ssq = new SpanSubspanQuery(sdq, 0, 2, true);
- kr = ki.search(ssq, (short) 10);
-
- assertEquals((long) 8,kr.getTotalResults());
- assertEquals(35, kr.getMatch(0).getStartPos());
+ SpanDistanceQuery sdq = new SpanDistanceQuery(
+ new SpanTermQuery(new Term("tokens", "tt/p:NN")),
+ new SpanTermQuery(new Term("tokens", "tt/p:VAFIN")),
+ new DistanceConstraint(5, 5, true,false), true);
+ // Subspan starting at offset 0 with length 2 of every distance match.
+ SpanSubspanQuery ssq = new SpanSubspanQuery(sdq, 0, 2, true);
+ kr = ki.search(ssq, (short) 10);
+
+ assertEquals((long) 8, kr.getTotalResults());
+ assertEquals(35, kr.getMatch(0).getStartPos());
 assertEquals(37, kr.getMatch(0).getEndPos());
 assertEquals(179, kr.getMatch(1).getStartPos());
 assertEquals(181, kr.getMatch(1).getEndPos());
-
- ssq = new SpanSubspanQuery(sdq, -2, 2, true);
- kr = ki.search(ssq, (short) 10);
-
- assertEquals(39, kr.getMatch(0).getStartPos());
+ // Offset -2: presumably counted from the end of the match (TODO confirm).
+ ssq = new SpanSubspanQuery(sdq, -2, 2, true);
+ kr = ki.search(ssq, (short) 10);
+
+ assertEquals(39, kr.getMatch(0).getStartPos());
 assertEquals(41, kr.getMatch(0).getEndPos());
 assertEquals(183, kr.getMatch(1).getStartPos());
 assertEquals(185, kr.getMatch(1).getEndPos());
-
- /*for (KorapMatch km : kr.getMatches()){
- System.out.println(km.getStartPos() +","+km.getEndPos()
- +km.getSnippetBrackets());
- }*/
- }
+
+ /*
+ * for (KorapMatch km : kr.getMatches()){
+ * System.out.println(km.getStartPos() +","+km.getEndPos()
+ * +km.getSnippetBrackets()); }
+ */
+ }
+ /** Checks subspans whose length (7) or negative offset (-7) exceed the match; the expected positions suggest clipping to the match. */
+ @Test
+ public void testCase2() {
+ SpanDistanceQuery sdq = new SpanDistanceQuery(
+ new SpanTermQuery(new Term("tokens", "tt/p:NN")),
+ new SpanTermQuery(new Term("tokens", "tt/p:VAFIN")),
+ new DistanceConstraint(5, 5, true,false), true);
+ // Offset 0, length 7: presumably longer than the match, which seems to end at 41.
+ SpanSubspanQuery ssq = new SpanSubspanQuery(sdq, 0, 7, true);
+ kr = ki.search(ssq, (short) 10);
+
+ assertEquals(35, kr.getMatch(0).getStartPos());
+ assertEquals(41, kr.getMatch(0).getEndPos());
+ assertEquals(179, kr.getMatch(1).getStartPos());
+ assertEquals(185, kr.getMatch(1).getEndPos());
+ // Offset -7, length 4: expected to start at the match start (35, 179).
+ ssq = new SpanSubspanQuery(sdq, -7, 4, true);
+ kr = ki.search(ssq, (short) 10);
+
+ assertEquals((long) 8, kr.getTotalResults());
+ assertEquals(35, kr.getMatch(0).getStartPos());
+ assertEquals(39, kr.getMatch(0).getEndPos());
+ assertEquals(179, kr.getMatch(1).getStartPos());
+ assertEquals(183, kr.getMatch(1).getEndPos());
+
+ /* for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getStartPos() +","+km.getEndPos()
+ +km.getSnippetBrackets()); }*/
+
+ }
+
}
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java
new file mode 100644
index 0000000..e1e473d
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/query/TestSpanSubspanQueryJSON.java
@@ -0,0 +1,26 @@
+package de.ids_mannheim.korap.query;
+
+import static de.ids_mannheim.korap.TestSimple.getJSONQuery;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.lucene.search.spans.SpanQuery;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.util.QueryException;
+
+/**
+ * Checks that /queries/submatch.jsonld is deserialized to a subspan query.
+ * @author margaretha
+ */
+public class TestSpanSubspanQueryJSON {
+    @Test
+    public void testCase1() throws QueryException {
+        String filepath = getClass().getResource("/queries/submatch.jsonld")
+                .getFile();
+        SpanQueryWrapper sqwi = getJSONQuery(filepath);
+        SpanQuery sq = sqwi.toQuery();
+        // JUnit's assertEquals expects (expected, actual) in that order.
+        assertEquals("subspan(<tokens:s />,1,4)", sq.toString());
+    }
+}
diff --git a/src/test/resources/queries/submatch.jsonld b/src/test/resources/queries/submatch.jsonld
new file mode 100644
index 0000000..36153eb
--- /dev/null
+++ b/src/test/resources/queries/submatch.jsonld
@@ -0,0 +1,30 @@
+{
+ "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
+ "errors":[
+
+ ],
+ "warnings":[
+
+ ],
+ "messages":[
+
+ ],
+ "collection":null,
+ "query":{
+ "@type":"korap:reference",
+ "operation":"operation:focus",
+ "operands":[
+ {
+ "@type":"korap:span",
+ "key":"s"
+ }
+ ],
+ "spanRef":[
+ 1,
+ 4
+ ]
+ },
+ "meta":{
+
+ }
+}