Restructured. Fixed the serialization of FCS Boolean queries.
Change-Id: If89c54781d96de95b67a1a2e7a74659c0149a121
diff --git a/src/main/java/de/ids_mannheim/korap/query/elements/Element.java b/src/main/java/de/ids_mannheim/korap/query/elements/Element.java
new file mode 100644
index 0000000..a7da5b4
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/elements/Element.java
@@ -0,0 +1,8 @@
+package de.ids_mannheim.korap.query.elements;
+
+import java.util.Map;
+
+public interface Element {
+    /** Builds the map representation of this element. */
+    Map<String, Object> buildMap();
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/elements/KoralGroup.java b/src/main/java/de/ids_mannheim/korap/query/elements/KoralGroup.java
index 31bd159..7d8a9a1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/elements/KoralGroup.java
+++ b/src/main/java/de/ids_mannheim/korap/query/elements/KoralGroup.java
@@ -1,13 +1,26 @@
-package de.ids_mannheim.korap.query.parse.fcsql;
+package de.ids_mannheim.korap.query.elements;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
-public class KoralSequence {
+import de.ids_mannheim.korap.query.serialize.MapBuilder;
+
+public class KoralGroup implements Element {
+
+ private static final KoralType type = KoralType.GROUP;
+
+    private KoralOperation operation; // serialized under the "operation" key
private boolean inOrder = false;
private List<Object> operands;
private List<Distance> distances;
+ /**
+ * Creates a group for the given operation.
+ *
+ * @param operation the operation whose string form buildMap() writes
+ * under the "operation" key
+ */
+ public KoralGroup (KoralOperation operation) {
+ this.operation = operation;
+ }
+
public boolean isInOrder() {
return inOrder;
}
@@ -32,7 +45,32 @@
this.distances = distances;
}
- public class Distance {
+    /**
+     * Serializes this group into a koral:group map.
+     * "inOrder" and "distances" are written only when distances are set.
+     * Unset operands are serialized as an empty "operands" list instead of
+     * failing with a NullPointerException.
+     *
+     * @return the map with the keys "@type", "operation", optionally
+     *         "inOrder" and "distances", and "operands"
+     */
+    @Override
+    public Map<String, Object> buildMap() {
+        Map<String, Object> map = new LinkedHashMap<String, Object>();
+        map.put("@type", type.toString());
+        map.put("operation", operation.toString());
+
+        if (getDistances() != null) {
+            map.put("inOrder", isInOrder());
+            List<Map<String, Object>> distanceList = new ArrayList<Map<String, Object>>();
+            for (Distance d : getDistances()) {
+                distanceList.add(d.buildMap());
+            }
+            map.put("distances", distanceList);
+        }
+
+        List<Map<String, Object>> operandList = new ArrayList<Map<String, Object>>();
+        if (getOperands() != null) {
+            for (Object o : getOperands()) {
+                // An operand that MapBuilder cannot serialize is added as null.
+                operandList.add(MapBuilder.buildQueryMap(o));
+            }
+        }
+        map.put("operands", operandList);
+        return map;
+    }
+
+ public class Distance implements Element {
+
+ private final KoralType type = KoralType.DISTANCE;
private String key;
private String min;
private String max;
@@ -67,5 +105,16 @@
this.max = max;
}
+        /**
+         * Serializes this distance into a map holding its type, key and
+         * its min/max bounds.
+         */
+        @Override
+        public Map<String, Object> buildMap() {
+            Map<String, Object> serialized = new LinkedHashMap<String, Object>();
+            serialized.put("@type", type.toString());
+            serialized.put("key", getKey());
+            serialized.put("min", getMin());
+            serialized.put("max", getMax());
+            return serialized;
+        }
+
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/elements/KoralOperation.java b/src/main/java/de/ids_mannheim/korap/query/elements/KoralOperation.java
new file mode 100644
index 0000000..0e8b30a
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/elements/KoralOperation.java
@@ -0,0 +1,28 @@
+package de.ids_mannheim.korap.query.elements;
+
+/**
+ * Operations of a Koral group, each carrying the string that is written
+ * under the "operation" key when a group is serialized.
+ */
+public enum KoralOperation {
+    SEQUENCE("operation:sequence"),
+    POSITION("operation:position"),
+    DISJUNCTION("operation:disjunction"),
+    REPETITION("operation:repetition"),
+    CLASS("operation:class"),
+    MERGE("operation:merge"),
+    RELATION("operation:relation");
+
+    // Immutable: enum constants are shared, so the value must never change.
+    private final String value;
+
+    KoralOperation (String value) {
+        this.value = value;
+    }
+
+    /** Returns the serialized form, e.g. "operation:sequence". */
+    @Override
+    public String toString() {
+        return value;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/elements/KoralRelation.java b/src/main/java/de/ids_mannheim/korap/query/elements/KoralRelation.java
new file mode 100644
index 0000000..9f0d3eb
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/elements/KoralRelation.java
@@ -0,0 +1,24 @@
+package de.ids_mannheim.korap.query.elements;
+
+/**
+ * Boolean relations of a term group, each carrying the string that is
+ * used as the group's "relation" value.
+ */
+public enum KoralRelation {
+
+    AND("relation:and"),
+    OR("relation:or");
+
+    // Immutable: enum constants are shared, so the value must never change.
+    private final String value;
+
+    KoralRelation (String value) {
+        this.value = value;
+    }
+
+    /** Returns the serialized form, e.g. "relation:and". */
+    @Override
+    public String toString() {
+        return value;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/elements/KoralTerm.java b/src/main/java/de/ids_mannheim/korap/query/elements/KoralTerm.java
index 1ae2e09..8f1f1c5 100644
--- a/src/main/java/de/ids_mannheim/korap/query/elements/KoralTerm.java
+++ b/src/main/java/de/ids_mannheim/korap/query/elements/KoralTerm.java
@@ -1,61 +1,105 @@
-package de.ids_mannheim.korap.query.parse.fcsql;
+package de.ids_mannheim.korap.query.elements;
-import java.util.Arrays;
import java.util.LinkedHashMap;
-import java.util.List;
import java.util.Map;
-import java.util.Set;
-import de.ids_mannheim.korap.query.serialize.FCSQLQueryProcessor;
-import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
-import eu.clarin.sru.server.fcs.parser.Operator;
-import eu.clarin.sru.server.fcs.parser.RegexFlag;
+public class KoralTerm implements Element {
-public class KoralTerm {
+ public enum KoralTermType {
+ STRING("type:string"), REGEX("type:regex"), WILDCARD("type:wildcard"), PUNCT(
+ "type:punct");
- private String layer;
- private String foundry;
- private String operator;
- private String queryTerm;
- private boolean caseSensitive = true;
- private boolean invalid = false;
-
-
- public String getLayer() {
- return layer;
- }
- public void setLayer(String layer) {
- this.layer = layer;
- }
- public String getFoundry() {
- return foundry;
- }
- public void setFoundry(String foundry) {
- this.foundry = foundry;
- }
- public String getOperator() {
- return operator;
- }
- public void setOperator(String operator) {
- this.operator = operator;
- }
- public String getQueryTerm() {
- return queryTerm;
- }
- public void setQueryTerm(String queryTerm) {
- this.queryTerm = queryTerm;
- }
- public boolean isCaseSensitive() {
- return caseSensitive;
- }
- public void setCaseSensitive(boolean isCaseSensitive) {
- this.caseSensitive = isCaseSensitive;
- }
- public boolean isInvalid() {
- return invalid;
- }
- public void setInvalid(boolean invalid) {
- this.invalid = invalid;
- }
-
+        private final String value; // immutable: enum constants are shared
+
+ KoralTermType (String value) {
+ this.value = value;
+ }
+
+ @Override
+ public String toString() {
+ return value;
+ }
+ }
+
+ private static final KoralType koralType = KoralType.TERM;
+
+ private String layer;
+ private String foundry;
+ private String operator;
+ private String key;
+ private KoralTermType type;
+ private boolean caseSensitive = true;
+ private boolean invalid = false;
+
+ public String getLayer() {
+ return layer;
+ }
+
+ public void setLayer(String layer) {
+ this.layer = layer;
+ }
+
+ public String getFoundry() {
+ return foundry;
+ }
+
+ public void setFoundry(String foundry) {
+ this.foundry = foundry;
+ }
+
+ public String getOperator() {
+ return operator;
+ }
+
+ public void setOperator(String operator) {
+ this.operator = operator;
+ }
+
+ public String getKey() {
+ return key;
+ }
+
+ public void setKey(String key) {
+ this.key = key;
+ }
+
+ public KoralTermType getType() {
+ return type;
+ }
+
+    /**
+     * Sets the term type that buildMap() writes under the "type" key.
+     *
+     * @param type the term type, e.g. KoralTermType.REGEX
+     */
+    public void setType(KoralTermType type) {
+        this.type = type;
+    }
+
+ public boolean isCaseSensitive() {
+ return caseSensitive;
+ }
+
+ public void setCaseSensitive(boolean isCaseSensitive) {
+ this.caseSensitive = isCaseSensitive;
+ }
+
+ public boolean isInvalid() {
+ return invalid;
+ }
+
+ public void setInvalid(boolean invalid) {
+ this.invalid = invalid;
+ }
+
+    /**
+     * Serializes this term into a koral:term map.
+     * "caseInsensitive" is written only for case-insensitive terms, and
+     * "type" only when a term type has been set, so a term without a type
+     * does not fail with a NullPointerException.
+     *
+     * @return the map with the keys "@type", optionally "caseInsensitive",
+     *         "key", "foundry", "layer", optionally "type", and "match"
+     */
+    @Override
+    public Map<String, Object> buildMap() {
+        Map<String, Object> map = new LinkedHashMap<String, Object>();
+        map.put("@type", koralType.toString());
+        if (!isCaseSensitive()) {
+            map.put("caseInsensitive", "true");
+        }
+        map.put("key", getKey());
+        map.put("foundry", getFoundry());
+        map.put("layer", getLayer());
+        if (getType() != null) {
+            map.put("type", getType().toString());
+        }
+        map.put("match", getOperator());
+
+        return map;
+    }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/elements/KoralTermGroup.java b/src/main/java/de/ids_mannheim/korap/query/elements/KoralTermGroup.java
new file mode 100644
index 0000000..94e6f3d
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/elements/KoralTermGroup.java
@@ -0,0 +1,71 @@
+package de.ids_mannheim.korap.query.elements;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import de.ids_mannheim.korap.query.parse.fcsql.ExpressionParser;
+import de.ids_mannheim.korap.query.serialize.MapBuilder;
+import eu.clarin.sru.server.fcs.parser.QueryNode;
+
+/** A koral:termGroup element: a relation applied to a list of operands. */
+public class KoralTermGroup implements Element {
+
+    private static final KoralType type = KoralType.TERMGROUP;
+
+    private String relation;
+    private List<Object> operands = new ArrayList<Object>();
+
+    /** Creates a group with no relation and an empty operand list. */
+    public KoralTermGroup () {
+    }
+
+    /**
+     * Creates a group from the given relation and nodes. Each node is parsed
+     * via parser.parseExpression(node, false, false), i.e. with isNot and
+     * isToken both false, and appended to the operands.
+     */
+    public KoralTermGroup (ExpressionParser parser, KoralRelation relation,
+            List<QueryNode> nodes) {
+        this.relation = relation.toString();
+        for (QueryNode node : nodes) {
+            Object operand = parser.parseExpression(node, false, false);
+            operands.add(operand);
+        }
+    }
+
+    public String getRelation() {
+        return relation;
+    }
+
+    public void setRelation(String relation) {
+        this.relation = relation;
+    }
+
+    public List<Object> getOperands() {
+        return operands;
+    }
+
+    public void setOperands(List<Object> operands) {
+        this.operands = operands;
+    }
+
+    /**
+     * Serializes the group: "@type", "relation" and the operands, each
+     * converted with MapBuilder.buildQueryMap.
+     */
+    @Override
+    public Map<String, Object> buildMap() {
+        Map<String, Object> serialized = new LinkedHashMap<String, Object>();
+        serialized.put("@type", type.toString());
+        serialized.put("relation", getRelation());
+
+        List<Map<String, Object>> serializedOperands = new ArrayList<Map<String, Object>>();
+        for (Object operand : getOperands()) {
+            serializedOperands.add(MapBuilder.buildQueryMap(operand));
+        }
+        serialized.put("operands", serializedOperands);
+        return serialized;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/elements/KoralToken.java b/src/main/java/de/ids_mannheim/korap/query/elements/KoralToken.java
new file mode 100644
index 0000000..751655f
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/elements/KoralToken.java
@@ -0,0 +1,33 @@
+package de.ids_mannheim.korap.query.elements;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/** A koral:token element that wraps a single child element. */
+public class KoralToken implements Element {
+
+    private static final KoralType type = KoralType.TOKEN;
+    private Element child;
+
+    /** Creates a token wrapping the given child. */
+    public KoralToken (Element child) {
+        this.child = child;
+    }
+
+    public Element getChild() {
+        return child;
+    }
+
+    public void setChild(Element child) {
+        this.child = child;
+    }
+
+    /** Serializes the token with the child's map under the "wrap" key. */
+    @Override
+    public Map<String, Object> buildMap() {
+        Map<String, Object> tokenMap = new LinkedHashMap<String, Object>();
+        tokenMap.put("@type", type.toString());
+        tokenMap.put("wrap", child.buildMap());
+        return tokenMap;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/elements/KoralType.java b/src/main/java/de/ids_mannheim/korap/query/elements/KoralType.java
new file mode 100644
index 0000000..23c45a6
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/elements/KoralType.java
@@ -0,0 +1,30 @@
+package de.ids_mannheim.korap.query.elements;
+
+/**
+ * Koral element types, each carrying the string that is written under
+ * the "@type" key of a serialized element.
+ */
+public enum KoralType {
+    TERMGROUP("koral:termGroup"),
+    TERM("koral:term"),
+    TOKEN("koral:token"),
+    SPAN("koral:span"),
+    GROUP("koral:group"),
+    BOUNDARY("koral:boundary"),
+    RELATION("koral:relation"),
+    DISTANCE("koral:distance"),
+    REFERENCE("koral:reference");
+
+    // Immutable: enum constants are shared, so the value must never change.
+    private final String value;
+
+    KoralType (String value) {
+        this.value = value;
+    }
+
+    /** Returns the serialized form, e.g. "koral:token". */
+    @Override
+    public String toString() {
+        return value;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/ExpressionParser.java b/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/ExpressionParser.java
new file mode 100644
index 0000000..5d767a9
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/ExpressionParser.java
@@ -0,0 +1,212 @@
+package de.ids_mannheim.korap.query.parse.fcsql;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+
+import org.hamcrest.core.IsNot;
+
+import de.ids_mannheim.korap.query.elements.KoralTerm;
+import de.ids_mannheim.korap.query.elements.KoralTerm.KoralTermType;
+import de.ids_mannheim.korap.query.elements.KoralTermGroup;
+import de.ids_mannheim.korap.query.elements.KoralToken;
+import de.ids_mannheim.korap.query.elements.KoralRelation;
+import de.ids_mannheim.korap.query.elements.KoralGroup.Distance;
+import de.ids_mannheim.korap.query.serialize.FCSQLQueryProcessor;
+import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
+import eu.clarin.sru.server.fcs.parser.Expression;
+import eu.clarin.sru.server.fcs.parser.ExpressionAnd;
+import eu.clarin.sru.server.fcs.parser.ExpressionNot;
+import eu.clarin.sru.server.fcs.parser.ExpressionOr;
+import eu.clarin.sru.server.fcs.parser.Operator;
+import eu.clarin.sru.server.fcs.parser.QueryNode;
+import eu.clarin.sru.server.fcs.parser.RegexFlag;
+
+public class ExpressionParser {
+
+ private static final String FOUNDRY_CNX = "cnx";
+ private static final String FOUNDRY_OPENNLP = "opennlp";
+ private static final String FOUNDRY_TT = "tt";
+ private static final String FOUNDRY_MATE = "mate";
+ private static final String FOUNDRY_XIP = "xip";
+
+ private List<String> supportedFoundries = Arrays
+ .asList(new String[] { FOUNDRY_CNX, FOUNDRY_OPENNLP, FOUNDRY_TT,
+ FOUNDRY_MATE, FOUNDRY_XIP });
+
+ private FCSQLQueryProcessor processor;
+
+ public ExpressionParser (FCSQLQueryProcessor processor) {
+ this.processor = processor;
+ }
+
+ /** Parses the node with isNot=false (no negation) and isToken=true. */
+ public Object parseExpression(QueryNode queryNode) {
+ return parseExpression(queryNode, false, true);
+ }
+    /**
+     * Parses an FCS-QL expression node into a Koral element.
+     *
+     * @param isNot whether the node is under a negation; the negation is
+     *        pushed down to the terms and flips AND/OR (De Morgan)
+     * @param isToken whether the result must be wrapped in a koral:token
+     * @return the parsed element, or null if the node type is unsupported
+     */
+    public Object parseExpression(QueryNode queryNode, boolean isNot,
+            boolean isToken) {
+        if (queryNode instanceof Expression) {
+            return parseSimpleExpression((Expression) queryNode, isNot, isToken);
+        }
+        else if (queryNode instanceof ExpressionAnd) {
+            KoralRelation relation = isNot ? KoralRelation.OR : KoralRelation.AND;
+            return parseBooleanExpression(queryNode.getChildren(), relation, isNot, isToken);
+        }
+        else if (queryNode instanceof ExpressionNot) {
+            // Toggle instead of forcing true so that a double negation cancels out.
+            return parseExpression(queryNode.getChild(0), !isNot, isToken);
+        }
+        else if (queryNode instanceof ExpressionOr) {
+            KoralRelation relation = isNot ? KoralRelation.AND : KoralRelation.OR;
+            return parseBooleanExpression(queryNode.getChildren(), relation, isNot, isToken);
+        }
+        // ExpressionGroup and ExpressionWildcard (empty token for distance
+        // queries) are not supported yet.
+        processor.addError(StatusCodes.QUERY_TOO_COMPLEX,
+                "FCS diagnostic 11: Query is too complex.");
+        return null;
+    }
+
+    /**
+     * Builds a term group whose operands are parsed as bare elements that
+     * carry the current negation; the group is wrapped only if isToken is set.
+     */
+    private Object parseBooleanExpression(List<QueryNode> operands,
+            KoralRelation relation, boolean isNot, boolean isToken) {
+        KoralTermGroup termGroup = new KoralTermGroup();
+        termGroup.setRelation(relation.toString());
+        for (QueryNode operand : operands) {
+            termGroup.getOperands().add(parseExpression(operand, isNot, false));
+        }
+        return isToken ? new KoralToken(termGroup) : termGroup;
+    }
+
+    /** Parses a single term; when isToken is set the term is returned
+     *  wrapped in a KoralToken, otherwise as the bare term. */
+    private Object parseSimpleExpression(Expression expression, boolean isNot,
+            boolean isToken) {
+        KoralTerm term = parseTerm(expression, isNot);
+        if (!isToken) {
+            return term;
+        }
+        return new KoralToken(term);
+    }
+
+ /** Builds a regex-typed KoralTerm from the expression; isNot is passed on to the match-operator parsing. */
+ public KoralTerm parseTerm(Expression expression, boolean isNot) {
+ KoralTerm koralTerm = new KoralTerm();
+ koralTerm.setType(KoralTermType.REGEX);
+ koralTerm.setKey(expression.getRegexValue());
+ parseLayerIdentifier(koralTerm, expression.getLayerIdentifier());
+ parseQualifier(koralTerm, expression.getLayerQualifier());
+ parseOperator(koralTerm, expression.getOperator(), isNot);
+ parseRegexFlags(koralTerm, expression.getRegexFlags());
+ return koralTerm;
+ }
+    /** Maps the FCS layer identifier to the Koral layer (text: orth, pos: p,
+     *  lemma: l); otherwise reports an error and sets a null layer. */
+    private void parseLayerIdentifier(KoralTerm koralTerm, String identifier) {
+        String koralLayer = null;
+        if (identifier == null) {
+            processor.addError(StatusCodes.MALFORMED_QUERY,
+                    "FCS diagnostic 10: Layer identifier is missing.");
+            koralTerm.setInvalid(true);
+        }
+        else if ("text".equals(identifier)) {
+            koralLayer = "orth";
+        }
+        else if ("pos".equals(identifier)) {
+            koralLayer = "p";
+        }
+        else if ("lemma".equals(identifier)) {
+            koralLayer = "l";
+        }
+        else {
+            processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
+                    "SRU diagnostic 48: Layer " + identifier + " is unsupported.");
+            koralTerm.setInvalid(true);
+        }
+        koralTerm.setLayer(koralLayer);
+    }
+
+    /**
+     * Resolves the foundry of the term from the layer qualifier. Without a
+     * qualifier the default is opennlp for the orth layer and tt otherwise.
+     * Unsupported qualifiers are reported and mark the term invalid, but
+     * are still stored as the foundry.
+     */
+    private void parseQualifier(KoralTerm koralTerm, String qualifier) {
+        String layer = koralTerm.getLayer();
+        if (layer == null) {
+            // No layer is set on the term, so no foundry is chosen.
+            koralTerm.setInvalid(true);
+            return;
+        }
+        String foundry = qualifier;
+        if (foundry == null) {
+            foundry = "orth".equals(layer) ? FOUNDRY_OPENNLP : FOUNDRY_TT;
+        }
+        else if (FOUNDRY_OPENNLP.equals(foundry) && "l".equals(layer)) {
+            processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
+                    "SRU diagnostic 48: Layer lemma with qualifier opennlp is unsupported.");
+            koralTerm.setInvalid(true);
+        }
+        else if (!supportedFoundries.contains(foundry)) {
+            processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
+                    "SRU diagnostic 48: Qualifier " + foundry + " is unsupported.");
+            koralTerm.setInvalid(true);
+        }
+
+        koralTerm.setFoundry(foundry);
+    }
+
+    /** Sets the match operator of the term (match:eq or match:ne); isNot
+     *  inverts it. Unsupported operators are reported and set it to null. */
+    private void parseOperator(KoralTerm koralTerm, Operator operator,
+            boolean isNot) {
+        String match = null;
+        boolean isEquality = (operator == null || operator == Operator.EQUALS);
+        if (isEquality || operator == Operator.NOT_EQUALS) {
+            // Negation is folded into the operator: equality under isNot
+            // becomes match:ne, inequality under isNot becomes match:eq.
+            match = (isEquality != isNot) ? "match:eq" : "match:ne";
+        }
+        else {
+            processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
+                    "SRU diagnostic 37:" + operator.name() + " is unsupported.");
+            koralTerm.setInvalid(true);
+        }
+        koralTerm.setOperator(match);
+    }
+
+    /**
+     * Applies case-sensitivity flags; any other flag is reported as
+     * unsupported and marks the term invalid. */
+    private void parseRegexFlags(KoralTerm koralTerm, Set<RegexFlag> set) {
+        if (set == null) {
+            return; // no flags: the term keeps its current case sensitivity
+        }
+        for (RegexFlag flag : set) {
+            boolean sensitive = (flag == RegexFlag.CASE_SENSITVE);
+            if (sensitive || flag == RegexFlag.CASE_INSENSITVE) {
+                koralTerm.setCaseSensitive(sensitive);
+            }
+            else {
+                processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
+                        "SRU diagnostic 48:" + flag.name() + " is unsupported.");
+                koralTerm.setInvalid(true);
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/FCSSRUQueryParser.java b/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/FCSSRUQueryParser.java
index 1ca4edd..a98cc6e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/FCSSRUQueryParser.java
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/FCSSRUQueryParser.java
@@ -1,36 +1,25 @@
package de.ids_mannheim.korap.query.parse.fcsql;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
-import java.util.Set;
-import de.ids_mannheim.korap.query.parse.fcsql.KoralSequence.Distance;
+import de.ids_mannheim.korap.query.elements.KoralGroup;
+import de.ids_mannheim.korap.query.elements.KoralOperation;
import de.ids_mannheim.korap.query.serialize.FCSQLQueryProcessor;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
-import eu.clarin.sru.server.fcs.parser.Expression;
-import eu.clarin.sru.server.fcs.parser.ExpressionAnd;
-import eu.clarin.sru.server.fcs.parser.Operator;
+import eu.clarin.sru.server.fcs.parser.QueryDisjunction;
import eu.clarin.sru.server.fcs.parser.QueryNode;
import eu.clarin.sru.server.fcs.parser.QuerySegment;
-import eu.clarin.sru.server.fcs.parser.RegexFlag;
+import eu.clarin.sru.server.fcs.parser.QuerySequence;
public class FCSSRUQueryParser {
- private static final String FOUNDRY_CNX = "cnx";
- private static final String FOUNDRY_OPENNLP = "opennlp";
- private static final String FOUNDRY_TT = "tt";
- private static final String FOUNDRY_MATE = "mate";
- private static final String FOUNDRY_XIP = "xip";
-
- private List<String> supportedFoundries = Arrays
- .asList(new String[] { FOUNDRY_CNX, FOUNDRY_OPENNLP, FOUNDRY_TT,
- FOUNDRY_MATE, FOUNDRY_XIP });
-
private FCSQLQueryProcessor processor;
+ private ExpressionParser expressionParser;
public FCSSRUQueryParser (FCSQLQueryProcessor processor) {
this.processor = processor;
+ this.expressionParser = new ExpressionParser(processor);
}
public Object parseQueryNode(QueryNode queryNode) {
@@ -39,10 +28,14 @@
return parseQuerySegment((QuerySegment) queryNode);
// } else if (queryNode instanceof QueryGroup) {
//
- // } else if (queryNode instanceof QuerySequence) {
- //
- // } else if (queryNode instanceof QueryDisjunction) {
- //
+ }
+ else if (queryNode instanceof QuerySequence) {
+ return parseGroupQuery(queryNode.getChildren(),
+ KoralOperation.SEQUENCE);
+ }
+ else if (queryNode instanceof QueryDisjunction) {
+ return parseGroupQuery(queryNode.getChildren(),
+ KoralOperation.DISJUNCTION);
// } else if (queryNode instanceof QueryWithWithin) {
}
@@ -54,9 +47,20 @@
}
}
+    /**
+     * Parses every child node with parseQueryNode and collects the results
+     * as the operands of a new group with the given operation.
+     */
+    private KoralGroup parseGroupQuery(List<QueryNode> children,
+            KoralOperation operation) {
+        List<Object> parsedChildren = new ArrayList<Object>();
+        for (QueryNode node : children) {
+            Object parsed = parseQueryNode(node);
+            parsedChildren.add(parsed);
+        }
+        KoralGroup group = new KoralGroup(operation);
+        group.setOperands(parsedChildren);
+        return group;
+    }
+
private Object parseQuerySegment(QuerySegment segment) {
if ((segment.getMinOccurs() == 1) && (segment.getMaxOccurs() == 1)) {
- return parseExpression(segment.getExpression());
+ return expressionParser.parseExpression(segment.getExpression());
}
else {
processor.addError(StatusCodes.QUERY_TOO_COMPLEX,
@@ -64,154 +68,4 @@
return null;
}
}
-
- private Object parseExpression(QueryNode queryNode) {
- if (queryNode instanceof Expression) {
- Expression expression = (Expression) queryNode;
- return parseSimpleExpression(expression);
- }
- else if (queryNode instanceof ExpressionAnd) {
- ExpressionAnd expressionAnd = (ExpressionAnd) queryNode;
- return parseExpressionAnd(expressionAnd);
- }
- // else if (queryNode instanceof ExpressionGroup) {
- //
- // }
- // else if (queryNode instanceof ExpressionNot) {
- //
- // }
- // else if (queryNode instanceof ExpressionOr) {
- //
- // }
- // else if (queryNode instanceof ExpressionWildcard) {
- //
- // }
- else {
- processor.addError(StatusCodes.QUERY_TOO_COMPLEX,
- "FCS diagnostic 11: Query is too complex.");
- return null;
- }
- }
-
- private Object parseExpressionAnd(ExpressionAnd expressionAnd) {
- KoralSequence koralSequence = new KoralSequence();
- List<Object> operands = new ArrayList<Object>();
- for (QueryNode child : expressionAnd.getChildren()) {
- operands.add(parseExpression(child));
- }
-
- List<Distance> distances = new ArrayList<Distance>();
- Distance d = koralSequence.new Distance("s", 0, 0);
- distances.add(d);
-
- koralSequence.setOperands(operands);
- koralSequence.setDistances(distances);
- return koralSequence;
- }
-
- private Object parseSimpleExpression(Expression expression) {
- KoralTerm koralTerm = new KoralTerm();
- koralTerm.setQueryTerm(expression.getRegexValue());
- parseLayerIdentifier(koralTerm, expression.getLayerIdentifier());
- parseQualifier(koralTerm, expression.getLayerQualifier());
- parseOperator(koralTerm, expression.getOperator());
- parseRegexFlags(koralTerm, expression.getRegexFlags());
- return koralTerm;
- }
-
- private void parseLayerIdentifier(KoralTerm koralTerm, String identifier) {
- String layer = null;
- if (identifier == null) {
- processor.addError(StatusCodes.MALFORMED_QUERY,
- "FCS diagnostic 10: Layer identifier is missing.");
- koralTerm.setInvalid(true);
- }
- else if (identifier.equals("text")) {
- layer = "orth";
- }
- else if (identifier.equals("pos")) {
- layer = "p";
- }
- else if (identifier.equals("lemma")) {
- layer = "l";
- }
- else {
- processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
- "SRU diagnostic 48: Layer " + identifier
- + " is unsupported.");
- koralTerm.setInvalid(true);
- }
-
- koralTerm.setLayer(layer);
- }
-
- private void parseQualifier(KoralTerm koralTerm, String qualifier) {
- String layer = koralTerm.getLayer();
- if (layer == null) {
- koralTerm.setInvalid(true);
- return;
- }
- // Set default foundry
- if (qualifier == null) {
- if (layer.equals("orth")) {
- qualifier = FOUNDRY_OPENNLP;
- }
- else {
- qualifier = FOUNDRY_TT;
- }
- }
- else if (qualifier.equals(FOUNDRY_OPENNLP) && layer.equals("l")) {
- processor
- .addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
- "SRU diagnostic 48: Layer lemma with qualifier opennlp is unsupported.");
- koralTerm.setInvalid(true);
- }
- else if (!supportedFoundries.contains(qualifier)) {
- processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
- "SRU diagnostic 48: Qualifier " + qualifier
- + " is unsupported.");
- koralTerm.setInvalid(true);
- }
-
- koralTerm.setFoundry(qualifier);
- }
-
- private void parseOperator(KoralTerm koralTerm, Operator operator) {
- String matchOperator = null;
- if (operator == null || operator == Operator.EQUALS) {
- matchOperator = "match:eq";
- }
- else if (operator == Operator.NOT_EQUALS) {
- matchOperator = "match:ne";
- }
- else {
- processor
- .addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
- "SRU diagnostic 37:" + operator.name()
- + " is unsupported.");
- koralTerm.setInvalid(true);
- }
- koralTerm.setOperator(matchOperator);
- }
-
- private void parseRegexFlags(KoralTerm koralTerm, Set<RegexFlag> set) {
- // default case sensitive
- if (set != null) {
- for (RegexFlag f : set) {
- if (f == RegexFlag.CASE_SENSITVE) {
- koralTerm.setCaseSensitive(true);
- }
- else if (f == RegexFlag.CASE_INSENSITVE) {
- koralTerm.setCaseSensitive(false);
- }
- else {
- processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
- "SRU diagnostic 48:" + f.name()
- + " is unsupported.");
- koralTerm.setInvalid(true);
- }
- }
- }
- }
-
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessor.java
index 0785237..3862664 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessor.java
@@ -5,10 +5,12 @@
import java.util.List;
import java.util.Map;
+import de.ids_mannheim.korap.query.elements.KoralGroup;
+import de.ids_mannheim.korap.query.elements.KoralTerm;
+import de.ids_mannheim.korap.query.elements.KoralOperation;
+import de.ids_mannheim.korap.query.elements.KoralType;
+import de.ids_mannheim.korap.query.elements.KoralGroup.Distance;
import de.ids_mannheim.korap.query.parse.fcsql.FCSSRUQueryParser;
-import de.ids_mannheim.korap.query.parse.fcsql.KoralSequence;
-import de.ids_mannheim.korap.query.parse.fcsql.KoralSequence.Distance;
-import de.ids_mannheim.korap.query.parse.fcsql.KoralTerm;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
import eu.clarin.sru.server.SRUQueryBase;
import eu.clarin.sru.server.fcs.Constants;
@@ -35,9 +37,6 @@
}
private static final String VERSION_2_0 = "2.0";
- private static final String OPERATION_OR = "operation:or";
- private static final String OPERATION_SEQUENCE = "operation:sequence";
- private static final String OPERATION_POSITION = "operation:position";
private final QueryParser fcsParser = new QueryParser();
private String version;
@@ -99,73 +98,7 @@
private void parseFCSQueryToKoralQuery(QueryNode queryNode) {
FCSSRUQueryParser parser = new FCSSRUQueryParser(this);
Object o = parser.parseQueryNode(queryNode);
- Map<String, Object> queryMap = buildQueryMap(o);
+ Map<String, Object> queryMap = MapBuilder.buildQueryMap(o);
if (queryMap != null) requestMap.put("query", queryMap);
}
-
- private Map<String, Object> buildQueryMap(Object o) {
- if (o != null) {
- if (o instanceof KoralTerm) {
- KoralTerm koralTerm = (KoralTerm) o;
- if (!koralTerm.isInvalid()) {
- return createTermMap(koralTerm);
- }
- }
- else if (o instanceof KoralSequence) {
- KoralSequence koralSequence = (KoralSequence) o;
- return createSequenceMap(koralSequence);
- }
- }
- return null;
- }
-
- private Map<String, Object> createSequenceMap(KoralSequence koralSequence) {
- Map<String, Object> map = new LinkedHashMap<String, Object>();
- map.put("@type", "koral:group");
- map.put("operation", OPERATION_SEQUENCE);
- map.put("inOrder", koralSequence.isInOrder());
-
- if (koralSequence.getDistances() != null) {
- List<Map<String, Object>> distanceList = new ArrayList<Map<String, Object>>();
- for (Distance d : koralSequence.getDistances()) {
- distanceList.add(createDistanceMap(d));
- }
- map.put("distances", distanceList);
- }
-
- List<Map<String, Object>> operandList = new ArrayList<Map<String, Object>>();
- for (Object o : koralSequence.getOperands()) {
- operandList.add(buildQueryMap(o));
- }
- map.put("operands", operandList);
- return map;
- }
-
- private Map<String, Object> createDistanceMap(Distance distance) {
- Map<String, Object> distanceMap = new LinkedHashMap<String, Object>();
- distanceMap.put("@type", "koral:distance");
- distanceMap.put("key", distance.getKey());
- distanceMap.put("min", distance.getMin());
- distanceMap.put("max", distance.getMax());
- return distanceMap;
-
- }
-
- private Map<String, Object> createTermMap(KoralTerm fcsQuery) {
- Map<String, Object> map = new LinkedHashMap<String, Object>();
- map.put("@type", "koral:term");
- if (!fcsQuery.isCaseSensitive()) {
- map.put("caseInsensitive", "true");
- }
- map.put("key", fcsQuery.getQueryTerm());
- map.put("foundry", fcsQuery.getFoundry());
- map.put("layer", fcsQuery.getLayer());
- map.put("match", fcsQuery.getOperator());
-
- Map<String, Object> tokenMap = new LinkedHashMap<String, Object>();
- tokenMap.put("@type", "koral:token");
- tokenMap.put("wrap", map);
- return tokenMap;
- }
-
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/MapBuilder.java b/src/main/java/de/ids_mannheim/korap/query/serialize/MapBuilder.java
new file mode 100644
index 0000000..9dd6009
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/MapBuilder.java
@@ -0,0 +1,30 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.util.Map;
+
+import de.ids_mannheim.korap.query.elements.Element;
+import de.ids_mannheim.korap.query.elements.KoralGroup;
+import de.ids_mannheim.korap.query.elements.KoralTerm;
+import de.ids_mannheim.korap.query.elements.KoralTermGroup;
+import de.ids_mannheim.korap.query.elements.KoralToken;
+
+/** Serializes parsed query objects into their map representation. */
+public class MapBuilder {
+
+    /**
+     * Builds the map of a parsed query object.
+     *
+     * @param o the parsed object; may be null
+     * @return the map built by the object itself if it is an Element,
+     *         otherwise null
+     */
+    public static Map<String, Object> buildQueryMap(Object o) {
+        // KoralToken, KoralGroup, KoralTerm and KoralTermGroup all implement
+        // Element, so one check replaces the per-class instanceof chain;
+        // instanceof is false for null.
+        if (o instanceof Element) {
+            return ((Element) o).buildMap();
+        }
+        return null;
+    }
+}
diff --git a/src/test/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessorTest.java
index 9b7da8b..cea843f 100644
--- a/src/test/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessorTest.java
@@ -2,16 +2,20 @@
import static org.junit.Assert.assertEquals;
+import java.io.IOException;
import java.util.List;
import org.junit.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
public class FCSQLQueryProcessorTest {
+ QuerySerializer qs = new QuerySerializer();
ObjectMapper mapper = new ObjectMapper();
+ JsonNode node;
private void runAndValidate(String query, String jsonLD)
throws JsonProcessingException {
@@ -30,7 +34,7 @@
public void testTermQuery() throws JsonProcessingException {
String query = "\"Sonne\"";
String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:Sonne, "
- + "foundry:opennlp, layer:orth, match:match:eq}}";
+ + "foundry:opennlp, layer:orth, type:type:regex, match:match:eq}}";
runAndValidate(query, jsonLd);
}
@@ -38,7 +42,7 @@
public void testTermQueryWithRegexFlag() throws JsonProcessingException {
String query = "\"Fliegen\" /c";
String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, caseInsensitive:true, "
- + "key:Fliegen, foundry:opennlp, layer:orth, match:match:eq}}";
+ + "key:Fliegen, foundry:opennlp, layer:orth, type:type:regex, match:match:eq}}"; // the /c flag is expected as caseInsensitive:true on the term
runAndValidate(query, jsonLd);
}
@@ -46,17 +50,17 @@
public void testTermQueryWithSpecificLayer() throws JsonProcessingException {
String query = "[text = \"Sonne\"]";
String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:Sonne, "
- + "foundry:opennlp, layer:orth, match:match:eq}}";
+ + "foundry:opennlp, layer:orth, type:type:regex, match:match:eq}}"; // text is expected as foundry opennlp, layer orth
runAndValidate(query, jsonLd);
query = "[lemma = \"sein\"]";
jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:sein, "
- + "foundry:tt, layer:l, match:match:eq}}";
+ + "foundry:tt, layer:l, type:type:regex, match:match:eq}}"; // lemma is expected as foundry tt, layer l
runAndValidate(query, jsonLd);
query = "[pos = \"NN\"]";
jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:NN, "
- + "foundry:tt, layer:p, match:match:eq}}";
+ + "foundry:tt, layer:p, type:type:regex, match:match:eq}}"; // pos is expected as foundry tt, layer p
runAndValidate(query, jsonLd);
}
@@ -64,12 +68,12 @@
public void testTermQueryWithQualifier() throws JsonProcessingException {
String query = "[mate:lemma = \"sein\"]";
String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:sein, "
- + "foundry:mate, layer:l, match:match:eq}}";
+ + "foundry:mate, layer:l, type:type:regex, match:match:eq}}"; // the mate: qualifier is expected as the term's foundry
runAndValidate(query, jsonLd);
query = "[cnx:pos = \"N\"]";
jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:N, "
- + "foundry:cnx, layer:p, match:match:eq}}";
+ + "foundry:cnx, layer:p, type:type:regex, match:match:eq}}"; // likewise, cnx: is expected as the foundry
runAndValidate(query, jsonLd);
}
@@ -97,38 +101,90 @@
}
@Test
- public void testMatchOperation() throws JsonProcessingException {
+ public void testRegex() throws JsonProcessingException {
+     // Alternation and an optional group under an explicit text layer.
+     final String groupedQuery = "[text=\"M(a|ä)nn(er)?\"]";
+     final String groupedJson = "{@type:koral:token,wrap:{@type:koral:term,"
+             + "key:M(a|ä)nn(er)?,foundry:opennlp,layer:orth,type:type:regex,match:match:eq}}";
+     runAndValidate(groupedQuery, groupedJson);
+
+     // A bare quoted term with .*? on both sides.
+     final String wildcardQuery = "\".*?Mann.*?\"";
+     final String wildcardJson = "{@type:koral:token,wrap:{@type:koral:term,key:.*?Mann.*?,"
+             + "foundry:opennlp,layer:orth,type:type:regex,match:match:eq}}";
+     runAndValidate(wildcardQuery, wildcardJson);
+
+     // Unescaped dots are expected to stay in the key as written.
+     final String dottedQuery = "\"z.B.\"";
+     final String dottedJson = "{@type:koral:token,wrap:{@type:koral:term,key:z.B.,"
+             + "foundry:opennlp,layer:orth,type:type:regex,match:match:eq}}";
+     runAndValidate(dottedQuery, dottedJson);
+
+     // An ampersand inside the quotes is expected to stay part of the key.
+     final String ampersandQuery = "\"Sonne&scheint\"";
+     final String ampersandJson = "{@type:koral:token,wrap:{@type:koral:term,key:Sonne&scheint,"
+             + "foundry:opennlp,layer:orth,type:type:regex,match:match:eq}}";
+     runAndValidate(ampersandQuery, ampersandJson);
+
+     // Not possible
+     // query = "\"a\\.\"";
+ }
+
+ @Test
+ public void testNot() throws JsonProcessingException { // covers the != operator and the ! prefix inside a token
String query = "[cnx:pos != \"N\"]";
String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:N, "
- + "foundry:cnx, layer:p, match:match:ne}}";
+ + "foundry:cnx, layer:p, type:type:regex, match:match:ne}}"; // != is expected as match:ne
runAndValidate(query, jsonLd);
+
+ jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:NN, "
+ + "foundry:tt, layer:p, type:type:regex, match:match:eq}}";
+ query = "[!pos != \"NN\"]"; // ! combined with != is expected to cancel out to match:eq
+ runAndValidate(query, jsonLd);
+
+ // Not possible
+ // query = "![pos != \"NN\"]";
}
@Test
public void testSequenceQuery() throws JsonProcessingException {
- String query = "\"blaue\" [pos = \"NN\"]";
+ String query = "\"blaue|grüne\" [pos = \"NN\"]"; // two adjacent tokens; the first is a bare quoted regex containing |
String jsonLd = "{@type:koral:group, operation:operation:sequence, operands:["
- + "{@type:koral:token, wrap:{@type:koral:term, key:blaue, foundry:opennlp, layer:orth, match:match:eq}},"
- + "{@type:koral:token, wrap:{@type:koral:term, key:NN, foundry:tt, layer:p, match:match:eq}}"
+ + "{@type:koral:token, wrap:{@type:koral:term, key:blaue|grüne, foundry:opennlp, layer:orth, type:type:regex, match:match:eq}},"
+ + "{@type:koral:token, wrap:{@type:koral:term, key:NN, foundry:tt, layer:p, type:type:regex, match:match:eq}}"
+ "]}";
runAndValidate(query, jsonLd);
+
+ query = "[text=\"blaue|grüne\"][pos = \"NN\"]"; // explicit text layer, validated against the same expected JSON as above
+ runAndValidate(query, jsonLd);
}
@Test
- public void testAndQuery() throws JsonProcessingException {
- String query = "[text=\"Sonne\" & text=\"scheint\"]";
- String jsonLd = "{@type:koral:group, operation:operation:sequence, inOrder:false,"
- + "distances:["
- + "{@type:koral:distance, key:s, min:0, max:0}],"
- + "operands:["
- + "{@type:koral:token, wrap:{@type:koral:term, key:Sonne, foundry: opennlp, layer:orth, match:match:eq}},"
- + "{@type:koral:token,wrap:{@type:koral:term, key:scheint, foundry: opennlp, layer:orth,match:match:eq}"
- + "}]}";
+ public void testBooleanQuery() throws IOException { // covers & and | inside one token, then | between two tokens
+ String query = "[mate:lemma=\"sein\" & mate:pos=\"PPOSS\"]"; // & inside a token is expected as a koral:termGroup with relation:and
+ String jsonLd = "{@type: koral:token,"
+ + " wrap: { @type: koral:termGroup,"
+ + "relation: relation:and,"
+ + " operands:["
+ + "{@type: koral:term, key: sein, foundry: mate, layer: l, type:type:regex, match: match:eq},"
+ + "{@type: koral:term, key: PPOSS, foundry: mate, layer: p, type:type:regex, match: match:eq}]}}";
runAndValidate(query, jsonLd);
- // sru parser doesnt work for the following queries:
- // String query = "\"Sonne & scheint\"";
- // String query = "\"Sonne&scheint\"";
+ query = "[mate:lemma=\"sein\" | mate:pos=\"PPOSS\"]"; // | inside a token: only the relation value is asserted below
+ qs.setQuery(query, "fcsql", "2.0");
+ node = mapper.readTree(qs.toJSON());
+ assertEquals("relation:or", node.at("/query/wrap/relation").asText());
+
+ query = "[pos=\"NN\"]|[text=\"Mann\"]"; // | between tokens is expected as a koral:group with operation:disjunction
+ jsonLd = "{@type:koral:group,"
+ + "operation:operation:disjunction,"
+ + "operands:["
+ + "{@type:koral:token, wrap:{@type:koral:term,key:NN,foundry:tt,layer:p,type:type:regex,match:match:eq}},"
+ + "{@type:koral:token, wrap:{@type:koral:term,key:Mann,foundry:opennlp,layer:orth,type:type:regex,match:match:eq}}]}";
+ runAndValidate(query, jsonLd);
+ }
+
+ @Test
+ public void testGroupQuery() throws JsonProcessingException { // placeholder: the only query is commented out, so nothing is asserted yet
+ // String query = "(\"blaue\"|\"grüne\")";
+ // runAndValidate(query, jsonLd);
+
}
@Test