Implemented FCSQL Wildcard / empty token serialization.
Change-Id: I831e20c6a88feec2eef821000bc7b08620dd5565
diff --git a/README.md b/README.md
index db6206e..4711bea 100644
--- a/README.md
+++ b/README.md
@@ -103,7 +103,14 @@
There is also a command line version. After installation, simply run
java -jar target/Koral-0.2.jar [query] [queryLanguage]
-
+
+## Prerequisites
+
+Java 7 (OpenJDK or Oracle JDK with [JCE](http://www.oracle.com/technetwork/java/javase/downloads/jce-7-download-432124.html)),
+[Git](http://git-scm.com/),
+and at least [Maven 3.2.1](https://maven.apache.org/).
+Further dependencies are resolved by Maven.
+
## Authorship
Koral and KoralQuery were developed by Joachim Bingel,
diff --git a/src/main/java/de/ids_mannheim/korap/query/object/KoralDistance.java b/src/main/java/de/ids_mannheim/korap/query/object/KoralDistance.java
new file mode 100644
index 0000000..eb88f58
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/object/KoralDistance.java
@@ -0,0 +1,69 @@
+package de.ids_mannheim.korap.query.object;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+public class KoralDistance implements KoralObject {
+ // Distance constraint held by a KoralGroup: a key (default "w"), optional foundry and layer, and a boundary.
+ private final KoralType type = KoralType.DISTANCE;
+ private String key = "w"; // default key used by the single-argument constructor
+ private String foundry; // optional; left out of buildMap() when null
+ private String layer; // optional; left out of buildMap() when null
+ private KoralBoundary boundary;
+ // Creates a distance that keeps the default key and uses the given boundary.
+ public KoralDistance (KoralBoundary boundary) {
+ this.boundary = boundary;
+ }
+ // Creates a distance with an explicit key and the given boundary.
+ public KoralDistance (String key, KoralBoundary boundary) {
+ this(boundary);
+ this.key = key;
+ }
+
+ public String getKey() {
+ return key;
+ }
+
+ public void setKey(String key) {
+ this.key = key;
+ }
+
+ public String getFoundry() {
+ return foundry;
+ }
+
+ public void setFoundry(String foundry) {
+ this.foundry = foundry;
+ }
+
+ public String getLayer() {
+ return layer;
+ }
+
+ public void setLayer(String layer) {
+ this.layer = layer;
+ }
+
+ public KoralBoundary getBoundary() {
+ return boundary;
+ }
+
+ public void setBoundary(KoralBoundary boundary) {
+ this.boundary = boundary;
+ }
+ // Serializes to an insertion-ordered map: "@type", "key", optional "foundry" and "layer", then "boundary".
+ @Override
+ public Map<String, Object> buildMap() {
+ Map<String, Object> distanceMap = new LinkedHashMap<String, Object>();
+ distanceMap.put("@type", type.toString());
+ distanceMap.put("key", key);
+ if (foundry != null){
+ distanceMap.put("foundry", foundry);
+ }
+ if (layer!=null){
+ distanceMap.put("layer", layer);
+ }
+ distanceMap.put("boundary", boundary.buildMap()); // NOTE(review): NullPointerException if boundary is null; neither constructor nor setter checks it
+ return distanceMap;
+ }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/object/KoralGroup.java b/src/main/java/de/ids_mannheim/korap/query/object/KoralGroup.java
index 70d05fc..b304f72 100644
--- a/src/main/java/de/ids_mannheim/korap/query/object/KoralGroup.java
+++ b/src/main/java/de/ids_mannheim/korap/query/object/KoralGroup.java
@@ -22,7 +22,7 @@
private boolean inOrder = false;
private List<KoralObject> operands;
- private List<Distance> distances;
+ private List<KoralDistance> distances;
private List<Frame> frames;
private KoralBoundary boundary;
@@ -46,11 +46,19 @@
this.operands = operands;
}
- public List<Distance> getDistances() {
+ public KoralOperation getOperation() {
+ return operation;
+ }
+
+ public void setOperation(KoralOperation operation) {
+ this.operation = operation;
+ }
+
+ public List<KoralDistance> getDistances() {
return distances;
}
- public void setDistances(List<Distance> distances) {
+ public void setDistances(List<KoralDistance> distances) {
this.distances = distances;
}
@@ -79,14 +87,14 @@
if (getDistances() != null) {
map.put("inOrder", isInOrder());
List<Map<String, Object>> distanceList = new ArrayList<Map<String, Object>>();
- for (Distance d : getDistances()) {
+ for (KoralDistance d : distances) {
distanceList.add(d.buildMap());
}
map.put("distances", distanceList);
}
List<Map<String, Object>> operandList = new ArrayList<Map<String, Object>>();
- for (Object o : getOperands()) {
+ for (Object o : operands) {
operandList.add(MapBuilder.buildQueryMap(o));
}
map.put("operands", operandList);
@@ -113,52 +121,4 @@
return "frame:"+value;
}
}
-
- public class Distance implements KoralObject {
-
- private final KoralType type = KoralType.DISTANCE;
- private String key;
- private String min;
- private String max;
-
- public Distance (String key, int min, int max) {
- this.key = key;
- this.min = String.valueOf(min);
- this.max = String.valueOf(max);
- }
-
- public String getKey() {
- return key;
- }
-
- public void setKey(String key) {
- this.key = key;
- }
-
- public String getMin() {
- return min;
- }
-
- public void setMin(String min) {
- this.min = min;
- }
-
- public String getMax() {
- return max;
- }
-
- public void setMax(String max) {
- this.max = max;
- }
-
- @Override
- public Map<String, Object> buildMap() {
- Map<String, Object> distanceMap = new LinkedHashMap<String, Object>();
- distanceMap.put("@type", type.toString());
- distanceMap.put("key", getKey());
- distanceMap.put("min", getMin());
- distanceMap.put("max", getMax());
- return distanceMap;
- }
- }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/object/KoralToken.java b/src/main/java/de/ids_mannheim/korap/query/object/KoralToken.java
index 06a39ce..7798566 100644
--- a/src/main/java/de/ids_mannheim/korap/query/object/KoralToken.java
+++ b/src/main/java/de/ids_mannheim/korap/query/object/KoralToken.java
@@ -15,6 +15,8 @@
private final static KoralType type = KoralType.TOKEN;
private KoralObject wrappedObject;
+ public KoralToken () {}
+
public KoralToken (KoralObject wrappedObject) {
this.wrappedObject = wrappedObject;
}
@@ -30,7 +32,9 @@
public Map<String, Object> buildMap() {
Map<String, Object> map = new LinkedHashMap<String, Object>();
map.put("@type", type.toString());
- map.put("wrap", wrappedObject.buildMap());
+ if (wrappedObject != null){
+ map.put("wrap", wrappedObject.buildMap());
+ }
return map;
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/ExpressionParser.java b/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/ExpressionParser.java
index 488b4c8..42f5f83 100644
--- a/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/ExpressionParser.java
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/ExpressionParser.java
@@ -20,6 +20,7 @@
import eu.clarin.sru.server.fcs.parser.ExpressionGroup;
import eu.clarin.sru.server.fcs.parser.ExpressionNot;
import eu.clarin.sru.server.fcs.parser.ExpressionOr;
+import eu.clarin.sru.server.fcs.parser.ExpressionWildcard;
import eu.clarin.sru.server.fcs.parser.Operator;
import eu.clarin.sru.server.fcs.parser.QueryNode;
import eu.clarin.sru.server.fcs.parser.RegexFlag;
@@ -76,9 +77,9 @@
return parseBooleanExpression(operands, KoralRelation.OR);
}
}
- // else if (queryNode instanceof ExpressionWildcard) {
- // for distance query, using empty token
- // }
+ else if (queryNode instanceof ExpressionWildcard) {
+ return new KoralToken();
+ }
else {
throw new KoralException(StatusCodes.QUERY_TOO_COMPLEX,
"FCS diagnostic 11: Query is too complex.");
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/FCSSRUQueryParser.java b/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/FCSSRUQueryParser.java
index fa440e1..92f535c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/FCSSRUQueryParser.java
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/fcsql/FCSSRUQueryParser.java
@@ -9,11 +9,13 @@
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
import de.ids_mannheim.korap.query.object.KoralBoundary;
import de.ids_mannheim.korap.query.object.KoralGroup;
+import de.ids_mannheim.korap.query.object.KoralDistance;
import de.ids_mannheim.korap.query.object.KoralObject;
import de.ids_mannheim.korap.query.object.KoralOperation;
import de.ids_mannheim.korap.query.object.KoralSpan;
import de.ids_mannheim.korap.query.object.KoralTerm;
import de.ids_mannheim.korap.query.object.KoralGroup.Frame;
+import eu.clarin.sru.server.fcs.parser.ExpressionWildcard;
import eu.clarin.sru.server.fcs.parser.QueryDisjunction;
import eu.clarin.sru.server.fcs.parser.QueryGroup;
import eu.clarin.sru.server.fcs.parser.QueryNode;
@@ -35,7 +37,8 @@
this.expressionParser = new ExpressionParser();
}
- public KoralObject parseQueryNode(QueryNode queryNode) throws KoralException {
+ public KoralObject parseQueryNode(QueryNode queryNode)
+ throws KoralException {
if (queryNode instanceof QuerySegment) {
return parseQuerySegment((QuerySegment) queryNode);
@@ -44,63 +47,64 @@
return parseQueryNode(queryNode.getChild(0));
}
else if (queryNode instanceof QuerySequence) {
- return parseGroupQuery(queryNode.getChildren(),
- KoralOperation.SEQUENCE);
+ return parseSequenceQuery(queryNode.getChildren());
}
else if (queryNode instanceof QueryDisjunction) {
return parseGroupQuery(queryNode.getChildren(),
KoralOperation.DISJUNCTION);
}
else if (queryNode instanceof QueryWithWithin) {
- return parseWithinQuery((QueryWithWithin)queryNode);
+ return parseWithinQuery((QueryWithWithin) queryNode);
}
else if (queryNode instanceof SimpleWithin) {
- SimpleWithin withinNode = (SimpleWithin) queryNode;
- return parseWithinScope(withinNode.getScope());
- }
+ SimpleWithin withinNode = (SimpleWithin) queryNode;
+ return parseWithinScope(withinNode.getScope());
+ }
else {
throw new KoralException(StatusCodes.QUERY_TOO_COMPLEX,
"FCS diagnostic 11:" + queryNode.getNodeType().name()
+ " is currently unsupported.");
}
}
- private KoralObject parseWithinQuery(QueryWithWithin queryNode) throws KoralException {
- KoralGroup koralGroup = new KoralGroup(KoralOperation.POSITION);
- koralGroup.setFrames(Arrays.asList(Frame.IS_AROUND));
-
- List<KoralObject> operands = new ArrayList<KoralObject>();
- operands.add(parseQueryNode(queryNode.getWithin()));
- operands.add(parseQueryNode(queryNode.getQuery()));
- koralGroup.setOperands(operands);
- return koralGroup;
- }
- private KoralSpan parseWithinScope(Scope scope) throws KoralException{
- if (scope == null){
- throw new KoralException(StatusCodes.MALFORMED_QUERY,
+ private KoralObject parseWithinQuery(QueryWithWithin queryNode)
+ throws KoralException { // builds a POSITION group with frame IS_AROUND from a query-with-within node
+ KoralGroup koralGroup = new KoralGroup(KoralOperation.POSITION);
+ koralGroup.setFrames(Arrays.asList(Frame.IS_AROUND));
+ // Operand order: the parsed within part first, then the parsed inner query.
+ List<KoralObject> operands = new ArrayList<KoralObject>();
+ operands.add(parseQueryNode(queryNode.getWithin()));
+ operands.add(parseQueryNode(queryNode.getQuery()));
+ koralGroup.setOperands(operands);
+ return koralGroup;
+ }
+
+ private KoralSpan parseWithinScope(Scope scope) throws KoralException {
+ if (scope == null) {
+ throw new KoralException(StatusCodes.MALFORMED_QUERY,
"FCS diagnostic 11: Within context is missing.");
- }
+ }
- KoralContext contextSpan;
- if (scope == Scope.SENTENCE) {
- contextSpan = KoralContext.SENTENCE;
- }
- else if (scope == Scope.PARAGRAPH){
- contextSpan = KoralContext.PARAGRAPH;
- }
- else if (scope == Scope.TEXT){
+ KoralContext contextSpan;
+ if (scope == Scope.SENTENCE) {
+ contextSpan = KoralContext.SENTENCE;
+ }
+ else if (scope == Scope.PARAGRAPH) {
+ contextSpan = KoralContext.PARAGRAPH;
+ }
+ else if (scope == Scope.TEXT) {
contextSpan = KoralContext.TEXT;
- }
- else{
- throw new KoralException(StatusCodes.QUERY_TOO_COMPLEX,
+ }
+ else {
+ throw new KoralException(StatusCodes.QUERY_TOO_COMPLEX,
"FCS diagnostic 11: Within scope " + scope.toString()
+ " is currently unsupported.");
- }
-
- return new KoralSpan(new KoralTerm(contextSpan));
+ }
+
+ return new KoralSpan(new KoralTerm(contextSpan));
}
-
- private KoralGroup parseGroupQuery(List<QueryNode> children,
+
+ private KoralGroup parseGroupQuery(List<QueryNode> children,
KoralOperation operation) throws KoralException {
KoralGroup koralGroup = new KoralGroup(operation);
List<KoralObject> operands = new ArrayList<KoralObject>();
@@ -111,10 +115,118 @@
return koralGroup;
}
- private KoralObject parseQuerySegment(QuerySegment segment) throws KoralException {
+ private KoralGroup parseSequenceQuery(List<QueryNode> children)
+ throws KoralException { // builds a SEQUENCE group; interior wildcard segments become boundaries that createDistance() later converts
+ KoralGroup koralGroup = new KoralGroup(KoralOperation.SEQUENCE);
+ List<KoralObject> operands = new ArrayList<KoralObject>();
+ KoralObject operand;
+ // isEmptyTokenFound: at least one interior wildcard was seen; isLastTokenEmpty: the previous child was one.
+ boolean isEmptyTokenFound = false;
+ boolean isLastTokenEmpty = false;
+ int size = children.size();
+ // Only children that are neither first nor last are treated as empty tokens; all others go through parseQueryNode().
+ for (int i = 0; i < size; i++) {
+ QueryNode child = children.get(i);
+ if (i > 0 && i < size - 1 && findEmptyToken(child)) {
+ QuerySegment qs = (QuerySegment) child; // cast is safe: findEmptyToken() already checked instanceof QuerySegment
+ if (isLastTokenEmpty) {
+ updateBoundary(operands.get(operands.size() - 1), qs); // consecutive wildcards are merged into the boundary added just before
+ }
+ else {
+ operands.add(new KoralBoundary(qs.getMinOccurs(), qs
+ .getMaxOccurs()));
+ isLastTokenEmpty = true;
+ }
+ isEmptyTokenFound = true;
+ continue;
+ }
+ operand = parseQueryNode(child);
+ operands.add(operand);
+ isLastTokenEmpty = false;
+ }
+ // The KoralBoundary entries in operands are placeholders, handed to createDistance() below.
+ if (isEmptyTokenFound) {
+ //operands = updateOperands(operands);
+ operands = createDistance(koralGroup,operands);
+ }
+
+ koralGroup.setOperands(operands);
+ return koralGroup;
+ }
+
+ private boolean findEmptyToken(QueryNode child) { // true only when child is a QuerySegment whose expression is an ExpressionWildcard
+ if (child instanceof QuerySegment
+ && ((QuerySegment) child).getExpression() instanceof ExpressionWildcard) {
+ return true;
+ }
+ return false;
+ }
+
+ private void updateBoundary(KoralObject koralObject, QuerySegment qs) { // adds qs's min/max occurrences onto an existing boundary, in place
+ KoralBoundary boundary = (KoralBoundary) koralObject; // unchecked cast: ClassCastException if the argument is not a KoralBoundary
+ boundary.setMin(boundary.getMin() + qs.getMinOccurs());
+ boundary.setMax(boundary.getMax() + qs.getMaxOccurs()); // NOTE(review): if an unbounded max is encoded as a sentinel value, plain addition gives a wrong result; confirm
+ }
+
+ private List<KoralObject> createDistance(KoralGroup koralGroup, List<KoralObject> operands){ // stores interior KoralBoundary operands as koralGroup's distances and returns a reduced operand list
+ boolean isLastOperandUpdated = false;
+ boolean isDistanceSet = false; // NOTE(review): never assigned true anywhere in this method
+ List<KoralObject> newOperands = new ArrayList<KoralObject>(
+ operands.size());
+ newOperands.add(operands.get(0));
+ int operandSize = operands.size();
+ for (int i = 1; i < operandSize - 1; i++) { // interior operands only; NOTE(review): non-boundary interior operands are never copied to newOperands
+ KoralObject operand = operands.get(i);
+ if (operand instanceof KoralBoundary) {
+ if (isDistanceSet){
+ // NOTE(review): empty branch, and unreachable while isDistanceSet stays false
+ }
+ else{
+ List<KoralDistance> distances = new ArrayList<KoralDistance>(1);
+ distances.add(new KoralDistance((KoralBoundary) operand));
+ koralGroup.setDistances(distances); // replaces any distance list set by an earlier boundary
+ }
+ isLastOperandUpdated = true;
+ }
+ isLastOperandUpdated = false; // NOTE(review): runs on every iteration, undoing the assignment above, so the flag is always false after the loop
+ }
+ if (!isLastOperandUpdated){ // always taken as written, so the last operand is always appended
+ newOperands.add(operands.get(operandSize-1));
+ }
+ return newOperands;
+ }
+
+ private List<KoralObject> updateOperands(List<KoralObject> operands) { // NOTE(review): the only visible call site (in parseSequenceQuery) is commented out
+ boolean isLastOperandUpdated = false;
+ List<KoralObject> newOperands = new ArrayList<KoralObject>(
+ operands.size());
+ newOperands.add(operands.get(0));
+ int operandSize = operands.size();
+ for (int i = 1; i < operandSize - 1; i++) {
+ KoralObject operand = operands.get(i);
+ if (operand instanceof KoralBoundary) { // wraps the neighbours of a boundary in a nested SEQUENCE group carrying that distance
+ KoralGroup koralGroup = new KoralGroup(KoralOperation.SEQUENCE);
+ List<KoralDistance> distances = new ArrayList<KoralDistance>(1);
+ distances.add(new KoralDistance((KoralBoundary) operand));
+ koralGroup.setDistances(distances);
+ koralGroup.setOperands(Arrays.asList(newOperands.get(i - 1), // NOTE(review): newOperands holds one element throughout the loop, so get(i - 1) throws IndexOutOfBoundsException for i >= 2
+ operands.get(i + 1)));
+ newOperands.set(i-1,koralGroup);
+ isLastOperandUpdated = true;
+ }
+ isLastOperandUpdated = false; // NOTE(review): runs on every iteration, undoing the assignment above
+ }
+ if (!isLastOperandUpdated){ // always taken as written, so the last operand is appended even when a group already contains it
+ newOperands.add(operands.get(operandSize-1));
+ }
+ return newOperands;
+ }
+
+ private KoralObject parseQuerySegment(QuerySegment segment)
+ throws KoralException {
int minOccurs = segment.getMinOccurs();
int maxOccurs = segment.getMaxOccurs();
-
+
if ((minOccurs == 1) && (maxOccurs == 1)) {
return expressionParser.parseExpression(segment.getExpression());
}
diff --git a/src/test/java/de/ids_mannheim/korap/query/serialize/FCSQLComplexTest.java b/src/test/java/de/ids_mannheim/korap/query/serialize/FCSQLComplexTest.java
index 6aedc12..2493802 100644
--- a/src/test/java/de/ids_mannheim/korap/query/serialize/FCSQLComplexTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/serialize/FCSQLComplexTest.java
@@ -7,6 +7,8 @@
import org.junit.Test;
+import com.fasterxml.jackson.core.JsonProcessingException;
+
/**
* @author margaretha
*
@@ -149,7 +151,7 @@
query = "\"die\"?";
jsonLd = "{@type:koral:boundary,min:0, max:1}";
FCSQLQueryProcessorTest.validateNode(query, "/query/boundary", jsonLd);
-
+
query = "\"die\"*";
jsonLd = "{@type:koral:boundary,min:0}";
FCSQLQueryProcessorTest.validateNode(query, "/query/boundary", jsonLd);
@@ -159,10 +161,59 @@
FCSQLQueryProcessorTest.validateNode(query, "/query/boundary", jsonLd);
}
+ // wildcards
@Test
- public void testEmptyToken() {
+ public void testQueryWithEmptyToken() throws IOException {
+ // expansion query
+ String query = "[]{2}\"Hund\"";
+ String jsonLd = "{@type:koral:group, "
+ + "operation:operation:sequence, "
+ + "operands:["
+ + "{@type:koral:group,"
+ + "operation:operation:repetition,"
+ + "operands:["
+ + "{@type:koral:token}],"
+ + "boundary:{@type:koral:boundary,min:2,max:2}},"
+ + "{@type:koral:token, "
+ + "wrap:{@type:koral:term, key:Hund, foundry:opennlp, layer:orth, type:type:regex, match:match:eq}}"
+ + "]}";
+ FCSQLQueryProcessorTest.runAndValidate(query, jsonLd);
+
+ query = "\"Hund\"[]{2}";
+ jsonLd = "{@type:koral:group," + "operation:operation:repetition,"
+ + "operands:[" + "{@type:koral:token}],"
+ + "boundary:{@type:koral:boundary,min:2,max:2}}";
+ FCSQLQueryProcessorTest
+ .validateNode(query, "/query/operands/1", jsonLd);
+
+ // arbitrary tokens
+ query = "[]{2}";
+ FCSQLQueryProcessorTest.runAndValidate(query, jsonLd);
+
+ // sequence with extension
+ query = "[cnx:pos=\"A\"] \"Hund\"[]{2}";
+ jsonLd = "["
+ + "{@type:koral:token,wrap:{@type:koral:term,key:A,foundry:cnx,layer:p,type:type:regex,match:match:eq}},"
+ + "{@type:koral:token,wrap:{@type:koral:term,key:Hund,foundry:opennlp,layer:orth,type:type:regex,match:match:eq}},"
+ + "{@type:koral:group,operation:operation:repetition,operands:["
+ + "{@type:koral:token}],boundary:{@type:koral:boundary,min:2,max:2}}"
+ + "]";
+ FCSQLQueryProcessorTest.validateNode(query, "/query/operands", jsonLd);
+ }
+
+ @Test
+ public void testQueryWithDistance() throws JsonProcessingException {
// distance query
- // query = "\"Hund\" []{3} \"Katze\"";
+ String query = "\"Katze\" []{3} \"Hund\"";
+ String jsonLd = "{@type:koral:group,operation:operation:sequence,inOrder:false,"
+ + "distances:["
+ + "{@type:koral:distance,key:w,boundary:{@type:koral:boundary,min:3,max:3}}"
+ + "],"
+ + "operands:["
+ + "{@type:koral:token,wrap:{@type:koral:term,key:Katze,foundry:opennlp,layer:orth,type:type:regex,match:match:eq}},"
+ + "{@type:koral:token,wrap:{@type:koral:term,key:Hund,foundry:opennlp,layer:orth,type:type:regex,match:match:eq}}]}";
+ FCSQLQueryProcessorTest.runAndValidate(query, jsonLd);
+
}
// -------------------------------------------------------------------------
@@ -203,7 +254,7 @@
"FCS diagnostic 11: Within scope UTTERANCE is currently unsupported.",
(String) error.get(1));
}
-
+
@Test
public void testWrongQuery() throws IOException {
String query = "!(mate:lemma=\"sein\" | mate:pos=\"PPOSS\")";
@@ -226,8 +277,8 @@
error.get(1).toString().startsWith("FCS diagnostic 10"));
query = "[pos=\"NN\"]&[text=\"Mann\"]";
- error = FCSQLQueryProcessorTest
- .getError(new FCSQLQueryProcessor(query, "2.0"));
+ error = FCSQLQueryProcessorTest.getError(new FCSQLQueryProcessor(query,
+ "2.0"));
assertEquals(399, error.get(0));
String msg = (String) error.get(1);
assertEquals(true, msg.startsWith("FCS diagnostic 10"));
diff --git a/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java
index 221c0bc..fd939b3 100644
--- a/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java
@@ -726,6 +726,24 @@
assertEquals("koral:token", operands.get(0).at("/@type").asText());
assertEquals(true, operands.get(0).at("/key").isMissingNode());
+ query = "[base=Mann][]";
+ qs.setQuery(query, "poliqarpplus");
+ res = mapper.readTree(qs.toJSON());
+ operands = Lists.newArrayList(res.at("/query/operands").elements());
+ assertEquals("koral:token", operands.get(1).at("/@type").asText());
+ assertEquals(true, operands.get(1).at("/key").isMissingNode());
+
+ query = "[base=Mann][]{3}";
+ qs.setQuery(query, "poliqarpplus");
+ res = mapper.readTree(qs.toJSON());
+ operands = Lists.newArrayList(res.at("/query/operands").elements());
+ res = operands.get(1);
+ assertEquals("koral:group", res.at("/@type").asText());
+ assertEquals(true, res.at("/key").isMissingNode());
+ assertEquals("operation:repetition", res.at("/operation").asText());
+ assertEquals(3, res.at("/boundary/min").asInt());
+ assertEquals(3, res.at("/boundary/max").asInt());
+
query = "startswith(<s>, [][base=Mann])";
qs.setQuery(query, "poliqarpplus");
res = mapper.readTree(qs.toJSON());